Fixes for 6.1

author Sasha Levin <sashal@kernel.org>

Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)

committer Sasha Levin <sashal@kernel.org>

Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)
author Sasha Levin <sashal@kernel.org>
Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)
committer Sasha Levin <sashal@kernel.org>
Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)
diff --git a/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch b/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch

new file mode 100644 (file)

index 0000000..71b78eb
--- /dev/null
+++ b/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch
@@ -0,0 +1,79 @@
+From 5ecc09ed8ee97b25ce54ed8b04f58ded5529bbfe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Aug 2023 20:40:51 +0900
+Subject: ALSA: hda/realtek: Add quirk for HP Victus 16-d1xxx to enable mute
+ LED
+
+From: SungHwan Jung <onenowy@gmail.com>
+
+[ Upstream commit 93dc18e11b1ab2d485b69f91c973e6b83e47ebd0 ]
+
+This quirk enables mute LED on HP Victus 16-d1xxx (8A25) laptops, which
+use ALC245 codec.
+
+Signed-off-by: SungHwan Jung <onenowy@gmail.com>
+Link: https://lore.kernel.org/r/20230823114051.3921-1-onenowy@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 7d549229d0b95..e81bc0c026eba 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -4639,6 +4639,22 @@ static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec,
+       }
+ }
+ 
++static void alc245_fixup_hp_mute_led_coefbit(struct hda_codec *codec,
++                                        const struct hda_fixup *fix,
++                                        int action)
++{
++      struct alc_spec *spec = codec->spec;
++
++      if (action == HDA_FIXUP_ACT_PRE_PROBE) {
++              spec->mute_led_polarity = 0;
++              spec->mute_led_coef.idx = 0x0b;
++              spec->mute_led_coef.mask = 3 << 2;
++              spec->mute_led_coef.on = 2 << 2;
++              spec->mute_led_coef.off = 1 << 2;
++              snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
++      }
++}
++
+ /* turn on/off mic-mute LED per capture hook by coef bit */
+ static int coef_micmute_led_set(struct led_classdev *led_cdev,
+                               enum led_brightness brightness)
+@@ -7289,6 +7305,7 @@ enum {
+       ALC236_FIXUP_DELL_DUAL_CODECS,
+       ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
+       ALC287_FIXUP_TAS2781_I2C,
++      ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
+ };
+ 
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9364,6 +9381,10 @@ static const struct hda_fixup alc269_fixups[] = {
+               .chained = true,
+               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+       },
++      [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc245_fixup_hp_mute_led_coefbit,
++      },
+ };
+ 
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -9630,6 +9651,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+       SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++      SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
+       SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+-- 
+2.40.1
+
diff --git a/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch b/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch

new file mode 100644 (file)

index 0000000..1af651c
--- /dev/null
+++ b/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch
@@ -0,0 +1,58 @@
+From 7028ce65c3db5104a485063bfaf593e5c3d0e817 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Aug 2023 20:39:48 +0200
+Subject: ALSA: hda/realtek: Add quirk for mute LEDs on HP ENVY x360 15-eu0xxx
+
+From: Fabian Vogt <fabian@ritter-vogt.de>
+
+[ Upstream commit c99c26b16c1544534ebd6a5f27a034f3e44d2597 ]
+
+The LED for the mic mute button is controlled by GPIO2.
+The mute button LED is slightly more complex, it's controlled by two bits
+in coeff 0x0b.
+
+Signed-off-by: Fabian Vogt <fabian@ritter-vogt.de>
+Link: https://lore.kernel.org/r/2693091.mvXUDI8C0e@fabians-envy
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index e81bc0c026eba..e01af481e0d0d 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7306,6 +7306,7 @@ enum {
+       ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
+       ALC287_FIXUP_TAS2781_I2C,
+       ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
++      ALC245_FIXUP_HP_X360_MUTE_LEDS,
+ };
+ 
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9385,6 +9386,12 @@ static const struct hda_fixup alc269_fixups[] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc245_fixup_hp_mute_led_coefbit,
+       },
++      [ALC245_FIXUP_HP_X360_MUTE_LEDS] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc245_fixup_hp_mute_led_coefbit,
++              .chained = true,
++              .chain_id = ALC245_FIXUP_HP_GPIO_LED
++      },
+ };
+ 
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -9620,6 +9627,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
++      SND_PCI_QUIRK(0x103c, 0x888a, "HP ENVY x360 Convertible 15-eu0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS),
+       SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
+-- 
+2.40.1
+
diff --git a/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch b/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch

new file mode 100644 (file)

index 0000000..312dd4f
--- /dev/null
+++ b/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch
@@ -0,0 +1,89 @@
+From 180826e595afdaf93f61a7d8929efb5e586ee9b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 16:50:41 +0800
+Subject: ALSA: hda/realtek - ALC287 I2S speaker platform support
+
+From: Kailang Yang <kailang@realtek.com>
+
+[ Upstream commit e43252db7e207a2e194e6a4883a43a31a776a968 ]
+
+0x17 was only speaker pin, DAC assigned will be 0x03. Headphone
+assigned to 0x02.
+Playback via headphone will get EQ filter processing. So, it needs to
+swap DAC.
+
+Tested-by: Mark Pearson <mpearson@lenovo.com>
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Link: https://lore.kernel.org/r/4e4cfa1b3b4c46838aecafc6e8b6f876@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index e01af481e0d0d..62476b6fd248c 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7046,6 +7046,27 @@ static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec,
+       }
+ }
+ 
++/* Forcibly assign NID 0x03 to HP while NID 0x02 to SPK */
++static void alc287_fixup_bind_dacs(struct hda_codec *codec,
++                                  const struct hda_fixup *fix, int action)
++{
++      struct alc_spec *spec = codec->spec;
++      static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */
++      static const hda_nid_t preferred_pairs[] = {
++              0x17, 0x02, 0x21, 0x03, 0
++      };
++
++      if (action != HDA_FIXUP_ACT_PRE_PROBE)
++              return;
++
++      snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
++      spec->gen.preferred_dacs = preferred_pairs;
++      spec->gen.auto_mute_via_amp = 1;
++      snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
++                          0x0); /* Make sure 0x14 was disable */
++}
++
++
+ enum {
+       ALC269_FIXUP_GPIO2,
+       ALC269_FIXUP_SONY_VAIO,
+@@ -7307,6 +7328,7 @@ enum {
+       ALC287_FIXUP_TAS2781_I2C,
+       ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
+       ALC245_FIXUP_HP_X360_MUTE_LEDS,
++      ALC287_FIXUP_THINKPAD_I2S_SPK,
+ };
+ 
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9392,6 +9414,10 @@ static const struct hda_fixup alc269_fixups[] = {
+               .chained = true,
+               .chain_id = ALC245_FIXUP_HP_GPIO_LED
+       },
++      [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc287_fixup_bind_dacs,
++      },
+ };
+ 
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -10514,6 +10540,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+               {0x17, 0x90170111},
+               {0x19, 0x03a11030},
+               {0x21, 0x03211020}),
++      SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
++              {0x17, 0x90170110},
++              {0x19, 0x03a11030},
++              {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x17, 0x90170110},
+-- 
+2.40.1
+
diff --git a/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch b/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch

new file mode 100644 (file)

index 0000000..4aeaadf
--- /dev/null
+++ b/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch
@@ -0,0 +1,43 @@
+From 948225e3347ea22b5e5ad668c64ed799bb47d8e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 16:27:16 +0800
+Subject: ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support
+
+From: Kailang Yang <kailang@realtek.com>
+
+[ Upstream commit 41b07476da38ac2878a14e5b8fe0312c41ea36e3 ]
+
+New platform SSID:0x231f.
+
+0x17 was only speaker pin, DAC assigned will be 0x03. Headphone
+assigned to 0x02.
+Playback via headphone will get EQ filter processing.
+So, it needs to swap DAC.
+
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 62476b6fd248c..3bea49e772a1f 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10544,6 +10544,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+               {0x17, 0x90170110},
+               {0x19, 0x03a11030},
+               {0x21, 0x03211020}),
++      SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
++              {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */
++              {0x19, 0x04a11040},
++              {0x21, 0x04211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x17, 0x90170110},
+-- 
+2.40.1
+
diff --git a/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch b/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch

new file mode 100644 (file)

index 0000000..adf2824
--- /dev/null
+++ b/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch
@@ -0,0 +1,180 @@
+From 07910699cda1dfec48856de36c7d2b2cf7f89f31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Aug 2023 16:58:35 +0800
+Subject: ALSA: hda/tas2781: Add tas2781 HDA driver
+
+From: Shenghao Ding <shenghao-ding@ti.com>
+
+[ Upstream commit 3babae915f4c15d76a5134e55806a1c1588e2865 ]
+
+Integrate tas2781 configs for Lenovo Laptops. All of the tas2781s in the
+laptop will be aggregated as one audio device. The code supports Realtek
+as the primary codec. Rename "struct cs35l41_dev_name" to
+"struct scodec_dev_name" for all other side codecs instead of the certain
+one.
+
+Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
+Link: https://lore.kernel.org/r/20230818085836.1442-1-shenghao-ding@ti.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 88 +++++++++++++++++++++++++++++++++--
+ 1 file changed, 85 insertions(+), 3 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 57e07aa4e136c..7d549229d0b95 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6721,7 +6721,7 @@ static void comp_generic_playback_hook(struct hda_pcm_stream *hinfo, struct hda_
+       }
+ }
+ 
+-struct cs35l41_dev_name {
++struct scodec_dev_name {
+       const char *bus;
+       const char *hid;
+       int index;
+@@ -6730,7 +6730,7 @@ struct cs35l41_dev_name {
+ /* match the device name in a slightly relaxed manner */
+ static int comp_match_cs35l41_dev_name(struct device *dev, void *data)
+ {
+-      struct cs35l41_dev_name *p = data;
++      struct scodec_dev_name *p = data;
+       const char *d = dev_name(dev);
+       int n = strlen(p->bus);
+       char tmp[32];
+@@ -6746,12 +6746,32 @@ static int comp_match_cs35l41_dev_name(struct device *dev, void *data)
+       return !strcmp(d + n, tmp);
+ }
+ 
++static int comp_match_tas2781_dev_name(struct device *dev,
++      void *data)
++{
++      struct scodec_dev_name *p = data;
++      const char *d = dev_name(dev);
++      int n = strlen(p->bus);
++      char tmp[32];
++
++      /* check the bus name */
++      if (strncmp(d, p->bus, n))
++              return 0;
++      /* skip the bus number */
++      if (isdigit(d[n]))
++              n++;
++      /* the rest must be exact matching */
++      snprintf(tmp, sizeof(tmp), "-%s:00", p->hid);
++
++      return !strcmp(d + n, tmp);
++}
++
+ static void cs35l41_generic_fixup(struct hda_codec *cdc, int action, const char *bus,
+                                 const char *hid, int count)
+ {
+       struct device *dev = hda_codec_dev(cdc);
+       struct alc_spec *spec = cdc->spec;
+-      struct cs35l41_dev_name *rec;
++      struct scodec_dev_name *rec;
+       int ret, i;
+ 
+       switch (action) {
+@@ -6776,6 +6796,41 @@ static void cs35l41_generic_fixup(struct hda_codec *cdc, int action, const char
+       }
+ }
+ 
++static void tas2781_generic_fixup(struct hda_codec *cdc, int action,
++      const char *bus, const char *hid)
++{
++      struct device *dev = hda_codec_dev(cdc);
++      struct alc_spec *spec = cdc->spec;
++      struct scodec_dev_name *rec;
++      int ret;
++
++      switch (action) {
++      case HDA_FIXUP_ACT_PRE_PROBE:
++              rec = devm_kmalloc(dev, sizeof(*rec), GFP_KERNEL);
++              if (!rec)
++                      return;
++              rec->bus = bus;
++              rec->hid = hid;
++              rec->index = 0;
++              spec->comps[0].codec = cdc;
++              component_match_add(dev, &spec->match,
++                      comp_match_tas2781_dev_name, rec);
++              ret = component_master_add_with_match(dev, &comp_master_ops,
++                      spec->match);
++              if (ret)
++                      codec_err(cdc,
++                              "Fail to register component aggregator %d\n",
++                              ret);
++              else
++                      spec->gen.pcm_playback_hook =
++                              comp_generic_playback_hook;
++              break;
++      case HDA_FIXUP_ACT_FREE:
++              component_master_del(dev, &comp_master_ops);
++              break;
++      }
++}
++
+ static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+ {
+       cs35l41_generic_fixup(cdc, action, "i2c", "CSC3551", 2);
+@@ -6803,6 +6858,12 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st
+       cs35l41_generic_fixup(cdc, action, "i2c", "CLSA0101", 2);
+ }
+ 
++static void tas2781_fixup_i2c(struct hda_codec *cdc,
++      const struct hda_fixup *fix, int action)
++{
++       tas2781_generic_fixup(cdc, action, "i2c", "TIAS2781");
++}
++
+ /* for alc295_fixup_hp_top_speakers */
+ #include "hp_x360_helper.c"
+ 
+@@ -7227,6 +7288,7 @@ enum {
+       ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
+       ALC236_FIXUP_DELL_DUAL_CODECS,
+       ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
++      ALC287_FIXUP_TAS2781_I2C,
+ };
+ 
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9296,6 +9358,12 @@ static const struct hda_fixup alc269_fixups[] = {
+               .chained = true,
+               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+       },
++      [ALC287_FIXUP_TAS2781_I2C] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = tas2781_fixup_i2c,
++              .chained = true,
++              .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
++      },
+ };
+ 
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -9867,6 +9935,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6),
+       SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
++      SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual powe mode2 YC", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38ba, "Yoga S780-14.5 Air AMD quad YC", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38bb, "Yoga S780-14.5 Air AMD quad AAC", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38be, "Yoga S980-14.5 proX YC Dual", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38bf, "Yoga S980-14.5 proX LX Dual", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
++      SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
+       SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
+       SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
+-- 
+2.40.1
+
diff --git a/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch b/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch

new file mode 100644 (file)

index 0000000..43f113c
--- /dev/null
+++ b/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch
@@ -0,0 +1,60 @@
+From 5542cbfe40b5adaa14b2ef882139a5058195903b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Jan 2023 12:19:55 -0300
+Subject: arm64: Avoid repeated AA64MMFR1_EL1 register read on pagefault path
+
+From: Gabriel Krisman Bertazi <krisman@suse.de>
+
+[ Upstream commit a89c6bcdac22bec1bfbe6e64060b4cf5838d4f47 ]
+
+Accessing AA64MMFR1_EL1 is expensive in KVM guests, since it is emulated
+in the hypervisor.  In fact, ARM documentation mentions some feature
+registers are not supposed to be accessed frequently by the OS, and
+therefore should be emulated for guests [1].
+
+Commit 0388f9c74330 ("arm64: mm: Implement
+arch_wants_old_prefaulted_pte()") introduced a read of this register in
+the page fault path.  But, even when the feature of setting faultaround
+pages with the old flag is disabled for a given cpu, we are still paying
+the cost of checking the register on every pagefault. This results in an
+explosion of vmexit events in KVM guests, which directly impacts the
+performance of virtualized workloads.  For instance, running kernbench
+yields a 15% increase in system time solely due to the increased vmexit
+cycles.
+
+This patch avoids the extra cost by using the sanitized cached value.
+It should be safe to do so, since this register mustn't change for a
+given cpu.
+
+[1] https://developer.arm.com/-/media/Arm%20Developer%20Community/PDF/Learn%20the%20Architecture/Armv8-A%20virtualization.pdf?revision=a765a7df-1a00-434d-b241-357bfda2dd31
+
+Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
+Acked-by: Will Deacon <will@kernel.org>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Link: https://lore.kernel.org/r/20230109151955.8292-1-krisman@suse.de
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/cpufeature.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index f73f11b550425..5bf0f9aa46267 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -863,7 +863,11 @@ static inline bool cpu_has_hw_af(void)
+       if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
+               return false;
+ 
+-      mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
++      /*
++       * Use cached version to avoid emulated msr operation on KVM
++       * guests.
++       */
++      mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+       return cpuid_feature_extract_unsigned_field(mmfr1,
+                                               ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch b/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch

new file mode 100644 (file)

index 0000000..20b5c46
--- /dev/null
+++ b/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch
@@ -0,0 +1,35 @@
+From bb5d98d6ac2702dae4a90e5e00540e700d4b378e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Sep 2023 20:32:24 +0530
+Subject: ASoC: soc-utils: Export snd_soc_dai_is_dummy() symbol
+
+From: Sameer Pujar <spujar@nvidia.com>
+
+[ Upstream commit f101583fa9f8c3f372d4feb61d67da0ccbf4d9a5 ]
+
+Export symbol snd_soc_dai_is_dummy() for usage outside core driver
+modules. This is required by Tegra ASoC machine driver.
+
+Signed-off-by: Sameer Pujar <spujar@nvidia.com>
+Link: https://lore.kernel.org/r/1694098945-32760-2-git-send-email-spujar@nvidia.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/soc-utils.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
+index a4dba0b751e76..1bbd1d077dfd9 100644
+--- a/sound/soc/soc-utils.c
++++ b/sound/soc/soc-utils.c
+@@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai)
+               return 1;
+       return 0;
+ }
++EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy);
+ 
+ int snd_soc_component_is_dummy(struct snd_soc_component *component)
+ {
+-- 
+2.40.1
+
diff --git a/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch b/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch

new file mode 100644 (file)

index 0000000..a98e0a1
--- /dev/null
+++ b/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch
@@ -0,0 +1,90 @@
+From aacf2dd084dffdc963d1a0d15608150066363397 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Sep 2023 20:32:25 +0530
+Subject: ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates
+
+From: Sameer Pujar <spujar@nvidia.com>
+
+[ Upstream commit e765886249c533e1bb5cbc3cd741bad677417312 ]
+
+Tegra audio graph card has many DAI links which connects internal
+AHUB modules and external audio codecs. Since these are DPCM links,
+hw_params() call in the machine driver happens for each connected
+BE link and PLLA is updated every time. This is not really needed
+for all links as only I/O link DAIs derive respective clocks from
+PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock
+updates to DAIs over I/O links.
+
+This was found to fix a DMIC clock discrepancy which is suspected
+to happen because of back to back quick PLLA and PLLA_OUT0 rate
+updates. This was observed on Jetson TX2 platform where DMIC clock
+ended up with unexpected value.
+
+Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sameer Pujar <spujar@nvidia.com>
+Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/tegra/tegra_audio_graph_card.c | 30 ++++++++++++++----------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c
+index 1f2c5018bf5ac..4737e776d3837 100644
+--- a/sound/soc/tegra/tegra_audio_graph_card.c
++++ b/sound/soc/tegra/tegra_audio_graph_card.c
+@@ -10,6 +10,7 @@
+ #include <linux/platform_device.h>
+ #include <sound/graph_card.h>
+ #include <sound/pcm_params.h>
++#include <sound/soc-dai.h>
+ 
+ #define MAX_PLLA_OUT0_DIV 128
+ 
+@@ -44,6 +45,21 @@ struct tegra_audio_cdata {
+       unsigned int plla_out0_rates[NUM_RATE_TYPE];
+ };
+ 
++static bool need_clk_update(struct snd_soc_dai *dai)
++{
++      if (snd_soc_dai_is_dummy(dai) ||
++          !dai->driver->ops ||
++          !dai->driver->name)
++              return false;
++
++      if (strstr(dai->driver->name, "I2S") ||
++          strstr(dai->driver->name, "DMIC") ||
++          strstr(dai->driver->name, "DSPK"))
++              return true;
++
++      return false;
++}
++
+ /* Setup PLL clock as per the given sample rate */
+ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream,
+                                       struct snd_pcm_hw_params *params)
+@@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream,
+       struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+       int err;
+ 
+-      /*
+-       * This gets called for each DAI link (FE or BE) when DPCM is used.
+-       * We may not want to update PLLA rate for each call. So PLLA update
+-       * must be restricted to external I/O links (I2S, DMIC or DSPK) since
+-       * they actually depend on it. I/O modules update their clocks in
+-       * hw_param() of their respective component driver and PLLA rate
+-       * update here helps them to derive appropriate rates.
+-       *
+-       * TODO: When more HW accelerators get added (like sample rate
+-       * converter, volume gain controller etc., which don't really
+-       * depend on PLLA) we need a better way to filter here.
+-       */
+-      if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) {
++      if (need_clk_update(cpu_dai)) {
+               err = tegra_audio_graph_update_pll(substream, params);
+               if (err)
+                       return err;
+-- 
+2.40.1
+
diff --git a/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch b/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch

new file mode 100644 (file)

index 0000000..d7d03bb
--- /dev/null
+++ b/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch
@@ -0,0 +1,152 @@
+From 4d1fc727026b5029e2f304f3e50aedbc215eb4ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 09:06:23 +0900
+Subject: ata: libata-scsi: Fix delayed scsi_rescan_device() execution
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 8b4d9469d0b0e553208ee6f62f2807111fde18b9 ]
+
+Commit 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after
+device resume") modified ata_scsi_dev_rescan() to check the scsi device
+"is_suspended" power field to ensure that the scsi device associated
+with an ATA device is fully resumed when scsi_rescan_device() is
+executed. However, this fix is problematic as:
+1) It relies on a PM internal field that should not be used without PM
+   device locking protection.
+2) The check for is_suspended and the call to scsi_rescan_device() are
+   not atomic and a suspend PM event may be triggered between them,
+   causing scsi_rescan_device() to be called on a suspended device and
+   in that function blocking while holding the scsi device lock. This
+   would deadlock a following resume operation.
+These problems can trigger PM deadlocks on resume, especially with
+resume operations triggered quickly after or during suspend operations.
+E.g., a simple bash script like:
+
+for (( i=0; i<10; i++ )); do
+       echo "+2" > /sys/class/rtc/rtc0/wakealarm
+       echo mem > /sys/power/state
+done
+
+that triggers a resume 2 seconds after starting suspending a system can
+quickly lead to a PM deadlock preventing the system from correctly
+resuming.
+
+Fix this by replacing the check on is_suspended with a check on the
+return value given by scsi_rescan_device() as that function will fail if
+called against a suspended device. Also make sure rescan tasks already
+scheduled are first cancelled before suspending an ata port.
+
+Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-core.c | 16 ++++++++++++++++
+ drivers/ata/libata-scsi.c | 33 +++++++++++++++------------------
+ 2 files changed, 31 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index 25b9bdf2fc380..6a053cd0cf410 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5022,11 +5022,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET
+ 
+ static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg)
+ {
++      /*
++       * We are about to suspend the port, so we do not care about
++       * scsi_rescan_device() calls scheduled by previous resume operations.
++       * The next resume will schedule the rescan again. So cancel any rescan
++       * that is not done yet.
++       */
++      cancel_delayed_work_sync(&ap->scsi_rescan_task);
++
+       ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false);
+ }
+ 
+ static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg)
+ {
++      /*
++       * We are about to suspend the port, so we do not care about
++       * scsi_rescan_device() calls scheduled by previous resume operations.
++       * The next resume will schedule the rescan again. So cancel any rescan
++       * that is not done yet.
++       */
++      cancel_delayed_work_sync(&ap->scsi_rescan_task);
++
+       ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true);
+ }
+ 
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index b348f77b91231..7b9c9264b9a72 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -4648,7 +4648,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
+       struct ata_link *link;
+       struct ata_device *dev;
+       unsigned long flags;
+-      bool delay_rescan = false;
++      int ret = 0;
+ 
+       mutex_lock(&ap->scsi_scan_mutex);
+       spin_lock_irqsave(ap->lock, flags);
+@@ -4657,37 +4657,34 @@ void ata_scsi_dev_rescan(struct work_struct *work)
+               ata_for_each_dev(dev, link, ENABLED) {
+                       struct scsi_device *sdev = dev->sdev;
+ 
++                      /*
++                       * If the port was suspended before this was scheduled,
++                       * bail out.
++                       */
++                      if (ap->pflags & ATA_PFLAG_SUSPENDED)
++                              goto unlock;
++
+                       if (!sdev)
+                               continue;
+                       if (scsi_device_get(sdev))
+                               continue;
+ 
+-                      /*
+-                       * If the rescan work was scheduled because of a resume
+-                       * event, the port is already fully resumed, but the
+-                       * SCSI device may not yet be fully resumed. In such
+-                       * case, executing scsi_rescan_device() may cause a
+-                       * deadlock with the PM code on device_lock(). Prevent
+-                       * this by giving up and retrying rescan after a short
+-                       * delay.
+-                       */
+-                      delay_rescan = sdev->sdev_gendev.power.is_suspended;
+-                      if (delay_rescan) {
+-                              scsi_device_put(sdev);
+-                              break;
+-                      }
+-
+                       spin_unlock_irqrestore(ap->lock, flags);
+-                      scsi_rescan_device(sdev);
++                      ret = scsi_rescan_device(sdev);
+                       scsi_device_put(sdev);
+                       spin_lock_irqsave(ap->lock, flags);
++
++                      if (ret)
++                              goto unlock;
+               }
+       }
+ 
++unlock:
+       spin_unlock_irqrestore(ap->lock, flags);
+       mutex_unlock(&ap->scsi_scan_mutex);
+ 
+-      if (delay_rescan)
++      /* Reschedule with a delay if scsi_rescan_device() returned an error */
++      if (ret)
+               schedule_delayed_work(&ap->scsi_rescan_task,
+                                     msecs_to_jiffies(5));
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch b/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch

new file mode 100644 (file)

index 0000000..97cf41a
--- /dev/null
+++ b/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch
@@ -0,0 +1,123 @@
+From 3bdffcb01d762ec932de44fbb950d6d3a828d08f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jul 2023 13:23:14 +0900
+Subject: ata,scsi: do not issue START STOP UNIT on resume
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 0a8589055936d8feb56477123a8373ac634018fa ]
+
+During system resume, ata_port_pm_resume() triggers ata EH to
+1) Resume the controller
+2) Reset and rescan the ports
+3) Revalidate devices
+This EH execution is started asynchronously from ata_port_pm_resume(),
+which means that when sd_resume() is executed, none or only part of the
+above processing may have been executed. However, sd_resume() issues a
+START STOP UNIT to wake up the drive from sleep mode. This command is
+translated to ATA with ata_scsi_start_stop_xlat() and issued to the
+device. However, depending on the state of execution of the EH process
+and revalidation triggered by ata_port_pm_resume(), two things may
+happen:
+1) The START STOP UNIT fails if it is received before the controller has
+   been reenabled at the beginning of the EH execution. This is visible
+   with error messages like:
+
+ata10.00: device reported invalid CHS sector 0
+sd 9:0:0:0: [sdc] Start/Stop Unit failed: Result: hostbyte=DID_OK driverbyte=DRIVER_OK
+sd 9:0:0:0: [sdc] Sense Key : Illegal Request [current]
+sd 9:0:0:0: [sdc] Add. Sense: Unaligned write command
+sd 9:0:0:0: PM: dpm_run_callback(): scsi_bus_resume+0x0/0x90 returns -5
+sd 9:0:0:0: PM: failed to resume async: error -5
+
+2) The START STOP UNIT command is received while the EH process is
+   on-going, which means that it is stopped and must wait for its
+   completion, at which point the command is rather useless as the drive
+   is already fully spun up. This case results also in a
+   significant delay in sd_resume() which is observable by users as
+   the entire system resume completion is delayed.
+
+Given that ATA devices will be woken up by libata activity on resume,
+sd_resume() has no need to issue a START STOP UNIT command, which solves
+the above mentioned problems. Do not issue this command by introducing
+the new scsi_device flag no_start_on_resume and setting this flag to 1
+in ata_scsi_dev_config(). sd_resume() is modified to issue a START STOP
+UNIT command only if this flag is not set.
+
+Reported-by: Paul Ausbeck <paula@soe.ucsc.edu>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=215880
+Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Tested-by: Tanner Watkins <dalzot@gmail.com>
+Tested-by: Paul Ausbeck <paula@soe.ucsc.edu>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Stable-dep-of: 99398d2070ab ("scsi: sd: Do not issue commands to suspended disks on shutdown")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-scsi.c  | 7 +++++++
+ drivers/scsi/sd.c          | 9 ++++++---
+ include/scsi/scsi_device.h | 1 +
+ 3 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index d28628b964e29..9c8dd9f86cbb3 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -1081,7 +1081,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
+               }
+       } else {
+               sdev->sector_size = ata_id_logical_sector_size(dev->id);
++              /*
++               * Stop the drive on suspend but do not issue START STOP UNIT
++               * on resume as this is not necessary and may fail: the device
++               * will be woken up by ata_port_pm_resume() with a port reset
++               * and device revalidation.
++               */
+               sdev->manage_start_stop = 1;
++              sdev->no_start_on_resume = 1;
+       }
+ 
+       /*
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index e934779bf05c8..5bfca49415113 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3718,7 +3718,7 @@ static int sd_suspend_runtime(struct device *dev)
+ static int sd_resume(struct device *dev)
+ {
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+-      int ret;
++      int ret = 0;
+ 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+@@ -3726,8 +3726,11 @@ static int sd_resume(struct device *dev)
+       if (!sdkp->device->manage_start_stop)
+               return 0;
+ 
+-      sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+-      ret = sd_start_stop_device(sdkp, 1);
++      if (!sdkp->device->no_start_on_resume) {
++              sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
++              ret = sd_start_stop_device(sdkp, 1);
++      }
++
+       if (!ret)
+               opal_unlock_from_suspend(sdkp->opal_dev);
+       return ret;
+diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+index 006858ed04e8c..9fdc77db3a2a8 100644
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -193,6 +193,7 @@ struct scsi_device {
+       unsigned no_start_on_add:1;     /* do not issue start on add */
+       unsigned allow_restart:1; /* issue START_UNIT in error handler */
+       unsigned manage_start_stop:1;   /* Let HLD (sd) manage start/stop */
++      unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
+       unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
+       unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
+       unsigned select_no_atn:1;
+-- 
+2.40.1
+
diff --git a/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch b/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch

new file mode 100644 (file)

index 0000000..a24297c
--- /dev/null
+++ b/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch
@@ -0,0 +1,738 @@
+From 22c78442fa7b92733ad1e22f57af18e0444b8a19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 12:58:26 -0400
+Subject: btrfs: setup qstr from dentrys using fscrypt helper
+
+From: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+
+[ Upstream commit ab3c5c18e8fa3f8ea116016095d25adab466cd39 ]
+
+Most places where we get a struct qstr, we are doing so from a dentry.
+With fscrypt, the dentry's name may be encrypted on-disk, so fscrypt
+provides a helper to convert a dentry name to the appropriate disk name
+if necessary. Convert each of the dentry name accesses to use
+fscrypt_setup_filename(), then convert the resulting fscrypt_name back
+to an unencrypted qstr. This does not work for nokey names, but the
+specific locations that could spawn nokey names are noted.
+
+At present, since there are no encrypted directories, nothing goes down
+the filename encryption paths.
+
+Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h       |   3 +
+ fs/btrfs/inode.c       | 192 +++++++++++++++++++++++++++++++----------
+ fs/btrfs/transaction.c |  40 ++++++---
+ fs/btrfs/tree-log.c    |  11 ++-
+ 4 files changed, 189 insertions(+), 57 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 6718cee57a94e..5120cea15b096 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -28,6 +28,7 @@
+ #include <linux/refcount.h>
+ #include <linux/crc32c.h>
+ #include <linux/iomap.h>
++#include <linux/fscrypt.h>
+ #include "extent-io-tree.h"
+ #include "extent_io.h"
+ #include "extent_map.h"
+@@ -3396,6 +3397,8 @@ struct btrfs_new_inode_args {
+        */
+       struct posix_acl *default_acl;
+       struct posix_acl *acl;
++      struct fscrypt_name fname;
++      struct qstr name;
+ };
+ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
+                           unsigned int *trans_num_items);
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index a5e61ad2ba696..b5224dbaa4165 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4415,28 +4415,41 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+       struct btrfs_trans_handle *trans;
+       struct inode *inode = d_inode(dentry);
+       int ret;
++      struct fscrypt_name fname;
++      struct qstr name;
++
++      ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
++      if (ret)
++              return ret;
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
++      /* This needs to handle no-key deletions later on */
+ 
+       trans = __unlink_start_trans(dir);
+-      if (IS_ERR(trans))
+-              return PTR_ERR(trans);
++      if (IS_ERR(trans)) {
++              ret = PTR_ERR(trans);
++              goto fscrypt_free;
++      }
+ 
+       btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+                       0);
+ 
+       ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+-                               &dentry->d_name);
++                               &name);
+       if (ret)
+-              goto out;
++              goto end_trans;
+ 
+       if (inode->i_nlink == 0) {
+               ret = btrfs_orphan_add(trans, BTRFS_I(inode));
+               if (ret)
+-                      goto out;
++                      goto end_trans;
+       }
+ 
+-out:
++end_trans:
+       btrfs_end_transaction(trans);
+       btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
++fscrypt_free:
++      fscrypt_free_filename(&fname);
+       return ret;
+ }
+ 
+@@ -4449,11 +4462,19 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+-      const struct qstr *name = &dentry->d_name;
++      struct qstr name;
+       u64 index;
+       int ret;
+       u64 objectid;
+       u64 dir_ino = btrfs_ino(BTRFS_I(dir));
++      struct fscrypt_name fname;
++
++      ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
++      if (ret)
++              return ret;
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
++      /* This needs to handle no-key deletions later on */
+ 
+       if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
+               objectid = inode->root->root_key.objectid;
+@@ -4461,14 +4482,17 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               objectid = inode->location.objectid;
+       } else {
+               WARN_ON(1);
++              fscrypt_free_filename(&fname);
+               return -EINVAL;
+       }
+ 
+       path = btrfs_alloc_path();
+-      if (!path)
+-              return -ENOMEM;
++      if (!path) {
++              ret = -ENOMEM;
++              goto out;
++      }
+ 
+-      di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1);
++      di = btrfs_lookup_dir_item(trans, root, path, dir_ino, &name, -1);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -4494,7 +4518,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+        * call btrfs_del_root_ref, and it _shouldn't_ fail.
+        */
+       if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
+-              di = btrfs_search_dir_index_item(root, path, dir_ino, name);
++              di = btrfs_search_dir_index_item(root, path, dir_ino, &name);
+               if (IS_ERR_OR_NULL(di)) {
+                       if (!di)
+                               ret = -ENOENT;
+@@ -4511,7 +4535,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       } else {
+               ret = btrfs_del_root_ref(trans, objectid,
+                                        root->root_key.objectid, dir_ino,
+-                                       &index, name);
++                                       &index, &name);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       goto out;
+@@ -4524,7 +4548,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name->len * 2);
++      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name.len * 2);
+       inode_inc_iversion(dir);
+       dir->i_mtime = current_time(dir);
+       dir->i_ctime = dir->i_mtime;
+@@ -4533,6 +4557,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               btrfs_abort_transaction(trans, ret);
+ out:
+       btrfs_free_path(path);
++      fscrypt_free_filename(&fname);
+       return ret;
+ }
+ 
+@@ -4796,6 +4821,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+       int err = 0;
+       struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
++      struct fscrypt_name fname;
++      struct qstr name;
+ 
+       if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+               return -ENOTEMPTY;
+@@ -4808,9 +4835,18 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+               return btrfs_delete_subvolume(dir, dentry);
+       }
+ 
++      err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
++      if (err)
++              return err;
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
++      /* This needs to handle no-key deletions later on */
++
+       trans = __unlink_start_trans(dir);
+-      if (IS_ERR(trans))
+-              return PTR_ERR(trans);
++      if (IS_ERR(trans)) {
++              err = PTR_ERR(trans);
++              goto out_notrans;
++      }
+ 
+       if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+               err = btrfs_unlink_subvol(trans, dir, dentry);
+@@ -4825,7 +4861,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+ 
+       /* now the directory is empty */
+       err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+-                               &dentry->d_name);
++                               &name);
+       if (!err) {
+               btrfs_i_size_write(BTRFS_I(inode), 0);
+               /*
+@@ -4844,7 +4880,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+       }
+ out:
+       btrfs_end_transaction(trans);
++out_notrans:
+       btrfs_btree_balance_dirty(fs_info);
++      fscrypt_free_filename(&fname);
+ 
+       return err;
+ }
+@@ -5525,18 +5563,27 @@ void btrfs_evict_inode(struct inode *inode)
+ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+                              struct btrfs_key *location, u8 *type)
+ {
+-      const struct qstr *name = &dentry->d_name;
++      struct qstr name;
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+       int ret = 0;
++      struct fscrypt_name fname;
+ 
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+ 
++      ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
++      if (ret)
++              goto out;
++
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
++      /* This needs to handle no-key deletions later on */
++
+       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+-                                 name, 0);
++                                 &name, 0);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -5548,12 +5595,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+               ret = -EUCLEAN;
+               btrfs_warn(root->fs_info,
+ "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+-                         __func__, name->name, btrfs_ino(BTRFS_I(dir)),
++                         __func__, name.name, btrfs_ino(BTRFS_I(dir)),
+                          location->objectid, location->type, location->offset);
+       }
+       if (!ret)
+               *type = btrfs_dir_type(path->nodes[0], di);
+ out:
++      fscrypt_free_filename(&fname);
+       btrfs_free_path(path);
+       return ret;
+ }
+@@ -5576,6 +5624,14 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
+       struct btrfs_key key;
+       int ret;
+       int err = 0;
++      struct fscrypt_name fname;
++      struct qstr name;
++
++      ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
++      if (ret)
++              return ret;
++
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+ 
+       path = btrfs_alloc_path();
+       if (!path) {
+@@ -5598,12 +5654,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
+       leaf = path->nodes[0];
+       ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+       if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
+-          btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
++          btrfs_root_ref_name_len(leaf, ref) != name.len)
+               goto out;
+ 
+-      ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
+-                                 (unsigned long)(ref + 1),
+-                                 dentry->d_name.len);
++      ret = memcmp_extent_buffer(leaf, name.name, (unsigned long)(ref + 1),
++                                 name.len);
+       if (ret)
+               goto out;
+ 
+@@ -5622,6 +5677,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
+       err = 0;
+ out:
+       btrfs_free_path(path);
++      fscrypt_free_filename(&fname);
+       return err;
+ }
+ 
+@@ -6230,9 +6286,19 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
+       struct inode *inode = args->inode;
+       int ret;
+ 
++      if (!args->orphan) {
++              ret = fscrypt_setup_filename(dir, &args->dentry->d_name, 0,
++                                           &args->fname);
++              if (ret)
++                      return ret;
++              args->name = (struct qstr)FSTR_TO_QSTR(&args->fname.disk_name);
++      }
++
+       ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl);
+-      if (ret)
++      if (ret) {
++              fscrypt_free_filename(&args->fname);
+               return ret;
++      }
+ 
+       /* 1 to add inode item */
+       *trans_num_items = 1;
+@@ -6272,6 +6338,7 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args)
+ {
+       posix_acl_release(args->acl);
+       posix_acl_release(args->default_acl);
++      fscrypt_free_filename(&args->fname);
+ }
+ 
+ /*
+@@ -6697,6 +6764,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+       struct inode *inode = d_inode(old_dentry);
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
++      struct fscrypt_name fname;
++      struct qstr name;
+       u64 index;
+       int err;
+       int drop_inode = 0;
+@@ -6708,6 +6777,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       if (inode->i_nlink >= BTRFS_LINK_MAX)
+               return -EMLINK;
+ 
++      err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
++      if (err)
++              goto fail;
++
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
+       err = btrfs_set_inode_index(BTRFS_I(dir), &index);
+       if (err)
+               goto fail;
+@@ -6734,7 +6809,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
+ 
+       err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+-                           &dentry->d_name, 1, index);
++                           &name, 1, index);
+ 
+       if (err) {
+               drop_inode = 1;
+@@ -6758,6 +6833,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       }
+ 
+ fail:
++      fscrypt_free_filename(&fname);
+       if (trans)
+               btrfs_end_transaction(trans);
+       if (drop_inode) {
+@@ -9030,6 +9106,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       int ret;
+       int ret2;
+       bool need_abort = false;
++      struct fscrypt_name old_fname, new_fname;
++      struct qstr old_name, new_name;
+ 
+       /*
+        * For non-subvolumes allow exchange only within one subvolume, in the
+@@ -9041,6 +9119,19 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+            new_ino != BTRFS_FIRST_FREE_OBJECTID))
+               return -EXDEV;
+ 
++      ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname);
++      if (ret)
++              return ret;
++
++      ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname);
++      if (ret) {
++              fscrypt_free_filename(&old_fname);
++              return ret;
++      }
++
++      old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name);
++      new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name);
++
+       /* close the race window with snapshot create/destroy ioctl */
+       if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+           new_ino == BTRFS_FIRST_FREE_OBJECTID)
+@@ -9108,8 +9199,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name,
+-                                           old_ino,
++              ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino,
+                                            btrfs_ino(BTRFS_I(new_dir)),
+                                            old_idx);
+               if (ret)
+@@ -9122,8 +9212,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, root, &old_dentry->d_name,
+-                                           new_ino,
++              ret = btrfs_insert_inode_ref(trans, root, &old_name, new_ino,
+                                            btrfs_ino(BTRFS_I(old_dir)),
+                                            new_idx);
+               if (ret) {
+@@ -9158,8 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* src is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+                                          BTRFS_I(old_dentry->d_inode),
+-                                         &old_dentry->d_name,
+-                                         &old_rename_ctx);
++                                         &old_name, &old_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+       }
+@@ -9174,8 +9262,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* dest is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                          BTRFS_I(new_dentry->d_inode),
+-                                         &new_dentry->d_name,
+-                                         &new_rename_ctx);
++                                         &new_name, &new_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
+       }
+@@ -9185,14 +9272,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           &new_dentry->d_name, 0, old_idx);
++                           &new_name, 0, old_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
+-                           &old_dentry->d_name, 0, new_idx);
++                           &old_name, 0, new_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+@@ -9235,6 +9322,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+           old_ino == BTRFS_FIRST_FREE_OBJECTID)
+               up_read(&fs_info->subvol_sem);
+ 
++      fscrypt_free_filename(&new_fname);
++      fscrypt_free_filename(&old_fname);
+       return ret;
+ }
+ 
+@@ -9274,6 +9363,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       int ret;
+       int ret2;
+       u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
++      struct fscrypt_name old_fname, new_fname;
++      struct qstr old_name, new_name;
+ 
+       if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               return -EPERM;
+@@ -9290,21 +9381,32 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+           new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+               return -ENOTEMPTY;
+ 
++      ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname);
++      if (ret)
++              return ret;
++
++      ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname);
++      if (ret) {
++              fscrypt_free_filename(&old_fname);
++              return ret;
++      }
++
++      old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name);
++      new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name);
+ 
+       /* check for collisions, even if the  name isn't there */
+-      ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
+-                                           &new_dentry->d_name);
++      ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_name);
+ 
+       if (ret) {
+               if (ret == -EEXIST) {
+                       /* we shouldn't get
+                        * eexist without a new_inode */
+                       if (WARN_ON(!new_inode)) {
+-                              return ret;
++                              goto out_fscrypt_names;
+                       }
+               } else {
+                       /* maybe -EOVERFLOW */
+-                      return ret;
++                      goto out_fscrypt_names;
+               }
+       }
+       ret = 0;
+@@ -9387,8 +9489,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name,
+-                                           old_ino,
++              ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino,
+                                            btrfs_ino(BTRFS_I(new_dir)), index);
+               if (ret)
+                       goto out_fail;
+@@ -9412,7 +9513,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       } else {
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+                                          BTRFS_I(d_inode(old_dentry)),
+-                                         &old_dentry->d_name, &rename_ctx);
++                                         &old_name, &rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+       }
+@@ -9431,7 +9532,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               } else {
+                       ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                                BTRFS_I(d_inode(new_dentry)),
+-                                               &new_dentry->d_name);
++                                               &new_name);
+               }
+               if (!ret && new_inode->i_nlink == 0)
+                       ret = btrfs_orphan_add(trans,
+@@ -9443,7 +9544,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           &new_dentry->d_name, 0, index);
++                           &new_name, 0, index);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+@@ -9478,6 +9579,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+ out_whiteout_inode:
+       if (flags & RENAME_WHITEOUT)
+               iput(whiteout_args.inode);
++out_fscrypt_names:
++      fscrypt_free_filename(&old_fname);
++      fscrypt_free_filename(&new_fname);
+       return ret;
+ }
+ 
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index b0fe054c9f401..c8918bdf15ccd 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -6,6 +6,7 @@
+ #include <linux/fs.h>
+ #include <linux/slab.h>
+ #include <linux/sched.h>
++#include <linux/sched/mm.h>
+ #include <linux/writeback.h>
+ #include <linux/pagemap.h>
+ #include <linux/blkdev.h>
+@@ -1627,10 +1628,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       struct btrfs_root *root = pending->root;
+       struct btrfs_root *parent_root;
+       struct btrfs_block_rsv *rsv;
+-      struct inode *parent_inode;
++      struct inode *parent_inode = pending->dir;
+       struct btrfs_path *path;
+       struct btrfs_dir_item *dir_item;
+-      struct dentry *dentry;
+       struct extent_buffer *tmp;
+       struct extent_buffer *old;
+       struct timespec64 cur_time;
+@@ -1639,6 +1639,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       u64 index = 0;
+       u64 objectid;
+       u64 root_flags;
++      unsigned int nofs_flags;
++      struct fscrypt_name fname;
++      struct qstr name;
+ 
+       ASSERT(pending->path);
+       path = pending->path;
+@@ -1646,9 +1649,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       ASSERT(pending->root_item);
+       new_root_item = pending->root_item;
+ 
++      /*
++       * We're inside a transaction and must make sure that any potential
++       * allocations with GFP_KERNEL in fscrypt won't recurse back to
++       * filesystem.
++       */
++      nofs_flags = memalloc_nofs_save();
++      pending->error = fscrypt_setup_filename(parent_inode,
++                                              &pending->dentry->d_name, 0,
++                                              &fname);
++      memalloc_nofs_restore(nofs_flags);
++      if (pending->error)
++              goto free_pending;
++      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
++
+       pending->error = btrfs_get_free_objectid(tree_root, &objectid);
+       if (pending->error)
+-              goto no_free_objectid;
++              goto free_fname;
+ 
+       /*
+        * Make qgroup to skip current new snapshot's qgroupid, as it is
+@@ -1677,8 +1694,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       trace_btrfs_space_reservation(fs_info, "transaction",
+                                     trans->transid,
+                                     trans->bytes_reserved, 1);
+-      dentry = pending->dentry;
+-      parent_inode = pending->dir;
+       parent_root = BTRFS_I(parent_inode)->root;
+       ret = record_root_in_trans(trans, parent_root, 0);
+       if (ret)
+@@ -1694,7 +1709,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       /* check if there is a file/dir which has the same name. */
+       dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
+                                        btrfs_ino(BTRFS_I(parent_inode)),
+-                                       &dentry->d_name, 0);
++                                       &name, 0);
+       if (dir_item != NULL && !IS_ERR(dir_item)) {
+               pending->error = -EEXIST;
+               goto dir_item_existed;
+@@ -1789,7 +1804,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       ret = btrfs_add_root_ref(trans, objectid,
+                                parent_root->root_key.objectid,
+                                btrfs_ino(BTRFS_I(parent_inode)), index,
+-                               &dentry->d_name);
++                               &name);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+@@ -1821,9 +1836,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       if (ret < 0)
+               goto fail;
+ 
+-      ret = btrfs_insert_dir_item(trans, &dentry->d_name,
+-                                  BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
+-                                  index);
++      ret = btrfs_insert_dir_item(trans, &name, BTRFS_I(parent_inode), &key,
++                                  BTRFS_FT_DIR, index);
+       /* We have check then name at the beginning, so it is impossible. */
+       BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
+       if (ret) {
+@@ -1832,7 +1846,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       }
+ 
+       btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
+-                                       dentry->d_name.len * 2);
++                                                name.len * 2);
+       parent_inode->i_mtime = current_time(parent_inode);
+       parent_inode->i_ctime = parent_inode->i_mtime;
+       ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
+@@ -1864,7 +1878,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       trans->bytes_reserved = 0;
+ clear_skip_qgroup:
+       btrfs_clear_skip_qgroup(trans);
+-no_free_objectid:
++free_fname:
++      fscrypt_free_filename(&fname);
++free_pending:
+       kfree(new_root_item);
+       pending->root_item = NULL;
+       btrfs_free_path(path);
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 9f55e81acc0ef..25fd3f34b8f21 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7471,9 +7471,16 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+       if (old_dir && old_dir->logged_trans == trans->transid) {
+               struct btrfs_root *log = old_dir->root->log_root;
+               struct btrfs_path *path;
++              struct fscrypt_name fname;
++              struct qstr name;
+ 
+               ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ 
++              ret = fscrypt_setup_filename(&old_dir->vfs_inode,
++                                           &old_dentry->d_name, 0, &fname);
++              if (ret)
++                      goto out;
++              name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+               /*
+                * We have two inodes to update in the log, the old directory and
+                * the inode that got renamed, so we must pin the log to prevent
+@@ -7493,6 +7500,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+               path = btrfs_alloc_path();
+               if (!path) {
+                       ret = -ENOMEM;
++                      fscrypt_free_filename(&fname);
+                       goto out;
+               }
+ 
+@@ -7508,7 +7516,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                */
+               mutex_lock(&old_dir->log_mutex);
+               ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
+-                                      &old_dentry->d_name, old_dir_index);
++                                      &name, old_dir_index);
+               if (ret > 0) {
+                       /*
+                        * The dentry does not exist in the log, so record its
+@@ -7522,6 +7530,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+               mutex_unlock(&old_dir->log_mutex);
+ 
+               btrfs_free_path(path);
++              fscrypt_free_filename(&fname);
+               if (ret < 0)
+                       goto out;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch b/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch

new file mode 100644 (file)

index 0000000..a05652f
--- /dev/null
+++ b/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch
@@ -0,0 +1,990 @@
+From a3bedc8e6ad79645b5ddb23c0d404f4e8ea95003 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 12:58:27 -0400
+Subject: btrfs: use struct fscrypt_str instead of struct qstr
+
+From: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+
+[ Upstream commit 6db75318823a169e836a478ca57d6a7c0a156b77 ]
+
+While struct qstr is more natural without fscrypt, since it's provided
+by dentries, struct fscrypt_str is provided by the fscrypt handlers
+processing dentries, and is thus more natural in the fscrypt world.
+Replace all of the struct qstr uses with struct fscrypt_str.
+
+Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h       | 19 +++++----
+ fs/btrfs/dir-item.c    | 10 ++---
+ fs/btrfs/inode-item.c  | 14 +++----
+ fs/btrfs/inode-item.h  | 10 ++---
+ fs/btrfs/inode.c       | 87 +++++++++++++++++-------------------------
+ fs/btrfs/ioctl.c       |  4 +-
+ fs/btrfs/root-tree.c   |  4 +-
+ fs/btrfs/send.c        |  4 +-
+ fs/btrfs/super.c       |  2 +-
+ fs/btrfs/transaction.c | 13 +++----
+ fs/btrfs/tree-log.c    | 42 ++++++++++----------
+ fs/btrfs/tree-log.h    |  4 +-
+ 12 files changed, 95 insertions(+), 118 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 5120cea15b096..27d06bb5e5c05 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3240,10 +3240,10 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
+ /* root-item.c */
+ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+                      u64 ref_id, u64 dirid, u64 sequence,
+-                     const struct qstr *name);
++                     const struct fscrypt_str *name);
+ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+                      u64 ref_id, u64 dirid, u64 *sequence,
+-                     const struct qstr *name);
++                     const struct fscrypt_str *name);
+ int btrfs_del_root(struct btrfs_trans_handle *trans,
+                  const struct btrfs_key *key);
+ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+@@ -3272,23 +3272,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
+ 
+ /* dir-item.c */
+ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+-                        const struct qstr *name);
++                        const struct fscrypt_str *name);
+ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
+-                        const struct qstr *name, struct btrfs_inode *dir,
++                        const struct fscrypt_str *name, struct btrfs_inode *dir,
+                         struct btrfs_key *location, u8 type, u64 index);
+ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+-                                           const struct qstr *name, int mod);
++                                           const struct fscrypt_str *name, int mod);
+ struct btrfs_dir_item *
+ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+-                          u64 index, const struct qstr *name, int mod);
++                          u64 index, const struct fscrypt_str *name, int mod);
+ struct btrfs_dir_item *
+ btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+-                          const struct qstr *name);
++                          const struct fscrypt_str *name);
+ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_path *path,
+@@ -3369,10 +3369,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
+ int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
+-                     const struct qstr *name);
++                     const struct fscrypt_str *name);
+ int btrfs_add_link(struct btrfs_trans_handle *trans,
+                  struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
+-                 const struct qstr *name, int add_backref, u64 index);
++                 const struct fscrypt_str *name, int add_backref, u64 index);
+ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
+ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
+                        int front);
+@@ -3398,7 +3398,6 @@ struct btrfs_new_inode_args {
+       struct posix_acl *default_acl;
+       struct posix_acl *acl;
+       struct fscrypt_name fname;
+-      struct qstr name;
+ };
+ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
+                           unsigned int *trans_num_items);
+diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
+index 8c60f37eb13fd..fdab48c1abb8a 100644
+--- a/fs/btrfs/dir-item.c
++++ b/fs/btrfs/dir-item.c
+@@ -104,7 +104,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+  * Will return 0 or -ENOMEM
+  */
+ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
+-                        const struct qstr *name, struct btrfs_inode *dir,
++                        const struct fscrypt_str *name, struct btrfs_inode *dir,
+                         struct btrfs_key *location, u8 type, u64 index)
+ {
+       int ret = 0;
+@@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir(
+ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+-                                           const struct qstr *name,
++                                           const struct fscrypt_str *name,
+                                            int mod)
+ {
+       struct btrfs_key key;
+@@ -225,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ }
+ 
+ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+-                                 const struct qstr *name)
++                                 const struct fscrypt_str *name)
+ {
+       int ret;
+       struct btrfs_key key;
+@@ -302,7 +302,7 @@ struct btrfs_dir_item *
+ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+-                          u64 index, const struct qstr *name, int mod)
++                          u64 index, const struct fscrypt_str *name, int mod)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+@@ -321,7 +321,7 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+ 
+ struct btrfs_dir_item *
+ btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
+-                          u64 dirid, const struct qstr *name)
++                          u64 dirid, const struct fscrypt_str *name)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
+index 61b323517a40b..5add022d3534f 100644
+--- a/fs/btrfs/inode-item.c
++++ b/fs/btrfs/inode-item.c
+@@ -11,7 +11,7 @@
+ 
+ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+                                                  int slot,
+-                                                 const struct qstr *name)
++                                                 const struct fscrypt_str *name)
+ {
+       struct btrfs_inode_ref *ref;
+       unsigned long ptr;
+@@ -38,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+ 
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+               struct extent_buffer *leaf, int slot, u64 ref_objectid,
+-              const struct qstr *name)
++              const struct fscrypt_str *name)
+ {
+       struct btrfs_inode_extref *extref;
+       unsigned long ptr;
+@@ -77,7 +77,7 @@ struct btrfs_inode_extref *
+ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+-                        const struct qstr *name,
++                        const struct fscrypt_str *name,
+                         u64 inode_objectid, u64 ref_objectid, int ins_len,
+                         int cow)
+ {
+@@ -100,7 +100,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+ 
+ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const struct qstr *name,
++                                const struct fscrypt_str *name,
+                                 u64 inode_objectid, u64 ref_objectid,
+                                 u64 *index)
+ {
+@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+ }
+ 
+ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+-                      struct btrfs_root *root, const struct qstr *name,
++                      struct btrfs_root *root, const struct fscrypt_str *name,
+                       u64 inode_objectid, u64 ref_objectid, u64 *index)
+ {
+       struct btrfs_path *path;
+@@ -247,7 +247,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+  */
+ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+                                    struct btrfs_root *root,
+-                                   const struct qstr *name,
++                                   const struct fscrypt_str *name,
+                                    u64 inode_objectid, u64 ref_objectid,
+                                    u64 index)
+ {
+@@ -302,7 +302,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+ 
+ /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
+ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+-                         struct btrfs_root *root, const struct qstr *name,
++                         struct btrfs_root *root, const struct fscrypt_str *name,
+                          u64 inode_objectid, u64 ref_objectid, u64 index)
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
+index 3c657c670cfdf..b80aeb7157010 100644
+--- a/fs/btrfs/inode-item.h
++++ b/fs/btrfs/inode-item.h
+@@ -64,10 +64,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_truncate_control *control);
+ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+-                         struct btrfs_root *root, const struct qstr *name,
++                         struct btrfs_root *root, const struct fscrypt_str *name,
+                          u64 inode_objectid, u64 ref_objectid, u64 index);
+ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+-                      struct btrfs_root *root, const struct qstr *name,
++                      struct btrfs_root *root, const struct fscrypt_str *name,
+                       u64 inode_objectid, u64 ref_objectid, u64 *index);
+ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+@@ -80,15 +80,15 @@ struct btrfs_inode_extref *btrfs_lookup_inode_extref(
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+-                        const struct qstr *name,
++                        const struct fscrypt_str *name,
+                         u64 inode_objectid, u64 ref_objectid, int ins_len,
+                         int cow);
+ 
+ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+                                                  int slot,
+-                                                 const struct qstr *name);
++                                                 const struct fscrypt_str *name);
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+               struct extent_buffer *leaf, int slot, u64 ref_objectid,
+-              const struct qstr *name);
++              const struct fscrypt_str *name);
+ 
+ #endif
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index b5224dbaa4165..47c5be597368b 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_inode *dir,
+                               struct btrfs_inode *inode,
+-                              const struct qstr *name,
++                              const struct fscrypt_str *name,
+                               struct btrfs_rename_ctx *rename_ctx)
+ {
+       struct btrfs_root *root = dir->root;
+@@ -4375,7 +4375,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+ 
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
+-                     const struct qstr *name)
++                     const struct fscrypt_str *name)
+ {
+       int ret;
+ 
+@@ -4416,12 +4416,10 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+       struct inode *inode = d_inode(dentry);
+       int ret;
+       struct fscrypt_name fname;
+-      struct qstr name;
+ 
+       ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+       if (ret)
+               return ret;
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+ 
+       /* This needs to handle no-key deletions later on */
+ 
+@@ -4435,7 +4433,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+                       0);
+ 
+       ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+-                               &name);
++                               &fname.disk_name);
+       if (ret)
+               goto end_trans;
+ 
+@@ -4462,7 +4460,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+-      struct qstr name;
+       u64 index;
+       int ret;
+       u64 objectid;
+@@ -4472,7 +4469,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+       if (ret)
+               return ret;
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+ 
+       /* This needs to handle no-key deletions later on */
+ 
+@@ -4492,7 +4488,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      di = btrfs_lookup_dir_item(trans, root, path, dir_ino, &name, -1);
++      di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
++                                 &fname.disk_name, -1);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -4518,7 +4515,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+        * call btrfs_del_root_ref, and it _shouldn't_ fail.
+        */
+       if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
+-              di = btrfs_search_dir_index_item(root, path, dir_ino, &name);
++              di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name);
+               if (IS_ERR_OR_NULL(di)) {
+                       if (!di)
+                               ret = -ENOENT;
+@@ -4535,7 +4532,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       } else {
+               ret = btrfs_del_root_ref(trans, objectid,
+                                        root->root_key.objectid, dir_ino,
+-                                       &index, &name);
++                                       &index, &fname.disk_name);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       goto out;
+@@ -4548,7 +4545,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name.len * 2);
++      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - fname.disk_name.len * 2);
+       inode_inc_iversion(dir);
+       dir->i_mtime = current_time(dir);
+       dir->i_ctime = dir->i_mtime;
+@@ -4571,7 +4568,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
+       struct btrfs_path *path;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+-      struct qstr name = QSTR_INIT("default", 7);
++      struct fscrypt_str name = FSTR_INIT("default", 7);
+       u64 dir_id;
+       int ret;
+ 
+@@ -4822,7 +4819,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+       struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
+       struct fscrypt_name fname;
+-      struct qstr name;
+ 
+       if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+               return -ENOTEMPTY;
+@@ -4838,7 +4834,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+       err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+       if (err)
+               return err;
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+ 
+       /* This needs to handle no-key deletions later on */
+ 
+@@ -4861,7 +4856,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+ 
+       /* now the directory is empty */
+       err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+-                               &name);
++                               &fname.disk_name);
+       if (!err) {
+               btrfs_i_size_write(BTRFS_I(inode), 0);
+               /*
+@@ -5563,7 +5558,6 @@ void btrfs_evict_inode(struct inode *inode)
+ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+                              struct btrfs_key *location, u8 *type)
+ {
+-      struct qstr name;
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+@@ -5578,12 +5572,10 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+       if (ret)
+               goto out;
+ 
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+-
+       /* This needs to handle no-key deletions later on */
+ 
+       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+-                                 &name, 0);
++                                 &fname.disk_name, 0);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -5595,7 +5587,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+               ret = -EUCLEAN;
+               btrfs_warn(root->fs_info,
+ "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+-                         __func__, name.name, btrfs_ino(BTRFS_I(dir)),
++                         __func__, fname.disk_name.name, btrfs_ino(BTRFS_I(dir)),
+                          location->objectid, location->type, location->offset);
+       }
+       if (!ret)
+@@ -5625,14 +5617,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
+       int ret;
+       int err = 0;
+       struct fscrypt_name fname;
+-      struct qstr name;
+ 
+       ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
+       if (ret)
+               return ret;
+ 
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+-
+       path = btrfs_alloc_path();
+       if (!path) {
+               err = -ENOMEM;
+@@ -5654,11 +5643,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
+       leaf = path->nodes[0];
+       ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+       if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
+-          btrfs_root_ref_name_len(leaf, ref) != name.len)
++          btrfs_root_ref_name_len(leaf, ref) != fname.disk_name.len)
+               goto out;
+ 
+-      ret = memcmp_extent_buffer(leaf, name.name, (unsigned long)(ref + 1),
+-                                 name.len);
++      ret = memcmp_extent_buffer(leaf, fname.disk_name.name,
++                                 (unsigned long)(ref + 1), fname.disk_name.len);
+       if (ret)
+               goto out;
+ 
+@@ -6291,7 +6280,6 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
+                                            &args->fname);
+               if (ret)
+                       return ret;
+-              args->name = (struct qstr)FSTR_TO_QSTR(&args->fname.disk_name);
+       }
+ 
+       ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl);
+@@ -6374,7 +6362,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+ {
+       struct inode *dir = args->dir;
+       struct inode *inode = args->inode;
+-      const struct qstr *name = args->orphan ? NULL : &args->dentry->d_name;
++      const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name;
+       struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+       struct btrfs_root *root;
+       struct btrfs_inode_item *inode_item;
+@@ -6609,7 +6597,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+  */
+ int btrfs_add_link(struct btrfs_trans_handle *trans,
+                  struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
+-                 const struct qstr *name, int add_backref, u64 index)
++                 const struct fscrypt_str *name, int add_backref, u64 index)
+ {
+       int ret = 0;
+       struct btrfs_key key;
+@@ -6765,7 +6753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       struct inode *inode = d_inode(old_dentry);
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct fscrypt_name fname;
+-      struct qstr name;
+       u64 index;
+       int err;
+       int drop_inode = 0;
+@@ -6781,8 +6768,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       if (err)
+               goto fail;
+ 
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+-
+       err = btrfs_set_inode_index(BTRFS_I(dir), &index);
+       if (err)
+               goto fail;
+@@ -6809,7 +6794,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
+ 
+       err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+-                           &name, 1, index);
++                           &fname.disk_name, 1, index);
+ 
+       if (err) {
+               drop_inode = 1;
+@@ -9107,7 +9092,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       int ret2;
+       bool need_abort = false;
+       struct fscrypt_name old_fname, new_fname;
+-      struct qstr old_name, new_name;
++      struct fscrypt_str *old_name, *new_name;
+ 
+       /*
+        * For non-subvolumes allow exchange only within one subvolume, in the
+@@ -9129,8 +9114,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               return ret;
+       }
+ 
+-      old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name);
+-      new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name);
++      old_name = &old_fname.disk_name;
++      new_name = &new_fname.disk_name;
+ 
+       /* close the race window with snapshot create/destroy ioctl */
+       if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+@@ -9199,7 +9184,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino,
++              ret = btrfs_insert_inode_ref(trans, dest, new_name, old_ino,
+                                            btrfs_ino(BTRFS_I(new_dir)),
+                                            old_idx);
+               if (ret)
+@@ -9212,7 +9197,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, root, &old_name, new_ino,
++              ret = btrfs_insert_inode_ref(trans, root, old_name, new_ino,
+                                            btrfs_ino(BTRFS_I(old_dir)),
+                                            new_idx);
+               if (ret) {
+@@ -9247,7 +9232,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* src is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+                                          BTRFS_I(old_dentry->d_inode),
+-                                         &old_name, &old_rename_ctx);
++                                         old_name, &old_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+       }
+@@ -9262,7 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* dest is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                          BTRFS_I(new_dentry->d_inode),
+-                                         &new_name, &new_rename_ctx);
++                                         new_name, &new_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
+       }
+@@ -9272,14 +9257,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           &new_name, 0, old_idx);
++                           new_name, 0, old_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
+-                           &old_name, 0, new_idx);
++                           old_name, 0, new_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+@@ -9364,7 +9349,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       int ret2;
+       u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+       struct fscrypt_name old_fname, new_fname;
+-      struct qstr old_name, new_name;
+ 
+       if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               return -EPERM;
+@@ -9391,12 +9375,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               return ret;
+       }
+ 
+-      old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name);
+-      new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name);
+-
+       /* check for collisions, even if the  name isn't there */
+-      ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_name);
+-
++      ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_fname.disk_name);
+       if (ret) {
+               if (ret == -EEXIST) {
+                       /* we shouldn't get
+@@ -9489,8 +9469,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino,
+-                                           btrfs_ino(BTRFS_I(new_dir)), index);
++              ret = btrfs_insert_inode_ref(trans, dest, &new_fname.disk_name,
++                                           old_ino, btrfs_ino(BTRFS_I(new_dir)),
++                                           index);
+               if (ret)
+                       goto out_fail;
+       }
+@@ -9513,7 +9494,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       } else {
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+                                          BTRFS_I(d_inode(old_dentry)),
+-                                         &old_name, &rename_ctx);
++                                         &old_fname.disk_name, &rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+       }
+@@ -9532,7 +9513,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               } else {
+                       ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                                BTRFS_I(d_inode(new_dentry)),
+-                                               &new_name);
++                                               &new_fname.disk_name);
+               }
+               if (!ret && new_inode->i_nlink == 0)
+                       ret = btrfs_orphan_add(trans,
+@@ -9544,7 +9525,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           &new_name, 0, index);
++                           &new_fname.disk_name, 0, index);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 5305d98905cea..9e323420c96d3 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -951,7 +951,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
+       struct inode *dir = d_inode(parent->dentry);
+       struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+       struct dentry *dentry;
+-      struct qstr name_str = QSTR_INIT(name, namelen);
++      struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
+       int error;
+ 
+       error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
+@@ -3782,7 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
+       struct btrfs_trans_handle *trans;
+       struct btrfs_path *path = NULL;
+       struct btrfs_disk_key disk_key;
+-      struct qstr name = QSTR_INIT("default", 7);
++      struct fscrypt_str name = FSTR_INIT("default", 7);
+       u64 objectid = 0;
+       u64 dir_id;
+       int ret;
+diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
+index cf29241b9b310..7d783f0943068 100644
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -328,7 +328,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
+ 
+ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+                      u64 ref_id, u64 dirid, u64 *sequence,
+-                     const struct qstr *name)
++                     const struct fscrypt_str *name)
+ {
+       struct btrfs_root *tree_root = trans->fs_info->tree_root;
+       struct btrfs_path *path;
+@@ -400,7 +400,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+  */
+ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+                      u64 ref_id, u64 dirid, u64 sequence,
+-                     const struct qstr *name)
++                     const struct fscrypt_str *name)
+ {
+       struct btrfs_root *tree_root = trans->fs_info->tree_root;
+       struct btrfs_key key;
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index 833364527554c..547b5c2292186 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -1596,7 +1596,7 @@ static int gen_unique_name(struct send_ctx *sctx,
+               return -ENOMEM;
+ 
+       while (1) {
+-              struct qstr tmp_name;
++              struct fscrypt_str tmp_name;
+ 
+               len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
+                               ino, gen, idx);
+@@ -1756,7 +1756,7 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       struct btrfs_path *path;
+-      struct qstr name_str = QSTR_INIT(name, name_len);
++      struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
+ 
+       path = alloc_path_for_send();
+       if (!path)
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index bf56e4d6b9f48..2c562febd801e 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -1398,7 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_key location;
+-      struct qstr name = QSTR_INIT("default", 7);
++      struct fscrypt_str name = FSTR_INIT("default", 7);
+       u64 dir_id;
+ 
+       path = btrfs_alloc_path();
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index c8918bdf15ccd..1193214ba8c10 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1641,7 +1641,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       u64 root_flags;
+       unsigned int nofs_flags;
+       struct fscrypt_name fname;
+-      struct qstr name;
+ 
+       ASSERT(pending->path);
+       path = pending->path;
+@@ -1661,7 +1660,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       memalloc_nofs_restore(nofs_flags);
+       if (pending->error)
+               goto free_pending;
+-      name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+ 
+       pending->error = btrfs_get_free_objectid(tree_root, &objectid);
+       if (pending->error)
+@@ -1709,7 +1707,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       /* check if there is a file/dir which has the same name. */
+       dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
+                                        btrfs_ino(BTRFS_I(parent_inode)),
+-                                       &name, 0);
++                                       &fname.disk_name, 0);
+       if (dir_item != NULL && !IS_ERR(dir_item)) {
+               pending->error = -EEXIST;
+               goto dir_item_existed;
+@@ -1804,7 +1802,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       ret = btrfs_add_root_ref(trans, objectid,
+                                parent_root->root_key.objectid,
+                                btrfs_ino(BTRFS_I(parent_inode)), index,
+-                               &name);
++                               &fname.disk_name);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+@@ -1836,8 +1834,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       if (ret < 0)
+               goto fail;
+ 
+-      ret = btrfs_insert_dir_item(trans, &name, BTRFS_I(parent_inode), &key,
+-                                  BTRFS_FT_DIR, index);
++      ret = btrfs_insert_dir_item(trans, &fname.disk_name,
++                                  BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
++                                  index);
+       /* We have check then name at the beginning, so it is impossible. */
+       BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
+       if (ret) {
+@@ -1846,7 +1845,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       }
+ 
+       btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
+-                                                name.len * 2);
++                                                fname.disk_name.len * 2);
+       parent_inode->i_mtime = current_time(parent_inode);
+       parent_inode->i_ctime = parent_inode->i_mtime;
+       ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 25fd3f34b8f21..ab7893debf07a 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -596,7 +596,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
+ }
+ 
+ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
+-                             struct qstr *name)
++                             struct fscrypt_str *name)
+ {
+       char *buf;
+ 
+@@ -916,7 +916,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
+ static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
+                                      struct btrfs_inode *dir,
+                                      struct btrfs_inode *inode,
+-                                     const struct qstr *name)
++                                     const struct fscrypt_str *name)
+ {
+       int ret;
+ 
+@@ -947,7 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+ {
+       struct btrfs_root *root = dir->root;
+       struct inode *inode;
+-      struct qstr name;
++      struct fscrypt_str name;
+       struct extent_buffer *leaf;
+       struct btrfs_key location;
+       int ret;
+@@ -988,7 +988,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+ static noinline int inode_in_dir(struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                u64 dirid, u64 objectid, u64 index,
+-                               struct qstr *name)
++                               struct fscrypt_str *name)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key location;
+@@ -1035,7 +1035,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
+ static noinline int backref_in_log(struct btrfs_root *log,
+                                  struct btrfs_key *key,
+                                  u64 ref_objectid,
+-                                 const struct qstr *name)
++                                 const struct fscrypt_str *name)
+ {
+       struct btrfs_path *path;
+       int ret;
+@@ -1071,7 +1071,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+                                 struct btrfs_inode *dir,
+                                 struct btrfs_inode *inode,
+                                 u64 inode_objectid, u64 parent_objectid,
+-                                u64 ref_index, struct qstr *name)
++                                u64 ref_index, struct fscrypt_str *name)
+ {
+       int ret;
+       struct extent_buffer *leaf;
+@@ -1105,7 +1105,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+               ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
+               while (ptr < ptr_end) {
+-                      struct qstr victim_name;
++                      struct fscrypt_str victim_name;
+ 
+                       victim_ref = (struct btrfs_inode_ref *)ptr;
+                       ret = read_alloc_one_name(leaf, (victim_ref + 1),
+@@ -1155,7 +1155,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+               base = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ 
+               while (cur_offset < item_size) {
+-                      struct qstr victim_name;
++                      struct fscrypt_str victim_name;
+ 
+                       extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ 
+@@ -1230,7 +1230,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+ }
+ 
+ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+-                           struct qstr *name, u64 *index,
++                           struct fscrypt_str *name, u64 *index,
+                            u64 *parent_objectid)
+ {
+       struct btrfs_inode_extref *extref;
+@@ -1252,7 +1252,7 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ }
+ 
+ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+-                        struct qstr *name, u64 *index)
++                        struct fscrypt_str *name, u64 *index)
+ {
+       struct btrfs_inode_ref *ref;
+       int ret;
+@@ -1304,7 +1304,7 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+       ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+       ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]);
+       while (ref_ptr < ref_end) {
+-              struct qstr name;
++              struct fscrypt_str name;
+               u64 parent_id;
+ 
+               if (key->type == BTRFS_INODE_EXTREF_KEY) {
+@@ -1372,7 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+       struct inode *inode = NULL;
+       unsigned long ref_ptr;
+       unsigned long ref_end;
+-      struct qstr name;
++      struct fscrypt_str name;
+       int ret;
+       int log_ref_ver = 0;
+       u64 parent_objectid;
+@@ -1766,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   u64 dirid, u64 index,
+-                                  const struct qstr *name,
++                                  const struct fscrypt_str *name,
+                                   struct btrfs_key *location)
+ {
+       struct inode *inode;
+@@ -1844,7 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+                                   struct btrfs_dir_item *di,
+                                   struct btrfs_key *key)
+ {
+-      struct qstr name;
++      struct fscrypt_str name;
+       struct btrfs_dir_item *dir_dst_di;
+       struct btrfs_dir_item *index_dst_di;
+       bool dir_dst_matches = false;
+@@ -2124,7 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_dir_item *di;
+-      struct qstr name;
++      struct fscrypt_str name;
+       struct inode *inode = NULL;
+       struct btrfs_key location;
+ 
+@@ -3417,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *log,
+                            struct btrfs_path *path,
+                            u64 dir_ino,
+-                           const struct qstr *name,
++                           const struct fscrypt_str *name,
+                            u64 index)
+ {
+       struct btrfs_dir_item *di;
+@@ -3464,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
+  */
+ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const struct qstr *name,
++                                const struct fscrypt_str *name,
+                                 struct btrfs_inode *dir, u64 index)
+ {
+       struct btrfs_path *path;
+@@ -3503,7 +3503,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+ /* see comments for btrfs_del_dir_entries_in_log */
+ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+-                              const struct qstr *name,
++                              const struct fscrypt_str *name,
+                               struct btrfs_inode *inode, u64 dirid)
+ {
+       struct btrfs_root *log;
+@@ -5267,7 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+               u32 this_len;
+               unsigned long name_ptr;
+               struct btrfs_dir_item *di;
+-              struct qstr name_str;
++              struct fscrypt_str name_str;
+ 
+               if (key->type == BTRFS_INODE_REF_KEY) {
+                       struct btrfs_inode_ref *iref;
+@@ -7472,7 +7472,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+               struct btrfs_root *log = old_dir->root->log_root;
+               struct btrfs_path *path;
+               struct fscrypt_name fname;
+-              struct qstr name;
+ 
+               ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ 
+@@ -7480,7 +7479,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                                            &old_dentry->d_name, 0, &fname);
+               if (ret)
+                       goto out;
+-              name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name);
+               /*
+                * We have two inodes to update in the log, the old directory and
+                * the inode that got renamed, so we must pin the log to prevent
+@@ -7516,7 +7514,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                */
+               mutex_lock(&old_dir->log_mutex);
+               ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
+-                                      &name, old_dir_index);
++                                      &fname.disk_name, old_dir_index);
+               if (ret > 0) {
+                       /*
+                        * The dentry does not exist in the log, so record its
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index 6c0dc79787f05..8adebf4c9adaf 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
+                         struct btrfs_log_ctx *ctx);
+ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const struct qstr *name,
++                                const struct fscrypt_str *name,
+                                 struct btrfs_inode *dir, u64 index);
+ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+-                              const struct qstr *name,
++                              const struct fscrypt_str *name,
+                               struct btrfs_inode *inode, u64 dirid);
+ void btrfs_end_log_trans(struct btrfs_root *root);
+ void btrfs_pin_log_trans(struct btrfs_root *root);
+-- 
+2.40.1
+
diff --git a/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch b/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch

new file mode 100644 (file)

index 0000000..0e63b37
--- /dev/null
+++ b/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch
@@ -0,0 +1,1959 @@
+From 8c61a5386d7b8715600196b9b81b7371a9fd1017 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 12:58:25 -0400
+Subject: btrfs: use struct qstr instead of name and namelen pairs
+
+From: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+
+[ Upstream commit e43eec81c5167b655b72c781b0e75e62a05e415e ]
+
+Many functions throughout btrfs take name buffer and name length
+arguments. Most of these functions at the highest level are usually
+called with these arguments extracted from a supplied dentry's name.
+But the entire name can be passed instead, making each function a little
+more elegant.
+
+Each function whose arguments are currently the name and length
+extracted from a dentry is herein converted to instead take a pointer to
+the name in the dentry. The couple of calls to these functions without a
+struct dentry are converted to create an appropriate qstr to pass in.
+Additionally, every function which is only called with a name/len
+extracted directly from a qstr is also converted.
+
+This change has a positive effect on stack consumption: the frame of many
+functions is reduced, though the saved space will be used in the future
+for fscrypt related structures.
+
+Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h       |  26 ++--
+ fs/btrfs/dir-item.c    |  50 ++++----
+ fs/btrfs/inode-item.c  |  73 ++++++-----
+ fs/btrfs/inode-item.h  |  20 ++-
+ fs/btrfs/inode.c       | 130 +++++++++-----------
+ fs/btrfs/ioctl.c       |   7 +-
+ fs/btrfs/root-tree.c   |  19 ++-
+ fs/btrfs/send.c        |  12 +-
+ fs/btrfs/super.c       |   3 +-
+ fs/btrfs/transaction.c |  11 +-
+ fs/btrfs/tree-log.c    | 267 +++++++++++++++++++----------------------
+ fs/btrfs/tree-log.h    |   4 +-
+ 12 files changed, 287 insertions(+), 335 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 3bcef0c4d6fc4..6718cee57a94e 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3238,11 +3238,11 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
+ 
+ /* root-item.c */
+ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+-                     u64 ref_id, u64 dirid, u64 sequence, const char *name,
+-                     int name_len);
++                     u64 ref_id, u64 dirid, u64 sequence,
++                     const struct qstr *name);
+ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+-                     u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+-                     int name_len);
++                     u64 ref_id, u64 dirid, u64 *sequence,
++                     const struct qstr *name);
+ int btrfs_del_root(struct btrfs_trans_handle *trans,
+                  const struct btrfs_key *key);
+ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+@@ -3271,25 +3271,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
+ 
+ /* dir-item.c */
+ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+-                        const char *name, int name_len);
+-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+-                        int name_len, struct btrfs_inode *dir,
++                        const struct qstr *name);
++int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
++                        const struct qstr *name, struct btrfs_inode *dir,
+                         struct btrfs_key *location, u8 type, u64 index);
+ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+-                                           const char *name, int name_len,
+-                                           int mod);
++                                           const struct qstr *name, int mod);
+ struct btrfs_dir_item *
+ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+-                          u64 index, const char *name, int name_len,
+-                          int mod);
++                          u64 index, const struct qstr *name, int mod);
+ struct btrfs_dir_item *
+ btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+-                          const char *name, int name_len);
++                          const struct qstr *name);
+ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_path *path,
+@@ -3370,10 +3368,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
+ int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
+-                     const char *name, int name_len);
++                     const struct qstr *name);
+ int btrfs_add_link(struct btrfs_trans_handle *trans,
+                  struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
+-                 const char *name, int name_len, int add_backref, u64 index);
++                 const struct qstr *name, int add_backref, u64 index);
+ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
+ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
+                        int front);
+diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
+index 72fb2c518a2b4..8c60f37eb13fd 100644
+--- a/fs/btrfs/dir-item.c
++++ b/fs/btrfs/dir-item.c
+@@ -103,8 +103,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+  * to use for the second index (if one is created).
+  * Will return 0 or -ENOMEM
+  */
+-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+-                        int name_len, struct btrfs_inode *dir,
++int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
++                        const struct qstr *name, struct btrfs_inode *dir,
+                         struct btrfs_key *location, u8 type, u64 index)
+ {
+       int ret = 0;
+@@ -120,7 +120,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+ 
+       key.objectid = btrfs_ino(dir);
+       key.type = BTRFS_DIR_ITEM_KEY;
+-      key.offset = btrfs_name_hash(name, name_len);
++      key.offset = btrfs_name_hash(name->name, name->len);
+ 
+       path = btrfs_alloc_path();
+       if (!path)
+@@ -128,9 +128,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+ 
+       btrfs_cpu_key_to_disk(&disk_key, location);
+ 
+-      data_size = sizeof(*dir_item) + name_len;
++      data_size = sizeof(*dir_item) + name->len;
+       dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+-                                      name, name_len);
++                                      name->name, name->len);
+       if (IS_ERR(dir_item)) {
+               ret = PTR_ERR(dir_item);
+               if (ret == -EEXIST)
+@@ -142,11 +142,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+       btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+       btrfs_set_dir_type(leaf, dir_item, type);
+       btrfs_set_dir_data_len(leaf, dir_item, 0);
+-      btrfs_set_dir_name_len(leaf, dir_item, name_len);
++      btrfs_set_dir_name_len(leaf, dir_item, name->len);
+       btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+       name_ptr = (unsigned long)(dir_item + 1);
+ 
+-      write_extent_buffer(leaf, name, name_ptr, name_len);
++      write_extent_buffer(leaf, name->name, name_ptr, name->len);
+       btrfs_mark_buffer_dirty(leaf);
+ 
+ second_insert:
+@@ -157,7 +157,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
+       }
+       btrfs_release_path(path);
+ 
+-      ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
++      ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir,
+                                             &disk_key, type, index);
+ out_free:
+       btrfs_free_path(path);
+@@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir(
+ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+-                                           const char *name, int name_len,
++                                           const struct qstr *name,
+                                            int mod)
+ {
+       struct btrfs_key key;
+@@ -214,9 +214,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ 
+       key.objectid = dir;
+       key.type = BTRFS_DIR_ITEM_KEY;
+-      key.offset = btrfs_name_hash(name, name_len);
++      key.offset = btrfs_name_hash(name->name, name->len);
+ 
+-      di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
++      di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
++                                  name->len, mod);
+       if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
+               return NULL;
+ 
+@@ -224,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ }
+ 
+ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+-                                 const char *name, int name_len)
++                                 const struct qstr *name)
+ {
+       int ret;
+       struct btrfs_key key;
+@@ -240,9 +241,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+ 
+       key.objectid = dir;
+       key.type = BTRFS_DIR_ITEM_KEY;
+-      key.offset = btrfs_name_hash(name, name_len);
++      key.offset = btrfs_name_hash(name->name, name->len);
+ 
+-      di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
++      di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name,
++                                  name->len, 0);
+       if (IS_ERR(di)) {
+               ret = PTR_ERR(di);
+               /* Nothing found, we're safe */
+@@ -262,11 +264,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+               goto out;
+       }
+ 
+-      /*
+-       * see if there is room in the item to insert this
+-       * name
+-       */
+-      data_size = sizeof(*di) + name_len;
++      /* See if there is room in the item to insert this name. */
++      data_size = sizeof(*di) + name->len;
+       leaf = path->nodes[0];
+       slot = path->slots[0];
+       if (data_size + btrfs_item_size(leaf, slot) +
+@@ -303,8 +302,7 @@ struct btrfs_dir_item *
+ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+-                          u64 index, const char *name, int name_len,
+-                          int mod)
++                          u64 index, const struct qstr *name, int mod)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+@@ -313,7 +311,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = index;
+ 
+-      di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
++      di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
++                                  name->len, mod);
+       if (di == ERR_PTR(-ENOENT))
+               return NULL;
+ 
+@@ -321,9 +320,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+ }
+ 
+ struct btrfs_dir_item *
+-btrfs_search_dir_index_item(struct btrfs_root *root,
+-                          struct btrfs_path *path, u64 dirid,
+-                          const char *name, int name_len)
++btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
++                          u64 dirid, const struct qstr *name)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+@@ -338,7 +336,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root,
+                       break;
+ 
+               di = btrfs_match_dir_item_name(root->fs_info, path,
+-                                             name, name_len);
++                                             name->name, name->len);
+               if (di)
+                       return di;
+       }
+diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
+index 0eeb5ea878948..61b323517a40b 100644
+--- a/fs/btrfs/inode-item.c
++++ b/fs/btrfs/inode-item.c
+@@ -10,8 +10,8 @@
+ #include "print-tree.h"
+ 
+ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+-                                                 int slot, const char *name,
+-                                                 int name_len)
++                                                 int slot,
++                                                 const struct qstr *name)
+ {
+       struct btrfs_inode_ref *ref;
+       unsigned long ptr;
+@@ -27,9 +27,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+               len = btrfs_inode_ref_name_len(leaf, ref);
+               name_ptr = (unsigned long)(ref + 1);
+               cur_offset += len + sizeof(*ref);
+-              if (len != name_len)
++              if (len != name->len)
+                       continue;
+-              if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
++              if (memcmp_extent_buffer(leaf, name->name, name_ptr,
++                                       name->len) == 0)
+                       return ref;
+       }
+       return NULL;
+@@ -37,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+ 
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+               struct extent_buffer *leaf, int slot, u64 ref_objectid,
+-              const char *name, int name_len)
++              const struct qstr *name)
+ {
+       struct btrfs_inode_extref *extref;
+       unsigned long ptr;
+@@ -60,9 +61,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+               name_ptr = (unsigned long)(&extref->name);
+               ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+ 
+-              if (ref_name_len == name_len &&
++              if (ref_name_len == name->len &&
+                   btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
+-                  (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0))
++                  (memcmp_extent_buffer(leaf, name->name, name_ptr,
++                                        name->len) == 0))
+                       return extref;
+ 
+               cur_offset += ref_name_len + sizeof(*extref);
+@@ -75,7 +77,7 @@ struct btrfs_inode_extref *
+ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+-                        const char *name, int name_len,
++                        const struct qstr *name,
+                         u64 inode_objectid, u64 ref_objectid, int ins_len,
+                         int cow)
+ {
+@@ -84,7 +86,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+ 
+       key.objectid = inode_objectid;
+       key.type = BTRFS_INODE_EXTREF_KEY;
+-      key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
++      key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
+ 
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+@@ -92,13 +94,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+       if (ret > 0)
+               return NULL;
+       return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+-                                            ref_objectid, name, name_len);
++                                            ref_objectid, name);
+ 
+ }
+ 
+ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const char *name, int name_len,
++                                const struct qstr *name,
+                                 u64 inode_objectid, u64 ref_objectid,
+                                 u64 *index)
+ {
+@@ -107,14 +109,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+       struct btrfs_inode_extref *extref;
+       struct extent_buffer *leaf;
+       int ret;
+-      int del_len = name_len + sizeof(*extref);
++      int del_len = name->len + sizeof(*extref);
+       unsigned long ptr;
+       unsigned long item_start;
+       u32 item_size;
+ 
+       key.objectid = inode_objectid;
+       key.type = BTRFS_INODE_EXTREF_KEY;
+-      key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
++      key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
+ 
+       path = btrfs_alloc_path();
+       if (!path)
+@@ -132,7 +134,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+        * readonly.
+        */
+       extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+-                                              ref_objectid, name, name_len);
++                                              ref_objectid, name);
+       if (!extref) {
+               btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
+               ret = -EROFS;
+@@ -168,8 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+ }
+ 
+ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+-                      struct btrfs_root *root,
+-                      const char *name, int name_len,
++                      struct btrfs_root *root, const struct qstr *name,
+                       u64 inode_objectid, u64 ref_objectid, u64 *index)
+ {
+       struct btrfs_path *path;
+@@ -182,7 +183,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+       u32 sub_item_len;
+       int ret;
+       int search_ext_refs = 0;
+-      int del_len = name_len + sizeof(*ref);
++      int del_len = name->len + sizeof(*ref);
+ 
+       key.objectid = inode_objectid;
+       key.offset = ref_objectid;
+@@ -201,8 +202,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name,
+-                                       name_len);
++      ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name);
+       if (!ref) {
+               ret = -ENOENT;
+               search_ext_refs = 1;
+@@ -219,7 +219,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+       ptr = (unsigned long)ref;
+-      sub_item_len = name_len + sizeof(*ref);
++      sub_item_len = name->len + sizeof(*ref);
+       item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+                             item_size - (ptr + sub_item_len - item_start));
+@@ -233,7 +233,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+                * name in our ref array. Find and remove the extended
+                * inode ref then.
+                */
+-              return btrfs_del_inode_extref(trans, root, name, name_len,
++              return btrfs_del_inode_extref(trans, root, name,
+                                             inode_objectid, ref_objectid, index);
+       }
+ 
+@@ -247,12 +247,13 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+  */
+ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+                                    struct btrfs_root *root,
+-                                   const char *name, int name_len,
+-                                   u64 inode_objectid, u64 ref_objectid, u64 index)
++                                   const struct qstr *name,
++                                   u64 inode_objectid, u64 ref_objectid,
++                                   u64 index)
+ {
+       struct btrfs_inode_extref *extref;
+       int ret;
+-      int ins_len = name_len + sizeof(*extref);
++      int ins_len = name->len + sizeof(*extref);
+       unsigned long ptr;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+@@ -260,7 +261,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+ 
+       key.objectid = inode_objectid;
+       key.type = BTRFS_INODE_EXTREF_KEY;
+-      key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
++      key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
+ 
+       path = btrfs_alloc_path();
+       if (!path)
+@@ -272,7 +273,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+               if (btrfs_find_name_in_ext_backref(path->nodes[0],
+                                                  path->slots[0],
+                                                  ref_objectid,
+-                                                 name, name_len))
++                                                 name))
+                       goto out;
+ 
+               btrfs_extend_item(path, ins_len);
+@@ -286,12 +287,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+       ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len;
+       extref = (struct btrfs_inode_extref *)ptr;
+ 
+-      btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
++      btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len);
+       btrfs_set_inode_extref_index(path->nodes[0], extref, index);
+       btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
+ 
+       ptr = (unsigned long)&extref->name;
+-      write_extent_buffer(path->nodes[0], name, ptr, name_len);
++      write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
+ 
+ out:
+@@ -301,8 +302,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+ 
+ /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
+ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+-                         struct btrfs_root *root,
+-                         const char *name, int name_len,
++                         struct btrfs_root *root, const struct qstr *name,
+                          u64 inode_objectid, u64 ref_objectid, u64 index)
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+@@ -311,7 +311,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+       struct btrfs_inode_ref *ref;
+       unsigned long ptr;
+       int ret;
+-      int ins_len = name_len + sizeof(*ref);
++      int ins_len = name->len + sizeof(*ref);
+ 
+       key.objectid = inode_objectid;
+       key.offset = ref_objectid;
+@@ -327,7 +327,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+       if (ret == -EEXIST) {
+               u32 old_size;
+               ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+-                                               name, name_len);
++                                               name);
+               if (ref)
+                       goto out;
+ 
+@@ -336,7 +336,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+               ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                    struct btrfs_inode_ref);
+               ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
+-              btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
++              btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
+               btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+               ptr = (unsigned long)(ref + 1);
+               ret = 0;
+@@ -344,7 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+               if (ret == -EOVERFLOW) {
+                       if (btrfs_find_name_in_backref(path->nodes[0],
+                                                      path->slots[0],
+-                                                     name, name_len))
++                                                     name))
+                               ret = -EEXIST;
+                       else
+                               ret = -EMLINK;
+@@ -353,11 +353,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+       } else {
+               ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                    struct btrfs_inode_ref);
+-              btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
++              btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
+               btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+               ptr = (unsigned long)(ref + 1);
+       }
+-      write_extent_buffer(path->nodes[0], name, ptr, name_len);
++      write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
+ 
+ out:
+@@ -370,7 +370,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+               if (btrfs_super_incompat_flags(disk_super)
+                   & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+                       ret = btrfs_insert_inode_extref(trans, root, name,
+-                                                      name_len,
+                                                       inode_objectid,
+                                                       ref_objectid, index);
+       }
+diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
+index a8fc16d0147f6..3c657c670cfdf 100644
+--- a/fs/btrfs/inode-item.h
++++ b/fs/btrfs/inode-item.h
+@@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_truncate_control *control);
+ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+-                         struct btrfs_root *root,
+-                         const char *name, int name_len,
++                         struct btrfs_root *root, const struct qstr *name,
+                          u64 inode_objectid, u64 ref_objectid, u64 index);
+ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+-                         struct btrfs_root *root,
+-                         const char *name, int name_len,
+-                         u64 inode_objectid, u64 ref_objectid, u64 *index);
++                      struct btrfs_root *root, const struct qstr *name,
++                      u64 inode_objectid, u64 ref_objectid, u64 *index);
+ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path, u64 objectid);
+-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+-                     *root, struct btrfs_path *path,
++int btrfs_lookup_inode(struct btrfs_trans_handle *trans,
++                     struct btrfs_root *root, struct btrfs_path *path,
+                      struct btrfs_key *location, int mod);
+ 
+ struct btrfs_inode_extref *btrfs_lookup_inode_extref(
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+-                        const char *name, int name_len,
++                        const struct qstr *name,
+                         u64 inode_objectid, u64 ref_objectid, int ins_len,
+                         int cow);
+ 
+ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
+-                                                 int slot, const char *name,
+-                                                 int name_len);
++                                                 int slot,
++                                                 const struct qstr *name);
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+               struct extent_buffer *leaf, int slot, u64 ref_objectid,
+-              const char *name, int name_len);
++              const struct qstr *name);
+ 
+ #endif
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 222068bf80031..a5e61ad2ba696 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3627,7 +3627,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
+       spin_unlock(&fs_info->delayed_iput_lock);
+ }
+ 
+-/**
++/*
+  * Wait for flushing all delayed iputs
+  *
+  * @fs_info:  the filesystem
+@@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_inode *dir,
+                               struct btrfs_inode *inode,
+-                              const char *name, int name_len,
++                              const struct qstr *name,
+                               struct btrfs_rename_ctx *rename_ctx)
+ {
+       struct btrfs_root *root = dir->root;
+@@ -4290,8 +4290,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
+-                                  name, name_len, -1);
++      di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto err;
+@@ -4319,12 +4318,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+               }
+       }
+ 
+-      ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
+-                                dir_ino, &index);
++      ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
+       if (ret) {
+               btrfs_info(fs_info,
+                       "failed to delete reference to %.*s, inode %llu parent %llu",
+-                      name_len, name, ino, dir_ino);
++                      name->len, name->name, ino, dir_ino);
+               btrfs_abort_transaction(trans, ret);
+               goto err;
+       }
+@@ -4345,10 +4343,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+        * operations on the log tree, increasing latency for applications.
+        */
+       if (!rename_ctx) {
+-              btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+-                                         dir_ino);
+-              btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+-                                           index);
++              btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino);
++              btrfs_del_dir_entries_in_log(trans, root, name, dir, index);
+       }
+ 
+       /*
+@@ -4366,7 +4362,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+       if (ret)
+               goto out;
+ 
+-      btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
++      btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
+       inode_inc_iversion(&inode->vfs_inode);
+       inode_inc_iversion(&dir->vfs_inode);
+       inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+@@ -4379,10 +4375,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+ 
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
+-                     const char *name, int name_len)
++                     const struct qstr *name)
+ {
+       int ret;
+-      ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
++
++      ret = __btrfs_unlink_inode(trans, dir, inode, name, NULL);
+       if (!ret) {
+               drop_nlink(&inode->vfs_inode);
+               ret = btrfs_update_inode(trans, inode->root, inode);
+@@ -4426,9 +4423,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+       btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+                       0);
+ 
+-      ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
+-                      BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+-                      dentry->d_name.len);
++      ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
++                               &dentry->d_name);
+       if (ret)
+               goto out;
+ 
+@@ -4453,8 +4449,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+-      const char *name = dentry->d_name.name;
+-      int name_len = dentry->d_name.len;
++      const struct qstr *name = &dentry->d_name;
+       u64 index;
+       int ret;
+       u64 objectid;
+@@ -4473,8 +4468,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       if (!path)
+               return -ENOMEM;
+ 
+-      di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
+-                                 name, name_len, -1);
++      di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -4500,8 +4494,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+        * call btrfs_del_root_ref, and it _shouldn't_ fail.
+        */
+       if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
+-              di = btrfs_search_dir_index_item(root, path, dir_ino,
+-                                               name, name_len);
++              di = btrfs_search_dir_index_item(root, path, dir_ino, name);
+               if (IS_ERR_OR_NULL(di)) {
+                       if (!di)
+                               ret = -ENOENT;
+@@ -4518,7 +4511,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+       } else {
+               ret = btrfs_del_root_ref(trans, objectid,
+                                        root->root_key.objectid, dir_ino,
+-                                       &index, name, name_len);
++                                       &index, name);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       goto out;
+@@ -4531,7 +4524,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+               goto out;
+       }
+ 
+-      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
++      btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name->len * 2);
+       inode_inc_iversion(dir);
+       dir->i_mtime = current_time(dir);
+       dir->i_ctime = dir->i_mtime;
+@@ -4553,6 +4546,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
+       struct btrfs_path *path;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
++      struct qstr name = QSTR_INIT("default", 7);
+       u64 dir_id;
+       int ret;
+ 
+@@ -4563,7 +4557,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
+       /* Make sure this root isn't set as the default subvol */
+       dir_id = btrfs_super_root_dir(fs_info->super_copy);
+       di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
+-                                 dir_id, "default", 7, 0);
++                                 dir_id, &name, 0);
+       if (di && !IS_ERR(di)) {
+               btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+               if (key.objectid == root->root_key.objectid) {
+@@ -4830,9 +4824,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+ 
+       /* now the directory is empty */
+-      err = btrfs_unlink_inode(trans, BTRFS_I(dir),
+-                      BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+-                      dentry->d_name.len);
++      err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
++                               &dentry->d_name);
+       if (!err) {
+               btrfs_i_size_write(BTRFS_I(inode), 0);
+               /*
+@@ -5532,8 +5525,7 @@ void btrfs_evict_inode(struct inode *inode)
+ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+                              struct btrfs_key *location, u8 *type)
+ {
+-      const char *name = dentry->d_name.name;
+-      int namelen = dentry->d_name.len;
++      const struct qstr *name = &dentry->d_name;
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+@@ -5544,7 +5536,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+               return -ENOMEM;
+ 
+       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+-                      name, namelen, 0);
++                                 name, 0);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+@@ -5556,7 +5548,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+               ret = -EUCLEAN;
+               btrfs_warn(root->fs_info,
+ "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+-                         __func__, name, btrfs_ino(BTRFS_I(dir)),
++                         __func__, name->name, btrfs_ino(BTRFS_I(dir)),
+                          location->objectid, location->type, location->offset);
+       }
+       if (!ret)
+@@ -6315,8 +6307,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+ {
+       struct inode *dir = args->dir;
+       struct inode *inode = args->inode;
+-      const char *name = args->orphan ? NULL : args->dentry->d_name.name;
+-      int name_len = args->orphan ? 0 : args->dentry->d_name.len;
++      const struct qstr *name = args->orphan ? NULL : &args->dentry->d_name;
+       struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+       struct btrfs_root *root;
+       struct btrfs_inode_item *inode_item;
+@@ -6417,7 +6408,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+                       sizes[1] = 2 + sizeof(*ref);
+               } else {
+                       key[1].offset = btrfs_ino(BTRFS_I(dir));
+-                      sizes[1] = name_len + sizeof(*ref);
++                      sizes[1] = name->len + sizeof(*ref);
+               }
+       }
+ 
+@@ -6456,10 +6447,12 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+                       btrfs_set_inode_ref_index(path->nodes[0], ref, 0);
+                       write_extent_buffer(path->nodes[0], "..", ptr, 2);
+               } else {
+-                      btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
++                      btrfs_set_inode_ref_name_len(path->nodes[0], ref,
++                                                   name->len);
+                       btrfs_set_inode_ref_index(path->nodes[0], ref,
+                                                 BTRFS_I(inode)->dir_index);
+-                      write_extent_buffer(path->nodes[0], name, ptr, name_len);
++                      write_extent_buffer(path->nodes[0], name->name, ptr,
++                                          name->len);
+               }
+       }
+ 
+@@ -6520,7 +6513,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+               ret = btrfs_orphan_add(trans, BTRFS_I(inode));
+       } else {
+               ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
+-                                   name_len, 0, BTRFS_I(inode)->dir_index);
++                                   0, BTRFS_I(inode)->dir_index);
+       }
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+@@ -6549,7 +6542,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
+  */
+ int btrfs_add_link(struct btrfs_trans_handle *trans,
+                  struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
+-                 const char *name, int name_len, int add_backref, u64 index)
++                 const struct qstr *name, int add_backref, u64 index)
+ {
+       int ret = 0;
+       struct btrfs_key key;
+@@ -6568,17 +6561,17 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
+       if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               ret = btrfs_add_root_ref(trans, key.objectid,
+                                        root->root_key.objectid, parent_ino,
+-                                       index, name, name_len);
++                                       index, name);
+       } else if (add_backref) {
+-              ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
+-                                           parent_ino, index);
++              ret = btrfs_insert_inode_ref(trans, root, name,
++                                           ino, parent_ino, index);
+       }
+ 
+       /* Nothing to clean up yet */
+       if (ret)
+               return ret;
+ 
+-      ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
++      ret = btrfs_insert_dir_item(trans, name, parent_inode, &key,
+                                   btrfs_inode_type(&inode->vfs_inode), index);
+       if (ret == -EEXIST || ret == -EOVERFLOW)
+               goto fail_dir_item;
+@@ -6588,7 +6581,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
+       }
+ 
+       btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
+-                         name_len * 2);
++                         name->len * 2);
+       inode_inc_iversion(&parent_inode->vfs_inode);
+       /*
+        * If we are replaying a log tree, we do not want to update the mtime
+@@ -6613,15 +6606,15 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
+               int err;
+               err = btrfs_del_root_ref(trans, key.objectid,
+                                        root->root_key.objectid, parent_ino,
+-                                       &local_index, name, name_len);
++                                       &local_index, name);
+               if (err)
+                       btrfs_abort_transaction(trans, err);
+       } else if (add_backref) {
+               u64 local_index;
+               int err;
+ 
+-              err = btrfs_del_inode_ref(trans, root, name, name_len,
+-                                        ino, parent_ino, &local_index);
++              err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino,
++                                        &local_index);
+               if (err)
+                       btrfs_abort_transaction(trans, err);
+       }
+@@ -6741,7 +6734,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
+ 
+       err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+-                           dentry->d_name.name, dentry->d_name.len, 1, index);
++                           &dentry->d_name, 1, index);
+ 
+       if (err) {
+               drop_inode = 1;
+@@ -9115,9 +9108,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest,
+-                                           new_dentry->d_name.name,
+-                                           new_dentry->d_name.len,
++              ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name,
+                                            old_ino,
+                                            btrfs_ino(BTRFS_I(new_dir)),
+                                            old_idx);
+@@ -9131,9 +9122,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, root,
+-                                           old_dentry->d_name.name,
+-                                           old_dentry->d_name.len,
++              ret = btrfs_insert_inode_ref(trans, root, &old_dentry->d_name,
+                                            new_ino,
+                                            btrfs_ino(BTRFS_I(old_dir)),
+                                            new_idx);
+@@ -9169,8 +9158,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* src is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+                                          BTRFS_I(old_dentry->d_inode),
+-                                         old_dentry->d_name.name,
+-                                         old_dentry->d_name.len,
++                                         &old_dentry->d_name,
+                                          &old_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+@@ -9186,8 +9174,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       } else { /* dest is an inode */
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                          BTRFS_I(new_dentry->d_inode),
+-                                         new_dentry->d_name.name,
+-                                         new_dentry->d_name.len,
++                                         &new_dentry->d_name,
+                                          &new_rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
+@@ -9198,16 +9185,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           new_dentry->d_name.name,
+-                           new_dentry->d_name.len, 0, old_idx);
++                           &new_dentry->d_name, 0, old_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
+-                           old_dentry->d_name.name,
+-                           old_dentry->d_name.len, 0, new_idx);
++                           &old_dentry->d_name, 0, new_idx);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+@@ -9308,8 +9293,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+ 
+       /* check for collisions, even if the  name isn't there */
+       ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
+-                           new_dentry->d_name.name,
+-                           new_dentry->d_name.len);
++                                           &new_dentry->d_name);
+ 
+       if (ret) {
+               if (ret == -EEXIST) {
+@@ -9403,9 +9387,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               /* force full log commit if subvolume involved. */
+               btrfs_set_log_full_commit(trans);
+       } else {
+-              ret = btrfs_insert_inode_ref(trans, dest,
+-                                           new_dentry->d_name.name,
+-                                           new_dentry->d_name.len,
++              ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name,
+                                            old_ino,
+                                            btrfs_ino(BTRFS_I(new_dir)), index);
+               if (ret)
+@@ -9429,10 +9411,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
+       } else {
+               ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+-                                      BTRFS_I(d_inode(old_dentry)),
+-                                      old_dentry->d_name.name,
+-                                      old_dentry->d_name.len,
+-                                      &rename_ctx);
++                                         BTRFS_I(d_inode(old_dentry)),
++                                         &old_dentry->d_name, &rename_ctx);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
+       }
+@@ -9451,8 +9431,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+               } else {
+                       ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+                                                BTRFS_I(d_inode(new_dentry)),
+-                                               new_dentry->d_name.name,
+-                                               new_dentry->d_name.len);
++                                               &new_dentry->d_name);
+               }
+               if (!ret && new_inode->i_nlink == 0)
+                       ret = btrfs_orphan_add(trans,
+@@ -9464,8 +9443,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
+-                           new_dentry->d_name.name,
+-                           new_dentry->d_name.len, 0, index);
++                           &new_dentry->d_name, 0, index);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_fail;
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 2e29fafe0e7d9..5305d98905cea 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -951,6 +951,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
+       struct inode *dir = d_inode(parent->dentry);
+       struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+       struct dentry *dentry;
++      struct qstr name_str = QSTR_INIT(name, namelen);
+       int error;
+ 
+       error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
+@@ -971,8 +972,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
+        * check for them now when we can safely fail
+        */
+       error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
+-                                             dir->i_ino, name,
+-                                             namelen);
++                                             dir->i_ino, &name_str);
+       if (error)
+               goto out_dput;
+ 
+@@ -3782,6 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
+       struct btrfs_trans_handle *trans;
+       struct btrfs_path *path = NULL;
+       struct btrfs_disk_key disk_key;
++      struct qstr name = QSTR_INIT("default", 7);
+       u64 objectid = 0;
+       u64 dir_id;
+       int ret;
+@@ -3825,7 +3826,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
+ 
+       dir_id = btrfs_super_root_dir(fs_info->super_copy);
+       di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
+-                                 dir_id, "default", 7, 1);
++                                 dir_id, &name, 1);
+       if (IS_ERR_OR_NULL(di)) {
+               btrfs_release_path(path);
+               btrfs_end_transaction(trans);
+diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
+index e1f599d7a9164..cf29241b9b310 100644
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -327,9 +327,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
+ }
+ 
+ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+-                     u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+-                     int name_len)
+-
++                     u64 ref_id, u64 dirid, u64 *sequence,
++                     const struct qstr *name)
+ {
+       struct btrfs_root *tree_root = trans->fs_info->tree_root;
+       struct btrfs_path *path;
+@@ -356,8 +355,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+                                    struct btrfs_root_ref);
+               ptr = (unsigned long)(ref + 1);
+               if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
+-                  (btrfs_root_ref_name_len(leaf, ref) != name_len) ||
+-                  memcmp_extent_buffer(leaf, name, ptr, name_len)) {
++                  (btrfs_root_ref_name_len(leaf, ref) != name->len) ||
++                  memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
+                       ret = -ENOENT;
+                       goto out;
+               }
+@@ -400,8 +399,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+  * Will return 0, -ENOMEM, or anything from the CoW path
+  */
+ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+-                     u64 ref_id, u64 dirid, u64 sequence, const char *name,
+-                     int name_len)
++                     u64 ref_id, u64 dirid, u64 sequence,
++                     const struct qstr *name)
+ {
+       struct btrfs_root *tree_root = trans->fs_info->tree_root;
+       struct btrfs_key key;
+@@ -420,7 +419,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+       key.offset = ref_id;
+ again:
+       ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
+-                                    sizeof(*ref) + name_len);
++                                    sizeof(*ref) + name->len);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               btrfs_free_path(path);
+@@ -431,9 +430,9 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+       ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+       btrfs_set_root_ref_dirid(leaf, ref, dirid);
+       btrfs_set_root_ref_sequence(leaf, ref, sequence);
+-      btrfs_set_root_ref_name_len(leaf, ref, name_len);
++      btrfs_set_root_ref_name_len(leaf, ref, name->len);
+       ptr = (unsigned long)(ref + 1);
+-      write_extent_buffer(leaf, name, ptr, name_len);
++      write_extent_buffer(leaf, name->name, ptr, name->len);
+       btrfs_mark_buffer_dirty(leaf);
+ 
+       if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index 35e889fe2a95d..833364527554c 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -1596,13 +1596,17 @@ static int gen_unique_name(struct send_ctx *sctx,
+               return -ENOMEM;
+ 
+       while (1) {
++              struct qstr tmp_name;
++
+               len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
+                               ino, gen, idx);
+               ASSERT(len < sizeof(tmp));
++              tmp_name.name = tmp;
++              tmp_name.len = strlen(tmp);
+ 
+               di = btrfs_lookup_dir_item(NULL, sctx->send_root,
+                               path, BTRFS_FIRST_FREE_OBJECTID,
+-                              tmp, strlen(tmp), 0);
++                              &tmp_name, 0);
+               btrfs_release_path(path);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+@@ -1622,7 +1626,7 @@ static int gen_unique_name(struct send_ctx *sctx,
+ 
+               di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
+                               path, BTRFS_FIRST_FREE_OBJECTID,
+-                              tmp, strlen(tmp), 0);
++                              &tmp_name, 0);
+               btrfs_release_path(path);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+@@ -1752,13 +1756,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       struct btrfs_path *path;
++      struct qstr name_str = QSTR_INIT(name, name_len);
+ 
+       path = alloc_path_for_send();
+       if (!path)
+               return -ENOMEM;
+ 
+-      di = btrfs_lookup_dir_item(NULL, root, path,
+-                      dir, name, name_len, 0);
++      di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
+       if (IS_ERR_OR_NULL(di)) {
+               ret = di ? PTR_ERR(di) : -ENOENT;
+               goto out;
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 582b71b7fa779..bf56e4d6b9f48 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -1398,6 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_key location;
++      struct qstr name = QSTR_INIT("default", 7);
+       u64 dir_id;
+ 
+       path = btrfs_alloc_path();
+@@ -1410,7 +1411,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
+        * to mount.
+        */
+       dir_id = btrfs_super_root_dir(fs_info->super_copy);
+-      di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
++      di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
+       if (IS_ERR(di)) {
+               btrfs_free_path(path);
+               return PTR_ERR(di);
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index a555567594418..b0fe054c9f401 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1694,8 +1694,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       /* check if there is a file/dir which has the same name. */
+       dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
+                                        btrfs_ino(BTRFS_I(parent_inode)),
+-                                       dentry->d_name.name,
+-                                       dentry->d_name.len, 0);
++                                       &dentry->d_name, 0);
+       if (dir_item != NULL && !IS_ERR(dir_item)) {
+               pending->error = -EEXIST;
+               goto dir_item_existed;
+@@ -1790,7 +1789,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       ret = btrfs_add_root_ref(trans, objectid,
+                                parent_root->root_key.objectid,
+                                btrfs_ino(BTRFS_I(parent_inode)), index,
+-                               dentry->d_name.name, dentry->d_name.len);
++                               &dentry->d_name);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+@@ -1822,9 +1821,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+       if (ret < 0)
+               goto fail;
+ 
+-      ret = btrfs_insert_dir_item(trans, dentry->d_name.name,
+-                                  dentry->d_name.len, BTRFS_I(parent_inode),
+-                                  &key, BTRFS_FT_DIR, index);
++      ret = btrfs_insert_dir_item(trans, &dentry->d_name,
++                                  BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
++                                  index);
+       /* We have check then name at the beginning, so it is impossible. */
+       BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
+       if (ret) {
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 00be69ce7b90f..9f55e81acc0ef 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -595,6 +595,21 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
+       return do_overwrite_item(trans, root, path, eb, slot, key);
+ }
+ 
++static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
++                             struct qstr *name)
++{
++      char *buf;
++
++      buf = kmalloc(len, GFP_NOFS);
++      if (!buf)
++              return -ENOMEM;
++
++      read_extent_buffer(eb, buf, (unsigned long)start, len);
++      name->name = buf;
++      name->len = len;
++      return 0;
++}
++
+ /*
+  * simple helper to read an inode off the disk from a given root
+  * This can only be called for subvolume roots and not for the log
+@@ -901,12 +916,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
+ static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
+                                      struct btrfs_inode *dir,
+                                      struct btrfs_inode *inode,
+-                                     const char *name,
+-                                     int name_len)
++                                     const struct qstr *name)
+ {
+       int ret;
+ 
+-      ret = btrfs_unlink_inode(trans, dir, inode, name, name_len);
++      ret = btrfs_unlink_inode(trans, dir, inode, name);
+       if (ret)
+               return ret;
+       /*
+@@ -933,8 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+ {
+       struct btrfs_root *root = dir->root;
+       struct inode *inode;
+-      char *name;
+-      int name_len;
++      struct qstr name;
+       struct extent_buffer *leaf;
+       struct btrfs_key location;
+       int ret;
+@@ -942,12 +955,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+       leaf = path->nodes[0];
+ 
+       btrfs_dir_item_key_to_cpu(leaf, di, &location);
+-      name_len = btrfs_dir_name_len(leaf, di);
+-      name = kmalloc(name_len, GFP_NOFS);
+-      if (!name)
++      ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name);
++      if (ret)
+               return -ENOMEM;
+ 
+-      read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
+       btrfs_release_path(path);
+ 
+       inode = read_one_inode(root, location.objectid);
+@@ -960,10 +971,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+       if (ret)
+               goto out;
+ 
+-      ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name,
+-                      name_len);
++      ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name);
+ out:
+-      kfree(name);
++      kfree(name.name);
+       iput(inode);
+       return ret;
+ }
+@@ -978,14 +988,14 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+ static noinline int inode_in_dir(struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                u64 dirid, u64 objectid, u64 index,
+-                               const char *name, int name_len)
++                               struct qstr *name)
+ {
+       struct btrfs_dir_item *di;
+       struct btrfs_key location;
+       int ret = 0;
+ 
+       di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
+-                                       index, name, name_len, 0);
++                                       index, name, 0);
+       if (IS_ERR(di)) {
+               ret = PTR_ERR(di);
+               goto out;
+@@ -998,7 +1008,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
+       }
+ 
+       btrfs_release_path(path);
+-      di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
++      di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, 0);
+       if (IS_ERR(di)) {
+               ret = PTR_ERR(di);
+               goto out;
+@@ -1025,7 +1035,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
+ static noinline int backref_in_log(struct btrfs_root *log,
+                                  struct btrfs_key *key,
+                                  u64 ref_objectid,
+-                                 const char *name, int namelen)
++                                 const struct qstr *name)
+ {
+       struct btrfs_path *path;
+       int ret;
+@@ -1045,12 +1055,10 @@ static noinline int backref_in_log(struct btrfs_root *log,
+       if (key->type == BTRFS_INODE_EXTREF_KEY)
+               ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
+                                                      path->slots[0],
+-                                                     ref_objectid,
+-                                                     name, namelen);
++                                                     ref_objectid, name);
+       else
+               ret = !!btrfs_find_name_in_backref(path->nodes[0],
+-                                                 path->slots[0],
+-                                                 name, namelen);
++                                                 path->slots[0], name);
+ out:
+       btrfs_free_path(path);
+       return ret;
+@@ -1063,11 +1071,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+                                 struct btrfs_inode *dir,
+                                 struct btrfs_inode *inode,
+                                 u64 inode_objectid, u64 parent_objectid,
+-                                u64 ref_index, char *name, int namelen)
++                                u64 ref_index, struct qstr *name)
+ {
+       int ret;
+-      char *victim_name;
+-      int victim_name_len;
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key search_key;
+@@ -1099,43 +1105,40 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+               ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
+               while (ptr < ptr_end) {
+-                      victim_ref = (struct btrfs_inode_ref *)ptr;
+-                      victim_name_len = btrfs_inode_ref_name_len(leaf,
+-                                                                 victim_ref);
+-                      victim_name = kmalloc(victim_name_len, GFP_NOFS);
+-                      if (!victim_name)
+-                              return -ENOMEM;
++                      struct qstr victim_name;
+ 
+-                      read_extent_buffer(leaf, victim_name,
+-                                         (unsigned long)(victim_ref + 1),
+-                                         victim_name_len);
++                      victim_ref = (struct btrfs_inode_ref *)ptr;
++                      ret = read_alloc_one_name(leaf, (victim_ref + 1),
++                               btrfs_inode_ref_name_len(leaf, victim_ref),
++                               &victim_name);
++                      if (ret)
++                              return ret;
+ 
+                       ret = backref_in_log(log_root, &search_key,
+-                                           parent_objectid, victim_name,
+-                                           victim_name_len);
++                                           parent_objectid, &victim_name);
+                       if (ret < 0) {
+-                              kfree(victim_name);
++                              kfree(victim_name.name);
+                               return ret;
+                       } else if (!ret) {
+                               inc_nlink(&inode->vfs_inode);
+                               btrfs_release_path(path);
+ 
+                               ret = unlink_inode_for_log_replay(trans, dir, inode,
+-                                              victim_name, victim_name_len);
+-                              kfree(victim_name);
++                                              &victim_name);
++                              kfree(victim_name.name);
+                               if (ret)
+                                       return ret;
+                               goto again;
+                       }
+-                      kfree(victim_name);
++                      kfree(victim_name.name);
+ 
+-                      ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
++                      ptr = (unsigned long)(victim_ref + 1) + victim_name.len;
+               }
+       }
+       btrfs_release_path(path);
+ 
+       /* Same search but for extended refs */
+-      extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
++      extref = btrfs_lookup_inode_extref(NULL, root, path, name,
+                                          inode_objectid, parent_objectid, 0,
+                                          0);
+       if (IS_ERR(extref)) {
+@@ -1152,29 +1155,28 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+               base = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ 
+               while (cur_offset < item_size) {
+-                      extref = (struct btrfs_inode_extref *)(base + cur_offset);
++                      struct qstr victim_name;
+ 
+-                      victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
++                      extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ 
+                       if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
+                               goto next;
+ 
+-                      victim_name = kmalloc(victim_name_len, GFP_NOFS);
+-                      if (!victim_name)
+-                              return -ENOMEM;
+-                      read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
+-                                         victim_name_len);
++                      ret = read_alloc_one_name(leaf, &extref->name,
++                               btrfs_inode_extref_name_len(leaf, extref),
++                               &victim_name);
++                      if (ret)
++                              return ret;
+ 
+                       search_key.objectid = inode_objectid;
+                       search_key.type = BTRFS_INODE_EXTREF_KEY;
+                       search_key.offset = btrfs_extref_hash(parent_objectid,
+-                                                            victim_name,
+-                                                            victim_name_len);
++                                                            victim_name.name,
++                                                            victim_name.len);
+                       ret = backref_in_log(log_root, &search_key,
+-                                           parent_objectid, victim_name,
+-                                           victim_name_len);
++                                           parent_objectid, &victim_name);
+                       if (ret < 0) {
+-                              kfree(victim_name);
++                              kfree(victim_name.name);
+                               return ret;
+                       } else if (!ret) {
+                               ret = -ENOENT;
+@@ -1186,26 +1188,24 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+ 
+                                       ret = unlink_inode_for_log_replay(trans,
+                                                       BTRFS_I(victim_parent),
+-                                                      inode,
+-                                                      victim_name,
+-                                                      victim_name_len);
++                                                      inode, &victim_name);
+                               }
+                               iput(victim_parent);
+-                              kfree(victim_name);
++                              kfree(victim_name.name);
+                               if (ret)
+                                       return ret;
+                               goto again;
+                       }
+-                      kfree(victim_name);
++                      kfree(victim_name.name);
+ next:
+-                      cur_offset += victim_name_len + sizeof(*extref);
++                      cur_offset += victim_name.len + sizeof(*extref);
+               }
+       }
+       btrfs_release_path(path);
+ 
+       /* look for a conflicting sequence number */
+       di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+-                                       ref_index, name, namelen, 0);
++                                       ref_index, name, 0);
+       if (IS_ERR(di)) {
+               return PTR_ERR(di);
+       } else if (di) {
+@@ -1216,8 +1216,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+       btrfs_release_path(path);
+ 
+       /* look for a conflicting name */
+-      di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
+-                                 name, namelen, 0);
++      di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0);
+       if (IS_ERR(di)) {
+               return PTR_ERR(di);
+       } else if (di) {
+@@ -1231,20 +1230,18 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
+ }
+ 
+ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+-                           u32 *namelen, char **name, u64 *index,
++                           struct qstr *name, u64 *index,
+                            u64 *parent_objectid)
+ {
+       struct btrfs_inode_extref *extref;
++      int ret;
+ 
+       extref = (struct btrfs_inode_extref *)ref_ptr;
+ 
+-      *namelen = btrfs_inode_extref_name_len(eb, extref);
+-      *name = kmalloc(*namelen, GFP_NOFS);
+-      if (*name == NULL)
+-              return -ENOMEM;
+-
+-      read_extent_buffer(eb, *name, (unsigned long)&extref->name,
+-                         *namelen);
++      ret = read_alloc_one_name(eb, &extref->name,
++                                btrfs_inode_extref_name_len(eb, extref), name);
++      if (ret)
++              return ret;
+ 
+       if (index)
+               *index = btrfs_inode_extref_index(eb, extref);
+@@ -1255,18 +1252,17 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ }
+ 
+ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+-                        u32 *namelen, char **name, u64 *index)
++                        struct qstr *name, u64 *index)
+ {
+       struct btrfs_inode_ref *ref;
++      int ret;
+ 
+       ref = (struct btrfs_inode_ref *)ref_ptr;
+ 
+-      *namelen = btrfs_inode_ref_name_len(eb, ref);
+-      *name = kmalloc(*namelen, GFP_NOFS);
+-      if (*name == NULL)
+-              return -ENOMEM;
+-
+-      read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
++      ret = read_alloc_one_name(eb, ref + 1, btrfs_inode_ref_name_len(eb, ref),
++                                name);
++      if (ret)
++              return ret;
+ 
+       if (index)
+               *index = btrfs_inode_ref_index(eb, ref);
+@@ -1308,28 +1304,24 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+       ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+       ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]);
+       while (ref_ptr < ref_end) {
+-              char *name = NULL;
+-              int namelen;
++              struct qstr name;
+               u64 parent_id;
+ 
+               if (key->type == BTRFS_INODE_EXTREF_KEY) {
+-                      ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
++                      ret = extref_get_fields(eb, ref_ptr, &name,
+                                               NULL, &parent_id);
+               } else {
+                       parent_id = key->offset;
+-                      ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+-                                           NULL);
++                      ret = ref_get_fields(eb, ref_ptr, &name, NULL);
+               }
+               if (ret)
+                       goto out;
+ 
+               if (key->type == BTRFS_INODE_EXTREF_KEY)
+                       ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot,
+-                                                             parent_id, name,
+-                                                             namelen);
++                                                             parent_id, &name);
+               else
+-                      ret = !!btrfs_find_name_in_backref(log_eb, log_slot,
+-                                                         name, namelen);
++                      ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name);
+ 
+               if (!ret) {
+                       struct inode *dir;
+@@ -1338,20 +1330,20 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+                       dir = read_one_inode(root, parent_id);
+                       if (!dir) {
+                               ret = -ENOENT;
+-                              kfree(name);
++                              kfree(name.name);
+                               goto out;
+                       }
+                       ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
+-                                               inode, name, namelen);
+-                      kfree(name);
++                                               inode, &name);
++                      kfree(name.name);
+                       iput(dir);
+                       if (ret)
+                               goto out;
+                       goto again;
+               }
+ 
+-              kfree(name);
+-              ref_ptr += namelen;
++              kfree(name.name);
++              ref_ptr += name.len;
+               if (key->type == BTRFS_INODE_EXTREF_KEY)
+                       ref_ptr += sizeof(struct btrfs_inode_extref);
+               else
+@@ -1380,8 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+       struct inode *inode = NULL;
+       unsigned long ref_ptr;
+       unsigned long ref_end;
+-      char *name = NULL;
+-      int namelen;
++      struct qstr name;
+       int ret;
+       int log_ref_ver = 0;
+       u64 parent_objectid;
+@@ -1425,7 +1416,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+ 
+       while (ref_ptr < ref_end) {
+               if (log_ref_ver) {
+-                      ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
++                      ret = extref_get_fields(eb, ref_ptr, &name,
+                                               &ref_index, &parent_objectid);
+                       /*
+                        * parent object can change from one array
+@@ -1438,15 +1429,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+                               goto out;
+                       }
+               } else {
+-                      ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+-                                           &ref_index);
++                      ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
+               }
+               if (ret)
+                       goto out;
+ 
+               ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+-                                 btrfs_ino(BTRFS_I(inode)), ref_index,
+-                                 name, namelen);
++                                 btrfs_ino(BTRFS_I(inode)), ref_index, &name);
+               if (ret < 0) {
+                       goto out;
+               } else if (ret == 0) {
+@@ -1460,7 +1449,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+                       ret = __add_inode_ref(trans, root, path, log,
+                                             BTRFS_I(dir), BTRFS_I(inode),
+                                             inode_objectid, parent_objectid,
+-                                            ref_index, name, namelen);
++                                            ref_index, &name);
+                       if (ret) {
+                               if (ret == 1)
+                                       ret = 0;
+@@ -1469,7 +1458,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+ 
+                       /* insert our name */
+                       ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+-                                           name, namelen, 0, ref_index);
++                                           &name, 0, ref_index);
+                       if (ret)
+                               goto out;
+ 
+@@ -1479,9 +1468,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+               }
+               /* Else, ret == 1, we already have a perfect match, we're done. */
+ 
+-              ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
+-              kfree(name);
+-              name = NULL;
++              ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len;
++              kfree(name.name);
++              name.name = NULL;
+               if (log_ref_ver) {
+                       iput(dir);
+                       dir = NULL;
+@@ -1505,7 +1494,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+       ret = overwrite_item(trans, root, path, eb, slot, key);
+ out:
+       btrfs_release_path(path);
+-      kfree(name);
++      kfree(name.name);
+       iput(dir);
+       iput(inode);
+       return ret;
+@@ -1777,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   u64 dirid, u64 index,
+-                                  char *name, int name_len,
++                                  const struct qstr *name,
+                                   struct btrfs_key *location)
+ {
+       struct inode *inode;
+@@ -1795,7 +1784,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
+       }
+ 
+       ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
+-                      name_len, 1, index);
++                           1, index);
+ 
+       /* FIXME, put inode into FIXUP list */
+ 
+@@ -1855,8 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+                                   struct btrfs_dir_item *di,
+                                   struct btrfs_key *key)
+ {
+-      char *name;
+-      int name_len;
++      struct qstr name;
+       struct btrfs_dir_item *dir_dst_di;
+       struct btrfs_dir_item *index_dst_di;
+       bool dir_dst_matches = false;
+@@ -1874,17 +1862,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+       if (!dir)
+               return -EIO;
+ 
+-      name_len = btrfs_dir_name_len(eb, di);
+-      name = kmalloc(name_len, GFP_NOFS);
+-      if (!name) {
+-              ret = -ENOMEM;
++      ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
++      if (ret)
+               goto out;
+-      }
+ 
+       log_type = btrfs_dir_type(eb, di);
+-      read_extent_buffer(eb, name, (unsigned long)(di + 1),
+-                 name_len);
+-
+       btrfs_dir_item_key_to_cpu(eb, di, &log_key);
+       ret = btrfs_lookup_inode(trans, root, path, &log_key, 0);
+       btrfs_release_path(path);
+@@ -1894,7 +1876,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+       ret = 0;
+ 
+       dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
+-                                         name, name_len, 1);
++                                         &name, 1);
+       if (IS_ERR(dir_dst_di)) {
+               ret = PTR_ERR(dir_dst_di);
+               goto out;
+@@ -1911,7 +1893,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+ 
+       index_dst_di = btrfs_lookup_dir_index_item(trans, root, path,
+                                                  key->objectid, key->offset,
+-                                                 name, name_len, 1);
++                                                 &name, 1);
+       if (IS_ERR(index_dst_di)) {
+               ret = PTR_ERR(index_dst_di);
+               goto out;
+@@ -1939,7 +1921,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+       search_key.objectid = log_key.objectid;
+       search_key.type = BTRFS_INODE_REF_KEY;
+       search_key.offset = key->objectid;
+-      ret = backref_in_log(root->log_root, &search_key, 0, name, name_len);
++      ret = backref_in_log(root->log_root, &search_key, 0, &name);
+       if (ret < 0) {
+               goto out;
+       } else if (ret) {
+@@ -1952,8 +1934,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+       search_key.objectid = log_key.objectid;
+       search_key.type = BTRFS_INODE_EXTREF_KEY;
+       search_key.offset = key->objectid;
+-      ret = backref_in_log(root->log_root, &search_key, key->objectid, name,
+-                           name_len);
++      ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
+       if (ret < 0) {
+               goto out;
+       } else if (ret) {
+@@ -1964,7 +1945,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+       }
+       btrfs_release_path(path);
+       ret = insert_one_name(trans, root, key->objectid, key->offset,
+-                            name, name_len, &log_key);
++                            &name, &log_key);
+       if (ret && ret != -ENOENT && ret != -EEXIST)
+               goto out;
+       if (!ret)
+@@ -1974,10 +1955,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+ 
+ out:
+       if (!ret && update_size) {
+-              btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
++              btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name.len * 2);
+               ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
+       }
+-      kfree(name);
++      kfree(name.name);
+       iput(dir);
+       if (!ret && name_added)
+               ret = 1;
+@@ -2143,8 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_dir_item *di;
+-      int name_len;
+-      char *name;
++      struct qstr name;
+       struct inode *inode = NULL;
+       struct btrfs_key location;
+ 
+@@ -2159,22 +2139,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+-      name_len = btrfs_dir_name_len(eb, di);
+-      name = kmalloc(name_len, GFP_NOFS);
+-      if (!name) {
+-              ret = -ENOMEM;
++      ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
++      if (ret)
+               goto out;
+-      }
+-
+-      read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len);
+ 
+       if (log) {
+               struct btrfs_dir_item *log_di;
+ 
+               log_di = btrfs_lookup_dir_index_item(trans, log, log_path,
+                                                    dir_key->objectid,
+-                                                   dir_key->offset,
+-                                                   name, name_len, 0);
++                                                   dir_key->offset, &name, 0);
+               if (IS_ERR(log_di)) {
+                       ret = PTR_ERR(log_di);
+                       goto out;
+@@ -2200,7 +2174,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+ 
+       inc_nlink(inode);
+       ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode),
+-                                        name, name_len);
++                                        &name);
+       /*
+        * Unlike dir item keys, dir index keys can only have one name (entry) in
+        * them, as there are no key collisions since each key has a unique offset
+@@ -2209,7 +2183,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+ out:
+       btrfs_release_path(path);
+       btrfs_release_path(log_path);
+-      kfree(name);
++      kfree(name.name);
+       iput(inode);
+       return ret;
+ }
+@@ -3443,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *log,
+                            struct btrfs_path *path,
+                            u64 dir_ino,
+-                           const char *name, int name_len,
++                           const struct qstr *name,
+                            u64 index)
+ {
+       struct btrfs_dir_item *di;
+@@ -3453,7 +3427,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
+        * for dir item keys.
+        */
+       di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
+-                                       index, name, name_len, -1);
++                                       index, name, -1);
+       if (IS_ERR(di))
+               return PTR_ERR(di);
+       else if (!di)
+@@ -3490,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
+  */
+ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const char *name, int name_len,
++                                const struct qstr *name,
+                                 struct btrfs_inode *dir, u64 index)
+ {
+       struct btrfs_path *path;
+@@ -3517,7 +3491,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+       }
+ 
+       ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir),
+-                              name, name_len, index);
++                              name, index);
+       btrfs_free_path(path);
+ out_unlock:
+       mutex_unlock(&dir->log_mutex);
+@@ -3529,7 +3503,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+ /* see comments for btrfs_del_dir_entries_in_log */
+ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+-                              const char *name, int name_len,
++                              const struct qstr *name,
+                               struct btrfs_inode *inode, u64 dirid)
+ {
+       struct btrfs_root *log;
+@@ -3550,7 +3524,7 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+       log = root->log_root;
+       mutex_lock(&inode->log_mutex);
+ 
+-      ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
++      ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode),
+                                 dirid, &index);
+       mutex_unlock(&inode->log_mutex);
+       if (ret < 0 && ret != -ENOENT)
+@@ -5293,6 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+               u32 this_len;
+               unsigned long name_ptr;
+               struct btrfs_dir_item *di;
++              struct qstr name_str;
+ 
+               if (key->type == BTRFS_INODE_REF_KEY) {
+                       struct btrfs_inode_ref *iref;
+@@ -5326,8 +5301,11 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+               }
+ 
+               read_extent_buffer(eb, name, name_ptr, this_name_len);
++
++              name_str.name = name;
++              name_str.len = this_name_len;
+               di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
+-                              parent, name, this_name_len, 0);
++                              parent, &name_str, 0);
+               if (di && !IS_ERR(di)) {
+                       struct btrfs_key di_key;
+ 
+@@ -7530,8 +7508,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                */
+               mutex_lock(&old_dir->log_mutex);
+               ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
+-                                      old_dentry->d_name.name,
+-                                      old_dentry->d_name.len, old_dir_index);
++                                      &old_dentry->d_name, old_dir_index);
+               if (ret > 0) {
+                       /*
+                        * The dentry does not exist in the log, so record its
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index bcca74128c3bb..6c0dc79787f05 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
+                         struct btrfs_log_ctx *ctx);
+ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+-                                const char *name, int name_len,
++                                const struct qstr *name,
+                                 struct btrfs_inode *dir, u64 index);
+ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+-                              const char *name, int name_len,
++                              const struct qstr *name,
+                               struct btrfs_inode *inode, u64 dirid);
+ void btrfs_end_log_trans(struct btrfs_root *root);
+ void btrfs_pin_log_trans(struct btrfs_root *root);
+-- 
+2.40.1
+
diff --git a/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch b/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch

new file mode 100644 (file)

index 0000000..6be5a6c
--- /dev/null
+++ b/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch
@@ -0,0 +1,61 @@
+From 562b1f633915512df826243cd52eeb774147186c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 14:12:35 -0400
+Subject: maple_tree: add mas_is_active() to detect in-tree walks
+
+From: Liam R. Howlett <Liam.Howlett@oracle.com>
+
+[ Upstream commit 5c590804b6b0ff933ed4e5cee5d76de3a5048d9f ]
+
+Patch series "maple_tree: Fix mas_prev() state regression".
+
+Pedro Falcato reported an mprotect regression [1] which was bisected back
+to the iterator changes for maple tree.  Root cause analysis showed that
+mas_prev() running off the end of the VMA space (previous from 0), followed
+by mas_find(), would skip the first value.
+
+This patchset introduces maple state underflow/overflow so the sequence of
+calls on the maple state will return what the user expects.
+
+Users who encounter this bug may see mprotect(), userfaultfd_register(),
+and mlock() fail on VMAs mapped with address 0.
+
+This patch (of 2):
+
+Instead of constantly checking each possibility of the maple state,
+create a fast path that will skip over checking unlikely states.
+
+Link: https://lkml.kernel.org/r/20230921181236.509072-1-Liam.Howlett@oracle.com
+Link: https://lkml.kernel.org/r/20230921181236.509072-2-Liam.Howlett@oracle.com
+Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: Pedro Falcato <pedro.falcato@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/maple_tree.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
+index 443dec917ec64..27864178d1918 100644
+--- a/include/linux/maple_tree.h
++++ b/include/linux/maple_tree.h
+@@ -488,6 +488,15 @@ static inline bool mas_is_paused(struct ma_state *mas)
+       return mas->node == MAS_PAUSE;
+ }
+ 
++/* Check if the mas is pointing to a node or not */
++static inline bool mas_is_active(struct ma_state *mas)
++{
++      if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE)
++              return true;
++
++      return false;
++}
++
+ /**
+  * mas_reset() - Reset a Maple Tree operation state.
+  * @mas: Maple Tree operation state.
+-- 
+2.40.1
+
diff --git a/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch b/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch

new file mode 100644 (file)

index 0000000..51387cc
--- /dev/null
+++ b/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch
@@ -0,0 +1,55 @@
+From 2c6b47348d1fd76c15dc3f4068829747a550419a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 May 2023 11:12:47 +0800
+Subject: maple_tree: relocate the declaration of mas_empty_area_rev().
+
+From: Peng Zhang <zhangpeng.00@bytedance.com>
+
+[ Upstream commit 06b27ce36a1a3dc5ea6f8314d0c7d1baa9f8ece7 ]
+
+Relocate the declaration of mas_empty_area_rev() so that mas_empty_area()
+and mas_empty_area_rev() are together.
+
+Link: https://lkml.kernel.org/r/20230524031247.65949-11-zhangpeng.00@bytedance.com
+Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 5c590804b6b0 ("maple_tree: add mas_is_active() to detect in-tree walks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/maple_tree.h | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
+index 1a424edb71a65..443dec917ec64 100644
+--- a/include/linux/maple_tree.h
++++ b/include/linux/maple_tree.h
+@@ -469,6 +469,12 @@ void *mas_next(struct ma_state *mas, unsigned long max);
+ 
+ int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max,
+                  unsigned long size);
++/*
++ * This finds an empty area from the highest address to the lowest.
++ * AKA "Topdown" version,
++ */
++int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
++                     unsigned long max, unsigned long size);
+ 
+ /* Checks if a mas has not found anything */
+ static inline bool mas_is_none(struct ma_state *mas)
+@@ -482,12 +488,6 @@ static inline bool mas_is_paused(struct ma_state *mas)
+       return mas->node == MAS_PAUSE;
+ }
+ 
+-/*
+- * This finds an empty area from the highest address to the lowest.
+- * AKA "Topdown" version,
+- */
+-int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
+-                     unsigned long max, unsigned long size);
+ /**
+  * mas_reset() - Reset a Maple Tree operation state.
+  * @mas: Maple Tree operation state.
+-- 
+2.40.1
+
diff --git a/queue-6.1/maple_tree-remove-the-redundant-code.patch b/queue-6.1/maple_tree-remove-the-redundant-code.patch

new file mode 100644 (file)

index 0000000..0031b86
--- /dev/null
+++ b/queue-6.1/maple_tree-remove-the-redundant-code.patch
@@ -0,0 +1,48 @@
+From 8115a697c47c8d0d1e35d67364eb7fb1b5fcf922 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 14:00:56 +0800
+Subject: maple_tree: remove the redundant code
+
+From: Vernon Yang <vernon2gm@gmail.com>
+
+[ Upstream commit eabb305293835b191ffe60234587ae8bf5e4e9fd ]
+
+The macro CONFIG_DEBUG_MAPLE_TREE_VERBOSE is not used anywhere, and the
+functions mas_dup_tree() and mas_dup_store() are only declared, never
+implemented, so drop them.
+
+Link: https://lkml.kernel.org/r/20221221060058.609003-6-vernon2gm@gmail.com
+Signed-off-by: Vernon Yang <vernon2gm@gmail.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 5c590804b6b0 ("maple_tree: add mas_is_active() to detect in-tree walks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/maple_tree.h | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
+index e594db58a0f14..1a424edb71a65 100644
+--- a/include/linux/maple_tree.h
++++ b/include/linux/maple_tree.h
+@@ -12,7 +12,6 @@
+ #include <linux/rcupdate.h>
+ #include <linux/spinlock.h>
+ /* #define CONFIG_MAPLE_RCU_DISABLED */
+-/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */
+ 
+ /*
+  * Allocated nodes are mutable until they have been inserted into the tree,
+@@ -483,9 +482,6 @@ static inline bool mas_is_paused(struct ma_state *mas)
+       return mas->node == MAS_PAUSE;
+ }
+ 
+-void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas);
+-void mas_dup_store(struct ma_state *mas, void *entry);
+-
+ /*
+  * This finds an empty area from the highest address to the lowest.
+  * AKA "Topdown" version,
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-memory-add-vm_normal_folio.patch b/queue-6.1/mm-memory-add-vm_normal_folio.patch

new file mode 100644 (file)

index 0000000..64d1388
--- /dev/null
+++ b/queue-6.1/mm-memory-add-vm_normal_folio.patch
@@ -0,0 +1,76 @@
+From 4dd74e82b56a3b09dafbbfc7ff1d61da63c67d27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 10:08:45 -0800
+Subject: mm/memory: add vm_normal_folio()
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit 318e9342fbbb6888d903d86e83865609901a1c65 ]
+
+Patch series "Convert deactivate_page() to folio_deactivate()", v4.
+
+Deactivate_page() has already been converted to use folios.  This patch
+series modifies the callers of deactivate_page() to use folios.  It also
+introduces vm_normal_folio() to assist with folio conversions, and
+converts deactivate_page() to folio_deactivate() which takes in a folio.
+
+This patch (of 4):
+
+Introduce a wrapper function called vm_normal_folio().  This function
+calls vm_normal_page() and returns the folio of the page found, or null if
+no page is found.
+
+This function allows callers to get a folio from a pte, which will
+eventually allow them to completely replace their struct page variables
+with struct folio instead.
+
+Link: https://lkml.kernel.org/r/20221221180848.20774-1-vishal.moola@gmail.com
+Link: https://lkml.kernel.org/r/20221221180848.20774-2-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: SeongJae Park <sj@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mm.h |  2 ++
+ mm/memory.c        | 10 ++++++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 104ec00823da8..eefb0948110ae 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1906,6 +1906,8 @@ static inline bool can_do_mlock(void) { return false; }
+ extern int user_shm_lock(size_t, struct ucounts *);
+ extern void user_shm_unlock(size_t, struct ucounts *);
+ 
++struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
++                           pte_t pte);
+ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+                            pte_t pte);
+ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+diff --git a/mm/memory.c b/mm/memory.c
+index 2083078cd0615..0d1b3ee8fcd7a 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -672,6 +672,16 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+       return pfn_to_page(pfn);
+ }
+ 
++struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
++                          pte_t pte)
++{
++      struct page *page = vm_normal_page(vma, addr, pte);
++
++      if (page)
++              return page_folio(page);
++      return NULL;
++}
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+                               pmd_t pmd)
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch b/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch

new file mode 100644 (file)

index 0000000..309e750
--- /dev/null
+++ b/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch
@@ -0,0 +1,112 @@
+From 8af17b9c7c656fa77bc58151f3bdf3a5b7283303 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Jan 2023 12:18:33 -0800
+Subject: mm/mempolicy: convert migrate_page_add() to migrate_folio_add()
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit 4a64981dfee9119aa2c1f243b48f34cbbd67779c ]
+
+Replace migrate_page_add() with migrate_folio_add().  migrate_folio_add()
+does the same as migrate_page_add() but takes in a folio instead of a page.
+This removes a couple of calls to compound_head().
+
+Link: https://lkml.kernel.org/r/20230130201833.27042-7-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Reviewed-by: Yin Fengwei <fengwei.yin@intel.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 39 ++++++++++++++++++++-------------------
+ 1 file changed, 20 insertions(+), 19 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 2ae6c8f18aba1..158b0bcd12fd7 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -414,7 +414,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
+       },
+ };
+ 
+-static int migrate_page_add(struct page *page, struct list_head *pagelist,
++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags);
+ 
+ struct queue_pages {
+@@ -476,7 +476,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+       /* go to folio migration */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+               if (!vma_migratable(walk->vma) ||
+-                  migrate_page_add(&folio->page, qp->pagelist, flags)) {
++                  migrate_folio_add(folio, qp->pagelist, flags)) {
+                       ret = 1;
+                       goto unlock;
+               }
+@@ -544,7 +544,7 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+                        * temporary off LRU pages in the range.  Still
+                        * need migrate other LRU pages.
+                        */
+-                      if (migrate_page_add(&folio->page, qp->pagelist, flags))
++                      if (migrate_folio_add(folio, qp->pagelist, flags))
+                               has_unmovable = true;
+               } else
+                       break;
+@@ -1012,27 +1012,28 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
+ }
+ 
+ #ifdef CONFIG_MIGRATION
+-/*
+- * page migration, thp tail pages can be passed.
+- */
+-static int migrate_page_add(struct page *page, struct list_head *pagelist,
++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags)
+ {
+-      struct page *head = compound_head(page);
+       /*
+-       * Avoid migrating a page that is shared with others.
++       * We try to migrate only unshared folios. If it is shared it
++       * is likely not worth migrating.
++       *
++       * To check if the folio is shared, ideally we want to make sure
++       * every page is mapped to the same process. Doing that is very
++       * expensive, so check the estimated mapcount of the folio instead.
+        */
+-      if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) {
+-              if (!isolate_lru_page(head)) {
+-                      list_add_tail(&head->lru, pagelist);
+-                      mod_node_page_state(page_pgdat(head),
+-                              NR_ISOLATED_ANON + page_is_file_lru(head),
+-                              thp_nr_pages(head));
++      if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
++              if (!folio_isolate_lru(folio)) {
++                      list_add_tail(&folio->lru, foliolist);
++                      node_stat_mod_folio(folio,
++                              NR_ISOLATED_ANON + folio_is_file_lru(folio),
++                              folio_nr_pages(folio));
+               } else if (flags & MPOL_MF_STRICT) {
+                       /*
+-                       * Non-movable page may reach here.  And, there may be
+-                       * temporary off LRU pages or non-LRU movable pages.
+-                       * Treat them as unmovable pages since they can't be
++                       * Non-movable folio may reach here.  And, there may be
++                       * temporary off LRU folios or non-LRU movable folios.
++                       * Treat them as unmovable folios since they can't be
+                        * isolated, so they can't be moved at the moment.  It
+                        * should return -EIO for this case too.
+                        */
+@@ -1224,7 +1225,7 @@ static struct page *new_page(struct page *page, unsigned long start)
+ }
+ #else
+ 
+-static int migrate_page_add(struct page *page, struct list_head *pagelist,
++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags)
+ {
+       return -EIO;
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch b/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch

new file mode 100644 (file)

index 0000000..a99a38c
--- /dev/null
+++ b/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch
@@ -0,0 +1,96 @@
+From 299e39e2131d8ec58eec46daaa826d503335bada Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Jan 2023 12:18:29 -0800
+Subject: mm/mempolicy: convert queue_pages_pmd() to queue_folios_pmd()
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit de1f5055523e9a035b38533f25a56df03d45034a ]
+
+The function now operates on a folio instead of the page associated with a
+pmd.
+
+This change is in preparation for the conversion of queue_pages_required()
+to queue_folio_required() and migrate_page_add() to migrate_folio_add().
+
+Link: https://lkml.kernel.org/r/20230130201833.27042-3-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: "Yin, Fengwei" <fengwei.yin@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 7d36dd95d1fff..3a291026e1896 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -442,21 +442,21 @@ static inline bool queue_pages_required(struct page *page,
+ }
+ 
+ /*
+- * queue_pages_pmd() has three possible return values:
+- * 0 - pages are placed on the right node or queued successfully, or
++ * queue_folios_pmd() has three possible return values:
++ * 0 - folios are placed on the right node or queued successfully, or
+  *     special page is met, i.e. huge zero page.
+- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
++ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+  *     specified.
+  * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+- *        existing page was already on a node that does not follow the
++ *        existing folio was already on a node that does not follow the
+  *        policy.
+  */
+-static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
++static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+                               unsigned long end, struct mm_walk *walk)
+       __releases(ptl)
+ {
+       int ret = 0;
+-      struct page *page;
++      struct folio *folio;
+       struct queue_pages *qp = walk->private;
+       unsigned long flags;
+ 
+@@ -464,19 +464,19 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+               ret = -EIO;
+               goto unlock;
+       }
+-      page = pmd_page(*pmd);
+-      if (is_huge_zero_page(page)) {
++      folio = pfn_folio(pmd_pfn(*pmd));
++      if (is_huge_zero_page(&folio->page)) {
+               walk->action = ACTION_CONTINUE;
+               goto unlock;
+       }
+-      if (!queue_pages_required(page, qp))
++      if (!queue_pages_required(&folio->page, qp))
+               goto unlock;
+ 
+       flags = qp->flags;
+-      /* go to thp migration */
++      /* go to folio migration */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+               if (!vma_migratable(walk->vma) ||
+-                  migrate_page_add(page, qp->pagelist, flags)) {
++                  migrate_page_add(&folio->page, qp->pagelist, flags)) {
+                       ret = 1;
+                       goto unlock;
+               }
+@@ -512,7 +512,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+ 
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl)
+-              return queue_pages_pmd(pmd, ptl, addr, end, walk);
++              return queue_folios_pmd(pmd, ptl, addr, end, walk);
+ 
+       if (pmd_trans_unstable(pmd))
+               return 0;
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch b/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch

new file mode 100644 (file)

index 0000000..ddf9ed1
--- /dev/null
+++ b/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch
@@ -0,0 +1,102 @@
+From 35c8cbe38cb84466882c63f46dd0129bdba26456 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Jan 2023 12:18:30 -0800
+Subject: mm/mempolicy: convert queue_pages_pte_range() to
+ queue_folios_pte_range()
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit 3dae02bbd07f40e37bbfec2d77119628db461eaa ]
+
+This function now operates on folios associated with ptes instead of
+pages.
+
+This change is in preparation for the conversion of queue_pages_required()
+to queue_folio_required() and migrate_page_add() to migrate_folio_add().
+
+Link: https://lkml.kernel.org/r/20230130201833.27042-4-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: "Yin, Fengwei" <fengwei.yin@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 3a291026e1896..2ae6c8f18aba1 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -491,19 +491,19 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+  * Scan through pages checking if pages follow certain conditions,
+  * and move them to the pagelist if they do.
+  *
+- * queue_pages_pte_range() has three possible return values:
+- * 0 - pages are placed on the right node or queued successfully, or
++ * queue_folios_pte_range() has three possible return values:
++ * 0 - folios are placed on the right node or queued successfully, or
+  *     special page is met, i.e. zero page.
+- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
++ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+  *     specified.
+- * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
++ * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
+  *        on a node that does not follow the policy.
+  */
+-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
++static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
+ {
+       struct vm_area_struct *vma = walk->vma;
+-      struct page *page;
++      struct folio *folio;
+       struct queue_pages *qp = walk->private;
+       unsigned long flags = qp->flags;
+       bool has_unmovable = false;
+@@ -521,16 +521,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               if (!pte_present(*pte))
+                       continue;
+-              page = vm_normal_page(vma, addr, *pte);
+-              if (!page || is_zone_device_page(page))
++              folio = vm_normal_folio(vma, addr, *pte);
++              if (!folio || folio_is_zone_device(folio))
+                       continue;
+               /*
+-               * vm_normal_page() filters out zero pages, but there might
+-               * still be PageReserved pages to skip, perhaps in a VDSO.
++               * vm_normal_folio() filters out zero pages, but there might
++               * still be reserved folios to skip, perhaps in a VDSO.
+                */
+-              if (PageReserved(page))
++              if (folio_test_reserved(folio))
+                       continue;
+-              if (!queue_pages_required(page, qp))
++              if (!queue_pages_required(&folio->page, qp))
+                       continue;
+               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+                       /* MPOL_MF_STRICT must be specified if we get here */
+@@ -544,7 +544,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+                        * temporary off LRU pages in the range.  Still
+                        * need migrate other LRU pages.
+                        */
+-                      if (migrate_page_add(page, qp->pagelist, flags))
++                      if (migrate_page_add(&folio->page, qp->pagelist, flags))
+                               has_unmovable = true;
+               } else
+                       break;
+@@ -705,7 +705,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
+ 
+ static const struct mm_walk_ops queue_pages_walk_ops = {
+       .hugetlb_entry          = queue_pages_hugetlb,
+-      .pmd_entry              = queue_pages_pte_range,
++      .pmd_entry              = queue_folios_pte_range,
+       .test_walk              = queue_pages_test_walk,
+ };
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch b/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch

new file mode 100644 (file)

index 0000000..d3100b1
--- /dev/null
+++ b/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch
@@ -0,0 +1,189 @@
+From 03117032d2162841bbbe9c4bafd56777cab40033 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Sep 2023 15:32:42 -0700
+Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE
+ are specified
+
+From: Yang Shi <yang@os.amperecomputing.com>
+
+[ Upstream commit 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 ]
+
+When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel
+should attempt to migrate all existing pages, and return -EIO if there is
+misplaced or unmovable page.  Then commit 6f4576e3687b ("mempolicy: apply
+page table walker on queue_pages_range()") messed up the return value and
+didn't break VMA scan early anymore when MPOL_MF_STRICT alone.  The
+return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy:
+make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke
+the VMA walk early if an unmovable page is met, which may cause some pages
+not to be migrated as expected.
+
+The code should conceptually do:
+
+ if (MPOL_MF_MOVE|MOVEALL)
+     scan all vmas
+     try to migrate the existing pages
+     return success
+ else if (MPOL_MF_MOVE* | MPOL_MF_STRICT)
+     scan all vmas
+     try to migrate the existing pages
+     return -EIO if unmovable or migration failed
+ else /* MPOL_MF_STRICT alone */
+     break early if meets unmovable and don't call mbind_range() at all
+ else /* none of those flags */
+     check the ranges in test_walk, EFAULT without mbind_range() if discontig.
+
+Fixed the behavior.
+
+Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.com
+Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified")
+Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org>   [4.9+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 158b0bcd12fd7..bfe2d1d50fbee 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -424,6 +424,7 @@ struct queue_pages {
+       unsigned long start;
+       unsigned long end;
+       struct vm_area_struct *first;
++      bool has_unmovable;
+ };
+ 
+ /*
+@@ -444,9 +445,8 @@ static inline bool queue_pages_required(struct page *page,
+ /*
+  * queue_folios_pmd() has three possible return values:
+  * 0 - folios are placed on the right node or queued successfully, or
+- *     special page is met, i.e. huge zero page.
+- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+- *     specified.
++ *     special page is met, i.e. zero page, or unmovable page is found
++ *     but continue walking (indicated by queue_pages.has_unmovable).
+  * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+  *        existing folio was already on a node that does not follow the
+  *        policy.
+@@ -477,7 +477,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+               if (!vma_migratable(walk->vma) ||
+                   migrate_folio_add(folio, qp->pagelist, flags)) {
+-                      ret = 1;
++                      qp->has_unmovable = true;
+                       goto unlock;
+               }
+       } else
+@@ -493,9 +493,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+  *
+  * queue_folios_pte_range() has three possible return values:
+  * 0 - folios are placed on the right node or queued successfully, or
+- *     special page is met, i.e. zero page.
+- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+- *     specified.
++ *     special page is met, i.e. zero page, or unmovable page is found
++ *     but continue walking (indicated by queue_pages.has_unmovable).
+  * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
+  *        on a node that does not follow the policy.
+  */
+@@ -506,7 +505,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+       struct folio *folio;
+       struct queue_pages *qp = walk->private;
+       unsigned long flags = qp->flags;
+-      bool has_unmovable = false;
+       pte_t *pte, *mapped_pte;
+       spinlock_t *ptl;
+ 
+@@ -533,11 +531,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+               if (!queue_pages_required(&folio->page, qp))
+                       continue;
+               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+-                      /* MPOL_MF_STRICT must be specified if we get here */
+-                      if (!vma_migratable(vma)) {
+-                              has_unmovable = true;
+-                              break;
+-                      }
++                      /*
++                       * MPOL_MF_STRICT must be specified if we get here.
++                       * Continue walking vmas due to MPOL_MF_MOVE* flags.
++                       */
++                      if (!vma_migratable(vma))
++                              qp->has_unmovable = true;
+ 
+                       /*
+                        * Do not abort immediately since there may be
+@@ -545,16 +544,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+                        * need migrate other LRU pages.
+                        */
+                       if (migrate_folio_add(folio, qp->pagelist, flags))
+-                              has_unmovable = true;
++                              qp->has_unmovable = true;
+               } else
+                       break;
+       }
+       pte_unmap_unlock(mapped_pte, ptl);
+       cond_resched();
+ 
+-      if (has_unmovable)
+-              return 1;
+-
+       return addr != end ? -EIO : 0;
+ }
+ 
+@@ -594,7 +590,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+                * Detecting misplaced page but allow migrating pages which
+                * have been queued.
+                */
+-              ret = 1;
++              qp->has_unmovable = true;
+               goto unlock;
+       }
+ 
+@@ -608,7 +604,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+                        * Failed to isolate page but allow migrating pages
+                        * which have been queued.
+                        */
+-                      ret = 1;
++                      qp->has_unmovable = true;
+       }
+ unlock:
+       spin_unlock(ptl);
+@@ -737,10 +733,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+               .start = start,
+               .end = end,
+               .first = NULL,
++              .has_unmovable = false,
+       };
+ 
+       err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+ 
++      if (qp.has_unmovable)
++              err = 1;
+       if (!qp.first)
+               /* whole range in hole */
+               err = -EFAULT;
+@@ -1338,7 +1337,7 @@ static long do_mbind(unsigned long start, unsigned long len,
+                               putback_movable_pages(&pagelist);
+               }
+ 
+-              if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
++              if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
+                       err = -EIO;
+       } else {
+ up_out:
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch b/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch

new file mode 100644 (file)

index 0000000..f96425b
--- /dev/null
+++ b/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch
@@ -0,0 +1,55 @@
+From 9d8e793d000a5f5b70e270f999cc52c2dd78b5cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 10:17:13 +0000
+Subject: mm/page_alloc: always remove pages from temporary list
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit c3e58a70425ac6ddaae1529c8146e88b4f7252bb ]
+
+Patch series "Leave IRQs enabled for per-cpu page allocations", v3.
+
+This patch (of 2):
+
+free_unref_page_list() has neglected to remove pages properly from the
+list of pages to free since forever.  It works by coincidence because
+list_add happened to do the right thing adding the pages to just the PCP
+lists.  However, a later patch added pages to either the PCP list or the
+zone list but only properly deleted the page from the list in one path
+leading to list corruption and a subsequent failure.  As a preparation
+patch, always delete the pages from one list properly before adding to
+another.  On its own, this fixes nothing although it adds a fractional
+amount of overhead but is critical to the next patch.
+
+Link: https://lkml.kernel.org/r/20221118101714.19590-1-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20221118101714.19590-2-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Reported-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Yu Zhao <yuzhao@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 7b086755fb8c ("mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 69668817fed37..d94ac6d87bc97 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3547,6 +3547,8 @@ void free_unref_page_list(struct list_head *list)
+       list_for_each_entry_safe(page, next, list, lru) {
+               struct zone *zone = page_zone(page);
+ 
++              list_del(&page->lru);
++
+               /* Different zone, different pcp lock. */
+               if (zone != locked_zone) {
+                       if (pcp)
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch b/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch

new file mode 100644 (file)

index 0000000..c14aec6
--- /dev/null
+++ b/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch
@@ -0,0 +1,94 @@
+From 65647c5732d410e694b93da2cffed9f76a5e599b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Sep 2023 14:11:08 -0400
+Subject: mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy
+ list
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+[ Upstream commit 7b086755fb8cdbb6b3e45a1bbddc00e7f9b1dc03 ]
+
+Commit 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock")
+bypasses the pcplist on lock contention and returns the page directly to
+the buddy list of the page's migratetype.
+
+For pages that don't have their own pcplist, such as CMA and HIGHATOMIC,
+the migratetype is temporarily updated such that the page can hitch a ride
+on the MOVABLE pcplist.  Their true type is later reassessed when flushing
+in free_pcppages_bulk().  However, when lock contention is detected after
+the type was already overridden, the bypass will then put the page on the
+wrong buddy list.
+
+Once on the MOVABLE buddy list, the page becomes eligible for fallbacks
+and even stealing.  In the case of HIGHATOMIC, otherwise ineligible
+allocations can dip into the highatomic reserves.  In the case of CMA, the
+page can be lost from the CMA region permanently.
+
+Use a separate pcpmigratetype variable for the pcplist override.  Use the
+original migratetype when going directly to the buddy.  This fixes the bug
+and should make the intentions more obvious in the code.
+
+Originally sent here to address the HIGHATOMIC case:
+https://lore.kernel.org/lkml/20230821183733.106619-4-hannes@cmpxchg.org/
+
+Changelog updated in response to the CMA-specific bug report.
+
+[mgorman@techsingularity.net: updated changelog]
+Link: https://lkml.kernel.org/r/20230911181108.GA104295@cmpxchg.org
+Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Joe Liu <joe.liu@mediatek.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 90082f75660f2..ca017c6008b7c 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3448,7 +3448,7 @@ void free_unref_page(struct page *page, unsigned int order)
+       struct per_cpu_pages *pcp;
+       struct zone *zone;
+       unsigned long pfn = page_to_pfn(page);
+-      int migratetype;
++      int migratetype, pcpmigratetype;
+ 
+       if (!free_unref_page_prepare(page, pfn, order))
+               return;
+@@ -3456,24 +3456,24 @@ void free_unref_page(struct page *page, unsigned int order)
+       /*
+        * We only track unmovable, reclaimable and movable on pcp lists.
+        * Place ISOLATE pages on the isolated list because they are being
+-       * offlined but treat HIGHATOMIC as movable pages so we can get those
+-       * areas back if necessary. Otherwise, we may have to free
++       * offlined but treat HIGHATOMIC and CMA as movable pages so we can
++       * get those areas back if necessary. Otherwise, we may have to free
+        * excessively into the page allocator
+        */
+-      migratetype = get_pcppage_migratetype(page);
++      migratetype = pcpmigratetype = get_pcppage_migratetype(page);
+       if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+               if (unlikely(is_migrate_isolate(migratetype))) {
+                       free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+                       return;
+               }
+-              migratetype = MIGRATE_MOVABLE;
++              pcpmigratetype = MIGRATE_MOVABLE;
+       }
+ 
+       zone = page_zone(page);
+       pcp_trylock_prepare(UP_flags);
+       pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+       if (pcp) {
+-              free_unref_page_commit(zone, pcp, page, migratetype, order);
++              free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+               pcp_spin_unlock(pcp);
+       } else {
+               free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+-- 
+2.40.1
+
diff --git a/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch b/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch

new file mode 100644 (file)

index 0000000..5edf3ea
--- /dev/null
+++ b/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch
@@ -0,0 +1,354 @@
+From a7473a18b6b2c4fd29dbf3399db70ba026294e3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 10:17:14 +0000
+Subject: mm/page_alloc: leave IRQs enabled for per-cpu page allocations
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 5749077415994eb02d660b2559b9d8278521e73d ]
+
+The pcp_spin_lock_irqsave protecting the PCP lists is IRQ-safe as a task
+allocating from the PCP must not re-enter the allocator from IRQ context.
+In each instance where IRQ-reentrancy is possible, the lock is acquired
+using pcp_spin_trylock_irqsave() even though IRQs are disabled and
+re-entrancy is impossible.
+
+Demoting the lock to pcp_spin_lock avoids an IRQ disable/enable in the
+common case at the cost of some IRQ allocations taking a slower path.  If
+the PCP lists need to be refilled, the zone lock still needs to disable
+IRQs but that will only happen on PCP refill and drain.  If an IRQ is
+raised when a PCP allocation is in progress, the trylock will fail and
+fallback to using the buddy lists directly.  Note that this may not be a
+universal win if an interrupt-intensive workload also allocates heavily
+from interrupt context and contends heavily on the zone->lock as a result.
+
+[mgorman@techsingularity.net: migratetype might be wrong if a PCP was locked]
+  Link: https://lkml.kernel.org/r/20221122131229.5263-2-mgorman@techsingularity.net
+[yuzhao@google.com: reported lockdep issue on IO completion from softirq]
+[hughd@google.com: fix list corruption, lock improvements, micro-optimisations]
+Link: https://lkml.kernel.org/r/20221118101714.19590-3-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 7b086755fb8c ("mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 124 +++++++++++++++++++++---------------------------
+ 1 file changed, 54 insertions(+), 70 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index d94ac6d87bc97..90082f75660f2 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
+       _ret;                                                           \
+ })
+ 
+-#define pcpu_spin_lock_irqsave(type, member, ptr, flags)              \
++#define pcpu_spin_trylock(type, member, ptr)                          \
+ ({                                                                    \
+       type *_ret;                                                     \
+       pcpu_task_pin();                                                \
+       _ret = this_cpu_ptr(ptr);                                       \
+-      spin_lock_irqsave(&_ret->member, flags);                        \
+-      _ret;                                                           \
+-})
+-
+-#define pcpu_spin_trylock_irqsave(type, member, ptr, flags)           \
+-({                                                                    \
+-      type *_ret;                                                     \
+-      pcpu_task_pin();                                                \
+-      _ret = this_cpu_ptr(ptr);                                       \
+-      if (!spin_trylock_irqsave(&_ret->member, flags)) {              \
++      if (!spin_trylock(&_ret->member)) {                             \
+               pcpu_task_unpin();                                      \
+               _ret = NULL;                                            \
+       }                                                               \
+@@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
+       pcpu_task_unpin();                                              \
+ })
+ 
+-#define pcpu_spin_unlock_irqrestore(member, ptr, flags)                       \
+-({                                                                    \
+-      spin_unlock_irqrestore(&ptr->member, flags);                    \
+-      pcpu_task_unpin();                                              \
+-})
+-
+ /* struct per_cpu_pages specific helpers. */
+ #define pcp_spin_lock(ptr)                                            \
+       pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
+ 
+-#define pcp_spin_lock_irqsave(ptr, flags)                             \
+-      pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags)
+-
+-#define pcp_spin_trylock_irqsave(ptr, flags)                          \
+-      pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags)
++#define pcp_spin_trylock(ptr)                                         \
++      pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
+ 
+ #define pcp_spin_unlock(ptr)                                          \
+       pcpu_spin_unlock(lock, ptr)
+ 
+-#define pcp_spin_unlock_irqrestore(ptr, flags)                                \
+-      pcpu_spin_unlock_irqrestore(lock, ptr, flags)
+ #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
+ DEFINE_PER_CPU(int, numa_node);
+ EXPORT_PER_CPU_SYMBOL(numa_node);
+@@ -1548,6 +1528,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
+                                       struct per_cpu_pages *pcp,
+                                       int pindex)
+ {
++      unsigned long flags;
+       int min_pindex = 0;
+       int max_pindex = NR_PCP_LISTS - 1;
+       unsigned int order;
+@@ -1563,8 +1544,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
+       /* Ensure requested pindex is drained first. */
+       pindex = pindex - 1;
+ 
+-      /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
+-      spin_lock(&zone->lock);
++      spin_lock_irqsave(&zone->lock, flags);
+       isolated_pageblocks = has_isolate_pageblock(zone);
+ 
+       while (count > 0) {
+@@ -1612,7 +1592,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
+               } while (count > 0 && !list_empty(list));
+       }
+ 
+-      spin_unlock(&zone->lock);
++      spin_unlock_irqrestore(&zone->lock, flags);
+ }
+ 
+ static void free_one_page(struct zone *zone,
+@@ -3126,10 +3106,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+                       unsigned long count, struct list_head *list,
+                       int migratetype, unsigned int alloc_flags)
+ {
++      unsigned long flags;
+       int i, allocated = 0;
+ 
+-      /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
+-      spin_lock(&zone->lock);
++      spin_lock_irqsave(&zone->lock, flags);
+       for (i = 0; i < count; ++i) {
+               struct page *page = __rmqueue(zone, order, migratetype,
+                                                               alloc_flags);
+@@ -3163,7 +3143,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+        * pages added to the pcp list.
+        */
+       __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+-      spin_unlock(&zone->lock);
++      spin_unlock_irqrestore(&zone->lock, flags);
+       return allocated;
+ }
+ 
+@@ -3180,16 +3160,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
+       batch = READ_ONCE(pcp->batch);
+       to_drain = min(pcp->count, batch);
+       if (to_drain > 0) {
+-              unsigned long flags;
+-
+-              /*
+-               * free_pcppages_bulk expects IRQs disabled for zone->lock
+-               * so even though pcp->lock is not intended to be IRQ-safe,
+-               * it's needed in this context.
+-               */
+-              spin_lock_irqsave(&pcp->lock, flags);
++              spin_lock(&pcp->lock);
+               free_pcppages_bulk(zone, to_drain, pcp, 0);
+-              spin_unlock_irqrestore(&pcp->lock, flags);
++              spin_unlock(&pcp->lock);
+       }
+ }
+ #endif
+@@ -3203,12 +3176,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
+ 
+       pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+       if (pcp->count) {
+-              unsigned long flags;
+-
+-              /* See drain_zone_pages on why this is disabling IRQs */
+-              spin_lock_irqsave(&pcp->lock, flags);
++              spin_lock(&pcp->lock);
+               free_pcppages_bulk(zone, pcp->count, pcp, 0);
+-              spin_unlock_irqrestore(&pcp->lock, flags);
++              spin_unlock(&pcp->lock);
+       }
+ }
+ 
+@@ -3474,7 +3444,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
+  */
+ void free_unref_page(struct page *page, unsigned int order)
+ {
+-      unsigned long flags;
+       unsigned long __maybe_unused UP_flags;
+       struct per_cpu_pages *pcp;
+       struct zone *zone;
+@@ -3502,10 +3471,10 @@ void free_unref_page(struct page *page, unsigned int order)
+ 
+       zone = page_zone(page);
+       pcp_trylock_prepare(UP_flags);
+-      pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
++      pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+       if (pcp) {
+               free_unref_page_commit(zone, pcp, page, migratetype, order);
+-              pcp_spin_unlock_irqrestore(pcp, flags);
++              pcp_spin_unlock(pcp);
+       } else {
+               free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+       }
+@@ -3517,10 +3486,10 @@ void free_unref_page(struct page *page, unsigned int order)
+  */
+ void free_unref_page_list(struct list_head *list)
+ {
++      unsigned long __maybe_unused UP_flags;
+       struct page *page, *next;
+       struct per_cpu_pages *pcp = NULL;
+       struct zone *locked_zone = NULL;
+-      unsigned long flags;
+       int batch_count = 0;
+       int migratetype;
+ 
+@@ -3548,21 +3517,36 @@ void free_unref_page_list(struct list_head *list)
+               struct zone *zone = page_zone(page);
+ 
+               list_del(&page->lru);
++              migratetype = get_pcppage_migratetype(page);
+ 
+               /* Different zone, different pcp lock. */
+               if (zone != locked_zone) {
+-                      if (pcp)
+-                              pcp_spin_unlock_irqrestore(pcp, flags);
++                      if (pcp) {
++                              pcp_spin_unlock(pcp);
++                              pcp_trylock_finish(UP_flags);
++                      }
+ 
++                      /*
++                       * trylock is necessary as pages may be getting freed
++                       * from IRQ or SoftIRQ context after an IO completion.
++                       */
++                      pcp_trylock_prepare(UP_flags);
++                      pcp = pcp_spin_trylock(zone->per_cpu_pageset);
++                      if (unlikely(!pcp)) {
++                              pcp_trylock_finish(UP_flags);
++                              free_one_page(zone, page, page_to_pfn(page),
++                                            0, migratetype, FPI_NONE);
++                              locked_zone = NULL;
++                              continue;
++                      }
+                       locked_zone = zone;
+-                      pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
++                      batch_count = 0;
+               }
+ 
+               /*
+                * Non-isolated types over MIGRATE_PCPTYPES get added
+                * to the MIGRATE_MOVABLE pcp list.
+                */
+-              migratetype = get_pcppage_migratetype(page);
+               if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+                       migratetype = MIGRATE_MOVABLE;
+ 
+@@ -3570,18 +3554,23 @@ void free_unref_page_list(struct list_head *list)
+               free_unref_page_commit(zone, pcp, page, migratetype, 0);
+ 
+               /*
+-               * Guard against excessive IRQ disabled times when we get
+-               * a large list of pages to free.
++               * Guard against excessive lock hold times when freeing
++               * a large list of pages. Lock will be reacquired if
++               * necessary on the next iteration.
+                */
+               if (++batch_count == SWAP_CLUSTER_MAX) {
+-                      pcp_spin_unlock_irqrestore(pcp, flags);
++                      pcp_spin_unlock(pcp);
++                      pcp_trylock_finish(UP_flags);
+                       batch_count = 0;
+-                      pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
++                      pcp = NULL;
++                      locked_zone = NULL;
+               }
+       }
+ 
+-      if (pcp)
+-              pcp_spin_unlock_irqrestore(pcp, flags);
++      if (pcp) {
++              pcp_spin_unlock(pcp);
++              pcp_trylock_finish(UP_flags);
++      }
+ }
+ 
+ /*
+@@ -3782,15 +3771,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+       struct per_cpu_pages *pcp;
+       struct list_head *list;
+       struct page *page;
+-      unsigned long flags;
+       unsigned long __maybe_unused UP_flags;
+ 
+-      /*
+-       * spin_trylock may fail due to a parallel drain. In the future, the
+-       * trylock will also protect against IRQ reentrancy.
+-       */
++      /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
+       pcp_trylock_prepare(UP_flags);
+-      pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
++      pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+       if (!pcp) {
+               pcp_trylock_finish(UP_flags);
+               return NULL;
+@@ -3804,7 +3789,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+       pcp->free_factor >>= 1;
+       list = &pcp->lists[order_to_pindex(migratetype, order)];
+       page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
+-      pcp_spin_unlock_irqrestore(pcp, flags);
++      pcp_spin_unlock(pcp);
+       pcp_trylock_finish(UP_flags);
+       if (page) {
+               __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+@@ -5375,7 +5360,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+                       struct page **page_array)
+ {
+       struct page *page;
+-      unsigned long flags;
+       unsigned long __maybe_unused UP_flags;
+       struct zone *zone;
+       struct zoneref *z;
+@@ -5457,9 +5441,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+       if (unlikely(!zone))
+               goto failed;
+ 
+-      /* Is a parallel drain in progress? */
++      /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
+       pcp_trylock_prepare(UP_flags);
+-      pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
++      pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+       if (!pcp)
+               goto failed_irq;
+ 
+@@ -5478,7 +5462,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+               if (unlikely(!page)) {
+                       /* Try and allocate at least one page */
+                       if (!nr_account) {
+-                              pcp_spin_unlock_irqrestore(pcp, flags);
++                              pcp_spin_unlock(pcp);
+                               goto failed_irq;
+                       }
+                       break;
+@@ -5493,7 +5477,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+               nr_populated++;
+       }
+ 
+-      pcp_spin_unlock_irqrestore(pcp, flags);
++      pcp_spin_unlock(pcp);
+       pcp_trylock_finish(UP_flags);
+ 
+       __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
+-- 
+2.40.1
+
diff --git a/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch b/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch

new file mode 100644 (file)

index 0000000..814f661
--- /dev/null
+++ b/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch
@@ -0,0 +1,93 @@
+From b8e7f84416f19aecd3af15f048fee98e03e1aacb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Mar 2023 20:57:45 +0000
+Subject: mptcp: annotate lockless accesses to sk->sk_err
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9ae8e5ad99b8ebcd3d3dd46075f3825e6f08f063 ]
+
+mptcp_poll() reads sk->sk_err without socket lock held/owned.
+
+Add READ_ONCE() and WRITE_ONCE() to avoid load/store tearing.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: d5fbeff1ab81 ("mptcp: move __mptcp_error_report in protocol.c")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 2 +-
+ net/mptcp/protocol.c   | 8 ++++----
+ net/mptcp/subflow.c    | 4 ++--
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 9127a7fd5269c..5d845fcf3d09e 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -2047,7 +2047,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
+           nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
+               return -EMSGSIZE;
+ 
+-      sk_err = ssk->sk_err;
++      sk_err = READ_ONCE(ssk->sk_err);
+       if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
+           nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
+               return -EMSGSIZE;
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 60e65f6325c3c..84f107854eac9 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2517,15 +2517,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
+       /* Mirror the tcp_reset() error propagation */
+       switch (sk->sk_state) {
+       case TCP_SYN_SENT:
+-              sk->sk_err = ECONNREFUSED;
++              WRITE_ONCE(sk->sk_err, ECONNREFUSED);
+               break;
+       case TCP_CLOSE_WAIT:
+-              sk->sk_err = EPIPE;
++              WRITE_ONCE(sk->sk_err, EPIPE);
+               break;
+       case TCP_CLOSE:
+               return;
+       default:
+-              sk->sk_err = ECONNRESET;
++              WRITE_ONCE(sk->sk_err, ECONNRESET);
+       }
+ 
+       inet_sk_state_store(sk, TCP_CLOSE);
+@@ -3893,7 +3893,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
+ 
+       /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
+       smp_rmb();
+-      if (sk->sk_err)
++      if (READ_ONCE(sk->sk_err))
+               mask |= EPOLLERR;
+ 
+       return mask;
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 168dced2434b3..032661c8273f2 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1248,7 +1248,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
+                       subflow->reset_reason = MPTCP_RST_EMPTCP;
+ 
+ reset:
+-                      ssk->sk_err = EBADMSG;
++                      WRITE_ONCE(ssk->sk_err, EBADMSG);
+                       tcp_set_state(ssk, TCP_CLOSE);
+                       while ((skb = skb_peek(&ssk->sk_receive_queue)))
+                               sk_eat_skb(ssk, skb);
+@@ -1332,7 +1332,7 @@ void __mptcp_error_report(struct sock *sk)
+               ssk_state = inet_sk_state_load(ssk);
+               if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+                       inet_sk_state_store(sk, ssk_state);
+-              sk->sk_err = -err;
++              WRITE_ONCE(sk->sk_err, -err);
+ 
+               /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+               smp_wmb();
+-- 
+2.40.1
+
diff --git a/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch b/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch

new file mode 100644 (file)

index 0000000..383b476
--- /dev/null
+++ b/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch
@@ -0,0 +1,269 @@
+From 508f8535214e8e82ecb63fcf8f6a69c18230a889 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Sep 2023 12:52:49 +0200
+Subject: mptcp: fix dangling connection hang-up
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 27e5ccc2d5a50ed61bb73153edb1066104b108b3 ]
+
+According to RFC 8684 section 3.3:
+
+  A connection is not closed unless [...] or an implementation-specific
+  connection-level send timeout.
+
+Currently the MPTCP protocol does not implement such a timeout, and
+connections timing out at the TCP level never move to the close state.
+
+Introduce a catch-up condition at subflow close time to move the
+MPTCP socket to the close state, too.
+
+That additionally allows removing the similar existing check inside the worker.
+
+Finally, allow some additional timeout for plain ESTABLISHED mptcp
+sockets, as the protocol allows creating new subflows even at that
+point and making the connection functional again.
+
+This issue is actually present since the beginning, but it is basically
+impossible to solve without a long chain of functional pre-requisites
+topped by commit bbd49d114d57 ("mptcp: consolidate transition to
+TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current
+patch, please also backport this other commit as well.
+
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430
+Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 90 ++++++++++++++++++++++----------------------
+ net/mptcp/protocol.h | 22 +++++++++++
+ net/mptcp/subflow.c  |  1 +
+ 3 files changed, 67 insertions(+), 46 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 93b60b049be27..60e65f6325c3c 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -846,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
+ 
+       mptcp_sockopt_sync_locked(msk, ssk);
+       mptcp_subflow_joined(msk, ssk);
++      mptcp_stop_tout_timer(sk);
+       return true;
+ }
+ 
+@@ -2349,18 +2350,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+       bool dispose_it, need_push = false;
+ 
+       /* If the first subflow moved to a close state before accept, e.g. due
+-       * to an incoming reset, mptcp either:
+-       * - if either the subflow or the msk are dead, destroy the context
+-       *   (the subflow socket is deleted by inet_child_forget) and the msk
+-       * - otherwise do nothing at the moment and take action at accept and/or
+-       *   listener shutdown - user-space must be able to accept() the closed
+-       *   socket.
++       * to an incoming reset or listener shutdown, the subflow socket is
++       * already deleted by inet_child_forget() and the mptcp socket can't
++       * survive too.
+        */
+-      if (msk->in_accept_queue && msk->first == ssk) {
+-              if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+-                      return;
+-
++      if (msk->in_accept_queue && msk->first == ssk &&
++          (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
+               /* ensure later check in mptcp_worker() will dispose the msk */
++              mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
+               sock_set_flag(sk, SOCK_DEAD);
+               lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+               mptcp_subflow_drop_ctx(ssk);
+@@ -2426,6 +2423,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ 
+       if (need_push)
+               __mptcp_push_pending(sk, 0);
++
++      /* Catch every 'all subflows closed' scenario, including peers silently
++       * closing them, e.g. due to timeout.
++       * For established sockets, allow an additional timeout before closing,
++       * as the protocol can still create more subflows.
++       */
++      if (list_is_singular(&msk->conn_list) && msk->first &&
++          inet_sk_state_load(msk->first) == TCP_CLOSE) {
++              if (sk->sk_state != TCP_ESTABLISHED ||
++                  msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
++                      inet_sk_state_store(sk, TCP_CLOSE);
++                      mptcp_close_wake_up(sk);
++              } else {
++                      mptcp_start_tout_timer(sk);
++              }
++      }
+ }
+ 
+ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+@@ -2469,23 +2482,14 @@ static void __mptcp_close_subflow(struct sock *sk)
+ 
+ }
+ 
+-static bool mptcp_should_close(const struct sock *sk)
++static bool mptcp_close_tout_expired(const struct sock *sk)
+ {
+-      s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
+-      struct mptcp_subflow_context *subflow;
+-
+-      if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
+-              return true;
++      if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
++          sk->sk_state == TCP_CLOSE)
++              return false;
+ 
+-      /* if all subflows are in closed status don't bother with additional
+-       * timeout
+-       */
+-      mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+-              if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
+-                  TCP_CLOSE)
+-                      return false;
+-      }
+-      return true;
++      return time_after32(tcp_jiffies32,
++                inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
+ }
+ 
+ static void mptcp_check_fastclose(struct mptcp_sock *msk)
+@@ -2609,15 +2613,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
+       struct sock *sk = (struct sock *)msk;
+       unsigned long timeout, close_timeout;
+ 
+-      if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
++      if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
+               return;
+ 
+-      close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
++      close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
++                      TCP_TIMEWAIT_LEN;
+ 
+       /* the close timeout takes precedence on the fail one, and here at least one of
+        * them is active
+        */
+-      timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
++      timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
+ 
+       sk_reset_timer(sk, &sk->sk_timer, timeout);
+ }
+@@ -2636,8 +2641,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
+       mptcp_subflow_reset(ssk);
+       WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
+       unlock_sock_fast(ssk, slow);
+-
+-      mptcp_reset_tout_timer(msk, 0);
+ }
+ 
+ static void mptcp_do_fastclose(struct sock *sk)
+@@ -2676,19 +2679,15 @@ static void mptcp_worker(struct work_struct *work)
+       if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+               __mptcp_close_subflow(sk);
+ 
+-      /* There is no point in keeping around an orphaned sk timedout or
+-       * closed, but we need the msk around to reply to incoming DATA_FIN,
+-       * even if it is orphaned and in FIN_WAIT2 state
+-       */
+-      if (sock_flag(sk, SOCK_DEAD)) {
+-              if (mptcp_should_close(sk)) {
+-                      inet_sk_state_store(sk, TCP_CLOSE);
+-                      mptcp_do_fastclose(sk);
+-              }
+-              if (sk->sk_state == TCP_CLOSE) {
+-                      __mptcp_destroy_sock(sk);
+-                      goto unlock;
+-              }
++      if (mptcp_close_tout_expired(sk)) {
++              inet_sk_state_store(sk, TCP_CLOSE);
++              mptcp_do_fastclose(sk);
++              mptcp_close_wake_up(sk);
++      }
++
++      if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
++              __mptcp_destroy_sock(sk);
++              goto unlock;
+       }
+ 
+       if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
+@@ -2984,7 +2983,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
+ 
+ cleanup:
+       /* orphan all the subflows */
+-      inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               bool slow = lock_sock_fast_nested(ssk);
+@@ -3021,7 +3019,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+               __mptcp_destroy_sock(sk);
+               do_cancel_work = true;
+       } else {
+-              mptcp_reset_tout_timer(msk, 0);
++              mptcp_start_tout_timer(sk);
+       }
+ 
+       return do_cancel_work;
+@@ -3085,7 +3083,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+       inet_sk_state_store(sk, TCP_CLOSE);
+ 
+       mptcp_stop_rtx_timer(sk);
+-      sk_stop_timer(sk, &sk->sk_timer);
++      mptcp_stop_tout_timer(sk);
+ 
+       if (mptcp_sk(sk)->token)
+               mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index b73160c5e2cf8..91d89a0aeb586 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -682,6 +682,28 @@ void mptcp_get_options(const struct sk_buff *skb,
+ void mptcp_finish_connect(struct sock *sk);
+ void __mptcp_set_connected(struct sock *sk);
+ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
++
++static inline void mptcp_stop_tout_timer(struct sock *sk)
++{
++      if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
++              return;
++
++      sk_stop_timer(sk, &sk->sk_timer);
++      inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
++}
++
++static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
++{
++      /* avoid 0 timestamp, as that means no close timeout */
++      inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
++}
++
++static inline void mptcp_start_tout_timer(struct sock *sk)
++{
++      mptcp_set_close_tout(sk, tcp_jiffies32);
++      mptcp_reset_tout_timer(mptcp_sk(sk), 0);
++}
++
+ static inline bool mptcp_is_fully_established(struct sock *sk)
+ {
+       return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 6c8148c6e7710..168dced2434b3 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1527,6 +1527,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+       mptcp_sock_graft(ssk, sk->sk_socket);
+       iput(SOCK_INODE(sf));
+       WRITE_ONCE(msk->allow_infinite_fallback, false);
++      mptcp_stop_tout_timer(sk);
+       return 0;
+ 
+ failed_unlink:
+-- 
+2.40.1
+
diff --git a/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch b/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch

new file mode 100644 (file)

index 0000000..a81cc46
--- /dev/null
+++ b/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch
@@ -0,0 +1,122 @@
+From 9b949b2bfa5363c42d1b085bfad979a21c50d114 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Sep 2023 12:52:46 +0200
+Subject: mptcp: move __mptcp_error_report in protocol.c
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit d5fbeff1ab812b6c473b6924bee8748469462e2c ]
+
+This will simplify the next patch ("mptcp: process pending subflow error
+on close").
+
+No functional change intended.
+
+Cc: stable@vger.kernel.org # v5.12+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++
+ net/mptcp/subflow.c  | 36 ------------------------------------
+ 2 files changed, 36 insertions(+), 36 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 84f107854eac9..193f2bdc8fe1b 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -765,6 +765,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
+       return moved;
+ }
+ 
++void __mptcp_error_report(struct sock *sk)
++{
++      struct mptcp_subflow_context *subflow;
++      struct mptcp_sock *msk = mptcp_sk(sk);
++
++      mptcp_for_each_subflow(msk, subflow) {
++              struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
++              int err = sock_error(ssk);
++              int ssk_state;
++
++              if (!err)
++                      continue;
++
++              /* only propagate errors on fallen-back sockets or
++               * on MPC connect
++               */
++              if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
++                      continue;
++
++              /* We need to propagate only transition to CLOSE state.
++               * Orphaned socket will see such state change via
++               * subflow_sched_work_if_closed() and that path will properly
++               * destroy the msk as needed.
++               */
++              ssk_state = inet_sk_state_load(ssk);
++              if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
++                      inet_sk_state_store(sk, ssk_state);
++              WRITE_ONCE(sk->sk_err, -err);
++
++              /* This barrier is coupled with smp_rmb() in mptcp_poll() */
++              smp_wmb();
++              sk_error_report(sk);
++              break;
++      }
++}
++
+ /* In most cases we will be able to lock the mptcp socket.  If its already
+  * owned, we need to defer to the work queue to avoid ABBA deadlock.
+  */
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 032661c8273f2..b93b08a75017b 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1305,42 +1305,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
+       *full_space = tcp_full_space(sk);
+ }
+ 
+-void __mptcp_error_report(struct sock *sk)
+-{
+-      struct mptcp_subflow_context *subflow;
+-      struct mptcp_sock *msk = mptcp_sk(sk);
+-
+-      mptcp_for_each_subflow(msk, subflow) {
+-              struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+-              int err = sock_error(ssk);
+-              int ssk_state;
+-
+-              if (!err)
+-                      continue;
+-
+-              /* only propagate errors on fallen-back sockets or
+-               * on MPC connect
+-               */
+-              if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+-                      continue;
+-
+-              /* We need to propagate only transition to CLOSE state.
+-               * Orphaned socket will see such state change via
+-               * subflow_sched_work_if_closed() and that path will properly
+-               * destroy the msk as needed.
+-               */
+-              ssk_state = inet_sk_state_load(ssk);
+-              if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+-                      inet_sk_state_store(sk, ssk_state);
+-              WRITE_ONCE(sk->sk_err, -err);
+-
+-              /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+-              smp_wmb();
+-              sk_error_report(sk);
+-              break;
+-      }
+-}
+-
+ static void subflow_error_report(struct sock *ssk)
+ {
+       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+-- 
+2.40.1
+
diff --git a/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch b/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch

new file mode 100644 (file)

index 0000000..064572d
--- /dev/null
+++ b/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch
@@ -0,0 +1,118 @@
+From 25532afde428aef9d85923b345150355c5ff2c0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Sep 2023 12:52:47 +0200
+Subject: mptcp: process pending subflow error on close
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 9f1a98813b4b686482e5ef3c9d998581cace0ba6 ]
+
+On incoming TCP reset, subflow closing could happen before error
+propagation. That in turn could cause the socket error to be ignored,
+and a missing socket state transition, as reported by Daire-Byrne.
+
+Address the issues by explicitly checking for subflow socket errors at
+close time. To avoid code duplication, factor out of
+__mptcp_error_report() a new helper implementing the relevant bits.
+
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429
+Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 63 ++++++++++++++++++++++++--------------------
+ 1 file changed, 34 insertions(+), 29 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 193f2bdc8fe1b..b6e0579e72644 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -765,40 +765,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
+       return moved;
+ }
+ 
+-void __mptcp_error_report(struct sock *sk)
++static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+ {
+-      struct mptcp_subflow_context *subflow;
+-      struct mptcp_sock *msk = mptcp_sk(sk);
++      int err = sock_error(ssk);
++      int ssk_state;
+ 
+-      mptcp_for_each_subflow(msk, subflow) {
+-              struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+-              int err = sock_error(ssk);
+-              int ssk_state;
++      if (!err)
++              return false;
+ 
+-              if (!err)
+-                      continue;
++      /* only propagate errors on fallen-back sockets or
++       * on MPC connect
++       */
++      if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
++              return false;
+ 
+-              /* only propagate errors on fallen-back sockets or
+-               * on MPC connect
+-               */
+-              if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+-                      continue;
++      /* We need to propagate only transition to CLOSE state.
++       * Orphaned socket will see such state change via
++       * subflow_sched_work_if_closed() and that path will properly
++       * destroy the msk as needed.
++       */
++      ssk_state = inet_sk_state_load(ssk);
++      if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
++              inet_sk_state_store(sk, ssk_state);
++      WRITE_ONCE(sk->sk_err, -err);
+ 
+-              /* We need to propagate only transition to CLOSE state.
+-               * Orphaned socket will see such state change via
+-               * subflow_sched_work_if_closed() and that path will properly
+-               * destroy the msk as needed.
+-               */
+-              ssk_state = inet_sk_state_load(ssk);
+-              if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+-                      inet_sk_state_store(sk, ssk_state);
+-              WRITE_ONCE(sk->sk_err, -err);
+-
+-              /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+-              smp_wmb();
+-              sk_error_report(sk);
+-              break;
+-      }
++      /* This barrier is coupled with smp_rmb() in mptcp_poll() */
++      smp_wmb();
++      sk_error_report(sk);
++      return true;
++}
++
++void __mptcp_error_report(struct sock *sk)
++{
++      struct mptcp_subflow_context *subflow;
++      struct mptcp_sock *msk = mptcp_sk(sk);
++
++      mptcp_for_each_subflow(msk, subflow)
++              if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
++                      break;
+ }
+ 
+ /* In most cases we will be able to lock the mptcp socket.  If its already
+@@ -2446,6 +2450,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+       }
+ 
+ out_release:
++      __mptcp_subflow_error_report(sk, ssk);
+       release_sock(ssk);
+ 
+       sock_put(ssk);
+-- 
+2.40.1
+
diff --git a/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch b/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch

new file mode 100644 (file)

index 0000000..9c7fda7
--- /dev/null
+++ b/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch
@@ -0,0 +1,210 @@
+From f49a0d87f8120207dbf164907a56445911a28011 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Sep 2023 12:52:48 +0200
+Subject: mptcp: rename timer related helper to less confusing names
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit f6909dc1c1f4452879278128012da6c76bc186a5 ]
+
+The msk socket uses two different timers to track close-related
+events and retransmissions. The existing helpers do not indicate
+clearly which timer they actually touch, making the related code
+quite confusing.
+
+Change the existing helpers' names to avoid such confusion. No
+functional change intended.
+
+This patch is linked to the next one ("mptcp: fix dangling connection
+hang-up"). The two patches are supposed to be backported together.
+
+Cc: stable@vger.kernel.org # v5.11+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 27e5ccc2d5a5 ("mptcp: fix dangling connection hang-up")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 42 +++++++++++++++++++++---------------------
+ net/mptcp/protocol.h |  2 +-
+ net/mptcp/subflow.c  |  2 +-
+ 3 files changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 6dd880d6b0518..93b60b049be27 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -401,7 +401,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
+       return false;
+ }
+ 
+-static void mptcp_stop_timer(struct sock *sk)
++static void mptcp_stop_rtx_timer(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+ 
+@@ -865,12 +865,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
+       }
+ }
+ 
+-static bool mptcp_timer_pending(struct sock *sk)
++static bool mptcp_rtx_timer_pending(struct sock *sk)
+ {
+       return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
+ }
+ 
+-static void mptcp_reset_timer(struct sock *sk)
++static void mptcp_reset_rtx_timer(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       unsigned long tout;
+@@ -1054,10 +1054,10 @@ static void __mptcp_clean_una(struct sock *sk)
+ out:
+       if (snd_una == READ_ONCE(msk->snd_nxt) &&
+           snd_una == READ_ONCE(msk->write_seq)) {
+-              if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+-                      mptcp_stop_timer(sk);
++              if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
++                      mptcp_stop_rtx_timer(sk);
+       } else {
+-              mptcp_reset_timer(sk);
++              mptcp_reset_rtx_timer(sk);
+       }
+ }
+ 
+@@ -1606,8 +1606,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+ 
+ out:
+       /* ensure the rtx timer is running */
+-      if (!mptcp_timer_pending(sk))
+-              mptcp_reset_timer(sk);
++      if (!mptcp_rtx_timer_pending(sk))
++              mptcp_reset_rtx_timer(sk);
+       if (do_check_data_fin)
+               mptcp_check_send_data_fin(sk);
+ }
+@@ -1665,8 +1665,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+       if (copied) {
+               tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+                        info.size_goal);
+-              if (!mptcp_timer_pending(sk))
+-                      mptcp_reset_timer(sk);
++              if (!mptcp_rtx_timer_pending(sk))
++                      mptcp_reset_rtx_timer(sk);
+ 
+               if (msk->snd_data_fin_enable &&
+                   msk->snd_nxt + 1 == msk->write_seq)
+@@ -2227,7 +2227,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
+       sock_put(sk);
+ }
+ 
+-static void mptcp_timeout_timer(struct timer_list *t)
++static void mptcp_tout_timer(struct timer_list *t)
+ {
+       struct sock *sk = from_timer(sk, t, sk_timer);
+ 
+@@ -2597,14 +2597,14 @@ static void __mptcp_retrans(struct sock *sk)
+ reset_timer:
+       mptcp_check_and_set_pending(sk);
+ 
+-      if (!mptcp_timer_pending(sk))
+-              mptcp_reset_timer(sk);
++      if (!mptcp_rtx_timer_pending(sk))
++              mptcp_reset_rtx_timer(sk);
+ }
+ 
+ /* schedule the timeout timer for the relevant event: either close timeout
+  * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
+  */
+-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
++void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
+ {
+       struct sock *sk = (struct sock *)msk;
+       unsigned long timeout, close_timeout;
+@@ -2637,7 +2637,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
+       WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
+       unlock_sock_fast(ssk, slow);
+ 
+-      mptcp_reset_timeout(msk, 0);
++      mptcp_reset_tout_timer(msk, 0);
+ }
+ 
+ static void mptcp_do_fastclose(struct sock *sk)
+@@ -2728,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk)
+ 
+       /* re-use the csk retrans timer for MPTCP-level retrans */
+       timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
+-      timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
++      timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
+ 
+       return 0;
+ }
+@@ -2820,8 +2820,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
+               } else {
+                       pr_debug("Sending DATA_FIN on subflow %p", ssk);
+                       tcp_send_ack(ssk);
+-                      if (!mptcp_timer_pending(sk))
+-                              mptcp_reset_timer(sk);
++                      if (!mptcp_rtx_timer_pending(sk))
++                              mptcp_reset_rtx_timer(sk);
+               }
+               break;
+       }
+@@ -2904,7 +2904,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
+ 
+       might_sleep();
+ 
+-      mptcp_stop_timer(sk);
++      mptcp_stop_rtx_timer(sk);
+       sk_stop_timer(sk, &sk->sk_timer);
+       msk->pm.status = 0;
+ 
+@@ -3021,7 +3021,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+               __mptcp_destroy_sock(sk);
+               do_cancel_work = true;
+       } else {
+-              mptcp_reset_timeout(msk, 0);
++              mptcp_reset_tout_timer(msk, 0);
+       }
+ 
+       return do_cancel_work;
+@@ -3084,7 +3084,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+       mptcp_check_listen_stop(sk);
+       inet_sk_state_store(sk, TCP_CLOSE);
+ 
+-      mptcp_stop_timer(sk);
++      mptcp_stop_rtx_timer(sk);
+       sk_stop_timer(sk, &sk->sk_timer);
+ 
+       if (mptcp_sk(sk)->token)
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index d77b25636125b..b73160c5e2cf8 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -681,7 +681,7 @@ void mptcp_get_options(const struct sk_buff *skb,
+ 
+ void mptcp_finish_connect(struct sock *sk);
+ void __mptcp_set_connected(struct sock *sk);
+-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
++void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+ static inline bool mptcp_is_fully_established(struct sock *sk)
+ {
+       return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 52a747a80e88e..6c8148c6e7710 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1161,7 +1161,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+       WRITE_ONCE(subflow->fail_tout, fail_tout);
+       tcp_send_ack(ssk);
+ 
+-      mptcp_reset_timeout(msk, subflow->fail_tout);
++      mptcp_reset_tout_timer(msk, subflow->fail_tout);
+ }
+ 
+ static bool subflow_check_data_avail(struct sock *ssk)
+-- 
+2.40.1
+
diff --git a/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch b/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch

new file mode 100644 (file)

index 0000000..debf65f
--- /dev/null
+++ b/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch
@@ -0,0 +1,39 @@
+From 9aa0b2eda8095073eadf5292d4ea37420997d704 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Apr 2023 12:17:35 -0400
+Subject: NFS: Cleanup unused rpc_clnt variable
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit e025f0a73f6acb920d86549b2177a5883535421d ]
+
+The root rpc_clnt is not used here, clean it up.
+
+Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled")
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Stable-dep-of: 956fd46f97d2 ("NFSv4: Fix a state manager thread deadlock regression")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4state.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 5b49e5365bb30..1b707573fbf8d 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1209,10 +1209,6 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+ {
+       struct task_struct *task;
+       char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
+-      struct rpc_clnt *cl = clp->cl_rpcclient;
+-
+-      while (cl != cl->cl_parent)
+-              cl = cl->cl_parent;
+ 
+       set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+       if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
+-- 
+2.40.1
+
diff --git a/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch b/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch

new file mode 100644 (file)

index 0000000..eadfd5a
--- /dev/null
+++ b/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch
@@ -0,0 +1,73 @@
+From 307a9be3b1a34bf9bd085bb0bb1088324672d52d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 14:07:22 -0400
+Subject: NFS: rename nfs_client_kset to nfs_kset
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 8b18a2edecc0741b0eecf8b18fdb356a0f8682de ]
+
+Be brief and match the subsystem name.  There's no need to distinguish this
+kset variable from the server.
+
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Stable-dep-of: 956fd46f97d2 ("NFSv4: Fix a state manager thread deadlock regression")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/sysfs.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
+index a6f7403669631..edb535a0ff973 100644
+--- a/fs/nfs/sysfs.c
++++ b/fs/nfs/sysfs.c
+@@ -18,7 +18,7 @@
+ #include "sysfs.h"
+ 
+ struct kobject *nfs_client_kobj;
+-static struct kset *nfs_client_kset;
++static struct kset *nfs_kset;
+ 
+ static void nfs_netns_object_release(struct kobject *kobj)
+ {
+@@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name,
+ 
+ int nfs_sysfs_init(void)
+ {
+-      nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj);
+-      if (!nfs_client_kset)
++      nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj);
++      if (!nfs_kset)
+               return -ENOMEM;
+-      nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL);
++      nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL);
+       if  (!nfs_client_kobj) {
+-              kset_unregister(nfs_client_kset);
+-              nfs_client_kset = NULL;
++              kset_unregister(nfs_kset);
++              nfs_kset = NULL;
+               return -ENOMEM;
+       }
+       return 0;
+@@ -70,7 +70,7 @@ int nfs_sysfs_init(void)
+ void nfs_sysfs_exit(void)
+ {
+       kobject_put(nfs_client_kobj);
+-      kset_unregister(nfs_client_kset);
++      kset_unregister(nfs_kset);
+ }
+ 
+ static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
+@@ -159,7 +159,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (p) {
+               p->net = net;
+-              p->kobject.kset = nfs_client_kset;
++              p->kobject.kset = nfs_kset;
+               if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type,
+                                       parent, "nfs_client") == 0)
+                       return p;
+-- 
+2.40.1
+
diff --git a/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch b/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch

new file mode 100644 (file)

index 0000000..938a067
--- /dev/null
+++ b/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch
@@ -0,0 +1,117 @@
+From 7eca5fbc29ba9fd59b57061e5611643168a6bb58 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Sep 2023 13:14:15 -0400
+Subject: NFSv4: Fix a state manager thread deadlock regression
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 956fd46f97d238032cb5fa4771cdaccc6e760f9a ]
+
+Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit
+aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it
+prevents the setup of new threads to handle reboot recovery, while the
+older recovery thread is stuck returning delegations.
+
+Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled")
+Cc: stable@vger.kernel.org
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4proc.c  |  4 +++-
+ fs/nfs/nfs4state.c | 36 +++++++++++++++++++++++++-----------
+ 2 files changed, 28 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index be570c65ae154..b927a7d1b46d4 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -10629,7 +10629,9 @@ static void nfs4_disable_swap(struct inode *inode)
+        */
+       struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 
+-      nfs4_schedule_state_manager(clp);
++      set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
++      clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
++      wake_up_var(&clp->cl_state);
+ }
+ 
+ static const struct inode_operations nfs4_dir_inode_operations = {
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 1b707573fbf8d..ed789e0cb9431 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1209,13 +1209,23 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+ {
+       struct task_struct *task;
+       char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
++      struct rpc_clnt *clnt = clp->cl_rpcclient;
++      bool swapon = false;
+ 
+       set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+-      if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
+-              wake_up_var(&clp->cl_state);
+-              return;
++
++      if (atomic_read(&clnt->cl_swapper)) {
++              swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
++                                         &clp->cl_state);
++              if (!swapon) {
++                      wake_up_var(&clp->cl_state);
++                      return;
++              }
+       }
+-      set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
++
++      if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
++              return;
++
+       __module_get(THIS_MODULE);
+       refcount_inc(&clp->cl_count);
+ 
+@@ -1232,8 +1242,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+                       __func__, PTR_ERR(task));
+               if (!nfs_client_init_is_complete(clp))
+                       nfs_mark_client_ready(clp, PTR_ERR(task));
++              if (swapon)
++                      clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+               nfs4_clear_state_manager_bit(clp);
+-              clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+               nfs_put_client(clp);
+               module_put(THIS_MODULE);
+       }
+@@ -2737,22 +2748,25 @@ static int nfs4_run_state_manager(void *ptr)
+ 
+       allow_signal(SIGKILL);
+ again:
+-      set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+       nfs4_state_manager(clp);
+-      if (atomic_read(&cl->cl_swapper)) {
++
++      if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
++          !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
+               wait_var_event_interruptible(&clp->cl_state,
+                                            test_bit(NFS4CLNT_RUN_MANAGER,
+                                                     &clp->cl_state));
+-              if (atomic_read(&cl->cl_swapper) &&
+-                  test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
++              if (!atomic_read(&cl->cl_swapper))
++                      clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
++              if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
++                  !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
+                       goto again;
+               /* Either no longer a swapper, or were signalled */
++              clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+       }
+-      clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ 
+       if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+           test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+-          !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
++          !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
+               goto again;
+ 
+       nfs_put_client(clp);
+-- 
+2.40.1
+
diff --git a/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch b/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch

new file mode 100644 (file)

index 0000000..2a3896d
--- /dev/null
+++ b/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch
@@ -0,0 +1,59 @@
+From 878cf5ac73826139a016ebb3154ba1e8b0130bd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 14:31:49 -0400
+Subject: Revert "NFSv4: Retry LOCK on OLD_STATEID during delegation return"
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 5b4a82a0724af1dfd1320826e0266117b6a57fbd ]
+
+Olga Kornievskaia reports that this patch breaks NFSv4.0 state recovery.
+It also introduces additional complexity in the error paths for cases not
+related to the original problem.  Let's revert it for now, and address the
+original problem in another manner.
+
+This reverts commit f5ea16137a3fa2858620dc9084466491c128535f.
+
+Fixes: f5ea16137a3f ("NFSv4: Retry LOCK on OLD_STATEID during delegation return")
+Reported-by: Kornievskaia, Olga <Olga.Kornievskaia@netapp.com>
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4proc.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index b927a7d1b46d4..e1297c6bcfbe2 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -7157,7 +7157,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ {
+       struct nfs4_lockdata *data = calldata;
+       struct nfs4_lock_state *lsp = data->lsp;
+-      struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
+ 
+       if (!nfs4_sequence_done(task, &data->res.seq_res))
+               return;
+@@ -7165,7 +7164,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+       data->rpc_status = task->tk_status;
+       switch (task->tk_status) {
+       case 0:
+-              renew_lease(server, data->timestamp);
++              renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
++                              data->timestamp);
+               if (data->arg.new_lock && !data->cancelled) {
+                       data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+                       if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
+@@ -7193,8 +7193,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+                       if (!nfs4_stateid_match(&data->arg.open_stateid,
+                                               &lsp->ls_state->open_stateid))
+                               goto out_restart;
+-                      else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
+-                              goto out_restart;
+               } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
+                                               &lsp->ls_stateid))
+                               goto out_restart;
+-- 
+2.40.1
+
diff --git a/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch b/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch

new file mode 100644 (file)

index 0000000..118c6a2
--- /dev/null
+++ b/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch
@@ -0,0 +1,149 @@
+From 3ba4e64542bd7ce2b45b115607a32a814492eb6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 20:54:25 +0800
+Subject: ring-buffer: Fix bytes info in per_cpu buffer stats
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+[ Upstream commit 45d99ea451d0c30bfd4864f0fe485d7dac014902 ]
+
+The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of
+bytes in cpu buffer that have not been consumed. However, currently
+after consuming data by reading file 'trace_pipe', the 'bytes' info
+was not changed as expected.
+
+  # cat per_cpu/cpu0/stats
+  entries: 0
+  overrun: 0
+  commit overrun: 0
+  bytes: 568             <--- 'bytes' is problematical !!!
+  oldest event ts:  8651.371479
+  now ts:  8653.912224
+  dropped events: 0
+  read events: 8
+
+The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it:
+  1. When stat 'read_bytes', account consumed event in rb_advance_reader();
+  2. When stat 'entries_bytes', exclude the discarded padding event which
+     is smaller than minimum size because it is invisible to reader. Then
+     use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for
+     page-based read/remove/overrun.
+
+Also correct the comments of ring_buffer_bytes_cpu() in this patch.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengyejian1@huawei.com
+
+Cc: stable@vger.kernel.org
+Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events")
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 51737b3d54b35..b7383358c4ea1 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage)
+       local_set(&bpage->commit, 0);
+ }
+ 
++static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
++{
++      return local_read(&bpage->page->commit);
++}
++
+ static void free_buffer_page(struct buffer_page *bpage)
+ {
+       free_page((unsigned long)bpage->page);
+@@ -2020,7 +2025,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+                        * Increment overrun to account for the lost events.
+                        */
+                       local_add(page_entries, &cpu_buffer->overrun);
+-                      local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++                      local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
+                       local_inc(&cpu_buffer->pages_lost);
+               }
+ 
+@@ -2364,11 +2369,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
+                              cpu_buffer->reader_page->read);
+ }
+ 
+-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
+-{
+-      return local_read(&bpage->page->commit);
+-}
+-
+ static struct ring_buffer_event *
+ rb_iter_head_event(struct ring_buffer_iter *iter)
+ {
+@@ -2514,7 +2514,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
+                * the counters.
+                */
+               local_add(entries, &cpu_buffer->overrun);
+-              local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++              local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
+               local_inc(&cpu_buffer->pages_lost);
+ 
+               /*
+@@ -2657,9 +2657,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+ 
+       event = __rb_page_index(tail_page, tail);
+ 
+-      /* account for padding bytes */
+-      local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+-
+       /*
+        * Save the original length to the meta data.
+        * This will be used by the reader to add lost event
+@@ -2673,7 +2670,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+        * write counter enough to allow another writer to slip
+        * in on this page.
+        * We put in a discarded commit instead, to make sure
+-       * that this space is not used again.
++       * that this space is not used again, and this space will
++       * not be accounted into 'entries_bytes'.
+        *
+        * If we are less than the minimum size, we don't need to
+        * worry about it.
+@@ -2698,6 +2696,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+       /* time delta must be non zero */
+       event->time_delta = 1;
+ 
++      /* account for padding bytes */
++      local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
++
+       /* Make sure the padding is visible before the tail_page->write update */
+       smp_wmb();
+ 
+@@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
+ EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+ 
+ /**
+- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
++ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
+  * @buffer: The ring buffer
+  * @cpu: The per CPU buffer to read from.
+  */
+@@ -4725,6 +4726,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+ 
+       length = rb_event_length(event);
+       cpu_buffer->reader_page->read += length;
++      cpu_buffer->read_bytes += length;
+ }
+ 
+ static void rb_advance_iter(struct ring_buffer_iter *iter)
+@@ -5820,7 +5822,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
+       } else {
+               /* update the entry counter */
+               cpu_buffer->read += rb_page_entries(reader);
+-              cpu_buffer->read_bytes += BUF_PAGE_SIZE;
++              cpu_buffer->read_bytes += rb_page_commit(reader);
+ 
+               /* swap the pages */
+               rb_init_page(bpage);
+-- 
+2.40.1
+
diff --git a/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch b/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch

new file mode 100644 (file)

index 0000000..fd703ab
--- /dev/null
+++ b/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch
@@ -0,0 +1,50 @@
+From 54e23956944147b55c7e5a2e07d96653611e0832 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Mar 2023 15:24:46 +0100
+Subject: ring-buffer: remove obsolete comment for free_buffer_page()
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+[ Upstream commit a98151ad53b53f010ee364ec2fd06445b328578b ]
+
+The comment refers to mm/slob.c which is being removed. It comes from
+commit ed56829cb319 ("ring_buffer: reset buffer page when freeing") and
+according to Steven the borrowed code was a page mapcount and mapping
+reset, which was later removed by commit e4c2ce82ca27 ("ring_buffer:
+allocate buffer page pointer"). Thus the comment is not accurate anyway,
+remove it.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20230315142446.27040-1-vbabka@suse.cz
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>
+Reported-by: Mike Rapoport <mike.rapoport@gmail.com>
+Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Fixes: e4c2ce82ca27 ("ring_buffer: allocate buffer page pointer")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 45d99ea451d0 ("ring-buffer: Fix bytes info in per_cpu buffer stats")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 2f562cf961e0a..51737b3d54b35 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -354,10 +354,6 @@ static void rb_init_page(struct buffer_data_page *bpage)
+       local_set(&bpage->commit, 0);
+ }
+ 
+-/*
+- * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+- * this issue out.
+- */
+ static void free_buffer_page(struct buffer_page *bpage)
+ {
+       free_page((unsigned long)bpage->page);
+-- 
+2.40.1
+
diff --git a/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch b/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch

new file mode 100644 (file)

index 0000000..0ba3727
--- /dev/null
+++ b/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch
@@ -0,0 +1,198 @@
+From 40d723c10314e9132382eed0470564cce942d865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Aug 2023 08:30:41 -0700
+Subject: scsi: core: Improve type safety of scsi_rescan_device()
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit 79519528a180c64a90863db2ce70887de6c49d16 ]
+
+Most callers of scsi_rescan_device() have the scsi_device pointer readily
+available. Pass a struct scsi_device pointer to scsi_rescan_device()
+instead of a struct device pointer. This change prevents a pointer to
+another struct device from being passed accidentally to scsi_rescan_device().
+
+Remove the scsi_rescan_device() declaration from the scsi_priv.h header
+file since it duplicates the declaration in <scsi/scsi_host.h>.
+
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Cc: Mike Christie <michael.christie@oracle.com>
+Cc: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Link: https://lore.kernel.org/r/20230822153043.4046244-1-bvanassche@acm.org
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Stable-dep-of: 8b4d9469d0b0 ("ata: libata-scsi: Fix delayed scsi_rescan_device() execution")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-scsi.c             | 2 +-
+ drivers/scsi/aacraid/commsup.c        | 2 +-
+ drivers/scsi/mvumi.c                  | 2 +-
+ drivers/scsi/scsi_lib.c               | 2 +-
+ drivers/scsi/scsi_priv.h              | 1 -
+ drivers/scsi/scsi_scan.c              | 4 ++--
+ drivers/scsi/scsi_sysfs.c             | 4 ++--
+ drivers/scsi/smartpqi/smartpqi_init.c | 2 +-
+ drivers/scsi/storvsc_drv.c            | 2 +-
+ drivers/scsi/virtio_scsi.c            | 2 +-
+ include/scsi/scsi_host.h              | 2 +-
+ 11 files changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index 8cc8268327f0c..b348f77b91231 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -4678,7 +4678,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
+                       }
+ 
+                       spin_unlock_irqrestore(ap->lock, flags);
+-                      scsi_rescan_device(&(sdev->sdev_gendev));
++                      scsi_rescan_device(sdev);
+                       scsi_device_put(sdev);
+                       spin_lock_irqsave(ap->lock, flags);
+               }
+diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
+index 3f062e4013ab6..013a9a334972e 100644
+--- a/drivers/scsi/aacraid/commsup.c
++++ b/drivers/scsi/aacraid/commsup.c
+@@ -1451,7 +1451,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
+ #endif
+                               break;
+                       }
+-                      scsi_rescan_device(&device->sdev_gendev);
++                      scsi_rescan_device(device);
+                       break;
+ 
+               default:
+diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
+index 05d3ce9b72dba..c4acf65379d20 100644
+--- a/drivers/scsi/mvumi.c
++++ b/drivers/scsi/mvumi.c
+@@ -1500,7 +1500,7 @@ static void mvumi_rescan_devices(struct mvumi_hba *mhba, int id)
+ 
+       sdev = scsi_device_lookup(mhba->shost, 0, id, 0);
+       if (sdev) {
+-              scsi_rescan_device(&sdev->sdev_gendev);
++              scsi_rescan_device(sdev);
+               scsi_device_put(sdev);
+       }
+ }
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index fb6e9a7a7f58b..d25e1c2472538 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -2445,7 +2445,7 @@ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
+               envp[idx++] = "SDEV_MEDIA_CHANGE=1";
+               break;
+       case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
+-              scsi_rescan_device(&sdev->sdev_gendev);
++              scsi_rescan_device(sdev);
+               envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
+               break;
+       case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
+diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
+index c52de9a973e46..b14545acb40f5 100644
+--- a/drivers/scsi/scsi_priv.h
++++ b/drivers/scsi/scsi_priv.h
+@@ -132,7 +132,6 @@ extern int scsi_complete_async_scans(void);
+ extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int,
+                                  unsigned int, u64, enum scsi_scan_mode);
+ extern void scsi_forget_host(struct Scsi_Host *);
+-extern void scsi_rescan_device(struct device *);
+ 
+ /* scsi_sysctl.c */
+ #ifdef CONFIG_SYSCTL
+diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
+index d12f2dcb4040a..445989f44d3f2 100644
+--- a/drivers/scsi/scsi_scan.c
++++ b/drivers/scsi/scsi_scan.c
+@@ -1611,9 +1611,9 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
+ }
+ EXPORT_SYMBOL(scsi_add_device);
+ 
+-void scsi_rescan_device(struct device *dev)
++void scsi_rescan_device(struct scsi_device *sdev)
+ {
+-      struct scsi_device *sdev = to_scsi_device(dev);
++      struct device *dev = &sdev->sdev_gendev;
+ 
+       device_lock(dev);
+ 
+diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
+index cac7c902cf70a..1f531063d6331 100644
+--- a/drivers/scsi/scsi_sysfs.c
++++ b/drivers/scsi/scsi_sysfs.c
+@@ -762,7 +762,7 @@ static ssize_t
+ store_rescan_field (struct device *dev, struct device_attribute *attr,
+                   const char *buf, size_t count)
+ {
+-      scsi_rescan_device(dev);
++      scsi_rescan_device(to_scsi_device(dev));
+       return count;
+ }
+ static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
+@@ -855,7 +855,7 @@ store_state_field(struct device *dev, struct device_attribute *attr,
+                * waiting for pending I/O to finish.
+                */
+               blk_mq_run_hw_queues(sdev->request_queue, true);
+-              scsi_rescan_device(dev);
++              scsi_rescan_device(sdev);
+       }
+ 
+       return ret == 0 ? count : -EINVAL;
+diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
+index 9f0f69c1ed665..47d487729635c 100644
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -2278,7 +2278,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
+                       device->advertised_queue_depth = device->queue_depth;
+                       scsi_change_queue_depth(device->sdev, device->advertised_queue_depth);
+                       if (device->rescan) {
+-                              scsi_rescan_device(&device->sdev->sdev_gendev);
++                              scsi_rescan_device(device->sdev);
+                               device->rescan = false;
+                       }
+               }
+diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
+index 7a1dc5c7c49ee..c2d981d5a2dd5 100644
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -471,7 +471,7 @@ static void storvsc_device_scan(struct work_struct *work)
+       sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
+       if (!sdev)
+               goto done;
+-      scsi_rescan_device(&sdev->sdev_gendev);
++      scsi_rescan_device(sdev);
+       scsi_device_put(sdev);
+ 
+ done:
+diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
+index 2a79ab16134b1..3f8c553f3d91e 100644
+--- a/drivers/scsi/virtio_scsi.c
++++ b/drivers/scsi/virtio_scsi.c
+@@ -325,7 +325,7 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi,
+       /* Handle "Parameters changed", "Mode parameters changed", and
+          "Capacity data has changed".  */
+       if (asc == 0x2a && (ascq == 0x00 || ascq == 0x01 || ascq == 0x09))
+-              scsi_rescan_device(&sdev->sdev_gendev);
++              scsi_rescan_device(sdev);
+ 
+       scsi_device_put(sdev);
+ }
+diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
+index d27d9fb7174c8..16848def47a1d 100644
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
+                                              struct device *,
+                                              struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+-extern void scsi_rescan_device(struct device *);
++extern void scsi_rescan_device(struct scsi_device *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
+ extern int scsi_host_busy(struct Scsi_Host *shost);
+-- 
+2.40.1
+
diff --git a/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch b/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch

new file mode 100644 (file)

index 0000000..2ccde96
--- /dev/null
+++ b/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch
@@ -0,0 +1,98 @@
+From bc1853103b803f47e566a48893bfdac0dcd86f99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Sep 2023 15:00:13 +0900
+Subject: scsi: Do not attempt to rescan suspended devices
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit ff48b37802e5c134e2dfc4d091f10b2eb5065a72 ]
+
+scsi_rescan_device() takes a scsi device lock before executing a device
+handler and device driver rescan methods. Waiting for the completion of
+any command issued to the device by these methods will thus be done with
+the device lock held. As a result, there is a risk of deadlocking within
+the power management code if scsi_rescan_device() is called to handle a
+device resume with the associated scsi device not yet resumed.
+
+Avoid such situation by checking that the target scsi device is in the
+running state, that is, fully capable of executing commands, before
+proceeding with the rescan and bail out, returning -EWOULDBLOCK otherwise.
+With this error return, the caller can retry rescanning the device after
+a delay.
+
+The state check is done with the device lock held and is thus safe
+against incoming suspend power management operations.
+
+Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Stable-dep-of: 8b4d9469d0b0 ("ata: libata-scsi: Fix delayed scsi_rescan_device() execution")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/scsi_scan.c | 18 +++++++++++++++++-
+ include/scsi/scsi_host.h |  2 +-
+ 2 files changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
+index 445989f44d3f2..ed26c52ed8474 100644
+--- a/drivers/scsi/scsi_scan.c
++++ b/drivers/scsi/scsi_scan.c
+@@ -1611,12 +1611,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
+ }
+ EXPORT_SYMBOL(scsi_add_device);
+ 
+-void scsi_rescan_device(struct scsi_device *sdev)
++int scsi_rescan_device(struct scsi_device *sdev)
+ {
+       struct device *dev = &sdev->sdev_gendev;
++      int ret = 0;
+ 
+       device_lock(dev);
+ 
++      /*
++       * Bail out if the device is not running. Otherwise, the rescan may
++       * block waiting for commands to be executed, with us holding the
++       * device lock. This can result in a potential deadlock in the power
++       * management core code when system resume is on-going.
++       */
++      if (sdev->sdev_state != SDEV_RUNNING) {
++              ret = -EWOULDBLOCK;
++              goto unlock;
++      }
++
+       scsi_attach_vpd(sdev);
+ 
+       if (sdev->handler && sdev->handler->rescan)
+@@ -1629,7 +1641,11 @@ void scsi_rescan_device(struct scsi_device *sdev)
+                       drv->rescan(dev);
+               module_put(dev->driver->owner);
+       }
++
++unlock:
+       device_unlock(dev);
++
++      return ret;
+ }
+ EXPORT_SYMBOL(scsi_rescan_device);
+ 
+diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
+index 16848def47a1d..71def41b1ad78 100644
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
+                                              struct device *,
+                                              struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+-extern void scsi_rescan_device(struct scsi_device *);
++extern int scsi_rescan_device(struct scsi_device *sdev);
+ extern void scsi_remove_host(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
+ extern int scsi_host_busy(struct Scsi_Host *shost);
+-- 
+2.40.1
+
diff --git a/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch b/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch

new file mode 100644 (file)

index 0000000..1232bed
--- /dev/null
+++ b/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch
@@ -0,0 +1,301 @@
+From 3b3411e173405c039b240cffc037812620e3b00b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Sep 2023 10:02:41 +0900
+Subject: scsi: sd: Differentiate system and runtime start/stop management
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 3cc2ffe5c16dc65dfac354bc5b5bc98d3b397567 ]
+
+The underlying device and driver of a SCSI disk may have different
+system and runtime power mode control requirements. This is because
+runtime power management affects only the SCSI disk, while system level
+power management affects all devices, including the controller for the
+SCSI disk.
+
+For instance, issuing a START STOP UNIT command when a SCSI disk is
+runtime suspended and resumed is fine: the command is translated to a
+STANDBY IMMEDIATE command to spin down the ATA disk and to a VERIFY
+command to wake it up. The SCSI disk runtime operations have no effect
+on the ata port device used to connect the ATA disk. However, for
+system suspend/resume operations, the ATA port used to connect the
+device will also be suspended and resumed, with the resume operation
+requiring re-validating the device link and the device itself. In this
+case, issuing a VERIFY command to spinup the disk must be done before
+starting to revalidate the device, when the ata port is being resumed.
+In such case, we must not allow the SCSI disk driver to issue START STOP
+UNIT commands.
+
+Allow a low level driver to refine the SCSI disk start/stop management
+by differentiating system and runtime cases with two new SCSI device
+flags: manage_system_start_stop and manage_runtime_start_stop. These new
+flags replace the current manage_start_stop flag. Drivers setting the
+manage_start_stop are modified to set both new flags, thus preserving the
+existing start/stop management behavior. For backward compatibility, the
+old manage_start_stop sysfs device attribute is kept as a read-only
+attribute showing a value of 1 for devices enabling both new flags and 0
+otherwise.
+
+Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Stable-dep-of: 99398d2070ab ("scsi: sd: Do not issue commands to suspended disks on shutdown")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libata-scsi.c  |  3 +-
+ drivers/firewire/sbp2.c    |  9 ++--
+ drivers/scsi/sd.c          | 90 ++++++++++++++++++++++++++++++--------
+ include/scsi/scsi_device.h |  5 ++-
+ 4 files changed, 84 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index 9c8dd9f86cbb3..8cc8268327f0c 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -1087,7 +1087,8 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
+                * will be woken up by ata_port_pm_resume() with a port reset
+                * and device revalidation.
+                */
+-              sdev->manage_start_stop = 1;
++              sdev->manage_system_start_stop = true;
++              sdev->manage_runtime_start_stop = true;
+               sdev->no_start_on_resume = 1;
+       }
+ 
+diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
+index 60051c0cabeaa..e322a326546b5 100644
+--- a/drivers/firewire/sbp2.c
++++ b/drivers/firewire/sbp2.c
+@@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
+  *
+  * - power condition
+  *   Set the power condition field in the START STOP UNIT commands sent by
+- *   sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
++ *   sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or
++ *   manage_runtime_start_stop is on).
+  *   Some disks need this to spin down or to resume properly.
+  *
+  * - override internal blacklist
+@@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
+ 
+       sdev->use_10_for_rw = 1;
+ 
+-      if (sbp2_param_exclusive_login)
+-              sdev->manage_start_stop = 1;
++      if (sbp2_param_exclusive_login) {
++              sdev->manage_system_start_stop = true;
++              sdev->manage_runtime_start_stop = true;
++      }
+ 
+       if (sdev->type == TYPE_ROM)
+               sdev->use_10_for_ms = 1;
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index 5bfca49415113..2ed57dfaf9ee0 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -213,18 +213,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
+ }
+ 
+ static ssize_t
+-manage_start_stop_show(struct device *dev, struct device_attribute *attr,
+-                     char *buf)
++manage_start_stop_show(struct device *dev,
++                     struct device_attribute *attr, char *buf)
+ {
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct scsi_device *sdp = sdkp->device;
+ 
+-      return sprintf(buf, "%u\n", sdp->manage_start_stop);
++      return sysfs_emit(buf, "%u\n",
++                        sdp->manage_system_start_stop &&
++                        sdp->manage_runtime_start_stop);
+ }
++static DEVICE_ATTR_RO(manage_start_stop);
+ 
+ static ssize_t
+-manage_start_stop_store(struct device *dev, struct device_attribute *attr,
+-                      const char *buf, size_t count)
++manage_system_start_stop_show(struct device *dev,
++                            struct device_attribute *attr, char *buf)
++{
++      struct scsi_disk *sdkp = to_scsi_disk(dev);
++      struct scsi_device *sdp = sdkp->device;
++
++      return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop);
++}
++
++static ssize_t
++manage_system_start_stop_store(struct device *dev,
++                             struct device_attribute *attr,
++                             const char *buf, size_t count)
+ {
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct scsi_device *sdp = sdkp->device;
+@@ -236,11 +250,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr,
+       if (kstrtobool(buf, &v))
+               return -EINVAL;
+ 
+-      sdp->manage_start_stop = v;
++      sdp->manage_system_start_stop = v;
+ 
+       return count;
+ }
+-static DEVICE_ATTR_RW(manage_start_stop);
++static DEVICE_ATTR_RW(manage_system_start_stop);
++
++static ssize_t
++manage_runtime_start_stop_show(struct device *dev,
++                             struct device_attribute *attr, char *buf)
++{
++      struct scsi_disk *sdkp = to_scsi_disk(dev);
++      struct scsi_device *sdp = sdkp->device;
++
++      return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop);
++}
++
++static ssize_t
++manage_runtime_start_stop_store(struct device *dev,
++                              struct device_attribute *attr,
++                              const char *buf, size_t count)
++{
++      struct scsi_disk *sdkp = to_scsi_disk(dev);
++      struct scsi_device *sdp = sdkp->device;
++      bool v;
++
++      if (!capable(CAP_SYS_ADMIN))
++              return -EACCES;
++
++      if (kstrtobool(buf, &v))
++              return -EINVAL;
++
++      sdp->manage_runtime_start_stop = v;
++
++      return count;
++}
++static DEVICE_ATTR_RW(manage_runtime_start_stop);
+ 
+ static ssize_t
+ allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf)
+@@ -572,6 +617,8 @@ static struct attribute *sd_disk_attrs[] = {
+       &dev_attr_FUA.attr,
+       &dev_attr_allow_restart.attr,
+       &dev_attr_manage_start_stop.attr,
++      &dev_attr_manage_system_start_stop.attr,
++      &dev_attr_manage_runtime_start_stop.attr,
+       &dev_attr_protection_type.attr,
+       &dev_attr_protection_mode.attr,
+       &dev_attr_app_tag_own.attr,
+@@ -3652,13 +3699,20 @@ static void sd_shutdown(struct device *dev)
+               sd_sync_cache(sdkp, NULL);
+       }
+ 
+-      if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
++      if (system_state != SYSTEM_RESTART &&
++          sdkp->device->manage_system_start_stop) {
+               sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
+               sd_start_stop_device(sdkp, 0);
+       }
+ }
+ 
+-static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
++static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
++{
++      return (sdev->manage_system_start_stop && !runtime) ||
++              (sdev->manage_runtime_start_stop && runtime);
++}
++
++static int sd_suspend_common(struct device *dev, bool runtime)
+ {
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+       struct scsi_sense_hdr sshdr;
+@@ -3690,12 +3744,12 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+               }
+       }
+ 
+-      if (sdkp->device->manage_start_stop) {
++      if (sd_do_start_stop(sdkp->device, runtime)) {
+               if (!sdkp->device->silence_suspend)
+                       sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
+               /* an error is not worth aborting a system sleep */
+               ret = sd_start_stop_device(sdkp, 0);
+-              if (ignore_stop_errors)
++              if (!runtime)
+                       ret = 0;
+       }
+ 
+@@ -3707,23 +3761,23 @@ static int sd_suspend_system(struct device *dev)
+       if (pm_runtime_suspended(dev))
+               return 0;
+ 
+-      return sd_suspend_common(dev, true);
++      return sd_suspend_common(dev, false);
+ }
+ 
+ static int sd_suspend_runtime(struct device *dev)
+ {
+-      return sd_suspend_common(dev, false);
++      return sd_suspend_common(dev, true);
+ }
+ 
+-static int sd_resume(struct device *dev)
++static int sd_resume(struct device *dev, bool runtime)
+ {
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+-      int ret = 0;
++      int ret;
+ 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+ 
+-      if (!sdkp->device->manage_start_stop)
++      if (!sd_do_start_stop(sdkp->device, runtime))
+               return 0;
+ 
+       if (!sdkp->device->no_start_on_resume) {
+@@ -3741,7 +3795,7 @@ static int sd_resume_system(struct device *dev)
+       if (pm_runtime_suspended(dev))
+               return 0;
+ 
+-      return sd_resume(dev);
++      return sd_resume(dev, false);
+ }
+ 
+ static int sd_resume_runtime(struct device *dev)
+@@ -3765,7 +3819,7 @@ static int sd_resume_runtime(struct device *dev)
+                                 "Failed to clear sense data\n");
+       }
+ 
+-      return sd_resume(dev);
++      return sd_resume(dev, true);
+ }
+ 
+ /**
+diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+index 9fdc77db3a2a8..dc2cff18b68bd 100644
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -161,6 +161,10 @@ struct scsi_device {
+                                * pass settings from slave_alloc to scsi
+                                * core. */
+       unsigned int eh_timeout; /* Error handling timeout */
++
++      bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */
++      bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */
++
+       unsigned removable:1;
+       unsigned changed:1;     /* Data invalid due to media change */
+       unsigned busy:1;        /* Used to prevent races */
+@@ -192,7 +196,6 @@ struct scsi_device {
+       unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
+       unsigned no_start_on_add:1;     /* do not issue start on add */
+       unsigned allow_restart:1; /* issue START_UNIT in error handler */
+-      unsigned manage_start_stop:1;   /* Let HLD (sd) manage start/stop */
+       unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
+       unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
+       unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
+-- 
+2.40.1
+
diff --git a/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch b/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch

new file mode 100644 (file)

index 0000000..174000f
--- /dev/null
+++ b/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch
@@ -0,0 +1,106 @@
+From 29c60a5e17517ecf23bd2258340ac055e6ef1832 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Sep 2023 17:03:15 +0900
+Subject: scsi: sd: Do not issue commands to suspended disks on shutdown
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 99398d2070ab03d13f90b758ad397e19a65fffb0 ]
+
+If an error occurs when resuming a host adapter before the devices
+attached to the adapter are resumed, the adapter low level driver may
+remove the scsi host, resulting in a call to sd_remove() for the
+disks of the host. This in turn results in a call to sd_shutdown() which
+will issue a synchronize cache command and a start stop unit command to
+spindown the disk. sd_shutdown() issues the commands only if the device
+is not already runtime suspended but does not check the power state for
+system-wide suspend/resume. That is, the commands may be issued with the
+device in a suspended state, which causes PM resume to hang, forcing a
+reset of the machine to recover.
+
+Fix this by tracking the suspended state of a disk by introducing the
+suspended boolean field in the scsi_disk structure. This flag is set to
+true when the disk is suspended in sd_suspend_common() and cleared when
+the disk is resumed in sd_resume(). When suspended is true, sd_shutdown()
+is not executed from sd_remove().
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/sd.c | 17 +++++++++++++----
+ drivers/scsi/sd.h |  1 +
+ 2 files changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index 2ed57dfaf9ee0..30184f7b762c1 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3626,7 +3626,8 @@ static int sd_remove(struct device *dev)
+ 
+       device_del(&sdkp->disk_dev);
+       del_gendisk(sdkp->disk);
+-      sd_shutdown(dev);
++      if (!sdkp->suspended)
++              sd_shutdown(dev);
+ 
+       put_disk(sdkp->disk);
+       return 0;
+@@ -3753,6 +3754,9 @@ static int sd_suspend_common(struct device *dev, bool runtime)
+                       ret = 0;
+       }
+ 
++      if (!ret)
++              sdkp->suspended = true;
++
+       return ret;
+ }
+ 
+@@ -3772,21 +3776,26 @@ static int sd_suspend_runtime(struct device *dev)
+ static int sd_resume(struct device *dev, bool runtime)
+ {
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+-      int ret;
++      int ret = 0;
+ 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+ 
+-      if (!sd_do_start_stop(sdkp->device, runtime))
++      if (!sd_do_start_stop(sdkp->device, runtime)) {
++              sdkp->suspended = false;
+               return 0;
++      }
+ 
+       if (!sdkp->device->no_start_on_resume) {
+               sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+               ret = sd_start_stop_device(sdkp, 1);
+       }
+ 
+-      if (!ret)
++      if (!ret) {
+               opal_unlock_from_suspend(sdkp->opal_dev);
++              sdkp->suspended = false;
++      }
++
+       return ret;
+ }
+ 
+diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
+index 5eea762f84d18..409dda5350d10 100644
+--- a/drivers/scsi/sd.h
++++ b/drivers/scsi/sd.h
+@@ -131,6 +131,7 @@ struct scsi_disk {
+       u8              provisioning_mode;
+       u8              zeroing_mode;
+       u8              nr_actuators;           /* Number of actuators */
++      bool            suspended;      /* Disk is suspended (stopped) */
+       unsigned        ATO : 1;        /* state of disk ATO bit */
+       unsigned        cache_override : 1; /* temp override of WCE,RCD */
+       unsigned        WCE : 1;        /* state of disk WCE bit */
+-- 
+2.40.1
+
diff --git a/queue-6.1/series b/queue-6.1/series

new file mode 100644 (file)

index 0000000..267d52f
--- /dev/null
+++ b/queue-6.1/series
@@ -0,0 +1,41 @@
+spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch
+spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch
+alsa-hda-tas2781-add-tas2781-hda-driver.patch
+alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch
+alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch
+alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch
+alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch
+asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch
+asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch
+maple_tree-remove-the-redundant-code.patch
+maple_tree-relocate-the-declaration-of-mas_empty_are.patch
+maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch
+mptcp-rename-timer-related-helper-to-less-confusing-.patch
+mptcp-fix-dangling-connection-hang-up.patch
+mptcp-annotate-lockless-accesses-to-sk-sk_err.patch
+mptcp-move-__mptcp_error_report-in-protocol.c.patch
+mptcp-process-pending-subflow-error-on-close.patch
+ata-scsi-do-not-issue-start-stop-unit-on-resume.patch
+scsi-sd-differentiate-system-and-runtime-start-stop-.patch
+scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch
+scsi-core-improve-type-safety-of-scsi_rescan_device.patch
+scsi-do-not-attempt-to-rescan-suspended-devices.patch
+ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch
+nfs-cleanup-unused-rpc_clnt-variable.patch
+nfs-rename-nfs_client_kset-to-nfs_kset.patch
+nfsv4-fix-a-state-manager-thread-deadlock-regression.patch
+mm-memory-add-vm_normal_folio.patch
+mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch
+mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch
+mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch
+mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch
+mm-page_alloc-always-remove-pages-from-temporary-lis.patch
+mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch
+mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch
+ring-buffer-remove-obsolete-comment-for-free_buffer_.patch
+ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch
+btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch
+btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch
+btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch
+revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch
+arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch
diff --git a/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch b/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch

new file mode 100644 (file)

index 0000000..1684da5
--- /dev/null
+++ b/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch
@@ -0,0 +1,66 @@
+From a0dd03214a34d747df5a47cacec49c4693890454 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 18:20:41 +0100
+Subject: spi: zynqmp-gqspi: Convert to platform remove callback returning void
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit 3ffefa1d9c9eba60c7f8b4a9ce2df3e4c7f4a88e ]
+
+The .remove() callback for a platform driver returns an int which makes
+many driver authors wrongly assume it's possible to do error handling by
+returning an error code. However the value returned is (mostly) ignored
+and this typically results in resource leaks. To improve here there is a
+quest to make the remove callback return void. In the first step of this
+quest all drivers are converted to .remove_new() which already returns
+void.
+
+Trivially convert this driver from always returning zero in the remove
+callback to the void returning variant.
+
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Link: https://lore.kernel.org/r/20230303172041.2103336-88-u.kleine-koenig@pengutronix.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-zynqmp-gqspi.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
+index c760aac070e54..876a41c5d1664 100644
+--- a/drivers/spi/spi-zynqmp-gqspi.c
++++ b/drivers/spi/spi-zynqmp-gqspi.c
+@@ -1240,7 +1240,7 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
+  *
+  * Return:    0 Always
+  */
+-static int zynqmp_qspi_remove(struct platform_device *pdev)
++static void zynqmp_qspi_remove(struct platform_device *pdev)
+ {
+       struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev);
+ 
+@@ -1249,8 +1249,6 @@ static int zynqmp_qspi_remove(struct platform_device *pdev)
+       clk_disable_unprepare(xqspi->pclk);
+       pm_runtime_set_suspended(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+-
+-      return 0;
+ }
+ 
+ static const struct of_device_id zynqmp_qspi_of_match[] = {
+@@ -1262,7 +1260,7 @@ MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);
+ 
+ static struct platform_driver zynqmp_qspi_driver = {
+       .probe = zynqmp_qspi_probe,
+-      .remove = zynqmp_qspi_remove,
++      .remove_new = zynqmp_qspi_remove,
+       .driver = {
+               .name = "zynqmp-qspi",
+               .of_match_table = zynqmp_qspi_of_match,
+-- 
+2.40.1
+
diff --git a/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch b/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch

new file mode 100644 (file)

index 0000000..c3a7d52
--- /dev/null
+++ b/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch
@@ -0,0 +1,62 @@
+From bc5eee6846668dfa31bf34227ce977ed041fd14d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Jun 2023 10:24:35 +0200
+Subject: spi: zynqmp-gqspi: fix clock imbalance on probe failure
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 1527b076ae2cb6a9c590a02725ed39399fcad1cf ]
+
+Make sure that the device is not runtime suspended before explicitly
+disabling the clocks on probe failure and on driver unbind to avoid a
+clock enable-count imbalance.
+
+Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support")
+Cc: stable@vger.kernel.org     # 4.19
+Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli@xilinx.com>
+Cc: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro@kernel.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-zynqmp-gqspi.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
+index 876a41c5d1664..f2dcd1ae77c7d 100644
+--- a/drivers/spi/spi-zynqmp-gqspi.c
++++ b/drivers/spi/spi-zynqmp-gqspi.c
+@@ -1218,9 +1218,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
+       return 0;
+ 
+ clk_dis_all:
+-      pm_runtime_put_sync(&pdev->dev);
+-      pm_runtime_set_suspended(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
++      pm_runtime_put_noidle(&pdev->dev);
++      pm_runtime_set_suspended(&pdev->dev);
+       clk_disable_unprepare(xqspi->refclk);
+ clk_dis_pclk:
+       clk_disable_unprepare(xqspi->pclk);
+@@ -1244,11 +1244,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev)
+ {
+       struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev);
+ 
++      pm_runtime_get_sync(&pdev->dev);
++
+       zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
++
++      pm_runtime_disable(&pdev->dev);
++      pm_runtime_put_noidle(&pdev->dev);
++      pm_runtime_set_suspended(&pdev->dev);
+       clk_disable_unprepare(xqspi->refclk);
+       clk_disable_unprepare(xqspi->pclk);
+-      pm_runtime_set_suspended(&pdev->dev);
+-      pm_runtime_disable(&pdev->dev);
+ }
+ 
+ static const struct of_device_id zynqmp_qspi_of_match[] = {
+-- 
+2.40.1
+
author	Sasha Levin <sashal@kernel.org>
	Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Fri, 6 Oct 2023 13:29:52 +0000 (09:29 -0400)
queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/maple_tree-remove-the-redundant-code.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-memory-add-vm_normal_folio.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mptcp-fix-dangling-connection-hang-up.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mptcp-process-pending-subflow-error-on-close.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series	[new file with mode: 0644]	patch \| blob
queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch	[new file with mode: 0644]	patch \| blob