From: Sasha Levin Date: Fri, 6 Oct 2023 13:29:52 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v4.14.327~90 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=030bd8a1c83c38b58c3c746091bff98d05cb8230;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch b/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch new file mode 100644 index 00000000000..71b78ebc37b --- /dev/null +++ b/queue-6.1/alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch @@ -0,0 +1,79 @@ +From 5ecc09ed8ee97b25ce54ed8b04f58ded5529bbfe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Aug 2023 20:40:51 +0900 +Subject: ALSA: hda/realtek: Add quirk for HP Victus 16-d1xxx to enable mute + LED + +From: SungHwan Jung + +[ Upstream commit 93dc18e11b1ab2d485b69f91c973e6b83e47ebd0 ] + +This quirk enables mute LED on HP Victus 16-d1xxx (8A25) laptops, which +use ALC245 codec. 
+ +Signed-off-by: SungHwan Jung +Link: https://lore.kernel.org/r/20230823114051.3921-1-onenowy@gmail.com +Signed-off-by: Takashi Iwai +Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 7d549229d0b95..e81bc0c026eba 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -4639,6 +4639,22 @@ static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec, + } + } + ++static void alc245_fixup_hp_mute_led_coefbit(struct hda_codec *codec, ++ const struct hda_fixup *fix, ++ int action) ++{ ++ struct alc_spec *spec = codec->spec; ++ ++ if (action == HDA_FIXUP_ACT_PRE_PROBE) { ++ spec->mute_led_polarity = 0; ++ spec->mute_led_coef.idx = 0x0b; ++ spec->mute_led_coef.mask = 3 << 2; ++ spec->mute_led_coef.on = 2 << 2; ++ spec->mute_led_coef.off = 1 << 2; ++ snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); ++ } ++} ++ + /* turn on/off mic-mute LED per capture hook by coef bit */ + static int coef_micmute_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +@@ -7289,6 +7305,7 @@ enum { + ALC236_FIXUP_DELL_DUAL_CODECS, + ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, + ALC287_FIXUP_TAS2781_I2C, ++ ALC245_FIXUP_HP_MUTE_LED_COEFBIT, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -9364,6 +9381,10 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + }, ++ [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc245_fixup_hp_mute_led_coefbit, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -9630,6 +9651,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", 
ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), ++ SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), +-- +2.40.1 + diff --git a/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch b/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch new file mode 100644 index 00000000000..1af651cffb4 --- /dev/null +++ b/queue-6.1/alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch @@ -0,0 +1,58 @@ +From 7028ce65c3db5104a485063bfaf593e5c3d0e817 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Aug 2023 20:39:48 +0200 +Subject: ALSA: hda/realtek: Add quirk for mute LEDs on HP ENVY x360 15-eu0xxx + +From: Fabian Vogt + +[ Upstream commit c99c26b16c1544534ebd6a5f27a034f3e44d2597 ] + +The LED for the mic mute button is controlled by GPIO2. +The mute button LED is slightly more complex, it's controlled by two bits +in coeff 0x0b. 
+ +Signed-off-by: Fabian Vogt +Link: https://lore.kernel.org/r/2693091.mvXUDI8C0e@fabians-envy +Signed-off-by: Takashi Iwai +Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index e81bc0c026eba..e01af481e0d0d 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7306,6 +7306,7 @@ enum { + ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, + ALC287_FIXUP_TAS2781_I2C, + ALC245_FIXUP_HP_MUTE_LED_COEFBIT, ++ ALC245_FIXUP_HP_X360_MUTE_LEDS, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -9385,6 +9386,12 @@ static const struct hda_fixup alc269_fixups[] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_mute_led_coefbit, + }, ++ [ALC245_FIXUP_HP_X360_MUTE_LEDS] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc245_fixup_hp_mute_led_coefbit, ++ .chained = true, ++ .chain_id = ALC245_FIXUP_HP_GPIO_LED ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -9620,6 +9627,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), ++ SND_PCI_QUIRK(0x103c, 0x888a, "HP ENVY x360 Convertible 15-eu0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), + SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), +-- +2.40.1 + diff 
--git a/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch b/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch new file mode 100644 index 00000000000..312dd4f046a --- /dev/null +++ b/queue-6.1/alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch @@ -0,0 +1,89 @@ +From 180826e595afdaf93f61a7d8929efb5e586ee9b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Sep 2023 16:50:41 +0800 +Subject: ALSA: hda/realtek - ALC287 I2S speaker platform support + +From: Kailang Yang + +[ Upstream commit e43252db7e207a2e194e6a4883a43a31a776a968 ] + +0x17 was only speaker pin, DAC assigned will be 0x03. Headphone +assigned to 0x02. +Playback via headphone will get EQ filter processing. So,it needs to +swap DAC. + +Tested-by: Mark Pearson +Signed-off-by: Kailang Yang +Link: https://lore.kernel.org/r/4e4cfa1b3b4c46838aecafc6e8b6f876@realtek.com +Signed-off-by: Takashi Iwai +Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index e01af481e0d0d..62476b6fd248c 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7046,6 +7046,27 @@ static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec, + } + } + ++/* Forcibly assign NID 0x03 to HP while NID 0x02 to SPK */ ++static void alc287_fixup_bind_dacs(struct hda_codec *codec, ++ const struct hda_fixup *fix, int action) ++{ ++ struct alc_spec *spec = codec->spec; ++ static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */ ++ static const hda_nid_t preferred_pairs[] = { ++ 0x17, 0x02, 0x21, 0x03, 0 ++ }; ++ ++ if (action != HDA_FIXUP_ACT_PRE_PROBE) ++ return; ++ ++ snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); ++ spec->gen.preferred_dacs = preferred_pairs; ++ 
spec->gen.auto_mute_via_amp = 1; ++ snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, ++ 0x0); /* Make sure 0x14 was disable */ ++} ++ ++ + enum { + ALC269_FIXUP_GPIO2, + ALC269_FIXUP_SONY_VAIO, +@@ -7307,6 +7328,7 @@ enum { + ALC287_FIXUP_TAS2781_I2C, + ALC245_FIXUP_HP_MUTE_LED_COEFBIT, + ALC245_FIXUP_HP_X360_MUTE_LEDS, ++ ALC287_FIXUP_THINKPAD_I2S_SPK, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -9392,6 +9414,10 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC245_FIXUP_HP_GPIO_LED + }, ++ [ALC287_FIXUP_THINKPAD_I2S_SPK] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc287_fixup_bind_dacs, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -10514,6 +10540,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { + {0x17, 0x90170111}, + {0x19, 0x03a11030}, + {0x21, 0x03211020}), ++ SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, ++ {0x17, 0x90170110}, ++ {0x19, 0x03a11030}, ++ {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, +-- +2.40.1 + diff --git a/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch b/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch new file mode 100644 index 00000000000..4aeaadf27fb --- /dev/null +++ b/queue-6.1/alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch @@ -0,0 +1,43 @@ +From 948225e3347ea22b5e5ad668c64ed799bb47d8e4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Sep 2023 16:27:16 +0800 +Subject: ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support + +From: Kailang Yang + +[ Upstream commit 41b07476da38ac2878a14e5b8fe0312c41ea36e3 ] + +New platform SSID:0x231f. + +0x17 was only speaker pin, DAC assigned will be 0x03. Headphone +assigned to 0x02. +Playback via headphone will get EQ filter processing. 
+So, it needs to swap DAC. + +Signed-off-by: Kailang Yang +Cc: +Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 62476b6fd248c..3bea49e772a1f 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10544,6 +10544,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { + {0x17, 0x90170110}, + {0x19, 0x03a11030}, + {0x21, 0x03211020}), ++ SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, ++ {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */ ++ {0x19, 0x04a11040}, ++ {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, +-- +2.40.1 + diff --git a/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch b/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch new file mode 100644 index 00000000000..adf28245754 --- /dev/null +++ b/queue-6.1/alsa-hda-tas2781-add-tas2781-hda-driver.patch @@ -0,0 +1,180 @@ +From 07910699cda1dfec48856de36c7d2b2cf7f89f31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 16:58:35 +0800 +Subject: ALSA: hda/tas2781: Add tas2781 HDA driver + +From: Shenghao Ding + +[ Upstream commit 3babae915f4c15d76a5134e55806a1c1588e2865 ] + +Integrate tas2781 configs for Lenovo Laptops. All of the tas2781s in the +laptop will be aggregated as one audio device. The code support realtek +as the primary codec. Rename "struct cs35l41_dev_name" to +"struct scodec_dev_name" for all other side codecs instead of the certain +one. 
+ +Signed-off-by: Shenghao Ding +Link: https://lore.kernel.org/r/20230818085836.1442-1-shenghao-ding@ti.com +Signed-off-by: Takashi Iwai +Stable-dep-of: 41b07476da38 ("ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 88 +++++++++++++++++++++++++++++++++-- + 1 file changed, 85 insertions(+), 3 deletions(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 57e07aa4e136c..7d549229d0b95 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -6721,7 +6721,7 @@ static void comp_generic_playback_hook(struct hda_pcm_stream *hinfo, struct hda_ + } + } + +-struct cs35l41_dev_name { ++struct scodec_dev_name { + const char *bus; + const char *hid; + int index; +@@ -6730,7 +6730,7 @@ struct cs35l41_dev_name { + /* match the device name in a slightly relaxed manner */ + static int comp_match_cs35l41_dev_name(struct device *dev, void *data) + { +- struct cs35l41_dev_name *p = data; ++ struct scodec_dev_name *p = data; + const char *d = dev_name(dev); + int n = strlen(p->bus); + char tmp[32]; +@@ -6746,12 +6746,32 @@ static int comp_match_cs35l41_dev_name(struct device *dev, void *data) + return !strcmp(d + n, tmp); + } + ++static int comp_match_tas2781_dev_name(struct device *dev, ++ void *data) ++{ ++ struct scodec_dev_name *p = data; ++ const char *d = dev_name(dev); ++ int n = strlen(p->bus); ++ char tmp[32]; ++ ++ /* check the bus name */ ++ if (strncmp(d, p->bus, n)) ++ return 0; ++ /* skip the bus number */ ++ if (isdigit(d[n])) ++ n++; ++ /* the rest must be exact matching */ ++ snprintf(tmp, sizeof(tmp), "-%s:00", p->hid); ++ ++ return !strcmp(d + n, tmp); ++} ++ + static void cs35l41_generic_fixup(struct hda_codec *cdc, int action, const char *bus, + const char *hid, int count) + { + struct device *dev = hda_codec_dev(cdc); + struct alc_spec *spec = cdc->spec; +- struct cs35l41_dev_name *rec; ++ struct scodec_dev_name *rec; 
+ int ret, i; + + switch (action) { +@@ -6776,6 +6796,41 @@ static void cs35l41_generic_fixup(struct hda_codec *cdc, int action, const char + } + } + ++static void tas2781_generic_fixup(struct hda_codec *cdc, int action, ++ const char *bus, const char *hid) ++{ ++ struct device *dev = hda_codec_dev(cdc); ++ struct alc_spec *spec = cdc->spec; ++ struct scodec_dev_name *rec; ++ int ret; ++ ++ switch (action) { ++ case HDA_FIXUP_ACT_PRE_PROBE: ++ rec = devm_kmalloc(dev, sizeof(*rec), GFP_KERNEL); ++ if (!rec) ++ return; ++ rec->bus = bus; ++ rec->hid = hid; ++ rec->index = 0; ++ spec->comps[0].codec = cdc; ++ component_match_add(dev, &spec->match, ++ comp_match_tas2781_dev_name, rec); ++ ret = component_master_add_with_match(dev, &comp_master_ops, ++ spec->match); ++ if (ret) ++ codec_err(cdc, ++ "Fail to register component aggregator %d\n", ++ ret); ++ else ++ spec->gen.pcm_playback_hook = ++ comp_generic_playback_hook; ++ break; ++ case HDA_FIXUP_ACT_FREE: ++ component_master_del(dev, &comp_master_ops); ++ break; ++ } ++} ++ + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) + { + cs35l41_generic_fixup(cdc, action, "i2c", "CSC3551", 2); +@@ -6803,6 +6858,12 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st + cs35l41_generic_fixup(cdc, action, "i2c", "CLSA0101", 2); + } + ++static void tas2781_fixup_i2c(struct hda_codec *cdc, ++ const struct hda_fixup *fix, int action) ++{ ++ tas2781_generic_fixup(cdc, action, "i2c", "TIAS2781"); ++} ++ + /* for alc295_fixup_hp_top_speakers */ + #include "hp_x360_helper.c" + +@@ -7227,6 +7288,7 @@ enum { + ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, + ALC236_FIXUP_DELL_DUAL_CODECS, + ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, ++ ALC287_FIXUP_TAS2781_I2C, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -9296,6 +9358,12 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + }, ++ 
[ALC287_FIXUP_TAS2781_I2C] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = tas2781_fixup_i2c, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_THINKPAD_ACPI, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -9867,6 +9935,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), + SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), ++ SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual powe mode2 YC", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38ba, "Yoga S780-14.5 Air AMD quad YC", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38bb, "Yoga S780-14.5 Air AMD quad AAC", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38be, "Yoga S980-14.5 proX YC Dual", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38bf, "Yoga S980-14.5 proX LX Dual", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C), ++ SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), +-- 
+2.40.1 + diff --git a/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch b/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch new file mode 100644 index 00000000000..43f113c540f --- /dev/null +++ b/queue-6.1/arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch @@ -0,0 +1,60 @@ +From 5542cbfe40b5adaa14b2ef882139a5058195903b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Jan 2023 12:19:55 -0300 +Subject: arm64: Avoid repeated AA64MMFR1_EL1 register read on pagefault path + +From: Gabriel Krisman Bertazi + +[ Upstream commit a89c6bcdac22bec1bfbe6e64060b4cf5838d4f47 ] + +Accessing AA64MMFR1_EL1 is expensive in KVM guests, since it is emulated +in the hypervisor. In fact, ARM documentation mentions some feature +registers are not supposed to be accessed frequently by the OS, and +therefore should be emulated for guests [1]. + +Commit 0388f9c74330 ("arm64: mm: Implement +arch_wants_old_prefaulted_pte()") introduced a read of this register in +the page fault path. But, even when the feature of setting faultaround +pages with the old flag is disabled for a given cpu, we are still paying +the cost of checking the register on every pagefault. This results in an +explosion of vmexit events in KVM guests, which directly impacts the +performance of virtualized workloads. For instance, running kernbench +yields a 15% increase in system time solely due to the increased vmexit +cycles. + +This patch avoids the extra cost by using the sanitized cached value. +It should be safe to do so, since this register mustn't change for a +given cpu. 
+ +[1] https://developer.arm.com/-/media/Arm%20Developer%20Community/PDF/Learn%20the%20Architecture/Armv8-A%20virtualization.pdf?revision=a765a7df-1a00-434d-b241-357bfda2dd31 + +Signed-off-by: Gabriel Krisman Bertazi +Acked-by: Will Deacon +Reviewed-by: Anshuman Khandual +Link: https://lore.kernel.org/r/20230109151955.8292-1-krisman@suse.de +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/cpufeature.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h +index f73f11b550425..5bf0f9aa46267 100644 +--- a/arch/arm64/include/asm/cpufeature.h ++++ b/arch/arm64/include/asm/cpufeature.h +@@ -863,7 +863,11 @@ static inline bool cpu_has_hw_af(void) + if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) + return false; + +- mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); ++ /* ++ * Use cached version to avoid emulated msr operation on KVM ++ * guests. ++ */ ++ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); + } +-- +2.40.1 + diff --git a/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch b/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch new file mode 100644 index 00000000000..20b5c461c1f --- /dev/null +++ b/queue-6.1/asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch @@ -0,0 +1,35 @@ +From bb5d98d6ac2702dae4a90e5e00540e700d4b378e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Sep 2023 20:32:24 +0530 +Subject: ASoC: soc-utils: Export snd_soc_dai_is_dummy() symbol + +From: Sameer Pujar + +[ Upstream commit f101583fa9f8c3f372d4feb61d67da0ccbf4d9a5 ] + +Export symbol snd_soc_dai_is_dummy() for usage outside core driver +modules. This is required by Tegra ASoC machine driver. 
+ +Signed-off-by: Sameer Pujar +Link: https://lore.kernel.org/r/1694098945-32760-2-git-send-email-spujar@nvidia.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/soc-utils.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c +index a4dba0b751e76..1bbd1d077dfd9 100644 +--- a/sound/soc/soc-utils.c ++++ b/sound/soc/soc-utils.c +@@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai) + return 1; + return 0; + } ++EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy); + + int snd_soc_component_is_dummy(struct snd_soc_component *component) + { +-- +2.40.1 + diff --git a/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch b/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch new file mode 100644 index 00000000000..a98e0a1f873 --- /dev/null +++ b/queue-6.1/asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch @@ -0,0 +1,90 @@ +From aacf2dd084dffdc963d1a0d15608150066363397 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Sep 2023 20:32:25 +0530 +Subject: ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates + +From: Sameer Pujar + +[ Upstream commit e765886249c533e1bb5cbc3cd741bad677417312 ] + +Tegra audio graph card has many DAI links which connects internal +AHUB modules and external audio codecs. Since these are DPCM links, +hw_params() call in the machine driver happens for each connected +BE link and PLLA is updated every time. This is not really needed +for all links as only I/O link DAIs derive respective clocks from +PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock +updates to DAIs over I/O links. + +This found to be fixing a DMIC clock discrepancy which is suspected +to happen because of back to back quick PLLA and PLLA_OUT0 rate +updates. This was observed on Jetson TX2 platform where DMIC clock +ended up with unexpected value. 
+ +Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") +Cc: stable@vger.kernel.org +Signed-off-by: Sameer Pujar +Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/tegra/tegra_audio_graph_card.c | 30 ++++++++++++++---------- + 1 file changed, 17 insertions(+), 13 deletions(-) + +diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c +index 1f2c5018bf5ac..4737e776d3837 100644 +--- a/sound/soc/tegra/tegra_audio_graph_card.c ++++ b/sound/soc/tegra/tegra_audio_graph_card.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #define MAX_PLLA_OUT0_DIV 128 + +@@ -44,6 +45,21 @@ struct tegra_audio_cdata { + unsigned int plla_out0_rates[NUM_RATE_TYPE]; + }; + ++static bool need_clk_update(struct snd_soc_dai *dai) ++{ ++ if (snd_soc_dai_is_dummy(dai) || ++ !dai->driver->ops || ++ !dai->driver->name) ++ return false; ++ ++ if (strstr(dai->driver->name, "I2S") || ++ strstr(dai->driver->name, "DMIC") || ++ strstr(dai->driver->name, "DSPK")) ++ return true; ++ ++ return false; ++} ++ + /* Setup PLL clock as per the given sample rate */ + static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +@@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + int err; + +- /* +- * This gets called for each DAI link (FE or BE) when DPCM is used. +- * We may not want to update PLLA rate for each call. So PLLA update +- * must be restricted to external I/O links (I2S, DMIC or DSPK) since +- * they actually depend on it. I/O modules update their clocks in +- * hw_param() of their respective component driver and PLLA rate +- * update here helps them to derive appropriate rates. 
+- * +- * TODO: When more HW accelerators get added (like sample rate +- * converter, volume gain controller etc., which don't really +- * depend on PLLA) we need a better way to filter here. +- */ +- if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { ++ if (need_clk_update(cpu_dai)) { + err = tegra_audio_graph_update_pll(substream, params); + if (err) + return err; +-- +2.40.1 + diff --git a/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch b/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch new file mode 100644 index 00000000000..d7d03bbe818 --- /dev/null +++ b/queue-6.1/ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch @@ -0,0 +1,152 @@ +From 4d1fc727026b5029e2f304f3e50aedbc215eb4ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Sep 2023 09:06:23 +0900 +Subject: ata: libata-scsi: Fix delayed scsi_rescan_device() execution + +From: Damien Le Moal + +[ Upstream commit 8b4d9469d0b0e553208ee6f62f2807111fde18b9 ] + +Commit 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after +device resume") modified ata_scsi_dev_rescan() to check the scsi device +"is_suspended" power field to ensure that the scsi device associated +with an ATA device is fully resumed when scsi_rescan_device() is +executed. However, this fix is problematic as: +1) It relies on a PM internal field that should not be used without PM + device locking protection. +2) The check for is_suspended and the call to scsi_rescan_device() are + not atomic and a suspend PM event may be triggered between them, + casuing scsi_rescan_device() to be called on a suspended device and + in that function blocking while holding the scsi device lock. This + would deadlock a following resume operation. +These problems can trigger PM deadlocks on resume, especially with +resume operations triggered quickly after or during suspend operations. 
+E.g., a simple bash script like: + +for (( i=0; i<10; i++ )); do + echo "+2" > /sys/class/rtc/rtc0/wakealarm + echo mem > /sys/power/state +done + +that triggers a resume 2 seconds after starting to suspend a system can +quickly lead to a PM deadlock preventing the system from correctly +resuming. + +Fix this by replacing the check on is_suspended with a check on the +return value given by scsi_rescan_device() as that function will fail if +called against a suspended device. Also make sure rescan tasks already +scheduled are first cancelled before suspending an ata port. + +Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Niklas Cassel +Tested-by: Geert Uytterhoeven +Reviewed-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/ata/libata-core.c | 16 ++++++++++++++++ + drivers/ata/libata-scsi.c | 33 +++++++++++++++------------------ + 2 files changed, 31 insertions(+), 18 deletions(-) + +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index 25b9bdf2fc380..6a053cd0cf410 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -5022,11 +5022,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET + + static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg) + { ++ /* ++ * We are about to suspend the port, so we do not care about ++ * scsi_rescan_device() calls scheduled by previous resume operations. ++ * The next resume will schedule the rescan again. So cancel any rescan ++ * that is not done yet. ++ */ ++ cancel_delayed_work_sync(&ap->scsi_rescan_task); ++ + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false); + } + + static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg) + { ++ /* ++ * We are about to suspend the port, so we do not care about ++ * scsi_rescan_device() calls scheduled by previous resume operations.
++ * The next resume will schedule the rescan again. So cancel any rescan ++ * that is not done yet. ++ */ ++ cancel_delayed_work_sync(&ap->scsi_rescan_task); ++ + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true); + } + +diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c +index b348f77b91231..7b9c9264b9a72 100644 +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -4648,7 +4648,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) + struct ata_link *link; + struct ata_device *dev; + unsigned long flags; +- bool delay_rescan = false; ++ int ret = 0; + + mutex_lock(&ap->scsi_scan_mutex); + spin_lock_irqsave(ap->lock, flags); +@@ -4657,37 +4657,34 @@ void ata_scsi_dev_rescan(struct work_struct *work) + ata_for_each_dev(dev, link, ENABLED) { + struct scsi_device *sdev = dev->sdev; + ++ /* ++ * If the port was suspended before this was scheduled, ++ * bail out. ++ */ ++ if (ap->pflags & ATA_PFLAG_SUSPENDED) ++ goto unlock; ++ + if (!sdev) + continue; + if (scsi_device_get(sdev)) + continue; + +- /* +- * If the rescan work was scheduled because of a resume +- * event, the port is already fully resumed, but the +- * SCSI device may not yet be fully resumed. In such +- * case, executing scsi_rescan_device() may cause a +- * deadlock with the PM code on device_lock(). Prevent +- * this by giving up and retrying rescan after a short +- * delay. 
+- */ +- delay_rescan = sdev->sdev_gendev.power.is_suspended; +- if (delay_rescan) { +- scsi_device_put(sdev); +- break; +- } +- + spin_unlock_irqrestore(ap->lock, flags); +- scsi_rescan_device(sdev); ++ ret = scsi_rescan_device(sdev); + scsi_device_put(sdev); + spin_lock_irqsave(ap->lock, flags); ++ ++ if (ret) ++ goto unlock; + } + } + ++unlock: + spin_unlock_irqrestore(ap->lock, flags); + mutex_unlock(&ap->scsi_scan_mutex); + +- if (delay_rescan) ++ /* Reschedule with a delay if scsi_rescan_device() returned an error */ ++ if (ret) + schedule_delayed_work(&ap->scsi_rescan_task, + msecs_to_jiffies(5)); + } +-- +2.40.1 + diff --git a/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch b/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch new file mode 100644 index 00000000000..97cf41a10a5 --- /dev/null +++ b/queue-6.1/ata-scsi-do-not-issue-start-stop-unit-on-resume.patch @@ -0,0 +1,123 @@ +From 3bdffcb01d762ec932de44fbb950d6d3a828d08f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jul 2023 13:23:14 +0900 +Subject: ata,scsi: do not issue START STOP UNIT on resume + +From: Damien Le Moal + +[ Upstream commit 0a8589055936d8feb56477123a8373ac634018fa ] + +During system resume, ata_port_pm_resume() triggers ata EH to +1) Resume the controller +2) Reset and rescan the ports +3) Revalidate devices +This EH execution is started asynchronously from ata_port_pm_resume(), +which means that when sd_resume() is executed, none or only part of the +above processing may have been executed. However, sd_resume() issues a +START STOP UNIT to wake up the drive from sleep mode. This command is +translated to ATA with ata_scsi_start_stop_xlat() and issued to the +device. However, depending on the state of execution of the EH process +and revalidation triggerred by ata_port_pm_resume(), two things may +happen: +1) The START STOP UNIT fails if it is received before the controller has + been reenabled at the beginning of the EH execution. 
This is visible + with error messages like: + +ata10.00: device reported invalid CHS sector 0 +sd 9:0:0:0: [sdc] Start/Stop Unit failed: Result: hostbyte=DID_OK driverbyte=DRIVER_OK +sd 9:0:0:0: [sdc] Sense Key : Illegal Request [current] +sd 9:0:0:0: [sdc] Add. Sense: Unaligned write command +sd 9:0:0:0: PM: dpm_run_callback(): scsi_bus_resume+0x0/0x90 returns -5 +sd 9:0:0:0: PM: failed to resume async: error -5 + +2) The START STOP UNIT command is received while the EH process is + on-going, which means that it is stopped and must wait for its + completion, at which point the command is rather useless as the drive + is already fully spun up. This case also results in a + significant delay in sd_resume() which is observable by users as + the entire system resume completion is delayed. + +Given that ATA devices will be woken up by libata activity on resume, +sd_resume() has no need to issue a START STOP UNIT command, which solves +the above mentioned problems. Do not issue this command by introducing +the new scsi_device flag no_start_on_resume and setting this flag to 1 +in ata_scsi_dev_config(). sd_resume() is modified to issue a START STOP +UNIT command only if this flag is not set.
+ +Reported-by: Paul Ausbeck +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=215880 +Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management") +Signed-off-by: Damien Le Moal +Tested-by: Tanner Watkins +Tested-by: Paul Ausbeck +Reviewed-by: Hannes Reinecke +Reviewed-by: Bart Van Assche +Stable-dep-of: 99398d2070ab ("scsi: sd: Do not issue commands to suspended disks on shutdown") +Signed-off-by: Sasha Levin +--- + drivers/ata/libata-scsi.c | 7 +++++++ + drivers/scsi/sd.c | 9 ++++++--- + include/scsi/scsi_device.h | 1 + + 3 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c +index d28628b964e29..9c8dd9f86cbb3 100644 +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -1081,7 +1081,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) + } + } else { + sdev->sector_size = ata_id_logical_sector_size(dev->id); ++ /* ++ * Stop the drive on suspend but do not issue START STOP UNIT ++ * on resume as this is not necessary and may fail: the device ++ * will be woken up by ata_port_pm_resume() with a port reset ++ * and device revalidation. 
++ */ + sdev->manage_start_stop = 1; ++ sdev->no_start_on_resume = 1; + } + + /* +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index e934779bf05c8..5bfca49415113 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3718,7 +3718,7 @@ static int sd_suspend_runtime(struct device *dev) + static int sd_resume(struct device *dev) + { + struct scsi_disk *sdkp = dev_get_drvdata(dev); +- int ret; ++ int ret = 0; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; +@@ -3726,8 +3726,11 @@ static int sd_resume(struct device *dev) + if (!sdkp->device->manage_start_stop) + return 0; + +- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); +- ret = sd_start_stop_device(sdkp, 1); ++ if (!sdkp->device->no_start_on_resume) { ++ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ++ ret = sd_start_stop_device(sdkp, 1); ++ } ++ + if (!ret) + opal_unlock_from_suspend(sdkp->opal_dev); + return ret; +diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h +index 006858ed04e8c..9fdc77db3a2a8 100644 +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -193,6 +193,7 @@ struct scsi_device { + unsigned no_start_on_add:1; /* do not issue start on add */ + unsigned allow_restart:1; /* issue START_UNIT in error handler */ + unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */ ++ unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */ + unsigned start_stop_pwr_cond:1; /* Set power cond. 
in START_STOP_UNIT */ + unsigned no_uld_attach:1; /* disable connecting to upper level drivers */ + unsigned select_no_atn:1; +-- +2.40.1 + diff --git a/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch b/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch new file mode 100644 index 00000000000..a24297c5a06 --- /dev/null +++ b/queue-6.1/btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch @@ -0,0 +1,738 @@ +From 22c78442fa7b92733ad1e22f57af18e0444b8a19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 12:58:26 -0400 +Subject: btrfs: setup qstr from dentrys using fscrypt helper + +From: Sweet Tea Dorminy + +[ Upstream commit ab3c5c18e8fa3f8ea116016095d25adab466cd39 ] + +Most places where we get a struct qstr, we are doing so from a dentry. +With fscrypt, the dentry's name may be encrypted on-disk, so fscrypt +provides a helper to convert a dentry name to the appropriate disk name +if necessary. Convert each of the dentry name accesses to use +fscrypt_setup_filename(), then convert the resulting fscrypt_name back +to an unencrypted qstr. This does not work for nokey names, but the +specific locations that could spawn nokey names are noted. + +At present, since there are no encrypted directories, nothing goes down +the filename encryption paths. 
+ +Signed-off-by: Sweet Tea Dorminy +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write") +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 3 + + fs/btrfs/inode.c | 192 +++++++++++++++++++++++++++++++---------- + fs/btrfs/transaction.c | 40 ++++++--- + fs/btrfs/tree-log.c | 11 ++- + 4 files changed, 189 insertions(+), 57 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 6718cee57a94e..5120cea15b096 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include "extent-io-tree.h" + #include "extent_io.h" + #include "extent_map.h" +@@ -3396,6 +3397,8 @@ struct btrfs_new_inode_args { + */ + struct posix_acl *default_acl; + struct posix_acl *acl; ++ struct fscrypt_name fname; ++ struct qstr name; + }; + int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, + unsigned int *trans_num_items); +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index a5e61ad2ba696..b5224dbaa4165 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4415,28 +4415,41 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + struct btrfs_trans_handle *trans; + struct inode *inode = d_inode(dentry); + int ret; ++ struct fscrypt_name fname; ++ struct qstr name; ++ ++ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); ++ if (ret) ++ return ret; ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ ++ /* This needs to handle no-key deletions later on */ + + trans = __unlink_start_trans(dir); +- if (IS_ERR(trans)) +- return PTR_ERR(trans); ++ if (IS_ERR(trans)) { ++ ret = PTR_ERR(trans); ++ goto fscrypt_free; ++ } + + btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + 0); + + ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), +- &dentry->d_name); ++ &name); + if (ret) +- goto out; ++ goto end_trans; + + if (inode->i_nlink 
== 0) { + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); + if (ret) +- goto out; ++ goto end_trans; + } + +-out: ++end_trans: + btrfs_end_transaction(trans); + btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info); ++fscrypt_free: ++ fscrypt_free_filename(&fname); + return ret; + } + +@@ -4449,11 +4462,19 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; +- const struct qstr *name = &dentry->d_name; ++ struct qstr name; + u64 index; + int ret; + u64 objectid; + u64 dir_ino = btrfs_ino(BTRFS_I(dir)); ++ struct fscrypt_name fname; ++ ++ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); ++ if (ret) ++ return ret; ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ ++ /* This needs to handle no-key deletions later on */ + + if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { + objectid = inode->root->root_key.objectid; +@@ -4461,14 +4482,17 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + objectid = inode->location.objectid; + } else { + WARN_ON(1); ++ fscrypt_free_filename(&fname); + return -EINVAL; + } + + path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; ++ if (!path) { ++ ret = -ENOMEM; ++ goto out; ++ } + +- di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); ++ di = btrfs_lookup_dir_item(trans, root, path, dir_ino, &name, -1); + if (IS_ERR_OR_NULL(di)) { + ret = di ? PTR_ERR(di) : -ENOENT; + goto out; +@@ -4494,7 +4518,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + * call btrfs_del_root_ref, and it _shouldn't_ fail. 
+ */ + if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { +- di = btrfs_search_dir_index_item(root, path, dir_ino, name); ++ di = btrfs_search_dir_index_item(root, path, dir_ino, &name); + if (IS_ERR_OR_NULL(di)) { + if (!di) + ret = -ENOENT; +@@ -4511,7 +4535,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + } else { + ret = btrfs_del_root_ref(trans, objectid, + root->root_key.objectid, dir_ino, +- &index, name); ++ &index, &name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; +@@ -4524,7 +4548,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + goto out; + } + +- btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name->len * 2); ++ btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name.len * 2); + inode_inc_iversion(dir); + dir->i_mtime = current_time(dir); + dir->i_ctime = dir->i_mtime; +@@ -4533,6 +4557,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + btrfs_abort_transaction(trans, ret); + out: + btrfs_free_path(path); ++ fscrypt_free_filename(&fname); + return ret; + } + +@@ -4796,6 +4821,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + int err = 0; + struct btrfs_trans_handle *trans; + u64 last_unlink_trans; ++ struct fscrypt_name fname; ++ struct qstr name; + + if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; +@@ -4808,9 +4835,18 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + return btrfs_delete_subvolume(dir, dentry); + } + ++ err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); ++ if (err) ++ return err; ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ ++ /* This needs to handle no-key deletions later on */ ++ + trans = __unlink_start_trans(dir); +- if (IS_ERR(trans)) +- return PTR_ERR(trans); ++ if (IS_ERR(trans)) { ++ err = PTR_ERR(trans); ++ goto out_notrans; ++ } + + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + err = btrfs_unlink_subvol(trans, dir, dentry); 
+@@ -4825,7 +4861,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + + /* now the directory is empty */ + err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), +- &dentry->d_name); ++ &name); + if (!err) { + btrfs_i_size_write(BTRFS_I(inode), 0); + /* +@@ -4844,7 +4880,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + } + out: + btrfs_end_transaction(trans); ++out_notrans: + btrfs_btree_balance_dirty(fs_info); ++ fscrypt_free_filename(&fname); + + return err; + } +@@ -5525,18 +5563,27 @@ void btrfs_evict_inode(struct inode *inode) + static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + struct btrfs_key *location, u8 *type) + { +- const struct qstr *name = &dentry->d_name; ++ struct qstr name; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + int ret = 0; ++ struct fscrypt_name fname; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ++ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); ++ if (ret) ++ goto out; ++ ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ ++ /* This needs to handle no-key deletions later on */ ++ + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), +- name, 0); ++ &name, 0); + if (IS_ERR_OR_NULL(di)) { + ret = di ? 
PTR_ERR(di) : -ENOENT; + goto out; +@@ -5548,12 +5595,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ret = -EUCLEAN; + btrfs_warn(root->fs_info, + "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", +- __func__, name->name, btrfs_ino(BTRFS_I(dir)), ++ __func__, name.name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + } + if (!ret) + *type = btrfs_dir_type(path->nodes[0], di); + out: ++ fscrypt_free_filename(&fname); + btrfs_free_path(path); + return ret; + } +@@ -5576,6 +5624,14 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, + struct btrfs_key key; + int ret; + int err = 0; ++ struct fscrypt_name fname; ++ struct qstr name; ++ ++ ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); ++ if (ret) ++ return ret; ++ ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + + path = btrfs_alloc_path(); + if (!path) { +@@ -5598,12 +5654,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) || +- btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) ++ btrfs_root_ref_name_len(leaf, ref) != name.len) + goto out; + +- ret = memcmp_extent_buffer(leaf, dentry->d_name.name, +- (unsigned long)(ref + 1), +- dentry->d_name.len); ++ ret = memcmp_extent_buffer(leaf, name.name, (unsigned long)(ref + 1), ++ name.len); + if (ret) + goto out; + +@@ -5622,6 +5677,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, + err = 0; + out: + btrfs_free_path(path); ++ fscrypt_free_filename(&fname); + return err; + } + +@@ -6230,9 +6286,19 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, + struct inode *inode = args->inode; + int ret; + ++ if (!args->orphan) { ++ ret = fscrypt_setup_filename(dir, &args->dentry->d_name, 0, ++ 
&args->fname); ++ if (ret) ++ return ret; ++ args->name = (struct qstr)FSTR_TO_QSTR(&args->fname.disk_name); ++ } ++ + ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl); +- if (ret) ++ if (ret) { ++ fscrypt_free_filename(&args->fname); + return ret; ++ } + + /* 1 to add inode item */ + *trans_num_items = 1; +@@ -6272,6 +6338,7 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args) + { + posix_acl_release(args->acl); + posix_acl_release(args->default_acl); ++ fscrypt_free_filename(&args->fname); + } + + /* +@@ -6697,6 +6764,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = d_inode(old_dentry); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); ++ struct fscrypt_name fname; ++ struct qstr name; + u64 index; + int err; + int drop_inode = 0; +@@ -6708,6 +6777,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + if (inode->i_nlink >= BTRFS_LINK_MAX) + return -EMLINK; + ++ err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); ++ if (err) ++ goto fail; ++ ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ + err = btrfs_set_inode_index(BTRFS_I(dir), &index); + if (err) + goto fail; +@@ -6734,7 +6809,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); + + err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), +- &dentry->d_name, 1, index); ++ &name, 1, index); + + if (err) { + drop_inode = 1; +@@ -6758,6 +6833,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + } + + fail: ++ fscrypt_free_filename(&fname); + if (trans) + btrfs_end_transaction(trans); + if (drop_inode) { +@@ -9030,6 +9106,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, + int ret; + int ret2; + bool need_abort = false; ++ struct fscrypt_name old_fname, new_fname; ++ struct qstr old_name, new_name; + 
+ /* + * For non-subvolumes allow exchange only within one subvolume, in the +@@ -9041,6 +9119,19 @@ static int btrfs_rename_exchange(struct inode *old_dir, + new_ino != BTRFS_FIRST_FREE_OBJECTID)) + return -EXDEV; + ++ ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); ++ if (ret) ++ return ret; ++ ++ ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); ++ if (ret) { ++ fscrypt_free_filename(&old_fname); ++ return ret; ++ } ++ ++ old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name); ++ new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name); ++ + /* close the race window with snapshot create/destroy ioctl */ + if (old_ino == BTRFS_FIRST_FREE_OBJECTID || + new_ino == BTRFS_FIRST_FREE_OBJECTID) +@@ -9108,8 +9199,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. */ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name, +- old_ino, ++ ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino, + btrfs_ino(BTRFS_I(new_dir)), + old_idx); + if (ret) +@@ -9122,8 +9212,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. 
*/ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, root, &old_dentry->d_name, +- new_ino, ++ ret = btrfs_insert_inode_ref(trans, root, &old_name, new_ino, + btrfs_ino(BTRFS_I(old_dir)), + new_idx); + if (ret) { +@@ -9158,8 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* src is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(old_dentry->d_inode), +- &old_dentry->d_name, +- &old_rename_ctx); ++ &old_name, &old_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); + } +@@ -9174,8 +9262,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* dest is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(new_dentry->d_inode), +- &new_dentry->d_name, +- &new_rename_ctx); ++ &new_name, &new_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode)); + } +@@ -9185,14 +9272,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- &new_dentry->d_name, 0, old_idx); ++ &new_name, 0, old_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; + } + + ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), +- &old_dentry->d_name, 0, new_idx); ++ &old_name, 0, new_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +@@ -9235,6 +9322,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, + old_ino == BTRFS_FIRST_FREE_OBJECTID) + up_read(&fs_info->subvol_sem); + ++ fscrypt_free_filename(&new_fname); ++ fscrypt_free_filename(&old_fname); + return ret; + } + +@@ -9274,6 +9363,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + int ret; + int ret2; + u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); ++ struct fscrypt_name old_fname, new_fname; ++ struct qstr old_name, new_name; + + if (btrfs_ino(BTRFS_I(new_dir)) == 
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; +@@ -9290,21 +9381,32 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; + ++ ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); ++ if (ret) ++ return ret; ++ ++ ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); ++ if (ret) { ++ fscrypt_free_filename(&old_fname); ++ return ret; ++ } ++ ++ old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name); ++ new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name); + + /* check for collisions, even if the name isn't there */ +- ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, +- &new_dentry->d_name); ++ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_name); + + if (ret) { + if (ret == -EEXIST) { + /* we shouldn't get + * eexist without a new_inode */ + if (WARN_ON(!new_inode)) { +- return ret; ++ goto out_fscrypt_names; + } + } else { + /* maybe -EOVERFLOW */ +- return ret; ++ goto out_fscrypt_names; + } + } + ret = 0; +@@ -9387,8 +9489,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + /* force full log commit if subvolume involved. 
*/ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name, +- old_ino, ++ ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino, + btrfs_ino(BTRFS_I(new_dir)), index); + if (ret) + goto out_fail; +@@ -9412,7 +9513,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } else { + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(d_inode(old_dentry)), +- &old_dentry->d_name, &rename_ctx); ++ &old_name, &rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); + } +@@ -9431,7 +9532,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } else { + ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(d_inode(new_dentry)), +- &new_dentry->d_name); ++ &new_name); + } + if (!ret && new_inode->i_nlink == 0) + ret = btrfs_orphan_add(trans, +@@ -9443,7 +9544,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- &new_dentry->d_name, 0, index); ++ &new_name, 0, index); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +@@ -9478,6 +9579,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + out_whiteout_inode: + if (flags & RENAME_WHITEOUT) + iput(whiteout_args.inode); ++out_fscrypt_names: ++ fscrypt_free_filename(&old_fname); ++ fscrypt_free_filename(&new_fname); + return ret; + } + +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index b0fe054c9f401..c8918bdf15ccd 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1627,10 +1628,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_root *root = pending->root; + struct btrfs_root *parent_root; + struct btrfs_block_rsv *rsv; +- struct inode *parent_inode; ++ struct inode *parent_inode = pending->dir; + struct 
btrfs_path *path; + struct btrfs_dir_item *dir_item; +- struct dentry *dentry; + struct extent_buffer *tmp; + struct extent_buffer *old; + struct timespec64 cur_time; +@@ -1639,6 +1639,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + u64 index = 0; + u64 objectid; + u64 root_flags; ++ unsigned int nofs_flags; ++ struct fscrypt_name fname; ++ struct qstr name; + + ASSERT(pending->path); + path = pending->path; +@@ -1646,9 +1649,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + ASSERT(pending->root_item); + new_root_item = pending->root_item; + ++ /* ++ * We're inside a transaction and must make sure that any potential ++ * allocations with GFP_KERNEL in fscrypt won't recurse back to ++ * filesystem. ++ */ ++ nofs_flags = memalloc_nofs_save(); ++ pending->error = fscrypt_setup_filename(parent_inode, ++ &pending->dentry->d_name, 0, ++ &fname); ++ memalloc_nofs_restore(nofs_flags); ++ if (pending->error) ++ goto free_pending; ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); ++ + pending->error = btrfs_get_free_objectid(tree_root, &objectid); + if (pending->error) +- goto no_free_objectid; ++ goto free_fname; + + /* + * Make qgroup to skip current new snapshot's qgroupid, as it is +@@ -1677,8 +1694,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + trace_btrfs_space_reservation(fs_info, "transaction", + trans->transid, + trans->bytes_reserved, 1); +- dentry = pending->dentry; +- parent_inode = pending->dir; + parent_root = BTRFS_I(parent_inode)->root; + ret = record_root_in_trans(trans, parent_root, 0); + if (ret) +@@ -1694,7 +1709,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + /* check if there is a file/dir which has the same name. 
*/ + dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, + btrfs_ino(BTRFS_I(parent_inode)), +- &dentry->d_name, 0); ++ &name, 0); + if (dir_item != NULL && !IS_ERR(dir_item)) { + pending->error = -EEXIST; + goto dir_item_existed; +@@ -1789,7 +1804,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + ret = btrfs_add_root_ref(trans, objectid, + parent_root->root_key.objectid, + btrfs_ino(BTRFS_I(parent_inode)), index, +- &dentry->d_name); ++ &name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; +@@ -1821,9 +1836,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + if (ret < 0) + goto fail; + +- ret = btrfs_insert_dir_item(trans, &dentry->d_name, +- BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, +- index); ++ ret = btrfs_insert_dir_item(trans, &name, BTRFS_I(parent_inode), &key, ++ BTRFS_FT_DIR, index); + /* We have check then name at the beginning, so it is impossible. */ + BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); + if (ret) { +@@ -1832,7 +1846,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + } + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + +- dentry->d_name.len * 2); ++ name.len * 2); + parent_inode->i_mtime = current_time(parent_inode); + parent_inode->i_ctime = parent_inode->i_mtime; + ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode)); +@@ -1864,7 +1878,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + trans->bytes_reserved = 0; + clear_skip_qgroup: + btrfs_clear_skip_qgroup(trans); +-no_free_objectid: ++free_fname: ++ fscrypt_free_filename(&fname); ++free_pending: + kfree(new_root_item); + pending->root_item = NULL; + btrfs_free_path(path); +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 9f55e81acc0ef..25fd3f34b8f21 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -7471,9 +7471,16 @@ void btrfs_log_new_name(struct 
btrfs_trans_handle *trans, + if (old_dir && old_dir->logged_trans == trans->transid) { + struct btrfs_root *log = old_dir->root->log_root; + struct btrfs_path *path; ++ struct fscrypt_name fname; ++ struct qstr name; + + ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX); + ++ ret = fscrypt_setup_filename(&old_dir->vfs_inode, ++ &old_dentry->d_name, 0, &fname); ++ if (ret) ++ goto out; ++ name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + /* + * We have two inodes to update in the log, the old directory and + * the inode that got renamed, so we must pin the log to prevent +@@ -7493,6 +7500,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; ++ fscrypt_free_filename(&fname); + goto out; + } + +@@ -7508,7 +7516,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + */ + mutex_lock(&old_dir->log_mutex); + ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), +- &old_dentry->d_name, old_dir_index); ++ &name, old_dir_index); + if (ret > 0) { + /* + * The dentry does not exist in the log, so record its +@@ -7522,6 +7530,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + mutex_unlock(&old_dir->log_mutex); + + btrfs_free_path(path); ++ fscrypt_free_filename(&fname); + if (ret < 0) + goto out; + } +-- +2.40.1 + diff --git a/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch b/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch new file mode 100644 index 00000000000..a05652f1bcb --- /dev/null +++ b/queue-6.1/btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch @@ -0,0 +1,990 @@ +From a3bedc8e6ad79645b5ddb23c0d404f4e8ea95003 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 12:58:27 -0400 +Subject: btrfs: use struct fscrypt_str instead of struct qstr + +From: Sweet Tea Dorminy + +[ Upstream commit 6db75318823a169e836a478ca57d6a7c0a156b77 ] + +While struct qstr is more natural without fscrypt, since it's provided +by 
dentries, struct fscrypt_str is provided by the fscrypt handlers +processing dentries, and is thus more natural in the fscrypt world. +Replace all of the struct qstr uses with struct fscrypt_str. + +Signed-off-by: Sweet Tea Dorminy +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write") +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 19 +++++---- + fs/btrfs/dir-item.c | 10 ++--- + fs/btrfs/inode-item.c | 14 +++---- + fs/btrfs/inode-item.h | 10 ++--- + fs/btrfs/inode.c | 87 +++++++++++++++++------------------------- + fs/btrfs/ioctl.c | 4 +- + fs/btrfs/root-tree.c | 4 +- + fs/btrfs/send.c | 4 +- + fs/btrfs/super.c | 2 +- + fs/btrfs/transaction.c | 13 +++---- + fs/btrfs/tree-log.c | 42 ++++++++++---------- + fs/btrfs/tree-log.h | 4 +- + 12 files changed, 95 insertions(+), 118 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 5120cea15b096..27d06bb5e5c05 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3240,10 +3240,10 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb) + /* root-item.c */ + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 sequence, +- const struct qstr *name); ++ const struct fscrypt_str *name); + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 *sequence, +- const struct qstr *name); ++ const struct fscrypt_str *name); + int btrfs_del_root(struct btrfs_trans_handle *trans, + const struct btrfs_key *key); + int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, +@@ -3272,23 +3272,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); + + /* dir-item.c */ + int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +- const struct qstr *name); ++ const struct fscrypt_str *name); + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, +- const 
struct qstr *name, struct btrfs_inode *dir, ++ const struct fscrypt_str *name, struct btrfs_inode *dir, + struct btrfs_key *location, u8 type, u64 index); + struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- const struct qstr *name, int mod); ++ const struct fscrypt_str *name, int mod); + struct btrfs_dir_item * + btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- u64 index, const struct qstr *name, int mod); ++ u64 index, const struct fscrypt_str *name, int mod); + struct btrfs_dir_item * + btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, +- const struct qstr *name); ++ const struct fscrypt_str *name); + int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +@@ -3369,10 +3369,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); + int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, struct btrfs_inode *inode, +- const struct qstr *name); ++ const struct fscrypt_str *name); + int btrfs_add_link(struct btrfs_trans_handle *trans, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, +- const struct qstr *name, int add_backref, u64 index); ++ const struct fscrypt_str *name, int add_backref, u64 index); + int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry); + int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, + int front); +@@ -3398,7 +3398,6 @@ struct btrfs_new_inode_args { + struct posix_acl *default_acl; + struct posix_acl *acl; + struct fscrypt_name fname; +- struct qstr name; + }; + int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, + unsigned int *trans_num_items); +diff --git a/fs/btrfs/dir-item.c 
b/fs/btrfs/dir-item.c +index 8c60f37eb13fd..fdab48c1abb8a 100644 +--- a/fs/btrfs/dir-item.c ++++ b/fs/btrfs/dir-item.c +@@ -104,7 +104,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + * Will return 0 or -ENOMEM + */ + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, +- const struct qstr *name, struct btrfs_inode *dir, ++ const struct fscrypt_str *name, struct btrfs_inode *dir, + struct btrfs_key *location, u8 type, u64 index) + { + int ret = 0; +@@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir( + struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- const struct qstr *name, ++ const struct fscrypt_str *name, + int mod) + { + struct btrfs_key key; +@@ -225,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + } + + int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + int ret; + struct btrfs_key key; +@@ -302,7 +302,7 @@ struct btrfs_dir_item * + btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- u64 index, const struct qstr *name, int mod) ++ u64 index, const struct fscrypt_str *name, int mod) + { + struct btrfs_dir_item *di; + struct btrfs_key key; +@@ -321,7 +321,7 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + + struct btrfs_dir_item * + btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, +- u64 dirid, const struct qstr *name) ++ u64 dirid, const struct fscrypt_str *name) + { + struct btrfs_dir_item *di; + struct btrfs_key key; +diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c +index 61b323517a40b..5add022d3534f 100644 +--- a/fs/btrfs/inode-item.c ++++ b/fs/btrfs/inode-item.c +@@ -11,7 +11,7 @@ + + struct btrfs_inode_ref *btrfs_find_name_in_backref(struct 
extent_buffer *leaf, + int slot, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + struct btrfs_inode_ref *ref; + unsigned long ptr; +@@ -38,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, + + struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( + struct extent_buffer *leaf, int slot, u64 ref_objectid, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + struct btrfs_inode_extref *extref; + unsigned long ptr; +@@ -77,7 +77,7 @@ struct btrfs_inode_extref * + btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +- const struct qstr *name, ++ const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, int ins_len, + int cow) + { +@@ -100,7 +100,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, + + static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, + u64 *index) + { +@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + } + + int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, const struct qstr *name, ++ struct btrfs_root *root, const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, u64 *index) + { + struct btrfs_path *path; +@@ -247,7 +247,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + */ + static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, + u64 index) + { +@@ -302,7 +302,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + + /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */ + int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, +- struct 
btrfs_root *root, const struct qstr *name, ++ struct btrfs_root *root, const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, u64 index) + { + struct btrfs_fs_info *fs_info = root->fs_info; +diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h +index 3c657c670cfdf..b80aeb7157010 100644 +--- a/fs/btrfs/inode-item.h ++++ b/fs/btrfs/inode-item.h +@@ -64,10 +64,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_truncate_control *control); + int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, const struct qstr *name, ++ struct btrfs_root *root, const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, u64 index); + int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, const struct qstr *name, ++ struct btrfs_root *root, const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, u64 *index); + int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +@@ -80,15 +80,15 @@ struct btrfs_inode_extref *btrfs_lookup_inode_extref( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +- const struct qstr *name, ++ const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, int ins_len, + int cow); + + struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, + int slot, +- const struct qstr *name); ++ const struct fscrypt_str *name); + struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( + struct extent_buffer *leaf, int slot, u64 ref_objectid, +- const struct qstr *name); ++ const struct fscrypt_str *name); + + #endif +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index b5224dbaa4165..47c5be597368b 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + static int __btrfs_unlink_inode(struct 
btrfs_trans_handle *trans, + struct btrfs_inode *dir, + struct btrfs_inode *inode, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_rename_ctx *rename_ctx) + { + struct btrfs_root *root = dir->root; +@@ -4375,7 +4375,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, struct btrfs_inode *inode, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + int ret; + +@@ -4416,12 +4416,10 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + struct inode *inode = d_inode(dentry); + int ret; + struct fscrypt_name fname; +- struct qstr name; + + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + return ret; +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + + /* This needs to handle no-key deletions later on */ + +@@ -4435,7 +4433,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + 0); + + ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), +- &name); ++ &fname.disk_name); + if (ret) + goto end_trans; + +@@ -4462,7 +4460,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; +- struct qstr name; + u64 index; + int ret; + u64 objectid; +@@ -4472,7 +4469,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + return ret; +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + + /* This needs to handle no-key deletions later on */ + +@@ -4492,7 +4488,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + goto out; + } + +- di = btrfs_lookup_dir_item(trans, root, path, dir_ino, &name, -1); ++ di = btrfs_lookup_dir_item(trans, root, path, dir_ino, ++ &fname.disk_name, -1); + if (IS_ERR_OR_NULL(di)) { + ret = di ? 
PTR_ERR(di) : -ENOENT; + goto out; +@@ -4518,7 +4515,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + * call btrfs_del_root_ref, and it _shouldn't_ fail. + */ + if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { +- di = btrfs_search_dir_index_item(root, path, dir_ino, &name); ++ di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name); + if (IS_ERR_OR_NULL(di)) { + if (!di) + ret = -ENOENT; +@@ -4535,7 +4532,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + } else { + ret = btrfs_del_root_ref(trans, objectid, + root->root_key.objectid, dir_ino, +- &index, &name); ++ &index, &fname.disk_name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; +@@ -4548,7 +4545,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + goto out; + } + +- btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name.len * 2); ++ btrfs_i_size_write(BTRFS_I(dir), dir->i_size - fname.disk_name.len * 2); + inode_inc_iversion(dir); + dir->i_mtime = current_time(dir); + dir->i_ctime = dir->i_mtime; +@@ -4571,7 +4568,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) + struct btrfs_path *path; + struct btrfs_dir_item *di; + struct btrfs_key key; +- struct qstr name = QSTR_INIT("default", 7); ++ struct fscrypt_str name = FSTR_INIT("default", 7); + u64 dir_id; + int ret; + +@@ -4822,7 +4819,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + struct btrfs_trans_handle *trans; + u64 last_unlink_trans; + struct fscrypt_name fname; +- struct qstr name; + + if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; +@@ -4838,7 +4834,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (err) + return err; +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + + /* This needs to handle no-key deletions later on */ + +@@ -4861,7 +4856,7 @@ static int btrfs_rmdir(struct inode *dir, struct 
dentry *dentry) + + /* now the directory is empty */ + err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), +- &name); ++ &fname.disk_name); + if (!err) { + btrfs_i_size_write(BTRFS_I(inode), 0); + /* +@@ -5563,7 +5558,6 @@ void btrfs_evict_inode(struct inode *inode) + static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + struct btrfs_key *location, u8 *type) + { +- struct qstr name; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; +@@ -5578,12 +5572,10 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + if (ret) + goto out; + +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); +- + /* This needs to handle no-key deletions later on */ + + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), +- &name, 0); ++ &fname.disk_name, 0); + if (IS_ERR_OR_NULL(di)) { + ret = di ? PTR_ERR(di) : -ENOENT; + goto out; +@@ -5595,7 +5587,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ret = -EUCLEAN; + btrfs_warn(root->fs_info, + "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", +- __func__, name.name, btrfs_ino(BTRFS_I(dir)), ++ __func__, fname.disk_name.name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + } + if (!ret) +@@ -5625,14 +5617,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, + int ret; + int err = 0; + struct fscrypt_name fname; +- struct qstr name; + + ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (ret) + return ret; + +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); +- + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; +@@ -5654,11 +5643,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + if (btrfs_root_ref_dirid(leaf, ref) != 
btrfs_ino(BTRFS_I(dir)) || +- btrfs_root_ref_name_len(leaf, ref) != name.len) ++ btrfs_root_ref_name_len(leaf, ref) != fname.disk_name.len) + goto out; + +- ret = memcmp_extent_buffer(leaf, name.name, (unsigned long)(ref + 1), +- name.len); ++ ret = memcmp_extent_buffer(leaf, fname.disk_name.name, ++ (unsigned long)(ref + 1), fname.disk_name.len); + if (ret) + goto out; + +@@ -6291,7 +6280,6 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, + &args->fname); + if (ret) + return ret; +- args->name = (struct qstr)FSTR_TO_QSTR(&args->fname.disk_name); + } + + ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl); +@@ -6374,7 +6362,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + { + struct inode *dir = args->dir; + struct inode *inode = args->inode; +- const struct qstr *name = args->orphan ? NULL : &args->dentry->d_name; ++ const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name; + struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); + struct btrfs_root *root; + struct btrfs_inode_item *inode_item; +@@ -6609,7 +6597,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + */ + int btrfs_add_link(struct btrfs_trans_handle *trans, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, +- const struct qstr *name, int add_backref, u64 index) ++ const struct fscrypt_str *name, int add_backref, u64 index) + { + int ret = 0; + struct btrfs_key key; +@@ -6765,7 +6753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct inode *inode = d_inode(old_dentry); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct fscrypt_name fname; +- struct qstr name; + u64 index; + int err; + int drop_inode = 0; +@@ -6781,8 +6768,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + if (err) + goto fail; + +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); +- + err = btrfs_set_inode_index(BTRFS_I(dir), &index); + if (err) + goto fail; 
+@@ -6809,7 +6794,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); + + err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), +- &name, 1, index); ++ &fname.disk_name, 1, index); + + if (err) { + drop_inode = 1; +@@ -9107,7 +9092,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + int ret2; + bool need_abort = false; + struct fscrypt_name old_fname, new_fname; +- struct qstr old_name, new_name; ++ struct fscrypt_str *old_name, *new_name; + + /* + * For non-subvolumes allow exchange only within one subvolume, in the +@@ -9129,8 +9114,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, + return ret; + } + +- old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name); +- new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name); ++ old_name = &old_fname.disk_name; ++ new_name = &new_fname.disk_name; + + /* close the race window with snapshot create/destroy ioctl */ + if (old_ino == BTRFS_FIRST_FREE_OBJECTID || +@@ -9199,7 +9184,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. */ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino, ++ ret = btrfs_insert_inode_ref(trans, dest, new_name, old_ino, + btrfs_ino(BTRFS_I(new_dir)), + old_idx); + if (ret) +@@ -9212,7 +9197,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. 
*/ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, root, &old_name, new_ino, ++ ret = btrfs_insert_inode_ref(trans, root, old_name, new_ino, + btrfs_ino(BTRFS_I(old_dir)), + new_idx); + if (ret) { +@@ -9247,7 +9232,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* src is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(old_dentry->d_inode), +- &old_name, &old_rename_ctx); ++ old_name, &old_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); + } +@@ -9262,7 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* dest is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(new_dentry->d_inode), +- &new_name, &new_rename_ctx); ++ new_name, &new_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode)); + } +@@ -9272,14 +9257,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- &new_name, 0, old_idx); ++ new_name, 0, old_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; + } + + ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), +- &old_name, 0, new_idx); ++ old_name, 0, new_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +@@ -9364,7 +9349,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + int ret2; + u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); + struct fscrypt_name old_fname, new_fname; +- struct qstr old_name, new_name; + + if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; +@@ -9391,12 +9375,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + return ret; + } + +- old_name = (struct qstr)FSTR_TO_QSTR(&old_fname.disk_name); +- new_name = (struct qstr)FSTR_TO_QSTR(&new_fname.disk_name); +- + /* check for collisions, even if the name isn't there */ +- ret = 
btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_name); +- ++ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_fname.disk_name); + if (ret) { + if (ret == -EEXIST) { + /* we shouldn't get +@@ -9489,8 +9469,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + /* force full log commit if subvolume involved. */ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, &new_name, old_ino, +- btrfs_ino(BTRFS_I(new_dir)), index); ++ ret = btrfs_insert_inode_ref(trans, dest, &new_fname.disk_name, ++ old_ino, btrfs_ino(BTRFS_I(new_dir)), ++ index); + if (ret) + goto out_fail; + } +@@ -9513,7 +9494,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } else { + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(d_inode(old_dentry)), +- &old_name, &rename_ctx); ++ &old_fname.disk_name, &rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); + } +@@ -9532,7 +9513,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } else { + ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(d_inode(new_dentry)), +- &new_name); ++ &new_fname.disk_name); + } + if (!ret && new_inode->i_nlink == 0) + ret = btrfs_orphan_add(trans, +@@ -9544,7 +9525,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- &new_name, 0, index); ++ &new_fname.disk_name, 0, index); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 5305d98905cea..9e323420c96d3 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -951,7 +951,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, + struct inode *dir = d_inode(parent->dentry); + struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); + struct dentry *dentry; +- struct qstr name_str = QSTR_INIT(name, namelen); ++ struct fscrypt_str name_str = 
FSTR_INIT((char *)name, namelen); + int error; + + error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); +@@ -3782,7 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) + struct btrfs_trans_handle *trans; + struct btrfs_path *path = NULL; + struct btrfs_disk_key disk_key; +- struct qstr name = QSTR_INIT("default", 7); ++ struct fscrypt_str name = FSTR_INIT("default", 7); + u64 objectid = 0; + u64 dir_id; + int ret; +diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c +index cf29241b9b310..7d783f0943068 100644 +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -328,7 +328,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, + + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 *sequence, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + struct btrfs_root *tree_root = trans->fs_info->tree_root; + struct btrfs_path *path; +@@ -400,7 +400,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + */ + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 sequence, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + struct btrfs_root *tree_root = trans->fs_info->tree_root; + struct btrfs_key key; +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index 833364527554c..547b5c2292186 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -1596,7 +1596,7 @@ static int gen_unique_name(struct send_ctx *sctx, + return -ENOMEM; + + while (1) { +- struct qstr tmp_name; ++ struct fscrypt_str tmp_name; + + len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", + ino, gen, idx); +@@ -1756,7 +1756,7 @@ static int lookup_dir_item_inode(struct btrfs_root *root, + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path *path; +- struct qstr name_str = QSTR_INIT(name, name_len); ++ struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len); + + path = 
alloc_path_for_send(); + if (!path) +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index bf56e4d6b9f48..2c562febd801e 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1398,7 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_key location; +- struct qstr name = QSTR_INIT("default", 7); ++ struct fscrypt_str name = FSTR_INIT("default", 7); + u64 dir_id; + + path = btrfs_alloc_path(); +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index c8918bdf15ccd..1193214ba8c10 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -1641,7 +1641,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + u64 root_flags; + unsigned int nofs_flags; + struct fscrypt_name fname; +- struct qstr name; + + ASSERT(pending->path); + path = pending->path; +@@ -1661,7 +1660,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + memalloc_nofs_restore(nofs_flags); + if (pending->error) + goto free_pending; +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + + pending->error = btrfs_get_free_objectid(tree_root, &objectid); + if (pending->error) +@@ -1709,7 +1707,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + /* check if there is a file/dir which has the same name. 
*/ + dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, + btrfs_ino(BTRFS_I(parent_inode)), +- &name, 0); ++ &fname.disk_name, 0); + if (dir_item != NULL && !IS_ERR(dir_item)) { + pending->error = -EEXIST; + goto dir_item_existed; +@@ -1804,7 +1802,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + ret = btrfs_add_root_ref(trans, objectid, + parent_root->root_key.objectid, + btrfs_ino(BTRFS_I(parent_inode)), index, +- &name); ++ &fname.disk_name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; +@@ -1836,8 +1834,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + if (ret < 0) + goto fail; + +- ret = btrfs_insert_dir_item(trans, &name, BTRFS_I(parent_inode), &key, +- BTRFS_FT_DIR, index); ++ ret = btrfs_insert_dir_item(trans, &fname.disk_name, ++ BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, ++ index); + /* We have check then name at the beginning, so it is impossible. */ + BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); + if (ret) { +@@ -1846,7 +1845,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + } + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + +- name.len * 2); ++ fname.disk_name.len * 2); + parent_inode->i_mtime = current_time(parent_inode); + parent_inode->i_ctime = parent_inode->i_mtime; + ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode)); +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 25fd3f34b8f21..ab7893debf07a 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -596,7 +596,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans, + } + + static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, +- struct qstr *name) ++ struct fscrypt_str *name) + { + char *buf; + +@@ -916,7 +916,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans, + 
struct btrfs_inode *dir, + struct btrfs_inode *inode, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + int ret; + +@@ -947,7 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + { + struct btrfs_root *root = dir->root; + struct inode *inode; +- struct qstr name; ++ struct fscrypt_str name; + struct extent_buffer *leaf; + struct btrfs_key location; + int ret; +@@ -988,7 +988,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + static noinline int inode_in_dir(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 objectid, u64 index, +- struct qstr *name) ++ struct fscrypt_str *name) + { + struct btrfs_dir_item *di; + struct btrfs_key location; +@@ -1035,7 +1035,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, + static noinline int backref_in_log(struct btrfs_root *log, + struct btrfs_key *key, + u64 ref_objectid, +- const struct qstr *name) ++ const struct fscrypt_str *name) + { + struct btrfs_path *path; + int ret; +@@ -1071,7 +1071,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, + struct btrfs_inode *inode, + u64 inode_objectid, u64 parent_objectid, +- u64 ref_index, struct qstr *name) ++ u64 ref_index, struct fscrypt_str *name) + { + int ret; + struct extent_buffer *leaf; +@@ -1105,7 +1105,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); + while (ptr < ptr_end) { +- struct qstr victim_name; ++ struct fscrypt_str victim_name; + + victim_ref = (struct btrfs_inode_ref *)ptr; + ret = read_alloc_one_name(leaf, (victim_ref + 1), +@@ -1155,7 +1155,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + base = btrfs_item_ptr_offset(leaf, path->slots[0]); + + while (cur_offset < item_size) { +- struct qstr victim_name; ++ struct fscrypt_str victim_name; + + extref = 
(struct btrfs_inode_extref *)(base + cur_offset); + +@@ -1230,7 +1230,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + } + + static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, +- struct qstr *name, u64 *index, ++ struct fscrypt_str *name, u64 *index, + u64 *parent_objectid) + { + struct btrfs_inode_extref *extref; +@@ -1252,7 +1252,7 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, + } + + static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, +- struct qstr *name, u64 *index) ++ struct fscrypt_str *name, u64 *index) + { + struct btrfs_inode_ref *ref; + int ret; +@@ -1304,7 +1304,7 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, + ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); + ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]); + while (ref_ptr < ref_end) { +- struct qstr name; ++ struct fscrypt_str name; + u64 parent_id; + + if (key->type == BTRFS_INODE_EXTREF_KEY) { +@@ -1372,7 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + struct inode *inode = NULL; + unsigned long ref_ptr; + unsigned long ref_end; +- struct qstr name; ++ struct fscrypt_str name; + int ret; + int log_ref_ver = 0; + u64 parent_objectid; +@@ -1766,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, + static noinline int insert_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 dirid, u64 index, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_key *location) + { + struct inode *inode; +@@ -1844,7 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + struct btrfs_dir_item *di, + struct btrfs_key *key) + { +- struct qstr name; ++ struct fscrypt_str name; + struct btrfs_dir_item *dir_dst_di; + struct btrfs_dir_item *index_dst_di; + bool dir_dst_matches = false; +@@ -2124,7 +2124,7 @@ static noinline int 
check_item_in_log(struct btrfs_trans_handle *trans, + struct extent_buffer *eb; + int slot; + struct btrfs_dir_item *di; +- struct qstr name; ++ struct fscrypt_str name; + struct inode *inode = NULL; + struct btrfs_key location; + +@@ -3417,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dir_ino, +- const struct qstr *name, ++ const struct fscrypt_str *name, + u64 index) + { + struct btrfs_dir_item *di; +@@ -3464,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, + */ + void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_inode *dir, u64 index) + { + struct btrfs_path *path; +@@ -3503,7 +3503,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + /* see comments for btrfs_del_dir_entries_in_log */ + void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_inode *inode, u64 dirid) + { + struct btrfs_root *log; +@@ -5267,7 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, + u32 this_len; + unsigned long name_ptr; + struct btrfs_dir_item *di; +- struct qstr name_str; ++ struct fscrypt_str name_str; + + if (key->type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *iref; +@@ -7472,7 +7472,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + struct btrfs_root *log = old_dir->root->log_root; + struct btrfs_path *path; + struct fscrypt_name fname; +- struct qstr name; + + ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX); + +@@ -7480,7 +7479,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + &old_dentry->d_name, 0, &fname); + if (ret) + goto out; +- name = (struct qstr)FSTR_TO_QSTR(&fname.disk_name); + /* + * We have two inodes to update in the log, the old 
directory and + * the inode that got renamed, so we must pin the log to prevent +@@ -7516,7 +7514,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + */ + mutex_lock(&old_dir->log_mutex); + ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), +- &name, old_dir_index); ++ &fname.disk_name, old_dir_index); + if (ret > 0) { + /* + * The dentry does not exist in the log, so record its +diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h +index 6c0dc79787f05..8adebf4c9adaf 100644 +--- a/fs/btrfs/tree-log.h ++++ b/fs/btrfs/tree-log.h +@@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_log_ctx *ctx); + void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_inode *dir, u64 index); + void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const struct qstr *name, ++ const struct fscrypt_str *name, + struct btrfs_inode *inode, u64 dirid); + void btrfs_end_log_trans(struct btrfs_root *root); + void btrfs_pin_log_trans(struct btrfs_root *root); +-- +2.40.1 + diff --git a/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch b/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch new file mode 100644 index 00000000000..0e63b37e263 --- /dev/null +++ b/queue-6.1/btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch @@ -0,0 +1,1959 @@ +From 8c61a5386d7b8715600196b9b81b7371a9fd1017 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 12:58:25 -0400 +Subject: btrfs: use struct qstr instead of name and namelen pairs + +From: Sweet Tea Dorminy + +[ Upstream commit e43eec81c5167b655b72c781b0e75e62a05e415e ] + +Many functions throughout btrfs take name buffer and name length +arguments. Most of these functions at the highest level are usually +called with these arguments extracted from a supplied dentry's name. 
+But the entire name can be passed instead, making each function a little +more elegant. + +Each function whose arguments are currently the name and length +extracted from a dentry is herein converted to instead take a pointer to +the name in the dentry. The couple of calls to these calls without a +struct dentry are converted to create an appropriate qstr to pass in. +Additionally, every function which is only called with a name/len +extracted directly from a qstr is also converted. + +This change has positive effect on stack consumption, frame of many +functions is reduced but this will be used in the future for fscrypt +related structures. + +Signed-off-by: Sweet Tea Dorminy +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 9af86694fd5d ("btrfs: file_remove_privs needs an exclusive lock in direct io write") +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 26 ++-- + fs/btrfs/dir-item.c | 50 ++++---- + fs/btrfs/inode-item.c | 73 ++++++----- + fs/btrfs/inode-item.h | 20 ++- + fs/btrfs/inode.c | 130 +++++++++----------- + fs/btrfs/ioctl.c | 7 +- + fs/btrfs/root-tree.c | 19 ++- + fs/btrfs/send.c | 12 +- + fs/btrfs/super.c | 3 +- + fs/btrfs/transaction.c | 11 +- + fs/btrfs/tree-log.c | 267 +++++++++++++++++++---------------------- + fs/btrfs/tree-log.h | 4 +- + 12 files changed, 287 insertions(+), 335 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 3bcef0c4d6fc4..6718cee57a94e 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3238,11 +3238,11 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb) + + /* root-item.c */ + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, +- u64 ref_id, u64 dirid, u64 sequence, const char *name, +- int name_len); ++ u64 ref_id, u64 dirid, u64 sequence, ++ const struct qstr *name); + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, +- u64 ref_id, u64 dirid, u64 *sequence, const char *name, +- int name_len); ++ u64 ref_id, u64 
dirid, u64 *sequence, ++ const struct qstr *name); + int btrfs_del_root(struct btrfs_trans_handle *trans, + const struct btrfs_key *key); + int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, +@@ -3271,25 +3271,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); + + /* dir-item.c */ + int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +- const char *name, int name_len); +-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, +- int name_len, struct btrfs_inode *dir, ++ const struct qstr *name); ++int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, ++ const struct qstr *name, struct btrfs_inode *dir, + struct btrfs_key *location, u8 type, u64 index); + struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- const char *name, int name_len, +- int mod); ++ const struct qstr *name, int mod); + struct btrfs_dir_item * + btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- u64 index, const char *name, int name_len, +- int mod); ++ u64 index, const struct qstr *name, int mod); + struct btrfs_dir_item * + btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, +- const char *name, int name_len); ++ const struct qstr *name); + int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +@@ -3370,10 +3368,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); + int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, struct btrfs_inode *inode, +- const char *name, int name_len); ++ const struct qstr *name); + int btrfs_add_link(struct btrfs_trans_handle *trans, + struct btrfs_inode *parent_inode, struct 
btrfs_inode *inode, +- const char *name, int name_len, int add_backref, u64 index); ++ const struct qstr *name, int add_backref, u64 index); + int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry); + int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, + int front); +diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c +index 72fb2c518a2b4..8c60f37eb13fd 100644 +--- a/fs/btrfs/dir-item.c ++++ b/fs/btrfs/dir-item.c +@@ -103,8 +103,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + * to use for the second index (if one is created). + * Will return 0 or -ENOMEM + */ +-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, +- int name_len, struct btrfs_inode *dir, ++int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, ++ const struct qstr *name, struct btrfs_inode *dir, + struct btrfs_key *location, u8 type, u64 index) + { + int ret = 0; +@@ -120,7 +120,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, + + key.objectid = btrfs_ino(dir); + key.type = BTRFS_DIR_ITEM_KEY; +- key.offset = btrfs_name_hash(name, name_len); ++ key.offset = btrfs_name_hash(name->name, name->len); + + path = btrfs_alloc_path(); + if (!path) +@@ -128,9 +128,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, + + btrfs_cpu_key_to_disk(&disk_key, location); + +- data_size = sizeof(*dir_item) + name_len; ++ data_size = sizeof(*dir_item) + name->len; + dir_item = insert_with_overflow(trans, root, path, &key, data_size, +- name, name_len); ++ name->name, name->len); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + if (ret == -EEXIST) +@@ -142,11 +142,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_data_len(leaf, dir_item, 0); +- btrfs_set_dir_name_len(leaf, dir_item, name_len); ++ 
btrfs_set_dir_name_len(leaf, dir_item, name->len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); + name_ptr = (unsigned long)(dir_item + 1); + +- write_extent_buffer(leaf, name, name_ptr, name_len); ++ write_extent_buffer(leaf, name->name, name_ptr, name->len); + btrfs_mark_buffer_dirty(leaf); + + second_insert: +@@ -157,7 +157,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, + } + btrfs_release_path(path); + +- ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir, ++ ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir, + &disk_key, type, index); + out_free: + btrfs_free_path(path); +@@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir( + struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- const char *name, int name_len, ++ const struct qstr *name, + int mod) + { + struct btrfs_key key; +@@ -214,9 +214,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + + key.objectid = dir; + key.type = BTRFS_DIR_ITEM_KEY; +- key.offset = btrfs_name_hash(name, name_len); ++ key.offset = btrfs_name_hash(name->name, name->len); + +- di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); ++ di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, ++ name->len, mod); + if (IS_ERR(di) && PTR_ERR(di) == -ENOENT) + return NULL; + +@@ -224,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + } + + int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +- const char *name, int name_len) ++ const struct qstr *name) + { + int ret; + struct btrfs_key key; +@@ -240,9 +241,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, + + key.objectid = dir; + key.type = BTRFS_DIR_ITEM_KEY; +- key.offset = btrfs_name_hash(name, name_len); ++ key.offset = btrfs_name_hash(name->name, 
name->len); + +- di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0); ++ di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name, ++ name->len, 0); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + /* Nothing found, we're safe */ +@@ -262,11 +264,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, + goto out; + } + +- /* +- * see if there is room in the item to insert this +- * name +- */ +- data_size = sizeof(*di) + name_len; ++ /* See if there is room in the item to insert this name. */ ++ data_size = sizeof(*di) + name->len; + leaf = path->nodes[0]; + slot = path->slots[0]; + if (data_size + btrfs_item_size(leaf, slot) + +@@ -303,8 +302,7 @@ struct btrfs_dir_item * + btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, +- u64 index, const char *name, int name_len, +- int mod) ++ u64 index, const struct qstr *name, int mod) + { + struct btrfs_dir_item *di; + struct btrfs_key key; +@@ -313,7 +311,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = index; + +- di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); ++ di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, ++ name->len, mod); + if (di == ERR_PTR(-ENOENT)) + return NULL; + +@@ -321,9 +320,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + } + + struct btrfs_dir_item * +-btrfs_search_dir_index_item(struct btrfs_root *root, +- struct btrfs_path *path, u64 dirid, +- const char *name, int name_len) ++btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, ++ u64 dirid, const struct qstr *name) + { + struct btrfs_dir_item *di; + struct btrfs_key key; +@@ -338,7 +336,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root, + break; + + di = btrfs_match_dir_item_name(root->fs_info, path, +- name, name_len); ++ name->name, name->len); + if (di) + return di; + } 
+diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c +index 0eeb5ea878948..61b323517a40b 100644 +--- a/fs/btrfs/inode-item.c ++++ b/fs/btrfs/inode-item.c +@@ -10,8 +10,8 @@ + #include "print-tree.h" + + struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, +- int slot, const char *name, +- int name_len) ++ int slot, ++ const struct qstr *name) + { + struct btrfs_inode_ref *ref; + unsigned long ptr; +@@ -27,9 +27,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, + len = btrfs_inode_ref_name_len(leaf, ref); + name_ptr = (unsigned long)(ref + 1); + cur_offset += len + sizeof(*ref); +- if (len != name_len) ++ if (len != name->len) + continue; +- if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) ++ if (memcmp_extent_buffer(leaf, name->name, name_ptr, ++ name->len) == 0) + return ref; + } + return NULL; +@@ -37,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, + + struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( + struct extent_buffer *leaf, int slot, u64 ref_objectid, +- const char *name, int name_len) ++ const struct qstr *name) + { + struct btrfs_inode_extref *extref; + unsigned long ptr; +@@ -60,9 +61,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( + name_ptr = (unsigned long)(&extref->name); + ref_name_len = btrfs_inode_extref_name_len(leaf, extref); + +- if (ref_name_len == name_len && ++ if (ref_name_len == name->len && + btrfs_inode_extref_parent(leaf, extref) == ref_objectid && +- (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) ++ (memcmp_extent_buffer(leaf, name->name, name_ptr, ++ name->len) == 0)) + return extref; + + cur_offset += ref_name_len + sizeof(*extref); +@@ -75,7 +77,7 @@ struct btrfs_inode_extref * + btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +- const char *name, int name_len, ++ const struct qstr *name, + u64 
inode_objectid, u64 ref_objectid, int ins_len, + int cow) + { +@@ -84,7 +86,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_EXTREF_KEY; +- key.offset = btrfs_extref_hash(ref_objectid, name, name_len); ++ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) +@@ -92,13 +94,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, + if (ret > 0) + return NULL; + return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], +- ref_objectid, name, name_len); ++ ref_objectid, name); + + } + + static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, ++ const struct qstr *name, + u64 inode_objectid, u64 ref_objectid, + u64 *index) + { +@@ -107,14 +109,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + int ret; +- int del_len = name_len + sizeof(*extref); ++ int del_len = name->len + sizeof(*extref); + unsigned long ptr; + unsigned long item_start; + u32 item_size; + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_EXTREF_KEY; +- key.offset = btrfs_extref_hash(ref_objectid, name, name_len); ++ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); + + path = btrfs_alloc_path(); + if (!path) +@@ -132,7 +134,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + * readonly. 
+ */ + extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], +- ref_objectid, name, name_len); ++ ref_objectid, name); + if (!extref) { + btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); + ret = -EROFS; +@@ -168,8 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + } + + int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- const char *name, int name_len, ++ struct btrfs_root *root, const struct qstr *name, + u64 inode_objectid, u64 ref_objectid, u64 *index) + { + struct btrfs_path *path; +@@ -182,7 +183,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + u32 sub_item_len; + int ret; + int search_ext_refs = 0; +- int del_len = name_len + sizeof(*ref); ++ int del_len = name->len + sizeof(*ref); + + key.objectid = inode_objectid; + key.offset = ref_objectid; +@@ -201,8 +202,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + goto out; + } + +- ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name, +- name_len); ++ ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name); + if (!ref) { + ret = -ENOENT; + search_ext_refs = 1; +@@ -219,7 +219,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + goto out; + } + ptr = (unsigned long)ref; +- sub_item_len = name_len + sizeof(*ref); ++ sub_item_len = name->len + sizeof(*ref); + item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, + item_size - (ptr + sub_item_len - item_start)); +@@ -233,7 +233,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + * name in our ref array. Find and remove the extended + * inode ref then. 
+ */ +- return btrfs_del_inode_extref(trans, root, name, name_len, ++ return btrfs_del_inode_extref(trans, root, name, + inode_objectid, ref_objectid, index); + } + +@@ -247,12 +247,13 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + */ + static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, +- u64 inode_objectid, u64 ref_objectid, u64 index) ++ const struct qstr *name, ++ u64 inode_objectid, u64 ref_objectid, ++ u64 index) + { + struct btrfs_inode_extref *extref; + int ret; +- int ins_len = name_len + sizeof(*extref); ++ int ins_len = name->len + sizeof(*extref); + unsigned long ptr; + struct btrfs_path *path; + struct btrfs_key key; +@@ -260,7 +261,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_EXTREF_KEY; +- key.offset = btrfs_extref_hash(ref_objectid, name, name_len); ++ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); + + path = btrfs_alloc_path(); + if (!path) +@@ -272,7 +273,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + if (btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], + ref_objectid, +- name, name_len)) ++ name)) + goto out; + + btrfs_extend_item(path, ins_len); +@@ -286,12 +287,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len; + extref = (struct btrfs_inode_extref *)ptr; + +- btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); ++ btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len); + btrfs_set_inode_extref_index(path->nodes[0], extref, index); + btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); + + ptr = (unsigned long)&extref->name; +- write_extent_buffer(path->nodes[0], name, ptr, name_len); ++ write_extent_buffer(path->nodes[0], name->name, ptr, name->len); + 
btrfs_mark_buffer_dirty(path->nodes[0]); + + out: +@@ -301,8 +302,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + + /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */ + int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- const char *name, int name_len, ++ struct btrfs_root *root, const struct qstr *name, + u64 inode_objectid, u64 ref_objectid, u64 index) + { + struct btrfs_fs_info *fs_info = root->fs_info; +@@ -311,7 +311,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_inode_ref *ref; + unsigned long ptr; + int ret; +- int ins_len = name_len + sizeof(*ref); ++ int ins_len = name->len + sizeof(*ref); + + key.objectid = inode_objectid; + key.offset = ref_objectid; +@@ -327,7 +327,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + if (ret == -EEXIST) { + u32 old_size; + ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], +- name, name_len); ++ name); + if (ref) + goto out; + +@@ -336,7 +336,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); +- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); ++ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); + ptr = (unsigned long)(ref + 1); + ret = 0; +@@ -344,7 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + if (ret == -EOVERFLOW) { + if (btrfs_find_name_in_backref(path->nodes[0], + path->slots[0], +- name, name_len)) ++ name)) + ret = -EEXIST; + else + ret = -EMLINK; +@@ -353,11 +353,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + } else { + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); +- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); ++ 
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); + ptr = (unsigned long)(ref + 1); + } +- write_extent_buffer(path->nodes[0], name, ptr, name_len); ++ write_extent_buffer(path->nodes[0], name->name, ptr, name->len); + btrfs_mark_buffer_dirty(path->nodes[0]); + + out: +@@ -370,7 +370,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + if (btrfs_super_incompat_flags(disk_super) + & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) + ret = btrfs_insert_inode_extref(trans, root, name, +- name_len, + inode_objectid, + ref_objectid, index); + } +diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h +index a8fc16d0147f6..3c657c670cfdf 100644 +--- a/fs/btrfs/inode-item.h ++++ b/fs/btrfs/inode-item.h +@@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_truncate_control *control); + int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- const char *name, int name_len, ++ struct btrfs_root *root, const struct qstr *name, + u64 inode_objectid, u64 ref_objectid, u64 index); + int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- const char *name, int name_len, +- u64 inode_objectid, u64 ref_objectid, u64 *index); ++ struct btrfs_root *root, const struct qstr *name, ++ u64 inode_objectid, u64 ref_objectid, u64 *index); + int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); +-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root +- *root, struct btrfs_path *path, ++int btrfs_lookup_inode(struct btrfs_trans_handle *trans, ++ struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *location, int mod); + + struct btrfs_inode_extref *btrfs_lookup_inode_extref( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, +- 
const char *name, int name_len, ++ const struct qstr *name, + u64 inode_objectid, u64 ref_objectid, int ins_len, + int cow); + + struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, +- int slot, const char *name, +- int name_len); ++ int slot, ++ const struct qstr *name); + struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( + struct extent_buffer *leaf, int slot, u64 ref_objectid, +- const char *name, int name_len); ++ const struct qstr *name); + + #endif +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 222068bf80031..a5e61ad2ba696 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3627,7 +3627,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) + spin_unlock(&fs_info->delayed_iput_lock); + } + +-/** ++/* + * Wait for flushing all delayed iputs + * + * @fs_info: the filesystem +@@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, + struct btrfs_inode *inode, +- const char *name, int name_len, ++ const struct qstr *name, + struct btrfs_rename_ctx *rename_ctx) + { + struct btrfs_root *root = dir->root; +@@ -4290,8 +4290,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + goto out; + } + +- di = btrfs_lookup_dir_item(trans, root, path, dir_ino, +- name, name_len, -1); ++ di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); + if (IS_ERR_OR_NULL(di)) { + ret = di ? 
PTR_ERR(di) : -ENOENT; + goto err; +@@ -4319,12 +4318,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + } + } + +- ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, +- dir_ino, &index); ++ ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); + if (ret) { + btrfs_info(fs_info, + "failed to delete reference to %.*s, inode %llu parent %llu", +- name_len, name, ino, dir_ino); ++ name->len, name->name, ino, dir_ino); + btrfs_abort_transaction(trans, ret); + goto err; + } +@@ -4345,10 +4343,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + * operations on the log tree, increasing latency for applications. + */ + if (!rename_ctx) { +- btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, +- dir_ino); +- btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, +- index); ++ btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino); ++ btrfs_del_dir_entries_in_log(trans, root, name, dir, index); + } + + /* +@@ -4366,7 +4362,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + if (ret) + goto out; + +- btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2); ++ btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); + inode_inc_iversion(&inode->vfs_inode); + inode_inc_iversion(&dir->vfs_inode); + inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); +@@ -4379,10 +4375,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, struct btrfs_inode *inode, +- const char *name, int name_len) ++ const struct qstr *name) + { + int ret; +- ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL); ++ ++ ret = __btrfs_unlink_inode(trans, dir, inode, name, NULL); + if (!ret) { + drop_nlink(&inode->vfs_inode); + ret = btrfs_update_inode(trans, inode->root, inode); +@@ -4426,9 +4423,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry 
*dentry) + btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + 0); + +- ret = btrfs_unlink_inode(trans, BTRFS_I(dir), +- BTRFS_I(d_inode(dentry)), dentry->d_name.name, +- dentry->d_name.len); ++ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), ++ &dentry->d_name); + if (ret) + goto out; + +@@ -4453,8 +4449,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; +- const char *name = dentry->d_name.name; +- int name_len = dentry->d_name.len; ++ const struct qstr *name = &dentry->d_name; + u64 index; + int ret; + u64 objectid; +@@ -4473,8 +4468,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + if (!path) + return -ENOMEM; + +- di = btrfs_lookup_dir_item(trans, root, path, dir_ino, +- name, name_len, -1); ++ di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); + if (IS_ERR_OR_NULL(di)) { + ret = di ? PTR_ERR(di) : -ENOENT; + goto out; +@@ -4500,8 +4494,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + * call btrfs_del_root_ref, and it _shouldn't_ fail. 
+ */ + if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { +- di = btrfs_search_dir_index_item(root, path, dir_ino, +- name, name_len); ++ di = btrfs_search_dir_index_item(root, path, dir_ino, name); + if (IS_ERR_OR_NULL(di)) { + if (!di) + ret = -ENOENT; +@@ -4518,7 +4511,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + } else { + ret = btrfs_del_root_ref(trans, objectid, + root->root_key.objectid, dir_ino, +- &index, name, name_len); ++ &index, name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; +@@ -4531,7 +4524,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + goto out; + } + +- btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2); ++ btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name->len * 2); + inode_inc_iversion(dir); + dir->i_mtime = current_time(dir); + dir->i_ctime = dir->i_mtime; +@@ -4553,6 +4546,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) + struct btrfs_path *path; + struct btrfs_dir_item *di; + struct btrfs_key key; ++ struct qstr name = QSTR_INIT("default", 7); + u64 dir_id; + int ret; + +@@ -4563,7 +4557,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) + /* Make sure this root isn't set as the default subvol */ + dir_id = btrfs_super_root_dir(fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path, +- dir_id, "default", 7, 0); ++ dir_id, &name, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.objectid == root->root_key.objectid) { +@@ -4830,9 +4824,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; + + /* now the directory is empty */ +- err = btrfs_unlink_inode(trans, BTRFS_I(dir), +- BTRFS_I(d_inode(dentry)), dentry->d_name.name, +- dentry->d_name.len); ++ err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), ++ &dentry->d_name); + if (!err) { + 
btrfs_i_size_write(BTRFS_I(inode), 0); + /* +@@ -5532,8 +5525,7 @@ void btrfs_evict_inode(struct inode *inode) + static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + struct btrfs_key *location, u8 *type) + { +- const char *name = dentry->d_name.name; +- int namelen = dentry->d_name.len; ++ const struct qstr *name = &dentry->d_name; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; +@@ -5544,7 +5536,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + return -ENOMEM; + + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), +- name, namelen, 0); ++ name, 0); + if (IS_ERR_OR_NULL(di)) { + ret = di ? PTR_ERR(di) : -ENOENT; + goto out; +@@ -5556,7 +5548,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ret = -EUCLEAN; + btrfs_warn(root->fs_info, + "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", +- __func__, name, btrfs_ino(BTRFS_I(dir)), ++ __func__, name->name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + } + if (!ret) +@@ -6315,8 +6307,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + { + struct inode *dir = args->dir; + struct inode *inode = args->inode; +- const char *name = args->orphan ? NULL : args->dentry->d_name.name; +- int name_len = args->orphan ? 0 : args->dentry->d_name.len; ++ const struct qstr *name = args->orphan ? 
NULL : &args->dentry->d_name; + struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); + struct btrfs_root *root; + struct btrfs_inode_item *inode_item; +@@ -6417,7 +6408,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + sizes[1] = 2 + sizeof(*ref); + } else { + key[1].offset = btrfs_ino(BTRFS_I(dir)); +- sizes[1] = name_len + sizeof(*ref); ++ sizes[1] = name->len + sizeof(*ref); + } + } + +@@ -6456,10 +6447,12 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + btrfs_set_inode_ref_index(path->nodes[0], ref, 0); + write_extent_buffer(path->nodes[0], "..", ptr, 2); + } else { +- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); ++ btrfs_set_inode_ref_name_len(path->nodes[0], ref, ++ name->len); + btrfs_set_inode_ref_index(path->nodes[0], ref, + BTRFS_I(inode)->dir_index); +- write_extent_buffer(path->nodes[0], name, ptr, name_len); ++ write_extent_buffer(path->nodes[0], name->name, ptr, ++ name->len); + } + } + +@@ -6520,7 +6513,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); + } else { + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, +- name_len, 0, BTRFS_I(inode)->dir_index); ++ 0, BTRFS_I(inode)->dir_index); + } + if (ret) { + btrfs_abort_transaction(trans, ret); +@@ -6549,7 +6542,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + */ + int btrfs_add_link(struct btrfs_trans_handle *trans, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, +- const char *name, int name_len, int add_backref, u64 index) ++ const struct qstr *name, int add_backref, u64 index) + { + int ret = 0; + struct btrfs_key key; +@@ -6568,17 +6561,17 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, + if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_add_root_ref(trans, key.objectid, + root->root_key.objectid, parent_ino, +- index, name, name_len); ++ index, name); + } else if (add_backref) { +- ret = 
btrfs_insert_inode_ref(trans, root, name, name_len, ino, +- parent_ino, index); ++ ret = btrfs_insert_inode_ref(trans, root, name, ++ ino, parent_ino, index); + } + + /* Nothing to clean up yet */ + if (ret) + return ret; + +- ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key, ++ ret = btrfs_insert_dir_item(trans, name, parent_inode, &key, + btrfs_inode_type(&inode->vfs_inode), index); + if (ret == -EEXIST || ret == -EOVERFLOW) + goto fail_dir_item; +@@ -6588,7 +6581,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, + } + + btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + +- name_len * 2); ++ name->len * 2); + inode_inc_iversion(&parent_inode->vfs_inode); + /* + * If we are replaying a log tree, we do not want to update the mtime +@@ -6613,15 +6606,15 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, + int err; + err = btrfs_del_root_ref(trans, key.objectid, + root->root_key.objectid, parent_ino, +- &local_index, name, name_len); ++ &local_index, name); + if (err) + btrfs_abort_transaction(trans, err); + } else if (add_backref) { + u64 local_index; + int err; + +- err = btrfs_del_inode_ref(trans, root, name, name_len, +- ino, parent_ino, &local_index); ++ err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, ++ &local_index); + if (err) + btrfs_abort_transaction(trans, err); + } +@@ -6741,7 +6734,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); + + err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), +- dentry->d_name.name, dentry->d_name.len, 1, index); ++ &dentry->d_name, 1, index); + + if (err) { + drop_inode = 1; +@@ -9115,9 +9108,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. 
*/ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, +- new_dentry->d_name.name, +- new_dentry->d_name.len, ++ ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name, + old_ino, + btrfs_ino(BTRFS_I(new_dir)), + old_idx); +@@ -9131,9 +9122,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + /* force full log commit if subvolume involved. */ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, root, +- old_dentry->d_name.name, +- old_dentry->d_name.len, ++ ret = btrfs_insert_inode_ref(trans, root, &old_dentry->d_name, + new_ino, + btrfs_ino(BTRFS_I(old_dir)), + new_idx); +@@ -9169,8 +9158,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* src is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(old_dentry->d_inode), +- old_dentry->d_name.name, +- old_dentry->d_name.len, ++ &old_dentry->d_name, + &old_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); +@@ -9186,8 +9174,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } else { /* dest is an inode */ + ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(new_dentry->d_inode), +- new_dentry->d_name.name, +- new_dentry->d_name.len, ++ &new_dentry->d_name, + &new_rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode)); +@@ -9198,16 +9185,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- new_dentry->d_name.name, +- new_dentry->d_name.len, 0, old_idx); ++ &new_dentry->d_name, 0, old_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; + } + + ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), +- old_dentry->d_name.name, +- old_dentry->d_name.len, 0, new_idx); ++ &old_dentry->d_name, 0, new_idx); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +@@ -9308,8 
+9293,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + + /* check for collisions, even if the name isn't there */ + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, +- new_dentry->d_name.name, +- new_dentry->d_name.len); ++ &new_dentry->d_name); + + if (ret) { + if (ret == -EEXIST) { +@@ -9403,9 +9387,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + /* force full log commit if subvolume involved. */ + btrfs_set_log_full_commit(trans); + } else { +- ret = btrfs_insert_inode_ref(trans, dest, +- new_dentry->d_name.name, +- new_dentry->d_name.len, ++ ret = btrfs_insert_inode_ref(trans, dest, &new_dentry->d_name, + old_ino, + btrfs_ino(BTRFS_I(new_dir)), index); + if (ret) +@@ -9429,10 +9411,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); + } else { + ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), +- BTRFS_I(d_inode(old_dentry)), +- old_dentry->d_name.name, +- old_dentry->d_name.len, +- &rename_ctx); ++ BTRFS_I(d_inode(old_dentry)), ++ &old_dentry->d_name, &rename_ctx); + if (!ret) + ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); + } +@@ -9451,8 +9431,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } else { + ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(d_inode(new_dentry)), +- new_dentry->d_name.name, +- new_dentry->d_name.len); ++ &new_dentry->d_name); + } + if (!ret && new_inode->i_nlink == 0) + ret = btrfs_orphan_add(trans, +@@ -9464,8 +9443,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + } + + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), +- new_dentry->d_name.name, +- new_dentry->d_name.len, 0, index); ++ &new_dentry->d_name, 0, index); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_fail; +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 2e29fafe0e7d9..5305d98905cea 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -951,6 +951,7 @@ 
static noinline int btrfs_mksubvol(const struct path *parent, + struct inode *dir = d_inode(parent->dentry); + struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); + struct dentry *dentry; ++ struct qstr name_str = QSTR_INIT(name, namelen); + int error; + + error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); +@@ -971,8 +972,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, + * check for them now when we can safely fail + */ + error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, +- dir->i_ino, name, +- namelen); ++ dir->i_ino, &name_str); + if (error) + goto out_dput; + +@@ -3782,6 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) + struct btrfs_trans_handle *trans; + struct btrfs_path *path = NULL; + struct btrfs_disk_key disk_key; ++ struct qstr name = QSTR_INIT("default", 7); + u64 objectid = 0; + u64 dir_id; + int ret; +@@ -3825,7 +3826,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) + + dir_id = btrfs_super_root_dir(fs_info->super_copy); + di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, +- dir_id, "default", 7, 1); ++ dir_id, &name, 1); + if (IS_ERR_OR_NULL(di)) { + btrfs_release_path(path); + btrfs_end_transaction(trans); +diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c +index e1f599d7a9164..cf29241b9b310 100644 +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -327,9 +327,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, + } + + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, +- u64 ref_id, u64 dirid, u64 *sequence, const char *name, +- int name_len) +- ++ u64 ref_id, u64 dirid, u64 *sequence, ++ const struct qstr *name) + { + struct btrfs_root *tree_root = trans->fs_info->tree_root; + struct btrfs_path *path; +@@ -356,8 +355,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + struct btrfs_root_ref); + ptr = (unsigned long)(ref + 1); + if ((btrfs_root_ref_dirid(leaf, 
ref) != dirid) || +- (btrfs_root_ref_name_len(leaf, ref) != name_len) || +- memcmp_extent_buffer(leaf, name, ptr, name_len)) { ++ (btrfs_root_ref_name_len(leaf, ref) != name->len) || ++ memcmp_extent_buffer(leaf, name->name, ptr, name->len)) { + ret = -ENOENT; + goto out; + } +@@ -400,8 +399,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + * Will return 0, -ENOMEM, or anything from the CoW path + */ + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, +- u64 ref_id, u64 dirid, u64 sequence, const char *name, +- int name_len) ++ u64 ref_id, u64 dirid, u64 sequence, ++ const struct qstr *name) + { + struct btrfs_root *tree_root = trans->fs_info->tree_root; + struct btrfs_key key; +@@ -420,7 +419,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + key.offset = ref_id; + again: + ret = btrfs_insert_empty_item(trans, tree_root, path, &key, +- sizeof(*ref) + name_len); ++ sizeof(*ref) + name->len); + if (ret) { + btrfs_abort_transaction(trans, ret); + btrfs_free_path(path); +@@ -431,9 +430,9 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + btrfs_set_root_ref_dirid(leaf, ref, dirid); + btrfs_set_root_ref_sequence(leaf, ref, sequence); +- btrfs_set_root_ref_name_len(leaf, ref, name_len); ++ btrfs_set_root_ref_name_len(leaf, ref, name->len); + ptr = (unsigned long)(ref + 1); +- write_extent_buffer(leaf, name, ptr, name_len); ++ write_extent_buffer(leaf, name->name, ptr, name->len); + btrfs_mark_buffer_dirty(leaf); + + if (key.type == BTRFS_ROOT_BACKREF_KEY) { +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index 35e889fe2a95d..833364527554c 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -1596,13 +1596,17 @@ static int gen_unique_name(struct send_ctx *sctx, + return -ENOMEM; + + while (1) { ++ struct qstr tmp_name; ++ + len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", + ino, gen, idx); + 
ASSERT(len < sizeof(tmp)); ++ tmp_name.name = tmp; ++ tmp_name.len = strlen(tmp); + + di = btrfs_lookup_dir_item(NULL, sctx->send_root, + path, BTRFS_FIRST_FREE_OBJECTID, +- tmp, strlen(tmp), 0); ++ &tmp_name, 0); + btrfs_release_path(path); + if (IS_ERR(di)) { + ret = PTR_ERR(di); +@@ -1622,7 +1626,7 @@ static int gen_unique_name(struct send_ctx *sctx, + + di = btrfs_lookup_dir_item(NULL, sctx->parent_root, + path, BTRFS_FIRST_FREE_OBJECTID, +- tmp, strlen(tmp), 0); ++ &tmp_name, 0); + btrfs_release_path(path); + if (IS_ERR(di)) { + ret = PTR_ERR(di); +@@ -1752,13 +1756,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root, + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path *path; ++ struct qstr name_str = QSTR_INIT(name, name_len); + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + +- di = btrfs_lookup_dir_item(NULL, root, path, +- dir, name, name_len, 0); ++ di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0); + if (IS_ERR_OR_NULL(di)) { + ret = di ? PTR_ERR(di) : -ENOENT; + goto out; +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 582b71b7fa779..bf56e4d6b9f48 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1398,6 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_key location; ++ struct qstr name = QSTR_INIT("default", 7); + u64 dir_id; + + path = btrfs_alloc_path(); +@@ -1410,7 +1411,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec + * to mount. 
+ */ + dir_id = btrfs_super_root_dir(fs_info->super_copy); +- di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); ++ di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); + if (IS_ERR(di)) { + btrfs_free_path(path); + return PTR_ERR(di); +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index a555567594418..b0fe054c9f401 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -1694,8 +1694,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + /* check if there is a file/dir which has the same name. */ + dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, + btrfs_ino(BTRFS_I(parent_inode)), +- dentry->d_name.name, +- dentry->d_name.len, 0); ++ &dentry->d_name, 0); + if (dir_item != NULL && !IS_ERR(dir_item)) { + pending->error = -EEXIST; + goto dir_item_existed; +@@ -1790,7 +1789,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + ret = btrfs_add_root_ref(trans, objectid, + parent_root->root_key.objectid, + btrfs_ino(BTRFS_I(parent_inode)), index, +- dentry->d_name.name, dentry->d_name.len); ++ &dentry->d_name); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; +@@ -1822,9 +1821,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, + if (ret < 0) + goto fail; + +- ret = btrfs_insert_dir_item(trans, dentry->d_name.name, +- dentry->d_name.len, BTRFS_I(parent_inode), +- &key, BTRFS_FT_DIR, index); ++ ret = btrfs_insert_dir_item(trans, &dentry->d_name, ++ BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, ++ index); + /* We have check then name at the beginning, so it is impossible. 
*/ + BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); + if (ret) { +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 00be69ce7b90f..9f55e81acc0ef 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -595,6 +595,21 @@ static int overwrite_item(struct btrfs_trans_handle *trans, + return do_overwrite_item(trans, root, path, eb, slot, key); + } + ++static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, ++ struct qstr *name) ++{ ++ char *buf; ++ ++ buf = kmalloc(len, GFP_NOFS); ++ if (!buf) ++ return -ENOMEM; ++ ++ read_extent_buffer(eb, buf, (unsigned long)start, len); ++ name->name = buf; ++ name->len = len; ++ return 0; ++} ++ + /* + * simple helper to read an inode off the disk from a given root + * This can only be called for subvolume roots and not for the log +@@ -901,12 +916,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, + struct btrfs_inode *inode, +- const char *name, +- int name_len) ++ const struct qstr *name) + { + int ret; + +- ret = btrfs_unlink_inode(trans, dir, inode, name, name_len); ++ ret = btrfs_unlink_inode(trans, dir, inode, name); + if (ret) + return ret; + /* +@@ -933,8 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + { + struct btrfs_root *root = dir->root; + struct inode *inode; +- char *name; +- int name_len; ++ struct qstr name; + struct extent_buffer *leaf; + struct btrfs_key location; + int ret; +@@ -942,12 +955,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + leaf = path->nodes[0]; + + btrfs_dir_item_key_to_cpu(leaf, di, &location); +- name_len = btrfs_dir_name_len(leaf, di); +- name = kmalloc(name_len, GFP_NOFS); +- if (!name) ++ ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name); ++ if (ret) + return -ENOMEM; + +- read_extent_buffer(leaf, name, (unsigned long)(di + 1), 
name_len); + btrfs_release_path(path); + + inode = read_one_inode(root, location.objectid); +@@ -960,10 +971,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + if (ret) + goto out; + +- ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name, +- name_len); ++ ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name); + out: +- kfree(name); ++ kfree(name.name); + iput(inode); + return ret; + } +@@ -978,14 +988,14 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + static noinline int inode_in_dir(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 objectid, u64 index, +- const char *name, int name_len) ++ struct qstr *name) + { + struct btrfs_dir_item *di; + struct btrfs_key location; + int ret = 0; + + di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, +- index, name, name_len, 0); ++ index, name, 0); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; +@@ -998,7 +1008,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, + } + + btrfs_release_path(path); +- di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); ++ di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, 0); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; +@@ -1025,7 +1035,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, + static noinline int backref_in_log(struct btrfs_root *log, + struct btrfs_key *key, + u64 ref_objectid, +- const char *name, int namelen) ++ const struct qstr *name) + { + struct btrfs_path *path; + int ret; +@@ -1045,12 +1055,10 @@ static noinline int backref_in_log(struct btrfs_root *log, + if (key->type == BTRFS_INODE_EXTREF_KEY) + ret = !!btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], +- ref_objectid, +- name, namelen); ++ ref_objectid, name); + else + ret = !!btrfs_find_name_in_backref(path->nodes[0], +- path->slots[0], +- name, namelen); ++ path->slots[0], name); + out: + btrfs_free_path(path); + return ret; +@@ 
-1063,11 +1071,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_inode *dir, + struct btrfs_inode *inode, + u64 inode_objectid, u64 parent_objectid, +- u64 ref_index, char *name, int namelen) ++ u64 ref_index, struct qstr *name) + { + int ret; +- char *victim_name; +- int victim_name_len; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key search_key; +@@ -1099,43 +1105,40 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); + while (ptr < ptr_end) { +- victim_ref = (struct btrfs_inode_ref *)ptr; +- victim_name_len = btrfs_inode_ref_name_len(leaf, +- victim_ref); +- victim_name = kmalloc(victim_name_len, GFP_NOFS); +- if (!victim_name) +- return -ENOMEM; ++ struct qstr victim_name; + +- read_extent_buffer(leaf, victim_name, +- (unsigned long)(victim_ref + 1), +- victim_name_len); ++ victim_ref = (struct btrfs_inode_ref *)ptr; ++ ret = read_alloc_one_name(leaf, (victim_ref + 1), ++ btrfs_inode_ref_name_len(leaf, victim_ref), ++ &victim_name); ++ if (ret) ++ return ret; + + ret = backref_in_log(log_root, &search_key, +- parent_objectid, victim_name, +- victim_name_len); ++ parent_objectid, &victim_name); + if (ret < 0) { +- kfree(victim_name); ++ kfree(victim_name.name); + return ret; + } else if (!ret) { + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + + ret = unlink_inode_for_log_replay(trans, dir, inode, +- victim_name, victim_name_len); +- kfree(victim_name); ++ &victim_name); ++ kfree(victim_name.name); + if (ret) + return ret; + goto again; + } +- kfree(victim_name); ++ kfree(victim_name.name); + +- ptr = (unsigned long)(victim_ref + 1) + victim_name_len; ++ ptr = (unsigned long)(victim_ref + 1) + victim_name.len; + } + } + btrfs_release_path(path); + + /* Same search but for extended refs */ +- extref = btrfs_lookup_inode_extref(NULL, root, path, name, 
namelen, ++ extref = btrfs_lookup_inode_extref(NULL, root, path, name, + inode_objectid, parent_objectid, 0, + 0); + if (IS_ERR(extref)) { +@@ -1152,29 +1155,28 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + base = btrfs_item_ptr_offset(leaf, path->slots[0]); + + while (cur_offset < item_size) { +- extref = (struct btrfs_inode_extref *)(base + cur_offset); ++ struct qstr victim_name; + +- victim_name_len = btrfs_inode_extref_name_len(leaf, extref); ++ extref = (struct btrfs_inode_extref *)(base + cur_offset); + + if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) + goto next; + +- victim_name = kmalloc(victim_name_len, GFP_NOFS); +- if (!victim_name) +- return -ENOMEM; +- read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, +- victim_name_len); ++ ret = read_alloc_one_name(leaf, &extref->name, ++ btrfs_inode_extref_name_len(leaf, extref), ++ &victim_name); ++ if (ret) ++ return ret; + + search_key.objectid = inode_objectid; + search_key.type = BTRFS_INODE_EXTREF_KEY; + search_key.offset = btrfs_extref_hash(parent_objectid, +- victim_name, +- victim_name_len); ++ victim_name.name, ++ victim_name.len); + ret = backref_in_log(log_root, &search_key, +- parent_objectid, victim_name, +- victim_name_len); ++ parent_objectid, &victim_name); + if (ret < 0) { +- kfree(victim_name); ++ kfree(victim_name.name); + return ret; + } else if (!ret) { + ret = -ENOENT; +@@ -1186,26 +1188,24 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + + ret = unlink_inode_for_log_replay(trans, + BTRFS_I(victim_parent), +- inode, +- victim_name, +- victim_name_len); ++ inode, &victim_name); + } + iput(victim_parent); +- kfree(victim_name); ++ kfree(victim_name.name); + if (ret) + return ret; + goto again; + } +- kfree(victim_name); ++ kfree(victim_name.name); + next: +- cur_offset += victim_name_len + sizeof(*extref); ++ cur_offset += victim_name.len + sizeof(*extref); + } + } + btrfs_release_path(path); + + /* look for 
a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), +- ref_index, name, namelen, 0); ++ ref_index, name, 0); + if (IS_ERR(di)) { + return PTR_ERR(di); + } else if (di) { +@@ -1216,8 +1216,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + btrfs_release_path(path); + + /* look for a conflicting name */ +- di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), +- name, namelen, 0); ++ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0); + if (IS_ERR(di)) { + return PTR_ERR(di); + } else if (di) { +@@ -1231,20 +1230,18 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + } + + static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, +- u32 *namelen, char **name, u64 *index, ++ struct qstr *name, u64 *index, + u64 *parent_objectid) + { + struct btrfs_inode_extref *extref; ++ int ret; + + extref = (struct btrfs_inode_extref *)ref_ptr; + +- *namelen = btrfs_inode_extref_name_len(eb, extref); +- *name = kmalloc(*namelen, GFP_NOFS); +- if (*name == NULL) +- return -ENOMEM; +- +- read_extent_buffer(eb, *name, (unsigned long)&extref->name, +- *namelen); ++ ret = read_alloc_one_name(eb, &extref->name, ++ btrfs_inode_extref_name_len(eb, extref), name); ++ if (ret) ++ return ret; + + if (index) + *index = btrfs_inode_extref_index(eb, extref); +@@ -1255,18 +1252,17 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, + } + + static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, +- u32 *namelen, char **name, u64 *index) ++ struct qstr *name, u64 *index) + { + struct btrfs_inode_ref *ref; ++ int ret; + + ref = (struct btrfs_inode_ref *)ref_ptr; + +- *namelen = btrfs_inode_ref_name_len(eb, ref); +- *name = kmalloc(*namelen, GFP_NOFS); +- if (*name == NULL) +- return -ENOMEM; +- +- read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); ++ ret = read_alloc_one_name(eb, ref + 1, 
btrfs_inode_ref_name_len(eb, ref), ++ name); ++ if (ret) ++ return ret; + + if (index) + *index = btrfs_inode_ref_index(eb, ref); +@@ -1308,28 +1304,24 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, + ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); + ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]); + while (ref_ptr < ref_end) { +- char *name = NULL; +- int namelen; ++ struct qstr name; + u64 parent_id; + + if (key->type == BTRFS_INODE_EXTREF_KEY) { +- ret = extref_get_fields(eb, ref_ptr, &namelen, &name, ++ ret = extref_get_fields(eb, ref_ptr, &name, + NULL, &parent_id); + } else { + parent_id = key->offset; +- ret = ref_get_fields(eb, ref_ptr, &namelen, &name, +- NULL); ++ ret = ref_get_fields(eb, ref_ptr, &name, NULL); + } + if (ret) + goto out; + + if (key->type == BTRFS_INODE_EXTREF_KEY) + ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot, +- parent_id, name, +- namelen); ++ parent_id, &name); + else +- ret = !!btrfs_find_name_in_backref(log_eb, log_slot, +- name, namelen); ++ ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name); + + if (!ret) { + struct inode *dir; +@@ -1338,20 +1330,20 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, + dir = read_one_inode(root, parent_id); + if (!dir) { + ret = -ENOENT; +- kfree(name); ++ kfree(name.name); + goto out; + } + ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), +- inode, name, namelen); +- kfree(name); ++ inode, &name); ++ kfree(name.name); + iput(dir); + if (ret) + goto out; + goto again; + } + +- kfree(name); +- ref_ptr += namelen; ++ kfree(name.name); ++ ref_ptr += name.len; + if (key->type == BTRFS_INODE_EXTREF_KEY) + ref_ptr += sizeof(struct btrfs_inode_extref); + else +@@ -1380,8 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + struct inode *inode = NULL; + unsigned long ref_ptr; + unsigned long ref_end; +- char *name = NULL; +- int namelen; ++ struct qstr name; + int ret; + int log_ref_ver = 0; + u64 
parent_objectid; +@@ -1425,7 +1416,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + + while (ref_ptr < ref_end) { + if (log_ref_ver) { +- ret = extref_get_fields(eb, ref_ptr, &namelen, &name, ++ ret = extref_get_fields(eb, ref_ptr, &name, + &ref_index, &parent_objectid); + /* + * parent object can change from one array +@@ -1438,15 +1429,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + goto out; + } + } else { +- ret = ref_get_fields(eb, ref_ptr, &namelen, &name, +- &ref_index); ++ ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); + } + if (ret) + goto out; + + ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), +- btrfs_ino(BTRFS_I(inode)), ref_index, +- name, namelen); ++ btrfs_ino(BTRFS_I(inode)), ref_index, &name); + if (ret < 0) { + goto out; + } else if (ret == 0) { +@@ -1460,7 +1449,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + ret = __add_inode_ref(trans, root, path, log, + BTRFS_I(dir), BTRFS_I(inode), + inode_objectid, parent_objectid, +- ref_index, name, namelen); ++ ref_index, &name); + if (ret) { + if (ret == 1) + ret = 0; +@@ -1469,7 +1458,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + + /* insert our name */ + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), +- name, namelen, 0, ref_index); ++ &name, 0, ref_index); + if (ret) + goto out; + +@@ -1479,9 +1468,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + } + /* Else, ret == 1, we already have a perfect match, we're done. 
*/ + +- ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; +- kfree(name); +- name = NULL; ++ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len; ++ kfree(name.name); ++ name.name = NULL; + if (log_ref_ver) { + iput(dir); + dir = NULL; +@@ -1505,7 +1494,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + ret = overwrite_item(trans, root, path, eb, slot, key); + out: + btrfs_release_path(path); +- kfree(name); ++ kfree(name.name); + iput(dir); + iput(inode); + return ret; +@@ -1777,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, + static noinline int insert_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 dirid, u64 index, +- char *name, int name_len, ++ const struct qstr *name, + struct btrfs_key *location) + { + struct inode *inode; +@@ -1795,7 +1784,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, + } + + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, +- name_len, 1, index); ++ 1, index); + + /* FIXME, put inode into FIXUP list */ + +@@ -1855,8 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + struct btrfs_dir_item *di, + struct btrfs_key *key) + { +- char *name; +- int name_len; ++ struct qstr name; + struct btrfs_dir_item *dir_dst_di; + struct btrfs_dir_item *index_dst_di; + bool dir_dst_matches = false; +@@ -1874,17 +1862,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + if (!dir) + return -EIO; + +- name_len = btrfs_dir_name_len(eb, di); +- name = kmalloc(name_len, GFP_NOFS); +- if (!name) { +- ret = -ENOMEM; ++ ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); ++ if (ret) + goto out; +- } + + log_type = btrfs_dir_type(eb, di); +- read_extent_buffer(eb, name, (unsigned long)(di + 1), +- name_len); +- + btrfs_dir_item_key_to_cpu(eb, di, &log_key); + ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); + 
btrfs_release_path(path); +@@ -1894,7 +1876,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + ret = 0; + + dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, +- name, name_len, 1); ++ &name, 1); + if (IS_ERR(dir_dst_di)) { + ret = PTR_ERR(dir_dst_di); + goto out; +@@ -1911,7 +1893,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + + index_dst_di = btrfs_lookup_dir_index_item(trans, root, path, + key->objectid, key->offset, +- name, name_len, 1); ++ &name, 1); + if (IS_ERR(index_dst_di)) { + ret = PTR_ERR(index_dst_di); + goto out; +@@ -1939,7 +1921,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + search_key.objectid = log_key.objectid; + search_key.type = BTRFS_INODE_REF_KEY; + search_key.offset = key->objectid; +- ret = backref_in_log(root->log_root, &search_key, 0, name, name_len); ++ ret = backref_in_log(root->log_root, &search_key, 0, &name); + if (ret < 0) { + goto out; + } else if (ret) { +@@ -1952,8 +1934,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + search_key.objectid = log_key.objectid; + search_key.type = BTRFS_INODE_EXTREF_KEY; + search_key.offset = key->objectid; +- ret = backref_in_log(root->log_root, &search_key, key->objectid, name, +- name_len); ++ ret = backref_in_log(root->log_root, &search_key, key->objectid, &name); + if (ret < 0) { + goto out; + } else if (ret) { +@@ -1964,7 +1945,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + } + btrfs_release_path(path); + ret = insert_one_name(trans, root, key->objectid, key->offset, +- name, name_len, &log_key); ++ &name, &log_key); + if (ret && ret != -ENOENT && ret != -EEXIST) + goto out; + if (!ret) +@@ -1974,10 +1955,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, + + out: + if (!ret && update_size) { +- btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2); ++ btrfs_i_size_write(BTRFS_I(dir), dir->i_size + 
name.len * 2); + ret = btrfs_update_inode(trans, root, BTRFS_I(dir)); + } +- kfree(name); ++ kfree(name.name); + iput(dir); + if (!ret && name_added) + ret = 1; +@@ -2143,8 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + struct extent_buffer *eb; + int slot; + struct btrfs_dir_item *di; +- int name_len; +- char *name; ++ struct qstr name; + struct inode *inode = NULL; + struct btrfs_key location; + +@@ -2159,22 +2139,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + eb = path->nodes[0]; + slot = path->slots[0]; + di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); +- name_len = btrfs_dir_name_len(eb, di); +- name = kmalloc(name_len, GFP_NOFS); +- if (!name) { +- ret = -ENOMEM; ++ ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); ++ if (ret) + goto out; +- } +- +- read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); + + if (log) { + struct btrfs_dir_item *log_di; + + log_di = btrfs_lookup_dir_index_item(trans, log, log_path, + dir_key->objectid, +- dir_key->offset, +- name, name_len, 0); ++ dir_key->offset, &name, 0); + if (IS_ERR(log_di)) { + ret = PTR_ERR(log_di); + goto out; +@@ -2200,7 +2174,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + + inc_nlink(inode); + ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode), +- name, name_len); ++ &name); + /* + * Unlike dir item keys, dir index keys can only have one name (entry) in + * them, as there are no key collisions since each key has a unique offset +@@ -2209,7 +2183,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + out: + btrfs_release_path(path); + btrfs_release_path(log_path); +- kfree(name); ++ kfree(name.name); + iput(inode); + return ret; + } +@@ -3443,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dir_ino, +- const char *name, int name_len, ++ 
const struct qstr *name, + u64 index) + { + struct btrfs_dir_item *di; +@@ -3453,7 +3427,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, + * for dir item keys. + */ + di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, +- index, name, name_len, -1); ++ index, name, -1); + if (IS_ERR(di)) + return PTR_ERR(di); + else if (!di) +@@ -3490,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, + */ + void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, ++ const struct qstr *name, + struct btrfs_inode *dir, u64 index) + { + struct btrfs_path *path; +@@ -3517,7 +3491,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + } + + ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir), +- name, name_len, index); ++ name, index); + btrfs_free_path(path); + out_unlock: + mutex_unlock(&dir->log_mutex); +@@ -3529,7 +3503,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + /* see comments for btrfs_del_dir_entries_in_log */ + void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, ++ const struct qstr *name, + struct btrfs_inode *inode, u64 dirid) + { + struct btrfs_root *log; +@@ -3550,7 +3524,7 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + log = root->log_root; + mutex_lock(&inode->log_mutex); + +- ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), ++ ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), + dirid, &index); + mutex_unlock(&inode->log_mutex); + if (ret < 0 && ret != -ENOENT) +@@ -5293,6 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, + u32 this_len; + unsigned long name_ptr; + struct btrfs_dir_item *di; ++ struct qstr name_str; + + if (key->type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *iref; +@@ -5326,8 +5301,11 @@ static 
int btrfs_check_ref_name_override(struct extent_buffer *eb, + } + + read_extent_buffer(eb, name, name_ptr, this_name_len); ++ ++ name_str.name = name; ++ name_str.len = this_name_len; + di = btrfs_lookup_dir_item(NULL, inode->root, search_path, +- parent, name, this_name_len, 0); ++ parent, &name_str, 0); + if (di && !IS_ERR(di)) { + struct btrfs_key di_key; + +@@ -7530,8 +7508,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, + */ + mutex_lock(&old_dir->log_mutex); + ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), +- old_dentry->d_name.name, +- old_dentry->d_name.len, old_dir_index); ++ &old_dentry->d_name, old_dir_index); + if (ret > 0) { + /* + * The dentry does not exist in the log, so record its +diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h +index bcca74128c3bb..6c0dc79787f05 100644 +--- a/fs/btrfs/tree-log.h ++++ b/fs/btrfs/tree-log.h +@@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_log_ctx *ctx); + void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, ++ const struct qstr *name, + struct btrfs_inode *dir, u64 index); + void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, +- const char *name, int name_len, ++ const struct qstr *name, + struct btrfs_inode *inode, u64 dirid); + void btrfs_end_log_trans(struct btrfs_root *root); + void btrfs_pin_log_trans(struct btrfs_root *root); +-- +2.40.1 + diff --git a/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch b/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch new file mode 100644 index 00000000000..6be5a6c7030 --- /dev/null +++ b/queue-6.1/maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch @@ -0,0 +1,61 @@ +From 562b1f633915512df826243cd52eeb774147186c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Sep 2023 14:12:35 -0400 +Subject: maple_tree: add 
mas_is_active() to detect in-tree walks + +From: Liam R. Howlett + +[ Upstream commit 5c590804b6b0ff933ed4e5cee5d76de3a5048d9f ] + +Patch series "maple_tree: Fix mas_prev() state regression". + +Pedro Falcato reported an mprotect regression [1] which was bisected back +to the iterator changes for maple tree. Root cause analysis showed the +mas_prev() running off the end of the VMA space (previous from 0) followed +by mas_find(), would skip the first value. + +This patchset introduces maple state underflow/overflow so the sequence of +calls on the maple state will return what the user expects. + +Users who encounter this bug may see mprotect(), userfaultfd_register(), +and mlock() fail on VMAs mapped with address 0. + +This patch (of 2): + +Instead of constantly checking each possibility of the maple state, +create a fast path that will skip over checking unlikely states. + +Link: https://lkml.kernel.org/r/20230921181236.509072-1-Liam.Howlett@oracle.com +Link: https://lkml.kernel.org/r/20230921181236.509072-2-Liam.Howlett@oracle.com +Signed-off-by: Liam R. Howlett +Cc: Pedro Falcato +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/maple_tree.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h +index 443dec917ec64..27864178d1918 100644 +--- a/include/linux/maple_tree.h ++++ b/include/linux/maple_tree.h +@@ -488,6 +488,15 @@ static inline bool mas_is_paused(struct ma_state *mas) + return mas->node == MAS_PAUSE; + } + ++/* Check if the mas is pointing to a node or not */ ++static inline bool mas_is_active(struct ma_state *mas) ++{ ++ if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) ++ return true; ++ ++ return false; ++} ++ + /** + * mas_reset() - Reset a Maple Tree operation state. + * @mas: Maple Tree operation state. 
+-- +2.40.1 + diff --git a/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch b/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch new file mode 100644 index 00000000000..51387cc6cb7 --- /dev/null +++ b/queue-6.1/maple_tree-relocate-the-declaration-of-mas_empty_are.patch @@ -0,0 +1,55 @@ +From 2c6b47348d1fd76c15dc3f4068829747a550419a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 May 2023 11:12:47 +0800 +Subject: maple_tree: relocate the declaration of mas_empty_area_rev(). + +From: Peng Zhang + +[ Upstream commit 06b27ce36a1a3dc5ea6f8314d0c7d1baa9f8ece7 ] + +Relocate the declaration of mas_empty_area_rev() so that mas_empty_area() +and mas_empty_area_rev() are together. + +Link: https://lkml.kernel.org/r/20230524031247.65949-11-zhangpeng.00@bytedance.com +Signed-off-by: Peng Zhang +Reviewed-by: Liam R. Howlett +Signed-off-by: Andrew Morton +Stable-dep-of: 5c590804b6b0 ("maple_tree: add mas_is_active() to detect in-tree walks") +Signed-off-by: Sasha Levin +--- + include/linux/maple_tree.h | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h +index 1a424edb71a65..443dec917ec64 100644 +--- a/include/linux/maple_tree.h ++++ b/include/linux/maple_tree.h +@@ -469,6 +469,12 @@ void *mas_next(struct ma_state *mas, unsigned long max); + + int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, + unsigned long size); ++/* ++ * This finds an empty area from the highest address to the lowest. 
++ * AKA "Topdown" version, ++ */ ++int mas_empty_area_rev(struct ma_state *mas, unsigned long min, ++ unsigned long max, unsigned long size); + + /* Checks if a mas has not found anything */ + static inline bool mas_is_none(struct ma_state *mas) +@@ -482,12 +488,6 @@ static inline bool mas_is_paused(struct ma_state *mas) + return mas->node == MAS_PAUSE; + } + +-/* +- * This finds an empty area from the highest address to the lowest. +- * AKA "Topdown" version, +- */ +-int mas_empty_area_rev(struct ma_state *mas, unsigned long min, +- unsigned long max, unsigned long size); + /** + * mas_reset() - Reset a Maple Tree operation state. + * @mas: Maple Tree operation state. +-- +2.40.1 + diff --git a/queue-6.1/maple_tree-remove-the-redundant-code.patch b/queue-6.1/maple_tree-remove-the-redundant-code.patch new file mode 100644 index 00000000000..0031b866538 --- /dev/null +++ b/queue-6.1/maple_tree-remove-the-redundant-code.patch @@ -0,0 +1,48 @@ +From 8115a697c47c8d0d1e35d67364eb7fb1b5fcf922 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 14:00:56 +0800 +Subject: maple_tree: remove the redundant code + +From: Vernon Yang + +[ Upstream commit eabb305293835b191ffe60234587ae8bf5e4e9fd ] + +The macros CONFIG_DEBUG_MAPLE_TREE_VERBOSE no one uses, functions +mas_dup_tree() and mas_dup_store() are not implemented, just function +declaration, so drop it. + +Link: https://lkml.kernel.org/r/20221221060058.609003-6-vernon2gm@gmail.com +Signed-off-by: Vernon Yang +Reviewed-by: Liam R. 
Howlett +Signed-off-by: Andrew Morton +Stable-dep-of: 5c590804b6b0 ("maple_tree: add mas_is_active() to detect in-tree walks") +Signed-off-by: Sasha Levin +--- + include/linux/maple_tree.h | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h +index e594db58a0f14..1a424edb71a65 100644 +--- a/include/linux/maple_tree.h ++++ b/include/linux/maple_tree.h +@@ -12,7 +12,6 @@ + #include + #include + /* #define CONFIG_MAPLE_RCU_DISABLED */ +-/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */ + + /* + * Allocated nodes are mutable until they have been inserted into the tree, +@@ -483,9 +482,6 @@ static inline bool mas_is_paused(struct ma_state *mas) + return mas->node == MAS_PAUSE; + } + +-void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas); +-void mas_dup_store(struct ma_state *mas, void *entry); +- + /* + * This finds an empty area from the highest address to the lowest. + * AKA "Topdown" version, +-- +2.40.1 + diff --git a/queue-6.1/mm-memory-add-vm_normal_folio.patch b/queue-6.1/mm-memory-add-vm_normal_folio.patch new file mode 100644 index 00000000000..64d1388d597 --- /dev/null +++ b/queue-6.1/mm-memory-add-vm_normal_folio.patch @@ -0,0 +1,76 @@ +From 4dd74e82b56a3b09dafbbfc7ff1d61da63c67d27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 10:08:45 -0800 +Subject: mm/memory: add vm_normal_folio() + +From: Vishal Moola (Oracle) + +[ Upstream commit 318e9342fbbb6888d903d86e83865609901a1c65 ] + +Patch series "Convert deactivate_page() to folio_deactivate()", v4. + +Deactivate_page() has already been converted to use folios. This patch +series modifies the callers of deactivate_page() to use folios. It also +introduces vm_normal_folio() to assist with folio conversions, and +converts deactivate_page() to folio_deactivate() which takes in a folio. + +This patch (of 4): + +Introduce a wrapper function called vm_normal_folio(). 
This function +calls vm_normal_page() and returns the folio of the page found, or null if +no page is found. + +This function allows callers to get a folio from a pte, which will +eventually allow them to completely replace their struct page variables +with struct folio instead. + +Link: https://lkml.kernel.org/r/20221221180848.20774-1-vishal.moola@gmail.com +Link: https://lkml.kernel.org/r/20221221180848.20774-2-vishal.moola@gmail.com +Signed-off-by: Vishal Moola (Oracle) +Reviewed-by: Matthew Wilcox (Oracle) +Cc: SeongJae Park +Signed-off-by: Andrew Morton +Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified") +Signed-off-by: Sasha Levin +--- + include/linux/mm.h | 2 ++ + mm/memory.c | 10 ++++++++++ + 2 files changed, 12 insertions(+) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 104ec00823da8..eefb0948110ae 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1906,6 +1906,8 @@ static inline bool can_do_mlock(void) { return false; } + extern int user_shm_lock(size_t, struct ucounts *); + extern void user_shm_unlock(size_t, struct ucounts *); + ++struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte); + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, +diff --git a/mm/memory.c b/mm/memory.c +index 2083078cd0615..0d1b3ee8fcd7a 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -672,6 +672,16 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + return pfn_to_page(pfn); + } + ++struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte) ++{ ++ struct page *page = vm_normal_page(vma, addr, pte); ++ ++ if (page) ++ return page_folio(page); ++ return NULL; ++} ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned 
long addr, + pmd_t pmd) +-- +2.40.1 + diff --git a/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch b/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch new file mode 100644 index 00000000000..309e750a3eb --- /dev/null +++ b/queue-6.1/mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch @@ -0,0 +1,112 @@ +From 8af17b9c7c656fa77bc58151f3bdf3a5b7283303 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Jan 2023 12:18:33 -0800 +Subject: mm/mempolicy: convert migrate_page_add() to migrate_folio_add() + +From: Vishal Moola (Oracle) + +[ Upstream commit 4a64981dfee9119aa2c1f243b48f34cbbd67779c ] + +Replace migrate_page_add() with migrate_folio_add(). migrate_folio_add() +does the same as migrate_page_add() but takes in a folio instead of a page. +This removes a couple of calls to compound_head(). + +Link: https://lkml.kernel.org/r/20230130201833.27042-7-vishal.moola@gmail.com +Signed-off-by: Vishal Moola (Oracle) +Reviewed-by: Yin Fengwei +Cc: David Hildenbrand +Cc: Jane Chu +Signed-off-by: Andrew Morton +Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified") +Signed-off-by: Sasha Levin +--- + mm/mempolicy.c | 39 ++++++++++++++++++++------------------- + 1 file changed, 20 insertions(+), 19 deletions(-) + +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 2ae6c8f18aba1..158b0bcd12fd7 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -414,7 +414,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { + }, + }; + +-static int migrate_page_add(struct page *page, struct list_head *pagelist, ++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, + unsigned long flags); + + struct queue_pages { +@@ -476,7 +476,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + /* go to folio migration */ + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma) || +- 
migrate_page_add(&folio->page, qp->pagelist, flags)) { ++ migrate_folio_add(folio, qp->pagelist, flags)) { + ret = 1; + goto unlock; + } +@@ -544,7 +544,7 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, + * temporary off LRU pages in the range. Still + * need migrate other LRU pages. + */ +- if (migrate_page_add(&folio->page, qp->pagelist, flags)) ++ if (migrate_folio_add(folio, qp->pagelist, flags)) + has_unmovable = true; + } else + break; +@@ -1012,27 +1012,28 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, + } + + #ifdef CONFIG_MIGRATION +-/* +- * page migration, thp tail pages can be passed. +- */ +-static int migrate_page_add(struct page *page, struct list_head *pagelist, ++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, + unsigned long flags) + { +- struct page *head = compound_head(page); + /* +- * Avoid migrating a page that is shared with others. ++ * We try to migrate only unshared folios. If it is shared it ++ * is likely not worth migrating. ++ * ++ * To check if the folio is shared, ideally we want to make sure ++ * every page is mapped to the same process. Doing that is very ++ * expensive, so check the estimated mapcount of the folio instead. + */ +- if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) { +- if (!isolate_lru_page(head)) { +- list_add_tail(&head->lru, pagelist); +- mod_node_page_state(page_pgdat(head), +- NR_ISOLATED_ANON + page_is_file_lru(head), +- thp_nr_pages(head)); ++ if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) { ++ if (!folio_isolate_lru(folio)) { ++ list_add_tail(&folio->lru, foliolist); ++ node_stat_mod_folio(folio, ++ NR_ISOLATED_ANON + folio_is_file_lru(folio), ++ folio_nr_pages(folio)); + } else if (flags & MPOL_MF_STRICT) { + /* +- * Non-movable page may reach here. And, there may be +- * temporary off LRU pages or non-LRU movable pages. 
+- * Treat them as unmovable pages since they can't be ++ * Non-movable folio may reach here. And, there may be ++ * temporary off LRU folios or non-LRU movable folios. ++ * Treat them as unmovable folios since they can't be + * isolated, so they can't be moved at the moment. It + * should return -EIO for this case too. + */ +@@ -1224,7 +1225,7 @@ static struct page *new_page(struct page *page, unsigned long start) + } + #else + +-static int migrate_page_add(struct page *page, struct list_head *pagelist, ++static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, + unsigned long flags) + { + return -EIO; +-- +2.40.1 + diff --git a/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch b/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch new file mode 100644 index 00000000000..a99a38cb89b --- /dev/null +++ b/queue-6.1/mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch @@ -0,0 +1,96 @@ +From 299e39e2131d8ec58eec46daaa826d503335bada Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Jan 2023 12:18:29 -0800 +Subject: mm/mempolicy: convert queue_pages_pmd() to queue_folios_pmd() + +From: Vishal Moola (Oracle) + +[ Upstream commit de1f5055523e9a035b38533f25a56df03d45034a ] + +The function now operates on a folio instead of the page associated with a +pmd. + +This change is in preparation for the conversion of queue_pages_required() +to queue_folio_required() and migrate_page_add() to migrate_folio_add(). 
+ +Link: https://lkml.kernel.org/r/20230130201833.27042-3-vishal.moola@gmail.com +Signed-off-by: Vishal Moola (Oracle) +Cc: David Hildenbrand +Cc: Jane Chu +Cc: "Yin, Fengwei" +Signed-off-by: Andrew Morton +Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified") +Signed-off-by: Sasha Levin +--- + mm/mempolicy.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 7d36dd95d1fff..3a291026e1896 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -442,21 +442,21 @@ static inline bool queue_pages_required(struct page *page, + } + + /* +- * queue_pages_pmd() has three possible return values: +- * 0 - pages are placed on the right node or queued successfully, or ++ * queue_folios_pmd() has three possible return values: ++ * 0 - folios are placed on the right node or queued successfully, or + * special page is met, i.e. huge zero page. +- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were ++ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were + * specified. + * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an +- * existing page was already on a node that does not follow the ++ * existing folio was already on a node that does not follow the + * policy. 
+ */ +-static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, ++static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + unsigned long end, struct mm_walk *walk) + __releases(ptl) + { + int ret = 0; +- struct page *page; ++ struct folio *folio; + struct queue_pages *qp = walk->private; + unsigned long flags; + +@@ -464,19 +464,19 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + ret = -EIO; + goto unlock; + } +- page = pmd_page(*pmd); +- if (is_huge_zero_page(page)) { ++ folio = pfn_folio(pmd_pfn(*pmd)); ++ if (is_huge_zero_page(&folio->page)) { + walk->action = ACTION_CONTINUE; + goto unlock; + } +- if (!queue_pages_required(page, qp)) ++ if (!queue_pages_required(&folio->page, qp)) + goto unlock; + + flags = qp->flags; +- /* go to thp migration */ ++ /* go to folio migration */ + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma) || +- migrate_page_add(page, qp->pagelist, flags)) { ++ migrate_page_add(&folio->page, qp->pagelist, flags)) { + ret = 1; + goto unlock; + } +@@ -512,7 +512,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + + ptl = pmd_trans_huge_lock(pmd, vma); + if (ptl) +- return queue_pages_pmd(pmd, ptl, addr, end, walk); ++ return queue_folios_pmd(pmd, ptl, addr, end, walk); + + if (pmd_trans_unstable(pmd)) + return 0; +-- +2.40.1 + diff --git a/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch b/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch new file mode 100644 index 00000000000..ddf9ed1231e --- /dev/null +++ b/queue-6.1/mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch @@ -0,0 +1,102 @@ +From 35c8cbe38cb84466882c63f46dd0129bdba26456 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Jan 2023 12:18:30 -0800 +Subject: mm/mempolicy: convert queue_pages_pte_range() to + queue_folios_pte_range() + +From: Vishal Moola (Oracle) + +[ Upstream commit 
3dae02bbd07f40e37bbfec2d77119628db461eaa ] + +This function now operates on folios associated with ptes instead of +pages. + +This change is in preparation for the conversion of queue_pages_required() +to queue_folio_required() and migrate_page_add() to migrate_folio_add(). + +Link: https://lkml.kernel.org/r/20230130201833.27042-4-vishal.moola@gmail.com +Signed-off-by: Vishal Moola (Oracle) +Cc: David Hildenbrand +Cc: Jane Chu +Cc: "Yin, Fengwei" +Signed-off-by: Andrew Morton +Stable-dep-of: 24526268f4e3 ("mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified") +Signed-off-by: Sasha Levin +--- + mm/mempolicy.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 3a291026e1896..2ae6c8f18aba1 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -491,19 +491,19 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + * Scan through pages checking if pages follow certain conditions, + * and move them to the pagelist if they do. + * +- * queue_pages_pte_range() has three possible return values: +- * 0 - pages are placed on the right node or queued successfully, or ++ * queue_folios_pte_range() has three possible return values: ++ * 0 - folios are placed on the right node or queued successfully, or + * special page is met, i.e. zero page. +- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were ++ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were + * specified. +- * -EIO - only MPOL_MF_STRICT was specified and an existing page was already ++ * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already + * on a node that does not follow the policy. 
+ */ +-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, ++static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) + { + struct vm_area_struct *vma = walk->vma; +- struct page *page; ++ struct folio *folio; + struct queue_pages *qp = walk->private; + unsigned long flags = qp->flags; + bool has_unmovable = false; +@@ -521,16 +521,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + for (; addr != end; pte++, addr += PAGE_SIZE) { + if (!pte_present(*pte)) + continue; +- page = vm_normal_page(vma, addr, *pte); +- if (!page || is_zone_device_page(page)) ++ folio = vm_normal_folio(vma, addr, *pte); ++ if (!folio || folio_is_zone_device(folio)) + continue; + /* +- * vm_normal_page() filters out zero pages, but there might +- * still be PageReserved pages to skip, perhaps in a VDSO. ++ * vm_normal_folio() filters out zero pages, but there might ++ * still be reserved folios to skip, perhaps in a VDSO. + */ +- if (PageReserved(page)) ++ if (folio_test_reserved(folio)) + continue; +- if (!queue_pages_required(page, qp)) ++ if (!queue_pages_required(&folio->page, qp)) + continue; + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + /* MPOL_MF_STRICT must be specified if we get here */ +@@ -544,7 +544,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + * temporary off LRU pages in the range. Still + * need migrate other LRU pages. 
+ */ +- if (migrate_page_add(page, qp->pagelist, flags)) ++ if (migrate_page_add(&folio->page, qp->pagelist, flags)) + has_unmovable = true; + } else + break; +@@ -705,7 +705,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, + + static const struct mm_walk_ops queue_pages_walk_ops = { + .hugetlb_entry = queue_pages_hugetlb, +- .pmd_entry = queue_pages_pte_range, ++ .pmd_entry = queue_folios_pte_range, + .test_walk = queue_pages_test_walk, + }; + +-- +2.40.1 + diff --git a/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch b/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch new file mode 100644 index 00000000000..d3100b1ea21 --- /dev/null +++ b/queue-6.1/mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch @@ -0,0 +1,189 @@ +From 03117032d2162841bbbe9c4bafd56777cab40033 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Sep 2023 15:32:42 -0700 +Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE + are specified + +From: Yang Shi + +[ Upstream commit 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 ] + +When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel +should attempt to migrate all existing pages, and return -EIO if there is +misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply +page table walker on queue_pages_range()") messed up the return value and +didn't break VMA scan early anymore when MPOL_MF_STRICT alone. The +return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: +make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke +the VMA walk early if unmovable page is met, it may cause some pages are +not migrated as expected. 
+ +The code should conceptually do: + + if (MPOL_MF_MOVE|MOVEALL) + scan all vmas + try to migrate the existing pages + return success + else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) + scan all vmas + try to migrate the existing pages + return -EIO if unmovable or migration failed + else /* MPOL_MF_STRICT alone */ + break early if meets unmovable and don't call mbind_range() at all + else /* none of those flags */ + check the ranges in test_walk, EFAULT without mbind_range() if discontig. + +Fixed the behavior. + +Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.com +Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") +Signed-off-by: Yang Shi +Cc: Hugh Dickins +Cc: Suren Baghdasaryan +Cc: Matthew Wilcox +Cc: Michal Hocko +Cc: Vlastimil Babka +Cc: Oscar Salvador +Cc: Rafael Aquini +Cc: Kirill A. Shutemov +Cc: David Rientjes +Cc: [4.9+] +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/mempolicy.c | 39 +++++++++++++++++++-------------------- + 1 file changed, 19 insertions(+), 20 deletions(-) + +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 158b0bcd12fd7..bfe2d1d50fbee 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -424,6 +424,7 @@ struct queue_pages { + unsigned long start; + unsigned long end; + struct vm_area_struct *first; ++ bool has_unmovable; + }; + + /* +@@ -444,9 +445,8 @@ static inline bool queue_pages_required(struct page *page, + /* + * queue_folios_pmd() has three possible return values: + * 0 - folios are placed on the right node or queued successfully, or +- * special page is met, i.e. huge zero page. +- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were +- * specified. ++ * special page is met, i.e. zero page, or unmovable page is found ++ * but continue walking (indicated by queue_pages.has_unmovable). 
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an + * existing folio was already on a node that does not follow the + * policy. +@@ -477,7 +477,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma) || + migrate_folio_add(folio, qp->pagelist, flags)) { +- ret = 1; ++ qp->has_unmovable = true; + goto unlock; + } + } else +@@ -493,9 +493,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + * + * queue_folios_pte_range() has three possible return values: + * 0 - folios are placed on the right node or queued successfully, or +- * special page is met, i.e. zero page. +- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were +- * specified. ++ * special page is met, i.e. zero page, or unmovable page is found ++ * but continue walking (indicated by queue_pages.has_unmovable). + * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already + * on a node that does not follow the policy. + */ +@@ -506,7 +505,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, + struct folio *folio; + struct queue_pages *qp = walk->private; + unsigned long flags = qp->flags; +- bool has_unmovable = false; + pte_t *pte, *mapped_pte; + spinlock_t *ptl; + +@@ -533,11 +531,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, + if (!queue_pages_required(&folio->page, qp)) + continue; + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { +- /* MPOL_MF_STRICT must be specified if we get here */ +- if (!vma_migratable(vma)) { +- has_unmovable = true; +- break; +- } ++ /* ++ * MPOL_MF_STRICT must be specified if we get here. ++ * Continue walking vmas due to MPOL_MF_MOVE* flags. 
++ */ ++ if (!vma_migratable(vma)) ++ qp->has_unmovable = true; + + /* + * Do not abort immediately since there may be +@@ -545,16 +544,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, + * need migrate other LRU pages. + */ + if (migrate_folio_add(folio, qp->pagelist, flags)) +- has_unmovable = true; ++ qp->has_unmovable = true; + } else + break; + } + pte_unmap_unlock(mapped_pte, ptl); + cond_resched(); + +- if (has_unmovable) +- return 1; +- + return addr != end ? -EIO : 0; + } + +@@ -594,7 +590,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, + * Detecting misplaced page but allow migrating pages which + * have been queued. + */ +- ret = 1; ++ qp->has_unmovable = true; + goto unlock; + } + +@@ -608,7 +604,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, + * Failed to isolate page but allow migrating pages + * which have been queued. + */ +- ret = 1; ++ qp->has_unmovable = true; + } + unlock: + spin_unlock(ptl); +@@ -737,10 +733,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, + .start = start, + .end = end, + .first = NULL, ++ .has_unmovable = false, + }; + + err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp); + ++ if (qp.has_unmovable) ++ err = 1; + if (!qp.first) + /* whole range in hole */ + err = -EFAULT; +@@ -1338,7 +1337,7 @@ static long do_mbind(unsigned long start, unsigned long len, + putback_movable_pages(&pagelist); + } + +- if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) ++ if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) + err = -EIO; + } else { + up_out: +-- +2.40.1 + diff --git a/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch b/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch new file mode 100644 index 00000000000..f96425bbd6f --- /dev/null +++ b/queue-6.1/mm-page_alloc-always-remove-pages-from-temporary-lis.patch @@ -0,0 +1,55 @@ +From 
9d8e793d000a5f5b70e270f999cc52c2dd78b5cd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 10:17:13 +0000 +Subject: mm/page_alloc: always remove pages from temporary list + +From: Mel Gorman + +[ Upstream commit c3e58a70425ac6ddaae1529c8146e88b4f7252bb ] + +Patch series "Leave IRQs enabled for per-cpu page allocations", v3. + +This patch (of 2): + +free_unref_page_list() has neglected to remove pages properly from the +list of pages to free since forever. It works by coincidence because +list_add happened to do the right thing adding the pages to just the PCP +lists. However, a later patch added pages to either the PCP list or the +zone list but only properly deleted the page from the list in one path +leading to list corruption and a subsequent failure. As a preparation +patch, always delete the pages from one list properly before adding to +another. On its own, this fixes nothing although it adds a fractional +amount of overhead but is critical to the next patch. + +Link: https://lkml.kernel.org/r/20221118101714.19590-1-mgorman@techsingularity.net +Link: https://lkml.kernel.org/r/20221118101714.19590-2-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Reported-by: Hugh Dickins +Reviewed-by: Vlastimil Babka +Cc: Marcelo Tosatti +Cc: Marek Szyprowski +Cc: Michal Hocko +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Stable-dep-of: 7b086755fb8c ("mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list") +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 69668817fed37..d94ac6d87bc97 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3547,6 +3547,8 @@ void free_unref_page_list(struct list_head *list) + list_for_each_entry_safe(page, next, list, lru) { + struct zone *zone = page_zone(page); + ++ list_del(&page->lru); ++ + /* Different zone, different pcp lock. 
*/ + if (zone != locked_zone) { + if (pcp) +-- +2.40.1 + diff --git a/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch b/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch new file mode 100644 index 00000000000..c14aec60453 --- /dev/null +++ b/queue-6.1/mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch @@ -0,0 +1,94 @@ +From 65647c5732d410e694b93da2cffed9f76a5e599b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 14:11:08 -0400 +Subject: mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy + list + +From: Johannes Weiner + +[ Upstream commit 7b086755fb8cdbb6b3e45a1bbddc00e7f9b1dc03 ] + +Commit 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock") +bypasses the pcplist on lock contention and returns the page directly to +the buddy list of the page's migratetype. + +For pages that don't have their own pcplist, such as CMA and HIGHATOMIC, +the migratetype is temporarily updated such that the page can hitch a ride +on the MOVABLE pcplist. Their true type is later reassessed when flushing +in free_pcppages_bulk(). However, when lock contention is detected after +the type was already overridden, the bypass will then put the page on the +wrong buddy list. + +Once on the MOVABLE buddy list, the page becomes eligible for fallbacks +and even stealing. In the case of HIGHATOMIC, otherwise ineligible +allocations can dip into the highatomic reserves. In the case of CMA, the +page can be lost from the CMA region permanently. + +Use a separate pcpmigratetype variable for the pcplist override. Use the +original migratetype when going directly to the buddy. This fixes the bug +and should make the intentions more obvious in the code. + +Originally sent here to address the HIGHATOMIC case: +https://lore.kernel.org/lkml/20230821183733.106619-4-hannes@cmpxchg.org/ + +Changelog updated in response to the CMA-specific bug report. 
+ +[mgorman@techsingularity.net: updated changelog] +Link: https://lkml.kernel.org/r/20230911181108.GA104295@cmpxchg.org +Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock") +Signed-off-by: Johannes Weiner +Reported-by: Joe Liu +Reviewed-by: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 90082f75660f2..ca017c6008b7c 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3448,7 +3448,7 @@ void free_unref_page(struct page *page, unsigned int order) + struct per_cpu_pages *pcp; + struct zone *zone; + unsigned long pfn = page_to_pfn(page); +- int migratetype; ++ int migratetype, pcpmigratetype; + + if (!free_unref_page_prepare(page, pfn, order)) + return; +@@ -3456,24 +3456,24 @@ void free_unref_page(struct page *page, unsigned int order) + /* + * We only track unmovable, reclaimable and movable on pcp lists. + * Place ISOLATE pages on the isolated list because they are being +- * offlined but treat HIGHATOMIC as movable pages so we can get those +- * areas back if necessary. Otherwise, we may have to free ++ * offlined but treat HIGHATOMIC and CMA as movable pages so we can ++ * get those areas back if necessary. 
Otherwise, we may have to free + * excessively into the page allocator + */ +- migratetype = get_pcppage_migratetype(page); ++ migratetype = pcpmigratetype = get_pcppage_migratetype(page); + if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { + if (unlikely(is_migrate_isolate(migratetype))) { + free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + return; + } +- migratetype = MIGRATE_MOVABLE; ++ pcpmigratetype = MIGRATE_MOVABLE; + } + + zone = page_zone(page); + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (pcp) { +- free_unref_page_commit(zone, pcp, page, migratetype, order); ++ free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + pcp_spin_unlock(pcp); + } else { + free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); +-- +2.40.1 + diff --git a/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch b/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch new file mode 100644 index 00000000000..5edf3ea3303 --- /dev/null +++ b/queue-6.1/mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch @@ -0,0 +1,354 @@ +From a7473a18b6b2c4fd29dbf3399db70ba026294e3f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 10:17:14 +0000 +Subject: mm/page_alloc: leave IRQs enabled for per-cpu page allocations + +From: Mel Gorman + +[ Upstream commit 5749077415994eb02d660b2559b9d8278521e73d ] + +The pcp_spin_lock_irqsave protecting the PCP lists is IRQ-safe as a task +allocating from the PCP must not re-enter the allocator from IRQ context. +In each instance where IRQ-reentrancy is possible, the lock is acquired +using pcp_spin_trylock_irqsave() even though IRQs are disabled and +re-entrancy is impossible. + +Demote the lock to pcp_spin_lock avoids an IRQ disable/enable in the +common case at the cost of some IRQ allocations taking a slower path. 
If +the PCP lists need to be refilled, the zone lock still needs to disable +IRQs but that will only happen on PCP refill and drain. If an IRQ is +raised when a PCP allocation is in progress, the trylock will fail and +fall back to using the buddy lists directly. Note that this may not be a +universal win if an interrupt-intensive workload also allocates heavily +from interrupt context and contends heavily on the zone->lock as a result. + +[mgorman@techsingularity.net: migratetype might be wrong if a PCP was locked] + Link: https://lkml.kernel.org/r/20221122131229.5263-2-mgorman@techsingularity.net +[yuzhao@google.com: reported lockdep issue on IO completion from softirq] +[hughd@google.com: fix list corruption, lock improvements, micro-optimisations] +Link: https://lkml.kernel.org/r/20221118101714.19590-3-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Reviewed-by: Vlastimil Babka +Cc: Marcelo Tosatti +Cc: Marek Szyprowski +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Stable-dep-of: 7b086755fb8c ("mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list") +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 124 +++++++++++++++++++++--------------------------- + 1 file changed, 54 insertions(+), 70 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index d94ac6d87bc97..90082f75660f2 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock); + _ret; \ + }) + +-#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \ ++#define pcpu_spin_trylock(type, member, ptr) \ + ({ \ + type *_ret; \ + pcpu_task_pin(); \ + _ret = this_cpu_ptr(ptr); \ +- spin_lock_irqsave(&_ret->member, flags); \ +- _ret; \ +-}) +- +-#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \ +-({ \ +- type *_ret; \ +- pcpu_task_pin(); \ +- _ret = this_cpu_ptr(ptr); \ +- if (!spin_trylock_irqsave(&_ret->member, flags)) { \ ++ if (!spin_trylock(&_ret->member)) { \ + pcpu_task_unpin(); \ + _ret = NULL; 
\ + } \ +@@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock); + pcpu_task_unpin(); \ + }) + +-#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \ +-({ \ +- spin_unlock_irqrestore(&ptr->member, flags); \ +- pcpu_task_unpin(); \ +-}) +- + /* struct per_cpu_pages specific helpers. */ + #define pcp_spin_lock(ptr) \ + pcpu_spin_lock(struct per_cpu_pages, lock, ptr) + +-#define pcp_spin_lock_irqsave(ptr, flags) \ +- pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags) +- +-#define pcp_spin_trylock_irqsave(ptr, flags) \ +- pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags) ++#define pcp_spin_trylock(ptr) \ ++ pcpu_spin_trylock(struct per_cpu_pages, lock, ptr) + + #define pcp_spin_unlock(ptr) \ + pcpu_spin_unlock(lock, ptr) + +-#define pcp_spin_unlock_irqrestore(ptr, flags) \ +- pcpu_spin_unlock_irqrestore(lock, ptr, flags) + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID + DEFINE_PER_CPU(int, numa_node); + EXPORT_PER_CPU_SYMBOL(numa_node); +@@ -1548,6 +1528,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, + struct per_cpu_pages *pcp, + int pindex) + { ++ unsigned long flags; + int min_pindex = 0; + int max_pindex = NR_PCP_LISTS - 1; + unsigned int order; +@@ -1563,8 +1544,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, + /* Ensure requested pindex is drained first. */ + pindex = pindex - 1; + +- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. 
*/ +- spin_lock(&zone->lock); ++ spin_lock_irqsave(&zone->lock, flags); + isolated_pageblocks = has_isolate_pageblock(zone); + + while (count > 0) { +@@ -1612,7 +1592,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, + } while (count > 0 && !list_empty(list)); + } + +- spin_unlock(&zone->lock); ++ spin_unlock_irqrestore(&zone->lock, flags); + } + + static void free_one_page(struct zone *zone, +@@ -3126,10 +3106,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + unsigned long count, struct list_head *list, + int migratetype, unsigned int alloc_flags) + { ++ unsigned long flags; + int i, allocated = 0; + +- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ +- spin_lock(&zone->lock); ++ spin_lock_irqsave(&zone->lock, flags); + for (i = 0; i < count; ++i) { + struct page *page = __rmqueue(zone, order, migratetype, + alloc_flags); +@@ -3163,7 +3143,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + * pages added to the pcp list. + */ + __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); +- spin_unlock(&zone->lock); ++ spin_unlock_irqrestore(&zone->lock, flags); + return allocated; + } + +@@ -3180,16 +3160,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) + batch = READ_ONCE(pcp->batch); + to_drain = min(pcp->count, batch); + if (to_drain > 0) { +- unsigned long flags; +- +- /* +- * free_pcppages_bulk expects IRQs disabled for zone->lock +- * so even though pcp->lock is not intended to be IRQ-safe, +- * it's needed in this context. 
+- */ +- spin_lock_irqsave(&pcp->lock, flags); ++ spin_lock(&pcp->lock); + free_pcppages_bulk(zone, to_drain, pcp, 0); +- spin_unlock_irqrestore(&pcp->lock, flags); ++ spin_unlock(&pcp->lock); + } + } + #endif +@@ -3203,12 +3176,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) + + pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + if (pcp->count) { +- unsigned long flags; +- +- /* See drain_zone_pages on why this is disabling IRQs */ +- spin_lock_irqsave(&pcp->lock, flags); ++ spin_lock(&pcp->lock); + free_pcppages_bulk(zone, pcp->count, pcp, 0); +- spin_unlock_irqrestore(&pcp->lock, flags); ++ spin_unlock(&pcp->lock); + } + } + +@@ -3474,7 +3444,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, + */ + void free_unref_page(struct page *page, unsigned int order) + { +- unsigned long flags; + unsigned long __maybe_unused UP_flags; + struct per_cpu_pages *pcp; + struct zone *zone; +@@ -3502,10 +3471,10 @@ void free_unref_page(struct page *page, unsigned int order) + + zone = page_zone(page); + pcp_trylock_prepare(UP_flags); +- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); ++ pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (pcp) { + free_unref_page_commit(zone, pcp, page, migratetype, order); +- pcp_spin_unlock_irqrestore(pcp, flags); ++ pcp_spin_unlock(pcp); + } else { + free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); + } +@@ -3517,10 +3486,10 @@ void free_unref_page(struct page *page, unsigned int order) + */ + void free_unref_page_list(struct list_head *list) + { ++ unsigned long __maybe_unused UP_flags; + struct page *page, *next; + struct per_cpu_pages *pcp = NULL; + struct zone *locked_zone = NULL; +- unsigned long flags; + int batch_count = 0; + int migratetype; + +@@ -3548,21 +3517,36 @@ void free_unref_page_list(struct list_head *list) + struct zone *zone = page_zone(page); + + list_del(&page->lru); ++ migratetype = get_pcppage_migratetype(page); + + /* Different zone, 
different pcp lock. */ + if (zone != locked_zone) { +- if (pcp) +- pcp_spin_unlock_irqrestore(pcp, flags); ++ if (pcp) { ++ pcp_spin_unlock(pcp); ++ pcp_trylock_finish(UP_flags); ++ } + ++ /* ++ * trylock is necessary as pages may be getting freed ++ * from IRQ or SoftIRQ context after an IO completion. ++ */ ++ pcp_trylock_prepare(UP_flags); ++ pcp = pcp_spin_trylock(zone->per_cpu_pageset); ++ if (unlikely(!pcp)) { ++ pcp_trylock_finish(UP_flags); ++ free_one_page(zone, page, page_to_pfn(page), ++ 0, migratetype, FPI_NONE); ++ locked_zone = NULL; ++ continue; ++ } + locked_zone = zone; +- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); ++ batch_count = 0; + } + + /* + * Non-isolated types over MIGRATE_PCPTYPES get added + * to the MIGRATE_MOVABLE pcp list. + */ +- migratetype = get_pcppage_migratetype(page); + if (unlikely(migratetype >= MIGRATE_PCPTYPES)) + migratetype = MIGRATE_MOVABLE; + +@@ -3570,18 +3554,23 @@ void free_unref_page_list(struct list_head *list) + free_unref_page_commit(zone, pcp, page, migratetype, 0); + + /* +- * Guard against excessive IRQ disabled times when we get +- * a large list of pages to free. ++ * Guard against excessive lock hold times when freeing ++ * a large list of pages. Lock will be reacquired if ++ * necessary on the next iteration. 
+ */ + if (++batch_count == SWAP_CLUSTER_MAX) { +- pcp_spin_unlock_irqrestore(pcp, flags); ++ pcp_spin_unlock(pcp); ++ pcp_trylock_finish(UP_flags); + batch_count = 0; +- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); ++ pcp = NULL; ++ locked_zone = NULL; + } + } + +- if (pcp) +- pcp_spin_unlock_irqrestore(pcp, flags); ++ if (pcp) { ++ pcp_spin_unlock(pcp); ++ pcp_trylock_finish(UP_flags); ++ } + } + + /* +@@ -3782,15 +3771,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + struct per_cpu_pages *pcp; + struct list_head *list; + struct page *page; +- unsigned long flags; + unsigned long __maybe_unused UP_flags; + +- /* +- * spin_trylock may fail due to a parallel drain. In the future, the +- * trylock will also protect against IRQ reentrancy. +- */ ++ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ + pcp_trylock_prepare(UP_flags); +- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); ++ pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (!pcp) { + pcp_trylock_finish(UP_flags); + return NULL; +@@ -3804,7 +3789,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + pcp->free_factor >>= 1; + list = &pcp->lists[order_to_pindex(migratetype, order)]; + page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); +- pcp_spin_unlock_irqrestore(pcp, flags); ++ pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + if (page) { + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); +@@ -5375,7 +5360,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + struct page **page_array) + { + struct page *page; +- unsigned long flags; + unsigned long __maybe_unused UP_flags; + struct zone *zone; + struct zoneref *z; +@@ -5457,9 +5441,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + if (unlikely(!zone)) + goto failed; + +- /* Is a parallel drain in progress? */ ++ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. 
*/ + pcp_trylock_prepare(UP_flags); +- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); ++ pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (!pcp) + goto failed_irq; + +@@ -5478,7 +5462,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + if (unlikely(!page)) { + /* Try and allocate at least one page */ + if (!nr_account) { +- pcp_spin_unlock_irqrestore(pcp, flags); ++ pcp_spin_unlock(pcp); + goto failed_irq; + } + break; +@@ -5493,7 +5477,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + nr_populated++; + } + +- pcp_spin_unlock_irqrestore(pcp, flags); ++ pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + + __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); +-- +2.40.1 + diff --git a/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch b/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch new file mode 100644 index 00000000000..814f6612b83 --- /dev/null +++ b/queue-6.1/mptcp-annotate-lockless-accesses-to-sk-sk_err.patch @@ -0,0 +1,93 @@ +From b8e7f84416f19aecd3af15f048fee98e03e1aacb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Mar 2023 20:57:45 +0000 +Subject: mptcp: annotate lockless accesses to sk->sk_err + +From: Eric Dumazet + +[ Upstream commit 9ae8e5ad99b8ebcd3d3dd46075f3825e6f08f063 ] + +mptcp_poll() reads sk->sk_err without socket lock held/owned. + +Add READ_ONCE() and WRITE_ONCE() to avoid load/store tearing. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Stable-dep-of: d5fbeff1ab81 ("mptcp: move __mptcp_error_report in protocol.c") +Signed-off-by: Sasha Levin +--- + net/mptcp/pm_netlink.c | 2 +- + net/mptcp/protocol.c | 8 ++++---- + net/mptcp/subflow.c | 4 ++-- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c +index 9127a7fd5269c..5d845fcf3d09e 100644 +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -2047,7 +2047,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, + nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) + return -EMSGSIZE; + +- sk_err = ssk->sk_err; ++ sk_err = READ_ONCE(ssk->sk_err); + if (sk_err && sk->sk_state == TCP_ESTABLISHED && + nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) + return -EMSGSIZE; +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 60e65f6325c3c..84f107854eac9 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2517,15 +2517,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) + /* Mirror the tcp_reset() error propagation */ + switch (sk->sk_state) { + case TCP_SYN_SENT: +- sk->sk_err = ECONNREFUSED; ++ WRITE_ONCE(sk->sk_err, ECONNREFUSED); + break; + case TCP_CLOSE_WAIT: +- sk->sk_err = EPIPE; ++ WRITE_ONCE(sk->sk_err, EPIPE); + break; + case TCP_CLOSE: + return; + default: +- sk->sk_err = ECONNRESET; ++ WRITE_ONCE(sk->sk_err, ECONNRESET); + } + + inet_sk_state_store(sk, TCP_CLOSE); +@@ -3893,7 +3893,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, + + /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ + smp_rmb(); +- if (sk->sk_err) ++ if (READ_ONCE(sk->sk_err)) + mask |= EPOLLERR; + + return mask; +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 168dced2434b3..032661c8273f2 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1248,7 +1248,7 @@ static bool subflow_check_data_avail(struct sock *ssk) + subflow->reset_reason = MPTCP_RST_EMPTCP; + + reset: +- 
ssk->sk_err = EBADMSG; ++ WRITE_ONCE(ssk->sk_err, EBADMSG); + tcp_set_state(ssk, TCP_CLOSE); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); +@@ -1332,7 +1332,7 @@ void __mptcp_error_report(struct sock *sk) + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); +- sk->sk_err = -err; ++ WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); +-- +2.40.1 + diff --git a/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch b/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch new file mode 100644 index 00000000000..383b4764c86 --- /dev/null +++ b/queue-6.1/mptcp-fix-dangling-connection-hang-up.patch @@ -0,0 +1,269 @@ +From 508f8535214e8e82ecb63fcf8f6a69c18230a889 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Sep 2023 12:52:49 +0200 +Subject: mptcp: fix dangling connection hang-up + +From: Paolo Abeni + +[ Upstream commit 27e5ccc2d5a50ed61bb73153edb1066104b108b3 ] + +According to RFC 8684 section 3.3: + + A connection is not closed unless [...] or an implementation-specific + connection-level send timeout. + +Currently the MPTCP protocol does not implement such a timeout, and +connections timing out at the TCP level never move to the close state. + +Introduce a catch-up condition at subflow close time to move the +MPTCP socket to close, too. + +That additionally allows removing the similar existing logic inside the worker. + +Finally, allow some additional timeout for plain ESTABLISHED mptcp +sockets, as the protocol allows creating new subflows even at that +point and making the connection functional again. + +This issue has actually been present since the beginning, but it is basically +impossible to solve without a long chain of functional pre-requisites +topped by commit bbd49d114d57 ("mptcp: consolidate transition to +TCP_CLOSE in mptcp_do_fastclose()"). 
When backporting this current +patch, please also backport this other commit as well. + +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430 +Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 90 ++++++++++++++++++++++---------------------- + net/mptcp/protocol.h | 22 +++++++++++ + net/mptcp/subflow.c | 1 + + 3 files changed, 67 insertions(+), 46 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 93b60b049be27..60e65f6325c3c 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -846,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) + + mptcp_sockopt_sync_locked(msk, ssk); + mptcp_subflow_joined(msk, ssk); ++ mptcp_stop_tout_timer(sk); + return true; + } + +@@ -2349,18 +2350,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + bool dispose_it, need_push = false; + + /* If the first subflow moved to a close state before accept, e.g. due +- * to an incoming reset, mptcp either: +- * - if either the subflow or the msk are dead, destroy the context +- * (the subflow socket is deleted by inet_child_forget) and the msk +- * - otherwise do nothing at the moment and take action at accept and/or +- * listener shutdown - user-space must be able to accept() the closed +- * socket. ++ * to an incoming reset or listener shutdown, the subflow socket is ++ * already deleted by inet_child_forget() and the mptcp socket can't ++ * survive too. 
+ */ +- if (msk->in_accept_queue && msk->first == ssk) { +- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) +- return; +- ++ if (msk->in_accept_queue && msk->first == ssk && ++ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { + /* ensure later check in mptcp_worker() will dispose the msk */ ++ mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); + sock_set_flag(sk, SOCK_DEAD); + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_drop_ctx(ssk); +@@ -2426,6 +2423,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + + if (need_push) + __mptcp_push_pending(sk, 0); ++ ++ /* Catch every 'all subflows closed' scenario, including peers silently ++ * closing them, e.g. due to timeout. ++ * For established sockets, allow an additional timeout before closing, ++ * as the protocol can still create more subflows. ++ */ ++ if (list_is_singular(&msk->conn_list) && msk->first && ++ inet_sk_state_load(msk->first) == TCP_CLOSE) { ++ if (sk->sk_state != TCP_ESTABLISHED || ++ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { ++ inet_sk_state_store(sk, TCP_CLOSE); ++ mptcp_close_wake_up(sk); ++ } else { ++ mptcp_start_tout_timer(sk); ++ } ++ } + } + + void mptcp_close_ssk(struct sock *sk, struct sock *ssk, +@@ -2469,23 +2482,14 @@ static void __mptcp_close_subflow(struct sock *sk) + + } + +-static bool mptcp_should_close(const struct sock *sk) ++static bool mptcp_close_tout_expired(const struct sock *sk) + { +- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; +- struct mptcp_subflow_context *subflow; +- +- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) +- return true; ++ if (!inet_csk(sk)->icsk_mtup.probe_timestamp || ++ sk->sk_state == TCP_CLOSE) ++ return false; + +- /* if all subflows are in closed status don't bother with additional +- * timeout +- */ +- mptcp_for_each_subflow(mptcp_sk(sk), subflow) { +- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != +- TCP_CLOSE) +- 
return false; +- } +- return true; ++ return time_after32(tcp_jiffies32, ++ inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); + } + + static void mptcp_check_fastclose(struct mptcp_sock *msk) +@@ -2609,15 +2613,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) + struct sock *sk = (struct sock *)msk; + unsigned long timeout, close_timeout; + +- if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) ++ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + +- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; ++ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + ++ TCP_TIMEWAIT_LEN; + + /* the close timeout takes precedence on the fail one, and here at least one of + * them is active + */ +- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; ++ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; + + sk_reset_timer(sk, &sk->sk_timer, timeout); + } +@@ -2636,8 +2641,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) + mptcp_subflow_reset(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); + unlock_sock_fast(ssk, slow); +- +- mptcp_reset_tout_timer(msk, 0); + } + + static void mptcp_do_fastclose(struct sock *sk) +@@ -2676,19 +2679,15 @@ static void mptcp_worker(struct work_struct *work) + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(sk); + +- /* There is no point in keeping around an orphaned sk timedout or +- * closed, but we need the msk around to reply to incoming DATA_FIN, +- * even if it is orphaned and in FIN_WAIT2 state +- */ +- if (sock_flag(sk, SOCK_DEAD)) { +- if (mptcp_should_close(sk)) { +- inet_sk_state_store(sk, TCP_CLOSE); +- mptcp_do_fastclose(sk); +- } +- if (sk->sk_state == TCP_CLOSE) { +- __mptcp_destroy_sock(sk); +- goto unlock; +- } ++ if (mptcp_close_tout_expired(sk)) { ++ inet_sk_state_store(sk, TCP_CLOSE); ++ 
mptcp_do_fastclose(sk); ++ mptcp_close_wake_up(sk); ++ } ++ ++ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { ++ __mptcp_destroy_sock(sk); ++ goto unlock; + } + + if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) +@@ -2984,7 +2983,6 @@ bool __mptcp_close(struct sock *sk, long timeout) + + cleanup: + /* orphan all the subflows */ +- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast_nested(ssk); +@@ -3021,7 +3019,7 @@ bool __mptcp_close(struct sock *sk, long timeout) + __mptcp_destroy_sock(sk); + do_cancel_work = true; + } else { +- mptcp_reset_tout_timer(msk, 0); ++ mptcp_start_tout_timer(sk); + } + + return do_cancel_work; +@@ -3085,7 +3083,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) + inet_sk_state_store(sk, TCP_CLOSE); + + mptcp_stop_rtx_timer(sk); +- sk_stop_timer(sk, &sk->sk_timer); ++ mptcp_stop_tout_timer(sk); + + if (mptcp_sk(sk)->token) + mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index b73160c5e2cf8..91d89a0aeb586 100644 +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -682,6 +682,28 @@ void mptcp_get_options(const struct sk_buff *skb, + void mptcp_finish_connect(struct sock *sk); + void __mptcp_set_connected(struct sock *sk); + void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); ++ ++static inline void mptcp_stop_tout_timer(struct sock *sk) ++{ ++ if (!inet_csk(sk)->icsk_mtup.probe_timestamp) ++ return; ++ ++ sk_stop_timer(sk, &sk->sk_timer); ++ inet_csk(sk)->icsk_mtup.probe_timestamp = 0; ++} ++ ++static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) ++{ ++ /* avoid 0 timestamp, as that means no close timeout */ ++ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? 
: 1; ++} ++ ++static inline void mptcp_start_tout_timer(struct sock *sk) ++{ ++ mptcp_set_close_tout(sk, tcp_jiffies32); ++ mptcp_reset_tout_timer(mptcp_sk(sk), 0); ++} ++ + static inline bool mptcp_is_fully_established(struct sock *sk) + { + return inet_sk_state_load(sk) == TCP_ESTABLISHED && +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 6c8148c6e7710..168dced2434b3 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1527,6 +1527,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, + mptcp_sock_graft(ssk, sk->sk_socket); + iput(SOCK_INODE(sf)); + WRITE_ONCE(msk->allow_infinite_fallback, false); ++ mptcp_stop_tout_timer(sk); + return 0; + + failed_unlink: +-- +2.40.1 + diff --git a/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch b/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch new file mode 100644 index 00000000000..a81cc46994d --- /dev/null +++ b/queue-6.1/mptcp-move-__mptcp_error_report-in-protocol.c.patch @@ -0,0 +1,122 @@ +From 9b949b2bfa5363c42d1b085bfad979a21c50d114 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Sep 2023 12:52:46 +0200 +Subject: mptcp: move __mptcp_error_report in protocol.c + +From: Paolo Abeni + +[ Upstream commit d5fbeff1ab812b6c473b6924bee8748469462e2c ] + +This will simplify the next patch ("mptcp: process pending subflow error +on close"). + +No functional change intended. + +Cc: stable@vger.kernel.org # v5.12+ +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++ + net/mptcp/subflow.c | 36 ------------------------------------ + 2 files changed, 36 insertions(+), 36 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 84f107854eac9..193f2bdc8fe1b 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -765,6 +765,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) + return moved; + } + ++void __mptcp_error_report(struct sock *sk) ++{ ++ struct mptcp_subflow_context *subflow; ++ struct mptcp_sock *msk = mptcp_sk(sk); ++ ++ mptcp_for_each_subflow(msk, subflow) { ++ struct sock *ssk = mptcp_subflow_tcp_sock(subflow); ++ int err = sock_error(ssk); ++ int ssk_state; ++ ++ if (!err) ++ continue; ++ ++ /* only propagate errors on fallen-back sockets or ++ * on MPC connect ++ */ ++ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) ++ continue; ++ ++ /* We need to propagate only transition to CLOSE state. ++ * Orphaned socket will see such state change via ++ * subflow_sched_work_if_closed() and that path will properly ++ * destroy the msk as needed. ++ */ ++ ssk_state = inet_sk_state_load(ssk); ++ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) ++ inet_sk_state_store(sk, ssk_state); ++ WRITE_ONCE(sk->sk_err, -err); ++ ++ /* This barrier is coupled with smp_rmb() in mptcp_poll() */ ++ smp_wmb(); ++ sk_error_report(sk); ++ break; ++ } ++} ++ + /* In most cases we will be able to lock the mptcp socket. If its already + * owned, we need to defer to the work queue to avoid ABBA deadlock. 
+ */ +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 032661c8273f2..b93b08a75017b 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1305,42 +1305,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) + *full_space = tcp_full_space(sk); + } + +-void __mptcp_error_report(struct sock *sk) +-{ +- struct mptcp_subflow_context *subflow; +- struct mptcp_sock *msk = mptcp_sk(sk); +- +- mptcp_for_each_subflow(msk, subflow) { +- struct sock *ssk = mptcp_subflow_tcp_sock(subflow); +- int err = sock_error(ssk); +- int ssk_state; +- +- if (!err) +- continue; +- +- /* only propagate errors on fallen-back sockets or +- * on MPC connect +- */ +- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) +- continue; +- +- /* We need to propagate only transition to CLOSE state. +- * Orphaned socket will see such state change via +- * subflow_sched_work_if_closed() and that path will properly +- * destroy the msk as needed. +- */ +- ssk_state = inet_sk_state_load(ssk); +- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) +- inet_sk_state_store(sk, ssk_state); +- WRITE_ONCE(sk->sk_err, -err); +- +- /* This barrier is coupled with smp_rmb() in mptcp_poll() */ +- smp_wmb(); +- sk_error_report(sk); +- break; +- } +-} +- + static void subflow_error_report(struct sock *ssk) + { + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; +-- +2.40.1 + diff --git a/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch b/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch new file mode 100644 index 00000000000..064572d0c1d --- /dev/null +++ b/queue-6.1/mptcp-process-pending-subflow-error-on-close.patch @@ -0,0 +1,118 @@ +From 25532afde428aef9d85923b345150355c5ff2c0e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Sep 2023 12:52:47 +0200 +Subject: mptcp: process pending subflow error on close + +From: Paolo Abeni + +[ Upstream commit 9f1a98813b4b686482e5ef3c9d998581cace0ba6 ] + +On incoming TCP reset, subflow 
closing could happen before error +propagation. That in turn could cause the socket error being ignored, +and a missing socket state transition, as reported by Daire-Byrne. + +Address the issues explicitly checking for subflow socket error at +close time. To avoid code duplication, factor-out of __mptcp_error_report() +a new helper implementing the relevant bits. + +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429 +Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 63 ++++++++++++++++++++++++-------------------- + 1 file changed, 34 insertions(+), 29 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 193f2bdc8fe1b..b6e0579e72644 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -765,40 +765,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) + return moved; + } + +-void __mptcp_error_report(struct sock *sk) ++static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) + { +- struct mptcp_subflow_context *subflow; +- struct mptcp_sock *msk = mptcp_sk(sk); ++ int err = sock_error(ssk); ++ int ssk_state; + +- mptcp_for_each_subflow(msk, subflow) { +- struct sock *ssk = mptcp_subflow_tcp_sock(subflow); +- int err = sock_error(ssk); +- int ssk_state; ++ if (!err) ++ return false; + +- if (!err) +- continue; ++ /* only propagate errors on fallen-back sockets or ++ * on MPC connect ++ */ ++ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) ++ return false; + +- /* only propagate errors on fallen-back sockets or +- * on MPC connect +- */ +- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) +- continue; ++ /* We need to propagate only transition to CLOSE state. 
++ * Orphaned socket will see such state change via ++ * subflow_sched_work_if_closed() and that path will properly ++ * destroy the msk as needed. ++ */ ++ ssk_state = inet_sk_state_load(ssk); ++ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) ++ inet_sk_state_store(sk, ssk_state); ++ WRITE_ONCE(sk->sk_err, -err); + +- /* We need to propagate only transition to CLOSE state. +- * Orphaned socket will see such state change via +- * subflow_sched_work_if_closed() and that path will properly +- * destroy the msk as needed. +- */ +- ssk_state = inet_sk_state_load(ssk); +- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) +- inet_sk_state_store(sk, ssk_state); +- WRITE_ONCE(sk->sk_err, -err); +- +- /* This barrier is coupled with smp_rmb() in mptcp_poll() */ +- smp_wmb(); +- sk_error_report(sk); +- break; +- } ++ /* This barrier is coupled with smp_rmb() in mptcp_poll() */ ++ smp_wmb(); ++ sk_error_report(sk); ++ return true; ++} ++ ++void __mptcp_error_report(struct sock *sk) ++{ ++ struct mptcp_subflow_context *subflow; ++ struct mptcp_sock *msk = mptcp_sk(sk); ++ ++ mptcp_for_each_subflow(msk, subflow) ++ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow))) ++ break; + } + + /* In most cases we will be able to lock the mptcp socket. 
If its already +@@ -2446,6 +2450,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + } + + out_release: ++ __mptcp_subflow_error_report(sk, ssk); + release_sock(ssk); + + sock_put(ssk); +-- +2.40.1 + diff --git a/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch b/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch new file mode 100644 index 00000000000..9c7fda7c5e0 --- /dev/null +++ b/queue-6.1/mptcp-rename-timer-related-helper-to-less-confusing-.patch @@ -0,0 +1,210 @@ +From f49a0d87f8120207dbf164907a56445911a28011 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Sep 2023 12:52:48 +0200 +Subject: mptcp: rename timer related helper to less confusing names + +From: Paolo Abeni + +[ Upstream commit f6909dc1c1f4452879278128012da6c76bc186a5 ] + +The msk socket uses two different timeouts to track close-related +events and retransmissions. The existing helpers do not indicate +clearly which timer they actually touch, making the related code +quite confusing. + +Change the existing helpers' names to avoid such confusion. No +functional change intended. + +This patch is linked to the next one ("mptcp: fix dangling connection +hang-up"). The two patches are supposed to be backported together. + +Cc: stable@vger.kernel.org # v5.11+ +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts +Signed-off-by: David S.
Miller +Stable-dep-of: 27e5ccc2d5a5 ("mptcp: fix dangling connection hang-up") +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 42 +++++++++++++++++++++--------------------- + net/mptcp/protocol.h | 2 +- + net/mptcp/subflow.c | 2 +- + 3 files changed, 23 insertions(+), 23 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 6dd880d6b0518..93b60b049be27 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -401,7 +401,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, + return false; + } + +-static void mptcp_stop_timer(struct sock *sk) ++static void mptcp_stop_rtx_timer(struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); + +@@ -865,12 +865,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list + } + } + +-static bool mptcp_timer_pending(struct sock *sk) ++static bool mptcp_rtx_timer_pending(struct sock *sk) + { + return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); + } + +-static void mptcp_reset_timer(struct sock *sk) ++static void mptcp_reset_rtx_timer(struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned long tout; +@@ -1054,10 +1054,10 @@ static void __mptcp_clean_una(struct sock *sk) + out: + if (snd_una == READ_ONCE(msk->snd_nxt) && + snd_una == READ_ONCE(msk->write_seq)) { +- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) +- mptcp_stop_timer(sk); ++ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) ++ mptcp_stop_rtx_timer(sk); + } else { +- mptcp_reset_timer(sk); ++ mptcp_reset_rtx_timer(sk); + } + } + +@@ -1606,8 +1606,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) + + out: + /* ensure the rtx timer is running */ +- if (!mptcp_timer_pending(sk)) +- mptcp_reset_timer(sk); ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + if (do_check_data_fin) + mptcp_check_send_data_fin(sk); + } +@@ -1665,8 +1665,8 @@ static void 
__mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) + if (copied) { + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); +- if (!mptcp_timer_pending(sk)) +- mptcp_reset_timer(sk); ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + + if (msk->snd_data_fin_enable && + msk->snd_nxt + 1 == msk->write_seq) +@@ -2227,7 +2227,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) + sock_put(sk); + } + +-static void mptcp_timeout_timer(struct timer_list *t) ++static void mptcp_tout_timer(struct timer_list *t) + { + struct sock *sk = from_timer(sk, t, sk_timer); + +@@ -2597,14 +2597,14 @@ static void __mptcp_retrans(struct sock *sk) + reset_timer: + mptcp_check_and_set_pending(sk); + +- if (!mptcp_timer_pending(sk)) +- mptcp_reset_timer(sk); ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + } + + /* schedule the timeout timer for the relevant event: either close timeout + * or mp_fail timeout. The close timeout takes precedence on the mp_fail one + */ +-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) ++void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) + { + struct sock *sk = (struct sock *)msk; + unsigned long timeout, close_timeout; +@@ -2637,7 +2637,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) + WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); + unlock_sock_fast(ssk, slow); + +- mptcp_reset_timeout(msk, 0); ++ mptcp_reset_tout_timer(msk, 0); + } + + static void mptcp_do_fastclose(struct sock *sk) +@@ -2728,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk) + + /* re-use the csk retrans timer for MPTCP-level retrans */ + timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); +- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); ++ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); + + return 0; + } +@@ -2820,8 +2820,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) + } 
else { + pr_debug("Sending DATA_FIN on subflow %p", ssk); + tcp_send_ack(ssk); +- if (!mptcp_timer_pending(sk)) +- mptcp_reset_timer(sk); ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + } + break; + } +@@ -2904,7 +2904,7 @@ static void __mptcp_destroy_sock(struct sock *sk) + + might_sleep(); + +- mptcp_stop_timer(sk); ++ mptcp_stop_rtx_timer(sk); + sk_stop_timer(sk, &sk->sk_timer); + msk->pm.status = 0; + +@@ -3021,7 +3021,7 @@ bool __mptcp_close(struct sock *sk, long timeout) + __mptcp_destroy_sock(sk); + do_cancel_work = true; + } else { +- mptcp_reset_timeout(msk, 0); ++ mptcp_reset_tout_timer(msk, 0); + } + + return do_cancel_work; +@@ -3084,7 +3084,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) + mptcp_check_listen_stop(sk); + inet_sk_state_store(sk, TCP_CLOSE); + +- mptcp_stop_timer(sk); ++ mptcp_stop_rtx_timer(sk); + sk_stop_timer(sk, &sk->sk_timer); + + if (mptcp_sk(sk)->token) +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index d77b25636125b..b73160c5e2cf8 100644 +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -681,7 +681,7 @@ void mptcp_get_options(const struct sk_buff *skb, + + void mptcp_finish_connect(struct sock *sk); + void __mptcp_set_connected(struct sock *sk); +-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); ++void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + static inline bool mptcp_is_fully_established(struct sock *sk) + { + return inet_sk_state_load(sk) == TCP_ESTABLISHED && +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 52a747a80e88e..6c8148c6e7710 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1161,7 +1161,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) + WRITE_ONCE(subflow->fail_tout, fail_tout); + tcp_send_ack(ssk); + +- mptcp_reset_timeout(msk, subflow->fail_tout); ++ mptcp_reset_tout_timer(msk, subflow->fail_tout); + } + + static bool 
subflow_check_data_avail(struct sock *ssk) +-- +2.40.1 + diff --git a/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch b/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch new file mode 100644 index 00000000000..debf65f9eb9 --- /dev/null +++ b/queue-6.1/nfs-cleanup-unused-rpc_clnt-variable.patch @@ -0,0 +1,39 @@ +From 9aa0b2eda8095073eadf5292d4ea37420997d704 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Apr 2023 12:17:35 -0400 +Subject: NFS: Cleanup unused rpc_clnt variable + +From: Benjamin Coddington + +[ Upstream commit e025f0a73f6acb920d86549b2177a5883535421d ] + +The root rpc_clnt is not used here, clean it up. + +Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") +Signed-off-by: Benjamin Coddington +Reviewed-by: NeilBrown +Signed-off-by: Anna Schumaker +Stable-dep-of: 956fd46f97d2 ("NFSv4: Fix a state manager thread deadlock regression") +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4state.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c +index 5b49e5365bb30..1b707573fbf8d 100644 +--- a/fs/nfs/nfs4state.c ++++ b/fs/nfs/nfs4state.c +@@ -1209,10 +1209,6 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) + { + struct task_struct *task; + char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; +- struct rpc_clnt *cl = clp->cl_rpcclient; +- +- while (cl != cl->cl_parent) +- cl = cl->cl_parent; + + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { +-- +2.40.1 + diff --git a/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch b/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch new file mode 100644 index 00000000000..eadfd5af7af --- /dev/null +++ b/queue-6.1/nfs-rename-nfs_client_kset-to-nfs_kset.patch @@ -0,0 +1,73 @@ +From 307a9be3b1a34bf9bd085bb0bb1088324672d52d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 14:07:22 -0400 +Subject: NFS: rename nfs_client_kset 
to nfs_kset + +From: Benjamin Coddington + +[ Upstream commit 8b18a2edecc0741b0eecf8b18fdb356a0f8682de ] + +Be brief and match the subsystem name. There's no need to distinguish this +kset variable from the server. + +Signed-off-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Stable-dep-of: 956fd46f97d2 ("NFSv4: Fix a state manager thread deadlock regression") +Signed-off-by: Sasha Levin +--- + fs/nfs/sysfs.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c +index a6f7403669631..edb535a0ff973 100644 +--- a/fs/nfs/sysfs.c ++++ b/fs/nfs/sysfs.c +@@ -18,7 +18,7 @@ + #include "sysfs.h" + + struct kobject *nfs_client_kobj; +-static struct kset *nfs_client_kset; ++static struct kset *nfs_kset; + + static void nfs_netns_object_release(struct kobject *kobj) + { +@@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name, + + int nfs_sysfs_init(void) + { +- nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); +- if (!nfs_client_kset) ++ nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj); ++ if (!nfs_kset) + return -ENOMEM; +- nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); ++ nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); + if (!nfs_client_kobj) { +- kset_unregister(nfs_client_kset); +- nfs_client_kset = NULL; ++ kset_unregister(nfs_kset); ++ nfs_kset = NULL; + return -ENOMEM; + } + return 0; +@@ -70,7 +70,7 @@ int nfs_sysfs_init(void) + void nfs_sysfs_exit(void) + { + kobject_put(nfs_client_kobj); +- kset_unregister(nfs_client_kset); ++ kset_unregister(nfs_kset); + } + + static ssize_t nfs_netns_identifier_show(struct kobject *kobj, +@@ -159,7 +159,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + p->net = net; +- p->kobject.kset = nfs_client_kset; ++ p->kobject.kset = nfs_kset; + if (kobject_init_and_add(&p->kobject, 
&nfs_netns_client_type, + parent, "nfs_client") == 0) + return p; +-- +2.40.1 + diff --git a/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch b/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch new file mode 100644 index 00000000000..938a0672405 --- /dev/null +++ b/queue-6.1/nfsv4-fix-a-state-manager-thread-deadlock-regression.patch @@ -0,0 +1,117 @@ +From 7eca5fbc29ba9fd59b57061e5611643168a6bb58 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Sep 2023 13:14:15 -0400 +Subject: NFSv4: Fix a state manager thread deadlock regression + +From: Trond Myklebust + +[ Upstream commit 956fd46f97d238032cb5fa4771cdaccc6e760f9a ] + +Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit +aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it +prevents the setup of new threads to handle reboot recovery, while the +older recovery thread is stuck returning delegations. + +Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 4 +++- + fs/nfs/nfs4state.c | 36 +++++++++++++++++++++++++----------- + 2 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index be570c65ae154..b927a7d1b46d4 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -10629,7 +10629,9 @@ static void nfs4_disable_swap(struct inode *inode) + */ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + +- nfs4_schedule_state_manager(clp); ++ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); ++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); ++ wake_up_var(&clp->cl_state); + } + + static const struct inode_operations nfs4_dir_inode_operations = { +diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c +index 1b707573fbf8d..ed789e0cb9431 100644 +--- a/fs/nfs/nfs4state.c ++++ b/fs/nfs/nfs4state.c +@@ 
-1209,13 +1209,23 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) + { + struct task_struct *task; + char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; ++ struct rpc_clnt *clnt = clp->cl_rpcclient; ++ bool swapon = false; + + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); +- if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { +- wake_up_var(&clp->cl_state); +- return; ++ ++ if (atomic_read(&clnt->cl_swapper)) { ++ swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, ++ &clp->cl_state); ++ if (!swapon) { ++ wake_up_var(&clp->cl_state); ++ return; ++ } + } +- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); ++ ++ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) ++ return; ++ + __module_get(THIS_MODULE); + refcount_inc(&clp->cl_count); + +@@ -1232,8 +1242,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) + __func__, PTR_ERR(task)); + if (!nfs_client_init_is_complete(clp)) + nfs_mark_client_ready(clp, PTR_ERR(task)); ++ if (swapon) ++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + nfs4_clear_state_manager_bit(clp); +- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + nfs_put_client(clp); + module_put(THIS_MODULE); + } +@@ -2737,22 +2748,25 @@ static int nfs4_run_state_manager(void *ptr) + + allow_signal(SIGKILL); + again: +- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + nfs4_state_manager(clp); +- if (atomic_read(&cl->cl_swapper)) { ++ ++ if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && ++ !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { + wait_var_event_interruptible(&clp->cl_state, + test_bit(NFS4CLNT_RUN_MANAGER, + &clp->cl_state)); +- if (atomic_read(&cl->cl_swapper) && +- test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) ++ if (!atomic_read(&cl->cl_swapper)) ++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); ++ if (refcount_read(&clp->cl_count) > 1 && !signalled() && ++ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) + goto again; + /* 
Either no longer a swapper, or were signalled */ ++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + } +- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && +- !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) ++ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) + goto again; + + nfs_put_client(clp); +-- +2.40.1 + diff --git a/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch b/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch new file mode 100644 index 00000000000..2a3896d6fdb --- /dev/null +++ b/queue-6.1/revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch @@ -0,0 +1,59 @@ +From 878cf5ac73826139a016ebb3154ba1e8b0130bd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Jun 2023 14:31:49 -0400 +Subject: Revert "NFSv4: Retry LOCK on OLD_STATEID during delegation return" + +From: Benjamin Coddington + +[ Upstream commit 5b4a82a0724af1dfd1320826e0266117b6a57fbd ] + +Olga Kornievskaia reports that this patch breaks NFSv4.0 state recovery. +It also introduces additional complexity in the error paths for cases not +related to the original problem. Let's revert it for now, and address the +original problem in another manner. + +This reverts commit f5ea16137a3fa2858620dc9084466491c128535f. 
+ +Fixes: f5ea16137a3f ("NFSv4: Retry LOCK on OLD_STATEID during delegation return") +Reported-by: Kornievskaia, Olga +Signed-off-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index b927a7d1b46d4..e1297c6bcfbe2 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -7157,7 +7157,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) + { + struct nfs4_lockdata *data = calldata; + struct nfs4_lock_state *lsp = data->lsp; +- struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; +@@ -7165,7 +7164,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) + data->rpc_status = task->tk_status; + switch (task->tk_status) { + case 0: +- renew_lease(server, data->timestamp); ++ renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), ++ data->timestamp); + if (data->arg.new_lock && !data->cancelled) { + data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) +@@ -7193,8 +7193,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) + if (!nfs4_stateid_match(&data->arg.open_stateid, + &lsp->ls_state->open_stateid)) + goto out_restart; +- else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) +- goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, + &lsp->ls_stateid)) + goto out_restart; +-- +2.40.1 + diff --git a/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch b/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch new file mode 100644 index 00000000000..118c6a24305 --- /dev/null +++ b/queue-6.1/ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch @@ -0,0 +1,149 @@ +From 3ba4e64542bd7ce2b45b115607a32a814492eb6f Mon Sep 17 00:00:00 2001 +From: 
Sasha Levin +Date: Thu, 21 Sep 2023 20:54:25 +0800 +Subject: ring-buffer: Fix bytes info in per_cpu buffer stats + +From: Zheng Yejian + +[ Upstream commit 45d99ea451d0c30bfd4864f0fe485d7dac014902 ] + +The 'bytes' info in file 'per_cpu/cpu/stats' means the number of +bytes in cpu buffer that have not been consumed. However, currently +after consuming data by reading file 'trace_pipe', the 'bytes' info +was not changed as expected. + + # cat per_cpu/cpu0/stats + entries: 0 + overrun: 0 + commit overrun: 0 + bytes: 568 <--- 'bytes' is problematical !!! + oldest event ts: 8651.371479 + now ts: 8653.912224 + dropped events: 0 + read events: 8 + +The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: + 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); + 2. When stat 'entries_bytes', exclude the discarded padding event which + is smaller than minimum size because it is invisible to reader. Then + use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for + page-based read/remove/overrun. + +Also correct the comments of ring_buffer_bytes_cpu() in this patch. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengyejian1@huawei.com + +Cc: stable@vger.kernel.org +Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/ring_buffer.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c +index 51737b3d54b35..b7383358c4ea1 100644 +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) + local_set(&bpage->commit, 0); + } + ++static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) ++{ ++ return local_read(&bpage->page->commit); ++} ++ + static void free_buffer_page(struct buffer_page *bpage) + { + free_page((unsigned long)bpage->page); +@@ -2020,7 +2025,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) + * Increment overrun to account for the lost events. + */ + local_add(page_entries, &cpu_buffer->overrun); +- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); ++ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); + } + +@@ -2364,11 +2369,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) + cpu_buffer->reader_page->read); + } + +-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) +-{ +- return local_read(&bpage->page->commit); +-} +- + static struct ring_buffer_event * + rb_iter_head_event(struct ring_buffer_iter *iter) + { +@@ -2514,7 +2514,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, + * the counters. 
+ */ + local_add(entries, &cpu_buffer->overrun); +- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); ++ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); + + /* +@@ -2657,9 +2657,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, + + event = __rb_page_index(tail_page, tail); + +- /* account for padding bytes */ +- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); +- + /* + * Save the original length to the meta data. + * This will be used by the reader to add lost event +@@ -2673,7 +2670,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, + * write counter enough to allow another writer to slip + * in on this page. + * We put in a discarded commit instead, to make sure +- * that this space is not used again. ++ * that this space is not used again, and this space will ++ * not be accounted into 'entries_bytes'. + * + * If we are less than the minimum size, we don't need to + * worry about it. +@@ -2698,6 +2696,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, + /* time delta must be non zero */ + event->time_delta = 1; + ++ /* account for padding bytes */ ++ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); ++ + /* Make sure the padding is visible before the tail_page->write update */ + smp_wmb(); + +@@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) + EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); + + /** +- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer ++ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer + * @buffer: The ring buffer + * @cpu: The per CPU buffer to read from. 
+ */ +@@ -4725,6 +4726,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) + + length = rb_event_length(event); + cpu_buffer->reader_page->read += length; ++ cpu_buffer->read_bytes += length; + } + + static void rb_advance_iter(struct ring_buffer_iter *iter) +@@ -5820,7 +5822,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, + } else { + /* update the entry counter */ + cpu_buffer->read += rb_page_entries(reader); +- cpu_buffer->read_bytes += BUF_PAGE_SIZE; ++ cpu_buffer->read_bytes += rb_page_commit(reader); + + /* swap the pages */ + rb_init_page(bpage); +-- +2.40.1 + diff --git a/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch b/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch new file mode 100644 index 00000000000..fd703abbe22 --- /dev/null +++ b/queue-6.1/ring-buffer-remove-obsolete-comment-for-free_buffer_.patch @@ -0,0 +1,50 @@ +From 54e23956944147b55c7e5a2e07d96653611e0832 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Mar 2023 15:24:46 +0100 +Subject: ring-buffer: remove obsolete comment for free_buffer_page() + +From: Vlastimil Babka + +[ Upstream commit a98151ad53b53f010ee364ec2fd06445b328578b ] + +The comment refers to mm/slob.c which is being removed. It comes from +commit ed56829cb319 ("ring_buffer: reset buffer page when freeing") and +according to Steven the borrowed code was a page mapcount and mapping +reset, which was later removed by commit e4c2ce82ca27 ("ring_buffer: +allocate buffer page pointer"). Thus the comment is not accurate anyway, +remove it. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20230315142446.27040-1-vbabka@suse.cz + +Cc: Masami Hiramatsu +Cc: Ingo Molnar +Reported-by: Mike Rapoport +Suggested-by: Steven Rostedt (Google) +Fixes: e4c2ce82ca27 ("ring_buffer: allocate buffer page pointer") +Signed-off-by: Vlastimil Babka +Reviewed-by: Mukesh Ojha +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 45d99ea451d0 ("ring-buffer: Fix bytes info in per_cpu buffer stats") +Signed-off-by: Sasha Levin +--- + kernel/trace/ring_buffer.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c +index 2f562cf961e0a..51737b3d54b35 100644 +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -354,10 +354,6 @@ static void rb_init_page(struct buffer_data_page *bpage) + local_set(&bpage->commit, 0); + } + +-/* +- * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing +- * this issue out. +- */ + static void free_buffer_page(struct buffer_page *bpage) + { + free_page((unsigned long)bpage->page); +-- +2.40.1 + diff --git a/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch b/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch new file mode 100644 index 00000000000..0ba372725de --- /dev/null +++ b/queue-6.1/scsi-core-improve-type-safety-of-scsi_rescan_device.patch @@ -0,0 +1,198 @@ +From 40d723c10314e9132382eed0470564cce942d865 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Aug 2023 08:30:41 -0700 +Subject: scsi: core: Improve type safety of scsi_rescan_device() + +From: Bart Van Assche + +[ Upstream commit 79519528a180c64a90863db2ce70887de6c49d16 ] + +Most callers of scsi_rescan_device() have the scsi_device pointer readily +available. Pass a struct scsi_device pointer to scsi_rescan_device() +instead of a struct device pointer. This change prevents that a pointer to +another struct device would be passed accidentally to scsi_rescan_device(). 
+ +Remove the scsi_rescan_device() declaration from the scsi_priv.h header +file since it duplicates the declaration in <scsi/scsi_host.h>. + +Reviewed-by: Hannes Reinecke +Reviewed-by: Damien Le Moal +Reviewed-by: John Garry +Cc: Mike Christie +Cc: Ming Lei +Signed-off-by: Bart Van Assche +Link: https://lore.kernel.org/r/20230822153043.4046244-1-bvanassche@acm.org +Signed-off-by: Martin K. Petersen +Stable-dep-of: 8b4d9469d0b0 ("ata: libata-scsi: Fix delayed scsi_rescan_device() execution") +Signed-off-by: Sasha Levin +--- + drivers/ata/libata-scsi.c | 2 +- + drivers/scsi/aacraid/commsup.c | 2 +- + drivers/scsi/mvumi.c | 2 +- + drivers/scsi/scsi_lib.c | 2 +- + drivers/scsi/scsi_priv.h | 1 - + drivers/scsi/scsi_scan.c | 4 ++-- + drivers/scsi/scsi_sysfs.c | 4 ++-- + drivers/scsi/smartpqi/smartpqi_init.c | 2 +- + drivers/scsi/storvsc_drv.c | 2 +- + drivers/scsi/virtio_scsi.c | 2 +- + include/scsi/scsi_host.h | 2 +- + 11 files changed, 12 insertions(+), 13 deletions(-) + +diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c +index 8cc8268327f0c..b348f77b91231 100644 +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -4678,7 +4678,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) + } + + spin_unlock_irqrestore(ap->lock, flags); +- scsi_rescan_device(&(sdev->sdev_gendev)); ++ scsi_rescan_device(sdev); + scsi_device_put(sdev); + spin_lock_irqsave(ap->lock, flags); + } +diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c +index 3f062e4013ab6..013a9a334972e 100644 +--- a/drivers/scsi/aacraid/commsup.c ++++ b/drivers/scsi/aacraid/commsup.c +@@ -1451,7 +1451,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) + #endif + break; + } +- scsi_rescan_device(&device->sdev_gendev); ++ scsi_rescan_device(device); + break; + + default: +diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c +index 05d3ce9b72dba..c4acf65379d20 100644 +--- a/drivers/scsi/mvumi.c ++++ b/drivers/scsi/mvumi.c +@@ -1500,7 +1500,7 @@ 
static void mvumi_rescan_devices(struct mvumi_hba *mhba, int id) + + sdev = scsi_device_lookup(mhba->shost, 0, id, 0); + if (sdev) { +- scsi_rescan_device(&sdev->sdev_gendev); ++ scsi_rescan_device(sdev); + scsi_device_put(sdev); + } + } +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index fb6e9a7a7f58b..d25e1c2472538 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -2445,7 +2445,7 @@ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt) + envp[idx++] = "SDEV_MEDIA_CHANGE=1"; + break; + case SDEV_EVT_INQUIRY_CHANGE_REPORTED: +- scsi_rescan_device(&sdev->sdev_gendev); ++ scsi_rescan_device(sdev); + envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED"; + break; + case SDEV_EVT_CAPACITY_CHANGE_REPORTED: +diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h +index c52de9a973e46..b14545acb40f5 100644 +--- a/drivers/scsi/scsi_priv.h ++++ b/drivers/scsi/scsi_priv.h +@@ -132,7 +132,6 @@ extern int scsi_complete_async_scans(void); + extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, + unsigned int, u64, enum scsi_scan_mode); + extern void scsi_forget_host(struct Scsi_Host *); +-extern void scsi_rescan_device(struct device *); + + /* scsi_sysctl.c */ + #ifdef CONFIG_SYSCTL +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index d12f2dcb4040a..445989f44d3f2 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -1611,9 +1611,9 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, + } + EXPORT_SYMBOL(scsi_add_device); + +-void scsi_rescan_device(struct device *dev) ++void scsi_rescan_device(struct scsi_device *sdev) + { +- struct scsi_device *sdev = to_scsi_device(dev); ++ struct device *dev = &sdev->sdev_gendev; + + device_lock(dev); + +diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c +index cac7c902cf70a..1f531063d6331 100644 +--- a/drivers/scsi/scsi_sysfs.c ++++ b/drivers/scsi/scsi_sysfs.c +@@ -762,7 +762,7 @@ static ssize_t 
+ store_rescan_field (struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) + { +- scsi_rescan_device(dev); ++ scsi_rescan_device(to_scsi_device(dev)); + return count; + } + static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); +@@ -855,7 +855,7 @@ store_state_field(struct device *dev, struct device_attribute *attr, + * waiting for pending I/O to finish. + */ + blk_mq_run_hw_queues(sdev->request_queue, true); +- scsi_rescan_device(dev); ++ scsi_rescan_device(sdev); + } + + return ret == 0 ? count : -EINVAL; +diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c +index 9f0f69c1ed665..47d487729635c 100644 +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -2278,7 +2278,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, + device->advertised_queue_depth = device->queue_depth; + scsi_change_queue_depth(device->sdev, device->advertised_queue_depth); + if (device->rescan) { +- scsi_rescan_device(&device->sdev->sdev_gendev); ++ scsi_rescan_device(device->sdev); + device->rescan = false; + } + } +diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c +index 7a1dc5c7c49ee..c2d981d5a2dd5 100644 +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -471,7 +471,7 @@ static void storvsc_device_scan(struct work_struct *work) + sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun); + if (!sdev) + goto done; +- scsi_rescan_device(&sdev->sdev_gendev); ++ scsi_rescan_device(sdev); + scsi_device_put(sdev); + + done: +diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c +index 2a79ab16134b1..3f8c553f3d91e 100644 +--- a/drivers/scsi/virtio_scsi.c ++++ b/drivers/scsi/virtio_scsi.c +@@ -325,7 +325,7 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi, + /* Handle "Parameters changed", "Mode parameters changed", and + "Capacity data has changed". 
*/ + if (asc == 0x2a && (ascq == 0x00 || ascq == 0x01 || ascq == 0x09)) +- scsi_rescan_device(&sdev->sdev_gendev); ++ scsi_rescan_device(sdev); + + scsi_device_put(sdev); + } +diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h +index d27d9fb7174c8..16848def47a1d 100644 +--- a/include/scsi/scsi_host.h ++++ b/include/scsi/scsi_host.h +@@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, + struct device *, + struct device *); + extern void scsi_scan_host(struct Scsi_Host *); +-extern void scsi_rescan_device(struct device *); ++extern void scsi_rescan_device(struct scsi_device *); + extern void scsi_remove_host(struct Scsi_Host *); + extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); + extern int scsi_host_busy(struct Scsi_Host *shost); +-- +2.40.1 + diff --git a/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch b/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch new file mode 100644 index 00000000000..2ccde96961d --- /dev/null +++ b/queue-6.1/scsi-do-not-attempt-to-rescan-suspended-devices.patch @@ -0,0 +1,98 @@ +From bc1853103b803f47e566a48893bfdac0dcd86f99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 15:00:13 +0900 +Subject: scsi: Do not attempt to rescan suspended devices + +From: Damien Le Moal + +[ Upstream commit ff48b37802e5c134e2dfc4d091f10b2eb5065a72 ] + +scsi_rescan_device() takes a scsi device lock before executing a device +handler and device driver rescan methods. Waiting for the completion of +any command issued to the device by these methods will thus be done with +the device lock held. As a result, there is a risk of deadlocking within +the power management code if scsi_rescan_device() is called to handle a +device resume with the associated scsi device not yet resumed. 
+ +Avoid such situation by checking that the target scsi device is in the +running state, that is, fully capable of executing commands, before +proceeding with the rescan and bail out, returning -EWOULDBLOCK otherwise. +With this error return, the caller can retry rescanning the device after +a delay. + +The state check is done with the device lock held and is thus safe +against incoming suspend power management operations. + +Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Niklas Cassel +Tested-by: Geert Uytterhoeven +Reviewed-by: Martin K. Petersen +Reviewed-by: Bart Van Assche +Stable-dep-of: 8b4d9469d0b0 ("ata: libata-scsi: Fix delayed scsi_rescan_device() execution") +Signed-off-by: Sasha Levin +--- + drivers/scsi/scsi_scan.c | 18 +++++++++++++++++- + include/scsi/scsi_host.h | 2 +- + 2 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index 445989f44d3f2..ed26c52ed8474 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -1611,12 +1611,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, + } + EXPORT_SYMBOL(scsi_add_device); + +-void scsi_rescan_device(struct scsi_device *sdev) ++int scsi_rescan_device(struct scsi_device *sdev) + { + struct device *dev = &sdev->sdev_gendev; ++ int ret = 0; + + device_lock(dev); + ++ /* ++ * Bail out if the device is not running. Otherwise, the rescan may ++ * block waiting for commands to be executed, with us holding the ++ * device lock. This can result in a potential deadlock in the power ++ * management core code when system resume is on-going. 
++ */ ++ if (sdev->sdev_state != SDEV_RUNNING) { ++ ret = -EWOULDBLOCK; ++ goto unlock; ++ } ++ + scsi_attach_vpd(sdev); + + if (sdev->handler && sdev->handler->rescan) +@@ -1629,7 +1641,11 @@ void scsi_rescan_device(struct scsi_device *sdev) + drv->rescan(dev); + module_put(dev->driver->owner); + } ++ ++unlock: + device_unlock(dev); ++ ++ return ret; + } + EXPORT_SYMBOL(scsi_rescan_device); + +diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h +index 16848def47a1d..71def41b1ad78 100644 +--- a/include/scsi/scsi_host.h ++++ b/include/scsi/scsi_host.h +@@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, + struct device *, + struct device *); + extern void scsi_scan_host(struct Scsi_Host *); +-extern void scsi_rescan_device(struct scsi_device *); ++extern int scsi_rescan_device(struct scsi_device *sdev); + extern void scsi_remove_host(struct Scsi_Host *); + extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); + extern int scsi_host_busy(struct Scsi_Host *shost); +-- +2.40.1 + diff --git a/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch b/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch new file mode 100644 index 00000000000..1232bed5888 --- /dev/null +++ b/queue-6.1/scsi-sd-differentiate-system-and-runtime-start-stop-.patch @@ -0,0 +1,301 @@ +From 3b3411e173405c039b240cffc037812620e3b00b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 10:02:41 +0900 +Subject: scsi: sd: Differentiate system and runtime start/stop management + +From: Damien Le Moal + +[ Upstream commit 3cc2ffe5c16dc65dfac354bc5b5bc98d3b397567 ] + +The underlying device and driver of a SCSI disk may have different +system and runtime power mode control requirements. This is because +runtime power management affects only the SCSI disk, while system level +power management affects all devices, including the controller for the +SCSI disk. 
+ +For instance, issuing a START STOP UNIT command when a SCSI disk is +runtime suspended and resumed is fine: the command is translated to a +STANDBY IMMEDIATE command to spin down the ATA disk and to a VERIFY +command to wake it up. The SCSI disk runtime operations have no effect +on the ata port device used to connect the ATA disk. However, for +system suspend/resume operations, the ATA port used to connect the +device will also be suspended and resumed, with the resume operation +requiring re-validating the device link and the device itself. In this +case, issuing a VERIFY command to spinup the disk must be done before +starting to revalidate the device, when the ata port is being resumed. +In such case, we must not allow the SCSI disk driver to issue START STOP +UNIT commands. + +Allow a low level driver to refine the SCSI disk start/stop management +by differentiating system and runtime cases with two new SCSI device +flags: manage_system_start_stop and manage_runtime_start_stop. These new +flags replace the current manage_start_stop flag. Drivers setting the +manage_start_stop are modified to set both new flags, thus preserving the +existing start/stop management behavior. For backward compatibility, the +old manage_start_stop sysfs device attribute is kept as a read-only +attribute showing a value of 1 for devices enabling both new flags and 0 +otherwise. + +Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Tested-by: Geert Uytterhoeven +Reviewed-by: Martin K. 
Petersen +Stable-dep-of: 99398d2070ab ("scsi: sd: Do not issue commands to suspended disks on shutdown") +Signed-off-by: Sasha Levin +--- + drivers/ata/libata-scsi.c | 3 +- + drivers/firewire/sbp2.c | 9 ++-- + drivers/scsi/sd.c | 90 ++++++++++++++++++++++++++++++-------- + include/scsi/scsi_device.h | 5 ++- + 4 files changed, 84 insertions(+), 23 deletions(-) + +diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c +index 9c8dd9f86cbb3..8cc8268327f0c 100644 +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -1087,7 +1087,8 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) + * will be woken up by ata_port_pm_resume() with a port reset + * and device revalidation. + */ +- sdev->manage_start_stop = 1; ++ sdev->manage_system_start_stop = true; ++ sdev->manage_runtime_start_stop = true; + sdev->no_start_on_resume = 1; + } + +diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c +index 60051c0cabeaa..e322a326546b5 100644 +--- a/drivers/firewire/sbp2.c ++++ b/drivers/firewire/sbp2.c +@@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " + * + * - power condition + * Set the power condition field in the START STOP UNIT commands sent by +- * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on). ++ * sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or ++ * manage_runtime_start_stop is on). + * Some disks need this to spin down or to resume properly. 
+ * + * - override internal blacklist +@@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) + + sdev->use_10_for_rw = 1; + +- if (sbp2_param_exclusive_login) +- sdev->manage_start_stop = 1; ++ if (sbp2_param_exclusive_login) { ++ sdev->manage_system_start_stop = true; ++ sdev->manage_runtime_start_stop = true; ++ } + + if (sdev->type == TYPE_ROM) + sdev->use_10_for_ms = 1; +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index 5bfca49415113..2ed57dfaf9ee0 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -213,18 +213,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr, + } + + static ssize_t +-manage_start_stop_show(struct device *dev, struct device_attribute *attr, +- char *buf) ++manage_start_stop_show(struct device *dev, ++ struct device_attribute *attr, char *buf) + { + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + +- return sprintf(buf, "%u\n", sdp->manage_start_stop); ++ return sysfs_emit(buf, "%u\n", ++ sdp->manage_system_start_stop && ++ sdp->manage_runtime_start_stop); + } ++static DEVICE_ATTR_RO(manage_start_stop); + + static ssize_t +-manage_start_stop_store(struct device *dev, struct device_attribute *attr, +- const char *buf, size_t count) ++manage_system_start_stop_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_disk *sdkp = to_scsi_disk(dev); ++ struct scsi_device *sdp = sdkp->device; ++ ++ return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); ++} ++ ++static ssize_t ++manage_system_start_stop_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) + { + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; +@@ -236,11 +250,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr, + if (kstrtobool(buf, &v)) + return -EINVAL; + +- sdp->manage_start_stop = v; ++ sdp->manage_system_start_stop = v; + + 
return count; + } +-static DEVICE_ATTR_RW(manage_start_stop); ++static DEVICE_ATTR_RW(manage_system_start_stop); ++ ++static ssize_t ++manage_runtime_start_stop_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_disk *sdkp = to_scsi_disk(dev); ++ struct scsi_device *sdp = sdkp->device; ++ ++ return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); ++} ++ ++static ssize_t ++manage_runtime_start_stop_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct scsi_disk *sdkp = to_scsi_disk(dev); ++ struct scsi_device *sdp = sdkp->device; ++ bool v; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ if (kstrtobool(buf, &v)) ++ return -EINVAL; ++ ++ sdp->manage_runtime_start_stop = v; ++ ++ return count; ++} ++static DEVICE_ATTR_RW(manage_runtime_start_stop); + + static ssize_t + allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) +@@ -572,6 +617,8 @@ static struct attribute *sd_disk_attrs[] = { + &dev_attr_FUA.attr, + &dev_attr_allow_restart.attr, + &dev_attr_manage_start_stop.attr, ++ &dev_attr_manage_system_start_stop.attr, ++ &dev_attr_manage_runtime_start_stop.attr, + &dev_attr_protection_type.attr, + &dev_attr_protection_mode.attr, + &dev_attr_app_tag_own.attr, +@@ -3652,13 +3699,20 @@ static void sd_shutdown(struct device *dev) + sd_sync_cache(sdkp, NULL); + } + +- if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { ++ if (system_state != SYSTEM_RESTART && ++ sdkp->device->manage_system_start_stop) { + sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + sd_start_stop_device(sdkp, 0); + } + } + +-static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) ++static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) ++{ ++ return (sdev->manage_system_start_stop && !runtime) || ++ (sdev->manage_runtime_start_stop && runtime); ++} ++ ++static int sd_suspend_common(struct device *dev, 
bool runtime) + { + struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_sense_hdr sshdr; +@@ -3690,12 +3744,12 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) + } + } + +- if (sdkp->device->manage_start_stop) { ++ if (sd_do_start_stop(sdkp->device, runtime)) { + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + /* an error is not worth aborting a system sleep */ + ret = sd_start_stop_device(sdkp, 0); +- if (ignore_stop_errors) ++ if (!runtime) + ret = 0; + } + +@@ -3707,23 +3761,23 @@ static int sd_suspend_system(struct device *dev) + if (pm_runtime_suspended(dev)) + return 0; + +- return sd_suspend_common(dev, true); ++ return sd_suspend_common(dev, false); + } + + static int sd_suspend_runtime(struct device *dev) + { +- return sd_suspend_common(dev, false); ++ return sd_suspend_common(dev, true); + } + +-static int sd_resume(struct device *dev) ++static int sd_resume(struct device *dev, bool runtime) + { + struct scsi_disk *sdkp = dev_get_drvdata(dev); +- int ret = 0; ++ int ret; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + +- if (!sdkp->device->manage_start_stop) ++ if (!sd_do_start_stop(sdkp->device, runtime)) + return 0; + + if (!sdkp->device->no_start_on_resume) { +@@ -3741,7 +3795,7 @@ static int sd_resume_system(struct device *dev) + if (pm_runtime_suspended(dev)) + return 0; + +- return sd_resume(dev); ++ return sd_resume(dev, false); + } + + static int sd_resume_runtime(struct device *dev) +@@ -3765,7 +3819,7 @@ static int sd_resume_runtime(struct device *dev) + "Failed to clear sense data\n"); + } + +- return sd_resume(dev); ++ return sd_resume(dev, true); + } + + /** +diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h +index 9fdc77db3a2a8..dc2cff18b68bd 100644 +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -161,6 +161,10 @@ struct scsi_device { + * pass settings from slave_alloc to scsi + * core. 
*/ + unsigned int eh_timeout; /* Error handling timeout */ ++ ++ bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */ ++ bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */ ++ + unsigned removable:1; + unsigned changed:1; /* Data invalid due to media change */ + unsigned busy:1; /* Used to prevent races */ +@@ -192,7 +196,6 @@ struct scsi_device { + unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */ + unsigned no_start_on_add:1; /* do not issue start on add */ + unsigned allow_restart:1; /* issue START_UNIT in error handler */ +- unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */ + unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */ + unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */ + unsigned no_uld_attach:1; /* disable connecting to upper level drivers */ +-- +2.40.1 + diff --git a/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch b/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch new file mode 100644 index 00000000000..174000f2c6d --- /dev/null +++ b/queue-6.1/scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch @@ -0,0 +1,106 @@ +From 29c60a5e17517ecf23bd2258340ac055e6ef1832 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 17:03:15 +0900 +Subject: scsi: sd: Do not issue commands to suspended disks on shutdown + +From: Damien Le Moal + +[ Upstream commit 99398d2070ab03d13f90b758ad397e19a65fffb0 ] + +If an error occurs when resuming a host adapter before the devices +attached to the adapter are resumed, the adapter low level driver may +remove the scsi host, resulting in a call to sd_remove() for the +disks of the host. This in turn results in a call to sd_shutdown() which +will issue a synchronize cache command and a start stop unit command to +spindown the disk. 
sd_shutdown() issues the commands only if the device +is not already runtime suspended but does not check the power state for +system-wide suspend/resume. That is, the commands may be issued with the +device in a suspended state, which causes PM resume to hang, forcing a +reset of the machine to recover. + +Fix this by tracking the suspended state of a disk by introducing the +suspended boolean field in the scsi_disk structure. This flag is set to +true when the disk is suspended in sd_suspend_common() and resumed with +sd_resume(). When suspended is true, sd_shutdown() is not executed from +sd_remove(). + +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Bart Van Assche +Reviewed-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/sd.c | 17 +++++++++++++---- + drivers/scsi/sd.h | 1 + + 2 files changed, 14 insertions(+), 4 deletions(-) + +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index 2ed57dfaf9ee0..30184f7b762c1 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3626,7 +3626,8 @@ static int sd_remove(struct device *dev) + + device_del(&sdkp->disk_dev); + del_gendisk(sdkp->disk); +- sd_shutdown(dev); ++ if (!sdkp->suspended) ++ sd_shutdown(dev); + + put_disk(sdkp->disk); + return 0; +@@ -3753,6 +3754,9 @@ static int sd_suspend_common(struct device *dev, bool runtime) + ret = 0; + } + ++ if (!ret) ++ sdkp->suspended = true; ++ + return ret; + } + +@@ -3772,21 +3776,26 @@ static int sd_suspend_runtime(struct device *dev) + static int sd_resume(struct device *dev, bool runtime) + { + struct scsi_disk *sdkp = dev_get_drvdata(dev); +- int ret; ++ int ret = 0; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + +- if (!sd_do_start_stop(sdkp->device, runtime)) ++ if (!sd_do_start_stop(sdkp->device, runtime)) { ++ sdkp->suspended = false; + return 0; ++ } + + if (!sdkp->device->no_start_on_resume) { + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 
+ ret = sd_start_stop_device(sdkp, 1); + } + +- if (!ret) ++ if (!ret) { + opal_unlock_from_suspend(sdkp->opal_dev); ++ sdkp->suspended = false; ++ } ++ + return ret; + } + +diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h +index 5eea762f84d18..409dda5350d10 100644 +--- a/drivers/scsi/sd.h ++++ b/drivers/scsi/sd.h +@@ -131,6 +131,7 @@ struct scsi_disk { + u8 provisioning_mode; + u8 zeroing_mode; + u8 nr_actuators; /* Number of actuators */ ++ bool suspended; /* Disk is suspended (stopped) */ + unsigned ATO : 1; /* state of disk ATO bit */ + unsigned cache_override : 1; /* temp override of WCE,RCD */ + unsigned WCE : 1; /* state of disk WCE bit */ +-- +2.40.1 + diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..267d52f1263 --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,41 @@ +spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch +spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch +alsa-hda-tas2781-add-tas2781-hda-driver.patch +alsa-hda-realtek-add-quirk-for-hp-victus-16-d1xxx-to.patch +alsa-hda-realtek-add-quirk-for-mute-leds-on-hp-envy-.patch +alsa-hda-realtek-alc287-i2s-speaker-platform-support.patch +alsa-hda-realtek-alc287-realtek-i2s-speaker-platform.patch +asoc-soc-utils-export-snd_soc_dai_is_dummy-symbol.patch +asoc-tegra-fix-redundant-plla-and-plla_out0-updates.patch +maple_tree-remove-the-redundant-code.patch +maple_tree-relocate-the-declaration-of-mas_empty_are.patch +maple_tree-add-mas_is_active-to-detect-in-tree-walks.patch +mptcp-rename-timer-related-helper-to-less-confusing-.patch +mptcp-fix-dangling-connection-hang-up.patch +mptcp-annotate-lockless-accesses-to-sk-sk_err.patch +mptcp-move-__mptcp_error_report-in-protocol.c.patch +mptcp-process-pending-subflow-error-on-close.patch +ata-scsi-do-not-issue-start-stop-unit-on-resume.patch +scsi-sd-differentiate-system-and-runtime-start-stop-.patch +scsi-sd-do-not-issue-commands-to-suspended-disks-on-.patch 
+scsi-core-improve-type-safety-of-scsi_rescan_device.patch +scsi-do-not-attempt-to-rescan-suspended-devices.patch +ata-libata-scsi-fix-delayed-scsi_rescan_device-execu.patch +nfs-cleanup-unused-rpc_clnt-variable.patch +nfs-rename-nfs_client_kset-to-nfs_kset.patch +nfsv4-fix-a-state-manager-thread-deadlock-regression.patch +mm-memory-add-vm_normal_folio.patch +mm-mempolicy-convert-queue_pages_pmd-to-queue_folios.patch +mm-mempolicy-convert-queue_pages_pte_range-to-queue_.patch +mm-mempolicy-convert-migrate_page_add-to-migrate_fol.patch +mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-an.patch +mm-page_alloc-always-remove-pages-from-temporary-lis.patch +mm-page_alloc-leave-irqs-enabled-for-per-cpu-page-al.patch +mm-page_alloc-fix-cma-and-highatomic-landing-on-the-.patch +ring-buffer-remove-obsolete-comment-for-free_buffer_.patch +ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch +btrfs-use-struct-qstr-instead-of-name-and-namelen-pa.patch +btrfs-setup-qstr-from-dentrys-using-fscrypt-helper.patch +btrfs-use-struct-fscrypt_str-instead-of-struct-qstr.patch +revert-nfsv4-retry-lock-on-old_stateid-during-delega.patch +arm64-avoid-repeated-aa64mmfr1_el1-register-read-on-.patch diff --git a/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch b/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch new file mode 100644 index 00000000000..1684da509cd --- /dev/null +++ b/queue-6.1/spi-zynqmp-gqspi-convert-to-platform-remove-callback.patch @@ -0,0 +1,66 @@ +From a0dd03214a34d747df5a47cacec49c4693890454 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 18:20:41 +0100 +Subject: spi: zynqmp-gqspi: Convert to platform remove callback returning void +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +[ Upstream commit 3ffefa1d9c9eba60c7f8b4a9ce2df3e4c7f4a88e ] + +The .remove() callback for a platform driver returns an int which makes +many driver authors 
wrongly assume it's possible to do error handling by +returning an error code. However the value returned is (mostly) ignored +and this typically results in resource leaks. To improve here there is a +quest to make the remove callback return void. In the first step of this +quest all drivers are converted to .remove_new() which already returns +void. + +Trivially convert this driver from always returning zero in the remove +callback to the void returning variant. + +Signed-off-by: Uwe Kleine-König +Link: https://lore.kernel.org/r/20230303172041.2103336-88-u.kleine-koenig@pengutronix.de +Signed-off-by: Mark Brown +Stable-dep-of: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-zynqmp-gqspi.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c +index c760aac070e54..876a41c5d1664 100644 +--- a/drivers/spi/spi-zynqmp-gqspi.c ++++ b/drivers/spi/spi-zynqmp-gqspi.c +@@ -1240,7 +1240,7 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) + * + * Return: 0 Always + */ +-static int zynqmp_qspi_remove(struct platform_device *pdev) ++static void zynqmp_qspi_remove(struct platform_device *pdev) + { + struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + +@@ -1249,8 +1249,6 @@ static int zynqmp_qspi_remove(struct platform_device *pdev) + clk_disable_unprepare(xqspi->pclk); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_disable(&pdev->dev); +- +- return 0; + } + + static const struct of_device_id zynqmp_qspi_of_match[] = { +@@ -1262,7 +1260,7 @@ MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match); + + static struct platform_driver zynqmp_qspi_driver = { + .probe = zynqmp_qspi_probe, +- .remove = zynqmp_qspi_remove, ++ .remove_new = zynqmp_qspi_remove, + .driver = { + .name = "zynqmp-qspi", + .of_match_table = zynqmp_qspi_of_match, +-- +2.40.1 + diff --git 
a/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch b/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch new file mode 100644 index 00000000000..c3a7d52266a --- /dev/null +++ b/queue-6.1/spi-zynqmp-gqspi-fix-clock-imbalance-on-probe-failur.patch @@ -0,0 +1,62 @@ +From bc5eee6846668dfa31bf34227ce977ed041fd14d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Jun 2023 10:24:35 +0200 +Subject: spi: zynqmp-gqspi: fix clock imbalance on probe failure + +From: Johan Hovold + +[ Upstream commit 1527b076ae2cb6a9c590a02725ed39399fcad1cf ] + +Make sure that the device is not runtime suspended before explicitly +disabling the clocks on probe failure and on driver unbind to avoid a +clock enable-count imbalance. + +Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") +Cc: stable@vger.kernel.org # 4.19 +Cc: Naga Sureshkumar Relli +Cc: Shubhrajyoti Datta +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro@kernel.org> +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-zynqmp-gqspi.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c +index 876a41c5d1664..f2dcd1ae77c7d 100644 +--- a/drivers/spi/spi-zynqmp-gqspi.c ++++ b/drivers/spi/spi-zynqmp-gqspi.c +@@ -1218,9 +1218,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) + return 0; + + clk_dis_all: +- pm_runtime_put_sync(&pdev->dev); +- pm_runtime_set_suspended(&pdev->dev); + pm_runtime_disable(&pdev->dev); ++ pm_runtime_put_noidle(&pdev->dev); ++ pm_runtime_set_suspended(&pdev->dev); + clk_disable_unprepare(xqspi->refclk); + clk_dis_pclk: + clk_disable_unprepare(xqspi->pclk); +@@ -1244,11 +1244,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) + { + struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + ++ pm_runtime_get_sync(&pdev->dev); ++ + zynqmp_gqspi_write(xqspi, 
GQSPI_EN_OFST, 0x0); ++ ++ pm_runtime_disable(&pdev->dev); ++ pm_runtime_put_noidle(&pdev->dev); ++ pm_runtime_set_suspended(&pdev->dev); + clk_disable_unprepare(xqspi->refclk); + clk_disable_unprepare(xqspi->pclk); +- pm_runtime_set_suspended(&pdev->dev); +- pm_runtime_disable(&pdev->dev); + } + + static const struct of_device_id zynqmp_qspi_of_match[] = { +-- +2.40.1 +