From 259cd27e88c4a91f69a83f26c53f0668841f658e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 1 Oct 2022 18:03:39 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- ...orrect-number-of-sdo-lines-for-tegra.patch | 94 +++++++++ ...tegra-fix-tegra194-hda-reset-failure.patch | 129 ++++++++++++ queue-5.4/alsa-hda-tegra-reset-hardware.patch | 86 ++++++++ .../alsa-hda-tegra-use-clk_bulk-helpers.patch | 143 +++++++++++++ ...uring-unmount-when-stopping-a-space-.patch | 165 +++++++++++++++ ...umber-of-retries-after-discarding-pr.patch | 80 ++++++++ ...roduce-pcpu-seqcnt-for-freeing-pa-to.patch | 194 ++++++++++++++++++ ...actor-ext4_mb_discard_preallocations.patch | 68 ++++++ queue-5.4/series | 8 + 9 files changed, 967 insertions(+) create mode 100644 queue-5.4/alsa-hda-tegra-correct-number-of-sdo-lines-for-tegra.patch create mode 100644 queue-5.4/alsa-hda-tegra-fix-tegra194-hda-reset-failure.patch create mode 100644 queue-5.4/alsa-hda-tegra-reset-hardware.patch create mode 100644 queue-5.4/alsa-hda-tegra-use-clk_bulk-helpers.patch create mode 100644 queue-5.4/btrfs-fix-hang-during-unmount-when-stopping-a-space-.patch create mode 100644 queue-5.4/ext4-limit-the-number-of-retries-after-discarding-pr.patch create mode 100644 queue-5.4/ext4-mballoc-introduce-pcpu-seqcnt-for-freeing-pa-to.patch create mode 100644 queue-5.4/ext4-mballoc-refactor-ext4_mb_discard_preallocations.patch create mode 100644 queue-5.4/series diff --git a/queue-5.4/alsa-hda-tegra-correct-number-of-sdo-lines-for-tegra.patch b/queue-5.4/alsa-hda-tegra-correct-number-of-sdo-lines-for-tegra.patch new file mode 100644 index 00000000000..dcf0dc13517 --- /dev/null +++ b/queue-5.4/alsa-hda-tegra-correct-number-of-sdo-lines-for-tegra.patch @@ -0,0 +1,94 @@ +From d6cfbd60528f3bdbf9000b939d7ff2e2eb994ecb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 May 2020 13:46:14 +0530 +Subject: ALSA: hda/tegra: correct number of SDO lines for Tegra194 + +From: Sameer Pujar + +[ Upstream commit 
bb9b02a4589cee66cdb92eb9b7191d6557afdd6f ] + +Tegra194 supports 4 SDO lines but GCAP register indicates 2 lines. Thus it +does not reflect the true capability of the HW. This patch presents a +workaround by updating NSDO value accordingly in T_AZA_DBG_CFG_2 register. + +Signed-off-by: Sameer Pujar +Link: https://lore.kernel.org/r/1588580176-2801-2-git-send-email-spujar@nvidia.com +Signed-off-by: Takashi Iwai +Stable-dep-of: f89e409402e2 ("ALSA: hda: Fix Nvidia dp infoframe") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/hda_tegra.c | 33 +++++++++++++++++++++++++++++++++ + 1 file changed, 33 insertions(+) + +diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c +index e235c3ec634d..6d9448c253dc 100644 +--- a/sound/pci/hda/hda_tegra.c ++++ b/sound/pci/hda/hda_tegra.c +@@ -52,10 +52,21 @@ + #define HDA_IPFS_INTR_MASK 0x188 + #define HDA_IPFS_EN_INTR (1 << 16) + ++/* FPCI */ ++#define FPCI_DBG_CFG_2 0x10F4 ++#define FPCI_GCAP_NSDO_SHIFT 18 ++#define FPCI_GCAP_NSDO_MASK (0x3 << FPCI_GCAP_NSDO_SHIFT) ++ + /* max number of SDs */ + #define NUM_CAPTURE_SD 1 + #define NUM_PLAYBACK_SD 1 + ++/* ++ * Tegra194 does not reflect correct number of SDO lines. Below macro ++ * is used to update the GCAP register to workaround the issue. ++ */ ++#define TEGRA194_NUM_SDO_LINES 4 ++ + struct hda_tegra { + struct azx chip; + struct device *dev; +@@ -284,6 +295,7 @@ static int hda_tegra_init_clk(struct hda_tegra *hda) + + static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) + { ++ struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + struct hdac_bus *bus = azx_bus(chip); + struct snd_card *card = chip->card; + int err; +@@ -311,6 +323,26 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) + + synchronize_irq(bus->irq); + ++ /* ++ * Tegra194 has 4 SDO lines and the STRIPE can be used to ++ * indicate how many of the SDO lines the stream should be ++ * striped. 
But GCAP register does not reflect the true ++ * capability of HW. Below workaround helps to fix this. ++ * ++ * GCAP_NSDO is bits 19:18 in T_AZA_DBG_CFG_2, ++ * 0 for 1 SDO, 1 for 2 SDO, 2 for 4 SDO lines. ++ */ ++ if (of_device_is_compatible(np, "nvidia,tegra194-hda")) { ++ u32 val; ++ ++ dev_info(card->dev, "Override SDO lines to %u\n", ++ TEGRA194_NUM_SDO_LINES); ++ ++ val = readl(hda->regs + FPCI_DBG_CFG_2) & ~FPCI_GCAP_NSDO_MASK; ++ val |= (TEGRA194_NUM_SDO_LINES >> 1) << FPCI_GCAP_NSDO_SHIFT; ++ writel(val, hda->regs + FPCI_DBG_CFG_2); ++ } ++ + gcap = azx_readw(chip, GCAP); + dev_dbg(card->dev, "chipset global capabilities = 0x%x\n", gcap); + +@@ -421,6 +453,7 @@ static int hda_tegra_create(struct snd_card *card, + + static const struct of_device_id hda_tegra_match[] = { + { .compatible = "nvidia,tegra30-hda" }, ++ { .compatible = "nvidia,tegra194-hda" }, + {}, + }; + MODULE_DEVICE_TABLE(of, hda_tegra_match); +-- +2.35.1 + diff --git a/queue-5.4/alsa-hda-tegra-fix-tegra194-hda-reset-failure.patch b/queue-5.4/alsa-hda-tegra-fix-tegra194-hda-reset-failure.patch new file mode 100644 index 00000000000..d989aaae911 --- /dev/null +++ b/queue-5.4/alsa-hda-tegra-fix-tegra194-hda-reset-failure.patch @@ -0,0 +1,129 @@ +From d3b0d144d758e0a1896a2e42265e75805be35c91 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 17:23:49 +0530 +Subject: ALSA: hda/tegra: Fix Tegra194 HDA reset failure + +From: Sameer Pujar + +[ Upstream commit d278dc9151a034674b31ffeda24cdfb0073570f3 ] + +HDA regression is recently reported on Tegra194 based platforms. +This happens because "hda2codec_2x" reset does not really exist +in Tegra194 and it causes probe failure. All the HDA based audio +tests fail at the moment. This underlying issue is exposed by +commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP +response") which now checks return code of BPMP command response. +Fix this issue by skipping unavailable reset on Tegra194. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Sameer Pujar +Reviewed-by: Dmitry Osipenko +Link: https://lore.kernel.org/r/1640260431-11613-2-git-send-email-spujar@nvidia.com +Signed-off-by: Takashi Iwai +Stable-dep-of: f89e409402e2 ("ALSA: hda: Fix Nvidia dp infoframe") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/hda_tegra.c | 43 +++++++++++++++++++++++++++++++-------- + 1 file changed, 34 insertions(+), 9 deletions(-) + +diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c +index bfd1341f6681..9118e89e8f11 100644 +--- a/sound/pci/hda/hda_tegra.c ++++ b/sound/pci/hda/hda_tegra.c +@@ -68,14 +68,20 @@ + */ + #define TEGRA194_NUM_SDO_LINES 4 + ++struct hda_tegra_soc { ++ bool has_hda2codec_2x_reset; ++}; ++ + struct hda_tegra { + struct azx chip; + struct device *dev; +- struct reset_control *reset; ++ struct reset_control_bulk_data resets[3]; + struct clk_bulk_data clocks[3]; ++ unsigned int nresets; + unsigned int nclocks; + void __iomem *regs; + struct work_struct probe_work; ++ const struct hda_tegra_soc *soc; + }; + + #ifdef CONFIG_PM +@@ -172,7 +178,7 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev) + int rc; + + if (!chip->running) { +- rc = reset_control_assert(hda->reset); ++ rc = reset_control_bulk_assert(hda->nresets, hda->resets); + if (rc) + return rc; + } +@@ -189,7 +195,7 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev) + } else { + usleep_range(10, 100); + +- rc = reset_control_deassert(hda->reset); ++ rc = reset_control_bulk_deassert(hda->nresets, hda->resets); + if (rc) + return rc; + } +@@ -411,9 +417,17 @@ static int hda_tegra_create(struct snd_card *card, + return 0; + } + ++static const struct hda_tegra_soc tegra30_data = { ++ .has_hda2codec_2x_reset = true, ++}; ++ ++static const struct hda_tegra_soc tegra194_data = { ++ .has_hda2codec_2x_reset = false, ++}; ++ + static const struct of_device_id hda_tegra_match[] = { +- { .compatible = "nvidia,tegra30-hda" }, +- { .compatible = 
"nvidia,tegra194-hda" }, ++ { .compatible = "nvidia,tegra30-hda", .data = &tegra30_data }, ++ { .compatible = "nvidia,tegra194-hda", .data = &tegra194_data }, + {}, + }; + MODULE_DEVICE_TABLE(of, hda_tegra_match); +@@ -434,6 +448,8 @@ static int hda_tegra_probe(struct platform_device *pdev) + hda->dev = &pdev->dev; + chip = &hda->chip; + ++ hda->soc = of_device_get_match_data(&pdev->dev); ++ + err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, + THIS_MODULE, 0, &card); + if (err < 0) { +@@ -441,11 +457,20 @@ static int hda_tegra_probe(struct platform_device *pdev) + return err; + } + +- hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev); +- if (IS_ERR(hda->reset)) { +- err = PTR_ERR(hda->reset); ++ hda->resets[hda->nresets++].id = "hda"; ++ hda->resets[hda->nresets++].id = "hda2hdmi"; ++ /* ++ * "hda2codec_2x" reset is not present on Tegra194. Though DT would ++ * be updated to reflect this, but to have backward compatibility ++ * below is necessary. ++ */ ++ if (hda->soc->has_hda2codec_2x_reset) ++ hda->resets[hda->nresets++].id = "hda2codec_2x"; ++ ++ err = devm_reset_control_bulk_get_exclusive(&pdev->dev, hda->nresets, ++ hda->resets); ++ if (err) + goto out_free; +- } + + hda->clocks[hda->nclocks++].id = "hda"; + hda->clocks[hda->nclocks++].id = "hda2hdmi"; +-- +2.35.1 + diff --git a/queue-5.4/alsa-hda-tegra-reset-hardware.patch b/queue-5.4/alsa-hda-tegra-reset-hardware.patch new file mode 100644 index 00000000000..c6985d717fe --- /dev/null +++ b/queue-5.4/alsa-hda-tegra-reset-hardware.patch @@ -0,0 +1,86 @@ +From bf56a3ac8724b32e1b2c43918bc5622abb5e45f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jan 2021 03:31:50 +0300 +Subject: ALSA: hda/tegra: Reset hardware + +From: Dmitry Osipenko + +[ Upstream commit 87f0e46e7559beb6f1d1ff99f8f48b1b9d86db52 ] + +Reset hardware on RPM-resume in order to bring it into a predictable +state. 
+ +Tested-by: Peter Geis # Ouya T30 audio works +Tested-by: Matt Merhar # Ouya T30 boot-tested +Tested-by: Nicolas Chauvet # TK1 boot-tested +Signed-off-by: Dmitry Osipenko +Link: https://lore.kernel.org/r/20210120003154.26749-3-digetx@gmail.com +Signed-off-by: Takashi Iwai +Stable-dep-of: f89e409402e2 ("ALSA: hda: Fix Nvidia dp infoframe") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/hda_tegra.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c +index 0c039b5033a1..bfd1341f6681 100644 +--- a/sound/pci/hda/hda_tegra.c ++++ b/sound/pci/hda/hda_tegra.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -70,6 +71,7 @@ + struct hda_tegra { + struct azx chip; + struct device *dev; ++ struct reset_control *reset; + struct clk_bulk_data clocks[3]; + unsigned int nclocks; + void __iomem *regs; +@@ -169,6 +171,12 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev) + struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + int rc; + ++ if (!chip->running) { ++ rc = reset_control_assert(hda->reset); ++ if (rc) ++ return rc; ++ } ++ + rc = clk_bulk_prepare_enable(hda->nclocks, hda->clocks); + if (rc != 0) + return rc; +@@ -178,6 +186,12 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev) + /* disable controller wake up event*/ + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & + ~STATESTS_INT_MASK); ++ } else { ++ usleep_range(10, 100); ++ ++ rc = reset_control_deassert(hda->reset); ++ if (rc) ++ return rc; + } + + return 0; +@@ -427,6 +441,12 @@ static int hda_tegra_probe(struct platform_device *pdev) + return err; + } + ++ hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev); ++ if (IS_ERR(hda->reset)) { ++ err = PTR_ERR(hda->reset); ++ goto out_free; ++ } ++ + hda->clocks[hda->nclocks++].id = "hda"; + hda->clocks[hda->nclocks++].id = "hda2hdmi"; + 
hda->clocks[hda->nclocks++].id = "hda2codec_2x"; +-- +2.35.1 + diff --git a/queue-5.4/alsa-hda-tegra-use-clk_bulk-helpers.patch b/queue-5.4/alsa-hda-tegra-use-clk_bulk-helpers.patch new file mode 100644 index 00000000000..42cd2126a1d --- /dev/null +++ b/queue-5.4/alsa-hda-tegra-use-clk_bulk-helpers.patch @@ -0,0 +1,143 @@ +From bcacdb6e88b8a2e1769287dcefed7beec6e78448 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jan 2021 03:31:49 +0300 +Subject: ALSA: hda/tegra: Use clk_bulk helpers + +From: Dmitry Osipenko + +[ Upstream commit 3a465f027a33cbd2af74f882ad41729583195e8f ] + +Use clk_bulk helpers to make code cleaner. Note that this patch changed +the order in which clocks are enabled to make code look nicer, but this +doesn't matter in terms of hardware. + +Tested-by: Peter Geis # Ouya T30 audio works +Tested-by: Matt Merhar # Ouya T30 boot-tested +Tested-by: Nicolas Chauvet # TK1 boot-tested +Acked-by: Thierry Reding +Signed-off-by: Dmitry Osipenko +Link: https://lore.kernel.org/r/20210120003154.26749-2-digetx@gmail.com +Signed-off-by: Takashi Iwai +Stable-dep-of: f89e409402e2 ("ALSA: hda: Fix Nvidia dp infoframe") +Signed-off-by: Sasha Levin +--- + sound/pci/hda/hda_tegra.c | 68 ++++++--------------------------------- + 1 file changed, 9 insertions(+), 59 deletions(-) + +diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c +index 6d9448c253dc..0c039b5033a1 100644 +--- a/sound/pci/hda/hda_tegra.c ++++ b/sound/pci/hda/hda_tegra.c +@@ -70,9 +70,8 @@ + struct hda_tegra { + struct azx chip; + struct device *dev; +- struct clk *hda_clk; +- struct clk *hda2codec_2x_clk; +- struct clk *hda2hdmi_clk; ++ struct clk_bulk_data clocks[3]; ++ unsigned int nclocks; + void __iomem *regs; + struct work_struct probe_work; + }; +@@ -113,36 +112,6 @@ static void hda_tegra_init(struct hda_tegra *hda) + writel(v, hda->regs + HDA_IPFS_INTR_MASK); + } + +-static int hda_tegra_enable_clocks(struct hda_tegra *data) +-{ +- int rc; +- +- rc = 
clk_prepare_enable(data->hda_clk); +- if (rc) +- return rc; +- rc = clk_prepare_enable(data->hda2codec_2x_clk); +- if (rc) +- goto disable_hda; +- rc = clk_prepare_enable(data->hda2hdmi_clk); +- if (rc) +- goto disable_codec_2x; +- +- return 0; +- +-disable_codec_2x: +- clk_disable_unprepare(data->hda2codec_2x_clk); +-disable_hda: +- clk_disable_unprepare(data->hda_clk); +- return rc; +-} +- +-static void hda_tegra_disable_clocks(struct hda_tegra *data) +-{ +- clk_disable_unprepare(data->hda2hdmi_clk); +- clk_disable_unprepare(data->hda2codec_2x_clk); +- clk_disable_unprepare(data->hda_clk); +-} +- + /* + * power management + */ +@@ -188,7 +157,7 @@ static int __maybe_unused hda_tegra_runtime_suspend(struct device *dev) + synchronize_irq(bus->irq); + azx_enter_link_reset(chip); + } +- hda_tegra_disable_clocks(hda); ++ clk_bulk_disable_unprepare(hda->nclocks, hda->clocks); + + return 0; + } +@@ -200,7 +169,7 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev) + struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + int rc; + +- rc = hda_tegra_enable_clocks(hda); ++ rc = clk_bulk_prepare_enable(hda->nclocks, hda->clocks); + if (rc != 0) + return rc; + if (chip && chip->running) { +@@ -270,29 +239,6 @@ static int hda_tegra_init_chip(struct azx *chip, struct platform_device *pdev) + return 0; + } + +-static int hda_tegra_init_clk(struct hda_tegra *hda) +-{ +- struct device *dev = hda->dev; +- +- hda->hda_clk = devm_clk_get(dev, "hda"); +- if (IS_ERR(hda->hda_clk)) { +- dev_err(dev, "failed to get hda clock\n"); +- return PTR_ERR(hda->hda_clk); +- } +- hda->hda2codec_2x_clk = devm_clk_get(dev, "hda2codec_2x"); +- if (IS_ERR(hda->hda2codec_2x_clk)) { +- dev_err(dev, "failed to get hda2codec_2x clock\n"); +- return PTR_ERR(hda->hda2codec_2x_clk); +- } +- hda->hda2hdmi_clk = devm_clk_get(dev, "hda2hdmi"); +- if (IS_ERR(hda->hda2hdmi_clk)) { +- dev_err(dev, "failed to get hda2hdmi clock\n"); +- return PTR_ERR(hda->hda2hdmi_clk); +- } +- 
+- return 0; +-} +- + static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) + { + struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); +@@ -481,7 +427,11 @@ static int hda_tegra_probe(struct platform_device *pdev) + return err; + } + +- err = hda_tegra_init_clk(hda); ++ hda->clocks[hda->nclocks++].id = "hda"; ++ hda->clocks[hda->nclocks++].id = "hda2hdmi"; ++ hda->clocks[hda->nclocks++].id = "hda2codec_2x"; ++ ++ err = devm_clk_bulk_get(&pdev->dev, hda->nclocks, hda->clocks); + if (err < 0) + goto out_free; + +-- +2.35.1 + diff --git a/queue-5.4/btrfs-fix-hang-during-unmount-when-stopping-a-space-.patch b/queue-5.4/btrfs-fix-hang-during-unmount-when-stopping-a-space-.patch new file mode 100644 index 00000000000..458a2a932d7 --- /dev/null +++ b/queue-5.4/btrfs-fix-hang-during-unmount-when-stopping-a-space-.patch @@ -0,0 +1,165 @@ +From 501dfd82587fd41fb4207c0bbe9dce6c0778522e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Sep 2022 12:31:51 +0100 +Subject: btrfs: fix hang during unmount when stopping a space reclaim worker + +From: Filipe Manana + +[ Upstream commit a362bb864b8db4861977d00bd2c3222503ccc34b ] + +Often when running generic/562 from fstests we can hang during unmount, +resulting in a trace like this: + + Sep 07 11:52:00 debian9 unknown: run fstests generic/562 at 2022-09-07 11:52:00 + Sep 07 11:55:32 debian9 kernel: INFO: task umount:49438 blocked for more than 120 seconds. + Sep 07 11:55:32 debian9 kernel: Not tainted 6.0.0-rc2-btrfs-next-122 #1 + Sep 07 11:55:32 debian9 kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + Sep 07 11:55:32 debian9 kernel: task:umount state:D stack: 0 pid:49438 ppid: 25683 flags:0x00004000 + Sep 07 11:55:32 debian9 kernel: Call Trace: + Sep 07 11:55:32 debian9 kernel: + Sep 07 11:55:32 debian9 kernel: __schedule+0x3c8/0xec0 + Sep 07 11:55:32 debian9 kernel: ? 
rcu_read_lock_sched_held+0x12/0x70 + Sep 07 11:55:32 debian9 kernel: schedule+0x5d/0xf0 + Sep 07 11:55:32 debian9 kernel: schedule_timeout+0xf1/0x130 + Sep 07 11:55:32 debian9 kernel: ? lock_release+0x224/0x4a0 + Sep 07 11:55:32 debian9 kernel: ? lock_acquired+0x1a0/0x420 + Sep 07 11:55:32 debian9 kernel: ? trace_hardirqs_on+0x2c/0xd0 + Sep 07 11:55:32 debian9 kernel: __wait_for_common+0xac/0x200 + Sep 07 11:55:32 debian9 kernel: ? usleep_range_state+0xb0/0xb0 + Sep 07 11:55:32 debian9 kernel: __flush_work+0x26d/0x530 + Sep 07 11:55:32 debian9 kernel: ? flush_workqueue_prep_pwqs+0x140/0x140 + Sep 07 11:55:32 debian9 kernel: ? trace_clock_local+0xc/0x30 + Sep 07 11:55:32 debian9 kernel: __cancel_work_timer+0x11f/0x1b0 + Sep 07 11:55:32 debian9 kernel: ? close_ctree+0x12b/0x5b3 [btrfs] + Sep 07 11:55:32 debian9 kernel: ? __trace_bputs+0x10b/0x170 + Sep 07 11:55:32 debian9 kernel: close_ctree+0x152/0x5b3 [btrfs] + Sep 07 11:55:32 debian9 kernel: ? evict_inodes+0x166/0x1c0 + Sep 07 11:55:32 debian9 kernel: generic_shutdown_super+0x71/0x120 + Sep 07 11:55:32 debian9 kernel: kill_anon_super+0x14/0x30 + Sep 07 11:55:32 debian9 kernel: btrfs_kill_super+0x12/0x20 [btrfs] + Sep 07 11:55:32 debian9 kernel: deactivate_locked_super+0x2e/0xa0 + Sep 07 11:55:32 debian9 kernel: cleanup_mnt+0x100/0x160 + Sep 07 11:55:32 debian9 kernel: task_work_run+0x59/0xa0 + Sep 07 11:55:32 debian9 kernel: exit_to_user_mode_prepare+0x1a6/0x1b0 + Sep 07 11:55:32 debian9 kernel: syscall_exit_to_user_mode+0x16/0x40 + Sep 07 11:55:32 debian9 kernel: do_syscall_64+0x48/0x90 + Sep 07 11:55:32 debian9 kernel: entry_SYSCALL_64_after_hwframe+0x63/0xcd + Sep 07 11:55:32 debian9 kernel: RIP: 0033:0x7fcde59a57a7 + Sep 07 11:55:32 debian9 kernel: RSP: 002b:00007ffe914217c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 + Sep 07 11:55:32 debian9 kernel: RAX: 0000000000000000 RBX: 00007fcde5ae8264 RCX: 00007fcde59a57a7 + Sep 07 11:55:32 debian9 kernel: RDX: 0000000000000000 RSI: 0000000000000000 RDI: 
000055b57556cdd0 + Sep 07 11:55:32 debian9 kernel: RBP: 000055b57556cba0 R08: 0000000000000000 R09: 00007ffe91420570 + Sep 07 11:55:32 debian9 kernel: R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + Sep 07 11:55:32 debian9 kernel: R13: 000055b57556cdd0 R14: 000055b57556ccb8 R15: 0000000000000000 + Sep 07 11:55:32 debian9 kernel: + +What happens is the following: + +1) The cleaner kthread tries to start a transaction to delete an unused + block group, but the metadata reservation can not be satisfied right + away, so a reservation ticket is created and it starts the async + metadata reclaim task (fs_info->async_reclaim_work); + +2) Writeback for all the filler inodes with an i_size of 2K starts + (generic/562 creates a lot of 2K files with the goal of filling + metadata space). We try to create an inline extent for them, but we + fail when trying to insert the inline extent with -ENOSPC (at + cow_file_range_inline()) - since this is not critical, we fallback + to non-inline mode (back to cow_file_range()), reserve extents, create + extent maps and create the ordered extents; + +3) An unmount starts, enters close_ctree(); + +4) The async reclaim task is flushing stuff, entering the flush states one + by one, until it reaches RUN_DELAYED_IPUTS. There it runs all current + delayed iputs. + + After running the delayed iputs and before calling + btrfs_wait_on_delayed_iputs(), one or more ordered extents complete, + and btrfs_add_delayed_iput() is called for each one through + btrfs_finish_ordered_io() -> btrfs_put_ordered_extent(). This results + in bumping fs_info->nr_delayed_iputs from 0 to some positive value. + + So the async reclaim task blocks at btrfs_wait_on_delayed_iputs() waiting + for fs_info->nr_delayed_iputs to become 0; + +5) The current transaction is committed by the transaction kthread, we then + start unpinning extents and end up calling btrfs_try_granting_tickets() + through unpin_extent_range(), since we released some space. 
+ This results in satisfying the ticket created by the cleaner kthread at + step 1, waking up the cleaner kthread; + +6) At close_ctree() we ask the cleaner kthread to park; + +7) The cleaner kthread starts the transaction, deletes the unused block + group, and then calls kthread_should_park(), which returns true, so it + parks. And at this point we have the delayed iputs added by the + completion of the ordered extents still pending; + +8) Then later at close_ctree(), when we call: + + cancel_work_sync(&fs_info->async_reclaim_work); + + We hang forever, since the cleaner was parked and no one else can run + delayed iputs after that, while the reclaim task is waiting for the + remaining delayed iputs to be completed. + +Fix this by waiting for all ordered extents to complete and running the +delayed iputs before attempting to stop the async reclaim tasks. Note that +we can not wait for ordered extents with btrfs_wait_ordered_roots() (or +other similar functions) because that waits for the BTRFS_ORDERED_COMPLETE +flag to be set on an ordered extent, but the delayed iput is added after +that, when doing the final btrfs_put_ordered_extent(). So instead wait for +the work queues used for executing ordered extent completion to be empty, +which works because we do the final put on an ordered extent at +btrfs_finish_ordered_io() (while we are in the unmount context). 
+ +Fixes: d6fd0ae25c6495 ("Btrfs: fix missing delayed iputs on unmount") +CC: stable@vger.kernel.org # 5.15+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index b94d68035c5d..bf46954d6237 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4052,6 +4052,31 @@ void close_ctree(struct btrfs_fs_info *fs_info) + /* clear out the rbtree of defraggable inodes */ + btrfs_cleanup_defrag_inodes(fs_info); + ++ /* ++ * After we parked the cleaner kthread, ordered extents may have ++ * completed and created new delayed iputs. If one of the async reclaim ++ * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we ++ * can hang forever trying to stop it, because if a delayed iput is ++ * added after it ran btrfs_run_delayed_iputs() and before it called ++ * btrfs_wait_on_delayed_iputs(), it will hang forever since there is ++ * no one else to run iputs. ++ * ++ * So wait for all ongoing ordered extents to complete and then run ++ * delayed iputs. This works because once we reach this point no one ++ * can either create new ordered extents nor create delayed iputs ++ * through some other means. ++ * ++ * Also note that btrfs_wait_ordered_roots() is not safe here, because ++ * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, ++ * but the delayed iput for the respective inode is made only when doing ++ * the final btrfs_put_ordered_extent() (which must happen at ++ * btrfs_finish_ordered_io() when we are unmounting). ++ */ ++ btrfs_flush_workqueue(fs_info->endio_write_workers); ++ /* Ordered extents for free space inodes. 
*/ ++ btrfs_flush_workqueue(fs_info->endio_freespace_worker); ++ btrfs_run_delayed_iputs(fs_info); ++ + cancel_work_sync(&fs_info->async_reclaim_work); + + if (!sb_rdonly(fs_info->sb)) { +-- +2.35.1 + diff --git a/queue-5.4/ext4-limit-the-number-of-retries-after-discarding-pr.patch b/queue-5.4/ext4-limit-the-number-of-retries-after-discarding-pr.patch new file mode 100644 index 00000000000..1bee410b1b6 --- /dev/null +++ b/queue-5.4/ext4-limit-the-number-of-retries-after-discarding-pr.patch @@ -0,0 +1,80 @@ +From 5390723d23b0a5ad8f371b877b24c5aeba17750e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Sep 2022 18:03:14 -0400 +Subject: ext4: limit the number of retries after discarding preallocations + blocks + +From: Theodore Ts'o + +[ Upstream commit 80fa46d6b9e7b1527bfd2197d75431fd9c382161 ] + +This patch avoids threads live-locking for hours when a large number of +threads are competing over the last few free extents as blocks are +getting added and removed from preallocation pools. From our bug +reporter: + + A reliable way for triggering this has multiple writers + continuously write() to files when the filesystem is full, while + small amounts of space are freed (e.g. by truncating a large file + -1MiB at a time). In the local filesystem, this can be done by + simply not checking the return code of write (0) and/or the error + (ENOSPACE) that is set. Over NFS with an async mount, even clients + with proper error checking will behave this way since the linux NFS + client implementation will not propagate the server errors [the + write syscalls immediately return success] until the file handle is + closed. This leads to a situation where NFS clients send a + continuous stream of WRITE rpcs which result in ERRNOSPACE -- but + since the client isn't seeing this, the stream of writes continues + at maximum network speed. + + When some space does appear, multiple writers will all attempt to + claim it for their current write. 
For NFS, we may see dozens to + hundreds of threads that do this. + + The real-world scenario of this is database backup tooling (in + particular, github.com/mdkent/percona-xtrabackup) which may write + large files (>1TiB) to NFS for safe keeping. Some temporary files + are written, rewound, and read back -- all before closing the file + handle (the temp file is actually unlinked, to trigger automatic + deletion on close/crash.) An application like this operating on an + async NFS mount will not see an error code until TiB have been + written/read. + + The lockup was observed when running this database backup on large + filesystems (64 TiB in this case) with a high number of block + groups and no free space. Fragmentation is generally not a factor + in this filesystem (~thousands of large files, mostly contiguous + except for the parts written while the filesystem is at capacity.) + +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index ae51b77c2863..895f7d3f97fb 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4562,6 +4562,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ext4_fsblk_t block = 0; + unsigned int inquota = 0; + unsigned int reserv_clstrs = 0; ++ int retries = 0; + u64 seq; + + might_sleep(); +@@ -4656,7 +4657,8 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) ++ if (++retries < 3 && ++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) + goto repeat; + *errp = -ENOSPC; + } +-- +2.35.1 + diff --git a/queue-5.4/ext4-mballoc-introduce-pcpu-seqcnt-for-freeing-pa-to.patch b/queue-5.4/ext4-mballoc-introduce-pcpu-seqcnt-for-freeing-pa-to.patch new file mode 100644 index 00000000000..25733a81339 --- /dev/null +++ 
b/queue-5.4/ext4-mballoc-introduce-pcpu-seqcnt-for-freeing-pa-to.patch @@ -0,0 +1,194 @@ +From a815aaba92192ce6aa4f2338280e52c924046450 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 May 2020 12:10:34 +0530 +Subject: ext4: mballoc: introduce pcpu seqcnt for freeing PA to improve ENOSPC + handling + +From: Ritesh Harjani + +[ Upstream commit 07b5b8e1ac4004b7db1065a301df65cd434c31c9 ] + +There could be a race in function ext4_mb_discard_group_preallocations() +where the 1st thread may iterate through group's bb_prealloc_list and +remove all the PAs and add to function's local list head. +Now if the 2nd thread comes in to discard the group preallocations, +it will see that the group->bb_prealloc_list is empty and will return 0. + +Consider for a case where we have less number of groups +(for e.g. just group 0), +this may even return an -ENOSPC error from ext4_mb_new_blocks() +(where we call for ext4_mb_discard_group_preallocations()). +But that is wrong, since 2nd thread should have waited for 1st thread +to release all the PAs and should have retried for allocation. +Since 1st thread was anyway going to discard the PAs. + +The algorithm using this percpu seq counter goes below: +1. We sample the percpu discard_pa_seq counter before trying for block + allocation in ext4_mb_new_blocks(). +2. We increment this percpu discard_pa_seq counter when we either allocate + or free these blocks i.e. while marking those blocks as used/free in + mb_mark_used()/mb_free_blocks(). +3. We also increment this percpu seq counter when we successfully identify + that the bb_prealloc_list is not empty and hence proceed for discarding + of those PAs inside ext4_mb_discard_group_preallocations(). + +Now to make sure that the regular fast path of block allocation is not +affected, as a small optimization we only sample the percpu seq counter +on that cpu. 
Only when the block allocation fails and when freed blocks +found were 0, that is when we sample percpu seq counter for all cpus using +below function ext4_get_discard_pa_seq_sum(). This happens after making +sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty. + +It can be well argued that why don't just check for grp->bb_free to +see if there are any free blocks to be allocated. So here are the two +concerns which were discussed:- + +1. If for some reason the blocks available in the group are not + appropriate for allocation logic (say for e.g. + EXT4_MB_HINT_GOAL_ONLY, although this is not yet implemented), then + the retry logic may result into infinite looping since grp->bb_free is + non-zero. + +2. Also before preallocation was clubbed with block allocation with the + same ext4_lock_group() held, there were lot of races where grp->bb_free + could not be reliably relied upon. +Due to above, this patch considers discard_pa_seq logic to determine if +we should retry for block allocation. Say if there are n threads +trying for block allocation and none of those could allocate or discard +any of the blocks, then all of those n threads will fail the block +allocation and return -ENOSPC error. (Since the seq counter for all of +those will match as no block allocation/discard was done during that +duration). 
+ +Signed-off-by: Ritesh Harjani +Link: https://lore.kernel.org/r/7f254686903b87c419d798742fd9a1be34f0657b.1589955723.git.riteshh@linux.ibm.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 80fa46d6b9e7 ("ext4: limit the number of retries after discarding preallocations blocks") +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 5 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index fea8daf9a6b3..ae51b77c2863 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -357,6 +357,35 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, + ext4_group_t group); + ++/* ++ * The algorithm using this percpu seq counter goes below: ++ * 1. We sample the percpu discard_pa_seq counter before trying for block ++ * allocation in ext4_mb_new_blocks(). ++ * 2. We increment this percpu discard_pa_seq counter when we either allocate ++ * or free these blocks i.e. while marking those blocks as used/free in ++ * mb_mark_used()/mb_free_blocks(). ++ * 3. We also increment this percpu seq counter when we successfully identify ++ * that the bb_prealloc_list is not empty and hence proceed for discarding ++ * of those PAs inside ext4_mb_discard_group_preallocations(). ++ * ++ * Now to make sure that the regular fast path of block allocation is not ++ * affected, as a small optimization we only sample the percpu seq counter ++ * on that cpu. Only when the block allocation fails and when freed blocks ++ * found were 0, that is when we sample percpu seq counter for all cpus using ++ * below function ext4_get_discard_pa_seq_sum(). This happens after making ++ * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty. 
++ */ ++static DEFINE_PER_CPU(u64, discard_pa_seq); ++static inline u64 ext4_get_discard_pa_seq_sum(void) ++{ ++ int __cpu; ++ u64 __seq = 0; ++ ++ for_each_possible_cpu(__cpu) ++ __seq += per_cpu(discard_pa_seq, __cpu); ++ return __seq; ++} ++ + static inline void *mb_correct_addr_and_bit(int *bit, void *addr) + { + #if BITS_PER_LONG == 64 +@@ -1430,6 +1459,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, + mb_check_buddy(e4b); + mb_free_blocks_double(inode, e4b, first, count); + ++ this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; +@@ -1572,6 +1602,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) + mb_check_buddy(e4b); + mb_mark_used_double(e4b, start, len); + ++ this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free -= len; + if (e4b->bd_info->bb_first_free == start) + e4b->bd_info->bb_first_free += len; +@@ -3952,6 +3983,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, + INIT_LIST_HEAD(&list); + repeat: + ext4_lock_group(sb, group); ++ this_cpu_inc(discard_pa_seq); + list_for_each_entry_safe(pa, tmp, + &grp->bb_prealloc_list, pa_group_list) { + spin_lock(&pa->pa_lock); +@@ -4494,14 +4526,26 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) + } + + static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, +- struct ext4_allocation_context *ac) ++ struct ext4_allocation_context *ac, u64 *seq) + { + int freed; ++ u64 seq_retry = 0; ++ bool ret = false; + + freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); +- if (freed) +- return true; +- return false; ++ if (freed) { ++ ret = true; ++ goto out_dbg; ++ } ++ seq_retry = ext4_get_discard_pa_seq_sum(); ++ if (seq_retry != *seq) { ++ *seq = seq_retry; ++ ret = true; ++ } ++ ++out_dbg: ++ mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? 
"yes" : "no"); ++ return ret; + } + + /* +@@ -4518,6 +4562,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ext4_fsblk_t block = 0; + unsigned int inquota = 0; + unsigned int reserv_clstrs = 0; ++ u64 seq; + + might_sleep(); + sb = ar->inode->i_sb; +@@ -4579,6 +4624,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + } + + ac->ac_op = EXT4_MB_HISTORY_PREALLOC; ++ seq = *this_cpu_ptr(&discard_pa_seq); + if (!ext4_mb_use_preallocated(ac)) { + ac->ac_op = EXT4_MB_HISTORY_ALLOC; + ext4_mb_normalize_request(ac, ar); +@@ -4610,7 +4656,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- if (ext4_mb_discard_preallocations_should_retry(sb, ac)) ++ if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) + goto repeat; + *errp = -ENOSPC; + } +-- +2.35.1 + diff --git a/queue-5.4/ext4-mballoc-refactor-ext4_mb_discard_preallocations.patch b/queue-5.4/ext4-mballoc-refactor-ext4_mb_discard_preallocations.patch new file mode 100644 index 00000000000..7f4e80fc735 --- /dev/null +++ b/queue-5.4/ext4-mballoc-refactor-ext4_mb_discard_preallocations.patch @@ -0,0 +1,68 @@ +From 6216414446cc1406c63982214ca7648af80a16c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 May 2020 12:10:33 +0530 +Subject: ext4: mballoc: refactor ext4_mb_discard_preallocations() + +From: Ritesh Harjani + +[ Upstream commit cf5e2ca6c99077d128e971149f0c262e808ca831 ] + +Implement ext4_mb_discard_preallocations_should_retry() +which we will need in later patches to add more logic +like check for sequence number match to see if we should +retry for block allocation or not. + +There should be no functionality change in this patch. 
+ +Signed-off-by: Ritesh Harjani +Link: https://lore.kernel.org/r/1cfae0098d2aa9afbeb59331401258182868c8f2.1589955723.git.riteshh@linux.ibm.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 80fa46d6b9e7 ("ext4: limit the number of retries after discarding preallocations blocks") +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 3c3166ba4364..fea8daf9a6b3 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4493,6 +4493,17 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) + return freed; + } + ++static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, ++ struct ext4_allocation_context *ac) ++{ ++ int freed; ++ ++ freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); ++ if (freed) ++ return true; ++ return false; ++} ++ + /* + * Main entry point into mballoc to allocate blocks + * it tries to use preallocation first, then falls back +@@ -4501,7 +4512,6 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) + ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + struct ext4_allocation_request *ar, int *errp) + { +- int freed; + struct ext4_allocation_context *ac = NULL; + struct ext4_sb_info *sbi; + struct super_block *sb; +@@ -4600,8 +4610,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); +- if (freed) ++ if (ext4_mb_discard_preallocations_should_retry(sb, ac)) + goto repeat; + *errp = -ENOSPC; + } +-- +2.35.1 + diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..456e1be1a9d --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,8 @@ +alsa-hda-tegra-correct-number-of-sdo-lines-for-tegra.patch +alsa-hda-tegra-use-clk_bulk-helpers.patch +alsa-hda-tegra-reset-hardware.patch 
+alsa-hda-tegra-fix-tegra194-hda-reset-failure.patch +btrfs-fix-hang-during-unmount-when-stopping-a-space-.patch +ext4-mballoc-refactor-ext4_mb_discard_preallocations.patch +ext4-mballoc-introduce-pcpu-seqcnt-for-freeing-pa-to.patch +ext4-limit-the-number-of-retries-after-discarding-pr.patch -- 2.47.3