From: Greg Kroah-Hartman Date: Mon, 1 Mar 2021 13:55:49 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.4.259~31 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=09c6b190e916e653de949665f04ecbde31ce1bb7;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch dm-era-fix-bitset-memory-leaks.patch dm-era-only-resize-metadata-in-preresume.patch dm-era-recover-committed-writeset-after-crash.patch dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch dm-era-update-in-core-bitset-after-committing-the-metadata.patch dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch dm-era-verify-the-data-block-size-hasn-t-changed.patch dm-fix-deadlock-when-swapping-to-encrypted-device.patch dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch dm-table-fix-iterate_devices-based-device-capability-checks.patch dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch dm-writecache-fix-performance-degradation-in-ssd-mode.patch dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch dm-writecache-return-the-exact-table-values-that-were-set.patch f2fs-enforce-the-immutable-flag-on-open-files.patch f2fs-fix-out-of-repair-__setattr_copy.patch f2fs-flush-data-when-enabling-checkpoint-back.patch gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch s390-vtime-fix-inline-assembly-clobber-list.patch sparc32-fix-a-user-triggerable-oops-in-clear_user.patch spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch spi-spi-synquacer-fix-set_cs-handling.patch um-defer-killing-userspace-on-page-table-update-failures.patch um-mm-check-more-comprehensively-for-stub-changes.patch virtio-s390-implement-virtio-ccw-revision-2-correctly.patch --- diff --git a/queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch b/queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch new file mode 100644 index 00000000000..0396af3ac9c --- /dev/null +++ b/queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch @@ -0,0 +1,86 @@ +From a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Thu, 7 Jan 2021 19:43:30 +0100 +Subject: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument + +From: Rafael J. Wysocki + +commit a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 upstream. + +All of the callers of intel_pstate_get_hwp_max() access the struct +cpudata object that corresponds to the given CPU already and the +function itself needs to access that object (in order to update +hwp_cap_cached), so modify the code to pass a struct cpudata pointer +to it instead of the CPU number. + +Signed-off-by: Rafael J. 
Wysocki
+Tested-by: Chen Yu
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -829,13 +829,13 @@ static struct freq_attr *hwp_cpufreq_att
+ NULL,
+ };
+
+-static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
++static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max,
+ int *current_max)
+ {
+ u64 cap;
+
+- rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+- WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
++ rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
++ WRITE_ONCE(cpu->hwp_cap_cached, cap);
+ if (global.no_turbo || global.turbo_disabled)
+ *current_max = HWP_GUARANTEED_PERF(cap);
+ else
+@@ -1223,7 +1223,7 @@ static void update_qos_request(enum freq
+ continue;
+
+ if (hwp_active)
+- intel_pstate_get_hwp_max(i, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ else
+ turbo_max = cpu->pstate.turbo_pstate;
+
+@@ -1733,7 +1733,7 @@ static void intel_pstate_get_cpu_pstates
+ if (hwp_active && !hwp_mode_bdw) {
+ unsigned int phy_max, current_max;
+
+- intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
++ intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
+ cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+ cpu->pstate.turbo_pstate = phy_max;
+ } else {
+@@ -2217,7 +2217,7 @@ static void intel_pstate_update_perf_lim
+ * rather than pure ratios.
+ */
+ if (hwp_active) {
+- intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ } else {
+ max_state = global.no_turbo || global.turbo_disabled ?
+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+@@ -2332,7 +2332,7 @@ static void intel_pstate_verify_cpu_poli
+ if (hwp_active) {
+ int max_state, turbo_max;
+
+- intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ max_freq = max_state * cpu->pstate.scaling;
+ } else {
+ max_freq = intel_pstate_get_max_freq(cpu);
+@@ -2675,7 +2675,7 @@ static int intel_cpufreq_cpu_init(struct
+ if (hwp_active) {
+ u64 value;
+
+- intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
+ rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+ WRITE_ONCE(cpu->hwp_req_cached, value);
diff --git a/queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch b/queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
new file mode 100644
index 00000000000..e88d29f899c
--- /dev/null
+++ b/queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
@@ -0,0 +1,58 @@
+From 6f67e060083a84a4cc364eab6ae40c717165fb0c Mon Sep 17 00:00:00 2001
+From: Chen Yu
+Date: Tue, 12 Jan 2021 13:21:27 +0800
+Subject: cpufreq: intel_pstate: Get per-CPU max freq via MSR_HWP_CAPABILITIES if available
+
+From: Chen Yu
+
+commit 6f67e060083a84a4cc364eab6ae40c717165fb0c upstream.
+
+Currently, when turbo is disabled (either by BIOS or by the user),
+the intel_pstate driver reads the max non-turbo frequency from the
+package-wide MSR_PLATFORM_INFO(0xce) register.
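For illustration only (not part of the patch below), a minimal sketch of the two reads involved; the helper name is hypothetical, while rdmsrl_on_cpu() and HWP_GUARANTEED_PERF() are the kernel's own:

    #include <asm/msr.h>
    #include <asm/msr-index.h>

    /* Sketch: MSR_PLATFORM_INFO has package scope, so bits 15:8 give one
     * max non-turbo ratio for every CPU; MSR_HWP_CAPABILITIES has per-CPU
     * scope, so the guaranteed-performance ratio may differ per core. */
    static int sketch_max_non_turbo_ratio(unsigned int cpu, bool use_hwp)
    {
    	u64 val;

    	if (use_hwp) {
    		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &val);
    		return HWP_GUARANTEED_PERF(val);	/* per-CPU value */
    	}
    	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &val);
    	return (val >> 8) & 0xff;			/* package-wide value */
    }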
+
+However, on asymmetric platforms it is possible in theory that small
+and big cores with HWP enabled might have different max non-turbo CPU
+frequencies, because MSR_HWP_CAPABILITIES has per-CPU scope according
+to the Intel Software Developer's Manual.
+
+The turbo max freq is already per-CPU in the current code, so make a
+similar change to the max non-turbo frequency as well.
+
+Reported-by: Wendy Wang
+Signed-off-by: Chen Yu
+[ rjw: Subject and changelog edits ]
+Cc: 4.18+ # 4.18+: a45ee4d4e13b: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/cpufreq/intel_pstate.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -1724,11 +1724,9 @@ static void intel_pstate_max_within_limi
+ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ {
+ cpu->pstate.min_pstate = pstate_funcs.get_min();
+- cpu->pstate.max_pstate = pstate_funcs.get_max();
+ cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
+ cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+ cpu->pstate.scaling = pstate_funcs.get_scaling();
+- cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+
+ if (hwp_active && !hwp_mode_bdw) {
+ unsigned int phy_max, current_max;
+
+@@ -1736,9 +1734,12 @@ static void intel_pstate_get_cpu_pstates
+ intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
+ cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+ cpu->pstate.turbo_pstate = phy_max;
++ cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached));
+ } else {
+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
++ cpu->pstate.max_pstate = pstate_funcs.get_max();
+ }
++ cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+
+ if (pstate_funcs.get_aperf_mperf_shift)
+ cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
diff --git a/queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch b/queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
new file mode 100644
index 00000000000..ba5000b3874
--- /dev/null
+++ b/queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
@@ -0,0 +1,139 @@
+From 67fc209b527d023db4d087c68e44e9790aa089ef Mon Sep 17 00:00:00 2001
+From: Shawn Guo
+Date: Tue, 19 Jan 2021 10:39:25 +0800
+Subject: cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
+
+From: Shawn Guo
+
+commit 67fc209b527d023db4d087c68e44e9790aa089ef upstream.
+
+Commit f17b3e44320b ("cpufreq: qcom-hw: Use
+devm_platform_ioremap_resource() to simplify code") introduces
+a regression on platforms using the driver, by failing to initialise
+a policy, when one is created post hotplug.
+
+When all the CPUs of a policy are hotplugged out, the call to .exit()
+and later to devm_iounmap() does not release the memory region that was
+requested during devm_platform_ioremap_resource().
Therefore,
+a subsequent call to .init() will result in the following error, which
+will prevent a new policy from being initialised:
+
+[ 3395.915416] CPU4: shutdown
+[ 3395.938185] psci: CPU4 killed (polled 0 ms)
+[ 3399.071424] CPU5: shutdown
+[ 3399.094316] psci: CPU5 killed (polled 0 ms)
+[ 3402.139358] CPU6: shutdown
+[ 3402.161705] psci: CPU6 killed (polled 0 ms)
+[ 3404.742939] CPU7: shutdown
+[ 3404.765592] psci: CPU7 killed (polled 0 ms)
+[ 3411.492274] Detected VIPT I-cache on CPU4
+[ 3411.492337] GICv3: CPU4: found redistributor 400 region 0:0x0000000017ae0000
+[ 3411.492448] CPU4: Booted secondary processor 0x0000000400 [0x516f802d]
+[ 3411.503654] qcom-cpufreq-hw 17d43000.cpufreq: can't request region for resource [mem 0x17d45800-0x17d46bff]
+
+That said, the original code was tricky: it intentionally skipped the
+memory region request to hide this issue. The true cause is that
+those devm_xxx() device managed functions shouldn't be used for cpufreq
+init/exit hooks, because &pdev->dev is alive across the hooks and will
+not trigger auto resource free-up. Let's drop the use of device managed
+functions and manually allocate/free resources, so that the issue can be
+fixed properly.
+
+Cc: v5.10+ # v5.10+
+Fixes: f17b3e44320b ("cpufreq: qcom-hw: Use devm_platform_ioremap_resource() to simplify code")
+Suggested-by: Bjorn Andersson
+Signed-off-by: Shawn Guo
+Signed-off-by: Viresh Kumar
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/cpufreq/qcom-cpufreq-hw.c | 40 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/qcom-cpufreq-hw.c
++++ b/drivers/cpufreq/qcom-cpufreq-hw.c
+@@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
+
+ struct qcom_cpufreq_data {
+ void __iomem *base;
++ struct resource *res;
+ const struct qcom_cpufreq_soc_data *soc_data;
+ };
+
+@@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(stru
+ struct of_phandle_args args;
+ struct device_node *cpu_np;
+ struct device *cpu_dev;
++ struct resource *res;
+ void __iomem *base;
+ struct qcom_cpufreq_data *data;
+ int ret, index;
+@@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(stru
+
+ index = args.args[0];
+
+- base = devm_platform_ioremap_resource(pdev, index);
+- if (IS_ERR(base))
+- return PTR_ERR(base);
++ res = platform_get_resource(pdev, IORESOURCE_MEM, index);
++ if (!res) {
++ dev_err(dev, "failed to get mem resource %d\n", index);
++ return -ENODEV;
++ }
++
++ if (!request_mem_region(res->start, resource_size(res), res->name)) {
++ dev_err(dev, "failed to request resource %pR\n", res);
++ return -EBUSY;
++ }
+
+- data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
++ base = ioremap(res->start, resource_size(res));
++ if (IS_ERR(base)) {
++ dev_err(dev, "failed to map resource %pR\n", res);
++ ret = PTR_ERR(base);
++ goto release_region;
++ }
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+- goto error;
++ goto unmap_base;
+ }
+
+ data->soc_data = of_device_get_match_data(&pdev->dev);
+ data->base = base;
++ data->res = res;
+
+ /* HW should be in enabled state to proceed */
+ if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
+@@ -349,7 +366,11 @@ static int qcom_cpufreq_hw_cpu_init(stru
+
+ return 0;
+ error:
+- devm_iounmap(dev, base);
++ kfree(data);
+unmap_base:
++ iounmap(data->base);
+release_region:
++ release_mem_region(res->start, resource_size(res));
+ return ret;
+ }
+
+@@ -357,12 +378,15 @@ static int qcom_cpufreq_hw_cpu_exit(stru
+ {
+ struct device *cpu_dev = 
get_cpu_device(policy->cpu); + struct qcom_cpufreq_data *data = policy->driver_data; +- struct platform_device *pdev = cpufreq_get_driver_data(); ++ struct resource *res = data->res; ++ void __iomem *base = data->base; + + dev_pm_opp_remove_all_dynamic(cpu_dev); + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + kfree(policy->freq_table); +- devm_iounmap(&pdev->dev, data->base); ++ kfree(data); ++ iounmap(base); ++ release_mem_region(res->start, resource_size(res)); + + return 0; + } diff --git a/queue-5.10/dm-era-fix-bitset-memory-leaks.patch b/queue-5.10/dm-era-fix-bitset-memory-leaks.patch new file mode 100644 index 00000000000..97602b40c70 --- /dev/null +++ b/queue-5.10/dm-era-fix-bitset-memory-leaks.patch @@ -0,0 +1,58 @@ +From 904e6b266619c2da5c58b5dce14ae30629e39645 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:25:54 +0200 +Subject: dm era: Fix bitset memory leaks + +From: Nikos Tsironis + +commit 904e6b266619c2da5c58b5dce14ae30629e39645 upstream. + +Deallocate the memory allocated for the in-core bitsets when destroying +the target and in error paths. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Reviewed-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -47,6 +47,7 @@ struct writeset { + static void writeset_free(struct writeset *ws) + { + vfree(ws->bits); ++ ws->bits = NULL; + } + + static int setup_on_disk_bitset(struct dm_disk_bitset *info, +@@ -811,6 +812,8 @@ static struct era_metadata *metadata_ope + + static void metadata_close(struct era_metadata *md) + { ++ writeset_free(&md->writesets[0]); ++ writeset_free(&md->writesets[1]); + destroy_persistent_data_objects(md); + kfree(md); + } +@@ -848,6 +851,7 @@ static int metadata_resize(struct era_me + r = writeset_alloc(&md->writesets[1], *new_size); + if (r) { + DMERR("%s: writeset_alloc failed for writeset 1", __func__); ++ writeset_free(&md->writesets[0]); + return r; + } + +@@ -858,6 +862,8 @@ static int metadata_resize(struct era_me + &value, &md->era_array_root); + if (r) { + DMERR("%s: dm_array_resize failed", __func__); ++ writeset_free(&md->writesets[0]); ++ writeset_free(&md->writesets[1]); + return r; + } + diff --git a/queue-5.10/dm-era-only-resize-metadata-in-preresume.patch b/queue-5.10/dm-era-only-resize-metadata-in-preresume.patch new file mode 100644 index 00000000000..a3d020cfbab --- /dev/null +++ b/queue-5.10/dm-era-only-resize-metadata-in-preresume.patch @@ -0,0 +1,80 @@ +From cca2c6aebe86f68103a8615074b3578e854b5016 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Thu, 11 Feb 2021 16:22:43 +0200 +Subject: dm era: only resize metadata in preresume + +From: Nikos Tsironis + +commit cca2c6aebe86f68103a8615074b3578e854b5016 upstream. + +Metadata resize shouldn't happen in the ctr. The ctr loads a temporary +(inactive) table that will only become active upon resume. That is why +resize should always be done in terms of resume. Otherwise a load (ctr) +whose inactive table never becomes active will incorrectly resize the +metadata. + +Also, perform the resize directly in preresume, instead of using the +worker to do it. + +The worker might run other metadata operations, e.g., it could start +digestion, before resizing the metadata. These operations will end up +using the old size. 
+
+This could lead to errors, like:
+
+ device-mapper: era: metadata_digest_transcribe_writeset: dm_array_set_value failed
+ device-mapper: era: process_old_eras: digest step failed, stopping digestion
+
+The reason for the above error is that the worker started the digestion
+of the archived writeset using the old, larger size.
+
+As a result, metadata_digest_transcribe_writeset tried to write beyond
+the end of the era array.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis
+Signed-off-by: Mike Snitzer
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/md/dm-era-target.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1501,15 +1501,6 @@ static int era_ctr(struct dm_target *ti,
+ }
+ era->md = md;
+
+- era->nr_blocks = calc_nr_blocks(era);
+-
+- r = metadata_resize(era->md, &era->nr_blocks);
+- if (r) {
+- ti->error = "couldn't resize metadata";
+- era_destroy(era);
+- return -ENOMEM;
+- }
+-
+ era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
+ if (!era->wq) {
+ ti->error = "could not create workqueue for metadata object";
+@@ -1584,9 +1575,17 @@ static int era_preresume(struct dm_targe
+ dm_block_t new_size = calc_nr_blocks(era);
+
+ if (era->nr_blocks != new_size) {
+- r = in_worker1(era, metadata_resize, &new_size);
+- if (r)
++ r = metadata_resize(era->md, &new_size);
++ if (r) {
++ DMERR("%s: metadata_resize failed", __func__);
++ return r;
++ }
++
++ r = metadata_commit(era->md);
++ if (r) {
++ DMERR("%s: metadata_commit failed", __func__);
+ return r;
++ }
+
+ era->nr_blocks = new_size;
+ }
diff --git a/queue-5.10/dm-era-recover-committed-writeset-after-crash.patch b/queue-5.10/dm-era-recover-committed-writeset-after-crash.patch
new file mode 100644
index 00000000000..33f8c80db67
--- /dev/null
+++ b/queue-5.10/dm-era-recover-committed-writeset-after-crash.patch
@@ -0,0 +1,125 @@
+From de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis
+Date: Fri, 22 Jan 2021 17:19:30 +0200
+Subject: dm era: Recover committed writeset after crash
+
+From: Nikos Tsironis
+
+commit de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 upstream.
+
+Following a system crash, dm-era fails to recover the committed writeset
+for the current era, leading to lost writes. That is, we lose the
+information about what blocks were written during the affected era.
+
+dm-era assumes that the writeset of the current era is archived when the
+device is suspended. So, when resuming the device, it just moves on to
+the next era, ignoring the committed writeset.
+
+This assumption holds when the device is properly shut down. But, when
+the system crashes, the code that suspends the target never runs, so the
+writeset for the current era is not archived.
+
+There are three issues that cause the committed writeset to get lost:
+
+1. dm-era doesn't load the committed writeset when opening the metadata
+2. The code that resizes the metadata wipes the information about the
+   committed writeset (assuming it was loaded at step 1)
+3. era_preresume() starts a new era, without taking into account that
+   the current era might not have been archived, due to a system crash.
+
+To fix this:
+
+1. Load the committed writeset when opening the metadata
+2. Fix the code that resizes the metadata to make sure it doesn't wipe
+   the loaded writeset
+3. 
Fix era_preresume() to check for a loaded writeset and archive it, + before starting a new era. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bi + */ + static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) + { +- ws->md.nr_bits = nr_blocks; +- ws->md.root = INVALID_WRITESET_ROOT; + ws->bits = vzalloc(bitset_size(nr_blocks)); + if (!ws->bits) { + DMERR("%s: couldn't allocate in memory bitset", __func__); +@@ -85,12 +83,14 @@ static int writeset_alloc(struct writese + /* + * Wipes the in-core bitset, and creates a new on disk bitset. + */ +-static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) ++static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws, ++ dm_block_t nr_blocks) + { + int r; + +- memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); ++ memset(ws->bits, 0, bitset_size(nr_blocks)); + ++ ws->md.nr_bits = nr_blocks; + r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); + if (r) { + DMERR("%s: setup_on_disk_bitset failed", __func__); +@@ -579,6 +579,7 @@ static int open_metadata(struct era_meta + md->nr_blocks = le32_to_cpu(disk->nr_blocks); + md->current_era = le32_to_cpu(disk->current_era); + ++ ws_unpack(&disk->current_writeset, &md->current_writeset->md); + md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); + md->era_array_root = le64_to_cpu(disk->era_array_root); + md->metadata_snap = le64_to_cpu(disk->metadata_snap); +@@ -870,7 +871,6 @@ static int metadata_era_archive(struct e + } + + ws_pack(&md->current_writeset->md, &value); +- md->current_writeset->md.root = INVALID_WRITESET_ROOT; + + keys[0] = md->current_era; + __dm_bless_for_disk(&value); +@@ -882,6 +882,7 @@ static int metadata_era_archive(struct e + return r; + } + ++ md->current_writeset->md.root = INVALID_WRITESET_ROOT; + md->archived_writesets = true; + + return 0; +@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_m + int r; + struct writeset *new_writeset = next_writeset(md); + +- r = writeset_init(&md->bitset_info, new_writeset); ++ r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks); + if (r) { + DMERR("%s: writeset_init failed", __func__); + return r; +@@ -951,7 +952,7 @@ static int metadata_commit(struct era_me + int r; + struct dm_block *sblock; + +- if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { ++ if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { + r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, + &md->current_writeset->md.root); + if (r) { +@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_targe + + start_worker(era); + +- r = in_worker0(era, metadata_new_era); ++ r = in_worker0(era, metadata_era_rollover); + if (r) { + DMERR("%s: metadata_era_rollover failed", __func__); + return r; diff --git a/queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch b/queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch new file mode 100644 index 00000000000..dfbdda4f3e9 --- /dev/null +++ b/queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch @@ -0,0 +1,80 @@ +From 2524933307fd0036d5c32357c693c021ab09a0b0 Mon Sep 17 00:00:00 2001 
+From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:22:04 +0200 +Subject: dm era: Reinitialize bitset cache before digesting a new writeset + +From: Nikos Tsironis + +commit 2524933307fd0036d5c32357c693c021ab09a0b0 upstream. + +In case of devices with at most 64 blocks, the digestion of consecutive +eras uses the writeset of the first era as the writeset of all eras to +digest, leading to lost writes. That is, we lose the information about +what blocks were written during the affected eras. + +The digestion code uses a dm_disk_bitset object to access the archived +writesets. This structure includes a one word (64-bit) cache to reduce +the number of array lookups. + +This structure is initialized only once, in metadata_digest_start(), +when we kick off digestion. + +But, when we insert a new writeset into the writeset tree, before the +digestion of the previous writeset is done, or equivalently when there +are multiple writesets in the writeset tree to digest, then all these +writesets are digested using the same cache and the cache is not +re-initialized when moving from one writeset to the next. + +For devices with more than 64 blocks, i.e., the size of the cache, the +cache is indirectly invalidated when we move to a next set of blocks, so +we avoid the bug. + +But for devices with at most 64 blocks we end up using the same cached +data for digesting all archived writesets, i.e., the cache is loaded +when digesting the first writeset and it never gets reloaded, until the +digestion is done. + +As a result, the writeset of the first era to digest is used as the +writeset of all the following archived eras, leading to lost writes. + +Fix this by reinitializing the dm_disk_bitset structure, and thus +invalidating the cache, every time the digestion code starts digesting a +new writeset. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -756,6 +756,12 @@ static int metadata_digest_lookup_writes + ws_unpack(&disk, &d->writeset); + d->value = cpu_to_le32(key); + ++ /* ++ * We initialise another bitset info to avoid any caching side effects ++ * with the previous one. ++ */ ++ dm_disk_bitset_init(md->tm, &d->info); ++ + d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); + d->current_bit = 0; + d->step = metadata_digest_transcribe_writeset; +@@ -769,12 +775,6 @@ static int metadata_digest_start(struct + return 0; + + memset(d, 0, sizeof(*d)); +- +- /* +- * We initialise another bitset info to avoid any caching side +- * effects with the previous one. +- */ +- dm_disk_bitset_init(md->tm, &d->info); + d->step = metadata_digest_lookup_writeset; + + return 0; diff --git a/queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch b/queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch new file mode 100644 index 00000000000..238a1f3c6f6 --- /dev/null +++ b/queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch @@ -0,0 +1,118 @@ +From 2099b145d77c1d53f5711f029c37cc537897cee6 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:19:31 +0200 +Subject: dm era: Update in-core bitset after committing the metadata + +From: Nikos Tsironis + +commit 2099b145d77c1d53f5711f029c37cc537897cee6 upstream. 
+ +In case of a system crash, dm-era might fail to mark blocks as written +in its metadata, although the corresponding writes to these blocks were +passed down to the origin device and completed successfully. + +Consider the following sequence of events: + +1. We write to a block that has not been yet written in the current era +2. era_map() checks the in-core bitmap for the current era and sees + that the block is not marked as written. +3. The write is deferred for submission after the metadata have been + updated and committed. +4. The worker thread processes the deferred write + (process_deferred_bios()) and marks the block as written in the + in-core bitmap, **before** committing the metadata. +5. The worker thread starts committing the metadata. +6. We do more writes that map to the same block as the write of step (1) +7. era_map() checks the in-core bitmap and sees that the block is marked + as written, **although the metadata have not been committed yet**. +8. These writes are passed down to the origin device immediately and the + device reports them as completed. +9. The system crashes, e.g., power failure, before the commit from step + (5) finishes. + +When the system recovers and we query the dm-era target for the list of +written blocks it doesn't report the aforementioned block as written, +although the writes of step (6) completed successfully. + +The issue is that era_map() decides whether to defer or not a write +based on non committed information. The root cause of the bug is that we +update the in-core bitmap, **before** committing the metadata. + +Fix this by updating the in-core bitmap **after** successfully +committing the metadata. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -134,7 +134,7 @@ static int writeset_test_and_set(struct + { + int r; + +- if (!test_and_set_bit(block, ws->bits)) { ++ if (!test_bit(block, ws->bits)) { + r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); + if (r) { + /* FIXME: fail mode */ +@@ -1226,8 +1226,10 @@ static void process_deferred_bios(struct + int r; + struct bio_list deferred_bios, marked_bios; + struct bio *bio; ++ struct blk_plug plug; + bool commit_needed = false; + bool failed = false; ++ struct writeset *ws = era->md->current_writeset; + + bio_list_init(&deferred_bios); + bio_list_init(&marked_bios); +@@ -1237,9 +1239,11 @@ static void process_deferred_bios(struct + bio_list_init(&era->deferred_bios); + spin_unlock(&era->deferred_lock); + ++ if (bio_list_empty(&deferred_bios)) ++ return; ++ + while ((bio = bio_list_pop(&deferred_bios))) { +- r = writeset_test_and_set(&era->md->bitset_info, +- era->md->current_writeset, ++ r = writeset_test_and_set(&era->md->bitset_info, ws, + get_block(era, bio)); + if (r < 0) { + /* +@@ -1247,7 +1251,6 @@ static void process_deferred_bios(struct + * FIXME: finish. 
+ */ + failed = true; +- + } else if (r == 0) + commit_needed = true; + +@@ -1263,9 +1266,19 @@ static void process_deferred_bios(struct + if (failed) + while ((bio = bio_list_pop(&marked_bios))) + bio_io_error(bio); +- else +- while ((bio = bio_list_pop(&marked_bios))) ++ else { ++ blk_start_plug(&plug); ++ while ((bio = bio_list_pop(&marked_bios))) { ++ /* ++ * Only update the in-core writeset if the on-disk one ++ * was updated too. ++ */ ++ if (commit_needed) ++ set_bit(get_block(era, bio), ws->bits); + submit_bio_noacct(bio); ++ } ++ blk_finish_plug(&plug); ++ } + } + + static void process_rpc_calls(struct era *era) diff --git a/queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch b/queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch new file mode 100644 index 00000000000..12ec7d9cbdb --- /dev/null +++ b/queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch @@ -0,0 +1,33 @@ +From 64f2d15afe7b336aafebdcd14cc835ecf856df4b Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:25:55 +0200 +Subject: dm era: Use correct value size in equality function of writeset tree + +From: Nikos Tsironis + +commit 64f2d15afe7b336aafebdcd14cc835ecf856df4b upstream. + +Fix the writeset tree equality test function to use the right value size +when comparing two btree values. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Reviewed-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -389,7 +389,7 @@ static void ws_dec(void *context, const + + static int ws_eq(void *context, const void *value1, const void *value2) + { +- return !memcmp(value1, value2, sizeof(struct writeset_metadata)); ++ return !memcmp(value1, value2, sizeof(struct writeset_disk)); + } + + /*----------------------------------------------------------------*/ diff --git a/queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch b/queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch new file mode 100644 index 00000000000..a0864755ac5 --- /dev/null +++ b/queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch @@ -0,0 +1,49 @@ +From c8e846ff93d5eaa5384f6f325a1687ac5921aade Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:25:53 +0200 +Subject: dm era: Verify the data block size hasn't changed + +From: Nikos Tsironis + +commit c8e846ff93d5eaa5384f6f325a1687ac5921aade upstream. + +dm-era doesn't support changing the data block size of existing devices, +so check explicitly that the requested block size for a new target +matches the one stored in the metadata. 
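As an aside, the validation pattern reduced to a self-contained sketch (the struct and names here are hypothetical, not the dm-era metadata layout):

    #include <linux/errno.h>
    #include <linux/types.h>
    #include <asm/byteorder.h>

    /* Hypothetical, trimmed-down superblock: only the field being checked. */
    struct demo_superblock {
    	__le32 data_block_size;
    };

    /* Fail the table load instead of silently adopting a different
     * on-disk block size. */
    static int demo_check_block_size(const struct demo_superblock *disk,
    				 u32 requested_block_size)
    {
    	if (le32_to_cpu(disk->data_block_size) != requested_block_size)
    		return -EINVAL;
    	return 0;
    }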
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis
+Reviewed-by: Ming-Hung Tsai
+Signed-off-by: Mike Snitzer
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/md/dm-era-target.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -564,6 +564,15 @@ static int open_metadata(struct era_meta
+ }
+
+ disk = dm_block_data(sblock);
++
++ /* Verify the data block size hasn't changed */
++ if (le32_to_cpu(disk->data_block_size) != md->block_size) {
++ DMERR("changing the data block size (from %u to %llu) is not supported",
++ le32_to_cpu(disk->data_block_size), md->block_size);
++ r = -EINVAL;
++ goto bad;
++ }
++
+ r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
+ disk->metadata_space_map_root,
+ sizeof(disk->metadata_space_map_root),
+@@ -575,7 +584,6 @@ static int open_metadata(struct era_meta
+
+ setup_infos(md);
+
+- md->block_size = le32_to_cpu(disk->data_block_size);
+ md->nr_blocks = le32_to_cpu(disk->nr_blocks);
+ md->current_era = le32_to_cpu(disk->current_era);
+
diff --git a/queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch b/queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch
new file mode 100644
index 00000000000..1f1bf6dc7c1
--- /dev/null
+++ b/queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch
@@ -0,0 +1,201 @@
+From a666e5c05e7c4aaabb2c5d58117b0946803d03d2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka
+Date: Wed, 10 Feb 2021 15:26:23 -0500
+Subject: dm: fix deadlock when swapping to encrypted device
+
+From: Mikulas Patocka
+
+commit a666e5c05e7c4aaabb2c5d58117b0946803d03d2 upstream.
+
+The system would deadlock when swapping to a dm-crypt device. The reason
+is that for each incoming write bio, dm-crypt allocates memory that holds
+encrypted data. These excessive allocations exhaust all the memory and the
+result is either deadlock or OOM trigger.
+
+This patch limits the number of in-flight swap bios, so that the memory
+consumed by dm-crypt is limited. The limit is enforced if the target sets
+the "limit_swap_bios" variable and if the bio has REQ_SWAP set.
+
+Non-swap bios are not affected because taking the semaphore would cause
+performance degradation.
+
+This is similar to request-based drivers - they will also block when the
+number of requests is over the limit.
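The throttling idea, as a sketch with hypothetical names (the patch below additionally keeps a per-device count that can be retuned at runtime via the swap_bios module parameter):

    #include <linux/bio.h>
    #include <linux/blkdev.h>
    #include <linux/semaphore.h>

    static struct semaphore demo_swap_sem;	/* sema_init(&demo_swap_sem, limit) */

    /* Submitters sleep once 'limit' swap bios are in flight... */
    static void demo_submit_swap_bio(struct bio *bio)
    {
    	down(&demo_swap_sem);		/* blocks while the limit is reached */
    	submit_bio_noacct(bio);
    }

    /* ...and each completion releases a slot, from the endio path. */
    static void demo_swap_bio_done(void)
    {
    	up(&demo_swap_sem);
    }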
+ +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-core.h | 4 ++ + drivers/md/dm-crypt.c | 1 + drivers/md/dm.c | 60 ++++++++++++++++++++++++++++++++++++++++++ + include/linux/device-mapper.h | 5 +++ + 4 files changed, 70 insertions(+) + +--- a/drivers/md/dm-core.h ++++ b/drivers/md/dm-core.h +@@ -109,6 +109,10 @@ struct mapped_device { + + struct block_device *bdev; + ++ int swap_bios; ++ struct semaphore swap_bios_semaphore; ++ struct mutex swap_bios_lock; ++ + struct dm_stats stats; + + /* for blk-mq request-based DM support */ +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *t + wake_up_process(cc->write_thread); + + ti->num_flush_bios = 1; ++ ti->limit_swap_bios = true; + + return 0; + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_ + #define DM_NUMA_NODE NUMA_NO_NODE + static int dm_numa_node = DM_NUMA_NODE; + ++#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE) ++static int swap_bios = DEFAULT_SWAP_BIOS; ++static int get_swap_bios(void) ++{ ++ int latch = READ_ONCE(swap_bios); ++ if (unlikely(latch <= 0)) ++ latch = DEFAULT_SWAP_BIOS; ++ return latch; ++} ++ + /* + * For mempools pre-allocation at the table loading time. + */ +@@ -966,6 +976,11 @@ void disable_write_zeroes(struct mapped_ + limits->max_write_zeroes_sectors = 0; + } + ++static bool swap_bios_limit(struct dm_target *ti, struct bio *bio) ++{ ++ return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios); ++} ++ + static void clone_endio(struct bio *bio) + { + blk_status_t error = bio->bi_status; +@@ -1016,6 +1031,11 @@ static void clone_endio(struct bio *bio) + } + } + ++ if (unlikely(swap_bios_limit(tio->ti, bio))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } ++ + free_tio(tio); + dec_pending(io, error); + } +@@ -1249,6 +1269,22 @@ void dm_accept_partial_bio(struct bio *b + } + EXPORT_SYMBOL_GPL(dm_accept_partial_bio); + ++static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) ++{ ++ mutex_lock(&md->swap_bios_lock); ++ while (latch < md->swap_bios) { ++ cond_resched(); ++ down(&md->swap_bios_semaphore); ++ md->swap_bios--; ++ } ++ while (latch > md->swap_bios) { ++ cond_resched(); ++ up(&md->swap_bios_semaphore); ++ md->swap_bios++; ++ } ++ mutex_unlock(&md->swap_bios_lock); ++} ++ + static blk_qc_t __map_bio(struct dm_target_io *tio) + { + int r; +@@ -1268,6 +1304,14 @@ static blk_qc_t __map_bio(struct dm_targ + atomic_inc(&io->io_count); + sector = clone->bi_iter.bi_sector; + ++ if (unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ int latch = get_swap_bios(); ++ if (unlikely(latch != md->swap_bios)) ++ __set_swap_bios_limit(md, latch); ++ down(&md->swap_bios_semaphore); ++ } ++ + r = ti->type->map(ti, clone); + switch (r) { + case DM_MAPIO_SUBMITTED: +@@ -1279,10 +1323,18 @@ static blk_qc_t __map_bio(struct dm_targ + ret = submit_bio_noacct(clone); + break; + case DM_MAPIO_KILL: ++ if (unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } + free_tio(tio); + dec_pending(io, BLK_STS_IOERR); + break; + case DM_MAPIO_REQUEUE: ++ if (unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } + free_tio(tio); + dec_pending(io, BLK_STS_DM_REQUEUE); + break; +@@ -1756,6 
+1808,7 @@ static void cleanup_mapped_device(struct + mutex_destroy(&md->suspend_lock); + mutex_destroy(&md->type_lock); + mutex_destroy(&md->table_devices_lock); ++ mutex_destroy(&md->swap_bios_lock); + + dm_mq_cleanup_mapped_device(md); + } +@@ -1823,6 +1876,10 @@ static struct mapped_device *alloc_dev(i + init_waitqueue_head(&md->eventq); + init_completion(&md->kobj_holder.completion); + ++ md->swap_bios = get_swap_bios(); ++ sema_init(&md->swap_bios_semaphore, md->swap_bios); ++ mutex_init(&md->swap_bios_lock); ++ + md->disk->major = _major; + md->disk->first_minor = minor; + md->disk->fops = &dm_blk_dops; +@@ -3119,6 +3176,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, + module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); + MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); + ++module_param(swap_bios, int, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs"); ++ + MODULE_DESCRIPTION(DM_NAME " driver"); + MODULE_AUTHOR("Joe Thornber "); + MODULE_LICENSE("GPL"); +--- a/include/linux/device-mapper.h ++++ b/include/linux/device-mapper.h +@@ -325,6 +325,11 @@ struct dm_target { + * whether or not its underlying devices have support. + */ + bool discards_supported:1; ++ ++ /* ++ * Set if we need to limit the number of in-flight bios when swapping. ++ */ ++ bool limit_swap_bios:1; + }; + + void *dm_per_bio_data(struct bio *bio, size_t data_size); diff --git a/queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch new file mode 100644 index 00000000000..83801b0d271 --- /dev/null +++ b/queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch @@ -0,0 +1,139 @@ +From 5b0fab508992c2e120971da658ce80027acbc405 Mon Sep 17 00:00:00 2001 +From: Jeffle Xu +Date: Mon, 8 Feb 2021 22:34:36 -0500 +Subject: dm table: fix DAX iterate_devices based device capability checks + +From: Jeffle Xu + +commit 5b0fab508992c2e120971da658ce80027acbc405 upstream. + +Fix dm_table_supports_dax() and invert logic of both +iterate_devices_callout_fn so that all devices' DAX capabilities are +properly checked. 
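The invariant behind the inversion, as a tiny standalone sketch (plain C, not the device-mapper API): an iterator that stops at the first non-zero return can only prove "all devices are capable" by asking whether any device is not capable.

    #include <stdbool.h>

    typedef int (*pred_fn)(int dev);

    /* Mirrors iterate_devices(): stop at the first non-zero callback return. */
    static int any_dev(const int *devs, int n, pred_fn pred)
    {
    	for (int i = 0; i < n; i++)
    		if (pred(devs[i]))
    			return 1;
    	return 0;
    }

    static int device_not_capable(int dev)
    {
    	return !dev;	/* demo predicate: 0 stands for "not capable" */
    }

    /* "All capable" is expressed as "no device lacks the capability". */
    static bool all_capable(const int *devs, int n)
    {
    	return !any_dev(devs, n, device_not_capable);
    }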
+ +Fixes: 545ed20e6df6 ("dm: add infrastructure for DAX support") +Cc: stable@vger.kernel.org +Signed-off-by: Jeffle Xu +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-table.c | 37 ++++++++++--------------------------- + drivers/md/dm.c | 2 +- + drivers/md/dm.h | 2 +- + 3 files changed, 12 insertions(+), 29 deletions(-) + +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -827,24 +827,24 @@ void dm_table_set_type(struct dm_table * + EXPORT_SYMBOL_GPL(dm_table_set_type); + + /* validate the dax capability of the target device span */ +-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, ++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) + { + int blocksize = *(int *) data, id; + bool rc; + + id = dax_read_lock(); +- rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); ++ rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); + dax_read_unlock(id); + + return rc; + } + + /* Check devices support synchronous DAX */ +-static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev, +- sector_t start, sector_t len, void *data) ++static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) + { +- return dev->dax_dev && dax_synchronous(dev->dax_dev); ++ return !dev->dax_dev || !dax_synchronous(dev->dax_dev); + } + + bool dm_table_supports_dax(struct dm_table *t, +@@ -861,7 +861,7 @@ bool dm_table_supports_dax(struct dm_tab + return false; + + if (!ti->type->iterate_devices || +- !ti->type->iterate_devices(ti, iterate_fn, blocksize)) ++ ti->type->iterate_devices(ti, iterate_fn, blocksize)) + return false; + } + +@@ -932,7 +932,7 @@ static int dm_table_determine_type(struc + verify_bio_based: + /* We must use this table as bio-based */ + t->type = DM_TYPE_BIO_BASED; +- if (dm_table_supports_dax(t, device_supports_dax, &page_size) || ++ if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || + (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { + t->type = DM_TYPE_DAX_BIO_BASED; + } +@@ -1625,23 +1625,6 @@ static int device_dax_write_cache_enable + return false; + } + +-static int dm_table_supports_dax_write_cache(struct dm_table *t) +-{ +- struct dm_target *ti; +- unsigned i; +- +- for (i = 0; i < dm_table_get_num_targets(t); i++) { +- ti = dm_table_get_target(t, i); +- +- if (ti->type->iterate_devices && +- ti->type->iterate_devices(ti, +- device_dax_write_cache_enabled, NULL)) +- return true; +- } +- +- return false; +-} +- + static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) + { +@@ -1846,15 +1829,15 @@ void dm_table_set_restrictions(struct dm + } + blk_queue_write_cache(q, wc, fua); + +- if (dm_table_supports_dax(t, device_supports_dax, &page_size)) { ++ if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) { + blk_queue_flag_set(QUEUE_FLAG_DAX, q); +- if (dm_table_supports_dax(t, device_dax_synchronous, NULL)) ++ if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL)) + set_dax_synchronous(t->md->dax_dev); + } + else + blk_queue_flag_clear(QUEUE_FLAG_DAX, q); + +- if (dm_table_supports_dax_write_cache(t)) ++ if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled)) + dax_write_cache(t->md->dax_dev, true); + + /* Ensure that all underlying devices are non-rotational. 
*/ +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1145,7 +1145,7 @@ static bool dm_dax_supported(struct dax_ + if (!map) + goto out; + +- ret = dm_table_supports_dax(map, device_supports_dax, &blocksize); ++ ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize); + + out: + dm_put_live_table(md, srcu_idx); +--- a/drivers/md/dm.h ++++ b/drivers/md/dm.h +@@ -73,7 +73,7 @@ void dm_table_free_md_mempools(struct dm + struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); + bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn, + int *blocksize); +-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, ++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data); + + void dm_lock_md_type(struct mapped_device *md); diff --git a/queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch new file mode 100644 index 00000000000..9c7196a171a --- /dev/null +++ b/queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch @@ -0,0 +1,191 @@ +From a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e Mon Sep 17 00:00:00 2001 +From: Jeffle Xu +Date: Tue, 2 Feb 2021 11:35:28 +0800 +Subject: dm table: fix iterate_devices based device capability checks + +From: Jeffle Xu + +commit a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e upstream. + +According to the definition of dm_iterate_devices_fn: + * This function must iterate through each section of device used by the + * target until it encounters a non-zero return code, which it then returns. + * Returns zero if no callout returned non-zero. + +For some target type (e.g. dm-stripe), one call of iterate_devices() may +iterate multiple underlying devices internally, in which case a non-zero +return code returned by iterate_devices_callout_fn will stop the iteration +in advance. No iterate_devices_callout_fn should return non-zero unless +device iteration should stop. + +Rename dm_table_requires_stable_pages() to dm_table_any_dev_attr() and +elevate it for reuse to stop iterating (and return non-zero) on the +first device that causes iterate_devices_callout_fn to return non-zero. +Use dm_table_any_dev_attr() to properly iterate through devices. + +Rename device_is_nonrot() to device_is_rotational() and invert logic +accordingly to fix improper disposition. + +Fixes: c3c4555edd10 ("dm table: clear add_random unless all devices have it set") +Fixes: 4693c9668fdc ("dm table: propagate non rotational flag") +Cc: stable@vger.kernel.org +Signed-off-by: Jeffle Xu +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-table.c | 97 ++++++++++++++++++++++++++------------------------ + 1 file changed, 51 insertions(+), 46 deletions(-) + +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -1302,6 +1302,46 @@ struct dm_target *dm_table_find_target(s + return &t->targets[(KEYS_PER_NODE * n) + k]; + } + ++/* ++ * type->iterate_devices() should be called when the sanity check needs to ++ * iterate and check all underlying data devices. iterate_devices() will ++ * iterate all underlying data devices until it encounters a non-zero return ++ * code, returned by whether the input iterate_devices_callout_fn, or ++ * iterate_devices() itself internally. ++ * ++ * For some target type (e.g. 
dm-stripe), one call of iterate_devices() may ++ * iterate multiple underlying devices internally, in which case a non-zero ++ * return code returned by iterate_devices_callout_fn will stop the iteration ++ * in advance. ++ * ++ * Cases requiring _any_ underlying device supporting some kind of attribute, ++ * should use the iteration structure like dm_table_any_dev_attr(), or call ++ * it directly. @func should handle semantics of positive examples, e.g. ++ * capable of something. ++ * ++ * Cases requiring _all_ underlying devices supporting some kind of attribute, ++ * should use the iteration structure like dm_table_supports_nowait() or ++ * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that ++ * uses an @anti_func that handle semantics of counter examples, e.g. not ++ * capable of something. So: return !dm_table_any_dev_attr(t, anti_func); ++ */ ++static bool dm_table_any_dev_attr(struct dm_table *t, ++ iterate_devices_callout_fn func) ++{ ++ struct dm_target *ti; ++ unsigned int i; ++ ++ for (i = 0; i < dm_table_get_num_targets(t); i++) { ++ ti = dm_table_get_target(t, i); ++ ++ if (ti->type->iterate_devices && ++ ti->type->iterate_devices(ti, func, NULL)) ++ return true; ++ } ++ ++ return false; ++} ++ + static int count_device(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) + { +@@ -1602,12 +1642,12 @@ static int dm_table_supports_dax_write_c + return false; + } + +-static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, +- sector_t start, sector_t len, void *data) ++static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) + { + struct request_queue *q = bdev_get_queue(dev->bdev); + +- return q && blk_queue_nonrot(q); ++ return q && !blk_queue_nonrot(q); + } + + static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, +@@ -1618,23 +1658,6 @@ static int device_is_not_random(struct d + return q && !blk_queue_add_random(q); + } + +-static bool dm_table_all_devices_attribute(struct dm_table *t, +- iterate_devices_callout_fn func) +-{ +- struct dm_target *ti; +- unsigned i; +- +- for (i = 0; i < dm_table_get_num_targets(t); i++) { +- ti = dm_table_get_target(t, i); +- +- if (!ti->type->iterate_devices || +- !ti->type->iterate_devices(ti, func, NULL)) +- return false; +- } +- +- return true; +-} +- + static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) + { +@@ -1786,27 +1809,6 @@ static int device_requires_stable_pages( + return q && blk_queue_stable_writes(q); + } + +-/* +- * If any underlying device requires stable pages, a table must require +- * them as well. Only targets that support iterate_devices are considered: +- * don't want error, zero, etc to require stable pages. +- */ +-static bool dm_table_requires_stable_pages(struct dm_table *t) +-{ +- struct dm_target *ti; +- unsigned i; +- +- for (i = 0; i < dm_table_get_num_targets(t); i++) { +- ti = dm_table_get_target(t, i); +- +- if (ti->type->iterate_devices && +- ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) +- return true; +- } +- +- return false; +-} +- + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, + struct queue_limits *limits) + { +@@ -1856,10 +1858,10 @@ void dm_table_set_restrictions(struct dm + dax_write_cache(t->md->dax_dev, true); + + /* Ensure that all underlying devices are non-rotational. 
*/ +- if (dm_table_all_devices_attribute(t, device_is_nonrot)) +- blk_queue_flag_set(QUEUE_FLAG_NONROT, q); +- else ++ if (dm_table_any_dev_attr(t, device_is_rotational)) + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); ++ else ++ blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; +@@ -1871,8 +1873,11 @@ void dm_table_set_restrictions(struct dm + /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. ++ * If any underlying device requires stable pages, a table must require ++ * them as well. Only targets that support iterate_devices are considered: ++ * don't want error, zero, etc to require stable pages. + */ +- if (dm_table_requires_stable_pages(t)) ++ if (dm_table_any_dev_attr(t, device_requires_stable_pages)) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); + else + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); +@@ -1883,7 +1888,7 @@ void dm_table_set_restrictions(struct dm + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ +- if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) ++ if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random)) + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); + + /* diff --git a/queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch new file mode 100644 index 00000000000..17adbb8582f --- /dev/null +++ b/queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch @@ -0,0 +1,152 @@ +From 24f6b6036c9eec21191646930ad42808e6180510 Mon Sep 17 00:00:00 2001 +From: Jeffle Xu +Date: Mon, 8 Feb 2021 22:46:38 -0500 +Subject: dm table: fix zoned iterate_devices based device capability checks + +From: Jeffle Xu + +commit 24f6b6036c9eec21191646930ad42808e6180510 upstream. + +Fix dm_table_supports_zoned_model() and invert logic of both +iterate_devices_callout_fn so that all devices' zoned capabilities are +properly checked. + +Add one more parameter to dm_table_any_dev_attr(), which is actually +used as the @data parameter of iterate_devices_callout_fn, so that +dm_table_matches_zone_sectors() can be replaced by +dm_table_any_dev_attr(). + +Fixes: dd88d313bef02 ("dm table: add zoned block devices validation") +Cc: stable@vger.kernel.org +Signed-off-by: Jeffle Xu +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-table.c | 48 ++++++++++++++++-------------------------------- + 1 file changed, 16 insertions(+), 32 deletions(-) + +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -1323,10 +1323,10 @@ struct dm_target *dm_table_find_target(s + * should use the iteration structure like dm_table_supports_nowait() or + * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that + * uses an @anti_func that handle semantics of counter examples, e.g. not +- * capable of something. So: return !dm_table_any_dev_attr(t, anti_func); ++ * capable of something. 
So: return !dm_table_any_dev_attr(t, anti_func, data); + */ + static bool dm_table_any_dev_attr(struct dm_table *t, +- iterate_devices_callout_fn func) ++ iterate_devices_callout_fn func, void *data) + { + struct dm_target *ti; + unsigned int i; +@@ -1335,7 +1335,7 @@ static bool dm_table_any_dev_attr(struct + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && +- ti->type->iterate_devices(ti, func, NULL)) ++ ti->type->iterate_devices(ti, func, data)) + return true; + } + +@@ -1378,13 +1378,13 @@ bool dm_table_has_no_data_devices(struct + return true; + } + +-static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, +- sector_t start, sector_t len, void *data) ++static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) + { + struct request_queue *q = bdev_get_queue(dev->bdev); + enum blk_zoned_model *zoned_model = data; + +- return q && blk_queue_zoned_model(q) == *zoned_model; ++ return !q || blk_queue_zoned_model(q) != *zoned_model; + } + + static bool dm_table_supports_zoned_model(struct dm_table *t, +@@ -1401,37 +1401,20 @@ static bool dm_table_supports_zoned_mode + return false; + + if (!ti->type->iterate_devices || +- !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model)) ++ ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model)) + return false; + } + + return true; + } + +-static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, +- sector_t start, sector_t len, void *data) ++static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) + { + struct request_queue *q = bdev_get_queue(dev->bdev); + unsigned int *zone_sectors = data; + +- return q && blk_queue_zone_sectors(q) == *zone_sectors; +-} +- +-static bool dm_table_matches_zone_sectors(struct dm_table *t, +- unsigned int zone_sectors) +-{ +- struct dm_target *ti; +- unsigned i; +- +- for (i = 0; i < dm_table_get_num_targets(t); i++) { +- ti = dm_table_get_target(t, i); +- +- if (!ti->type->iterate_devices || +- !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors)) +- return false; +- } +- +- return true; ++ return !q || blk_queue_zone_sectors(q) != *zone_sectors; + } + + static int validate_hardware_zoned_model(struct dm_table *table, +@@ -1451,7 +1434,7 @@ static int validate_hardware_zoned_model + if (!zone_sectors || !is_power_of_2(zone_sectors)) + return -EINVAL; + +- if (!dm_table_matches_zone_sectors(table, zone_sectors)) { ++ if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) { + DMERR("%s: zone sectors is not consistent across all devices", + dm_device_name(table->md)); + return -EINVAL; +@@ -1837,11 +1820,11 @@ void dm_table_set_restrictions(struct dm + else + blk_queue_flag_clear(QUEUE_FLAG_DAX, q); + +- if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled)) ++ if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) + dax_write_cache(t->md->dax_dev, true); + + /* Ensure that all underlying devices are non-rotational. */ +- if (dm_table_any_dev_attr(t, device_is_rotational)) ++ if (dm_table_any_dev_attr(t, device_is_rotational, NULL)) + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + else + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); +@@ -1860,7 +1843,7 @@ void dm_table_set_restrictions(struct dm + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. 
+ */ +- if (dm_table_any_dev_attr(t, device_requires_stable_pages)) ++ if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL)) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); + else + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); +@@ -1871,7 +1854,8 @@ void dm_table_set_restrictions(struct dm + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ +- if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random)) ++ if (blk_queue_add_random(q) && ++ dm_table_any_dev_attr(t, device_is_not_random, NULL)) + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); + + /* diff --git a/queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch b/queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch new file mode 100644 index 00000000000..839c607f5fe --- /dev/null +++ b/queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch @@ -0,0 +1,34 @@ +From cb728484a7710c202f02b96aa0962ce9b07aa5c2 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sat, 23 Jan 2021 09:19:56 -0500 +Subject: dm writecache: fix performance degradation in ssd mode + +From: Mikulas Patocka + +commit cb728484a7710c202f02b96aa0962ce9b07aa5c2 upstream. + +Fix a thinko in ssd_commit_superblock. region.count is in sectors, not +bytes. This bug doesn't corrupt data, but it causes performance +degradation. + +Signed-off-by: Mikulas Patocka +Fixes: dc8a01ae1dbd ("dm writecache: optimize superblock write") +Cc: stable@vger.kernel.org # v5.7+ +Reported-by: J. Bruce Fields +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-writecache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -523,7 +523,7 @@ static void ssd_commit_superblock(struct + + region.bdev = wc->ssd_dev->bdev; + region.sector = 0; +- region.count = PAGE_SIZE; ++ region.count = PAGE_SIZE >> SECTOR_SHIFT; + + if (unlikely(region.sector + region.count > wc->metadata_sectors)) + region.count = wc->metadata_sectors - region.sector; diff --git a/queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch b/queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch new file mode 100644 index 00000000000..dc6e1f3ff99 --- /dev/null +++ b/queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch @@ -0,0 +1,78 @@ +From 4134455f2aafdfeab50cabb4cccb35e916034b93 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 9 Feb 2021 10:56:20 -0500 +Subject: dm writecache: fix writing beyond end of underlying device when shrinking + +From: Mikulas Patocka + +commit 4134455f2aafdfeab50cabb4cccb35e916034b93 upstream. + +Do not attempt to write any data beyond the end of the underlying data +device while shrinking it. + +The DM writecache device must be suspended when the underlying data +device is shrunk. 
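+
+To illustrate the idea (a simplified sketch with shortened names, not
+the exact driver code), the target caches the size of the data device
+when it is resumed and then skips or clamps any writeback copy that
+would reach past that size:
+
+	/* illustrative sketch: clamp a writeback region to device end */
+	sector_t dev_sectors = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+
+	if (to.sector >= dev_sectors)
+		return;		/* region lies fully beyond the device */
+	if (to.sector + to.count > dev_sectors)
+		to.count = dev_sectors - to.sector;	/* partial overlap */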
+ +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-writecache.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -148,6 +148,7 @@ struct dm_writecache { + size_t metadata_sectors; + size_t n_blocks; + uint64_t seq_count; ++ sector_t data_device_sectors; + void *block_start; + struct wc_entry *entries; + unsigned block_size; +@@ -977,6 +978,8 @@ static void writecache_resume(struct dm_ + + wc_lock(wc); + ++ wc->data_device_sectors = i_size_read(wc->dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ + if (WC_MODE_PMEM(wc)) { + persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); + } else { +@@ -1646,6 +1649,10 @@ static bool wc_add_block(struct writebac + void *address = memory_data(wc, e); + + persistent_memory_flush_cache(address, block_size); ++ ++ if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors)) ++ return true; ++ + return bio_add_page(&wb->bio, persistent_memory_page(address), + block_size, persistent_memory_page_offset(address)) != 0; + } +@@ -1717,6 +1724,9 @@ static void __writecache_writeback_pmem( + if (writecache_has_error(wc)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); ++ } else if (unlikely(!bio_sectors(bio))) { ++ bio->bi_status = BLK_STS_OK; ++ bio_endio(bio); + } else { + submit_bio(bio); + } +@@ -1760,6 +1770,14 @@ static void __writecache_writeback_ssd(s + e = f; + } + ++ if (unlikely(to.sector + to.count > wc->data_device_sectors)) { ++ if (to.sector >= wc->data_device_sectors) { ++ writecache_copy_endio(0, 0, c); ++ continue; ++ } ++ from.count = to.count = wc->data_device_sectors - to.sector; ++ } ++ + dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); + + __writeback_throttle(wc, wbl); diff --git a/queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch b/queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch new file mode 100644 index 00000000000..02bf00fec5d --- /dev/null +++ b/queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch @@ -0,0 +1,170 @@ +From 054bee16163df023e2589db09fd27d81f7ad9e72 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 4 Feb 2021 05:20:52 -0500 +Subject: dm writecache: return the exact table values that were set + +From: Mikulas Patocka + +commit 054bee16163df023e2589db09fd27d81f7ad9e72 upstream. + +LVM doesn't like it when the target returns different values from what +was set in the constructor. Fix dm-writecache so that the returned +table values are exactly the same as requested values. 
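+
+As a simplified illustration (condensed from the diff below, not the
+complete code): the constructor now remembers each optional value
+exactly as parsed, and the status path reports that remembered value
+instead of recomputing it from internal block counts, which rounded
+differently:
+
+	/* ctr: remember the value the user actually passed */
+	wc->high_wm_percent_value = high_wm_percent;
+	wc->high_wm_percent_set = true;
+
+	/* status: emit the remembered value verbatim */
+	if (wc->high_wm_percent_set)
+		DMEMIT(" high_watermark %u", wc->high_wm_percent_value);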
+ +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org # v4.18+ +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-writecache.c | 54 +++++++++++++++++++++++++-------------------- + 1 file changed, 30 insertions(+), 24 deletions(-) + +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -159,14 +159,22 @@ struct dm_writecache { + bool overwrote_committed:1; + bool memory_vmapped:1; + ++ bool start_sector_set:1; + bool high_wm_percent_set:1; + bool low_wm_percent_set:1; + bool max_writeback_jobs_set:1; + bool autocommit_blocks_set:1; + bool autocommit_time_set:1; ++ bool max_age_set:1; + bool writeback_fua_set:1; + bool flush_on_suspend:1; + bool cleaner:1; ++ bool cleaner_set:1; ++ ++ unsigned high_wm_percent_value; ++ unsigned low_wm_percent_value; ++ unsigned autocommit_time_value; ++ unsigned max_age_value; + + unsigned writeback_all; + struct workqueue_struct *writeback_wq; +@@ -2205,6 +2213,7 @@ static int writecache_ctr(struct dm_targ + if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) + goto invalid_optional; + wc->start_sector = start_sector; ++ wc->start_sector_set = true; + if (wc->start_sector != start_sector || + wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) + goto invalid_optional; +@@ -2214,6 +2223,7 @@ static int writecache_ctr(struct dm_targ + goto invalid_optional; + if (high_wm_percent < 0 || high_wm_percent > 100) + goto invalid_optional; ++ wc->high_wm_percent_value = high_wm_percent; + wc->high_wm_percent_set = true; + } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { + string = dm_shift_arg(&as), opt_params--; +@@ -2221,6 +2231,7 @@ static int writecache_ctr(struct dm_targ + goto invalid_optional; + if (low_wm_percent < 0 || low_wm_percent > 100) + goto invalid_optional; ++ wc->low_wm_percent_value = low_wm_percent; + wc->low_wm_percent_set = true; + } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { + string = dm_shift_arg(&as), opt_params--; +@@ -2240,6 +2251,7 @@ static int writecache_ctr(struct dm_targ + if (autocommit_msecs > 3600000) + goto invalid_optional; + wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); ++ wc->autocommit_time_value = autocommit_msecs; + wc->autocommit_time_set = true; + } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { + unsigned max_age_msecs; +@@ -2249,7 +2261,10 @@ static int writecache_ctr(struct dm_targ + if (max_age_msecs > 86400000) + goto invalid_optional; + wc->max_age = msecs_to_jiffies(max_age_msecs); ++ wc->max_age_set = true; ++ wc->max_age_value = max_age_msecs; + } else if (!strcasecmp(string, "cleaner")) { ++ wc->cleaner_set = true; + wc->cleaner = true; + } else if (!strcasecmp(string, "fua")) { + if (WC_MODE_PMEM(wc)) { +@@ -2455,7 +2470,6 @@ static void writecache_status(struct dm_ + struct dm_writecache *wc = ti->private; + unsigned extra_args; + unsigned sz = 0; +- uint64_t x; + + switch (type) { + case STATUSTYPE_INFO: +@@ -2467,11 +2481,11 @@ static void writecache_status(struct dm_ + DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 
'p' : 's', + wc->dev->name, wc->ssd_dev->name, wc->block_size); + extra_args = 0; +- if (wc->start_sector) ++ if (wc->start_sector_set) + extra_args += 2; +- if (wc->high_wm_percent_set && !wc->cleaner) ++ if (wc->high_wm_percent_set) + extra_args += 2; +- if (wc->low_wm_percent_set && !wc->cleaner) ++ if (wc->low_wm_percent_set) + extra_args += 2; + if (wc->max_writeback_jobs_set) + extra_args += 2; +@@ -2479,37 +2493,29 @@ static void writecache_status(struct dm_ + extra_args += 2; + if (wc->autocommit_time_set) + extra_args += 2; +- if (wc->max_age != MAX_AGE_UNSPECIFIED) ++ if (wc->max_age_set) + extra_args += 2; +- if (wc->cleaner) ++ if (wc->cleaner_set) + extra_args++; + if (wc->writeback_fua_set) + extra_args++; + + DMEMIT("%u", extra_args); +- if (wc->start_sector) ++ if (wc->start_sector_set) + DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); +- if (wc->high_wm_percent_set && !wc->cleaner) { +- x = (uint64_t)wc->freelist_high_watermark * 100; +- x += wc->n_blocks / 2; +- do_div(x, (size_t)wc->n_blocks); +- DMEMIT(" high_watermark %u", 100 - (unsigned)x); +- } +- if (wc->low_wm_percent_set && !wc->cleaner) { +- x = (uint64_t)wc->freelist_low_watermark * 100; +- x += wc->n_blocks / 2; +- do_div(x, (size_t)wc->n_blocks); +- DMEMIT(" low_watermark %u", 100 - (unsigned)x); +- } ++ if (wc->high_wm_percent_set) ++ DMEMIT(" high_watermark %u", wc->high_wm_percent_value); ++ if (wc->low_wm_percent_set) ++ DMEMIT(" low_watermark %u", wc->low_wm_percent_value); + if (wc->max_writeback_jobs_set) + DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); + if (wc->autocommit_blocks_set) + DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); + if (wc->autocommit_time_set) +- DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); +- if (wc->max_age != MAX_AGE_UNSPECIFIED) +- DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); +- if (wc->cleaner) ++ DMEMIT(" autocommit_time %u", wc->autocommit_time_value); ++ if (wc->max_age_set) ++ DMEMIT(" max_age %u", wc->max_age_value); ++ if (wc->cleaner_set) + DMEMIT(" cleaner"); + if (wc->writeback_fua_set) + DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); +@@ -2519,7 +2525,7 @@ static void writecache_status(struct dm_ + + static struct target_type writecache_target = { + .name = "writecache", +- .version = {1, 3, 0}, ++ .version = {1, 4, 0}, + .module = THIS_MODULE, + .ctr = writecache_ctr, + .dtr = writecache_dtr, diff --git a/queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch b/queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch new file mode 100644 index 00000000000..478000e8b37 --- /dev/null +++ b/queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch @@ -0,0 +1,77 @@ +From e0fcd01510ad025c9bbce704c5c2579294056141 Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Sat, 26 Dec 2020 18:07:01 +0800 +Subject: f2fs: enforce the immutable flag on open files + +From: Chao Yu + +commit e0fcd01510ad025c9bbce704c5c2579294056141 upstream. + +This patch ports commit 02b016ca7f99 ("ext4: enforce the immutable +flag on open files") to f2fs. + +According to the chattr man page, "a file with the 'i' attribute +cannot be modified..." Historically, this was only enforced when the +file was opened, per the rest of the description, "... and the file +can not be opened in write mode". + +There is general agreement that we should standardize all file systems +to prevent modifications even for files that were opened at the time +the immutable flag is set. 
Eventually, a change to enforce this at +the VFS layer should be landing in mainline. + +Cc: stable@kernel.org +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/file.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -59,6 +59,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(s + bool need_alloc = true; + int err = 0; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return VM_FAULT_SIGBUS; ++ + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; +@@ -869,6 +872,14 @@ int f2fs_setattr(struct dentry *dentry, + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ if (unlikely(IS_APPEND(inode) && ++ (attr->ia_valid & (ATTR_MODE | ATTR_UID | ++ ATTR_GID | ATTR_TIMES_SET)))) ++ return -EPERM; ++ + if ((attr->ia_valid & ATTR_SIZE) && + !f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; +@@ -4084,6 +4095,11 @@ static ssize_t f2fs_file_write_iter(stru + inode_lock(inode); + } + ++ if (unlikely(IS_IMMUTABLE(inode))) { ++ ret = -EPERM; ++ goto unlock; ++ } ++ + ret = generic_write_checks(iocb, from); + if (ret > 0) { + bool preallocated = false; +@@ -4148,6 +4164,7 @@ write: + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); + } ++unlock: + inode_unlock(inode); + out: + trace_f2fs_file_write_iter(inode, iocb->ki_pos, diff --git a/queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch b/queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch new file mode 100644 index 00000000000..adeca00b731 --- /dev/null +++ b/queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch @@ -0,0 +1,36 @@ +From 2562515f0ad7342bde6456602c491b64c63fe950 Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Wed, 16 Dec 2020 17:15:23 +0800 +Subject: f2fs: fix out-of-repair __setattr_copy() + +From: Chao Yu + +commit 2562515f0ad7342bde6456602c491b64c63fe950 upstream. + +__setattr_copy() was copied from setattr_copy() in fs/attr.c, but two +later patches were missed and do not cover this inner function; fix it. + +Commit 7fa294c8991c ("userns: Allow chown and setgid preservation") +Commit 23adbe12ef7d ("fs,userns: Change inode_capable to capable_wrt_inode_uidgid") + +Fixes: fbfa2cc58d53 ("f2fs: add file operations") +Cc: stable@vger.kernel.org +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/file.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -851,7 +851,8 @@ static void __setattr_copy(struct inode + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) ++ if (!in_group_p(inode->i_gid) && ++ !capable_wrt_inode_uidgid(inode, CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(inode, mode); + } diff --git a/queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch b/queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch new file mode 100644 index 00000000000..41b3665b564 --- /dev/null +++ b/queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch @@ -0,0 +1,35 @@ +From b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Tue, 26 Jan 2021 17:00:42 -0800 +Subject: f2fs: flush data when enabling checkpoint back + +From: Jaegeuk Kim + +commit b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec upstream. + +During checkpoint=disable period, f2fs bypasses all the synchronous IOs such as +sync and fsync.
So, when enabling it back, we must flush all of them in order +to keep the data persistent. Otherwise, sudden power-cut right after enabling +checkpoint will cause data loss. + +Fixes: 4354994f097d ("f2fs: checkpoint disabling") +Cc: stable@vger.kernel.org +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/super.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1764,6 +1764,9 @@ restore_flag: + + static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) + { ++ /* we should flush all the data to keep data consistency */ ++ sync_inodes_sb(sbi->sb); ++ + down_write(&sbi->gc_lock); + f2fs_dirty_to_prefree(sbi); + diff --git a/queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch b/queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch new file mode 100644 index 00000000000..e6ec74ee7bd --- /dev/null +++ b/queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch @@ -0,0 +1,65 @@ +From 78178ca844f0eb88f21f31c7fde969384be4c901 Mon Sep 17 00:00:00 2001 +From: Bob Peterson +Date: Fri, 5 Feb 2021 13:50:41 -0500 +Subject: gfs2: Don't skip dlm unlock if glock has an lvb + +From: Bob Peterson + +commit 78178ca844f0eb88f21f31c7fde969384be4c901 upstream. + +Patch fb6791d100d1 was designed to allow gfs2 to unmount quicker by +skipping the step where it tells dlm to unlock glocks in EX with lvbs. +This was done because when gfs2 unmounts a file system, it destroys the +dlm lockspace shortly after it destroys the glocks so it doesn't need to +unlock them all: the unlock is implied when the lockspace is destroyed +by dlm. + +However, that patch introduced a use-after-free in dlm: as part of its +normal dlm_recoverd process, it can call ls_recovery to recover dead +locks. In so doing, it can call recover_rsbs which calls recover_lvb for +any mastered rsbs. Func recover_lvb runs through the list of lkbs queued +to the given rsb (if the glock is cached but unlocked, it will still be +queued to the lkb, but in NL--Unlocked--mode) and if it has an lvb, +copies it to the rsb, thus trying to preserve the lkb. However, when +gfs2 skips the dlm unlock step, it frees the glock and its lvb, which +means dlm's function recover_lvb references the now freed lvb pointer, +copying the freed lvb memory to the rsb. + +This patch changes the check in gdlm_put_lock so that it calls +dlm_unlock for all glocks that contain an lvb pointer.
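+
+Schematically, the resulting check looks like this (condensed from the
+diff below, not the complete function):
+
+	/*
+	 * Only skip dlm_unlock when unmount allows it and no lvb is
+	 * attached; a glock with an lvb must always be unlocked so that
+	 * dlm recovery never dereferences a freed lvb pointer.
+	 */
+	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+	    !gl->gl_lksb.sb_lvbptr) {
+		gfs2_glock_free(gl);
+		return;
+	}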
+ +Fixes: fb6791d100d1 ("GFS2: skip dlm_unlock calls in unmount") +Cc: stable@vger.kernel.org # v3.8+ +Signed-off-by: Bob Peterson +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/lock_dlm.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/fs/gfs2/lock_dlm.c ++++ b/fs/gfs2/lock_dlm.c +@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_gl + { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; +- int lvb_needs_unlock = 0; + int error; + + if (gl->gl_lksb.sb_lkid == 0) { +@@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_gl + gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); + gfs2_update_request_times(gl); + +- /* don't want to skip dlm_unlock writing the lvb when lock is ex */ +- +- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE)) +- lvb_needs_unlock = 1; ++ /* don't want to skip dlm_unlock writing the lvb when lock has one */ + + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && +- !lvb_needs_unlock) { ++ !gl->gl_lksb.sb_lvbptr) { + gfs2_glock_free(gl); + return; + } diff --git a/queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch b/queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch new file mode 100644 index 00000000000..1819a5bd0ee --- /dev/null +++ b/queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch @@ -0,0 +1,75 @@ +From f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 Mon Sep 17 00:00:00 2001 +From: Bob Peterson +Date: Mon, 18 Jan 2021 15:18:59 -0500 +Subject: gfs2: fix glock confusion in function signal_our_withdraw + +From: Bob Peterson + +commit f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 upstream. + +If go_free is defined, function signal_our_withdraw is supposed to +synchronize on the GLF_FREEING flag of the inode glock, but it +accidentally does that on the live glock. Fix that and disambiguate +the glock variables. + +Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Bob Peterson +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/util.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/fs/gfs2/util.c ++++ b/fs/gfs2/util.c +@@ -93,9 +93,10 @@ out_unlock: + + static void signal_our_withdraw(struct gfs2_sbd *sdp) + { +- struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl; ++ struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; + struct inode *inode = sdp->sd_jdesc->jd_inode; + struct gfs2_inode *ip = GFS2_I(inode); ++ struct gfs2_glock *i_gl = ip->i_gl; + u64 no_formal_ino = ip->i_no_formal_ino; + int ret = 0; + int tries; +@@ -141,7 +142,8 @@ static void signal_our_withdraw(struct g + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + thaw_super(sdp->sd_vfs); + } else { +- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); ++ wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, ++ TASK_UNINTERRUPTIBLE); + } + + /* +@@ -161,15 +163,15 @@ static void signal_our_withdraw(struct g + * on other nodes to be successful, otherwise we remain the owner of + * the glock as far as dlm is concerned. + */ +- if (gl->gl_ops->go_free) { +- set_bit(GLF_FREEING, &gl->gl_flags); +- wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); ++ if (i_gl->gl_ops->go_free) { ++ set_bit(GLF_FREEING, &i_gl->gl_flags); ++ wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); + } + + /* + * Dequeue the "live" glock, but keep a reference so it's never freed. 
+ */ +- gfs2_glock_hold(gl); ++ gfs2_glock_hold(live_gl); + gfs2_glock_dq_wait(&sdp->sd_live_gh); + /* + * We enqueue the "live" glock in EX so that all other nodes +@@ -208,7 +210,7 @@ static void signal_our_withdraw(struct g + gfs2_glock_nq(&sdp->sd_live_gh); + } + +- gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */ ++ gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + + /* diff --git a/queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch b/queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch new file mode 100644 index 00000000000..a36566fc874 --- /dev/null +++ b/queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch @@ -0,0 +1,34 @@ +From 834ec3e1ee65029029225a86c12337a6cd385af7 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Fri, 5 Feb 2021 18:11:28 +0100 +Subject: gfs2: Lock imbalance on error path in gfs2_recover_one + +From: Andreas Gruenbacher + +commit 834ec3e1ee65029029225a86c12337a6cd385af7 upstream. + +In gfs2_recover_one, fix a sd_log_flush_lock imbalance when a recovery +pass fails. + +Fixes: c9ebc4b73799 ("gfs2: allow journal replay to hold sd_log_flush_lock") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/recovery.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/gfs2/recovery.c ++++ b/fs/gfs2/recovery.c +@@ -514,8 +514,10 @@ void gfs2_recover_func(struct work_struc + error = foreach_descriptor(jd, head.lh_tail, + head.lh_blkno, pass); + lops_after_scan(jd, error, pass); +- if (error) ++ if (error) { ++ up_read(&sdp->sd_log_flush_lock); + goto fail_gunlock_thaw; ++ } + } + + recover_local_statfs(jd, &head); diff --git a/queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch b/queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch new file mode 100644 index 00000000000..5660f60edd2 --- /dev/null +++ b/queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch @@ -0,0 +1,46 @@ +From 7009fa9cd9a5262944b30eb7efb1f0561d074b68 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Tue, 9 Feb 2021 18:32:32 +0100 +Subject: gfs2: Recursive gfs2_quota_hold in gfs2_iomap_end + +From: Andreas Gruenbacher + +commit 7009fa9cd9a5262944b30eb7efb1f0561d074b68 upstream. + +When starting an iomap write, gfs2_quota_lock_check -> gfs2_quota_lock +-> gfs2_quota_hold is called from gfs2_iomap_begin. At the end of the +write, before unlocking the quotas, punch_hole -> gfs2_quota_hold can be +called again in gfs2_iomap_end, which is incorrect and leads to a failed +assertion. Instead, move the call to gfs2_quota_unlock before the call +to punch_hole to fix that. + +Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support") +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/bmap.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/gfs2/bmap.c ++++ b/fs/gfs2/bmap.c +@@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode * + + gfs2_inplace_release(ip); + ++ if (ip->i_qadata && ip->i_qadata->qa_qd_num) ++ gfs2_quota_unlock(ip); ++ + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. 
*/ + loff_t blockmask = i_blocksize(inode) - 1; +@@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode * + } + } + +- if (ip->i_qadata && ip->i_qadata->qa_qd_num) +- gfs2_quota_unlock(ip); +- + if (unlikely(!written)) + goto out_unlock; + diff --git a/queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch b/queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch new file mode 100644 index 00000000000..2eb7e7dc76f --- /dev/null +++ b/queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch @@ -0,0 +1,34 @@ +From c1f664d2400e73d5ca0fcd067fa5847d2c789c11 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Tue, 9 Feb 2021 15:10:51 +0800 +Subject: irqchip/loongson-pch-msi: Use bitmap_zalloc() to allocate bitmap + +From: Huacai Chen + +commit c1f664d2400e73d5ca0fcd067fa5847d2c789c11 upstream. + +Currently we use bitmap_alloc() to allocate msi bitmap which should be +initialized with zero. This is obviously wrong but it works because msi +can fall back to legacy interrupt mode. So use bitmap_zalloc() instead. + +Fixes: 632dcc2c75ef6de3272aa ("irqchip: Add Loongson PCH MSI controller") +Cc: stable@vger.kernel.org +Signed-off-by: Huacai Chen +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20210209071051.2078435-1-chenhuacai@loongson.cn +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-loongson-pch-msi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-loongson-pch-msi.c ++++ b/drivers/irqchip/irq-loongson-pch-msi.c +@@ -225,7 +225,7 @@ static int pch_msi_init(struct device_no + goto err_priv; + } + +- priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL); ++ priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL); + if (!priv->msi_map) { + ret = -ENOMEM; + goto err_priv; diff --git a/queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch b/queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch new file mode 100644 index 00000000000..39695869885 --- /dev/null +++ b/queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch @@ -0,0 +1,53 @@ +From 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 14 Feb 2021 13:21:43 -0700 +Subject: proc: don't allow async path resolution of /proc/thread-self components + +From: Jens Axboe + +commit 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 upstream. + +If this is attempted by an io-wq kthread, then return -EOPNOTSUPP as we +don't currently support that. Once we can get task_pid_ptr() doing the +right thing, then this can go away again. + +Use PF_IO_WORKER for this to specifically target the io_uring workers. +Modify the /proc/self/ check to use PF_IO_WORKER as well. + +Cc: stable@vger.kernel.org +Fixes: 8d4c3e76e3be ("proc: don't allow async path resolution of /proc/self components") +Reported-by: Eric W. Biederman +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/self.c | 2 +- + fs/proc/thread_self.c | 7 +++++++ + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/proc/self.c ++++ b/fs/proc/self.c +@@ -20,7 +20,7 @@ static const char *proc_self_get_link(st + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this.
+ */ +- if (current->flags & PF_KTHREAD) ++ if (current->flags & PF_IO_WORKER) + return ERR_PTR(-EOPNOTSUPP); + + if (!tgid) +--- a/fs/proc/thread_self.c ++++ b/fs/proc/thread_self.c +@@ -17,6 +17,13 @@ static const char *proc_thread_self_get_ + pid_t pid = task_pid_nr_ns(current, ns); + char *name; + ++ /* ++ * Not currently supported. Once we can inherit all of struct pid, ++ * we can allow this. ++ */ ++ if (current->flags & PF_IO_WORKER) ++ return ERR_PTR(-EOPNOTSUPP); ++ + if (!pid) + return ERR_PTR(-ENOENT); + name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); diff --git a/queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch b/queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch new file mode 100644 index 00000000000..c68e57b992e --- /dev/null +++ b/queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch @@ -0,0 +1,38 @@ +From b29c5093820d333eef22f58cd04ec0d089059c39 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Tue, 2 Feb 2021 16:45:37 +0100 +Subject: s390/vtime: fix inline assembly clobber list + +From: Heiko Carstens + +commit b29c5093820d333eef22f58cd04ec0d089059c39 upstream. + +The stck/stckf instruction used within the inline assembly within +do_account_vtime() changes the condition code. This is not reflected +with the clobber list, and therefore might result in incorrect code +generation. + +It seems unlikely that the compiler could generate incorrect code +considering the surrounding C code, but it must still be fixed. + +Cc: +Reviewed-by: Christian Borntraeger +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/vtime.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/s390/kernel/vtime.c ++++ b/arch/s390/kernel/vtime.c +@@ -136,7 +136,8 @@ static int do_account_vtime(struct task_ + " stck %1" /* Store current tod clock value */ + #endif + : "=Q" (S390_lowcore.last_update_timer), +- "=Q" (S390_lowcore.last_update_clock)); ++ "=Q" (S390_lowcore.last_update_clock) ++ : : "cc"); + clock = S390_lowcore.last_update_clock - clock; + timer -= S390_lowcore.last_update_timer; + diff --git a/queue-5.10/series b/queue-5.10/series index 791e4b72304..4ab5cb59c22 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -619,3 +619,36 @@ exfat-fix-shift-out-of-bounds-in-exfat_fill_super.patch zonefs-fix-file-size-of-zones-in-full-condition.patch kcmp-support-selection-of-sys_kcmp-without-checkpoint_restore.patch thermal-cpufreq_cooling-freq_qos_update_request-returns-0-on-error.patch +cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch +cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch +cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch +proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch +s390-vtime-fix-inline-assembly-clobber-list.patch +virtio-s390-implement-virtio-ccw-revision-2-correctly.patch +um-mm-check-more-comprehensively-for-stub-changes.patch +um-defer-killing-userspace-on-page-table-update-failures.patch +irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch +f2fs-fix-out-of-repair-__setattr_copy.patch +f2fs-enforce-the-immutable-flag-on-open-files.patch +f2fs-flush-data-when-enabling-checkpoint-back.patch +sparc32-fix-a-user-triggerable-oops-in-clear_user.patch +spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch +spi-spi-synquacer-fix-set_cs-handling.patch +gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch 
+gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch +gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch +gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch +dm-fix-deadlock-when-swapping-to-encrypted-device.patch +dm-table-fix-iterate_devices-based-device-capability-checks.patch +dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch +dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch +dm-writecache-fix-performance-degradation-in-ssd-mode.patch +dm-writecache-return-the-exact-table-values-that-were-set.patch +dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch +dm-era-recover-committed-writeset-after-crash.patch +dm-era-update-in-core-bitset-after-committing-the-metadata.patch +dm-era-verify-the-data-block-size-hasn-t-changed.patch +dm-era-fix-bitset-memory-leaks.patch +dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch +dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch +dm-era-only-resize-metadata-in-preresume.patch diff --git a/queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch b/queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch new file mode 100644 index 00000000000..780001ea9aa --- /dev/null +++ b/queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch @@ -0,0 +1,50 @@ +From 7780918b36489f0b2f9a3749d7be00c2ceaec513 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 20 Jul 2020 02:21:51 +0100 +Subject: sparc32: fix a user-triggerable oops in clear_user() + +From: Al Viro + +commit 7780918b36489f0b2f9a3749d7be00c2ceaec513 upstream. + +Back in 2.1.29 the clear_user() guts (__bzero()) had been merged +with memset(). Unfortunately, while all exception handlers had been +copied, one of the exception table entries got lost. As the result, +clear_user() starting at 128*n bytes before the end of page and +spanning between 8 and 127 bytes into the next page would oops when +the second page is unmapped. It's trivial to reproduce - all +it takes is + +main() +{ + int fd = open("/dev/zero", O_RDONLY); + char *p = mmap(NULL, 16384, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + munmap(p + 8192, 8192); + read(fd, p + 8192 - 128, 192); +} + +which had been oopsing since March 1997. Says something about +the quality of test coverage... ;-/ And while today sparc32 port +is nearly dead, back in '97 it had been very much alive; in fact, +sparc64 had only been in mainline for 3 months by that point... + +Cc: stable@kernel.org +Fixes: v2.1.29 +Signed-off-by: Al Viro +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/memset.S | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/sparc/lib/memset.S ++++ b/arch/sparc/lib/memset.S +@@ -142,6 +142,7 @@ __bzero: + ZERO_LAST_BLOCKS(%o0, 0x48, %g2) + ZERO_LAST_BLOCKS(%o0, 0x08, %g2) + 13: ++ EXT(12b, 13b, 21f) + be 8f + andcc %o1, 4, %g0 + diff --git a/queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch b/queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch new file mode 100644 index 00000000000..1a1ce4d556e --- /dev/null +++ b/queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch @@ -0,0 +1,42 @@ +From 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e Mon Sep 17 00:00:00 2001 +From: Rasmus Villemoes +Date: Sat, 30 Jan 2021 15:35:45 +0100 +Subject: spi: fsl: invert spisel_boot signal on MPC8309 + +From: Rasmus Villemoes + +commit 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e upstream. 
+ +Commit 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH +is not set in spi->mode") broke our MPC8309 board by effectively +inverting the boolean value passed to fsl_spi_cs_control. The +SPISEL_BOOT signal is used as chipselect, but it's not a gpio, so +we cannot rely on gpiolib handling the polarity. + +Adapt to the new world order by inverting the logic here. This does +assume that the slave sitting at the SPISEL_BOOT is active low, but +should that ever turn out not to be the case, one can create a stub +gpiochip driver controlling a single gpio (or rather, a single "spo", +special-purpose output). + +Fixes: 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH is not set in spi->mode") +Cc: stable@vger.kernel.org +Signed-off-by: Rasmus Villemoes +Link: https://lore.kernel.org/r/20210130143545.505613-1-rasmus.villemoes@prevas.dk +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-fsl-spi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/spi/spi-fsl-spi.c ++++ b/drivers/spi/spi-fsl-spi.c +@@ -695,7 +695,7 @@ static void fsl_spi_cs_control(struct sp + + if (WARN_ON_ONCE(!pinfo->immr_spi_cs)) + return; +- iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs); ++ iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs); + } + } + diff --git a/queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch b/queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch new file mode 100644 index 00000000000..5b221304202 --- /dev/null +++ b/queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch @@ -0,0 +1,36 @@ +From 1c9f1750f0305bf605ff22686fc0ac89c06deb28 Mon Sep 17 00:00:00 2001 +From: Masahisa Kojima +Date: Mon, 1 Feb 2021 01:31:09 -0600 +Subject: spi: spi-synquacer: fix set_cs handling + +From: Masahisa Kojima + +commit 1c9f1750f0305bf605ff22686fc0ac89c06deb28 upstream. + +When the slave chip select is deasserted, DMSTOP bit +must be set. + +Fixes: b0823ee35cf9 ("spi: Add spi driver for Socionext SynQuacer platform") +Signed-off-by: Masahisa Kojima +Signed-off-by: Jassi Brar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20210201073109.9036-1-jassisinghbrar@gmail.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-synquacer.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/spi/spi-synquacer.c ++++ b/drivers/spi/spi-synquacer.c +@@ -490,6 +490,10 @@ static void synquacer_spi_set_cs(struct + val &= ~(SYNQUACER_HSSPI_DMPSEL_CS_MASK << + SYNQUACER_HSSPI_DMPSEL_CS_SHIFT); + val |= spi->chip_select << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT; ++ ++ if (!enable) ++ val |= SYNQUACER_HSSPI_DMSTOP_STOP; ++ + writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART); + } + diff --git a/queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch b/queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch new file mode 100644 index 00000000000..1ae1604569c --- /dev/null +++ b/queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch @@ -0,0 +1,82 @@ +From a7d48886cacf8b426e0079bca9639d2657cf2d38 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Wed, 13 Jan 2021 22:08:03 +0100 +Subject: um: defer killing userspace on page table update failures + +From: Johannes Berg + +commit a7d48886cacf8b426e0079bca9639d2657cf2d38 upstream. + +In some cases we can get to fix_range_common() with mmap_sem held, +and in others we get there without it being held. 
For example, we +get there with it held from sys_mprotect(), and without it held +from fork_handler(). + +Avoid any issues in this and simply defer killing the task until +it runs the next time. Do it on the mm so that another task that +shares the same mm can't continue running afterwards. + +Cc: stable@vger.kernel.org +Fixes: 468f65976a8d ("um: Fix hung task in fix_range_common()") +Signed-off-by: Johannes Berg +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/include/shared/skas/mm_id.h | 1 + + arch/um/kernel/tlb.c | 7 +++---- + arch/um/os-Linux/skas/process.c | 4 ++++ + 3 files changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/um/include/shared/skas/mm_id.h ++++ b/arch/um/include/shared/skas/mm_id.h +@@ -12,6 +12,7 @@ struct mm_id { + int pid; + } u; + unsigned long stack; ++ int kill; + }; + + #endif +--- a/arch/um/kernel/tlb.c ++++ b/arch/um/kernel/tlb.c +@@ -352,12 +352,11 @@ void fix_range_common(struct mm_struct * + + /* This is not an else because ret is modified above */ + if (ret) { ++ struct mm_id *mm_idp = ¤t->mm->context.id; ++ + printk(KERN_ERR "fix_range_common: failed, killing current " + "process: %d\n", task_tgid_vnr(current)); +- /* We are under mmap_lock, release it such that current can terminate */ +- mmap_write_unlock(current->mm); +- force_sig(SIGKILL); +- do_signal(¤t->thread.regs); ++ mm_idp->kill = 1; + } + } + +--- a/arch/um/os-Linux/skas/process.c ++++ b/arch/um/os-Linux/skas/process.c +@@ -249,6 +249,7 @@ static int userspace_tramp(void *stack) + } + + int userspace_pid[NR_CPUS]; ++int kill_userspace_mm[NR_CPUS]; + + /** + * start_userspace() - prepare a new userspace process +@@ -342,6 +343,8 @@ void userspace(struct uml_pt_regs *regs, + interrupt_end(); + + while (1) { ++ if (kill_userspace_mm[0]) ++ fatal_sigsegv(); + + /* + * This can legitimately fail if the process loads a +@@ -650,4 +653,5 @@ void reboot_skas(void) + void __switch_mm(struct mm_id *mm_idp) + { + userspace_pid[0] = mm_idp->u.pid; ++ kill_userspace_mm[0] = mm_idp->kill; + } diff --git a/queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch b/queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch new file mode 100644 index 00000000000..b1f82d23cda --- /dev/null +++ b/queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch @@ -0,0 +1,71 @@ +From 47da29763ec9a153b9b685bff9db659e4e09e494 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Wed, 13 Jan 2021 22:08:02 +0100 +Subject: um: mm: check more comprehensively for stub changes + +From: Johannes Berg + +commit 47da29763ec9a153b9b685bff9db659e4e09e494 upstream. + +If userspace tries to change the stub, we need to kill it, +because otherwise it can escape the virtual machine. In a +few cases the stub checks weren't good, e.g. if userspace +just tries to + + mmap(0x100000 - 0x1000, 0x3000, ...) + +it could succeed to get a new private/anonymous mapping +replacing the stubs. Fix this by checking everywhere, and +checking for _overlap_, not just direct changes. 
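+
+The test used for this is the standard half-open interval overlap
+check: [addr, addr + len) intersects [STUB_START, STUB_END) exactly
+when each range starts before the other one ends. In the form used by
+the hunks below:
+
+	/*
+	 * True if the request touches the stub area at all, not only
+	 * if it starts inside it.
+	 */
+	if (addr + len > STUB_START && addr < STUB_END)
+		return -EINVAL;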
+ +Cc: stable@vger.kernel.org +Fixes: 3963333fe676 ("uml: cover stubs with a VMA") +Signed-off-by: Johannes Berg +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/kernel/tlb.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/arch/um/kernel/tlb.c ++++ b/arch/um/kernel/tlb.c +@@ -125,6 +125,9 @@ static int add_mmap(unsigned long virt, + struct host_vm_op *last; + int fd = -1, ret = 0; + ++ if (virt + len > STUB_START && virt < STUB_END) ++ return -EINVAL; ++ + if (hvc->userspace) + fd = phys_mapping(phys, &offset); + else +@@ -162,7 +165,7 @@ static int add_munmap(unsigned long addr + struct host_vm_op *last; + int ret = 0; + +- if ((addr >= STUB_START) && (addr < STUB_END)) ++ if (addr + len > STUB_START && addr < STUB_END) + return -EINVAL; + + if (hvc->index != 0) { +@@ -192,6 +195,9 @@ static int add_mprotect(unsigned long ad + struct host_vm_op *last; + int ret = 0; + ++ if (addr + len > STUB_START && addr < STUB_END) ++ return -EINVAL; ++ + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MPROTECT) && +@@ -472,6 +478,10 @@ void flush_tlb_page(struct vm_area_struc + struct mm_id *mm_id; + + address &= PAGE_MASK; ++ ++ if (address >= STUB_START && address < STUB_END) ++ goto kill; ++ + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto kill; diff --git a/queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch b/queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch new file mode 100644 index 00000000000..d2c2e212f69 --- /dev/null +++ b/queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch @@ -0,0 +1,59 @@ +From 182f709c5cff683e6732d04c78e328de0532284f Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 16 Feb 2021 12:06:45 +0100 +Subject: virtio/s390: implement virtio-ccw revision 2 correctly + +From: Cornelia Huck + +commit 182f709c5cff683e6732d04c78e328de0532284f upstream. + +CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw, +and drivers should only rely on it being implemented when they +negotiated at least that revision with the device. + +However, virtio_ccw_get_status() issued READ_STATUS for any +device operating at least at revision 1. If the device accepts +READ_STATUS regardless of the negotiated revision (which some +implementations like QEMU do, even though the spec currently does +not allow it), everything works as intended. While a device +rejecting the command should also be handled gracefully, we will +not be able to see any changes the device makes to the status, +such as setting NEEDS_RESET or setting the status to zero after +a completed reset. + +We negotiated the revision to at most 1, as we never bumped the +maximum revision; let's do that now and properly send READ_STATUS +only if we are operating at least at revision 2. 
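+
+Schematically (condensed from the diff below, not the full function),
+the status read is now gated on the negotiated revision instead of
+being issued for any revision of at least 1:
+
+	/*
+	 * READ_STATUS is only part of the protocol from revision 2 on;
+	 * fall back to the cached status for older revisions.
+	 */
+	if (vcdev->revision < 2)
+		return vcdev->dma_area->status;
+	/* ...otherwise issue CCW_CMD_READ_STATUS... */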
+ +Cc: stable@vger.kernel.org +Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw") +Reviewed-by: Halil Pasic +Signed-off-by: Cornelia Huck +Signed-off-by: Vasily Gorbik +Link: https://lore.kernel.org/r/20210216110645.1087321-1-cohuck@redhat.com +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/virtio/virtio_ccw.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/s390/virtio/virtio_ccw.c ++++ b/drivers/s390/virtio/virtio_ccw.c +@@ -117,7 +117,7 @@ struct virtio_rev_info { + }; + + /* the highest virtio-ccw revision we support */ +-#define VIRTIO_CCW_REV_MAX 1 ++#define VIRTIO_CCW_REV_MAX 2 + + struct virtio_ccw_vq_info { + struct virtqueue *vq; +@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct v + u8 old_status = vcdev->dma_area->status; + struct ccw1 *ccw; + +- if (vcdev->revision < 1) ++ if (vcdev->revision < 2) + return vcdev->dma_area->status; + + ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));