5.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 1 Mar 2021 13:55:49 +0000 (14:55 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 1 Mar 2021 13:55:49 +0000 (14:55 +0100)
added patches:
cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch
cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
dm-era-fix-bitset-memory-leaks.patch
dm-era-only-resize-metadata-in-preresume.patch
dm-era-recover-committed-writeset-after-crash.patch
dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch
dm-era-update-in-core-bitset-after-committing-the-metadata.patch
dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch
dm-era-verify-the-data-block-size-hasn-t-changed.patch
dm-fix-deadlock-when-swapping-to-encrypted-device.patch
dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch
dm-table-fix-iterate_devices-based-device-capability-checks.patch
dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch
dm-writecache-fix-performance-degradation-in-ssd-mode.patch
dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch
dm-writecache-return-the-exact-table-values-that-were-set.patch
f2fs-enforce-the-immutable-flag-on-open-files.patch
f2fs-fix-out-of-repair-__setattr_copy.patch
f2fs-flush-data-when-enabling-checkpoint-back.patch
gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch
gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch
gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch
gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch
irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch
proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch
s390-vtime-fix-inline-assembly-clobber-list.patch
sparc32-fix-a-user-triggerable-oops-in-clear_user.patch
spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch
spi-spi-synquacer-fix-set_cs-handling.patch
um-defer-killing-userspace-on-page-table-update-failures.patch
um-mm-check-more-comprehensively-for-stub-changes.patch
virtio-s390-implement-virtio-ccw-revision-2-correctly.patch

34 files changed:
queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch [new file with mode: 0644]
queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch [new file with mode: 0644]
queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch [new file with mode: 0644]
queue-5.10/dm-era-fix-bitset-memory-leaks.patch [new file with mode: 0644]
queue-5.10/dm-era-only-resize-metadata-in-preresume.patch [new file with mode: 0644]
queue-5.10/dm-era-recover-committed-writeset-after-crash.patch [new file with mode: 0644]
queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch [new file with mode: 0644]
queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch [new file with mode: 0644]
queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch [new file with mode: 0644]
queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch [new file with mode: 0644]
queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch [new file with mode: 0644]
queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch [new file with mode: 0644]
queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch [new file with mode: 0644]
queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch [new file with mode: 0644]
queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch [new file with mode: 0644]
queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch [new file with mode: 0644]
queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch [new file with mode: 0644]
queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch [new file with mode: 0644]
queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch [new file with mode: 0644]
queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch [new file with mode: 0644]
queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch [new file with mode: 0644]
queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch [new file with mode: 0644]
queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch [new file with mode: 0644]
queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch [new file with mode: 0644]
queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch [new file with mode: 0644]
queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch [new file with mode: 0644]
queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch [new file with mode: 0644]
queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch [new file with mode: 0644]
queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch [new file with mode: 0644]
queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch [new file with mode: 0644]
queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch [new file with mode: 0644]
queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch [new file with mode: 0644]

diff --git a/queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch b/queue-5.10/cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch
new file mode 100644 (file)
index 0000000..0396af3
--- /dev/null
@@ -0,0 +1,86 @@
+From a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Thu, 7 Jan 2021 19:43:30 +0100
+Subject: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 upstream.
+
+All of the callers of intel_pstate_get_hwp_max() access the struct
+cpudata object that corresponds to the given CPU already and the
+function itself needs to access that object (in order to update
+hwp_cap_cached), so modify the code to pass a struct cpudata pointer
+to it instead of the CPU number.
+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Tested-by: Chen Yu <yu.c.chen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -829,13 +829,13 @@ static struct freq_attr *hwp_cpufreq_att
+       NULL,
+ };
+-static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
++static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max,
+                                    int *current_max)
+ {
+       u64 cap;
+-      rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+-      WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
++      rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
++      WRITE_ONCE(cpu->hwp_cap_cached, cap);
+       if (global.no_turbo || global.turbo_disabled)
+               *current_max = HWP_GUARANTEED_PERF(cap);
+       else
+@@ -1223,7 +1223,7 @@ static void update_qos_request(enum freq
+                       continue;
+               if (hwp_active)
+-                      intel_pstate_get_hwp_max(i, &turbo_max, &max_state);
++                      intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+               else
+                       turbo_max = cpu->pstate.turbo_pstate;
+@@ -1733,7 +1733,7 @@ static void intel_pstate_get_cpu_pstates
+       if (hwp_active && !hwp_mode_bdw) {
+               unsigned int phy_max, current_max;
+-              intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
++              intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
+               cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+               cpu->pstate.turbo_pstate = phy_max;
+       } else {
+@@ -2217,7 +2217,7 @@ static void intel_pstate_update_perf_lim
+        * rather than pure ratios.
+        */
+       if (hwp_active) {
+-              intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++              intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+       } else {
+               max_state = global.no_turbo || global.turbo_disabled ?
+                       cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+@@ -2332,7 +2332,7 @@ static void intel_pstate_verify_cpu_poli
+       if (hwp_active) {
+               int max_state, turbo_max;
+-              intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++              intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+               max_freq = max_state * cpu->pstate.scaling;
+       } else {
+               max_freq = intel_pstate_get_max_freq(cpu);
+@@ -2675,7 +2675,7 @@ static int intel_cpufreq_cpu_init(struct
+       if (hwp_active) {
+               u64 value;
+-              intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state);
++              intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+               policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
+               rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+               WRITE_ONCE(cpu->hwp_req_cached, value);
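The shape of this refactor is easy to see outside the kernel tree. Below is a standalone C sketch (all names hypothetical, not the driver's real API) of passing the per-CPU object the callers already hold instead of an index that forces a second all_cpu_data[]-style lookup:

/* Sketch: replace an index parameter with the object pointer the callers
 * already have, so the helper avoids re-deriving it from a global array. */
#include <stdio.h>

struct cpudata_like {
	int cpu;
	unsigned long hwp_cap_cached;
};

static struct cpudata_like g_cpus[2];

/* before: takes an index and re-derives the object */
static void get_hwp_max_old(unsigned int cpu, unsigned long cap)
{
	g_cpus[cpu].hwp_cap_cached = cap;   /* extra array lookup */
}

/* after: takes the object directly */
static void get_hwp_max_new(struct cpudata_like *cpu, unsigned long cap)
{
	cpu->hwp_cap_cached = cap;          /* direct access */
}

int main(void)
{
	struct cpudata_like *cpu = &g_cpus[0];

	get_hwp_max_old(0, 0x24);
	get_hwp_max_new(cpu, 0x28);
	printf("cached cap: %#lx\n", cpu->hwp_cap_cached);
	return 0;
}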
diff --git a/queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch b/queue-5.10/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
new file mode 100644 (file)
index 0000000..e88d29f
--- /dev/null
@@ -0,0 +1,58 @@
+From 6f67e060083a84a4cc364eab6ae40c717165fb0c Mon Sep 17 00:00:00 2001
+From: Chen Yu <yu.c.chen@intel.com>
+Date: Tue, 12 Jan 2021 13:21:27 +0800
+Subject: cpufreq: intel_pstate: Get per-CPU max freq via MSR_HWP_CAPABILITIES if available
+
+From: Chen Yu <yu.c.chen@intel.com>
+
+commit 6f67e060083a84a4cc364eab6ae40c717165fb0c upstream.
+
+Currently, when turbo is disabled (either by BIOS or by the user),
+the intel_pstate driver reads the max non-turbo frequency from the
+package-wide MSR_PLATFORM_INFO(0xce) register.
+
+However, on asymmetric platforms it is possible in theory that small
+and big cores with HWP enabled might have different max non-turbo CPU
+frequencies, because MSR_HWP_CAPABILITIES is per-CPU in scope according
+to the Intel Software Developer's Manual.
+
+The turbo max freq is already per-CPU in the current code, so make a
+similar change for the max non-turbo frequency as well.
+
+Reported-by: Wendy Wang <wendy.wang@intel.com>
+Signed-off-by: Chen Yu <yu.c.chen@intel.com>
+[ rjw: Subject and changelog edits ]
+Cc: 4.18+ <stable@vger.kernel.org> # 4.18+: a45ee4d4e13b: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -1724,11 +1724,9 @@ static void intel_pstate_max_within_limi
+ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ {
+       cpu->pstate.min_pstate = pstate_funcs.get_min();
+-      cpu->pstate.max_pstate = pstate_funcs.get_max();
+       cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
+       cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+       cpu->pstate.scaling = pstate_funcs.get_scaling();
+-      cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+       if (hwp_active && !hwp_mode_bdw) {
+               unsigned int phy_max, current_max;
+@@ -1736,9 +1734,12 @@ static void intel_pstate_get_cpu_pstates
+               intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
+               cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+               cpu->pstate.turbo_pstate = phy_max;
++              cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached));
+       } else {
+               cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
++              cpu->pstate.max_pstate = pstate_funcs.get_max();
+       }
++      cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+       if (pstate_funcs.get_aperf_mperf_shift)
+               cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
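The decoding behind this patch fits in a few lines. Below is a minimal userspace sketch assuming the MSR_HWP_CAPABILITIES field layout from the Intel SDM (bits 7:0 highest performance, bits 15:8 guaranteed performance); the macros mirror the kernel's msr-index.h definitions, and the sample register value is invented for illustration:

/* Sketch of how intel_pstate decodes MSR_HWP_CAPABILITIES after this patch:
 * turbo max comes from the highest-perf field, and the max non-turbo pstate
 * now comes from the per-CPU guaranteed-perf field instead of the
 * package-wide MSR_PLATFORM_INFO. */
#include <stdint.h>
#include <stdio.h>

#define HWP_HIGHEST_PERF(cap)     ((cap) & 0xffULL)
#define HWP_GUARANTEED_PERF(cap)  (((cap) >> 8) & 0xffULL)

int main(void)
{
	uint64_t cap = 0x2428ULL;  /* hypothetical MSR contents */
	int turbo_disabled = 0;    /* global.no_turbo || global.turbo_disabled */
	unsigned int phy_max = HWP_HIGHEST_PERF(cap);
	unsigned int current_max = turbo_disabled ? HWP_GUARANTEED_PERF(cap)
						  : HWP_HIGHEST_PERF(cap);

	printf("phy_max=%u current_max=%u max_pstate=%llu\n",
	       phy_max, current_max,
	       (unsigned long long)HWP_GUARANTEED_PERF(cap));
	return 0;
}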
diff --git a/queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch b/queue-5.10/cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
new file mode 100644 (file)
index 0000000..ba5000b
--- /dev/null
@@ -0,0 +1,139 @@
+From 67fc209b527d023db4d087c68e44e9790aa089ef Mon Sep 17 00:00:00 2001
+From: Shawn Guo <shawn.guo@linaro.org>
+Date: Tue, 19 Jan 2021 10:39:25 +0800
+Subject: cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
+
+From: Shawn Guo <shawn.guo@linaro.org>
+
+commit 67fc209b527d023db4d087c68e44e9790aa089ef upstream.
+
+Commit f17b3e44320b ("cpufreq: qcom-hw: Use
+devm_platform_ioremap_resource() to simplify code") introduces
+a regression on platforms using the driver, by failing to initialise
+a policy, when one is created post hotplug.
+
+When all the CPUs of a policy are hotplugged out, the call to .exit()
+and later to devm_iounmap() does not release the memory region that was
+requested during devm_platform_ioremap_resource().  Therefore,
+a subsequent call to .init() will result in the following error, which
+will prevent a new policy from being initialised:
+
+[ 3395.915416] CPU4: shutdown
+[ 3395.938185] psci: CPU4 killed (polled 0 ms)
+[ 3399.071424] CPU5: shutdown
+[ 3399.094316] psci: CPU5 killed (polled 0 ms)
+[ 3402.139358] CPU6: shutdown
+[ 3402.161705] psci: CPU6 killed (polled 0 ms)
+[ 3404.742939] CPU7: shutdown
+[ 3404.765592] psci: CPU7 killed (polled 0 ms)
+[ 3411.492274] Detected VIPT I-cache on CPU4
+[ 3411.492337] GICv3: CPU4: found redistributor 400 region 0:0x0000000017ae0000
+[ 3411.492448] CPU4: Booted secondary processor 0x0000000400 [0x516f802d]
+[ 3411.503654] qcom-cpufreq-hw 17d43000.cpufreq: can't request region for resource [mem 0x17d45800-0x17d46bff]
+
+That said, the original code was tricky: it intentionally skipped the
+memory region request to hide this issue.  The true cause is that
+devm_xxx() device managed functions shouldn't be used for cpufreq
+init/exit hooks, because &pdev->dev stays alive across the hooks, so the
+automatic resource free-up is never triggered.  Let's drop the use of
+device managed functions and manually allocate/free resources, so that
+the issue can be fixed properly.
+
+Cc: v5.10+ <stable@vger.kernel.org> # v5.10+
+Fixes: f17b3e44320b ("cpufreq: qcom-hw: Use devm_platform_ioremap_resource() to simplify code")
+Suggested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/qcom-cpufreq-hw.c |   40 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/qcom-cpufreq-hw.c
++++ b/drivers/cpufreq/qcom-cpufreq-hw.c
+@@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
+ struct qcom_cpufreq_data {
+       void __iomem *base;
++      struct resource *res;
+       const struct qcom_cpufreq_soc_data *soc_data;
+ };
+@@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(stru
+       struct of_phandle_args args;
+       struct device_node *cpu_np;
+       struct device *cpu_dev;
++      struct resource *res;
+       void __iomem *base;
+       struct qcom_cpufreq_data *data;
+       int ret, index;
+@@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(stru
+       index = args.args[0];
+-      base = devm_platform_ioremap_resource(pdev, index);
+-      if (IS_ERR(base))
+-              return PTR_ERR(base);
++      res = platform_get_resource(pdev, IORESOURCE_MEM, index);
++      if (!res) {
++              dev_err(dev, "failed to get mem resource %d\n", index);
++              return -ENODEV;
++      }
++
++      if (!request_mem_region(res->start, resource_size(res), res->name)) {
++              dev_err(dev, "failed to request resource %pR\n", res);
++              return -EBUSY;
++      }
+-      data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
++      base = ioremap(res->start, resource_size(res));
++      if (IS_ERR(base)) {
++              dev_err(dev, "failed to map resource %pR\n", res);
++              ret = PTR_ERR(base);
++              goto release_region;
++      }
++
++      data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data) {
+               ret = -ENOMEM;
+-              goto error;
++              goto unmap_base;
+       }
+       data->soc_data = of_device_get_match_data(&pdev->dev);
+       data->base = base;
++      data->res = res;
+       /* HW should be in enabled state to proceed */
+       if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
+@@ -349,7 +366,11 @@ static int qcom_cpufreq_hw_cpu_init(stru
+       return 0;
+ error:
+-      devm_iounmap(dev, base);
++      kfree(data);
++unmap_base:
++      iounmap(data->base);
++release_region:
++      release_mem_region(res->start, resource_size(res));
+       return ret;
+ }
+@@ -357,12 +378,15 @@ static int qcom_cpufreq_hw_cpu_exit(stru
+ {
+       struct device *cpu_dev = get_cpu_device(policy->cpu);
+       struct qcom_cpufreq_data *data = policy->driver_data;
+-      struct platform_device *pdev = cpufreq_get_driver_data();
++      struct resource *res = data->res;
++      void __iomem *base = data->base;
+       dev_pm_opp_remove_all_dynamic(cpu_dev);
+       dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+       kfree(policy->freq_table);
+-      devm_iounmap(&pdev->dev, data->base);
++      kfree(data);
++      iounmap(base);
++      release_mem_region(res->start, resource_size(res));
+       return 0;
+ }
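The underlying lifetime mismatch generalizes beyond this driver: devm-managed resources are released when the device goes away, not when a cpufreq policy exits, so per-policy hooks must pair their own request/release calls. A userspace analogue, with every name invented for illustration, sketches why a second init fails under the devm-style scheme and succeeds with explicit pairing:

/* Userspace analogue of the devm_* lifetime bug. Resources hung off the
 * device (devm-style) are only released at device teardown, so claiming
 * them in a per-policy init hook means a re-init after exit still finds
 * the region held. None of these are kernel APIs. */
#include <stdbool.h>
#include <stdio.h>

static bool region_claimed;  /* stands in for the iomem resource tree */

static int policy_init_devm_style(void)
{
	if (region_claimed) {
		fprintf(stderr, "can't request region (still held by devm)\n");
		return -1;
	}
	region_claimed = true;  /* freed only at device teardown, not in .exit() */
	return 0;
}

static int policy_init_manual(void)
{
	if (region_claimed)
		return -1;
	region_claimed = true;
	return 0;
}

static void policy_exit_manual(void)
{
	region_claimed = false; /* explicit release, like release_mem_region() */
}

int main(void)
{
	policy_init_devm_style();  /* CPUs online: ok */
	/* hotplug out: .exit() runs, but devm keeps the claim... */
	policy_init_devm_style();  /* hotplug back in: fails, as in the log above */

	region_claimed = false;    /* reset for the fixed variant */
	policy_init_manual();
	policy_exit_manual();
	if (policy_init_manual() == 0)
		puts("re-init succeeds with manual request/release pairing");
	return 0;
}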
diff --git a/queue-5.10/dm-era-fix-bitset-memory-leaks.patch b/queue-5.10/dm-era-fix-bitset-memory-leaks.patch
new file mode 100644 (file)
index 0000000..97602b4
--- /dev/null
@@ -0,0 +1,58 @@
+From 904e6b266619c2da5c58b5dce14ae30629e39645 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:54 +0200
+Subject: dm era: Fix bitset memory leaks
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 904e6b266619c2da5c58b5dce14ae30629e39645 upstream.
+
+Deallocate the memory allocated for the in-core bitsets when destroying
+the target and in error paths.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -47,6 +47,7 @@ struct writeset {
+ static void writeset_free(struct writeset *ws)
+ {
+       vfree(ws->bits);
++      ws->bits = NULL;
+ }
+ static int setup_on_disk_bitset(struct dm_disk_bitset *info,
+@@ -811,6 +812,8 @@ static struct era_metadata *metadata_ope
+ static void metadata_close(struct era_metadata *md)
+ {
++      writeset_free(&md->writesets[0]);
++      writeset_free(&md->writesets[1]);
+       destroy_persistent_data_objects(md);
+       kfree(md);
+ }
+@@ -848,6 +851,7 @@ static int metadata_resize(struct era_me
+       r = writeset_alloc(&md->writesets[1], *new_size);
+       if (r) {
+               DMERR("%s: writeset_alloc failed for writeset 1", __func__);
++              writeset_free(&md->writesets[0]);
+               return r;
+       }
+@@ -858,6 +862,8 @@ static int metadata_resize(struct era_me
+                           &value, &md->era_array_root);
+       if (r) {
+               DMERR("%s: dm_array_resize failed", __func__);
++              writeset_free(&md->writesets[0]);
++              writeset_free(&md->writesets[1]);
+               return r;
+       }
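A detail worth noting in this fix: writeset_free() also NULLs the pointer, and since vfree(NULL) is a no-op, the helper becomes idempotent and safe to call from both the new error paths and metadata_close(). A minimal userspace sketch of the same pattern, using free() in place of vfree():

/* Idempotent-free pattern: NULLing the pointer after freeing makes the
 * teardown helper safe to call more than once. free(NULL), like
 * vfree(NULL), is a no-op. */
#include <stdlib.h>

struct writeset_like {
	unsigned long *bits;
};

static void writeset_like_free(struct writeset_like *ws)
{
	free(ws->bits);
	ws->bits = NULL;  /* a second call becomes a harmless no-op */
}

int main(void)
{
	struct writeset_like ws = { .bits = calloc(16, sizeof(unsigned long)) };

	writeset_like_free(&ws);  /* e.g. from an error path in resize */
	writeset_like_free(&ws);  /* e.g. again from metadata_close(): safe */
	return 0;
}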
diff --git a/queue-5.10/dm-era-only-resize-metadata-in-preresume.patch b/queue-5.10/dm-era-only-resize-metadata-in-preresume.patch
new file mode 100644 (file)
index 0000000..a3d020c
--- /dev/null
@@ -0,0 +1,80 @@
+From cca2c6aebe86f68103a8615074b3578e854b5016 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Thu, 11 Feb 2021 16:22:43 +0200
+Subject: dm era: only resize metadata in preresume
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit cca2c6aebe86f68103a8615074b3578e854b5016 upstream.
+
+Metadata resize shouldn't happen in the ctr. The ctr loads a temporary
+(inactive) table that will only become active upon resume. That is why
+the resize should always be done as part of resume. Otherwise a load
+(ctr) whose inactive table never becomes active will incorrectly resize
+the metadata.
+
+Also, perform the resize directly in preresume, instead of using the
+worker to do it.
+
+The worker might run other metadata operations, e.g., it could start
+digestion, before resizing the metadata. These operations will end up
+using the old size.
+
+This could lead to errors, like:
+
+  device-mapper: era: metadata_digest_transcribe_writeset: dm_array_set_value failed
+  device-mapper: era: process_old_eras: digest step failed, stopping digestion
+
+The reason for the above error is that the worker started the digestion
+of the archived writeset using the old, larger size.
+
+As a result, metadata_digest_transcribe_writeset tried to write beyond
+the end of the era array.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1501,15 +1501,6 @@ static int era_ctr(struct dm_target *ti,
+       }
+       era->md = md;
+-      era->nr_blocks = calc_nr_blocks(era);
+-
+-      r = metadata_resize(era->md, &era->nr_blocks);
+-      if (r) {
+-              ti->error = "couldn't resize metadata";
+-              era_destroy(era);
+-              return -ENOMEM;
+-      }
+-
+       era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
+       if (!era->wq) {
+               ti->error = "could not create workqueue for metadata object";
+@@ -1584,9 +1575,17 @@ static int era_preresume(struct dm_targe
+       dm_block_t new_size = calc_nr_blocks(era);
+       if (era->nr_blocks != new_size) {
+-              r = in_worker1(era, metadata_resize, &new_size);
+-              if (r)
++              r = metadata_resize(era->md, &new_size);
++              if (r) {
++                      DMERR("%s: metadata_resize failed", __func__);
++                      return r;
++              }
++
++              r = metadata_commit(era->md);
++              if (r) {
++                      DMERR("%s: metadata_commit failed", __func__);
+                       return r;
++              }
+               era->nr_blocks = new_size;
+       }
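The ctr/preresume split that this patch enforces can be modelled in a few lines: the constructor only records the requested size, and the side-effecting resize runs when the table actually becomes active. A toy C sketch, with names that are illustrative rather than the dm-era API:

/* Toy model of the ctr/preresume split: ctr builds inactive state with no
 * metadata side effects; preresume applies the resize (and would commit)
 * only when the table goes live. */
#include <stdio.h>

struct toy_era {
	unsigned long nr_blocks;        /* committed, in-core view */
	unsigned long requested_blocks;
};

static void toy_ctr(struct toy_era *e, unsigned long blocks)
{
	e->requested_blocks = blocks;   /* no metadata side effects here */
}

static void toy_preresume(struct toy_era *e)
{
	if (e->nr_blocks != e->requested_blocks) {
		/* metadata_resize() + metadata_commit() would go here */
		e->nr_blocks = e->requested_blocks;
	}
}

int main(void)
{
	struct toy_era e = { .nr_blocks = 100 };

	toy_ctr(&e, 200);   /* table loaded but never resumed: nothing changes */
	printf("after ctr: %lu blocks\n", e.nr_blocks);       /* still 100 */
	toy_preresume(&e);  /* table activated: resize takes effect */
	printf("after preresume: %lu blocks\n", e.nr_blocks); /* now 200 */
	return 0;
}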
diff --git a/queue-5.10/dm-era-recover-committed-writeset-after-crash.patch b/queue-5.10/dm-era-recover-committed-writeset-after-crash.patch
new file mode 100644 (file)
index 0000000..33f8c80
--- /dev/null
@@ -0,0 +1,125 @@
+From de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:19:30 +0200
+Subject: dm era: Recover committed writeset after crash
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 upstream.
+
+Following a system crash, dm-era fails to recover the committed writeset
+for the current era, leading to lost writes. That is, we lose the
+information about what blocks were written during the affected era.
+
+dm-era assumes that the writeset of the current era is archived when the
+device is suspended. So, when resuming the device, it just moves on to
+the next era, ignoring the committed writeset.
+
+This assumption holds when the device is properly shut down. But, when
+the system crashes, the code that suspends the target never runs, so the
+writeset for the current era is not archived.
+
+There are three issues that cause the committed writeset to get lost:
+
+1. dm-era doesn't load the committed writeset when opening the metadata
+2. The code that resizes the metadata wipes the information about the
+   committed writeset (assuming it was loaded at step 1)
+3. era_preresume() starts a new era, without taking into account that
+   the current era might not have been archived, due to a system crash.
+
+To fix this:
+
+1. Load the committed writeset when opening the metadata
+2. Fix the code that resizes the metadata to make sure it doesn't wipe
+   the loaded writeset
+3. Fix era_preresume() to check for a loaded writeset and archive it,
+   before starting a new era.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bi
+  */
+ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
+ {
+-      ws->md.nr_bits = nr_blocks;
+-      ws->md.root = INVALID_WRITESET_ROOT;
+       ws->bits = vzalloc(bitset_size(nr_blocks));
+       if (!ws->bits) {
+               DMERR("%s: couldn't allocate in memory bitset", __func__);
+@@ -85,12 +83,14 @@ static int writeset_alloc(struct writese
+ /*
+  * Wipes the in-core bitset, and creates a new on disk bitset.
+  */
+-static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
++static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws,
++                       dm_block_t nr_blocks)
+ {
+       int r;
+-      memset(ws->bits, 0, bitset_size(ws->md.nr_bits));
++      memset(ws->bits, 0, bitset_size(nr_blocks));
++      ws->md.nr_bits = nr_blocks;
+       r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
+       if (r) {
+               DMERR("%s: setup_on_disk_bitset failed", __func__);
+@@ -579,6 +579,7 @@ static int open_metadata(struct era_meta
+       md->nr_blocks = le32_to_cpu(disk->nr_blocks);
+       md->current_era = le32_to_cpu(disk->current_era);
++      ws_unpack(&disk->current_writeset, &md->current_writeset->md);
+       md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
+       md->era_array_root = le64_to_cpu(disk->era_array_root);
+       md->metadata_snap = le64_to_cpu(disk->metadata_snap);
+@@ -870,7 +871,6 @@ static int metadata_era_archive(struct e
+       }
+       ws_pack(&md->current_writeset->md, &value);
+-      md->current_writeset->md.root = INVALID_WRITESET_ROOT;
+       keys[0] = md->current_era;
+       __dm_bless_for_disk(&value);
+@@ -882,6 +882,7 @@ static int metadata_era_archive(struct e
+               return r;
+       }
++      md->current_writeset->md.root = INVALID_WRITESET_ROOT;
+       md->archived_writesets = true;
+       return 0;
+@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_m
+       int r;
+       struct writeset *new_writeset = next_writeset(md);
+-      r = writeset_init(&md->bitset_info, new_writeset);
++      r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks);
+       if (r) {
+               DMERR("%s: writeset_init failed", __func__);
+               return r;
+@@ -951,7 +952,7 @@ static int metadata_commit(struct era_me
+       int r;
+       struct dm_block *sblock;
+-      if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
++      if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
+               r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
+                                   &md->current_writeset->md.root);
+               if (r) {
+@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_targe
+       start_worker(era);
+-      r = in_worker0(era, metadata_new_era);
++      r = in_worker0(era, metadata_era_rollover);
+       if (r) {
+               DMERR("%s: metadata_era_rollover failed", __func__);
+               return r;
diff --git a/queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch b/queue-5.10/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch
new file mode 100644 (file)
index 0000000..dfbdda4
--- /dev/null
@@ -0,0 +1,80 @@
+From 2524933307fd0036d5c32357c693c021ab09a0b0 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:22:04 +0200
+Subject: dm era: Reinitialize bitset cache before digesting a new writeset
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 2524933307fd0036d5c32357c693c021ab09a0b0 upstream.
+
+In case of devices with at most 64 blocks, the digestion of consecutive
+eras uses the writeset of the first era as the writeset of all eras to
+digest, leading to lost writes. That is, we lose the information about
+what blocks were written during the affected eras.
+
+The digestion code uses a dm_disk_bitset object to access the archived
+writesets. This structure includes a one word (64-bit) cache to reduce
+the number of array lookups.
+
+This structure is initialized only once, in metadata_digest_start(),
+when we kick off digestion.
+
+But, when we insert a new writeset into the writeset tree, before the
+digestion of the previous writeset is done, or equivalently when there
+are multiple writesets in the writeset tree to digest, then all these
+writesets are digested using the same cache and the cache is not
+re-initialized when moving from one writeset to the next.
+
+For devices with more than 64 blocks, i.e., the size of the cache, the
+cache is indirectly invalidated when we move to a next set of blocks, so
+we avoid the bug.
+
+But for devices with at most 64 blocks we end up using the same cached
+data for digesting all archived writesets, i.e., the cache is loaded
+when digesting the first writeset and it never gets reloaded, until the
+digestion is done.
+
+As a result, the writeset of the first era to digest is used as the
+writeset of all the following archived eras, leading to lost writes.
+
+Fix this by reinitializing the dm_disk_bitset structure, and thus
+invalidating the cache, every time the digestion code starts digesting a
+new writeset.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -756,6 +756,12 @@ static int metadata_digest_lookup_writes
+       ws_unpack(&disk, &d->writeset);
+       d->value = cpu_to_le32(key);
++      /*
++       * We initialise another bitset info to avoid any caching side effects
++       * with the previous one.
++       */
++      dm_disk_bitset_init(md->tm, &d->info);
++
+       d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
+       d->current_bit = 0;
+       d->step = metadata_digest_transcribe_writeset;
+@@ -769,12 +775,6 @@ static int metadata_digest_start(struct
+               return 0;
+       memset(d, 0, sizeof(*d));
+-
+-      /*
+-       * We initialise another bitset info to avoid any caching side
+-       * effects with the previous one.
+-       */
+-      dm_disk_bitset_init(md->tm, &d->info);
+       d->step = metadata_digest_lookup_writeset;
+       return 0;
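The failure mode is a classic stale-cache bug: a one-word cache keyed only by word index keeps serving data from whichever bitmap filled it first, and on devices of at most 64 blocks every lookup lands in word 0. A self-contained C sketch of the effect, and of the per-writeset reinitialisation that fixes it (the structure is invented for illustration, not dm-persistent-data's):

/* A reader with a one-word cache keyed only by word index returns data
 * from whichever bitmap it read first. With <= 64 bits every lookup is
 * word 0, so switching bitmaps without reinitialising silently reuses
 * the old contents. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cached_reader {
	int cached_word;  /* -1 = empty; mirrors re-running dm_disk_bitset_init() */
	uint64_t cache;
};

static bool test_bit_cached(struct cached_reader *r, const uint64_t *bitmap,
			    unsigned bit)
{
	int word = bit / 64;

	if (r->cached_word != word) {  /* note: ignores which bitmap! */
		r->cache = bitmap[word];
		r->cached_word = word;
	}
	return (r->cache >> (bit % 64)) & 1;
}

int main(void)
{
	uint64_t era1 = 0x1, era2 = 0x2; /* block 0 written in era1, block 1 in era2 */
	struct cached_reader r = { .cached_word = -1 };

	printf("era1 bit0=%d\n", test_bit_cached(&r, &era1, 0));          /* 1, correct */
	/* digesting era2 without reinitialising the cache: */
	printf("era2 bit1=%d (stale!)\n", test_bit_cached(&r, &era2, 1)); /* 0, wrong */

	r.cached_word = -1;  /* the fix: reinitialise per writeset */
	printf("era2 bit1=%d after reinit\n", test_bit_cached(&r, &era2, 1)); /* 1 */
	return 0;
}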
diff --git a/queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch b/queue-5.10/dm-era-update-in-core-bitset-after-committing-the-metadata.patch
new file mode 100644 (file)
index 0000000..238a1f3
--- /dev/null
@@ -0,0 +1,118 @@
+From 2099b145d77c1d53f5711f029c37cc537897cee6 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:19:31 +0200
+Subject: dm era: Update in-core bitset after committing the metadata
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 2099b145d77c1d53f5711f029c37cc537897cee6 upstream.
+
+In case of a system crash, dm-era might fail to mark blocks as written
+in its metadata, although the corresponding writes to these blocks were
+passed down to the origin device and completed successfully.
+
+Consider the following sequence of events:
+
+1. We write to a block that has not been yet written in the current era
+2. era_map() checks the in-core bitmap for the current era and sees
+   that the block is not marked as written.
+3. The write is deferred for submission after the metadata have been
+   updated and committed.
+4. The worker thread processes the deferred write
+   (process_deferred_bios()) and marks the block as written in the
+   in-core bitmap, **before** committing the metadata.
+5. The worker thread starts committing the metadata.
+6. We do more writes that map to the same block as the write of step (1)
+7. era_map() checks the in-core bitmap and sees that the block is marked
+   as written, **although the metadata have not been committed yet**.
+8. These writes are passed down to the origin device immediately and the
+   device reports them as completed.
+9. The system crashes, e.g., power failure, before the commit from step
+   (5) finishes.
+
+When the system recovers and we query the dm-era target for the list of
+written blocks it doesn't report the aforementioned block as written,
+although the writes of step (6) completed successfully.
+
+The issue is that era_map() decides whether or not to defer a write
+based on uncommitted information. The root cause of the bug is that we
+update the in-core bitmap **before** committing the metadata.
+
+Fix this by updating the in-core bitmap **after** successfully
+committing the metadata.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -134,7 +134,7 @@ static int writeset_test_and_set(struct
+ {
+       int r;
+-      if (!test_and_set_bit(block, ws->bits)) {
++      if (!test_bit(block, ws->bits)) {
+               r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
+               if (r) {
+                       /* FIXME: fail mode */
+@@ -1226,8 +1226,10 @@ static void process_deferred_bios(struct
+       int r;
+       struct bio_list deferred_bios, marked_bios;
+       struct bio *bio;
++      struct blk_plug plug;
+       bool commit_needed = false;
+       bool failed = false;
++      struct writeset *ws = era->md->current_writeset;
+       bio_list_init(&deferred_bios);
+       bio_list_init(&marked_bios);
+@@ -1237,9 +1239,11 @@ static void process_deferred_bios(struct
+       bio_list_init(&era->deferred_bios);
+       spin_unlock(&era->deferred_lock);
++      if (bio_list_empty(&deferred_bios))
++              return;
++
+       while ((bio = bio_list_pop(&deferred_bios))) {
+-              r = writeset_test_and_set(&era->md->bitset_info,
+-                                        era->md->current_writeset,
++              r = writeset_test_and_set(&era->md->bitset_info, ws,
+                                         get_block(era, bio));
+               if (r < 0) {
+                       /*
+@@ -1247,7 +1251,6 @@ static void process_deferred_bios(struct
+                        * FIXME: finish.
+                        */
+                       failed = true;
+-
+               } else if (r == 0)
+                       commit_needed = true;
+@@ -1263,9 +1266,19 @@ static void process_deferred_bios(struct
+       if (failed)
+               while ((bio = bio_list_pop(&marked_bios)))
+                       bio_io_error(bio);
+-      else
+-              while ((bio = bio_list_pop(&marked_bios)))
++      else {
++              blk_start_plug(&plug);
++              while ((bio = bio_list_pop(&marked_bios))) {
++                      /*
++                       * Only update the in-core writeset if the on-disk one
++                       * was updated too.
++                       */
++                      if (commit_needed)
++                              set_bit(get_block(era, bio), ws->bits);
+                       submit_bio_noacct(bio);
++              }
++              blk_finish_plug(&plug);
++      }
+ }
+ static void process_rpc_calls(struct era *era)
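The ordering rule behind this fix is general: state that lets a fast path skip a durability step must only be published after that step completes. A toy C sketch modelling the changelog's sequence with plain flags, the crash simulated by an early return:

/* The in-core bit lets later bios bypass the metadata commit, so it may
 * only be set once the on-disk update is committed; otherwise a crash
 * between the two loses writes that were already acknowledged. */
#include <stdio.h>
#include <stdbool.h>

static bool on_disk_committed, in_core_bit;

static void buggy_path(bool crash)
{
	in_core_bit = true;        /* step 4 in the changelog: set before commit */
	if (crash)
		return;            /* step 9: power failure mid-commit */
	on_disk_committed = true;
}

static void fixed_path(bool crash)
{
	if (crash)
		return;            /* crash before commit: bit stays clear */
	on_disk_committed = true;  /* commit metadata first */
	in_core_bit = true;        /* then publish to era_map()'s fast path */
}

int main(void)
{
	buggy_path(true);
	printf("buggy after crash: in_core=%d committed=%d (writes can bypass)\n",
	       in_core_bit, on_disk_committed);

	in_core_bit = on_disk_committed = false;
	fixed_path(true);
	printf("fixed after crash: in_core=%d committed=%d (writes stay deferred)\n",
	       in_core_bit, on_disk_committed);
	return 0;
}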
diff --git a/queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch b/queue-5.10/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch
new file mode 100644 (file)
index 0000000..12ec7d9
--- /dev/null
@@ -0,0 +1,33 @@
+From 64f2d15afe7b336aafebdcd14cc835ecf856df4b Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:55 +0200
+Subject: dm era: Use correct value size in equality function of writeset tree
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 64f2d15afe7b336aafebdcd14cc835ecf856df4b upstream.
+
+Fix the writeset tree equality test function to use the right value size
+when comparing two btree values.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -389,7 +389,7 @@ static void ws_dec(void *context, const
+ static int ws_eq(void *context, const void *value1, const void *value2)
+ {
+-      return !memcmp(value1, value2, sizeof(struct writeset_metadata));
++      return !memcmp(value1, value2, sizeof(struct writeset_disk));
+ }
+ /*----------------------------------------------------------------*/
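Why the sizeof matters here: btree values are stored and compared in the packed on-disk layout, while the in-core struct carries natural alignment padding, so the two sizes differ and the old memcmp read past the stored value. A small C sketch with illustrative stand-in structs (not the dm-era definitions):

/* The equality callback must compare exactly the bytes the btree stores,
 * i.e. the packed on-disk layout. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct writeset_disk_like {      /* packed on-disk layout */
	uint32_t nr_bits;
	uint64_t root;
} __attribute__((packed));

struct writeset_metadata_like {  /* in-core layout, natural alignment */
	uint32_t nr_bits;
	uint64_t root;
};

static int ws_eq_like(const void *a, const void *b)
{
	/* must be the on-disk size: that is what the btree hands us */
	return !memcmp(a, b, sizeof(struct writeset_disk_like));
}

int main(void)
{
	printf("on-disk: %zu bytes, in-core: %zu bytes\n",
	       sizeof(struct writeset_disk_like),
	       sizeof(struct writeset_metadata_like));
	/* typically 12 vs 16: comparing with the in-core size reads 4 bytes
	 * past the stored value. */
	(void)ws_eq_like;
	return 0;
}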
diff --git a/queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch b/queue-5.10/dm-era-verify-the-data-block-size-hasn-t-changed.patch
new file mode 100644 (file)
index 0000000..a086475
--- /dev/null
@@ -0,0 +1,49 @@
+From c8e846ff93d5eaa5384f6f325a1687ac5921aade Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:53 +0200
+Subject: dm era: Verify the data block size hasn't changed
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit c8e846ff93d5eaa5384f6f325a1687ac5921aade upstream.
+
+dm-era doesn't support changing the data block size of existing devices,
+so check explicitly that the requested block size for a new target
+matches the one stored in the metadata.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -564,6 +564,15 @@ static int open_metadata(struct era_meta
+       }
+       disk = dm_block_data(sblock);
++
++      /* Verify the data block size hasn't changed */
++      if (le32_to_cpu(disk->data_block_size) != md->block_size) {
++              DMERR("changing the data block size (from %u to %llu) is not supported",
++                    le32_to_cpu(disk->data_block_size), md->block_size);
++              r = -EINVAL;
++              goto bad;
++      }
++
+       r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
+                              disk->metadata_space_map_root,
+                              sizeof(disk->metadata_space_map_root),
+@@ -575,7 +584,6 @@ static int open_metadata(struct era_meta
+       setup_infos(md);
+-      md->block_size = le32_to_cpu(disk->data_block_size);
+       md->nr_blocks = le32_to_cpu(disk->nr_blocks);
+       md->current_era = le32_to_cpu(disk->current_era);
diff --git a/queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch b/queue-5.10/dm-fix-deadlock-when-swapping-to-encrypted-device.patch
new file mode 100644 (file)
index 0000000..1f1bf6d
--- /dev/null
@@ -0,0 +1,201 @@
+From a666e5c05e7c4aaabb2c5d58117b0946803d03d2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Wed, 10 Feb 2021 15:26:23 -0500
+Subject: dm: fix deadlock when swapping to encrypted device
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a666e5c05e7c4aaabb2c5d58117b0946803d03d2 upstream.
+
+The system would deadlock when swapping to a dm-crypt device. The reason
+is that for each incoming write bio, dm-crypt allocates memory that holds
+encrypted data. These excessive allocations exhaust all the memory and the
+result is either deadlock or OOM trigger.
+
+This patch limits the number of in-flight swap bios, so that the memory
+consumed by dm-crypt is limited. The limit is enforced if the target set
+the "limit_swap_bios" variable and if the bio has REQ_SWAP set.
+
+Non-swap bios are not affected because taking the semaphore would cause
+performance degradation.
+
+This is similar to request-based drivers - they will also block when the
+number of requests is over the limit.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-core.h          |    4 ++
+ drivers/md/dm-crypt.c         |    1 
+ drivers/md/dm.c               |   60 ++++++++++++++++++++++++++++++++++++++++++
+ include/linux/device-mapper.h |    5 +++
+ 4 files changed, 70 insertions(+)
+
+--- a/drivers/md/dm-core.h
++++ b/drivers/md/dm-core.h
+@@ -109,6 +109,10 @@ struct mapped_device {
+       struct block_device *bdev;
++      int swap_bios;
++      struct semaphore swap_bios_semaphore;
++      struct mutex swap_bios_lock;
++
+       struct dm_stats stats;
+       /* for blk-mq request-based DM support */
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *t
+       wake_up_process(cc->write_thread);
+       ti->num_flush_bios = 1;
++      ti->limit_swap_bios = true;
+       return 0;
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_
+ #define DM_NUMA_NODE NUMA_NO_NODE
+ static int dm_numa_node = DM_NUMA_NODE;
++#define DEFAULT_SWAP_BIOS     (8 * 1048576 / PAGE_SIZE)
++static int swap_bios = DEFAULT_SWAP_BIOS;
++static int get_swap_bios(void)
++{
++      int latch = READ_ONCE(swap_bios);
++      if (unlikely(latch <= 0))
++              latch = DEFAULT_SWAP_BIOS;
++      return latch;
++}
++
+ /*
+  * For mempools pre-allocation at the table loading time.
+  */
+@@ -966,6 +976,11 @@ void disable_write_zeroes(struct mapped_
+       limits->max_write_zeroes_sectors = 0;
+ }
++static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
++{
++      return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
++}
++
+ static void clone_endio(struct bio *bio)
+ {
+       blk_status_t error = bio->bi_status;
+@@ -1016,6 +1031,11 @@ static void clone_endio(struct bio *bio)
+               }
+       }
++      if (unlikely(swap_bios_limit(tio->ti, bio))) {
++              struct mapped_device *md = io->md;
++              up(&md->swap_bios_semaphore);
++      }
++
+       free_tio(tio);
+       dec_pending(io, error);
+ }
+@@ -1249,6 +1269,22 @@ void dm_accept_partial_bio(struct bio *b
+ }
+ EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
++static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
++{
++      mutex_lock(&md->swap_bios_lock);
++      while (latch < md->swap_bios) {
++              cond_resched();
++              down(&md->swap_bios_semaphore);
++              md->swap_bios--;
++      }
++      while (latch > md->swap_bios) {
++              cond_resched();
++              up(&md->swap_bios_semaphore);
++              md->swap_bios++;
++      }
++      mutex_unlock(&md->swap_bios_lock);
++}
++
+ static blk_qc_t __map_bio(struct dm_target_io *tio)
+ {
+       int r;
+@@ -1268,6 +1304,14 @@ static blk_qc_t __map_bio(struct dm_targ
+       atomic_inc(&io->io_count);
+       sector = clone->bi_iter.bi_sector;
++      if (unlikely(swap_bios_limit(ti, clone))) {
++              struct mapped_device *md = io->md;
++              int latch = get_swap_bios();
++              if (unlikely(latch != md->swap_bios))
++                      __set_swap_bios_limit(md, latch);
++              down(&md->swap_bios_semaphore);
++      }
++
+       r = ti->type->map(ti, clone);
+       switch (r) {
+       case DM_MAPIO_SUBMITTED:
+@@ -1279,10 +1323,18 @@ static blk_qc_t __map_bio(struct dm_targ
+               ret = submit_bio_noacct(clone);
+               break;
+       case DM_MAPIO_KILL:
++              if (unlikely(swap_bios_limit(ti, clone))) {
++                      struct mapped_device *md = io->md;
++                      up(&md->swap_bios_semaphore);
++              }
+               free_tio(tio);
+               dec_pending(io, BLK_STS_IOERR);
+               break;
+       case DM_MAPIO_REQUEUE:
++              if (unlikely(swap_bios_limit(ti, clone))) {
++                      struct mapped_device *md = io->md;
++                      up(&md->swap_bios_semaphore);
++              }
+               free_tio(tio);
+               dec_pending(io, BLK_STS_DM_REQUEUE);
+               break;
+@@ -1756,6 +1808,7 @@ static void cleanup_mapped_device(struct
+       mutex_destroy(&md->suspend_lock);
+       mutex_destroy(&md->type_lock);
+       mutex_destroy(&md->table_devices_lock);
++      mutex_destroy(&md->swap_bios_lock);
+       dm_mq_cleanup_mapped_device(md);
+ }
+@@ -1823,6 +1876,10 @@ static struct mapped_device *alloc_dev(i
+       init_waitqueue_head(&md->eventq);
+       init_completion(&md->kobj_holder.completion);
++      md->swap_bios = get_swap_bios();
++      sema_init(&md->swap_bios_semaphore, md->swap_bios);
++      mutex_init(&md->swap_bios_lock);
++
+       md->disk->major = _major;
+       md->disk->first_minor = minor;
+       md->disk->fops = &dm_blk_dops;
+@@ -3119,6 +3176,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios,
+ module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
+ MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
++module_param(swap_bios, int, S_IRUGO | S_IWUSR);
++MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
++
+ MODULE_DESCRIPTION(DM_NAME " driver");
+ MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+ MODULE_LICENSE("GPL");
+--- a/include/linux/device-mapper.h
++++ b/include/linux/device-mapper.h
+@@ -325,6 +325,11 @@ struct dm_target {
+        * whether or not its underlying devices have support.
+        */
+       bool discards_supported:1;
++
++      /*
++       * Set if we need to limit the number of in-flight bios when swapping.
++       */
++      bool limit_swap_bios:1;
+ };
+ void *dm_per_bio_data(struct bio *bio, size_t data_size);
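The throttle added here is a counting semaphore: down() before mapping a swap bio, up() when it completes or fails, with the limit defaulting to DEFAULT_SWAP_BIOS (8 MiB worth of pages, i.e. 2048 with 4 KiB pages). A userspace analogue using POSIX semaphores in place of the kernel's struct semaphore, with the limit shrunk for the demo:

/* A counting semaphore caps in-flight "swap" requests so submitters block
 * instead of letting per-request allocations grow without bound. */
#include <semaphore.h>
#include <stdio.h>

#define DEMO_SWAP_LIMIT 4

static sem_t swap_sem;

static void submit_swap_io(int n)
{
	sem_wait(&swap_sem);  /* like down(&md->swap_bios_semaphore) in __map_bio() */
	printf("bio %d in flight\n", n);
}

static void swap_io_done(int n)
{
	printf("bio %d completed\n", n);
	sem_post(&swap_sem);  /* like up() in clone_endio() and the error paths */
}

int main(void)
{
	sem_init(&swap_sem, 0, DEMO_SWAP_LIMIT);

	for (int i = 0; i < DEMO_SWAP_LIMIT; i++)
		submit_swap_io(i);
	/* a fifth submit_swap_io() here would block until one completes */
	swap_io_done(0);
	submit_swap_io(4);    /* proceeds now that a slot freed up */

	sem_destroy(&swap_sem);
	return 0;
}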
diff --git a/queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch
new file mode 100644 (file)
index 0000000..83801b0
--- /dev/null
@@ -0,0 +1,139 @@
+From 5b0fab508992c2e120971da658ce80027acbc405 Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Mon, 8 Feb 2021 22:34:36 -0500
+Subject: dm table: fix DAX iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit 5b0fab508992c2e120971da658ce80027acbc405 upstream.
+
+Fix dm_table_supports_dax() and invert the logic of both
+iterate_devices_callout_fn instances so that all devices' DAX
+capabilities are properly checked.
+
+Fixes: 545ed20e6df6 ("dm: add infrastructure for DAX support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c |   37 ++++++++++---------------------------
+ drivers/md/dm.c       |    2 +-
+ drivers/md/dm.h       |    2 +-
+ 3 files changed, 12 insertions(+), 29 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -827,24 +827,24 @@ void dm_table_set_type(struct dm_table *
+ EXPORT_SYMBOL_GPL(dm_table_set_type);
+ /* validate the dax capability of the target device span */
+-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+                       sector_t start, sector_t len, void *data)
+ {
+       int blocksize = *(int *) data, id;
+       bool rc;
+       id = dax_read_lock();
+-      rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
++      rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
+       dax_read_unlock(id);
+       return rc;
+ }
+ /* Check devices support synchronous DAX */
+-static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev,
+-                                sector_t start, sector_t len, void *data)
++static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
++                                            sector_t start, sector_t len, void *data)
+ {
+-      return dev->dax_dev && dax_synchronous(dev->dax_dev);
++      return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
+ }
+ bool dm_table_supports_dax(struct dm_table *t,
+@@ -861,7 +861,7 @@ bool dm_table_supports_dax(struct dm_tab
+                       return false;
+               if (!ti->type->iterate_devices ||
+-                  !ti->type->iterate_devices(ti, iterate_fn, blocksize))
++                  ti->type->iterate_devices(ti, iterate_fn, blocksize))
+                       return false;
+       }
+@@ -932,7 +932,7 @@ static int dm_table_determine_type(struc
+ verify_bio_based:
+               /* We must use this table as bio-based */
+               t->type = DM_TYPE_BIO_BASED;
+-              if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
++              if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
+                   (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
+                       t->type = DM_TYPE_DAX_BIO_BASED;
+               }
+@@ -1625,23 +1625,6 @@ static int device_dax_write_cache_enable
+       return false;
+ }
+-static int dm_table_supports_dax_write_cache(struct dm_table *t)
+-{
+-      struct dm_target *ti;
+-      unsigned i;
+-
+-      for (i = 0; i < dm_table_get_num_targets(t); i++) {
+-              ti = dm_table_get_target(t, i);
+-
+-              if (ti->type->iterate_devices &&
+-                  ti->type->iterate_devices(ti,
+-                              device_dax_write_cache_enabled, NULL))
+-                      return true;
+-      }
+-
+-      return false;
+-}
+-
+ static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
+                               sector_t start, sector_t len, void *data)
+ {
+@@ -1846,15 +1829,15 @@ void dm_table_set_restrictions(struct dm
+       }
+       blk_queue_write_cache(q, wc, fua);
+-      if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
++      if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
+               blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+-              if (dm_table_supports_dax(t, device_dax_synchronous, NULL))
++              if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
+                       set_dax_synchronous(t->md->dax_dev);
+       }
+       else
+               blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+-      if (dm_table_supports_dax_write_cache(t))
++      if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled))
+               dax_write_cache(t->md->dax_dev, true);
+       /* Ensure that all underlying devices are non-rotational. */
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1145,7 +1145,7 @@ static bool dm_dax_supported(struct dax_
+       if (!map)
+               goto out;
+-      ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
++      ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
+ out:
+       dm_put_live_table(md, srcu_idx);
+--- a/drivers/md/dm.h
++++ b/drivers/md/dm.h
+@@ -73,7 +73,7 @@ void dm_table_free_md_mempools(struct dm
+ struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
+ bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+                          int *blocksize);
+-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+                          sector_t start, sector_t len, void *data);
+ void dm_lock_md_type(struct mapped_device *md);
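Both dm-table fixes in this batch rest on the same any/all inversion: an iterator that stops at the first non-zero callback return naturally answers "does any device match", so "are all devices capable" has to be phrased as "no device is not capable". A compact C sketch of the pattern, with a toy device encoding invented for illustration:

/* An iterator that stops at the first non-zero return implements "any".
 * "All devices capable" is expressed as !any(not_capable). */
#include <stdbool.h>
#include <stdio.h>

typedef int (*dev_pred)(int dev);

static int iterate_devices(const int *devs, int n, dev_pred fn)
{
	for (int i = 0; i < n; i++)
		if (fn(devs[i]))
			return 1;  /* stops early, like the kernel iterator */
	return 0;
}

static int device_not_dax_capable(int dev)
{
	return !(dev & 1);  /* toy encoding: bit 0 means "DAX capable" */
}

int main(void)
{
	int all_dax[] = { 1, 3, 5 }, mixed[] = { 1, 2, 5 };

	bool a = !iterate_devices(all_dax, 3, device_not_dax_capable);
	bool b = !iterate_devices(mixed, 3, device_not_dax_capable);
	printf("all_dax supports dax: %d, mixed supports dax: %d\n", a, b);
	/* prints 1 and 0; with the old non-inverted callback, a stripe-style
	 * target iterating several devices internally could stop early and
	 * report a false positive. */
	return 0;
}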
diff --git a/queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-iterate_devices-based-device-capability-checks.patch
new file mode 100644 (file)
index 0000000..9c7196a
--- /dev/null
@@ -0,0 +1,191 @@
+From a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Tue, 2 Feb 2021 11:35:28 +0800
+Subject: dm table: fix iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e upstream.
+
+According to the definition of dm_iterate_devices_fn:
+ * This function must iterate through each section of device used by the
+ * target until it encounters a non-zero return code, which it then returns.
+ * Returns zero if no callout returned non-zero.
+
+For some target type (e.g. dm-stripe), one call of iterate_devices() may
+iterate multiple underlying devices internally, in which case a non-zero
+return code returned by iterate_devices_callout_fn will stop the iteration
+in advance. No iterate_devices_callout_fn should return non-zero unless
+device iteration should stop.
+
+Rename dm_table_requires_stable_pages() to dm_table_any_dev_attr() and
+elevate it for reuse to stop iterating (and return non-zero) on the
+first device that causes iterate_devices_callout_fn to return non-zero.
+Use dm_table_any_dev_attr() to properly iterate through devices.
+
+Rename device_is_nonrot() to device_is_rotational() and invert its logic
+accordingly so the check is applied correctly.
+
+Fixes: c3c4555edd10 ("dm table: clear add_random unless all devices have it set")
+Fixes: 4693c9668fdc ("dm table: propagate non rotational flag")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c |   97 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 51 insertions(+), 46 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1302,6 +1302,46 @@ struct dm_target *dm_table_find_target(s
+       return &t->targets[(KEYS_PER_NODE * n) + k];
+ }
++/*
++ * type->iterate_devices() should be called when the sanity check needs to
++ * iterate and check all underlying data devices. iterate_devices() will
++ * iterate all underlying data devices until it encounters a non-zero return
++ * code, returned by whether the input iterate_devices_callout_fn, or
++ * iterate_devices() itself internally.
++ *
++ * For some target type (e.g. dm-stripe), one call of iterate_devices() may
++ * iterate multiple underlying devices internally, in which case a non-zero
++ * return code returned by iterate_devices_callout_fn will stop the iteration
++ * in advance.
++ *
++ * Cases requiring _any_ underlying device supporting some kind of attribute,
++ * should use the iteration structure like dm_table_any_dev_attr(), or call
++ * it directly. @func should handle semantics of positive examples, e.g.
++ * capable of something.
++ *
++ * Cases requiring _all_ underlying devices supporting some kind of attribute,
++ * should use the iteration structure like dm_table_supports_nowait() or
++ * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
++ * uses an @anti_func that handle semantics of counter examples, e.g. not
++ * capable of something. So: return !dm_table_any_dev_attr(t, anti_func);
++ */
++static bool dm_table_any_dev_attr(struct dm_table *t,
++                                iterate_devices_callout_fn func)
++{
++      struct dm_target *ti;
++      unsigned int i;
++
++      for (i = 0; i < dm_table_get_num_targets(t); i++) {
++              ti = dm_table_get_target(t, i);
++
++              if (ti->type->iterate_devices &&
++                  ti->type->iterate_devices(ti, func, NULL))
++                      return true;
++        }
++
++      return false;
++}
++
+ static int count_device(struct dm_target *ti, struct dm_dev *dev,
+                       sector_t start, sector_t len, void *data)
+ {
+@@ -1602,12 +1642,12 @@ static int dm_table_supports_dax_write_c
+       return false;
+ }
+-static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
+-                          sector_t start, sector_t len, void *data)
++static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
++                              sector_t start, sector_t len, void *data)
+ {
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+-      return q && blk_queue_nonrot(q);
++      return q && !blk_queue_nonrot(q);
+ }
+ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
+@@ -1618,23 +1658,6 @@ static int device_is_not_random(struct d
+       return q && !blk_queue_add_random(q);
+ }
+-static bool dm_table_all_devices_attribute(struct dm_table *t,
+-                                         iterate_devices_callout_fn func)
+-{
+-      struct dm_target *ti;
+-      unsigned i;
+-
+-      for (i = 0; i < dm_table_get_num_targets(t); i++) {
+-              ti = dm_table_get_target(t, i);
+-
+-              if (!ti->type->iterate_devices ||
+-                  !ti->type->iterate_devices(ti, func, NULL))
+-                      return false;
+-      }
+-
+-      return true;
+-}
+-
+ static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
+                                        sector_t start, sector_t len, void *data)
+ {
+@@ -1786,27 +1809,6 @@ static int device_requires_stable_pages(
+       return q && blk_queue_stable_writes(q);
+ }
+-/*
+- * If any underlying device requires stable pages, a table must require
+- * them as well.  Only targets that support iterate_devices are considered:
+- * don't want error, zero, etc to require stable pages.
+- */
+-static bool dm_table_requires_stable_pages(struct dm_table *t)
+-{
+-      struct dm_target *ti;
+-      unsigned i;
+-
+-      for (i = 0; i < dm_table_get_num_targets(t); i++) {
+-              ti = dm_table_get_target(t, i);
+-
+-              if (ti->type->iterate_devices &&
+-                  ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
+-                      return true;
+-      }
+-
+-      return false;
+-}
+-
+ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+                              struct queue_limits *limits)
+ {
+@@ -1856,10 +1858,10 @@ void dm_table_set_restrictions(struct dm
+               dax_write_cache(t->md->dax_dev, true);
+       /* Ensure that all underlying devices are non-rotational. */
+-      if (dm_table_all_devices_attribute(t, device_is_nonrot))
+-              blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+-      else
++      if (dm_table_any_dev_attr(t, device_is_rotational))
+               blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
++      else
++              blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+       if (!dm_table_supports_write_same(t))
+               q->limits.max_write_same_sectors = 0;
+@@ -1871,8 +1873,11 @@ void dm_table_set_restrictions(struct dm
+       /*
+        * Some devices don't use blk_integrity but still want stable pages
+        * because they do their own checksumming.
++       * If any underlying device requires stable pages, a table must require
++       * them as well.  Only targets that support iterate_devices are considered:
++       * don't want error, zero, etc to require stable pages.
+        */
+-      if (dm_table_requires_stable_pages(t))
++      if (dm_table_any_dev_attr(t, device_requires_stable_pages))
+               blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+       else
+               blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+@@ -1883,7 +1888,7 @@ void dm_table_set_restrictions(struct dm
+        * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+        * have it set.
+        */
+-      if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
++      if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random))
+               blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+       /*
diff --git a/queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch b/queue-5.10/dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch
new file mode 100644 (file)
index 0000000..17adbb8
--- /dev/null
@@ -0,0 +1,152 @@
+From 24f6b6036c9eec21191646930ad42808e6180510 Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Mon, 8 Feb 2021 22:46:38 -0500
+Subject: dm table: fix zoned iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit 24f6b6036c9eec21191646930ad42808e6180510 upstream.
+
+Fix dm_table_supports_zoned_model() and invert the logic of both
+iterate_devices_callout_fn instances so that all devices' zoned
+capabilities are properly checked.
+
+Add one more parameter to dm_table_any_dev_attr(), which is actually
+used as the @data parameter of iterate_devices_callout_fn, so that
+dm_table_matches_zone_sectors() can be replaced by
+dm_table_any_dev_attr().
+
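+As a rough sketch of how the @data argument threads a caller-supplied
+value through to the callout (all names below are illustrative, not the
+actual DM code):
+
+#include <stdbool.h>
+#include <stdio.h>
+
+typedef int (*callout_fn)(unsigned int dev_zone_sectors, void *data);
+
+static int not_matches_zone_sectors(unsigned int dev_zone_sectors, void *data)
+{
+        unsigned int *expected = data;  /* threaded through the iterator */
+
+        return dev_zone_sectors != *expected;
+}
+
+static bool any_dev(const unsigned int *devs, int n, callout_fn fn, void *data)
+{
+        int i;
+
+        for (i = 0; i < n; i++)
+                if (fn(devs[i], data))
+                        return true;
+        return false;
+}
+
+int main(void)
+{
+        unsigned int zone_sectors = 524288;
+        unsigned int devs[] = { 524288, 524288, 262144 };
+
+        if (any_dev(devs, 3, not_matches_zone_sectors, &zone_sectors))
+                printf("zone sectors not consistent across all devices\n");
+        return 0;
+}
+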
+Fixes: dd88d313bef02 ("dm table: add zoned block devices validation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c |   48 ++++++++++++++++--------------------------------
+ 1 file changed, 16 insertions(+), 32 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1323,10 +1323,10 @@ struct dm_target *dm_table_find_target(s
+  * should use the iteration structure like dm_table_supports_nowait() or
+  * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
+  * uses an @anti_func that handle semantics of counter examples, e.g. not
+- * capable of something. So: return !dm_table_any_dev_attr(t, anti_func);
++ * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
+  */
+ static bool dm_table_any_dev_attr(struct dm_table *t,
+-                                iterate_devices_callout_fn func)
++                                iterate_devices_callout_fn func, void *data)
+ {
+       struct dm_target *ti;
+       unsigned int i;
+@@ -1335,7 +1335,7 @@ static bool dm_table_any_dev_attr(struct
+               ti = dm_table_get_target(t, i);
+               if (ti->type->iterate_devices &&
+-                  ti->type->iterate_devices(ti, func, NULL))
++                  ti->type->iterate_devices(ti, func, data))
+                       return true;
+         }
+@@ -1378,13 +1378,13 @@ bool dm_table_has_no_data_devices(struct
+       return true;
+ }
+-static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+-                               sector_t start, sector_t len, void *data)
++static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
++                                sector_t start, sector_t len, void *data)
+ {
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+       enum blk_zoned_model *zoned_model = data;
+-      return q && blk_queue_zoned_model(q) == *zoned_model;
++      return !q || blk_queue_zoned_model(q) != *zoned_model;
+ }
+ static bool dm_table_supports_zoned_model(struct dm_table *t,
+@@ -1401,37 +1401,20 @@ static bool dm_table_supports_zoned_mode
+                       return false;
+               if (!ti->type->iterate_devices ||
+-                  !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model))
++                  ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
+                       return false;
+       }
+       return true;
+ }
+-static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
+-                                     sector_t start, sector_t len, void *data)
++static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
++                                         sector_t start, sector_t len, void *data)
+ {
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+       unsigned int *zone_sectors = data;
+-      return q && blk_queue_zone_sectors(q) == *zone_sectors;
+-}
+-
+-static bool dm_table_matches_zone_sectors(struct dm_table *t,
+-                                        unsigned int zone_sectors)
+-{
+-      struct dm_target *ti;
+-      unsigned i;
+-
+-      for (i = 0; i < dm_table_get_num_targets(t); i++) {
+-              ti = dm_table_get_target(t, i);
+-
+-              if (!ti->type->iterate_devices ||
+-                  !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors))
+-                      return false;
+-      }
+-
+-      return true;
++      return !q || blk_queue_zone_sectors(q) != *zone_sectors;
+ }
+ static int validate_hardware_zoned_model(struct dm_table *table,
+@@ -1451,7 +1434,7 @@ static int validate_hardware_zoned_model
+       if (!zone_sectors || !is_power_of_2(zone_sectors))
+               return -EINVAL;
+-      if (!dm_table_matches_zone_sectors(table, zone_sectors)) {
++      if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
+               DMERR("%s: zone sectors is not consistent across all devices",
+                     dm_device_name(table->md));
+               return -EINVAL;
+@@ -1837,11 +1820,11 @@ void dm_table_set_restrictions(struct dm
+       else
+               blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+-      if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled))
++      if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
+               dax_write_cache(t->md->dax_dev, true);
+       /* Ensure that all underlying devices are non-rotational. */
+-      if (dm_table_any_dev_attr(t, device_is_rotational))
++      if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
+               blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+       else
+               blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+@@ -1860,7 +1843,7 @@ void dm_table_set_restrictions(struct dm
+        * them as well.  Only targets that support iterate_devices are considered:
+        * don't want error, zero, etc to require stable pages.
+        */
+-      if (dm_table_any_dev_attr(t, device_requires_stable_pages))
++      if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
+               blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+       else
+               blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+@@ -1871,7 +1854,8 @@ void dm_table_set_restrictions(struct dm
+        * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+        * have it set.
+        */
+-      if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random))
++      if (blk_queue_add_random(q) &&
++          dm_table_any_dev_attr(t, device_is_not_random, NULL))
+               blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+       /*
diff --git a/queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch b/queue-5.10/dm-writecache-fix-performance-degradation-in-ssd-mode.patch
new file mode 100644 (file)
index 0000000..839c607
--- /dev/null
@@ -0,0 +1,34 @@
+From cb728484a7710c202f02b96aa0962ce9b07aa5c2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Sat, 23 Jan 2021 09:19:56 -0500
+Subject: dm writecache: fix performance degradation in ssd mode
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit cb728484a7710c202f02b96aa0962ce9b07aa5c2 upstream.
+
+Fix a thinko in ssd_commit_superblock. region.count is in sectors, not
+bytes. This bug doesn't corrupt data, but it causes performance
+degradation.
+
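+The unit mismatch is easy to see with concrete numbers; a minimal sketch
+assuming a 4096-byte PAGE_SIZE and 512-byte sectors (SECTOR_SHIFT == 9):
+
+#include <stdio.h>
+
+#define PAGE_SIZE       4096UL  /* assumed for the example */
+#define SECTOR_SHIFT    9       /* 512-byte sectors */
+
+int main(void)
+{
+        /* wrong: 4096 "sectors" would mean 2 MiB for a one-page write */
+        unsigned long wrong = PAGE_SIZE;
+        /* right: one page expressed in sectors */
+        unsigned long right = PAGE_SIZE >> SECTOR_SHIFT;
+
+        printf("region.count: wrong=%lu, right=%lu sectors\n", wrong, right);
+        return 0;       /* prints 4096 vs 8 */
+}
+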
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Fixes: dc8a01ae1dbd ("dm writecache: optimize superblock write")
+Cc: stable@vger.kernel.org # v5.7+
+Reported-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -523,7 +523,7 @@ static void ssd_commit_superblock(struct
+       region.bdev = wc->ssd_dev->bdev;
+       region.sector = 0;
+-      region.count = PAGE_SIZE;
++      region.count = PAGE_SIZE >> SECTOR_SHIFT;
+       if (unlikely(region.sector + region.count > wc->metadata_sectors))
+               region.count = wc->metadata_sectors - region.sector;
diff --git a/queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch b/queue-5.10/dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch
new file mode 100644 (file)
index 0000000..dc6e1f3
--- /dev/null
@@ -0,0 +1,78 @@
+From 4134455f2aafdfeab50cabb4cccb35e916034b93 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 9 Feb 2021 10:56:20 -0500
+Subject: dm writecache: fix writing beyond end of underlying device when shrinking
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 4134455f2aafdfeab50cabb4cccb35e916034b93 upstream.
+
+Do not attempt to write any data beyond the end of the underlying data
+device while shrinking it.
+
+The DM writecache device must be suspended when the underlying data
+device is shrunk.
+
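+The clamping this requires can be sketched in isolation (names are
+abbreviated from the driver; this is a standalone illustration, not the
+patch itself):
+
+#include <stdio.h>
+
+typedef unsigned long long sector_t;
+
+/* how many sectors of a writeback request may actually be copied */
+static sector_t clamp_writeback(sector_t to_sector, sector_t count,
+                                sector_t data_device_sectors)
+{
+        if (to_sector >= data_device_sectors)
+                return 0;       /* entirely beyond the shrunk device */
+        if (to_sector + count > data_device_sectors)
+                return data_device_sectors - to_sector;  /* partial clamp */
+        return count;
+}
+
+int main(void)
+{
+        /* device shrunk to 1000 sectors; 64 sectors requested at 990 */
+        printf("%llu sectors\n", clamp_writeback(990, 64, 1000));  /* 10 */
+        return 0;
+}
+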
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -148,6 +148,7 @@ struct dm_writecache {
+       size_t metadata_sectors;
+       size_t n_blocks;
+       uint64_t seq_count;
++      sector_t data_device_sectors;
+       void *block_start;
+       struct wc_entry *entries;
+       unsigned block_size;
+@@ -977,6 +978,8 @@ static void writecache_resume(struct dm_
+       wc_lock(wc);
++      wc->data_device_sectors = i_size_read(wc->dev->bdev->bd_inode) >> SECTOR_SHIFT;
++
+       if (WC_MODE_PMEM(wc)) {
+               persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+       } else {
+@@ -1646,6 +1649,10 @@ static bool wc_add_block(struct writebac
+       void *address = memory_data(wc, e);
+       persistent_memory_flush_cache(address, block_size);
++
++      if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors))
++              return true;
++
+       return bio_add_page(&wb->bio, persistent_memory_page(address),
+                           block_size, persistent_memory_page_offset(address)) != 0;
+ }
+@@ -1717,6 +1724,9 @@ static void __writecache_writeback_pmem(
+               if (writecache_has_error(wc)) {
+                       bio->bi_status = BLK_STS_IOERR;
+                       bio_endio(bio);
++              } else if (unlikely(!bio_sectors(bio))) {
++                      bio->bi_status = BLK_STS_OK;
++                      bio_endio(bio);
+               } else {
+                       submit_bio(bio);
+               }
+@@ -1760,6 +1770,14 @@ static void __writecache_writeback_ssd(s
+                       e = f;
+               }
++              if (unlikely(to.sector + to.count > wc->data_device_sectors)) {
++                      if (to.sector >= wc->data_device_sectors) {
++                              writecache_copy_endio(0, 0, c);
++                              continue;
++                      }
++                      from.count = to.count = wc->data_device_sectors - to.sector;
++              }
++
+               dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
+               __writeback_throttle(wc, wbl);
diff --git a/queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch b/queue-5.10/dm-writecache-return-the-exact-table-values-that-were-set.patch
new file mode 100644 (file)
index 0000000..02bf00f
--- /dev/null
@@ -0,0 +1,170 @@
+From 054bee16163df023e2589db09fd27d81f7ad9e72 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 4 Feb 2021 05:20:52 -0500
+Subject: dm writecache: return the exact table values that were set
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 054bee16163df023e2589db09fd27d81f7ad9e72 upstream.
+
+LVM doesn't like it when the target returns different values from what
+was set in the constructor. Fix dm-writecache so that the returned
+table values are exactly the same as the requested values.
+
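+The precision loss in the old round-trip is easy to demonstrate; a worked
+sketch with the formulas paraphrased from the old constructor and status
+code (values chosen small for effect):
+
+#include <stdio.h>
+
+int main(void)
+{
+        unsigned long long n_blocks = 3;        /* tiny cache, for effect */
+        unsigned set_percent = 50;              /* value from the table line */
+
+        /* the old code stored only the derived watermark... */
+        unsigned long long watermark = n_blocks * (100 - set_percent) / 100;
+
+        /* ...and re-derived the percentage for the status output */
+        unsigned reported = 100 -
+                (unsigned)((watermark * 100 + n_blocks / 2) / n_blocks);
+
+        printf("set %u%%, reported %u%%\n", set_percent, reported);
+        return 0;       /* prints: set 50%, reported 67% */
+}
+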
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org # v4.18+
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c |   54 +++++++++++++++++++++++++--------------------
+ 1 file changed, 30 insertions(+), 24 deletions(-)
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -159,14 +159,22 @@ struct dm_writecache {
+       bool overwrote_committed:1;
+       bool memory_vmapped:1;
++      bool start_sector_set:1;
+       bool high_wm_percent_set:1;
+       bool low_wm_percent_set:1;
+       bool max_writeback_jobs_set:1;
+       bool autocommit_blocks_set:1;
+       bool autocommit_time_set:1;
++      bool max_age_set:1;
+       bool writeback_fua_set:1;
+       bool flush_on_suspend:1;
+       bool cleaner:1;
++      bool cleaner_set:1;
++
++      unsigned high_wm_percent_value;
++      unsigned low_wm_percent_value;
++      unsigned autocommit_time_value;
++      unsigned max_age_value;
+       unsigned writeback_all;
+       struct workqueue_struct *writeback_wq;
+@@ -2205,6 +2213,7 @@ static int writecache_ctr(struct dm_targ
+                       if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
+                               goto invalid_optional;
+                       wc->start_sector = start_sector;
++                      wc->start_sector_set = true;
+                       if (wc->start_sector != start_sector ||
+                           wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT)
+                               goto invalid_optional;
+@@ -2214,6 +2223,7 @@ static int writecache_ctr(struct dm_targ
+                               goto invalid_optional;
+                       if (high_wm_percent < 0 || high_wm_percent > 100)
+                               goto invalid_optional;
++                      wc->high_wm_percent_value = high_wm_percent;
+                       wc->high_wm_percent_set = true;
+               } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
+                       string = dm_shift_arg(&as), opt_params--;
+@@ -2221,6 +2231,7 @@ static int writecache_ctr(struct dm_targ
+                               goto invalid_optional;
+                       if (low_wm_percent < 0 || low_wm_percent > 100)
+                               goto invalid_optional;
++                      wc->low_wm_percent_value = low_wm_percent;
+                       wc->low_wm_percent_set = true;
+               } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
+                       string = dm_shift_arg(&as), opt_params--;
+@@ -2240,6 +2251,7 @@ static int writecache_ctr(struct dm_targ
+                       if (autocommit_msecs > 3600000)
+                               goto invalid_optional;
+                       wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
++                      wc->autocommit_time_value = autocommit_msecs;
+                       wc->autocommit_time_set = true;
+               } else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
+                       unsigned max_age_msecs;
+@@ -2249,7 +2261,10 @@ static int writecache_ctr(struct dm_targ
+                       if (max_age_msecs > 86400000)
+                               goto invalid_optional;
+                       wc->max_age = msecs_to_jiffies(max_age_msecs);
++                      wc->max_age_set = true;
++                      wc->max_age_value = max_age_msecs;
+               } else if (!strcasecmp(string, "cleaner")) {
++                      wc->cleaner_set = true;
+                       wc->cleaner = true;
+               } else if (!strcasecmp(string, "fua")) {
+                       if (WC_MODE_PMEM(wc)) {
+@@ -2455,7 +2470,6 @@ static void writecache_status(struct dm_
+       struct dm_writecache *wc = ti->private;
+       unsigned extra_args;
+       unsigned sz = 0;
+-      uint64_t x;
+       switch (type) {
+       case STATUSTYPE_INFO:
+@@ -2467,11 +2481,11 @@ static void writecache_status(struct dm_
+               DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
+                               wc->dev->name, wc->ssd_dev->name, wc->block_size);
+               extra_args = 0;
+-              if (wc->start_sector)
++              if (wc->start_sector_set)
+                       extra_args += 2;
+-              if (wc->high_wm_percent_set && !wc->cleaner)
++              if (wc->high_wm_percent_set)
+                       extra_args += 2;
+-              if (wc->low_wm_percent_set && !wc->cleaner)
++              if (wc->low_wm_percent_set)
+                       extra_args += 2;
+               if (wc->max_writeback_jobs_set)
+                       extra_args += 2;
+@@ -2479,37 +2493,29 @@ static void writecache_status(struct dm_
+                       extra_args += 2;
+               if (wc->autocommit_time_set)
+                       extra_args += 2;
+-              if (wc->max_age != MAX_AGE_UNSPECIFIED)
++              if (wc->max_age_set)
+                       extra_args += 2;
+-              if (wc->cleaner)
++              if (wc->cleaner_set)
+                       extra_args++;
+               if (wc->writeback_fua_set)
+                       extra_args++;
+               DMEMIT("%u", extra_args);
+-              if (wc->start_sector)
++              if (wc->start_sector_set)
+                       DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
+-              if (wc->high_wm_percent_set && !wc->cleaner) {
+-                      x = (uint64_t)wc->freelist_high_watermark * 100;
+-                      x += wc->n_blocks / 2;
+-                      do_div(x, (size_t)wc->n_blocks);
+-                      DMEMIT(" high_watermark %u", 100 - (unsigned)x);
+-              }
+-              if (wc->low_wm_percent_set && !wc->cleaner) {
+-                      x = (uint64_t)wc->freelist_low_watermark * 100;
+-                      x += wc->n_blocks / 2;
+-                      do_div(x, (size_t)wc->n_blocks);
+-                      DMEMIT(" low_watermark %u", 100 - (unsigned)x);
+-              }
++              if (wc->high_wm_percent_set)
++                      DMEMIT(" high_watermark %u", wc->high_wm_percent_value);
++              if (wc->low_wm_percent_set)
++                      DMEMIT(" low_watermark %u", wc->low_wm_percent_value);
+               if (wc->max_writeback_jobs_set)
+                       DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
+               if (wc->autocommit_blocks_set)
+                       DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
+               if (wc->autocommit_time_set)
+-                      DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+-              if (wc->max_age != MAX_AGE_UNSPECIFIED)
+-                      DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age));
+-              if (wc->cleaner)
++                      DMEMIT(" autocommit_time %u", wc->autocommit_time_value);
++              if (wc->max_age_set)
++                      DMEMIT(" max_age %u", wc->max_age_value);
++              if (wc->cleaner_set)
+                       DMEMIT(" cleaner");
+               if (wc->writeback_fua_set)
+                       DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
+@@ -2519,7 +2525,7 @@ static void writecache_status(struct dm_
+ static struct target_type writecache_target = {
+       .name                   = "writecache",
+-      .version                = {1, 3, 0},
++      .version                = {1, 4, 0},
+       .module                 = THIS_MODULE,
+       .ctr                    = writecache_ctr,
+       .dtr                    = writecache_dtr,
diff --git a/queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch b/queue-5.10/f2fs-enforce-the-immutable-flag-on-open-files.patch
new file mode 100644 (file)
index 0000000..478000e
--- /dev/null
@@ -0,0 +1,77 @@
+From e0fcd01510ad025c9bbce704c5c2579294056141 Mon Sep 17 00:00:00 2001
+From: Chao Yu <yuchao0@huawei.com>
+Date: Sat, 26 Dec 2020 18:07:01 +0800
+Subject: f2fs: enforce the immutable flag on open files
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit e0fcd01510ad025c9bbce704c5c2579294056141 upstream.
+
+This patch ports commit 02b016ca7f99 ("ext4: enforce the immutable
+flag on open files") to f2fs.
+
+According to the chattr man page, "a file with the 'i' attribute
+cannot be modified..."  Historically, this was only enforced when the
+file was opened, per the rest of the description, "... and the file
+can not be opened in write mode".
+
+There is general agreement that we should standardize all file systems
+to prevent modifications even for files that were opened at the time
+the immutable flag is set.  Eventually, a change to enforce this at
+the VFS layer should be landing in mainline.
+
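+The enforced behaviour can be exercised from userspace with a sketch like
+the following (it needs root for CAP_LINUX_IMMUTABLE and a file on an
+f2fs mount; error handling is omitted for brevity):
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <linux/fs.h>
+
+int main(void)
+{
+        int fd = open("testfile", O_CREAT | O_RDWR, 0644);
+        int flags;
+
+        ioctl(fd, FS_IOC_GETFLAGS, &flags);
+        flags |= FS_IMMUTABLE_FL;
+        ioctl(fd, FS_IOC_SETFLAGS, &flags);     /* set 'i' while fd is open */
+
+        /* with this patch, the write on the already-open fd fails: EPERM */
+        if (write(fd, "x", 1) < 0)
+                perror("write");
+        close(fd);
+        return 0;
+}
+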
+Cc: stable@kernel.org
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/file.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -59,6 +59,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(s
+       bool need_alloc = true;
+       int err = 0;
++      if (unlikely(IS_IMMUTABLE(inode)))
++              return VM_FAULT_SIGBUS;
++
+       if (unlikely(f2fs_cp_error(sbi))) {
+               err = -EIO;
+               goto err;
+@@ -869,6 +872,14 @@ int f2fs_setattr(struct dentry *dentry,
+       if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+               return -EIO;
++      if (unlikely(IS_IMMUTABLE(inode)))
++              return -EPERM;
++
++      if (unlikely(IS_APPEND(inode) &&
++                      (attr->ia_valid & (ATTR_MODE | ATTR_UID |
++                                ATTR_GID | ATTR_TIMES_SET))))
++              return -EPERM;
++
+       if ((attr->ia_valid & ATTR_SIZE) &&
+               !f2fs_is_compress_backend_ready(inode))
+               return -EOPNOTSUPP;
+@@ -4084,6 +4095,11 @@ static ssize_t f2fs_file_write_iter(stru
+               inode_lock(inode);
+       }
++      if (unlikely(IS_IMMUTABLE(inode))) {
++              ret = -EPERM;
++              goto unlock;
++      }
++
+       ret = generic_write_checks(iocb, from);
+       if (ret > 0) {
+               bool preallocated = false;
+@@ -4148,6 +4164,7 @@ write:
+               if (ret > 0)
+                       f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+       }
++unlock:
+       inode_unlock(inode);
+ out:
+       trace_f2fs_file_write_iter(inode, iocb->ki_pos,
diff --git a/queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch b/queue-5.10/f2fs-fix-out-of-repair-__setattr_copy.patch
new file mode 100644 (file)
index 0000000..adeca00
--- /dev/null
@@ -0,0 +1,36 @@
+From 2562515f0ad7342bde6456602c491b64c63fe950 Mon Sep 17 00:00:00 2001
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 16 Dec 2020 17:15:23 +0800
+Subject: f2fs: fix out-of-repair __setattr_copy()
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 2562515f0ad7342bde6456602c491b64c63fe950 upstream.
+
+__setattr_copy() was copied from setattr_copy() in fs/attr.c, but two
+later fixes to the original were never applied to this copied inner
+function; fix it.
+
+Commit 7fa294c8991c ("userns: Allow chown and setgid preservation")
+Commit 23adbe12ef7d ("fs,userns: Change inode_capable to capable_wrt_inode_uidgid")
+
+Fixes: fbfa2cc58d53 ("f2fs: add file operations")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/file.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -851,7 +851,8 @@ static void __setattr_copy(struct inode
+       if (ia_valid & ATTR_MODE) {
+               umode_t mode = attr->ia_mode;
+-              if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
++              if (!in_group_p(inode->i_gid) &&
++                      !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+                       mode &= ~S_ISGID;
+               set_acl_inode(inode, mode);
+       }
diff --git a/queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch b/queue-5.10/f2fs-flush-data-when-enabling-checkpoint-back.patch
new file mode 100644 (file)
index 0000000..41b3665
--- /dev/null
@@ -0,0 +1,35 @@
+From b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Tue, 26 Jan 2021 17:00:42 -0800
+Subject: f2fs: flush data when enabling checkpoint back
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec upstream.
+
+During the checkpoint=disable period, f2fs bypasses all synchronous IOs such as
+sync and fsync. So, when re-enabling checkpointing, we must flush all of them
+in order to keep the data persistent. Otherwise, a sudden power cut right after
+re-enabling it will cause data loss.
+
+Fixes: 4354994f097d ("f2fs: checkpoint disabling")
+Cc: stable@vger.kernel.org
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/super.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1764,6 +1764,9 @@ restore_flag:
+ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+ {
++      /* we should flush all the data to keep data consistency */
++      sync_inodes_sb(sbi->sb);
++
+       down_write(&sbi->gc_lock);
+       f2fs_dirty_to_prefree(sbi);
diff --git a/queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch b/queue-5.10/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch
new file mode 100644 (file)
index 0000000..e6ec74e
--- /dev/null
@@ -0,0 +1,65 @@
+From 78178ca844f0eb88f21f31c7fde969384be4c901 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Fri, 5 Feb 2021 13:50:41 -0500
+Subject: gfs2: Don't skip dlm unlock if glock has an lvb
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit 78178ca844f0eb88f21f31c7fde969384be4c901 upstream.
+
+Patch fb6791d100d1 was designed to allow gfs2 to unmount quicker by
+skipping the step where it tells dlm to unlock glocks in EX with lvbs.
+This was done because when gfs2 unmounts a file system, it destroys the
+dlm lockspace shortly after it destroys the glocks so it doesn't need to
+unlock them all: the unlock is implied when the lockspace is destroyed
+by dlm.
+
+However, that patch introduced a use-after-free in dlm: as part of its
+normal dlm_recoverd process, it can call ls_recovery to recover dead
+locks. In so doing, it can call recover_rsbs which calls recover_lvb for
+any mastered rsbs. Func recover_lvb runs through the list of lkbs queued
+to the given rsb (if the glock is cached but unlocked, it will still be
+queued to the rsb, but in NL--Unlocked--mode) and if it has an lvb,
+copies it to the rsb, thus trying to preserve the lvb. However, when
+gfs2 skips the dlm unlock step, it frees the glock and its lvb, which
+means dlm's function recover_lvb references the now freed lvb pointer,
+copying the freed lvb memory to the rsb.
+
+This patch changes the check in gdlm_put_lock so that it calls
+dlm_unlock for all glocks that contain an lvb pointer.
+
+Fixes: fb6791d100d1 ("GFS2: skip dlm_unlock calls in unmount")
+Cc: stable@vger.kernel.org # v3.8+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/lock_dlm.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/fs/gfs2/lock_dlm.c
++++ b/fs/gfs2/lock_dlm.c
+@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_gl
+ {
+       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+       struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+-      int lvb_needs_unlock = 0;
+       int error;
+       if (gl->gl_lksb.sb_lkid == 0) {
+@@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_gl
+       gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
+       gfs2_update_request_times(gl);
+-      /* don't want to skip dlm_unlock writing the lvb when lock is ex */
+-
+-      if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
+-              lvb_needs_unlock = 1;
++      /* don't want to skip dlm_unlock writing the lvb when lock has one */
+       if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+-          !lvb_needs_unlock) {
++          !gl->gl_lksb.sb_lvbptr) {
+               gfs2_glock_free(gl);
+               return;
+       }
diff --git a/queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch b/queue-5.10/gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch
new file mode 100644 (file)
index 0000000..1819a5b
--- /dev/null
@@ -0,0 +1,75 @@
+From f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Mon, 18 Jan 2021 15:18:59 -0500
+Subject: gfs2: fix glock confusion in function signal_our_withdraw
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 upstream.
+
+If go_free is defined, function signal_our_withdraw is supposed to
+synchronize on the GLF_FREEING flag of the inode glock, but it
+accidentally does that on the live glock. Fix that and disambiguate
+the glock variables.
+
+Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/util.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -93,9 +93,10 @@ out_unlock:
+ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+ {
+-      struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
++      struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
+       struct inode *inode = sdp->sd_jdesc->jd_inode;
+       struct gfs2_inode *ip = GFS2_I(inode);
++      struct gfs2_glock *i_gl = ip->i_gl;
+       u64 no_formal_ino = ip->i_no_formal_ino;
+       int ret = 0;
+       int tries;
+@@ -141,7 +142,8 @@ static void signal_our_withdraw(struct g
+               atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+               thaw_super(sdp->sd_vfs);
+       } else {
+-              wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
++              wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
++                          TASK_UNINTERRUPTIBLE);
+       }
+       /*
+@@ -161,15 +163,15 @@ static void signal_our_withdraw(struct g
+        * on other nodes to be successful, otherwise we remain the owner of
+        * the glock as far as dlm is concerned.
+        */
+-      if (gl->gl_ops->go_free) {
+-              set_bit(GLF_FREEING, &gl->gl_flags);
+-              wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
++      if (i_gl->gl_ops->go_free) {
++              set_bit(GLF_FREEING, &i_gl->gl_flags);
++              wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+       }
+       /*
+        * Dequeue the "live" glock, but keep a reference so it's never freed.
+        */
+-      gfs2_glock_hold(gl);
++      gfs2_glock_hold(live_gl);
+       gfs2_glock_dq_wait(&sdp->sd_live_gh);
+       /*
+        * We enqueue the "live" glock in EX so that all other nodes
+@@ -208,7 +210,7 @@ static void signal_our_withdraw(struct g
+               gfs2_glock_nq(&sdp->sd_live_gh);
+       }
+-      gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
++      gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
+       clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+       /*
diff --git a/queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch b/queue-5.10/gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch
new file mode 100644 (file)
index 0000000..a36566f
--- /dev/null
@@ -0,0 +1,34 @@
+From 834ec3e1ee65029029225a86c12337a6cd385af7 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Fri, 5 Feb 2021 18:11:28 +0100
+Subject: gfs2: Lock imbalance on error path in gfs2_recover_one
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 834ec3e1ee65029029225a86c12337a6cd385af7 upstream.
+
+In gfs2_recover_one, fix a sd_log_flush_lock imbalance when a recovery
+pass fails.
+
+Fixes: c9ebc4b73799 ("gfs2: allow journal replay to hold sd_log_flush_lock")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/recovery.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/gfs2/recovery.c
++++ b/fs/gfs2/recovery.c
+@@ -514,8 +514,10 @@ void gfs2_recover_func(struct work_struc
+                       error = foreach_descriptor(jd, head.lh_tail,
+                                                  head.lh_blkno, pass);
+                       lops_after_scan(jd, error, pass);
+-                      if (error)
++                      if (error) {
++                              up_read(&sdp->sd_log_flush_lock);
+                               goto fail_gunlock_thaw;
++                      }
+               }
+               recover_local_statfs(jd, &head);
diff --git a/queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch b/queue-5.10/gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch
new file mode 100644 (file)
index 0000000..5660f60
--- /dev/null
@@ -0,0 +1,46 @@
+From 7009fa9cd9a5262944b30eb7efb1f0561d074b68 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Tue, 9 Feb 2021 18:32:32 +0100
+Subject: gfs2: Recursive gfs2_quota_hold in gfs2_iomap_end
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 7009fa9cd9a5262944b30eb7efb1f0561d074b68 upstream.
+
+When starting an iomap write, gfs2_quota_lock_check -> gfs2_quota_lock
+-> gfs2_quota_hold is called from gfs2_iomap_begin.  At the end of the
+write, before unlocking the quotas, punch_hole -> gfs2_quota_hold can be
+called again in gfs2_iomap_end, which is incorrect and leads to a failed
+assertion.  Instead, move the call to gfs2_quota_unlock before the call
+to punch_hole to fix that.
+
+Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support")
+Cc: stable@vger.kernel.org # v4.19+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/bmap.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/gfs2/bmap.c
++++ b/fs/gfs2/bmap.c
+@@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode *
+       gfs2_inplace_release(ip);
++      if (ip->i_qadata && ip->i_qadata->qa_qd_num)
++              gfs2_quota_unlock(ip);
++
+       if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+               /* Deallocate blocks that were just allocated. */
+               loff_t blockmask = i_blocksize(inode) - 1;
+@@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode *
+               }
+       }
+-      if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+-              gfs2_quota_unlock(ip);
+-
+       if (unlikely(!written))
+               goto out_unlock;
diff --git a/queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch b/queue-5.10/irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch
new file mode 100644 (file)
index 0000000..2eb7e7d
--- /dev/null
@@ -0,0 +1,34 @@
+From c1f664d2400e73d5ca0fcd067fa5847d2c789c11 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Tue, 9 Feb 2021 15:10:51 +0800
+Subject: irqchip/loongson-pch-msi: Use bitmap_zalloc() to allocate bitmap
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit c1f664d2400e73d5ca0fcd067fa5847d2c789c11 upstream.
+
+Currently we use bitmap_alloc() to allocate the msi bitmap, which should
+be initialized with zero. This is obviously wrong, but it works because
+msi can fall back to legacy interrupt mode. So use bitmap_zalloc() instead.
+
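+For reference, bitmap_zalloc(n, gfp) behaves like bitmap_alloc(n, gfp)
+followed by bitmap_zero(); a sketch of the equivalent open-coded fix
+(illustrative only, not what the patch does):
+
+        priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL);
+        if (priv->msi_map)
+                bitmap_zero(priv->msi_map, priv->num_irqs); /* clear stale bits */
+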
+Fixes: 632dcc2c75ef6de3272aa ("irqchip: Add Loongson PCH MSI controller")
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210209071051.2078435-1-chenhuacai@loongson.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/irqchip/irq-loongson-pch-msi.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/irqchip/irq-loongson-pch-msi.c
++++ b/drivers/irqchip/irq-loongson-pch-msi.c
+@@ -225,7 +225,7 @@ static int pch_msi_init(struct device_no
+               goto err_priv;
+       }
+-      priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL);
++      priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL);
+       if (!priv->msi_map) {
+               ret = -ENOMEM;
+               goto err_priv;
diff --git a/queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch b/queue-5.10/proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch
new file mode 100644 (file)
index 0000000..3969586
--- /dev/null
@@ -0,0 +1,53 @@
+From 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 14 Feb 2021 13:21:43 -0700
+Subject: proc: don't allow async path resolution of /proc/thread-self components
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 upstream.
+
+If this is attempted by an io-wq kthread, then return -EOPNOTSUPP as we
+don't currently support that. Once we can get task_pid_ptr() doing the
+right thing, then this can go away again.
+
+Use PF_IO_WORKER for this to specifically target the io_uring workers.
+Modify the /proc/self/ check to use PF_IO_WORKER as well.
+
+Cc: stable@vger.kernel.org
+Fixes: 8d4c3e76e3be ("proc: don't allow async path resolution of /proc/self components")
+Reported-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/proc/self.c        |    2 +-
+ fs/proc/thread_self.c |    7 +++++++
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/proc/self.c
++++ b/fs/proc/self.c
+@@ -20,7 +20,7 @@ static const char *proc_self_get_link(st
+        * Not currently supported. Once we can inherit all of struct pid,
+        * we can allow this.
+        */
+-      if (current->flags & PF_KTHREAD)
++      if (current->flags & PF_IO_WORKER)
+               return ERR_PTR(-EOPNOTSUPP);
+       if (!tgid)
+--- a/fs/proc/thread_self.c
++++ b/fs/proc/thread_self.c
+@@ -17,6 +17,13 @@ static const char *proc_thread_self_get_
+       pid_t pid = task_pid_nr_ns(current, ns);
+       char *name;
++      /*
++       * Not currently supported. Once we can inherit all of struct pid,
++       * we can allow this.
++       */
++      if (current->flags & PF_IO_WORKER)
++              return ERR_PTR(-EOPNOTSUPP);
++
+       if (!pid)
+               return ERR_PTR(-ENOENT);
+       name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
diff --git a/queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch b/queue-5.10/s390-vtime-fix-inline-assembly-clobber-list.patch
new file mode 100644 (file)
index 0000000..c68e57b
--- /dev/null
@@ -0,0 +1,38 @@
+From b29c5093820d333eef22f58cd04ec0d089059c39 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Tue, 2 Feb 2021 16:45:37 +0100
+Subject: s390/vtime: fix inline assembly clobber list
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit b29c5093820d333eef22f58cd04ec0d089059c39 upstream.
+
+The stck/stckf instruction used in the inline assembly within
+do_account_vtime() changes the condition code. This is not reflected
+in the clobber list, and therefore might result in incorrect code
+generation.
+
+It seems unlikely that the compiler could generate incorrect code
+considering the surrounding C code, but it must still be fixed.
+
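+A minimal sketch of the corrected pattern (builds on s390 only; the
+wrapper function is invented for illustration):
+
+#include <stdio.h>
+
+/* stck stores the TOD clock into memory and sets the condition code */
+static unsigned long long get_tod(void)
+{
+        unsigned long long clk;
+
+        asm volatile("  stck    %0"
+                     : "=Q" (clk)
+                     :
+                     : "cc");   /* declare the cc clobber explicitly */
+        return clk;
+}
+
+int main(void)
+{
+        printf("tod: %llx\n", get_tod());
+        return 0;
+}
+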
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/vtime.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/vtime.c
++++ b/arch/s390/kernel/vtime.c
+@@ -136,7 +136,8 @@ static int do_account_vtime(struct task_
+               "       stck    %1"     /* Store current tod clock value */
+ #endif
+               : "=Q" (S390_lowcore.last_update_timer),
+-                "=Q" (S390_lowcore.last_update_clock));
++                "=Q" (S390_lowcore.last_update_clock)
++              : : "cc");
+       clock = S390_lowcore.last_update_clock - clock;
+       timer -= S390_lowcore.last_update_timer;
diff --git a/queue-5.10/series b/queue-5.10/series
index 791e4b72304f4433d84c5c2236ed34e78ec229b4..4ab5cb59c2241d83abc7dda296be19945626f18a 100644 (file)
@@ -619,3 +619,36 @@ exfat-fix-shift-out-of-bounds-in-exfat_fill_super.patch
 zonefs-fix-file-size-of-zones-in-full-condition.patch
 kcmp-support-selection-of-sys_kcmp-without-checkpoint_restore.patch
 thermal-cpufreq_cooling-freq_qos_update_request-returns-0-on-error.patch
+cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
+cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch
+cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
+proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch
+s390-vtime-fix-inline-assembly-clobber-list.patch
+virtio-s390-implement-virtio-ccw-revision-2-correctly.patch
+um-mm-check-more-comprehensively-for-stub-changes.patch
+um-defer-killing-userspace-on-page-table-update-failures.patch
+irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch
+f2fs-fix-out-of-repair-__setattr_copy.patch
+f2fs-enforce-the-immutable-flag-on-open-files.patch
+f2fs-flush-data-when-enabling-checkpoint-back.patch
+sparc32-fix-a-user-triggerable-oops-in-clear_user.patch
+spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch
+spi-spi-synquacer-fix-set_cs-handling.patch
+gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch
+gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch
+gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch
+gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch
+dm-fix-deadlock-when-swapping-to-encrypted-device.patch
+dm-table-fix-iterate_devices-based-device-capability-checks.patch
+dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch
+dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch
+dm-writecache-fix-performance-degradation-in-ssd-mode.patch
+dm-writecache-return-the-exact-table-values-that-were-set.patch
+dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch
+dm-era-recover-committed-writeset-after-crash.patch
+dm-era-update-in-core-bitset-after-committing-the-metadata.patch
+dm-era-verify-the-data-block-size-hasn-t-changed.patch
+dm-era-fix-bitset-memory-leaks.patch
+dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch
+dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch
+dm-era-only-resize-metadata-in-preresume.patch
diff --git a/queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch b/queue-5.10/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch
new file mode 100644 (file)
index 0000000..780001e
--- /dev/null
@@ -0,0 +1,50 @@
+From 7780918b36489f0b2f9a3749d7be00c2ceaec513 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 20 Jul 2020 02:21:51 +0100
+Subject: sparc32: fix a user-triggerable oops in clear_user()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 7780918b36489f0b2f9a3749d7be00c2ceaec513 upstream.
+
+Back in 2.1.29 the clear_user() guts (__bzero()) had been merged
+with memset().  Unfortunately, while all exception handlers had been
+copied, one of the exception table entries got lost.  As the result,
+clear_user() starting at 128*n bytes before the end of page and
+spanning between 8 and 127 bytes into the next page would oops when
+the second page is unmapped.  It's trivial to reproduce - all
+it takes is
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+int main(void)
+{
+       int fd = open("/dev/zero", O_RDONLY);
+       char *p = mmap(NULL, 16384, PROT_READ|PROT_WRITE,
+                       MAP_PRIVATE|MAP_ANON, -1, 0);
+       munmap(p + 8192, 8192);
+       read(fd, p + 8192 - 128, 192);
+}
+
+which had been oopsing since March 1997.  Says something about
+the quality of test coverage... ;-/  And while today the sparc32 port
+is nearly dead, back in '97 it had been very much alive; in fact,
+sparc64 had only been in mainline for 3 months by that point...
+
+Cc: stable@kernel.org
+Fixes: v2.1.29
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/lib/memset.S |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/sparc/lib/memset.S
++++ b/arch/sparc/lib/memset.S
+@@ -142,6 +142,7 @@ __bzero:
+       ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
+       ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
+ 13:
++      EXT(12b, 13b, 21f)
+       be      8f
+        andcc  %o1, 4, %g0
diff --git a/queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch b/queue-5.10/spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch
new file mode 100644 (file)
index 0000000..1a1ce4d
--- /dev/null
@@ -0,0 +1,42 @@
+From 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e Mon Sep 17 00:00:00 2001
+From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+Date: Sat, 30 Jan 2021 15:35:45 +0100
+Subject: spi: fsl: invert spisel_boot signal on MPC8309
+
+From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+
+commit 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e upstream.
+
+Commit 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH
+is not set in spi->mode") broke our MPC8309 board by effectively
+inverting the boolean value passed to fsl_spi_cs_control. The
+SPISEL_BOOT signal is used as chipselect, but it's not a gpio, so
+we cannot rely on gpiolib handling the polarity.
+
+Adapt to the new world order by inverting the logic here. This does
+assume that the slave sitting at the SPISEL_BOOT is active low, but
+should that ever turn out not to be the case, one can create a stub
+gpiochip driver controlling a single gpio (or rather, a single "spo",
+special-purpose output).
+
+Fixes: 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH is not set in spi->mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+Link: https://lore.kernel.org/r/20210130143545.505613-1-rasmus.villemoes@prevas.dk
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-fsl-spi.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -695,7 +695,7 @@ static void fsl_spi_cs_control(struct sp
+               if (WARN_ON_ONCE(!pinfo->immr_spi_cs))
+                       return;
+-              iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs);
++              iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs);
+       }
+ }
diff --git a/queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch b/queue-5.10/spi-spi-synquacer-fix-set_cs-handling.patch
new file mode 100644 (file)
index 0000000..5b22130
--- /dev/null
@@ -0,0 +1,36 @@
+From 1c9f1750f0305bf605ff22686fc0ac89c06deb28 Mon Sep 17 00:00:00 2001
+From: Masahisa Kojima <masahisa.kojima@linaro.org>
+Date: Mon, 1 Feb 2021 01:31:09 -0600
+Subject: spi: spi-synquacer: fix set_cs handling
+
+From: Masahisa Kojima <masahisa.kojima@linaro.org>
+
+commit 1c9f1750f0305bf605ff22686fc0ac89c06deb28 upstream.
+
+When the slave chip select is deasserted, the DMSTOP bit
+must be set.
+
+Fixes: b0823ee35cf9 ("spi: Add spi driver for Socionext SynQuacer platform")
+Signed-off-by: Masahisa Kojima <masahisa.kojima@linaro.org>
+Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210201073109.9036-1-jassisinghbrar@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-synquacer.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/spi/spi-synquacer.c
++++ b/drivers/spi/spi-synquacer.c
+@@ -490,6 +490,10 @@ static void synquacer_spi_set_cs(struct
+       val &= ~(SYNQUACER_HSSPI_DMPSEL_CS_MASK <<
+                SYNQUACER_HSSPI_DMPSEL_CS_SHIFT);
+       val |= spi->chip_select << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT;
++
++      if (!enable)
++              val |= SYNQUACER_HSSPI_DMSTOP_STOP;
++
+       writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+ }
diff --git a/queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch b/queue-5.10/um-defer-killing-userspace-on-page-table-update-failures.patch
new file mode 100644 (file)
index 0000000..1ae1604
--- /dev/null
@@ -0,0 +1,82 @@
+From a7d48886cacf8b426e0079bca9639d2657cf2d38 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 13 Jan 2021 22:08:03 +0100
+Subject: um: defer killing userspace on page table update failures
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit a7d48886cacf8b426e0079bca9639d2657cf2d38 upstream.
+
+In some cases we can get to fix_range_common() with mmap_sem held,
+and in others we get there without it being held. For example, we
+get there with it held from sys_mprotect(), and without it held
+from fork_handler().
+
+Avoid any issues with this by simply deferring the kill of the task
+until it runs the next time. Do it on the mm so that another task that
+shares the same mm can't continue running afterwards.
+
+Cc: stable@vger.kernel.org
+Fixes: 468f65976a8d ("um: Fix hung task in fix_range_common()")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/include/shared/skas/mm_id.h |    1 +
+ arch/um/kernel/tlb.c                |    7 +++----
+ arch/um/os-Linux/skas/process.c     |    4 ++++
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/arch/um/include/shared/skas/mm_id.h
++++ b/arch/um/include/shared/skas/mm_id.h
+@@ -12,6 +12,7 @@ struct mm_id {
+               int pid;
+       } u;
+       unsigned long stack;
++      int kill;
+ };
+ #endif
+--- a/arch/um/kernel/tlb.c
++++ b/arch/um/kernel/tlb.c
+@@ -352,12 +352,11 @@ void fix_range_common(struct mm_struct *
+       /* This is not an else because ret is modified above */
+       if (ret) {
++              struct mm_id *mm_idp = &current->mm->context.id;
++
+               printk(KERN_ERR "fix_range_common: failed, killing current "
+                      "process: %d\n", task_tgid_vnr(current));
+-              /* We are under mmap_lock, release it such that current can terminate */
+-              mmap_write_unlock(current->mm);
+-              force_sig(SIGKILL);
+-              do_signal(&current->thread.regs);
++              mm_idp->kill = 1;
+       }
+ }
+--- a/arch/um/os-Linux/skas/process.c
++++ b/arch/um/os-Linux/skas/process.c
+@@ -249,6 +249,7 @@ static int userspace_tramp(void *stack)
+ }
+ int userspace_pid[NR_CPUS];
++int kill_userspace_mm[NR_CPUS];
+ /**
+  * start_userspace() - prepare a new userspace process
+@@ -342,6 +343,8 @@ void userspace(struct uml_pt_regs *regs,
+       interrupt_end();
+       while (1) {
++              if (kill_userspace_mm[0])
++                      fatal_sigsegv();
+               /*
+                * This can legitimately fail if the process loads a
+@@ -650,4 +653,5 @@ void reboot_skas(void)
+ void __switch_mm(struct mm_id *mm_idp)
+ {
+       userspace_pid[0] = mm_idp->u.pid;
++      kill_userspace_mm[0] = mm_idp->kill;
+ }
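
The shape of the fix — mark the mm as doomed at the point of failure, where killing is unsafe, and act on the mark at the top of the userspace run loop — can be modeled standalone. A minimal sketch follows, assuming a single "CPU"; exit() stands in for fatal_sigsegv(), and the mirrored flag mimics what __switch_mm() copies:

    #include <stdio.h>
    #include <stdlib.h>

    struct mm_id {
            int pid;
            int kill;               /* set when page-table updates failed */
    };

    static int kill_userspace_mm;   /* per-"CPU" mirror, as in __switch_mm() */

    static void fix_range_failed(struct mm_id *mm_idp)
    {
            /* cannot kill here: the caller may or may not hold mmap_sem */
            mm_idp->kill = 1;
    }

    static void switch_mm(struct mm_id *mm_idp)
    {
            kill_userspace_mm = mm_idp->kill;
    }

    static void run_userspace(void)
    {
            for (;;) {
                    if (kill_userspace_mm) {
                            fprintf(stderr, "killing doomed mm\n");
                            exit(1);        /* stands in for fatal_sigsegv() */
                    }
                    /* ... resume the userspace process ... */
                    break;                  /* keep the demo finite */
            }
    }

    int main(void)
    {
            struct mm_id mm = { .pid = 42, .kill = 0 };

            fix_range_failed(&mm);  /* a page-table update fails somewhere */
            switch_mm(&mm);         /* next time this mm is scheduled... */
            run_userspace();        /* ...the run loop notices and kills it */
            return 0;
    }
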
diff --git a/queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch b/queue-5.10/um-mm-check-more-comprehensively-for-stub-changes.patch
new file mode 100644 (file)
index 0000000..b1f82d2
--- /dev/null
@@ -0,0 +1,71 @@
+From 47da29763ec9a153b9b685bff9db659e4e09e494 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 13 Jan 2021 22:08:02 +0100
+Subject: um: mm: check more comprehensively for stub changes
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 47da29763ec9a153b9b685bff9db659e4e09e494 upstream.
+
+If userspace tries to change the stub, we need to kill it,
+because otherwise it can escape the virtual machine. In a
+few cases the stub checks were insufficient, e.g. if userspace
+just tries to
+
+       mmap(0x100000 - 0x1000, 0x3000, ...)
+
+it could succeed in getting a new private/anonymous mapping
+replacing the stubs. Fix this by checking everywhere, and
+checking for _overlap_, not just direct changes.
+
+Cc: stable@vger.kernel.org
+Fixes: 3963333fe676 ("uml: cover stubs with a VMA")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/kernel/tlb.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/um/kernel/tlb.c
++++ b/arch/um/kernel/tlb.c
+@@ -125,6 +125,9 @@ static int add_mmap(unsigned long virt,
+       struct host_vm_op *last;
+       int fd = -1, ret = 0;
++      if (virt + len > STUB_START && virt < STUB_END)
++              return -EINVAL;
++
+       if (hvc->userspace)
+               fd = phys_mapping(phys, &offset);
+       else
+@@ -162,7 +165,7 @@ static int add_munmap(unsigned long addr
+       struct host_vm_op *last;
+       int ret = 0;
+-      if ((addr >= STUB_START) && (addr < STUB_END))
++      if (addr + len > STUB_START && addr < STUB_END)
+               return -EINVAL;
+       if (hvc->index != 0) {
+@@ -192,6 +195,9 @@ static int add_mprotect(unsigned long ad
+       struct host_vm_op *last;
+       int ret = 0;
++      if (addr + len > STUB_START && addr < STUB_END)
++              return -EINVAL;
++
+       if (hvc->index != 0) {
+               last = &hvc->ops[hvc->index - 1];
+               if ((last->type == MPROTECT) &&
+@@ -472,6 +478,10 @@ void flush_tlb_page(struct vm_area_struc
+       struct mm_id *mm_id;
+       address &= PAGE_MASK;
++
++      if (address >= STUB_START && address < STUB_END)
++              goto kill;
++
+       pgd = pgd_offset(mm, address);
+       if (!pgd_present(*pgd))
+               goto kill;
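
The predicate addr + len > STUB_START && addr < STUB_END used throughout the patch is the standard overlap test for the half-open intervals [addr, addr + len) and [STUB_START, STUB_END). A small sketch, with illustrative stub addresses rather than UML's real ones, shows how the commit message's mmap example slips past the old start-only check but is caught by the overlap check:

    #include <stdio.h>

    #define STUB_START 0x100000ul   /* illustrative, not UML's real stub address */
    #define STUB_END   0x102000ul

    /* old, insufficient check: containment of the start address only */
    static int rejected_old(unsigned long addr, unsigned long len)
    {
            (void)len;               /* the old check ignored the length */
            return addr >= STUB_START && addr < STUB_END;
    }

    /* new check: does [addr, addr + len) overlap [STUB_START, STUB_END)? */
    static int rejected_new(unsigned long addr, unsigned long len)
    {
            return addr + len > STUB_START && addr < STUB_END;
    }

    int main(void)
    {
            /* the mmap(0x100000 - 0x1000, 0x3000, ...) case from the message */
            unsigned long addr = STUB_START - 0x1000, len = 0x3000;

            printf("old check rejects: %d\n", rejected_old(addr, len)); /* 0: escapes */
            printf("new check rejects: %d\n", rejected_new(addr, len)); /* 1: caught */
            return 0;
    }
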
diff --git a/queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch b/queue-5.10/virtio-s390-implement-virtio-ccw-revision-2-correctly.patch
new file mode 100644 (file)
index 0000000..d2c2e21
--- /dev/null
@@ -0,0 +1,59 @@
+From 182f709c5cff683e6732d04c78e328de0532284f Mon Sep 17 00:00:00 2001
+From: Cornelia Huck <cohuck@redhat.com>
+Date: Tue, 16 Feb 2021 12:06:45 +0100
+Subject: virtio/s390: implement virtio-ccw revision 2 correctly
+
+From: Cornelia Huck <cohuck@redhat.com>
+
+commit 182f709c5cff683e6732d04c78e328de0532284f upstream.
+
+CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw,
+and drivers should only rely on it being implemented when they
+negotiated at least that revision with the device.
+
+However, virtio_ccw_get_status() issued READ_STATUS for any
+device operating at least at revision 1. If the device accepts
+READ_STATUS regardless of the negotiated revision (which some
+implementations like QEMU do, even though the spec currently does
+not allow it), everything works as intended. While a device
+rejecting the command should also be handled gracefully, we will
+not be able to see any changes the device makes to the status,
+such as setting NEEDS_RESET or setting the status to zero after
+a completed reset.
+
+Because we never bumped the maximum revision, we only ever
+negotiated up to revision 1; bump it now and properly send
+READ_STATUS only when operating at revision 2 or higher.
+
+Cc: stable@vger.kernel.org
+Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw")
+Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
+Signed-off-by: Cornelia Huck <cohuck@redhat.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Link: https://lore.kernel.org/r/20210216110645.1087321-1-cohuck@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/virtio/virtio_ccw.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/virtio/virtio_ccw.c
++++ b/drivers/s390/virtio/virtio_ccw.c
+@@ -117,7 +117,7 @@ struct virtio_rev_info {
+ };
+ /* the highest virtio-ccw revision we support */
+-#define VIRTIO_CCW_REV_MAX 1
++#define VIRTIO_CCW_REV_MAX 2
+ struct virtio_ccw_vq_info {
+       struct virtqueue *vq;
+@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct v
+       u8 old_status = vcdev->dma_area->status;
+       struct ccw1 *ccw;
+-      if (vcdev->revision < 1)
++      if (vcdev->revision < 2)
+               return vcdev->dma_area->status;
+       ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));