--- /dev/null
+From a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Thu, 7 Jan 2021 19:43:30 +0100
+Subject: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit a45ee4d4e13b0e35a8ec7ea0bf9267243d57b302 upstream.
+
+All of the callers of intel_pstate_get_hwp_max() access the struct
+cpudata object that corresponds to the given CPU already and the
+function itself needs to access that object (in order to update
+hwp_cap_cached), so modify the code to pass a struct cpudata pointer
+to it instead of the CPU number.
+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Tested-by: Chen Yu <yu.c.chen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -829,13 +829,13 @@ static struct freq_attr *hwp_cpufreq_att
+ NULL,
+ };
+
+-static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
++static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max,
+ int *current_max)
+ {
+ u64 cap;
+
+- rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+- WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
++ rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
++ WRITE_ONCE(cpu->hwp_cap_cached, cap);
+ if (global.no_turbo || global.turbo_disabled)
+ *current_max = HWP_GUARANTEED_PERF(cap);
+ else
+@@ -1223,7 +1223,7 @@ static void update_qos_request(enum freq
+ continue;
+
+ if (hwp_active)
+- intel_pstate_get_hwp_max(i, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ else
+ turbo_max = cpu->pstate.turbo_pstate;
+
+@@ -1733,7 +1733,7 @@ static void intel_pstate_get_cpu_pstates
+ if (hwp_active && !hwp_mode_bdw) {
+ unsigned int phy_max, current_max;
+
+-	intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
++	intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
+ cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+ cpu->pstate.turbo_pstate = phy_max;
+ } else {
+@@ -2217,7 +2217,7 @@ static void intel_pstate_update_perf_lim
+ * rather than pure ratios.
+ */
+ if (hwp_active) {
+- intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ } else {
+ max_state = global.no_turbo || global.turbo_disabled ?
+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+@@ -2332,7 +2332,7 @@ static void intel_pstate_verify_cpu_poli
+ if (hwp_active) {
+ int max_state, turbo_max;
+
+- intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ max_freq = max_state * cpu->pstate.scaling;
+ } else {
+ max_freq = intel_pstate_get_max_freq(cpu);
+@@ -2675,7 +2675,7 @@ static int intel_cpufreq_cpu_init(struct
+ if (hwp_active) {
+ u64 value;
+
+- intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state);
++ intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+ policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
+ rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+ WRITE_ONCE(cpu->hwp_req_cached, value);
--- /dev/null
+From 6f67e060083a84a4cc364eab6ae40c717165fb0c Mon Sep 17 00:00:00 2001
+From: Chen Yu <yu.c.chen@intel.com>
+Date: Tue, 12 Jan 2021 13:21:27 +0800
+Subject: cpufreq: intel_pstate: Get per-CPU max freq via MSR_HWP_CAPABILITIES if available
+
+From: Chen Yu <yu.c.chen@intel.com>
+
+commit 6f67e060083a84a4cc364eab6ae40c717165fb0c upstream.
+
+Currently, when turbo is disabled (either by BIOS or by the user),
+the intel_pstate driver reads the max non-turbo frequency from the
+package-wide MSR_PLATFORM_INFO(0xce) register.
+
+However, on asymmetric platforms it is possible in theory that small
+and big cores with HWP enabled might have different max non-turbo CPU
+frequencies, because MSR_HWP_CAPABILITIES has per-CPU scope according
+to the Intel Software Developer's Manual.
+
+The max turbo frequency is already per-CPU in the current code, so make
+a similar change for the max non-turbo frequency as well.
+
+Reported-by: Wendy Wang <wendy.wang@intel.com>
+Signed-off-by: Chen Yu <yu.c.chen@intel.com>
+[ rjw: Subject and changelog edits ]
+Cc: 4.18+ <stable@vger.kernel.org> # 4.18+: a45ee4d4e13b: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
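+
+For reference, assuming HWP_GUARANTEED_PERF() is the existing
+msr-index.h accessor for the guaranteed-performance field (bits 15:8)
+of MSR_HWP_CAPABILITIES, the per-CPU derivation after this change is
+roughly:
+
+	cap        = READ_ONCE(cpu->hwp_cap_cached);   /* per-CPU MSR value */
+	max_pstate = HWP_GUARANTEED_PERF(cap);         /* (cap >> 8) & 0xff */
+	max_freq   = max_pstate * cpu->pstate.scaling;
+
+instead of taking max_pstate from the package-wide MSR_PLATFORM_INFO
+via pstate_funcs.get_max().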
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -1724,11 +1724,9 @@ static void intel_pstate_max_within_limi
+ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ {
+ cpu->pstate.min_pstate = pstate_funcs.get_min();
+- cpu->pstate.max_pstate = pstate_funcs.get_max();
+ cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
+ cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+ cpu->pstate.scaling = pstate_funcs.get_scaling();
+- cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+
+ if (hwp_active && !hwp_mode_bdw) {
+ unsigned int phy_max, current_max;
+@@ -1736,9 +1734,12 @@ static void intel_pstate_get_cpu_pstates
+ intel_pstate_get_hwp_max(cpu, &phy_max, ¤t_max);
+ cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+ cpu->pstate.turbo_pstate = phy_max;
++ cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached));
+ } else {
+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
++ cpu->pstate.max_pstate = pstate_funcs.get_max();
+ }
++ cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+
+ if (pstate_funcs.get_aperf_mperf_shift)
+ cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
--- /dev/null
+From 67fc209b527d023db4d087c68e44e9790aa089ef Mon Sep 17 00:00:00 2001
+From: Shawn Guo <shawn.guo@linaro.org>
+Date: Tue, 19 Jan 2021 10:39:25 +0800
+Subject: cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
+
+From: Shawn Guo <shawn.guo@linaro.org>
+
+commit 67fc209b527d023db4d087c68e44e9790aa089ef upstream.
+
+Commit f17b3e44320b ("cpufreq: qcom-hw: Use
+devm_platform_ioremap_resource() to simplify code") introduces a
+regression on platforms using the driver by failing to initialise a
+policy when one is created post hotplug.
+
+When all the CPUs of a policy are hotplugged out, the call to .exit()
+and later to devm_iounmap() does not release the memory region that was
+requested during devm_platform_ioremap_resource(). Therefore,
+a subsequent call to .init() will result in the following error, which
+will prevent a new policy from being initialised:
+
+[ 3395.915416] CPU4: shutdown
+[ 3395.938185] psci: CPU4 killed (polled 0 ms)
+[ 3399.071424] CPU5: shutdown
+[ 3399.094316] psci: CPU5 killed (polled 0 ms)
+[ 3402.139358] CPU6: shutdown
+[ 3402.161705] psci: CPU6 killed (polled 0 ms)
+[ 3404.742939] CPU7: shutdown
+[ 3404.765592] psci: CPU7 killed (polled 0 ms)
+[ 3411.492274] Detected VIPT I-cache on CPU4
+[ 3411.492337] GICv3: CPU4: found redistributor 400 region 0:0x0000000017ae0000
+[ 3411.492448] CPU4: Booted secondary processor 0x0000000400 [0x516f802d]
+[ 3411.503654] qcom-cpufreq-hw 17d43000.cpufreq: can't request region for resource [mem 0x17d45800-0x17d46bff]
+
+That said, the original code was tricky and intentionally skipped the
+memory region request to hide this issue. The true cause is that the
+devm_xxx() device-managed functions shouldn't be used for cpufreq
+init/exit hooks, because &pdev->dev stays alive across the hooks and
+so never triggers the automatic resource free-up. Let's drop the use
+of device-managed functions and manually allocate/free resources, so
+that the issue can be fixed properly.
+
+Cc: v5.10+ <stable@vger.kernel.org> # v5.10+
+Fixes: f17b3e44320b ("cpufreq: qcom-hw: Use devm_platform_ioremap_resource() to simplify code")
+Suggested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/qcom-cpufreq-hw.c | 40 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/qcom-cpufreq-hw.c
++++ b/drivers/cpufreq/qcom-cpufreq-hw.c
+@@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
+
+ struct qcom_cpufreq_data {
+ void __iomem *base;
++ struct resource *res;
+ const struct qcom_cpufreq_soc_data *soc_data;
+ };
+
+@@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(stru
+ struct of_phandle_args args;
+ struct device_node *cpu_np;
+ struct device *cpu_dev;
++ struct resource *res;
+ void __iomem *base;
+ struct qcom_cpufreq_data *data;
+ int ret, index;
+@@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(stru
+
+ index = args.args[0];
+
+- base = devm_platform_ioremap_resource(pdev, index);
+- if (IS_ERR(base))
+- return PTR_ERR(base);
++ res = platform_get_resource(pdev, IORESOURCE_MEM, index);
++ if (!res) {
++ dev_err(dev, "failed to get mem resource %d\n", index);
++ return -ENODEV;
++ }
++
++ if (!request_mem_region(res->start, resource_size(res), res->name)) {
++ dev_err(dev, "failed to request resource %pR\n", res);
++ return -EBUSY;
++ }
+
+- data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
++ base = ioremap(res->start, resource_size(res));
++ if (IS_ERR(base)) {
++ dev_err(dev, "failed to map resource %pR\n", res);
++ ret = PTR_ERR(base);
++ goto release_region;
++ }
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+- goto error;
++ goto unmap_base;
+ }
+
+ data->soc_data = of_device_get_match_data(&pdev->dev);
+ data->base = base;
++ data->res = res;
+
+ /* HW should be in enabled state to proceed */
+ if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
+@@ -349,7 +366,11 @@ static int qcom_cpufreq_hw_cpu_init(stru
+
+ return 0;
+ error:
+- devm_iounmap(dev, base);
++ kfree(data);
++unmap_base:
++ iounmap(data->base);
++release_region:
++ release_mem_region(res->start, resource_size(res));
+ return ret;
+ }
+
+@@ -357,12 +378,15 @@ static int qcom_cpufreq_hw_cpu_exit(stru
+ {
+ struct device *cpu_dev = get_cpu_device(policy->cpu);
+ struct qcom_cpufreq_data *data = policy->driver_data;
+- struct platform_device *pdev = cpufreq_get_driver_data();
++ struct resource *res = data->res;
++ void __iomem *base = data->base;
+
+ dev_pm_opp_remove_all_dynamic(cpu_dev);
+ dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+ kfree(policy->freq_table);
+- devm_iounmap(&pdev->dev, data->base);
++ kfree(data);
++ iounmap(base);
++ release_mem_region(res->start, resource_size(res));
+
+ return 0;
+ }
--- /dev/null
+From 904e6b266619c2da5c58b5dce14ae30629e39645 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:54 +0200
+Subject: dm era: Fix bitset memory leaks
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 904e6b266619c2da5c58b5dce14ae30629e39645 upstream.
+
+Deallocate the memory allocated for the in-core bitsets when destroying
+the target and in error paths.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -47,6 +47,7 @@ struct writeset {
+ static void writeset_free(struct writeset *ws)
+ {
+ vfree(ws->bits);
++ ws->bits = NULL;
+ }
+
+ static int setup_on_disk_bitset(struct dm_disk_bitset *info,
+@@ -811,6 +812,8 @@ static struct era_metadata *metadata_ope
+
+ static void metadata_close(struct era_metadata *md)
+ {
++ writeset_free(&md->writesets[0]);
++ writeset_free(&md->writesets[1]);
+ destroy_persistent_data_objects(md);
+ kfree(md);
+ }
+@@ -848,6 +851,7 @@ static int metadata_resize(struct era_me
+ r = writeset_alloc(&md->writesets[1], *new_size);
+ if (r) {
+ DMERR("%s: writeset_alloc failed for writeset 1", __func__);
++ writeset_free(&md->writesets[0]);
+ return r;
+ }
+
+@@ -858,6 +862,8 @@ static int metadata_resize(struct era_me
+ &value, &md->era_array_root);
+ if (r) {
+ DMERR("%s: dm_array_resize failed", __func__);
++ writeset_free(&md->writesets[0]);
++ writeset_free(&md->writesets[1]);
+ return r;
+ }
+
--- /dev/null
+From cca2c6aebe86f68103a8615074b3578e854b5016 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Thu, 11 Feb 2021 16:22:43 +0200
+Subject: dm era: only resize metadata in preresume
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit cca2c6aebe86f68103a8615074b3578e854b5016 upstream.
+
+Metadata resize shouldn't happen in the ctr. The ctr loads a temporary
+(inactive) table that will only become active upon resume. That is why
+resize should always be done as part of resume. Otherwise a load (ctr)
+whose inactive table never becomes active will incorrectly resize the
+metadata.
+
+Also, perform the resize directly in preresume, instead of using the
+worker to do it.
+
+The worker might run other metadata operations, e.g., it could start
+digestion, before resizing the metadata. These operations will end up
+using the old size.
+
+This could lead to errors, like:
+
+ device-mapper: era: metadata_digest_transcribe_writeset: dm_array_set_value failed
+ device-mapper: era: process_old_eras: digest step failed, stopping digestion
+
+The reason for the above error is that the worker started the digestion
+of the archived writeset using the old, larger size.
+
+As a result, metadata_digest_transcribe_writeset tried to write beyond
+the end of the era array.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1501,15 +1501,6 @@ static int era_ctr(struct dm_target *ti,
+ }
+ era->md = md;
+
+- era->nr_blocks = calc_nr_blocks(era);
+-
+- r = metadata_resize(era->md, &era->nr_blocks);
+- if (r) {
+- ti->error = "couldn't resize metadata";
+- era_destroy(era);
+- return -ENOMEM;
+- }
+-
+ era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
+ if (!era->wq) {
+ ti->error = "could not create workqueue for metadata object";
+@@ -1584,9 +1575,17 @@ static int era_preresume(struct dm_targe
+ dm_block_t new_size = calc_nr_blocks(era);
+
+ if (era->nr_blocks != new_size) {
+- r = in_worker1(era, metadata_resize, &new_size);
+- if (r)
++ r = metadata_resize(era->md, &new_size);
++ if (r) {
++ DMERR("%s: metadata_resize failed", __func__);
++ return r;
++ }
++
++ r = metadata_commit(era->md);
++ if (r) {
++ DMERR("%s: metadata_commit failed", __func__);
+ return r;
++ }
+
+ era->nr_blocks = new_size;
+ }
--- /dev/null
+From de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:19:30 +0200
+Subject: dm era: Recover committed writeset after crash
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 upstream.
+
+Following a system crash, dm-era fails to recover the committed writeset
+for the current era, leading to lost writes. That is, we lose the
+information about what blocks were written during the affected era.
+
+dm-era assumes that the writeset of the current era is archived when the
+device is suspended. So, when resuming the device, it just moves on to
+the next era, ignoring the committed writeset.
+
+This assumption holds when the device is properly shut down. But, when
+the system crashes, the code that suspends the target never runs, so the
+writeset for the current era is not archived.
+
+There are three issues that cause the committed writeset to get lost:
+
+1. dm-era doesn't load the committed writeset when opening the metadata
+2. The code that resizes the metadata wipes the information about the
+ committed writeset (assuming it was loaded at step 1)
+3. era_preresume() starts a new era, without taking into account that
+ the current era might not have been archived, due to a system crash.
+
+To fix this:
+
+1. Load the committed writeset when opening the metadata
+2. Fix the code that resizes the metadata to make sure it doesn't wipe
+ the loaded writeset
+3. Fix era_preresume() to check for a loaded writeset and archive it,
+ before starting a new era.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bi
+ */
+ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
+ {
+- ws->md.nr_bits = nr_blocks;
+- ws->md.root = INVALID_WRITESET_ROOT;
+ ws->bits = vzalloc(bitset_size(nr_blocks));
+ if (!ws->bits) {
+ DMERR("%s: couldn't allocate in memory bitset", __func__);
+@@ -85,12 +83,14 @@ static int writeset_alloc(struct writese
+ /*
+ * Wipes the in-core bitset, and creates a new on disk bitset.
+ */
+-static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
++static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws,
++ dm_block_t nr_blocks)
+ {
+ int r;
+
+- memset(ws->bits, 0, bitset_size(ws->md.nr_bits));
++ memset(ws->bits, 0, bitset_size(nr_blocks));
+
++ ws->md.nr_bits = nr_blocks;
+ r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
+ if (r) {
+ DMERR("%s: setup_on_disk_bitset failed", __func__);
+@@ -579,6 +579,7 @@ static int open_metadata(struct era_meta
+ md->nr_blocks = le32_to_cpu(disk->nr_blocks);
+ md->current_era = le32_to_cpu(disk->current_era);
+
++ ws_unpack(&disk->current_writeset, &md->current_writeset->md);
+ md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
+ md->era_array_root = le64_to_cpu(disk->era_array_root);
+ md->metadata_snap = le64_to_cpu(disk->metadata_snap);
+@@ -870,7 +871,6 @@ static int metadata_era_archive(struct e
+ }
+
+ ws_pack(&md->current_writeset->md, &value);
+- md->current_writeset->md.root = INVALID_WRITESET_ROOT;
+
+ keys[0] = md->current_era;
+ __dm_bless_for_disk(&value);
+@@ -882,6 +882,7 @@ static int metadata_era_archive(struct e
+ return r;
+ }
+
++ md->current_writeset->md.root = INVALID_WRITESET_ROOT;
+ md->archived_writesets = true;
+
+ return 0;
+@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_m
+ int r;
+ struct writeset *new_writeset = next_writeset(md);
+
+- r = writeset_init(&md->bitset_info, new_writeset);
++ r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks);
+ if (r) {
+ DMERR("%s: writeset_init failed", __func__);
+ return r;
+@@ -951,7 +952,7 @@ static int metadata_commit(struct era_me
+ int r;
+ struct dm_block *sblock;
+
+- if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
++ if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
+ r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
+ &md->current_writeset->md.root);
+ if (r) {
+@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_targe
+
+ start_worker(era);
+
+- r = in_worker0(era, metadata_new_era);
++ r = in_worker0(era, metadata_era_rollover);
+ if (r) {
+ DMERR("%s: metadata_era_rollover failed", __func__);
+ return r;
--- /dev/null
+From 2524933307fd0036d5c32357c693c021ab09a0b0 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:22:04 +0200
+Subject: dm era: Reinitialize bitset cache before digesting a new writeset
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 2524933307fd0036d5c32357c693c021ab09a0b0 upstream.
+
+In case of devices with at most 64 blocks, the digestion of consecutive
+eras uses the writeset of the first era as the writeset of all eras to
+digest, leading to lost writes. That is, we lose the information about
+what blocks were written during the affected eras.
+
+The digestion code uses a dm_disk_bitset object to access the archived
+writesets. This structure includes a one-word (64-bit) cache to reduce
+the number of array lookups.
+
+This structure is initialized only once, in metadata_digest_start(),
+when we kick off digestion.
+
+But, when we insert a new writeset into the writeset tree, before the
+digestion of the previous writeset is done, or equivalently when there
+are multiple writesets in the writeset tree to digest, then all these
+writesets are digested using the same cache and the cache is not
+re-initialized when moving from one writeset to the next.
+
+For devices with more than 64 blocks, i.e., more blocks than the cache
+can hold, the cache is indirectly invalidated when we move on to the
+next set of blocks, so we avoid the bug.
+
+But for devices with at most 64 blocks we end up using the same cached
+data for digesting all archived writesets, i.e., the cache is loaded
+when digesting the first writeset and it never gets reloaded, until the
+digestion is done.
+
+As a result, the writeset of the first era to digest is used as the
+writeset of all the following archived eras, leading to lost writes.
+
+Fix this by reinitializing the dm_disk_bitset structure, and thus
+invalidating the cache, every time the digestion code starts digesting a
+new writeset.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
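+
+As a concrete example (assuming the usual 64-bit packing of the
+dm_disk_bitset array): on a 32-block device every era's writeset lives
+entirely in array word 0, so the word cached while digesting era N is
+silently reused for eras N+1, N+2, ... and their real on-disk bits are
+never read. On, say, a 1000-block device the digestion crosses word
+boundaries, the cache gets reloaded along the way, and the bug is
+masked.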
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -756,6 +756,12 @@ static int metadata_digest_lookup_writes
+ ws_unpack(&disk, &d->writeset);
+ d->value = cpu_to_le32(key);
+
++ /*
++ * We initialise another bitset info to avoid any caching side effects
++ * with the previous one.
++ */
++ dm_disk_bitset_init(md->tm, &d->info);
++
+ d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
+ d->current_bit = 0;
+ d->step = metadata_digest_transcribe_writeset;
+@@ -769,12 +775,6 @@ static int metadata_digest_start(struct
+ return 0;
+
+ memset(d, 0, sizeof(*d));
+-
+- /*
+- * We initialise another bitset info to avoid any caching side
+- * effects with the previous one.
+- */
+- dm_disk_bitset_init(md->tm, &d->info);
+ d->step = metadata_digest_lookup_writeset;
+
+ return 0;
--- /dev/null
+From 2099b145d77c1d53f5711f029c37cc537897cee6 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:19:31 +0200
+Subject: dm era: Update in-core bitset after committing the metadata
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 2099b145d77c1d53f5711f029c37cc537897cee6 upstream.
+
+In case of a system crash, dm-era might fail to mark blocks as written
+in its metadata, although the corresponding writes to these blocks were
+passed down to the origin device and completed successfully.
+
+Consider the following sequence of events:
+
+1. We write to a block that has not been yet written in the current era
+2. era_map() checks the in-core bitmap for the current era and sees
+ that the block is not marked as written.
+3. The write is deferred for submission after the metadata have been
+ updated and committed.
+4. The worker thread processes the deferred write
+ (process_deferred_bios()) and marks the block as written in the
+ in-core bitmap, **before** committing the metadata.
+5. The worker thread starts committing the metadata.
+6. We do more writes that map to the same block as the write of step (1)
+7. era_map() checks the in-core bitmap and sees that the block is marked
+ as written, **although the metadata have not been committed yet**.
+8. These writes are passed down to the origin device immediately and the
+ device reports them as completed.
+9. The system crashes, e.g., power failure, before the commit from step
+ (5) finishes.
+
+When the system recovers and we query the dm-era target for the list of
+written blocks, it doesn't report the aforementioned block as written,
+although the writes of step (6) completed successfully.
+
+The issue is that era_map() decides whether or not to defer a write
+based on uncommitted information. The root cause of the bug is that we
+update the in-core bitmap **before** committing the metadata.
+
+Fix this by updating the in-core bitmap **after** successfully
+committing the metadata.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
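+
+In outline, the resulting order in process_deferred_bios() is (a
+paraphrase of the hunks below together with the existing commit step
+between them):
+
+	1. writeset_test_and_set() now touches only the on-disk bitset
+	   (test_bit() instead of test_and_set_bit() on ws->bits);
+	2. the metadata are committed if any bit was newly set;
+	3. only then, and only in the non-failed path, is set_bit() called
+	   on the in-core ws->bits, just before submitting the marked bios.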
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -134,7 +134,7 @@ static int writeset_test_and_set(struct
+ {
+ int r;
+
+- if (!test_and_set_bit(block, ws->bits)) {
++ if (!test_bit(block, ws->bits)) {
+ r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
+ if (r) {
+ /* FIXME: fail mode */
+@@ -1226,8 +1226,10 @@ static void process_deferred_bios(struct
+ int r;
+ struct bio_list deferred_bios, marked_bios;
+ struct bio *bio;
++ struct blk_plug plug;
+ bool commit_needed = false;
+ bool failed = false;
++ struct writeset *ws = era->md->current_writeset;
+
+ bio_list_init(&deferred_bios);
+ bio_list_init(&marked_bios);
+@@ -1237,9 +1239,11 @@ static void process_deferred_bios(struct
+ bio_list_init(&era->deferred_bios);
+ spin_unlock(&era->deferred_lock);
+
++ if (bio_list_empty(&deferred_bios))
++ return;
++
+ while ((bio = bio_list_pop(&deferred_bios))) {
+- r = writeset_test_and_set(&era->md->bitset_info,
+- era->md->current_writeset,
++ r = writeset_test_and_set(&era->md->bitset_info, ws,
+ get_block(era, bio));
+ if (r < 0) {
+ /*
+@@ -1247,7 +1251,6 @@ static void process_deferred_bios(struct
+ * FIXME: finish.
+ */
+ failed = true;
+-
+ } else if (r == 0)
+ commit_needed = true;
+
+@@ -1263,9 +1266,19 @@ static void process_deferred_bios(struct
+ if (failed)
+ while ((bio = bio_list_pop(&marked_bios)))
+ bio_io_error(bio);
+- else
+- while ((bio = bio_list_pop(&marked_bios)))
++ else {
++ blk_start_plug(&plug);
++ while ((bio = bio_list_pop(&marked_bios))) {
++ /*
++ * Only update the in-core writeset if the on-disk one
++ * was updated too.
++ */
++ if (commit_needed)
++ set_bit(get_block(era, bio), ws->bits);
+ submit_bio_noacct(bio);
++ }
++ blk_finish_plug(&plug);
++ }
+ }
+
+ static void process_rpc_calls(struct era *era)
--- /dev/null
+From 64f2d15afe7b336aafebdcd14cc835ecf856df4b Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:55 +0200
+Subject: dm era: Use correct value size in equality function of writeset tree
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 64f2d15afe7b336aafebdcd14cc835ecf856df4b upstream.
+
+Fix the writeset tree equality test function to use the right value size
+when comparing two btree values.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -389,7 +389,7 @@ static void ws_dec(void *context, const
+
+ static int ws_eq(void *context, const void *value1, const void *value2)
+ {
+- return !memcmp(value1, value2, sizeof(struct writeset_metadata));
++ return !memcmp(value1, value2, sizeof(struct writeset_disk));
+ }
+
+ /*----------------------------------------------------------------*/
--- /dev/null
+From c8e846ff93d5eaa5384f6f325a1687ac5921aade Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Fri, 22 Jan 2021 17:25:53 +0200
+Subject: dm era: Verify the data block size hasn't changed
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit c8e846ff93d5eaa5384f6f325a1687ac5921aade upstream.
+
+dm-era doesn't support changing the data block size of existing devices,
+so check explicitly that the requested block size for a new target
+matches the one stored in the metadata.
+
+Fixes: eec40579d84873 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -564,6 +564,15 @@ static int open_metadata(struct era_meta
+ }
+
+ disk = dm_block_data(sblock);
++
++ /* Verify the data block size hasn't changed */
++ if (le32_to_cpu(disk->data_block_size) != md->block_size) {
++ DMERR("changing the data block size (from %u to %llu) is not supported",
++ le32_to_cpu(disk->data_block_size), md->block_size);
++ r = -EINVAL;
++ goto bad;
++ }
++
+ r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
+ disk->metadata_space_map_root,
+ sizeof(disk->metadata_space_map_root),
+@@ -575,7 +584,6 @@ static int open_metadata(struct era_meta
+
+ setup_infos(md);
+
+- md->block_size = le32_to_cpu(disk->data_block_size);
+ md->nr_blocks = le32_to_cpu(disk->nr_blocks);
+ md->current_era = le32_to_cpu(disk->current_era);
+
--- /dev/null
+From a666e5c05e7c4aaabb2c5d58117b0946803d03d2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Wed, 10 Feb 2021 15:26:23 -0500
+Subject: dm: fix deadlock when swapping to encrypted device
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a666e5c05e7c4aaabb2c5d58117b0946803d03d2 upstream.
+
+The system would deadlock when swapping to a dm-crypt device. The reason
+is that for each incoming write bio, dm-crypt allocates memory that holds
+the encrypted data. These excessive allocations exhaust all the memory and
+the result is either a deadlock or an OOM trigger.
+
+This patch limits the number of in-flight swap bios, so that the memory
+consumed by dm-crypt is limited. The limit is enforced if the target sets
+the "limit_swap_bios" variable and if the bio has REQ_SWAP set.
+
+Non-swap bios are not affected, because taking the semaphore for them
+would cause performance degradation.
+
+This is similar to request-based drivers - they will also block when the
+number of requests is over the limit.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-core.h | 4 ++
+ drivers/md/dm-crypt.c | 1
+ drivers/md/dm.c | 60 ++++++++++++++++++++++++++++++++++++++++++
+ include/linux/device-mapper.h | 5 +++
+ 4 files changed, 70 insertions(+)
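+
+For a sense of scale, with 4 KiB pages the default limit added below
+works out to:
+
+	DEFAULT_SWAP_BIOS = 8 * 1048576 / PAGE_SIZE = 8 MiB / 4 KiB = 2048
+
+i.e. at most 2048 swap bios (roughly 8 MiB of in-flight swap data,
+assuming page-sized bios) are allowed per device before __map_bio()
+blocks on the semaphore; the limit is also adjustable at runtime via
+the new writable "swap_bios" module parameter.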
+
+--- a/drivers/md/dm-core.h
++++ b/drivers/md/dm-core.h
+@@ -109,6 +109,10 @@ struct mapped_device {
+
+ struct block_device *bdev;
+
++ int swap_bios;
++ struct semaphore swap_bios_semaphore;
++ struct mutex swap_bios_lock;
++
+ struct dm_stats stats;
+
+ /* for blk-mq request-based DM support */
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *t
+ wake_up_process(cc->write_thread);
+
+ ti->num_flush_bios = 1;
++ ti->limit_swap_bios = true;
+
+ return 0;
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_
+ #define DM_NUMA_NODE NUMA_NO_NODE
+ static int dm_numa_node = DM_NUMA_NODE;
+
++#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE)
++static int swap_bios = DEFAULT_SWAP_BIOS;
++static int get_swap_bios(void)
++{
++ int latch = READ_ONCE(swap_bios);
++ if (unlikely(latch <= 0))
++ latch = DEFAULT_SWAP_BIOS;
++ return latch;
++}
++
+ /*
+ * For mempools pre-allocation at the table loading time.
+ */
+@@ -966,6 +976,11 @@ void disable_write_zeroes(struct mapped_
+ limits->max_write_zeroes_sectors = 0;
+ }
+
++static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
++{
++ return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
++}
++
+ static void clone_endio(struct bio *bio)
+ {
+ blk_status_t error = bio->bi_status;
+@@ -1016,6 +1031,11 @@ static void clone_endio(struct bio *bio)
+ }
+ }
+
++ if (unlikely(swap_bios_limit(tio->ti, bio))) {
++ struct mapped_device *md = io->md;
++ up(&md->swap_bios_semaphore);
++ }
++
+ free_tio(tio);
+ dec_pending(io, error);
+ }
+@@ -1249,6 +1269,22 @@ void dm_accept_partial_bio(struct bio *b
+ }
+ EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
++static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
++{
++ mutex_lock(&md->swap_bios_lock);
++ while (latch < md->swap_bios) {
++ cond_resched();
++ down(&md->swap_bios_semaphore);
++ md->swap_bios--;
++ }
++ while (latch > md->swap_bios) {
++ cond_resched();
++ up(&md->swap_bios_semaphore);
++ md->swap_bios++;
++ }
++ mutex_unlock(&md->swap_bios_lock);
++}
++
+ static blk_qc_t __map_bio(struct dm_target_io *tio)
+ {
+ int r;
+@@ -1268,6 +1304,14 @@ static blk_qc_t __map_bio(struct dm_targ
+ atomic_inc(&io->io_count);
+ sector = clone->bi_iter.bi_sector;
+
++ if (unlikely(swap_bios_limit(ti, clone))) {
++ struct mapped_device *md = io->md;
++ int latch = get_swap_bios();
++ if (unlikely(latch != md->swap_bios))
++ __set_swap_bios_limit(md, latch);
++ down(&md->swap_bios_semaphore);
++ }
++
+ r = ti->type->map(ti, clone);
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+@@ -1279,10 +1323,18 @@ static blk_qc_t __map_bio(struct dm_targ
+ ret = submit_bio_noacct(clone);
+ break;
+ case DM_MAPIO_KILL:
++ if (unlikely(swap_bios_limit(ti, clone))) {
++ struct mapped_device *md = io->md;
++ up(&md->swap_bios_semaphore);
++ }
+ free_tio(tio);
+ dec_pending(io, BLK_STS_IOERR);
+ break;
+ case DM_MAPIO_REQUEUE:
++ if (unlikely(swap_bios_limit(ti, clone))) {
++ struct mapped_device *md = io->md;
++ up(&md->swap_bios_semaphore);
++ }
+ free_tio(tio);
+ dec_pending(io, BLK_STS_DM_REQUEUE);
+ break;
+@@ -1756,6 +1808,7 @@ static void cleanup_mapped_device(struct
+ mutex_destroy(&md->suspend_lock);
+ mutex_destroy(&md->type_lock);
+ mutex_destroy(&md->table_devices_lock);
++ mutex_destroy(&md->swap_bios_lock);
+
+ dm_mq_cleanup_mapped_device(md);
+ }
+@@ -1823,6 +1876,10 @@ static struct mapped_device *alloc_dev(i
+ init_waitqueue_head(&md->eventq);
+ init_completion(&md->kobj_holder.completion);
+
++ md->swap_bios = get_swap_bios();
++ sema_init(&md->swap_bios_semaphore, md->swap_bios);
++ mutex_init(&md->swap_bios_lock);
++
+ md->disk->major = _major;
+ md->disk->first_minor = minor;
+ md->disk->fops = &dm_blk_dops;
+@@ -3119,6 +3176,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios,
+ module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
+ MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+
++module_param(swap_bios, int, S_IRUGO | S_IWUSR);
++MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
++
+ MODULE_DESCRIPTION(DM_NAME " driver");
+ MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+ MODULE_LICENSE("GPL");
+--- a/include/linux/device-mapper.h
++++ b/include/linux/device-mapper.h
+@@ -325,6 +325,11 @@ struct dm_target {
+ * whether or not its underlying devices have support.
+ */
+ bool discards_supported:1;
++
++ /*
++ * Set if we need to limit the number of in-flight bios when swapping.
++ */
++ bool limit_swap_bios:1;
+ };
+
+ void *dm_per_bio_data(struct bio *bio, size_t data_size);
--- /dev/null
+From 5b0fab508992c2e120971da658ce80027acbc405 Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Mon, 8 Feb 2021 22:34:36 -0500
+Subject: dm table: fix DAX iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit 5b0fab508992c2e120971da658ce80027acbc405 upstream.
+
+Fix dm_table_supports_dax() and invert the logic of both of its
+iterate_devices_callout_fn callbacks so that all devices' DAX
+capabilities are properly checked.
+
+Fixes: 545ed20e6df6 ("dm: add infrastructure for DAX support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c | 37 ++++++++++---------------------------
+ drivers/md/dm.c | 2 +-
+ drivers/md/dm.h | 2 +-
+ 3 files changed, 12 insertions(+), 29 deletions(-)
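+
+The effect of the inversion, in short: iterate_devices() stops on the
+first non-zero callout return, so with the old positive predicate
+
+	iterate_devices(ti, device_supports_dax, ...)    /* != 0 if ANY dev is capable */
+
+a single DAX-capable device made the whole target look capable. With
+the inverted predicate (and the '!' dropped at the call site),
+
+	iterate_devices(ti, device_not_dax_capable, ...) /* != 0 if ANY dev is NOT capable */
+
+dm_table_supports_dax() bails out as soon as one device lacks DAX
+support, i.e. it now really requires all devices to be capable.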
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -827,24 +827,24 @@ void dm_table_set_type(struct dm_table *
+ EXPORT_SYMBOL_GPL(dm_table_set_type);
+
+ /* validate the dax capability of the target device span */
+-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+ int blocksize = *(int *) data, id;
+ bool rc;
+
+ id = dax_read_lock();
+- rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
++ rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
+ dax_read_unlock(id);
+
+ return rc;
+ }
+
+ /* Check devices support synchronous DAX */
+-static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev,
+- sector_t start, sector_t len, void *data)
++static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
++ sector_t start, sector_t len, void *data)
+ {
+- return dev->dax_dev && dax_synchronous(dev->dax_dev);
++ return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
+ }
+
+ bool dm_table_supports_dax(struct dm_table *t,
+@@ -861,7 +861,7 @@ bool dm_table_supports_dax(struct dm_tab
+ return false;
+
+ if (!ti->type->iterate_devices ||
+- !ti->type->iterate_devices(ti, iterate_fn, blocksize))
++ ti->type->iterate_devices(ti, iterate_fn, blocksize))
+ return false;
+ }
+
+@@ -932,7 +932,7 @@ static int dm_table_determine_type(struc
+ verify_bio_based:
+ /* We must use this table as bio-based */
+ t->type = DM_TYPE_BIO_BASED;
+- if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
++ if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
+ (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
+ t->type = DM_TYPE_DAX_BIO_BASED;
+ }
+@@ -1625,23 +1625,6 @@ static int device_dax_write_cache_enable
+ return false;
+ }
+
+-static int dm_table_supports_dax_write_cache(struct dm_table *t)
+-{
+- struct dm_target *ti;
+- unsigned i;
+-
+- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+- ti = dm_table_get_target(t, i);
+-
+- if (ti->type->iterate_devices &&
+- ti->type->iterate_devices(ti,
+- device_dax_write_cache_enabled, NULL))
+- return true;
+- }
+-
+- return false;
+-}
+-
+ static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+@@ -1846,15 +1829,15 @@ void dm_table_set_restrictions(struct dm
+ }
+ blk_queue_write_cache(q, wc, fua);
+
+- if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
++ if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
+ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+- if (dm_table_supports_dax(t, device_dax_synchronous, NULL))
++ if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
+ set_dax_synchronous(t->md->dax_dev);
+ }
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+
+- if (dm_table_supports_dax_write_cache(t))
++ if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled))
+ dax_write_cache(t->md->dax_dev, true);
+
+ /* Ensure that all underlying devices are non-rotational. */
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1145,7 +1145,7 @@ static bool dm_dax_supported(struct dax_
+ if (!map)
+ goto out;
+
+- ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
++ ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
+
+ out:
+ dm_put_live_table(md, srcu_idx);
+--- a/drivers/md/dm.h
++++ b/drivers/md/dm.h
+@@ -73,7 +73,7 @@ void dm_table_free_md_mempools(struct dm
+ struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
+ bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+ int *blocksize);
+-int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
++int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data);
+
+ void dm_lock_md_type(struct mapped_device *md);
--- /dev/null
+From a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Tue, 2 Feb 2021 11:35:28 +0800
+Subject: dm table: fix iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit a4c8dd9c2d0987cf542a2a0c42684c9c6d78a04e upstream.
+
+According to the definition of dm_iterate_devices_fn:
+ * This function must iterate through each section of device used by the
+ * target until it encounters a non-zero return code, which it then returns.
+ * Returns zero if no callout returned non-zero.
+
+For some target types (e.g. dm-stripe), one call of iterate_devices() may
+iterate over multiple underlying devices internally, in which case a
+non-zero return code from iterate_devices_callout_fn will stop the
+iteration early. No iterate_devices_callout_fn should return non-zero
+unless device iteration should stop.
+
+Rename dm_table_requires_stable_pages() to dm_table_any_dev_attr() and
+elevate it for reuse: it stops iterating (and returns true) on the
+first device that causes iterate_devices_callout_fn to return non-zero.
+Use dm_table_any_dev_attr() to properly iterate through devices.
+
+Rename device_is_nonrot() to device_is_rotational() and invert logic
+accordingly to fix improper disposition.
+
+Fixes: c3c4555edd10 ("dm table: clear add_random unless all devices have it set")
+Fixes: 4693c9668fdc ("dm table: propagate non rotational flag")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c | 97 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 51 insertions(+), 46 deletions(-)
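+
+To illustrate the pattern with one of the converted checks (condensed
+from the hunks below, not new code): "all underlying devices are
+non-rotational" is now expressed as "no device is rotational", so a
+rotational device is what stops the iteration early, while fully
+non-rotational stacks are walked to the end:
+
+	static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
+					sector_t start, sector_t len, void *data)
+	{
+		struct request_queue *q = bdev_get_queue(dev->bdev);
+
+		/* non-zero only for the counter example: a rotational device */
+		return q && !blk_queue_nonrot(q);
+	}
+
+	/* in dm_table_set_restrictions(), q being the DM device's own queue */
+	if (dm_table_any_dev_attr(t, device_is_rotational))
+		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+	else
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);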
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1302,6 +1302,46 @@ struct dm_target *dm_table_find_target(s
+ return &t->targets[(KEYS_PER_NODE * n) + k];
+ }
+
++/*
++ * type->iterate_devices() should be called when the sanity check needs to
++ * iterate and check all underlying data devices. iterate_devices() will
++ * iterate all underlying data devices until it encounters a non-zero return
++ * code, returned by whether the input iterate_devices_callout_fn, or
++ * iterate_devices() itself internally.
++ *
++ * For some target type (e.g. dm-stripe), one call of iterate_devices() may
++ * iterate multiple underlying devices internally, in which case a non-zero
++ * return code returned by iterate_devices_callout_fn will stop the iteration
++ * in advance.
++ *
++ * Cases requiring _any_ underlying device supporting some kind of attribute,
++ * should use the iteration structure like dm_table_any_dev_attr(), or call
++ * it directly. @func should handle semantics of positive examples, e.g.
++ * capable of something.
++ *
++ * Cases requiring _all_ underlying devices supporting some kind of attribute,
++ * should use the iteration structure like dm_table_supports_nowait() or
++ * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
++ * uses an @anti_func that handle semantics of counter examples, e.g. not
++ * capable of something. So: return !dm_table_any_dev_attr(t, anti_func);
++ */
++static bool dm_table_any_dev_attr(struct dm_table *t,
++ iterate_devices_callout_fn func)
++{
++ struct dm_target *ti;
++ unsigned int i;
++
++ for (i = 0; i < dm_table_get_num_targets(t); i++) {
++ ti = dm_table_get_target(t, i);
++
++ if (ti->type->iterate_devices &&
++ ti->type->iterate_devices(ti, func, NULL))
++ return true;
++ }
++
++ return false;
++}
++
+ static int count_device(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+@@ -1602,12 +1642,12 @@ static int dm_table_supports_dax_write_c
+ return false;
+ }
+
+-static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
+- sector_t start, sector_t len, void *data)
++static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
++ sector_t start, sector_t len, void *data)
+ {
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+- return q && blk_queue_nonrot(q);
++ return q && !blk_queue_nonrot(q);
+ }
+
+ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
+@@ -1618,23 +1658,6 @@ static int device_is_not_random(struct d
+ return q && !blk_queue_add_random(q);
+ }
+
+-static bool dm_table_all_devices_attribute(struct dm_table *t,
+- iterate_devices_callout_fn func)
+-{
+- struct dm_target *ti;
+- unsigned i;
+-
+- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+- ti = dm_table_get_target(t, i);
+-
+- if (!ti->type->iterate_devices ||
+- !ti->type->iterate_devices(ti, func, NULL))
+- return false;
+- }
+-
+- return true;
+-}
+-
+ static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+@@ -1786,27 +1809,6 @@ static int device_requires_stable_pages(
+ return q && blk_queue_stable_writes(q);
+ }
+
+-/*
+- * If any underlying device requires stable pages, a table must require
+- * them as well. Only targets that support iterate_devices are considered:
+- * don't want error, zero, etc to require stable pages.
+- */
+-static bool dm_table_requires_stable_pages(struct dm_table *t)
+-{
+- struct dm_target *ti;
+- unsigned i;
+-
+- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+- ti = dm_table_get_target(t, i);
+-
+- if (ti->type->iterate_devices &&
+- ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
+- return true;
+- }
+-
+- return false;
+-}
+-
+ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+ struct queue_limits *limits)
+ {
+@@ -1856,10 +1858,10 @@ void dm_table_set_restrictions(struct dm
+ dax_write_cache(t->md->dax_dev, true);
+
+ /* Ensure that all underlying devices are non-rotational. */
+- if (dm_table_all_devices_attribute(t, device_is_nonrot))
+- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+- else
++ if (dm_table_any_dev_attr(t, device_is_rotational))
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
++ else
++ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+
+ if (!dm_table_supports_write_same(t))
+ q->limits.max_write_same_sectors = 0;
+@@ -1871,8 +1873,11 @@ void dm_table_set_restrictions(struct dm
+ /*
+ * Some devices don't use blk_integrity but still want stable pages
+ * because they do their own checksumming.
++ * If any underlying device requires stable pages, a table must require
++ * them as well. Only targets that support iterate_devices are considered:
++ * don't want error, zero, etc to require stable pages.
+ */
+- if (dm_table_requires_stable_pages(t))
++ if (dm_table_any_dev_attr(t, device_requires_stable_pages))
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+@@ -1883,7 +1888,7 @@ void dm_table_set_restrictions(struct dm
+ * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+ * have it set.
+ */
+- if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
++ if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random))
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
--- /dev/null
+From 24f6b6036c9eec21191646930ad42808e6180510 Mon Sep 17 00:00:00 2001
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+Date: Mon, 8 Feb 2021 22:46:38 -0500
+Subject: dm table: fix zoned iterate_devices based device capability checks
+
+From: Jeffle Xu <jefflexu@linux.alibaba.com>
+
+commit 24f6b6036c9eec21191646930ad42808e6180510 upstream.
+
+Fix dm_table_supports_zoned_model() and invert logic of both
+iterate_devices_callout_fn so that all devices' zoned capabilities are
+properly checked.
+
+Add one more parameter to dm_table_any_dev_attr(), which is actually
+used as the @data parameter of iterate_devices_callout_fn, so that
+dm_table_matches_zone_sectors() can be replaced by
+dm_table_any_dev_attr().
+
+Fixes: dd88d313bef02 ("dm table: add zoned block devices validation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-table.c | 48 ++++++++++++++++--------------------------------
+ 1 file changed, 16 insertions(+), 32 deletions(-)
+
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1323,10 +1323,10 @@ struct dm_target *dm_table_find_target(s
+ * should use the iteration structure like dm_table_supports_nowait() or
+ * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
+ * uses an @anti_func that handle semantics of counter examples, e.g. not
+- * capable of something. So: return !dm_table_any_dev_attr(t, anti_func);
++ * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
+ */
+ static bool dm_table_any_dev_attr(struct dm_table *t,
+- iterate_devices_callout_fn func)
++ iterate_devices_callout_fn func, void *data)
+ {
+ struct dm_target *ti;
+ unsigned int i;
+@@ -1335,7 +1335,7 @@ static bool dm_table_any_dev_attr(struct
+ ti = dm_table_get_target(t, i);
+
+ if (ti->type->iterate_devices &&
+- ti->type->iterate_devices(ti, func, NULL))
++ ti->type->iterate_devices(ti, func, data))
+ return true;
+ }
+
+@@ -1378,13 +1378,13 @@ bool dm_table_has_no_data_devices(struct
+ return true;
+ }
+
+-static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+- sector_t start, sector_t len, void *data)
++static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
++ sector_t start, sector_t len, void *data)
+ {
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+ enum blk_zoned_model *zoned_model = data;
+
+- return q && blk_queue_zoned_model(q) == *zoned_model;
++ return !q || blk_queue_zoned_model(q) != *zoned_model;
+ }
+
+ static bool dm_table_supports_zoned_model(struct dm_table *t,
+@@ -1401,37 +1401,20 @@ static bool dm_table_supports_zoned_mode
+ return false;
+
+ if (!ti->type->iterate_devices ||
+- !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model))
++ ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
+ return false;
+ }
+
+ return true;
+ }
+
+-static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
+- sector_t start, sector_t len, void *data)
++static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
++ sector_t start, sector_t len, void *data)
+ {
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+ unsigned int *zone_sectors = data;
+
+- return q && blk_queue_zone_sectors(q) == *zone_sectors;
+-}
+-
+-static bool dm_table_matches_zone_sectors(struct dm_table *t,
+- unsigned int zone_sectors)
+-{
+- struct dm_target *ti;
+- unsigned i;
+-
+- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+- ti = dm_table_get_target(t, i);
+-
+- if (!ti->type->iterate_devices ||
+- !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors))
+- return false;
+- }
+-
+- return true;
++ return !q || blk_queue_zone_sectors(q) != *zone_sectors;
+ }
+
+ static int validate_hardware_zoned_model(struct dm_table *table,
+@@ -1451,7 +1434,7 @@ static int validate_hardware_zoned_model
+ if (!zone_sectors || !is_power_of_2(zone_sectors))
+ return -EINVAL;
+
+- if (!dm_table_matches_zone_sectors(table, zone_sectors)) {
++ if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
+ DMERR("%s: zone sectors is not consistent across all devices",
+ dm_device_name(table->md));
+ return -EINVAL;
+@@ -1837,11 +1820,11 @@ void dm_table_set_restrictions(struct dm
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+
+- if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled))
++ if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
+ dax_write_cache(t->md->dax_dev, true);
+
+ /* Ensure that all underlying devices are non-rotational. */
+- if (dm_table_any_dev_attr(t, device_is_rotational))
++ if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+ else
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+@@ -1860,7 +1843,7 @@ void dm_table_set_restrictions(struct dm
+ * them as well. Only targets that support iterate_devices are considered:
+ * don't want error, zero, etc to require stable pages.
+ */
+- if (dm_table_any_dev_attr(t, device_requires_stable_pages))
++ if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+@@ -1871,7 +1854,8 @@ void dm_table_set_restrictions(struct dm
+ * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+ * have it set.
+ */
+- if (blk_queue_add_random(q) && dm_table_any_dev_attr(t, device_is_not_random))
++ if (blk_queue_add_random(q) &&
++ dm_table_any_dev_attr(t, device_is_not_random, NULL))
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
--- /dev/null
+From cb728484a7710c202f02b96aa0962ce9b07aa5c2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Sat, 23 Jan 2021 09:19:56 -0500
+Subject: dm writecache: fix performance degradation in ssd mode
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit cb728484a7710c202f02b96aa0962ce9b07aa5c2 upstream.
+
+Fix a thinko in ssd_commit_superblock. region.count is in sectors, not
+bytes. This bug doesn't corrupt data, but it causes performance
+degradation.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Fixes: dc8a01ae1dbd ("dm writecache: optimize superblock write")
+Cc: stable@vger.kernel.org # v5.7+
+Reported-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
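+
+A quick check of the units (assuming 4 KiB pages and 512-byte sectors,
+i.e. SECTOR_SHIFT == 9):
+
+	before: region.count = PAGE_SIZE                 = 4096 sectors = 2 MiB
+	after:  region.count = PAGE_SIZE >> SECTOR_SHIFT =    8 sectors = 4 KiB
+
+so each superblock commit used to submit a 2 MiB write (clamped by the
+metadata_sectors check just below the changed line) instead of the
+intended single page, which explains the performance degradation
+without any data corruption.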
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -523,7 +523,7 @@ static void ssd_commit_superblock(struct
+
+ region.bdev = wc->ssd_dev->bdev;
+ region.sector = 0;
+- region.count = PAGE_SIZE;
++ region.count = PAGE_SIZE >> SECTOR_SHIFT;
+
+ if (unlikely(region.sector + region.count > wc->metadata_sectors))
+ region.count = wc->metadata_sectors - region.sector;
--- /dev/null
+From 4134455f2aafdfeab50cabb4cccb35e916034b93 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 9 Feb 2021 10:56:20 -0500
+Subject: dm writecache: fix writing beyond end of underlying device when shrinking
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 4134455f2aafdfeab50cabb4cccb35e916034b93 upstream.
+
+Do not attempt to write any data beyond the end of the underlying data
+device while shrinking it.
+
+The DM writecache device must be suspended when the underlying data
+device is shrunk.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -148,6 +148,7 @@ struct dm_writecache {
+ size_t metadata_sectors;
+ size_t n_blocks;
+ uint64_t seq_count;
++ sector_t data_device_sectors;
+ void *block_start;
+ struct wc_entry *entries;
+ unsigned block_size;
+@@ -977,6 +978,8 @@ static void writecache_resume(struct dm_
+
+ wc_lock(wc);
+
++ wc->data_device_sectors = i_size_read(wc->dev->bdev->bd_inode) >> SECTOR_SHIFT;
++
+ if (WC_MODE_PMEM(wc)) {
+ persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+ } else {
+@@ -1646,6 +1649,10 @@ static bool wc_add_block(struct writebac
+ void *address = memory_data(wc, e);
+
+ persistent_memory_flush_cache(address, block_size);
++
++ if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors))
++ return true;
++
+ return bio_add_page(&wb->bio, persistent_memory_page(address),
+ block_size, persistent_memory_page_offset(address)) != 0;
+ }
+@@ -1717,6 +1724,9 @@ static void __writecache_writeback_pmem(
+ if (writecache_has_error(wc)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
++ } else if (unlikely(!bio_sectors(bio))) {
++ bio->bi_status = BLK_STS_OK;
++ bio_endio(bio);
+ } else {
+ submit_bio(bio);
+ }
+@@ -1760,6 +1770,14 @@ static void __writecache_writeback_ssd(s
+ e = f;
+ }
+
++ if (unlikely(to.sector + to.count > wc->data_device_sectors)) {
++ if (to.sector >= wc->data_device_sectors) {
++ writecache_copy_endio(0, 0, c);
++ continue;
++ }
++ from.count = to.count = wc->data_device_sectors - to.sector;
++ }
++
+ dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
+
+ __writeback_throttle(wc, wbl);
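
The new branches above implement a simple interval clamp: a writeback copy
that starts at or beyond the shrunken data device is completed immediately
without touching the device, and one that merely overruns the end is
truncated to fit. A user-space sketch of the same logic, with made-up
structure and field names:

#include <stdio.h>

typedef unsigned long long sector_t;

struct io_region { sector_t sector; sector_t count; };

/* Returns 0 if the copy should be dropped entirely, 1 if the (possibly
 * clamped) copy should proceed. */
static int clamp_to_device(struct io_region *r, sector_t device_sectors)
{
	if (r->sector >= device_sectors)
		return 0;				/* wholly past the end: skip */
	if (r->sector + r->count > device_sectors)
		r->count = device_sectors - r->sector;	/* partial overrun: truncate */
	return 1;
}

int main(void)
{
	struct io_region a = { .sector = 90,  .count = 20 };
	struct io_region b = { .sector = 120, .count = 8 };

	printf("a: proceed=%d count=%llu\n", clamp_to_device(&a, 100), a.count);
	printf("b: proceed=%d count=%llu\n", clamp_to_device(&b, 100), b.count);
	return 0;
}
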
--- /dev/null
+From 054bee16163df023e2589db09fd27d81f7ad9e72 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 4 Feb 2021 05:20:52 -0500
+Subject: dm writecache: return the exact table values that were set
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 054bee16163df023e2589db09fd27d81f7ad9e72 upstream.
+
+LVM doesn't like it when the target returns different values from what
+was set in the constructor. Fix dm-writecache so that the returned
+table values are exactly the same as requested values.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org # v4.18+
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-writecache.c | 54 +++++++++++++++++++++++++--------------------
+ 1 file changed, 30 insertions(+), 24 deletions(-)
+
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -159,14 +159,22 @@ struct dm_writecache {
+ bool overwrote_committed:1;
+ bool memory_vmapped:1;
+
++ bool start_sector_set:1;
+ bool high_wm_percent_set:1;
+ bool low_wm_percent_set:1;
+ bool max_writeback_jobs_set:1;
+ bool autocommit_blocks_set:1;
+ bool autocommit_time_set:1;
++ bool max_age_set:1;
+ bool writeback_fua_set:1;
+ bool flush_on_suspend:1;
+ bool cleaner:1;
++ bool cleaner_set:1;
++
++ unsigned high_wm_percent_value;
++ unsigned low_wm_percent_value;
++ unsigned autocommit_time_value;
++ unsigned max_age_value;
+
+ unsigned writeback_all;
+ struct workqueue_struct *writeback_wq;
+@@ -2205,6 +2213,7 @@ static int writecache_ctr(struct dm_targ
+ if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
+ goto invalid_optional;
+ wc->start_sector = start_sector;
++ wc->start_sector_set = true;
+ if (wc->start_sector != start_sector ||
+ wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT)
+ goto invalid_optional;
+@@ -2214,6 +2223,7 @@ static int writecache_ctr(struct dm_targ
+ goto invalid_optional;
+ if (high_wm_percent < 0 || high_wm_percent > 100)
+ goto invalid_optional;
++ wc->high_wm_percent_value = high_wm_percent;
+ wc->high_wm_percent_set = true;
+ } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
+ string = dm_shift_arg(&as), opt_params--;
+@@ -2221,6 +2231,7 @@ static int writecache_ctr(struct dm_targ
+ goto invalid_optional;
+ if (low_wm_percent < 0 || low_wm_percent > 100)
+ goto invalid_optional;
++ wc->low_wm_percent_value = low_wm_percent;
+ wc->low_wm_percent_set = true;
+ } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
+ string = dm_shift_arg(&as), opt_params--;
+@@ -2240,6 +2251,7 @@ static int writecache_ctr(struct dm_targ
+ if (autocommit_msecs > 3600000)
+ goto invalid_optional;
+ wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
++ wc->autocommit_time_value = autocommit_msecs;
+ wc->autocommit_time_set = true;
+ } else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
+ unsigned max_age_msecs;
+@@ -2249,7 +2261,10 @@ static int writecache_ctr(struct dm_targ
+ if (max_age_msecs > 86400000)
+ goto invalid_optional;
+ wc->max_age = msecs_to_jiffies(max_age_msecs);
++ wc->max_age_set = true;
++ wc->max_age_value = max_age_msecs;
+ } else if (!strcasecmp(string, "cleaner")) {
++ wc->cleaner_set = true;
+ wc->cleaner = true;
+ } else if (!strcasecmp(string, "fua")) {
+ if (WC_MODE_PMEM(wc)) {
+@@ -2455,7 +2470,6 @@ static void writecache_status(struct dm_
+ struct dm_writecache *wc = ti->private;
+ unsigned extra_args;
+ unsigned sz = 0;
+- uint64_t x;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+@@ -2467,11 +2481,11 @@ static void writecache_status(struct dm_
+ DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
+ wc->dev->name, wc->ssd_dev->name, wc->block_size);
+ extra_args = 0;
+- if (wc->start_sector)
++ if (wc->start_sector_set)
+ extra_args += 2;
+- if (wc->high_wm_percent_set && !wc->cleaner)
++ if (wc->high_wm_percent_set)
+ extra_args += 2;
+- if (wc->low_wm_percent_set && !wc->cleaner)
++ if (wc->low_wm_percent_set)
+ extra_args += 2;
+ if (wc->max_writeback_jobs_set)
+ extra_args += 2;
+@@ -2479,37 +2493,29 @@ static void writecache_status(struct dm_
+ extra_args += 2;
+ if (wc->autocommit_time_set)
+ extra_args += 2;
+- if (wc->max_age != MAX_AGE_UNSPECIFIED)
++ if (wc->max_age_set)
+ extra_args += 2;
+- if (wc->cleaner)
++ if (wc->cleaner_set)
+ extra_args++;
+ if (wc->writeback_fua_set)
+ extra_args++;
+
+ DMEMIT("%u", extra_args);
+- if (wc->start_sector)
++ if (wc->start_sector_set)
+ DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
+- if (wc->high_wm_percent_set && !wc->cleaner) {
+- x = (uint64_t)wc->freelist_high_watermark * 100;
+- x += wc->n_blocks / 2;
+- do_div(x, (size_t)wc->n_blocks);
+- DMEMIT(" high_watermark %u", 100 - (unsigned)x);
+- }
+- if (wc->low_wm_percent_set && !wc->cleaner) {
+- x = (uint64_t)wc->freelist_low_watermark * 100;
+- x += wc->n_blocks / 2;
+- do_div(x, (size_t)wc->n_blocks);
+- DMEMIT(" low_watermark %u", 100 - (unsigned)x);
+- }
++ if (wc->high_wm_percent_set)
++ DMEMIT(" high_watermark %u", wc->high_wm_percent_value);
++ if (wc->low_wm_percent_set)
++ DMEMIT(" low_watermark %u", wc->low_wm_percent_value);
+ if (wc->max_writeback_jobs_set)
+ DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
+ if (wc->autocommit_blocks_set)
+ DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
+ if (wc->autocommit_time_set)
+- DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+- if (wc->max_age != MAX_AGE_UNSPECIFIED)
+- DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age));
+- if (wc->cleaner)
++ DMEMIT(" autocommit_time %u", wc->autocommit_time_value);
++ if (wc->max_age_set)
++ DMEMIT(" max_age %u", wc->max_age_value);
++ if (wc->cleaner_set)
+ DMEMIT(" cleaner");
+ if (wc->writeback_fua_set)
+ DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
+@@ -2519,7 +2525,7 @@ static void writecache_status(struct dm_
+
+ static struct target_type writecache_target = {
+ .name = "writecache",
+- .version = {1, 3, 0},
++ .version = {1, 4, 0},
+ .module = THIS_MODULE,
+ .ctr = writecache_ctr,
+ .dtr = writecache_dtr,
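
The underlying problem is an integer round trip: the constructor converts a
user-supplied percentage into a block count, and the old status code
converted that count back into a percentage, which does not always reproduce
the original value. A small illustration of the loss (the conversion formula
here is a simplified stand-in, not the driver's exact arithmetic):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t n_blocks = 7;      /* tiny cache, chosen to make the rounding visible */
	unsigned requested = 30;    /* high_watermark percentage passed in the table */

	/* percent -> blocks, as a constructor might store it (integer division) */
	uint64_t watermark_blocks = n_blocks * requested / 100;

	/* blocks -> percent, as the old status path recomputed it */
	unsigned reported = (unsigned)(watermark_blocks * 100 / n_blocks);

	/* Prints "requested 30%, recomputed 28%"; remembering the requested
	 * value directly, as the patch does, avoids the mismatch. */
	printf("requested %u%%, recomputed %u%%\n", requested, reported);
	return 0;
}
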
--- /dev/null
+From e0fcd01510ad025c9bbce704c5c2579294056141 Mon Sep 17 00:00:00 2001
+From: Chao Yu <yuchao0@huawei.com>
+Date: Sat, 26 Dec 2020 18:07:01 +0800
+Subject: f2fs: enforce the immutable flag on open files
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit e0fcd01510ad025c9bbce704c5c2579294056141 upstream.
+
+This patch ports commit 02b016ca7f99 ("ext4: enforce the immutable
+flag on open files") to f2fs.
+
+According to the chattr man page, "a file with the 'i' attribute
+cannot be modified..." Historically, this was only enforced when the
+file was opened, per the rest of the description, "... and the file
+can not be opened in write mode".
+
+There is general agreement that we should standardize all file systems
+to prevent modifications even for files that were opened at the time
+the immutable flag is set. Eventually, a change to enforce this at
+the VFS layer should be landing in mainline.
+
+Cc: stable@kernel.org
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/file.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -59,6 +59,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(s
+ bool need_alloc = true;
+ int err = 0;
+
++ if (unlikely(IS_IMMUTABLE(inode)))
++ return VM_FAULT_SIGBUS;
++
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto err;
+@@ -869,6 +872,14 @@ int f2fs_setattr(struct dentry *dentry,
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
++ if (unlikely(IS_IMMUTABLE(inode)))
++ return -EPERM;
++
++ if (unlikely(IS_APPEND(inode) &&
++ (attr->ia_valid & (ATTR_MODE | ATTR_UID |
++ ATTR_GID | ATTR_TIMES_SET))))
++ return -EPERM;
++
+ if ((attr->ia_valid & ATTR_SIZE) &&
+ !f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
+@@ -4084,6 +4095,11 @@ static ssize_t f2fs_file_write_iter(stru
+ inode_lock(inode);
+ }
+
++ if (unlikely(IS_IMMUTABLE(inode))) {
++ ret = -EPERM;
++ goto unlock;
++ }
++
+ ret = generic_write_checks(iocb, from);
+ if (ret > 0) {
+ bool preallocated = false;
+@@ -4148,6 +4164,7 @@ write:
+ if (ret > 0)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+ }
++unlock:
+ inode_unlock(inode);
+ out:
+ trace_f2fs_file_write_iter(inode, iocb->ki_pos,
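
The behaviour being closed off can be demonstrated from user space: mark a
file immutable while a write-mode descriptor is still open, then write
through that descriptor. With the checks above the write fails with EPERM;
before them it still succeeded. A rough sketch (the file path is a
placeholder and must live on a filesystem that supports the flag, and
setting FS_IMMUTABLE_FL needs CAP_LINUX_IMMUTABLE):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	int fd = open("/mnt/f2fs/testfile", O_CREAT | O_WRONLY, 0600);
	int flags;

	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
		return 1;

	flags |= FS_IMMUTABLE_FL;
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)	/* needs CAP_LINUX_IMMUTABLE */
		return 1;

	if (write(fd, "x", 1) < 0)
		perror("write");	/* EPERM once the flag is enforced on open files */
	else
		printf("write went through despite FS_IMMUTABLE_FL\n");

	close(fd);
	return 0;
}
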
--- /dev/null
+From 2562515f0ad7342bde6456602c491b64c63fe950 Mon Sep 17 00:00:00 2001
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 16 Dec 2020 17:15:23 +0800
+Subject: f2fs: fix out-of-repair __setattr_copy()
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 2562515f0ad7342bde6456602c491b64c63fe950 upstream.
+
+__setattr_copy() was copied from setattr_copy() in fs/attr.c, but the two
+patches below were never applied to this inner copy; fix it.
+
+Commit 7fa294c8991c ("userns: Allow chown and setgid preservation")
+Commit 23adbe12ef7d ("fs,userns: Change inode_capable to capable_wrt_inode_uidgid")
+
+Fixes: fbfa2cc58d53 ("f2fs: add file operations")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/file.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -851,7 +851,8 @@ static void __setattr_copy(struct inode
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+
+- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
++ if (!in_group_p(inode->i_gid) &&
++ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ mode &= ~S_ISGID;
+ set_acl_inode(inode, mode);
+ }
--- /dev/null
+From b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Tue, 26 Jan 2021 17:00:42 -0800
+Subject: f2fs: flush data when enabling checkpoint back
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit b0ff4fe746fd028eef920ddc8c7b0361c1ede6ec upstream.
+
+While checkpoint=disable is in effect, f2fs bypasses all synchronous IOs such as
+sync and fsync. So, when enabling checkpointing again, we must flush all of them
+in order to keep the data persistent. Otherwise, a sudden power-cut right after
+enabling checkpoint will cause data loss.
+
+Fixes: 4354994f097d ("f2fs: checkpoint disabling")
+Cc: stable@vger.kernel.org
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/super.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1764,6 +1764,9 @@ restore_flag:
+
+ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+ {
++ /* we should flush all the data to keep data consistency */
++ sync_inodes_sb(sbi->sb);
++
+ down_write(&sbi->gc_lock);
+ f2fs_dirty_to_prefree(sbi);
+
--- /dev/null
+From 78178ca844f0eb88f21f31c7fde969384be4c901 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Fri, 5 Feb 2021 13:50:41 -0500
+Subject: gfs2: Don't skip dlm unlock if glock has an lvb
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit 78178ca844f0eb88f21f31c7fde969384be4c901 upstream.
+
+Patch fb6791d100d1 was designed to allow gfs2 to unmount quicker by
+skipping the step where it tells dlm to unlock glocks in EX with lvbs.
+This was done because when gfs2 unmounts a file system, it destroys the
+dlm lockspace shortly after it destroys the glocks so it doesn't need to
+unlock them all: the unlock is implied when the lockspace is destroyed
+by dlm.
+
+However, that patch introduced a use-after-free in dlm: as part of its
+normal dlm_recoverd process, it can call ls_recovery to recover dead
+locks. In so doing, it can call recover_rsbs which calls recover_lvb for
+any mastered rsbs. Func recover_lvb runs through the list of lkbs queued
+to the given rsb (if the glock is cached but unlocked, it will still be
+queued to the lkb, but in NL--Unlocked--mode) and if it has an lvb,
+copies it to the rsb, thus trying to preserve the lkb. However, when
+gfs2 skips the dlm unlock step, it frees the glock and its lvb, which
+means dlm's function recover_lvb references the now freed lvb pointer,
+copying the freed lvb memory to the rsb.
+
+This patch changes the check in gdlm_put_lock so that it calls
+dlm_unlock for all glocks that contain an lvb pointer.
+
+Fixes: fb6791d100d1 ("GFS2: skip dlm_unlock calls in unmount")
+Cc: stable@vger.kernel.org # v3.8+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/lock_dlm.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/fs/gfs2/lock_dlm.c
++++ b/fs/gfs2/lock_dlm.c
+@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_gl
+ {
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+- int lvb_needs_unlock = 0;
+ int error;
+
+ if (gl->gl_lksb.sb_lkid == 0) {
+@@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_gl
+ gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
+ gfs2_update_request_times(gl);
+
+- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
+-
+- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
+- lvb_needs_unlock = 1;
++ /* don't want to skip dlm_unlock writing the lvb when lock has one */
+
+ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+- !lvb_needs_unlock) {
++ !gl->gl_lksb.sb_lvbptr) {
+ gfs2_glock_free(gl);
+ return;
+ }
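
The change boils down to the condition under which gdlm_put_lock may skip the
dlm unlock on unmount: previously an lvb only forced the unlock while the
glock was held in EX, now owning an lvb at all forces it, so dlm is never
left holding a pointer into freed lvb memory. A condensed user-space model of
the two predicates (names and framing are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool old_skip_unlock(bool skip_flag, bool has_lvb, bool held_ex)
{
	bool lvb_needs_unlock = has_lvb && held_ex;	/* EX-only rule */
	return skip_flag && !lvb_needs_unlock;
}

static bool new_skip_unlock(bool skip_flag, bool has_lvb)
{
	return skip_flag && !has_lvb;	/* any lvb means we must unlock */
}

int main(void)
{
	/* Cached-but-unlocked glock with an lvb during unmount: the old rule
	 * skipped the unlock and left dlm referencing the freed lvb. */
	printf("old: skip=%d\n", old_skip_unlock(true, true, false));
	printf("new: skip=%d\n", new_skip_unlock(true, true));
	return 0;
}
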
--- /dev/null
+From f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Mon, 18 Jan 2021 15:18:59 -0500
+Subject: gfs2: fix glock confusion in function signal_our_withdraw
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit f5f02fde9f52b2d769c1c2ddfd3d9c4a1fe739a7 upstream.
+
+If go_free is defined, function signal_our_withdraw is supposed to
+synchronize on the GLF_FREEING flag of the inode glock, but it
+accidentally does that on the live glock. Fix that and disambiguate
+the glock variables.
+
+Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/util.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -93,9 +93,10 @@ out_unlock:
+
+ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+ {
+- struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
++ struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
+ struct inode *inode = sdp->sd_jdesc->jd_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
++ struct gfs2_glock *i_gl = ip->i_gl;
+ u64 no_formal_ino = ip->i_no_formal_ino;
+ int ret = 0;
+ int tries;
+@@ -141,7 +142,8 @@ static void signal_our_withdraw(struct g
+ atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+ thaw_super(sdp->sd_vfs);
+ } else {
+- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
++ wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
++ TASK_UNINTERRUPTIBLE);
+ }
+
+ /*
+@@ -161,15 +163,15 @@ static void signal_our_withdraw(struct g
+ * on other nodes to be successful, otherwise we remain the owner of
+ * the glock as far as dlm is concerned.
+ */
+- if (gl->gl_ops->go_free) {
+- set_bit(GLF_FREEING, &gl->gl_flags);
+- wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
++ if (i_gl->gl_ops->go_free) {
++ set_bit(GLF_FREEING, &i_gl->gl_flags);
++ wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+ }
+
+ /*
+ * Dequeue the "live" glock, but keep a reference so it's never freed.
+ */
+- gfs2_glock_hold(gl);
++ gfs2_glock_hold(live_gl);
+ gfs2_glock_dq_wait(&sdp->sd_live_gh);
+ /*
+ * We enqueue the "live" glock in EX so that all other nodes
+@@ -208,7 +210,7 @@ static void signal_our_withdraw(struct g
+ gfs2_glock_nq(&sdp->sd_live_gh);
+ }
+
+- gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
++ gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
+ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+
+ /*
--- /dev/null
+From 834ec3e1ee65029029225a86c12337a6cd385af7 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Fri, 5 Feb 2021 18:11:28 +0100
+Subject: gfs2: Lock imbalance on error path in gfs2_recover_one
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 834ec3e1ee65029029225a86c12337a6cd385af7 upstream.
+
+In gfs2_recover_one, fix a sd_log_flush_lock imbalance when a recovery
+pass fails.
+
+Fixes: c9ebc4b73799 ("gfs2: allow journal replay to hold sd_log_flush_lock")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/recovery.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/gfs2/recovery.c
++++ b/fs/gfs2/recovery.c
+@@ -514,8 +514,10 @@ void gfs2_recover_func(struct work_struc
+ error = foreach_descriptor(jd, head.lh_tail,
+ head.lh_blkno, pass);
+ lops_after_scan(jd, error, pass);
+- if (error)
++ if (error) {
++ up_read(&sdp->sd_log_flush_lock);
+ goto fail_gunlock_thaw;
++ }
+ }
+
+ recover_local_statfs(jd, &head);
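
The bug class here is a plain lock imbalance: a path takes sd_log_flush_lock
for reading and then jumps to an error label that never releases it. A
trivial user-space analogue of the fixed pattern, using a pthread rwlock and
made-up names:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t log_flush_lock = PTHREAD_RWLOCK_INITIALIZER;

static int recover_pass(int fail)
{
	pthread_rwlock_rdlock(&log_flush_lock);

	if (fail) {
		/* The error path must drop the lock before bailing out,
		 * which is exactly what the added up_read() does above. */
		pthread_rwlock_unlock(&log_flush_lock);
		return -1;
	}

	pthread_rwlock_unlock(&log_flush_lock);
	return 0;
}

int main(void)
{
	printf("ok pass:   %d\n", recover_pass(0));
	printf("fail pass: %d\n", recover_pass(1));
	return 0;
}
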
--- /dev/null
+From 7009fa9cd9a5262944b30eb7efb1f0561d074b68 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Tue, 9 Feb 2021 18:32:32 +0100
+Subject: gfs2: Recursive gfs2_quota_hold in gfs2_iomap_end
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 7009fa9cd9a5262944b30eb7efb1f0561d074b68 upstream.
+
+When starting an iomap write, gfs2_quota_lock_check -> gfs2_quota_lock
+-> gfs2_quota_hold is called from gfs2_iomap_begin. At the end of the
+write, before unlocking the quotas, punch_hole -> gfs2_quota_hold can be
+called again in gfs2_iomap_end, which is incorrect and leads to a failed
+assertion. Instead, move the call to gfs2_quota_unlock before the call
+to punch_hole to fix that.
+
+Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support")
+Cc: stable@vger.kernel.org # v4.19+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/gfs2/bmap.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/gfs2/bmap.c
++++ b/fs/gfs2/bmap.c
+@@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode *
+
+ gfs2_inplace_release(ip);
+
++ if (ip->i_qadata && ip->i_qadata->qa_qd_num)
++ gfs2_quota_unlock(ip);
++
+ if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+ /* Deallocate blocks that were just allocated. */
+ loff_t blockmask = i_blocksize(inode) - 1;
+@@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode *
+ }
+ }
+
+- if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+- gfs2_quota_unlock(ip);
+-
+ if (unlikely(!written))
+ goto out_unlock;
+
--- /dev/null
+From c1f664d2400e73d5ca0fcd067fa5847d2c789c11 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Tue, 9 Feb 2021 15:10:51 +0800
+Subject: irqchip/loongson-pch-msi: Use bitmap_zalloc() to allocate bitmap
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit c1f664d2400e73d5ca0fcd067fa5847d2c789c11 upstream.
+
+Currently we use bitmap_alloc() to allocate the MSI bitmap, which should be
+initialized to zero. This is obviously wrong, but it happens to work because
+MSI can fall back to legacy interrupt mode. So use bitmap_zalloc() instead.
+
+Fixes: 632dcc2c75ef6de3272aa ("irqchip: Add Loongson PCH MSI controller")
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210209071051.2078435-1-chenhuacai@loongson.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/irqchip/irq-loongson-pch-msi.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/irqchip/irq-loongson-pch-msi.c
++++ b/drivers/irqchip/irq-loongson-pch-msi.c
+@@ -225,7 +225,7 @@ static int pch_msi_init(struct device_no
+ goto err_priv;
+ }
+
+- priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL);
++ priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL);
+ if (!priv->msi_map) {
+ ret = -ENOMEM;
+ goto err_priv;
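
The distinction the fix relies on: bitmap_alloc() returns uninitialized
memory, so bits may start out set, while bitmap_zalloc() guarantees a cleared
map. A user-space analogue built on malloc vs calloc (the helper names below
mimic, but are not, the kernel API):

#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Stand-ins: the *_alloc flavour leaves the bits uninitialized, the
 * *_zalloc flavour hands back an all-clear bitmap. */
static unsigned long *bitmap_alloc_like(unsigned int nbits)
{
	return malloc((nbits + BITS_PER_LONG - 1) / BITS_PER_LONG * sizeof(unsigned long));
}

static unsigned long *bitmap_zalloc_like(unsigned int nbits)
{
	return calloc((nbits + BITS_PER_LONG - 1) / BITS_PER_LONG, sizeof(unsigned long));
}

int main(void)
{
	unsigned long *zeroed = bitmap_zalloc_like(192);
	unsigned long *uninit = bitmap_alloc_like(192);

	/* An MSI allocator that hands out clear bits must start from a zeroed
	 * map; stale set bits in an uninitialized map look like vectors that
	 * are already taken. */
	printf("zeroed word 0: %#lx\n", zeroed ? zeroed[0] : 0);
	printf("uninit word 0: indeterminate, may contain garbage\n");

	free(zeroed);
	free(uninit);
	return 0;
}
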
--- /dev/null
+From 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 14 Feb 2021 13:21:43 -0700
+Subject: proc: don't allow async path resolution of /proc/thread-self components
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 0d4370cfe36b7f1719123b621a4ec4d9c7a25f89 upstream.
+
+If this is attempted by an io-wq kthread, then return -EOPNOTSUPP as we
+don't currently support that. Once we can get task_pid_ptr() doing the
+right thing, then this can go away again.
+
+Use PF_IO_WORKER for this to specifically target the io_uring workers.
+Modify the /proc/self/ check to use PF_IO_WORKER as well.
+
+Cc: stable@vger.kernel.org
+Fixes: 8d4c3e76e3be ("proc: don't allow async path resolution of /proc/self components")
+Reported-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/proc/self.c | 2 +-
+ fs/proc/thread_self.c | 7 +++++++
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/proc/self.c
++++ b/fs/proc/self.c
+@@ -20,7 +20,7 @@ static const char *proc_self_get_link(st
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+- if (current->flags & PF_KTHREAD)
++ if (current->flags & PF_IO_WORKER)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!tgid)
+--- a/fs/proc/thread_self.c
++++ b/fs/proc/thread_self.c
+@@ -17,6 +17,13 @@ static const char *proc_thread_self_get_
+ pid_t pid = task_pid_nr_ns(current, ns);
+ char *name;
+
++ /*
++ * Not currently supported. Once we can inherit all of struct pid,
++ * we can allow this.
++ */
++ if (current->flags & PF_IO_WORKER)
++ return ERR_PTR(-EOPNOTSUPP);
++
+ if (!pid)
+ return ERR_PTR(-ENOENT);
+ name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
--- /dev/null
+From b29c5093820d333eef22f58cd04ec0d089059c39 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Tue, 2 Feb 2021 16:45:37 +0100
+Subject: s390/vtime: fix inline assembly clobber list
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit b29c5093820d333eef22f58cd04ec0d089059c39 upstream.
+
+The stck/stckf instruction used in the inline assembly within
+do_account_vtime() changes the condition code. This is not reflected
+in the clobber list, and therefore might result in incorrect code
+generation.
+
+It seems unlikely that the compiler could generate incorrect code
+considering the surrounding C code, but it must still be fixed.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/vtime.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/vtime.c
++++ b/arch/s390/kernel/vtime.c
+@@ -136,7 +136,8 @@ static int do_account_vtime(struct task_
+ " stck %1" /* Store current tod clock value */
+ #endif
+ : "=Q" (S390_lowcore.last_update_timer),
+- "=Q" (S390_lowcore.last_update_clock));
++ "=Q" (S390_lowcore.last_update_clock)
++ : : "cc");
+ clock = S390_lowcore.last_update_clock - clock;
+ timer -= S390_lowcore.last_update_timer;
+
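
The general rule the fix applies: when an asm statement modifies the
condition code or flags, the clobber list must say so, otherwise the compiler
is free to assume flag values survive across the statement. A minimal sketch
of the pattern, written for an x86-64 toolchain purely so it is easy to
build; the instruction is unrelated to the s390 stck/stckf:

#include <stdio.h>

int main(void)
{
	unsigned long val = 42;

	/* negq rewrites the flags register, so "cc" is listed as clobbered,
	 * just as the s390 asm must declare that stck/stckf changes the
	 * condition code. */
	asm volatile("negq %0"
		     : "+r" (val)
		     :
		     : "cc");

	printf("%ld\n", (long)val);	/* prints -42 */
	return 0;
}
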
zonefs-fix-file-size-of-zones-in-full-condition.patch
kcmp-support-selection-of-sys_kcmp-without-checkpoint_restore.patch
thermal-cpufreq_cooling-freq_qos_update_request-returns-0-on-error.patch
+cpufreq-qcom-hw-drop-devm_xxx-calls-from-init-exit-hooks.patch
+cpufreq-intel_pstate-change-intel_pstate_get_hwp_max-argument.patch
+cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch
+proc-don-t-allow-async-path-resolution-of-proc-thread-self-components.patch
+s390-vtime-fix-inline-assembly-clobber-list.patch
+virtio-s390-implement-virtio-ccw-revision-2-correctly.patch
+um-mm-check-more-comprehensively-for-stub-changes.patch
+um-defer-killing-userspace-on-page-table-update-failures.patch
+irqchip-loongson-pch-msi-use-bitmap_zalloc-to-allocate-bitmap.patch
+f2fs-fix-out-of-repair-__setattr_copy.patch
+f2fs-enforce-the-immutable-flag-on-open-files.patch
+f2fs-flush-data-when-enabling-checkpoint-back.patch
+sparc32-fix-a-user-triggerable-oops-in-clear_user.patch
+spi-fsl-invert-spisel_boot-signal-on-mpc8309.patch
+spi-spi-synquacer-fix-set_cs-handling.patch
+gfs2-fix-glock-confusion-in-function-signal_our_withdraw.patch
+gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch
+gfs2-lock-imbalance-on-error-path-in-gfs2_recover_one.patch
+gfs2-recursive-gfs2_quota_hold-in-gfs2_iomap_end.patch
+dm-fix-deadlock-when-swapping-to-encrypted-device.patch
+dm-table-fix-iterate_devices-based-device-capability-checks.patch
+dm-table-fix-dax-iterate_devices-based-device-capability-checks.patch
+dm-table-fix-zoned-iterate_devices-based-device-capability-checks.patch
+dm-writecache-fix-performance-degradation-in-ssd-mode.patch
+dm-writecache-return-the-exact-table-values-that-were-set.patch
+dm-writecache-fix-writing-beyond-end-of-underlying-device-when-shrinking.patch
+dm-era-recover-committed-writeset-after-crash.patch
+dm-era-update-in-core-bitset-after-committing-the-metadata.patch
+dm-era-verify-the-data-block-size-hasn-t-changed.patch
+dm-era-fix-bitset-memory-leaks.patch
+dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch
+dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch
+dm-era-only-resize-metadata-in-preresume.patch
--- /dev/null
+From 7780918b36489f0b2f9a3749d7be00c2ceaec513 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 20 Jul 2020 02:21:51 +0100
+Subject: sparc32: fix a user-triggerable oops in clear_user()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 7780918b36489f0b2f9a3749d7be00c2ceaec513 upstream.
+
+Back in 2.1.29 the clear_user() guts (__bzero()) had been merged
+with memset(). Unfortunately, while all exception handlers had been
+copied, one of the exception table entries got lost. As the result,
+clear_user() starting at 128*n bytes before the end of page and
+spanning between 8 and 127 bytes into the next page would oops when
+the second page is unmapped. It's trivial to reproduce - all
+it takes is
+
+main()
+{
+ int fd = open("/dev/zero", O_RDONLY);
+ char *p = mmap(NULL, 16384, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ munmap(p + 8192, 8192);
+ read(fd, p + 8192 - 128, 192);
+}
+
+which had been oopsing since March 1997. Says something about
+the quality of test coverage... ;-/ And while today the sparc32 port
+is nearly dead, back in '97 it had been very much alive; in fact,
+sparc64 had only been in mainline for 3 months by that point...
+
+Cc: stable@kernel.org
+Fixes: v2.1.29
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/lib/memset.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/sparc/lib/memset.S
++++ b/arch/sparc/lib/memset.S
+@@ -142,6 +142,7 @@ __bzero:
+ ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
+ ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
+ 13:
++ EXT(12b, 13b, 21f)
+ be 8f
+ andcc %o1, 4, %g0
+
--- /dev/null
+From 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e Mon Sep 17 00:00:00 2001
+From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+Date: Sat, 30 Jan 2021 15:35:45 +0100
+Subject: spi: fsl: invert spisel_boot signal on MPC8309
+
+From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+
+commit 9d2aa6dbf87af89c13cac2d1b4cccad83fb14a7e upstream.
+
+Commit 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH
+is not set in spi->mode") broke our MPC8309 board by effectively
+inverting the boolean value passed to fsl_spi_cs_control. The
+SPISEL_BOOT signal is used as chipselect, but it's not a gpio, so
+we cannot rely on gpiolib handling the polarity.
+
+Adapt to the new world order by inverting the logic here. This does
+assume that the slave sitting at the SPISEL_BOOT is active low, but
+should that ever turn out not to be the case, one can create a stub
+gpiochip driver controlling a single gpio (or rather, a single "spo",
+special-purpose output).
+
+Fixes: 7a2da5d7960a ("spi: fsl: Fix driver breakage when SPI_CS_HIGH is not set in spi->mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
+Link: https://lore.kernel.org/r/20210130143545.505613-1-rasmus.villemoes@prevas.dk
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-fsl-spi.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -695,7 +695,7 @@ static void fsl_spi_cs_control(struct sp
+
+ if (WARN_ON_ONCE(!pinfo->immr_spi_cs))
+ return;
+- iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs);
++ iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs);
+ }
+ }
+
--- /dev/null
+From 1c9f1750f0305bf605ff22686fc0ac89c06deb28 Mon Sep 17 00:00:00 2001
+From: Masahisa Kojima <masahisa.kojima@linaro.org>
+Date: Mon, 1 Feb 2021 01:31:09 -0600
+Subject: spi: spi-synquacer: fix set_cs handling
+
+From: Masahisa Kojima <masahisa.kojima@linaro.org>
+
+commit 1c9f1750f0305bf605ff22686fc0ac89c06deb28 upstream.
+
+When the slave chip select is deasserted, the DMSTOP bit
+must be set.
+
+Fixes: b0823ee35cf9 ("spi: Add spi driver for Socionext SynQuacer platform")
+Signed-off-by: Masahisa Kojima <masahisa.kojima@linaro.org>
+Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210201073109.9036-1-jassisinghbrar@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-synquacer.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/spi/spi-synquacer.c
++++ b/drivers/spi/spi-synquacer.c
+@@ -490,6 +490,10 @@ static void synquacer_spi_set_cs(struct
+ val &= ~(SYNQUACER_HSSPI_DMPSEL_CS_MASK <<
+ SYNQUACER_HSSPI_DMPSEL_CS_SHIFT);
+ val |= spi->chip_select << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT;
++
++ if (!enable)
++ val |= SYNQUACER_HSSPI_DMSTOP_STOP;
++
+ writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+ }
+
--- /dev/null
+From a7d48886cacf8b426e0079bca9639d2657cf2d38 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 13 Jan 2021 22:08:03 +0100
+Subject: um: defer killing userspace on page table update failures
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit a7d48886cacf8b426e0079bca9639d2657cf2d38 upstream.
+
+In some cases we can get to fix_range_common() with mmap_sem held,
+and in others we get there without it being held. For example, we
+get there with it held from sys_mprotect(), and without it held
+from fork_handler().
+
+Avoid any issues in this and simply defer killing the task until
+it runs the next time. Do it on the mm so that another task that
+shares the same mm can't continue running afterwards.
+
+Cc: stable@vger.kernel.org
+Fixes: 468f65976a8d ("um: Fix hung task in fix_range_common()")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/include/shared/skas/mm_id.h | 1 +
+ arch/um/kernel/tlb.c | 7 +++----
+ arch/um/os-Linux/skas/process.c | 4 ++++
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/arch/um/include/shared/skas/mm_id.h
++++ b/arch/um/include/shared/skas/mm_id.h
+@@ -12,6 +12,7 @@ struct mm_id {
+ int pid;
+ } u;
+ unsigned long stack;
++ int kill;
+ };
+
+ #endif
+--- a/arch/um/kernel/tlb.c
++++ b/arch/um/kernel/tlb.c
+@@ -352,12 +352,11 @@ void fix_range_common(struct mm_struct *
+
+ /* This is not an else because ret is modified above */
+ if (ret) {
++ struct mm_id *mm_idp = &current->mm->context.id;
++
+ printk(KERN_ERR "fix_range_common: failed, killing current "
+ "process: %d\n", task_tgid_vnr(current));
+- /* We are under mmap_lock, release it such that current can terminate */
+- mmap_write_unlock(current->mm);
+- force_sig(SIGKILL);
+- do_signal(&current->thread.regs);
++ mm_idp->kill = 1;
+ }
+ }
+
+--- a/arch/um/os-Linux/skas/process.c
++++ b/arch/um/os-Linux/skas/process.c
+@@ -249,6 +249,7 @@ static int userspace_tramp(void *stack)
+ }
+
+ int userspace_pid[NR_CPUS];
++int kill_userspace_mm[NR_CPUS];
+
+ /**
+ * start_userspace() - prepare a new userspace process
+@@ -342,6 +343,8 @@ void userspace(struct uml_pt_regs *regs,
+ interrupt_end();
+
+ while (1) {
++ if (kill_userspace_mm[0])
++ fatal_sigsegv();
+
+ /*
+ * This can legitimately fail if the process loads a
+@@ -650,4 +653,5 @@ void reboot_skas(void)
+ void __switch_mm(struct mm_id *mm_idp)
+ {
+ userspace_pid[0] = mm_idp->u.pid;
++ kill_userspace_mm[0] = mm_idp->kill;
+ }
--- /dev/null
+From 47da29763ec9a153b9b685bff9db659e4e09e494 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 13 Jan 2021 22:08:02 +0100
+Subject: um: mm: check more comprehensively for stub changes
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 47da29763ec9a153b9b685bff9db659e4e09e494 upstream.
+
+If userspace tries to change the stub, we need to kill it,
+because otherwise it can escape the virtual machine. In a
+few cases the stub checks weren't good, e.g. if userspace
+just tries to
+
+ mmap(0x100000 - 0x1000, 0x3000, ...)
+
+it could succeed in getting a new private/anonymous mapping
+replacing the stubs. Fix this by checking everywhere, and by
+checking for _overlap_, not just direct changes.
+
+Cc: stable@vger.kernel.org
+Fixes: 3963333fe676 ("uml: cover stubs with a VMA")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/kernel/tlb.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/um/kernel/tlb.c
++++ b/arch/um/kernel/tlb.c
+@@ -125,6 +125,9 @@ static int add_mmap(unsigned long virt,
+ struct host_vm_op *last;
+ int fd = -1, ret = 0;
+
++ if (virt + len > STUB_START && virt < STUB_END)
++ return -EINVAL;
++
+ if (hvc->userspace)
+ fd = phys_mapping(phys, &offset);
+ else
+@@ -162,7 +165,7 @@ static int add_munmap(unsigned long addr
+ struct host_vm_op *last;
+ int ret = 0;
+
+- if ((addr >= STUB_START) && (addr < STUB_END))
++ if (addr + len > STUB_START && addr < STUB_END)
+ return -EINVAL;
+
+ if (hvc->index != 0) {
+@@ -192,6 +195,9 @@ static int add_mprotect(unsigned long ad
+ struct host_vm_op *last;
+ int ret = 0;
+
++ if (addr + len > STUB_START && addr < STUB_END)
++ return -EINVAL;
++
+ if (hvc->index != 0) {
+ last = &hvc->ops[hvc->index - 1];
+ if ((last->type == MPROTECT) &&
+@@ -472,6 +478,10 @@ void flush_tlb_page(struct vm_area_struc
+ struct mm_id *mm_id;
+
+ address &= PAGE_MASK;
++
++ if (address >= STUB_START && address < STUB_END)
++ goto kill;
++
+ pgd = pgd_offset(mm, address);
+ if (!pgd_present(*pgd))
+ goto kill;
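
The old test only rejected operations whose start address fell inside the
stub range; the new test rejects any operation whose interval
[addr, addr+len) overlaps [STUB_START, STUB_END). A small sketch of the two
predicates, using made-up addresses rather than UML's real stub layout:

#include <stdbool.h>
#include <stdio.h>

#define STUB_START 0x100000UL	/* illustrative values only */
#define STUB_END   0x102000UL

static bool old_check_rejects(unsigned long addr, unsigned long len)
{
	(void)len;
	return addr >= STUB_START && addr < STUB_END;	/* start address only */
}

static bool new_check_rejects(unsigned long addr, unsigned long len)
{
	return addr + len > STUB_START && addr < STUB_END;	/* any overlap */
}

int main(void)
{
	/* Mapping that starts below the stub but runs over it, as in the
	 * mmap() example from the changelog: the old check let it through,
	 * the new one rejects it. */
	unsigned long addr = STUB_START - 0x1000, len = 0x3000;

	printf("old rejects: %d, new rejects: %d\n",
	       old_check_rejects(addr, len), new_check_rejects(addr, len));
	return 0;
}
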
--- /dev/null
+From 182f709c5cff683e6732d04c78e328de0532284f Mon Sep 17 00:00:00 2001
+From: Cornelia Huck <cohuck@redhat.com>
+Date: Tue, 16 Feb 2021 12:06:45 +0100
+Subject: virtio/s390: implement virtio-ccw revision 2 correctly
+
+From: Cornelia Huck <cohuck@redhat.com>
+
+commit 182f709c5cff683e6732d04c78e328de0532284f upstream.
+
+CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw,
+and drivers should only rely on it being implemented when they
+negotiated at least that revision with the device.
+
+However, virtio_ccw_get_status() issued READ_STATUS for any
+device operating at least at revision 1. If the device accepts
+READ_STATUS regardless of the negotiated revision (which some
+implementations like QEMU do, even though the spec currently does
+not allow it), everything works as intended. While a device
+rejecting the command should also be handled gracefully, we will
+not be able to see any changes the device makes to the status,
+such as setting NEEDS_RESET or setting the status to zero after
+a completed reset.
+
+We negotiated the revision to at most 1, as we never bumped the
+maximum revision; let's do that now and properly send READ_STATUS
+only if we are operating at least at revision 2.
+
+Cc: stable@vger.kernel.org
+Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw")
+Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
+Signed-off-by: Cornelia Huck <cohuck@redhat.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Link: https://lore.kernel.org/r/20210216110645.1087321-1-cohuck@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/virtio/virtio_ccw.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/virtio/virtio_ccw.c
++++ b/drivers/s390/virtio/virtio_ccw.c
+@@ -117,7 +117,7 @@ struct virtio_rev_info {
+ };
+
+ /* the highest virtio-ccw revision we support */
+-#define VIRTIO_CCW_REV_MAX 1
++#define VIRTIO_CCW_REV_MAX 2
+
+ struct virtio_ccw_vq_info {
+ struct virtqueue *vq;
+@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct v
+ u8 old_status = vcdev->dma_area->status;
+ struct ccw1 *ccw;
+
+- if (vcdev->revision < 1)
++ if (vcdev->revision < 2)
+ return vcdev->dma_area->status;
+
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));