From: Greg Kroah-Hartman Date: Mon, 1 Mar 2021 13:55:08 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v4.4.259~33 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0aec2565fd3d002ca1f7ee2bf1ff3c89f43ce01b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch dm-era-fix-bitset-memory-leaks.patch dm-era-only-resize-metadata-in-preresume.patch dm-era-recover-committed-writeset-after-crash.patch dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch dm-era-verify-the-data-block-size-hasn-t-changed.patch dm-fix-deadlock-when-swapping-to-encrypted-device.patch f2fs-fix-out-of-repair-__setattr_copy.patch gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch sparc32-fix-a-user-triggerable-oops-in-clear_user.patch --- diff --git a/queue-4.19/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch b/queue-4.19/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch new file mode 100644 index 00000000000..433b36efe07 --- /dev/null +++ b/queue-4.19/cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch @@ -0,0 +1,58 @@ +From 6f67e060083a84a4cc364eab6ae40c717165fb0c Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Tue, 12 Jan 2021 13:21:27 +0800 +Subject: cpufreq: intel_pstate: Get per-CPU max freq via MSR_HWP_CAPABILITIES if available + +From: Chen Yu + +commit 6f67e060083a84a4cc364eab6ae40c717165fb0c upstream. + +Currently, when turbo is disabled (either by BIOS or by the user), +the intel_pstate driver reads the max non-turbo frequency from the +package-wide MSR_PLATFORM_INFO(0xce) register. 
+ +However, on asymmetric platforms it is possible in theory that small +and big core with HWP enabled might have different max non-turbo CPU +frequency, because MSR_HWP_CAPABILITIES is per-CPU scope according +to Intel Software Developer Manual. + +The turbo max freq is already per-CPU in current code, so make +similar change to the max non-turbo frequency as well. + +Reported-by: Wendy Wang +Signed-off-by: Chen Yu +[ rjw: Subject and changelog edits ] +Cc: 4.18+ # 4.18+: a45ee4d4e13b: cpufreq: intel_pstate: Change intel_pstate_get_hwp_max() argument +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/intel_pstate.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -1420,11 +1420,9 @@ static void intel_pstate_max_within_limi + static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) + { + cpu->pstate.min_pstate = pstate_funcs.get_min(); +- cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); + cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); + cpu->pstate.scaling = pstate_funcs.get_scaling(); +- cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + + if (hwp_active && !hwp_mode_bdw) { + unsigned int phy_max, current_max; +@@ -1432,9 +1430,12 @@ static void intel_pstate_get_cpu_pstates + intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max); + cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; + cpu->pstate.turbo_pstate = phy_max; ++ cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached)); + } else { + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; ++ cpu->pstate.max_pstate = pstate_funcs.get_max(); + } ++ cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + + if (pstate_funcs.get_aperf_mperf_shift) + cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); diff --git 
a/queue-4.19/dm-era-fix-bitset-memory-leaks.patch b/queue-4.19/dm-era-fix-bitset-memory-leaks.patch new file mode 100644 index 00000000000..3505cd29d7c --- /dev/null +++ b/queue-4.19/dm-era-fix-bitset-memory-leaks.patch @@ -0,0 +1,58 @@ +From 904e6b266619c2da5c58b5dce14ae30629e39645 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:25:54 +0200 +Subject: dm era: Fix bitset memory leaks + +From: Nikos Tsironis + +commit 904e6b266619c2da5c58b5dce14ae30629e39645 upstream. + +Deallocate the memory allocated for the in-core bitsets when destroying +the target and in error paths. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Reviewed-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -46,6 +46,7 @@ struct writeset { + static void writeset_free(struct writeset *ws) + { + vfree(ws->bits); ++ ws->bits = NULL; + } + + static int setup_on_disk_bitset(struct dm_disk_bitset *info, +@@ -810,6 +811,8 @@ static struct era_metadata *metadata_ope + + static void metadata_close(struct era_metadata *md) + { ++ writeset_free(&md->writesets[0]); ++ writeset_free(&md->writesets[1]); + destroy_persistent_data_objects(md); + kfree(md); + } +@@ -847,6 +850,7 @@ static int metadata_resize(struct era_me + r = writeset_alloc(&md->writesets[1], *new_size); + if (r) { + DMERR("%s: writeset_alloc failed for writeset 1", __func__); ++ writeset_free(&md->writesets[0]); + return r; + } + +@@ -857,6 +861,8 @@ static int metadata_resize(struct era_me + &value, &md->era_array_root); + if (r) { + DMERR("%s: dm_array_resize failed", __func__); ++ writeset_free(&md->writesets[0]); ++ writeset_free(&md->writesets[1]); + return r; + } + diff --git a/queue-4.19/dm-era-only-resize-metadata-in-preresume.patch 
b/queue-4.19/dm-era-only-resize-metadata-in-preresume.patch new file mode 100644 index 00000000000..622dbd880c8 --- /dev/null +++ b/queue-4.19/dm-era-only-resize-metadata-in-preresume.patch @@ -0,0 +1,80 @@ +From cca2c6aebe86f68103a8615074b3578e854b5016 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Thu, 11 Feb 2021 16:22:43 +0200 +Subject: dm era: only resize metadata in preresume + +From: Nikos Tsironis + +commit cca2c6aebe86f68103a8615074b3578e854b5016 upstream. + +Metadata resize shouldn't happen in the ctr. The ctr loads a temporary +(inactive) table that will only become active upon resume. That is why +resize should always be done in terms of resume. Otherwise a load (ctr) +whose inactive table never becomes active will incorrectly resize the +metadata. + +Also, perform the resize directly in preresume, instead of using the +worker to do it. + +The worker might run other metadata operations, e.g., it could start +digestion, before resizing the metadata. These operations will end up +using the old size. + +This could lead to errors, like: + + device-mapper: era: metadata_digest_transcribe_writeset: dm_array_set_value failed + device-mapper: era: process_old_eras: digest step failed, stopping digestion + +The reason of the above error is that the worker started the digestion +of the archived writeset using the old, larger size. + +As a result, metadata_digest_transcribe_writeset tried to write beyond +the end of the era array. 
+ +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -1500,15 +1500,6 @@ static int era_ctr(struct dm_target *ti, + } + era->md = md; + +- era->nr_blocks = calc_nr_blocks(era); +- +- r = metadata_resize(era->md, &era->nr_blocks); +- if (r) { +- ti->error = "couldn't resize metadata"; +- era_destroy(era); +- return -ENOMEM; +- } +- + era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); + if (!era->wq) { + ti->error = "could not create workqueue for metadata object"; +@@ -1585,9 +1576,17 @@ static int era_preresume(struct dm_targe + dm_block_t new_size = calc_nr_blocks(era); + + if (era->nr_blocks != new_size) { +- r = in_worker1(era, metadata_resize, &new_size); +- if (r) ++ r = metadata_resize(era->md, &new_size); ++ if (r) { ++ DMERR("%s: metadata_resize failed", __func__); ++ return r; ++ } ++ ++ r = metadata_commit(era->md); ++ if (r) { ++ DMERR("%s: metadata_commit failed", __func__); + return r; ++ } + + era->nr_blocks = new_size; + } diff --git a/queue-4.19/dm-era-recover-committed-writeset-after-crash.patch b/queue-4.19/dm-era-recover-committed-writeset-after-crash.patch new file mode 100644 index 00000000000..c5483748c12 --- /dev/null +++ b/queue-4.19/dm-era-recover-committed-writeset-after-crash.patch @@ -0,0 +1,125 @@ +From de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:19:30 +0200 +Subject: dm era: Recover committed writeset after crash + +From: Nikos Tsironis + +commit de89afc1e40fdfa5f8b666e5d07c43d21a1d3be0 upstream. + +Following a system crash, dm-era fails to recover the committed writeset +for the current era, leading to lost writes. 
That is, we lose the +information about what blocks were written during the affected era. + +dm-era assumes that the writeset of the current era is archived when the +device is suspended. So, when resuming the device, it just moves on to +the next era, ignoring the committed writeset. + +This assumption holds when the device is properly shut down. But, when +the system crashes, the code that suspends the target never runs, so the +writeset for the current era is not archived. + +There are three issues that cause the committed writeset to get lost: + +1. dm-era doesn't load the committed writeset when opening the metadata +2. The code that resizes the metadata wipes the information about the + committed writeset (assuming it was loaded at step 1) +3. era_preresume() starts a new era, without taking into account that + the current era might not have been archived, due to a system crash. + +To fix this: + +1. Load the committed writeset when opening the metadata +2. Fix the code that resizes the metadata to make sure it doesn't wipe + the loaded writeset +3. Fix era_preresume() to check for a loaded writeset and archive it, + before starting a new era. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -70,8 +70,6 @@ static size_t bitset_size(unsigned nr_bi + */ + static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) + { +- ws->md.nr_bits = nr_blocks; +- ws->md.root = INVALID_WRITESET_ROOT; + ws->bits = vzalloc(bitset_size(nr_blocks)); + if (!ws->bits) { + DMERR("%s: couldn't allocate in memory bitset", __func__); +@@ -84,12 +82,14 @@ static int writeset_alloc(struct writese + /* + * Wipes the in-core bitset, and creates a new on disk bitset. 
+ */ +-static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) ++static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws, ++ dm_block_t nr_blocks) + { + int r; + +- memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); ++ memset(ws->bits, 0, bitset_size(nr_blocks)); + ++ ws->md.nr_bits = nr_blocks; + r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); + if (r) { + DMERR("%s: setup_on_disk_bitset failed", __func__); +@@ -578,6 +578,7 @@ static int open_metadata(struct era_meta + md->nr_blocks = le32_to_cpu(disk->nr_blocks); + md->current_era = le32_to_cpu(disk->current_era); + ++ ws_unpack(&disk->current_writeset, &md->current_writeset->md); + md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); + md->era_array_root = le64_to_cpu(disk->era_array_root); + md->metadata_snap = le64_to_cpu(disk->metadata_snap); +@@ -869,7 +870,6 @@ static int metadata_era_archive(struct e + } + + ws_pack(&md->current_writeset->md, &value); +- md->current_writeset->md.root = INVALID_WRITESET_ROOT; + + keys[0] = md->current_era; + __dm_bless_for_disk(&value); +@@ -881,6 +881,7 @@ static int metadata_era_archive(struct e + return r; + } + ++ md->current_writeset->md.root = INVALID_WRITESET_ROOT; + md->archived_writesets = true; + + return 0; +@@ -897,7 +898,7 @@ static int metadata_new_era(struct era_m + int r; + struct writeset *new_writeset = next_writeset(md); + +- r = writeset_init(&md->bitset_info, new_writeset); ++ r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks); + if (r) { + DMERR("%s: writeset_init failed", __func__); + return r; +@@ -950,7 +951,7 @@ static int metadata_commit(struct era_me + int r; + struct dm_block *sblock; + +- if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { ++ if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { + r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, + &md->current_writeset->md.root); + if (r) { +@@ -1579,7 +1580,7 @@ static int 
era_preresume(struct dm_targe + + start_worker(era); + +- r = in_worker0(era, metadata_new_era); ++ r = in_worker0(era, metadata_era_rollover); + if (r) { + DMERR("%s: metadata_era_rollover failed", __func__); + return r; diff --git a/queue-4.19/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch b/queue-4.19/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch new file mode 100644 index 00000000000..a75ea312372 --- /dev/null +++ b/queue-4.19/dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch @@ -0,0 +1,80 @@ +From 2524933307fd0036d5c32357c693c021ab09a0b0 Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:22:04 +0200 +Subject: dm era: Reinitialize bitset cache before digesting a new writeset + +From: Nikos Tsironis + +commit 2524933307fd0036d5c32357c693c021ab09a0b0 upstream. + +In case of devices with at most 64 blocks, the digestion of consecutive +eras uses the writeset of the first era as the writeset of all eras to +digest, leading to lost writes. That is, we lose the information about +what blocks were written during the affected eras. + +The digestion code uses a dm_disk_bitset object to access the archived +writesets. This structure includes a one word (64-bit) cache to reduce +the number of array lookups. + +This structure is initialized only once, in metadata_digest_start(), +when we kick off digestion. + +But, when we insert a new writeset into the writeset tree, before the +digestion of the previous writeset is done, or equivalently when there +are multiple writesets in the writeset tree to digest, then all these +writesets are digested using the same cache and the cache is not +re-initialized when moving from one writeset to the next. + +For devices with more than 64 blocks, i.e., the size of the cache, the +cache is indirectly invalidated when we move to a next set of blocks, so +we avoid the bug. 
+ +But for devices with at most 64 blocks we end up using the same cached +data for digesting all archived writesets, i.e., the cache is loaded +when digesting the first writeset and it never gets reloaded, until the +digestion is done. + +As a result, the writeset of the first era to digest is used as the +writeset of all the following archived eras, leading to lost writes. + +Fix this by reinitializing the dm_disk_bitset structure, and thus +invalidating the cache, every time the digestion code starts digesting a +new writeset. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -755,6 +755,12 @@ static int metadata_digest_lookup_writes + ws_unpack(&disk, &d->writeset); + d->value = cpu_to_le32(key); + ++ /* ++ * We initialise another bitset info to avoid any caching side effects ++ * with the previous one. ++ */ ++ dm_disk_bitset_init(md->tm, &d->info); ++ + d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); + d->current_bit = 0; + d->step = metadata_digest_transcribe_writeset; +@@ -768,12 +774,6 @@ static int metadata_digest_start(struct + return 0; + + memset(d, 0, sizeof(*d)); +- +- /* +- * We initialise another bitset info to avoid any caching side +- * effects with the previous one. 
+- */ +- dm_disk_bitset_init(md->tm, &d->info); + d->step = metadata_digest_lookup_writeset; + + return 0; diff --git a/queue-4.19/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch b/queue-4.19/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch new file mode 100644 index 00000000000..24155dac76f --- /dev/null +++ b/queue-4.19/dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch @@ -0,0 +1,33 @@ +From 64f2d15afe7b336aafebdcd14cc835ecf856df4b Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: Fri, 22 Jan 2021 17:25:55 +0200 +Subject: dm era: Use correct value size in equality function of writeset tree + +From: Nikos Tsironis + +commit 64f2d15afe7b336aafebdcd14cc835ecf856df4b upstream. + +Fix the writeset tree equality test function to use the right value size +when comparing two btree values. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Reviewed-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -388,7 +388,7 @@ static void ws_dec(void *context, const + + static int ws_eq(void *context, const void *value1, const void *value2) + { +- return !memcmp(value1, value2, sizeof(struct writeset_metadata)); ++ return !memcmp(value1, value2, sizeof(struct writeset_disk)); + } + + /*----------------------------------------------------------------*/ diff --git a/queue-4.19/dm-era-verify-the-data-block-size-hasn-t-changed.patch b/queue-4.19/dm-era-verify-the-data-block-size-hasn-t-changed.patch new file mode 100644 index 00000000000..85fd374d467 --- /dev/null +++ b/queue-4.19/dm-era-verify-the-data-block-size-hasn-t-changed.patch @@ -0,0 +1,49 @@ +From c8e846ff93d5eaa5384f6f325a1687ac5921aade Mon Sep 17 00:00:00 2001 +From: Nikos Tsironis +Date: 
Fri, 22 Jan 2021 17:25:53 +0200 +Subject: dm era: Verify the data block size hasn't changed + +From: Nikos Tsironis + +commit c8e846ff93d5eaa5384f6f325a1687ac5921aade upstream. + +dm-era doesn't support changing the data block size of existing devices, +so check explicitly that the requested block size for a new target +matches the one stored in the metadata. + +Fixes: eec40579d84873 ("dm: add era target") +Cc: stable@vger.kernel.org # v3.15+ +Signed-off-by: Nikos Tsironis +Reviewed-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-era-target.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -563,6 +563,15 @@ static int open_metadata(struct era_meta + } + + disk = dm_block_data(sblock); ++ ++ /* Verify the data block size hasn't changed */ ++ if (le32_to_cpu(disk->data_block_size) != md->block_size) { ++ DMERR("changing the data block size (from %u to %llu) is not supported", ++ le32_to_cpu(disk->data_block_size), md->block_size); ++ r = -EINVAL; ++ goto bad; ++ } ++ + r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, + disk->metadata_space_map_root, + sizeof(disk->metadata_space_map_root), +@@ -574,7 +583,6 @@ static int open_metadata(struct era_meta + + setup_infos(md); + +- md->block_size = le32_to_cpu(disk->data_block_size); + md->nr_blocks = le32_to_cpu(disk->nr_blocks); + md->current_era = le32_to_cpu(disk->current_era); + diff --git a/queue-4.19/dm-fix-deadlock-when-swapping-to-encrypted-device.patch b/queue-4.19/dm-fix-deadlock-when-swapping-to-encrypted-device.patch new file mode 100644 index 00000000000..c117ba9a85f --- /dev/null +++ b/queue-4.19/dm-fix-deadlock-when-swapping-to-encrypted-device.patch @@ -0,0 +1,201 @@ +From a666e5c05e7c4aaabb2c5d58117b0946803d03d2 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Wed, 10 Feb 2021 15:26:23 -0500 +Subject: dm: fix deadlock when swapping to encrypted device 
+ +From: Mikulas Patocka + +commit a666e5c05e7c4aaabb2c5d58117b0946803d03d2 upstream. + +The system would deadlock when swapping to a dm-crypt device. The reason +is that for each incoming write bio, dm-crypt allocates memory that holds +encrypted data. These excessive allocations exhaust all the memory and the +result is either deadlock or OOM trigger. + +This patch limits the number of in-flight swap bios, so that the memory +consumed by dm-crypt is limited. The limit is enforced if the target set +the "limit_swap_bios" variable and if the bio has REQ_SWAP set. + +Non-swap bios are not affected because taking the semaphore would cause +performance degradation. + +This is similar to request-based drivers - they will also block when the +number of requests is over the limit. + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-core.h | 4 ++ + drivers/md/dm-crypt.c | 1 + drivers/md/dm.c | 60 ++++++++++++++++++++++++++++++++++++++++++ + include/linux/device-mapper.h | 5 +++ + 4 files changed, 70 insertions(+) + +--- a/drivers/md/dm-core.h ++++ b/drivers/md/dm-core.h +@@ -110,6 +110,10 @@ struct mapped_device { + /* zero-length flush that will be cloned and submitted to targets */ + struct bio flush_bio; + ++ int swap_bios; ++ struct semaphore swap_bios_semaphore; ++ struct mutex swap_bios_lock; ++ + struct dm_stats stats; + + struct kthread_worker kworker; +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -2852,6 +2852,7 @@ static int crypt_ctr(struct dm_target *t + wake_up_process(cc->write_thread); + + ti->num_flush_bios = 1; ++ ti->limit_swap_bios = true; + + return 0; + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -146,6 +146,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_ + #define DM_NUMA_NODE NUMA_NO_NODE + static int dm_numa_node = DM_NUMA_NODE; + ++#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE) ++static int swap_bios = DEFAULT_SWAP_BIOS; 
++static int get_swap_bios(void) ++{ ++ int latch = READ_ONCE(swap_bios); ++ if (unlikely(latch <= 0)) ++ latch = DEFAULT_SWAP_BIOS; ++ return latch; ++} ++ + /* + * For mempools pre-allocation at the table loading time. + */ +@@ -935,6 +945,11 @@ void disable_write_zeroes(struct mapped_ + limits->max_write_zeroes_sectors = 0; + } + ++static bool swap_bios_limit(struct dm_target *ti, struct bio *bio) ++{ ++ return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios); ++} ++ + static void clone_endio(struct bio *bio) + { + blk_status_t error = bio->bi_status; +@@ -972,6 +987,11 @@ static void clone_endio(struct bio *bio) + } + } + ++ if (unlikely(swap_bios_limit(tio->ti, bio))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } ++ + free_tio(tio); + dec_pending(io, error); + } +@@ -1250,6 +1270,22 @@ void dm_remap_zone_report(struct dm_targ + } + EXPORT_SYMBOL_GPL(dm_remap_zone_report); + ++static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) ++{ ++ mutex_lock(&md->swap_bios_lock); ++ while (latch < md->swap_bios) { ++ cond_resched(); ++ down(&md->swap_bios_semaphore); ++ md->swap_bios--; ++ } ++ while (latch > md->swap_bios) { ++ cond_resched(); ++ up(&md->swap_bios_semaphore); ++ md->swap_bios++; ++ } ++ mutex_unlock(&md->swap_bios_lock); ++} ++ + static blk_qc_t __map_bio(struct dm_target_io *tio) + { + int r; +@@ -1270,6 +1306,14 @@ static blk_qc_t __map_bio(struct dm_targ + atomic_inc(&io->io_count); + sector = clone->bi_iter.bi_sector; + ++ if (unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ int latch = get_swap_bios(); ++ if (unlikely(latch != md->swap_bios)) ++ __set_swap_bios_limit(md, latch); ++ down(&md->swap_bios_semaphore); ++ } ++ + r = ti->type->map(ti, clone); + switch (r) { + case DM_MAPIO_SUBMITTED: +@@ -1284,10 +1328,18 @@ static blk_qc_t __map_bio(struct dm_targ + ret = generic_make_request(clone); + break; + case DM_MAPIO_KILL: ++ if 
(unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } + free_tio(tio); + dec_pending(io, BLK_STS_IOERR); + break; + case DM_MAPIO_REQUEUE: ++ if (unlikely(swap_bios_limit(ti, clone))) { ++ struct mapped_device *md = io->md; ++ up(&md->swap_bios_semaphore); ++ } + free_tio(tio); + dec_pending(io, BLK_STS_DM_REQUEUE); + break; +@@ -1859,6 +1911,7 @@ static void cleanup_mapped_device(struct + mutex_destroy(&md->suspend_lock); + mutex_destroy(&md->type_lock); + mutex_destroy(&md->table_devices_lock); ++ mutex_destroy(&md->swap_bios_lock); + + dm_mq_cleanup_mapped_device(md); + } +@@ -1933,6 +1986,10 @@ static struct mapped_device *alloc_dev(i + init_completion(&md->kobj_holder.completion); + md->kworker_task = NULL; + ++ md->swap_bios = get_swap_bios(); ++ sema_init(&md->swap_bios_semaphore, md->swap_bios); ++ mutex_init(&md->swap_bios_lock); ++ + md->disk->major = _major; + md->disk->first_minor = minor; + md->disk->fops = &dm_blk_dops; +@@ -3228,6 +3285,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, + module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); + MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); + ++module_param(swap_bios, int, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs"); ++ + MODULE_DESCRIPTION(DM_NAME " driver"); + MODULE_AUTHOR("Joe Thornber "); + MODULE_LICENSE("GPL"); +--- a/include/linux/device-mapper.h ++++ b/include/linux/device-mapper.h +@@ -315,6 +315,11 @@ struct dm_target { + * on max_io_len boundary. + */ + bool split_discard_bios:1; ++ ++ /* ++ * Set if we need to limit the number of in-flight bios when swapping. 
++ */ ++ bool limit_swap_bios:1; + }; + + /* Each target can link one of these into the table */ diff --git a/queue-4.19/f2fs-fix-out-of-repair-__setattr_copy.patch b/queue-4.19/f2fs-fix-out-of-repair-__setattr_copy.patch new file mode 100644 index 00000000000..dc9d7fab12a --- /dev/null +++ b/queue-4.19/f2fs-fix-out-of-repair-__setattr_copy.patch @@ -0,0 +1,36 @@ +From 2562515f0ad7342bde6456602c491b64c63fe950 Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Wed, 16 Dec 2020 17:15:23 +0800 +Subject: f2fs: fix out-of-repair __setattr_copy() + +From: Chao Yu + +commit 2562515f0ad7342bde6456602c491b64c63fe950 upstream. + +__setattr_copy() was copied from setattr_copy() in fs/attr.c, there is +two missing patches doesn't cover this inner function, fix it. + +Commit 7fa294c8991c ("userns: Allow chown and setgid preservation") +Commit 23adbe12ef7d ("fs,userns: Change inode_capable to capable_wrt_inode_uidgid") + +Fixes: fbfa2cc58d53 ("f2fs: add file operations") +Cc: stable@vger.kernel.org +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/file.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -760,7 +760,8 @@ static void __setattr_copy(struct inode + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) ++ if (!in_group_p(inode->i_gid) && ++ !capable_wrt_inode_uidgid(inode, CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(inode, mode); + } diff --git a/queue-4.19/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch b/queue-4.19/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch new file mode 100644 index 00000000000..f08809b5771 --- /dev/null +++ b/queue-4.19/gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch @@ -0,0 +1,65 @@ +From 78178ca844f0eb88f21f31c7fde969384be4c901 Mon Sep 17 00:00:00 2001 +From: Bob Peterson +Date: Fri, 5 Feb 2021 13:50:41 -0500 +Subject: gfs2: Don't skip dlm unlock if 
glock has an lvb + +From: Bob Peterson + +commit 78178ca844f0eb88f21f31c7fde969384be4c901 upstream. + +Patch fb6791d100d1 was designed to allow gfs2 to unmount quicker by +skipping the step where it tells dlm to unlock glocks in EX with lvbs. +This was done because when gfs2 unmounts a file system, it destroys the +dlm lockspace shortly after it destroys the glocks so it doesn't need to +unlock them all: the unlock is implied when the lockspace is destroyed +by dlm. + +However, that patch introduced a use-after-free in dlm: as part of its +normal dlm_recoverd process, it can call ls_recovery to recover dead +locks. In so doing, it can call recover_rsbs which calls recover_lvb for +any mastered rsbs. Func recover_lvb runs through the list of lkbs queued +to the given rsb (if the glock is cached but unlocked, it will still be +queued to the lkb, but in NL--Unlocked--mode) and if it has an lvb, +copies it to the rsb, thus trying to preserve the lkb. However, when +gfs2 skips the dlm unlock step, it frees the glock and its lvb, which +means dlm's function recover_lvb references the now freed lvb pointer, +copying the freed lvb memory to the rsb. + +This patch changes the check in gdlm_put_lock so that it calls +dlm_unlock for all glocks that contain an lvb pointer. 
+ +Fixes: fb6791d100d1 ("GFS2: skip dlm_unlock calls in unmount") +Cc: stable@vger.kernel.org # v3.8+ +Signed-off-by: Bob Peterson +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/lock_dlm.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/fs/gfs2/lock_dlm.c ++++ b/fs/gfs2/lock_dlm.c +@@ -283,7 +283,6 @@ static void gdlm_put_lock(struct gfs2_gl + { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; +- int lvb_needs_unlock = 0; + int error; + + if (gl->gl_lksb.sb_lkid == 0) { +@@ -296,13 +295,10 @@ static void gdlm_put_lock(struct gfs2_gl + gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); + gfs2_update_request_times(gl); + +- /* don't want to skip dlm_unlock writing the lvb when lock is ex */ +- +- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE)) +- lvb_needs_unlock = 1; ++ /* don't want to skip dlm_unlock writing the lvb when lock has one */ + + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && +- !lvb_needs_unlock) { ++ !gl->gl_lksb.sb_lvbptr) { + gfs2_glock_free(gl); + return; + } diff --git a/queue-4.19/series b/queue-4.19/series index b47a362943b..d3065f59db9 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -225,3 +225,14 @@ module-ignore-_global_offset_table_-when-warning-for-undefined-symbols.patch mmc-sdhci-esdhc-imx-fix-kernel-panic-when-remove-module.patch gpio-pcf857x-fix-missing-first-interrupt.patch printk-fix-deadlock-when-kernel-panic.patch +cpufreq-intel_pstate-get-per-cpu-max-freq-via-msr_hwp_capabilities-if-available.patch +f2fs-fix-out-of-repair-__setattr_copy.patch +sparc32-fix-a-user-triggerable-oops-in-clear_user.patch +gfs2-don-t-skip-dlm-unlock-if-glock-has-an-lvb.patch +dm-fix-deadlock-when-swapping-to-encrypted-device.patch +dm-era-recover-committed-writeset-after-crash.patch +dm-era-verify-the-data-block-size-hasn-t-changed.patch +dm-era-fix-bitset-memory-leaks.patch 
+dm-era-use-correct-value-size-in-equality-function-of-writeset-tree.patch +dm-era-reinitialize-bitset-cache-before-digesting-a-new-writeset.patch +dm-era-only-resize-metadata-in-preresume.patch diff --git a/queue-4.19/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch b/queue-4.19/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch new file mode 100644 index 00000000000..780001ea9aa --- /dev/null +++ b/queue-4.19/sparc32-fix-a-user-triggerable-oops-in-clear_user.patch @@ -0,0 +1,50 @@ +From 7780918b36489f0b2f9a3749d7be00c2ceaec513 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 20 Jul 2020 02:21:51 +0100 +Subject: sparc32: fix a user-triggerable oops in clear_user() + +From: Al Viro + +commit 7780918b36489f0b2f9a3749d7be00c2ceaec513 upstream. + +Back in 2.1.29 the clear_user() guts (__bzero()) had been merged +with memset(). Unfortunately, while all exception handlers had been +copied, one of the exception table entries got lost. As the result, +clear_user() starting at 128*n bytes before the end of page and +spanning between 8 and 127 bytes into the next page would oops when +the second page is unmapped. It's trivial to reproduce - all +it takes is + +main() +{ + int fd = open("/dev/zero", O_RDONLY); + char *p = mmap(NULL, 16384, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + munmap(p + 8192, 8192); + read(fd, p + 8192 - 128, 192); +} + +which had been oopsing since March 1997. Says something about +the quality of test coverage... ;-/ And while today sparc32 port +is nearly dead, back in '97 it had been very much alive; in fact, +sparc64 had only been in mainline for 3 months by that point... + +Cc: stable@kernel.org +Fixes: v2.1.29 +Signed-off-by: Al Viro +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/memset.S | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/sparc/lib/memset.S ++++ b/arch/sparc/lib/memset.S +@@ -142,6 +142,7 @@ __bzero: + ZERO_LAST_BLOCKS(%o0, 0x48, %g2) + ZERO_LAST_BLOCKS(%o0, 0x08, %g2) + 13: ++ EXT(12b, 13b, 21f) + be 8f + andcc %o1, 4, %g0 +