From: Greg Kroah-Hartman Date: Fri, 3 Jun 2022 16:13:20 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v4.9.317~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=444187ae5f9591da5f05065023a7bd64f53fab98;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: crypto-ecrdsa-fix-incorrect-use-of-vli_cmp.patch dm-crypt-make-printing-of-the-key-constant-time.patch dm-integrity-fix-error-code-in-dm_integrity_ctr.patch dm-stats-add-cond_resched-when-looping-over-entries.patch dm-verity-set-dm_target_immutable-feature-flag.patch hid-multitouch-add-support-for-google-whiskers-touchpad.patch raid5-introduce-md_broken.patch zsmalloc-fix-races-between-asynchronous-zspage-free-and-page-migration.patch --- diff --git a/queue-5.4/crypto-ecrdsa-fix-incorrect-use-of-vli_cmp.patch b/queue-5.4/crypto-ecrdsa-fix-incorrect-use-of-vli_cmp.patch new file mode 100644 index 00000000000..2f211e775ee --- /dev/null +++ b/queue-5.4/crypto-ecrdsa-fix-incorrect-use-of-vli_cmp.patch @@ -0,0 +1,51 @@ +From 7cc7ab73f83ee6d50dc9536bc3355495d8600fad Mon Sep 17 00:00:00 2001 +From: Vitaly Chikunov +Date: Thu, 21 Apr 2022 20:25:10 +0300 +Subject: crypto: ecrdsa - Fix incorrect use of vli_cmp + +From: Vitaly Chikunov + +commit 7cc7ab73f83ee6d50dc9536bc3355495d8600fad upstream. + +Correctly compare values that shall be greater-or-equal and not just +greater. + +Fixes: 0d7a78643f69 ("crypto: ecrdsa - add EC-RDSA (GOST 34.10) algorithm") +Cc: +Signed-off-by: Vitaly Chikunov +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + crypto/ecrdsa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/crypto/ecrdsa.c ++++ b/crypto/ecrdsa.c +@@ -112,15 +112,15 @@ static int ecrdsa_verify(struct akcipher + + /* Step 1: verify that 0 < r < q, 0 < s < q */ + if (vli_is_zero(r, ndigits) || +- vli_cmp(r, ctx->curve->n, ndigits) == 1 || ++ vli_cmp(r, ctx->curve->n, ndigits) >= 0 || + vli_is_zero(s, ndigits) || +- vli_cmp(s, ctx->curve->n, ndigits) == 1) ++ vli_cmp(s, ctx->curve->n, ndigits) >= 0) + return -EKEYREJECTED; + + /* Step 2: calculate hash (h) of the message (passed as input) */ + /* Step 3: calculate e = h \mod q */ + vli_from_le64(e, digest, ndigits); +- if (vli_cmp(e, ctx->curve->n, ndigits) == 1) ++ if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) + vli_sub(e, e, ctx->curve->n, ndigits); + if (vli_is_zero(e, ndigits)) + e[0] = 1; +@@ -136,7 +136,7 @@ static int ecrdsa_verify(struct akcipher + /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ + ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, + ctx->curve); +- if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) ++ if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) + vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); + + /* Step 7: if R == r signature is valid */ diff --git a/queue-5.4/dm-crypt-make-printing-of-the-key-constant-time.patch b/queue-5.4/dm-crypt-make-printing-of-the-key-constant-time.patch new file mode 100644 index 00000000000..feb2f0b8e51 --- /dev/null +++ b/queue-5.4/dm-crypt-make-printing-of-the-key-constant-time.patch @@ -0,0 +1,58 @@ +From 567dd8f34560fa221a6343729474536aa7ede4fd Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 25 Apr 2022 08:53:29 -0400 +Subject: dm crypt: make printing of the key constant-time + +From: Mikulas Patocka + +commit 567dd8f34560fa221a6343729474536aa7ede4fd upstream. + +The device mapper dm-crypt target is using scnprintf("%02x", cc->key[i]) to +report the current key to userspace. However, this is not a constant-time +operation and it may leak information about the key via timing, via cache +access patterns or via the branch predictor. + +Change dm-crypt's key printing to use "%c" instead of "%02x". Also +introduce hex2asc() that carefully avoids any branching or memory +accesses when converting a number in the range 0 ... 15 to an ascii +character. + +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Tested-by: Milan Broz +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-crypt.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -2817,6 +2817,11 @@ static int crypt_map(struct dm_target *t + return DM_MAPIO_SUBMITTED; + } + ++static char hex2asc(unsigned char c) ++{ ++ return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27); ++} ++ + static void crypt_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) + { +@@ -2835,9 +2840,12 @@ static void crypt_status(struct dm_targe + if (cc->key_size > 0) { + if (cc->key_string) + DMEMIT(":%u:%s", cc->key_size, cc->key_string); +- else +- for (i = 0; i < cc->key_size; i++) +- DMEMIT("%02x", cc->key[i]); ++ else { ++ for (i = 0; i < cc->key_size; i++) { ++ DMEMIT("%c%c", hex2asc(cc->key[i] >> 4), ++ hex2asc(cc->key[i] & 0xf)); ++ } ++ } + } else + DMEMIT("-"); + diff --git a/queue-5.4/dm-integrity-fix-error-code-in-dm_integrity_ctr.patch b/queue-5.4/dm-integrity-fix-error-code-in-dm_integrity_ctr.patch new file mode 100644 index 00000000000..eb4c7c49c82 --- /dev/null +++ b/queue-5.4/dm-integrity-fix-error-code-in-dm_integrity_ctr.patch @@ -0,0 +1,37 @@ +From d3f2a14b8906df913cb04a706367b012db94a6e8 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 25 Apr 2022 14:56:48 +0300 +Subject: dm integrity: fix error code in dm_integrity_ctr() + +From: Dan Carpenter + +commit d3f2a14b8906df913cb04a706367b012db94a6e8 upstream. + +The "r" variable shadows an earlier "r" that has function scope. It +means that we accidentally return success instead of an error code. +Smatch has a warning for this: + + drivers/md/dm-integrity.c:4503 dm_integrity_ctr() + warn: missing error code 'r' + +Fixes: 7eada909bfd7 ("dm: add integrity target") +Cc: stable@vger.kernel.org +Signed-off-by: Dan Carpenter +Reviewed-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-integrity.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -4149,8 +4149,6 @@ try_smaller_buffer: + } + + if (should_write_sb) { +- int r; +- + init_journal(ic, 0, ic->journal_sections, 0); + r = dm_integrity_failed(ic); + if (unlikely(r)) { diff --git a/queue-5.4/dm-stats-add-cond_resched-when-looping-over-entries.patch b/queue-5.4/dm-stats-add-cond_resched-when-looping-over-entries.patch new file mode 100644 index 00000000000..20d23feaffc --- /dev/null +++ b/queue-5.4/dm-stats-add-cond_resched-when-looping-over-entries.patch @@ -0,0 +1,80 @@ +From bfe2b0146c4d0230b68f5c71a64380ff8d361f8b Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sun, 24 Apr 2022 16:43:00 -0400 +Subject: dm stats: add cond_resched when looping over entries + +From: Mikulas Patocka + +commit bfe2b0146c4d0230b68f5c71a64380ff8d361f8b upstream. + +dm-stats can be used with a very large number of entries (it is only +limited by 1/4 of total system memory), so add rescheduling points to +the loops that iterate over the entries. + +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-stats.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/md/dm-stats.c ++++ b/drivers/md/dm-stats.c +@@ -224,6 +224,7 @@ void dm_stats_cleanup(struct dm_stats *s + atomic_read(&shared->in_flight[READ]), + atomic_read(&shared->in_flight[WRITE])); + } ++ cond_resched(); + } + dm_stat_free(&s->rcu_head); + } +@@ -313,6 +314,7 @@ static int dm_stats_create(struct dm_sta + for (ni = 0; ni < n_entries; ni++) { + atomic_set(&s->stat_shared[ni].in_flight[READ], 0); + atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0); ++ cond_resched(); + } + + if (s->n_histogram_entries) { +@@ -325,6 +327,7 @@ static int dm_stats_create(struct dm_sta + for (ni = 0; ni < n_entries; ni++) { + s->stat_shared[ni].tmp.histogram = hi; + hi += s->n_histogram_entries + 1; ++ cond_resched(); + } + } + +@@ -345,6 +348,7 @@ static int dm_stats_create(struct dm_sta + for (ni = 0; ni < n_entries; ni++) { + p[ni].histogram = hi; + hi += s->n_histogram_entries + 1; ++ cond_resched(); + } + } + } +@@ -474,6 +478,7 @@ static int dm_stats_list(struct dm_stats + } + DMEMIT("\n"); + } ++ cond_resched(); + } + mutex_unlock(&stats->mutex); + +@@ -750,6 +755,7 @@ static void __dm_stat_clear(struct dm_st + local_irq_enable(); + } + } ++ cond_resched(); + } + } + +@@ -865,6 +871,8 @@ static int dm_stats_print(struct dm_stat + + if (unlikely(sz + 1 >= maxlen)) + goto buffer_overflow; ++ ++ cond_resched(); + } + + if (clear) diff --git a/queue-5.4/dm-verity-set-dm_target_immutable-feature-flag.patch b/queue-5.4/dm-verity-set-dm_target_immutable-feature-flag.patch new file mode 100644 index 00000000000..6b694ecd600 --- /dev/null +++ b/queue-5.4/dm-verity-set-dm_target_immutable-feature-flag.patch @@ -0,0 +1,35 @@ +From 4caae58406f8ceb741603eee460d79bacca9b1b5 Mon Sep 17 00:00:00 2001 +From: Sarthak Kukreti +Date: Tue, 31 May 2022 15:56:40 -0400 +Subject: dm verity: set DM_TARGET_IMMUTABLE feature flag + +From: Sarthak Kukreti + +commit 4caae58406f8ceb741603eee460d79bacca9b1b5 upstream. + +The device-mapper framework provides a mechanism to mark targets as +immutable (and hence fail table reloads that try to change the target +type). Add the DM_TARGET_IMMUTABLE flag to the dm-verity target's +feature flags to prevent switching the verity target with a different +target type. + +Fixes: a4ffc152198e ("dm: add verity target") +Cc: stable@vger.kernel.org +Signed-off-by: Sarthak Kukreti +Reviewed-by: Kees Cook +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-verity-target.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -1217,6 +1217,7 @@ bad: + + static struct target_type verity_target = { + .name = "verity", ++ .features = DM_TARGET_IMMUTABLE, + .version = {1, 5, 0}, + .module = THIS_MODULE, + .ctr = verity_ctr, diff --git a/queue-5.4/hid-multitouch-add-support-for-google-whiskers-touchpad.patch b/queue-5.4/hid-multitouch-add-support-for-google-whiskers-touchpad.patch new file mode 100644 index 00000000000..55ebff5b35f --- /dev/null +++ b/queue-5.4/hid-multitouch-add-support-for-google-whiskers-touchpad.patch @@ -0,0 +1,33 @@ +From 1d07cef7fd7599450b3d03e1915efc2a96e1f03f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Ma=C5=9Blanka?= +Date: Tue, 5 Apr 2022 17:04:07 +0200 +Subject: HID: multitouch: Add support for Google Whiskers Touchpad + +From: Marek Maślanka + +commit 1d07cef7fd7599450b3d03e1915efc2a96e1f03f upstream. + +The Google Whiskers touchpad does not work properly with the default +multitouch configuration. Instead, use the same configuration as Google +Rose. + +Signed-off-by: Marek Maslanka +Acked-by: Benjamin Tissoires +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/hid-multitouch.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -2158,6 +2158,9 @@ static const struct hid_device_id mt_dev + { .driver_data = MT_CLS_GOOGLE, + HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, + USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, ++ { .driver_data = MT_CLS_GOOGLE, ++ HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, ++ USB_DEVICE_ID_GOOGLE_WHISKERS) }, + + /* Generic MT device */ + { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, diff --git a/queue-5.4/raid5-introduce-md_broken.patch b/queue-5.4/raid5-introduce-md_broken.patch new file mode 100644 index 00000000000..14b94be2268 --- /dev/null +++ b/queue-5.4/raid5-introduce-md_broken.patch @@ -0,0 +1,159 @@ +From 57668f0a4cc4083a120cc8c517ca0055c4543b59 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 22 Mar 2022 16:23:39 +0100 +Subject: raid5: introduce MD_BROKEN + +From: Mariusz Tkaczyk + +commit 57668f0a4cc4083a120cc8c517ca0055c4543b59 upstream. + +Raid456 module had allowed to achieve failed state. It was fixed by +fb73b357fb9 ("raid5: block failing device if raid will be failed"). +This fix introduces a bug, now if raid5 fails during IO, it may result +with a hung task without completion. Faulty flag on the device is +necessary to process all requests and is checked many times, mainly in +analyze_stripe(). +Allow to set faulty on drive again and set MD_BROKEN if raid is failed. + +As a result, this level is allowed to achieve failed state again, but +communication with userspace (via -EBUSY status) will be preserved. + +This restores possibility to fail array via #mdadm --set-faulty command +and will be fixed by additional verification on mdadm side. + +Reproduction steps: + mdadm -CR imsm -e imsm -n 3 /dev/nvme[0-2]n1 + mdadm -CR r5 -e imsm -l5 -n3 /dev/nvme[0-2]n1 --assume-clean + mkfs.xfs /dev/md126 -f + mount /dev/md126 /mnt/root/ + + fio --filename=/mnt/root/file --size=5GB --direct=1 --rw=randrw +--bs=64k --ioengine=libaio --iodepth=64 --runtime=240 --numjobs=4 +--time_based --group_reporting --name=throughput-test-job +--eta-newline=1 & + + echo 1 > /sys/block/nvme2n1/device/device/remove + echo 1 > /sys/block/nvme1n1/device/device/remove + + [ 1475.787779] Call Trace: + [ 1475.793111] __schedule+0x2a6/0x700 + [ 1475.799460] schedule+0x38/0xa0 + [ 1475.805454] raid5_get_active_stripe+0x469/0x5f0 [raid456] + [ 1475.813856] ? finish_wait+0x80/0x80 + [ 1475.820332] raid5_make_request+0x180/0xb40 [raid456] + [ 1475.828281] ? finish_wait+0x80/0x80 + [ 1475.834727] ? finish_wait+0x80/0x80 + [ 1475.841127] ? finish_wait+0x80/0x80 + [ 1475.847480] md_handle_request+0x119/0x190 + [ 1475.854390] md_make_request+0x8a/0x190 + [ 1475.861041] generic_make_request+0xcf/0x310 + [ 1475.868145] submit_bio+0x3c/0x160 + [ 1475.874355] iomap_dio_submit_bio.isra.20+0x51/0x60 + [ 1475.882070] iomap_dio_bio_actor+0x175/0x390 + [ 1475.889149] iomap_apply+0xff/0x310 + [ 1475.895447] ? iomap_dio_bio_actor+0x390/0x390 + [ 1475.902736] ? iomap_dio_bio_actor+0x390/0x390 + [ 1475.909974] iomap_dio_rw+0x2f2/0x490 + [ 1475.916415] ? iomap_dio_bio_actor+0x390/0x390 + [ 1475.923680] ? atime_needs_update+0x77/0xe0 + [ 1475.930674] ? xfs_file_dio_aio_read+0x6b/0xe0 [xfs] + [ 1475.938455] xfs_file_dio_aio_read+0x6b/0xe0 [xfs] + [ 1475.946084] xfs_file_read_iter+0xba/0xd0 [xfs] + [ 1475.953403] aio_read+0xd5/0x180 + [ 1475.959395] ? _cond_resched+0x15/0x30 + [ 1475.965907] io_submit_one+0x20b/0x3c0 + [ 1475.972398] __x64_sys_io_submit+0xa2/0x180 + [ 1475.979335] ? do_io_getevents+0x7c/0xc0 + [ 1475.986009] do_syscall_64+0x5b/0x1a0 + [ 1475.992419] entry_SYSCALL_64_after_hwframe+0x65/0xca + [ 1476.000255] RIP: 0033:0x7f11fc27978d + [ 1476.006631] Code: Bad RIP value. + [ 1476.073251] INFO: task fio:3877 blocked for more than 120 seconds. + +Cc: stable@vger.kernel.org +Fixes: fb73b357fb9 ("raid5: block failing device if raid will be failed") +Reviewd-by: Xiao Ni +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid5.c | 47 ++++++++++++++++++++++------------------------- + 1 file changed, 22 insertions(+), 25 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -609,17 +609,17 @@ int raid5_calc_degraded(struct r5conf *c + return degraded; + } + +-static int has_failed(struct r5conf *conf) ++static bool has_failed(struct r5conf *conf) + { +- int degraded; ++ int degraded = conf->mddev->degraded; + +- if (conf->mddev->reshape_position == MaxSector) +- return conf->mddev->degraded > conf->max_degraded; ++ if (test_bit(MD_BROKEN, &conf->mddev->flags)) ++ return true; + +- degraded = raid5_calc_degraded(conf); +- if (degraded > conf->max_degraded) +- return 1; +- return 0; ++ if (conf->mddev->reshape_position != MaxSector) ++ degraded = raid5_calc_degraded(conf); ++ ++ return degraded > conf->max_degraded; + } + + struct stripe_head * +@@ -2679,34 +2679,31 @@ static void raid5_error(struct mddev *md + unsigned long flags; + pr_debug("raid456: error called\n"); + ++ pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n", ++ mdname(mddev), bdevname(rdev->bdev, b)); ++ + spin_lock_irqsave(&conf->device_lock, flags); ++ set_bit(Faulty, &rdev->flags); ++ clear_bit(In_sync, &rdev->flags); ++ mddev->degraded = raid5_calc_degraded(conf); + +- if (test_bit(In_sync, &rdev->flags) && +- mddev->degraded == conf->max_degraded) { +- /* +- * Don't allow to achieve failed state +- * Don't try to recover this device +- */ ++ if (has_failed(conf)) { ++ set_bit(MD_BROKEN, &conf->mddev->flags); + conf->recovery_disabled = mddev->recovery_disabled; +- spin_unlock_irqrestore(&conf->device_lock, flags); +- return; ++ ++ pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n", ++ mdname(mddev), mddev->degraded, conf->raid_disks); ++ } else { ++ pr_crit("md/raid:%s: Operation continuing on %d devices.\n", ++ mdname(mddev), conf->raid_disks - mddev->degraded); + } + +- set_bit(Faulty, &rdev->flags); +- clear_bit(In_sync, &rdev->flags); +- mddev->degraded = raid5_calc_degraded(conf); + spin_unlock_irqrestore(&conf->device_lock, flags); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + + set_bit(Blocked, &rdev->flags); + set_mask_bits(&mddev->sb_flags, 0, + BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); +- pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n" +- "md/raid:%s: Operation continuing on %d devices.\n", +- mdname(mddev), +- bdevname(rdev->bdev, b), +- mdname(mddev), +- conf->raid_disks - mddev->degraded); + r5c_update_on_rdev_error(mddev, rdev); + } + diff --git a/queue-5.4/series b/queue-5.4/series index 289098a7c4e..ce99c1c0dc8 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -18,3 +18,11 @@ cfg80211-set-custom-regdomain-after-wiphy-registration.patch drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch exec-force-single-empty-string-when-argv-is-empty.patch netfilter-conntrack-re-fetch-conntrack-after-insertion.patch +crypto-ecrdsa-fix-incorrect-use-of-vli_cmp.patch +zsmalloc-fix-races-between-asynchronous-zspage-free-and-page-migration.patch +dm-integrity-fix-error-code-in-dm_integrity_ctr.patch +dm-crypt-make-printing-of-the-key-constant-time.patch +dm-stats-add-cond_resched-when-looping-over-entries.patch +dm-verity-set-dm_target_immutable-feature-flag.patch +raid5-introduce-md_broken.patch +hid-multitouch-add-support-for-google-whiskers-touchpad.patch diff --git a/queue-5.4/zsmalloc-fix-races-between-asynchronous-zspage-free-and-page-migration.patch b/queue-5.4/zsmalloc-fix-races-between-asynchronous-zspage-free-and-page-migration.patch new file mode 100644 index 00000000000..b97c795eac5 --- /dev/null +++ b/queue-5.4/zsmalloc-fix-races-between-asynchronous-zspage-free-and-page-migration.patch @@ -0,0 +1,86 @@ +From 2505a981114dcb715f8977b8433f7540854851d8 Mon Sep 17 00:00:00 2001 +From: Sultan Alsawaf +Date: Fri, 13 May 2022 15:11:26 -0700 +Subject: zsmalloc: fix races between asynchronous zspage free and page migration + +From: Sultan Alsawaf + +commit 2505a981114dcb715f8977b8433f7540854851d8 upstream. + +The asynchronous zspage free worker tries to lock a zspage's entire page +list without defending against page migration. Since pages which haven't +yet been locked can concurrently migrate off the zspage page list while +lock_zspage() churns away, lock_zspage() can suffer from a few different +lethal races. + +It can lock a page which no longer belongs to the zspage and unsafely +dereference page_private(), it can unsafely dereference a torn pointer to +the next page (since there's a data race), and it can observe a spurious +NULL pointer to the next page and thus not lock all of the zspage's pages +(since a single page migration will reconstruct the entire page list, and +create_page_chain() unconditionally zeroes out each list pointer in the +process). + +Fix the races by using migrate_read_lock() in lock_zspage() to synchronize +with page migration. + +Link: https://lkml.kernel.org/r/20220509024703.243847-1-sultan@kerneltoast.com +Fixes: 77ff465799c602 ("zsmalloc: zs_page_migrate: skip unnecessary loops but not return -EBUSY if zspage is not inuse") +Signed-off-by: Sultan Alsawaf +Acked-by: Minchan Kim +Cc: Nitin Gupta +Cc: Sergey Senozhatsky +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/zsmalloc.c | 37 +++++++++++++++++++++++++++++++++---- + 1 file changed, 33 insertions(+), 4 deletions(-) + +--- a/mm/zsmalloc.c ++++ b/mm/zsmalloc.c +@@ -1748,11 +1748,40 @@ static enum fullness_group putback_zspag + */ + static void lock_zspage(struct zspage *zspage) + { +- struct page *page = get_first_page(zspage); ++ struct page *curr_page, *page; + +- do { +- lock_page(page); +- } while ((page = get_next_page(page)) != NULL); ++ /* ++ * Pages we haven't locked yet can be migrated off the list while we're ++ * trying to lock them, so we need to be careful and only attempt to ++ * lock each page under migrate_read_lock(). Otherwise, the page we lock ++ * may no longer belong to the zspage. This means that we may wait for ++ * the wrong page to unlock, so we must take a reference to the page ++ * prior to waiting for it to unlock outside migrate_read_lock(). ++ */ ++ while (1) { ++ migrate_read_lock(zspage); ++ page = get_first_page(zspage); ++ if (trylock_page(page)) ++ break; ++ get_page(page); ++ migrate_read_unlock(zspage); ++ wait_on_page_locked(page); ++ put_page(page); ++ } ++ ++ curr_page = page; ++ while ((page = get_next_page(curr_page))) { ++ if (trylock_page(page)) { ++ curr_page = page; ++ } else { ++ get_page(page); ++ migrate_read_unlock(zspage); ++ wait_on_page_locked(page); ++ put_page(page); ++ migrate_read_lock(zspage); ++ } ++ } ++ migrate_read_unlock(zspage); + } + + static int zs_init_fs_context(struct fs_context *fc)