From 9cb6cb1510c0704f7bb3a28b8f9994235f641509 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jul 2023 09:06:13 +0200 Subject: [PATCH] 5.15-stable patches added patches: dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch fs-dlm-return-positive-pid-value-for-f_getlk.patch md-raid0-add-discard-support-for-the-original-layout.patch --- ...-asynchronously-probed-block-devices.patch | 119 ++++++++++ ...eturn-positive-pid-value-for-f_getlk.patch | 36 ++++ ...card-support-for-the-original-layout.patch | 203 ++++++++++++++++++ queue-5.15/series | 3 + 4 files changed, 361 insertions(+) create mode 100644 queue-5.15/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch create mode 100644 queue-5.15/fs-dlm-return-positive-pid-value-for-f_getlk.patch create mode 100644 queue-5.15/md-raid0-add-discard-support-for-the-original-layout.patch diff --git a/queue-5.15/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch b/queue-5.15/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch new file mode 100644 index 00000000000..e773d5e67f6 --- /dev/null +++ b/queue-5.15/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch @@ -0,0 +1,119 @@ +From 035641b01e72af4f6c6cf22a4bdb5d7dfc4e8e8e Mon Sep 17 00:00:00 2001 +From: Peter Korsgaard +Date: Wed, 16 Nov 2022 07:16:56 +0100 +Subject: dm init: add dm-mod.waitfor to wait for asynchronously probed block devices + +From: Peter Korsgaard + +commit 035641b01e72af4f6c6cf22a4bdb5d7dfc4e8e8e upstream. + +Just calling wait_for_device_probe() is not enough to ensure that +asynchronously probed block devices are available (E.G. 
mmc, usb), so +add a "dm-mod.waitfor=[,..,]" parameter to get +dm-init to explicitly wait for specific block devices before +initializing the tables with logic similar to the rootwait logic that +was introduced with commit cc1ed7542c8c ("init: wait for +asynchronously scanned block devices"). + +E.G. with dm-verity on mmc using: +dm-mod.waitfor="PARTLABEL=hash-a,PARTLABEL=root-a" + +[ 0.671671] device-mapper: init: waiting for all devices to be available before creating mapped devices +[ 0.671679] device-mapper: init: waiting for device PARTLABEL=hash-a ... +[ 0.710695] mmc0: new HS200 MMC card at address 0001 +[ 0.711158] mmcblk0: mmc0:0001 004GA0 3.69 GiB +[ 0.715954] mmcblk0boot0: mmc0:0001 004GA0 partition 1 2.00 MiB +[ 0.722085] mmcblk0boot1: mmc0:0001 004GA0 partition 2 2.00 MiB +[ 0.728093] mmcblk0rpmb: mmc0:0001 004GA0 partition 3 512 KiB, chardev (249:0) +[ 0.738274] mmcblk0: p1 p2 p3 p4 p5 p6 p7 +[ 0.751282] device-mapper: init: waiting for device PARTLABEL=root-a ... +[ 0.751306] device-mapper: init: all devices available +[ 0.751683] device-mapper: verity: sha256 using implementation "sha256-generic" +[ 0.759344] device-mapper: ioctl: dm-0 (vroot) is ready +[ 0.766540] VFS: Mounted root (squashfs filesystem) readonly on device 254:0. 
+ +Signed-off-by: Peter Korsgaard +Signed-off-by: Mike Snitzer +Cc: Mark-PK Tsai +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/device-mapper/dm-init.rst | 8 +++++++ + drivers/md/dm-init.c | 22 +++++++++++++++++++- + 2 files changed, 29 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/device-mapper/dm-init.rst ++++ b/Documentation/admin-guide/device-mapper/dm-init.rst +@@ -123,3 +123,11 @@ Other examples (per target): + 0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256 + fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd + 51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584 ++ ++For setups using device-mapper on top of asynchronously probed block ++devices (MMC, USB, ..), it may be necessary to tell dm-init to ++explicitly wait for them to become available before setting up the ++device-mapper tables. This can be done with the "dm-mod.waitfor=" ++module parameter, which takes a list of devices to wait for:: ++ ++ dm-mod.waitfor=[,..,] +--- a/drivers/md/dm-init.c ++++ b/drivers/md/dm-init.c +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -18,12 +19,17 @@ + #define DM_MAX_DEVICES 256 + #define DM_MAX_TARGETS 256 + #define DM_MAX_STR_SIZE 4096 ++#define DM_MAX_WAITFOR 256 + + static char *create; + ++static char *waitfor[DM_MAX_WAITFOR]; ++ + /* + * Format: dm-mod.create=,,,,[,
+][;,,,,
[,
+]+] + * Table format: ++ * Block devices to wait for to become available before setting up tables: ++ * dm-mod.waitfor=[,..,] + * + * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format + * details. +@@ -266,7 +272,7 @@ static int __init dm_init_init(void) + struct dm_device *dev; + LIST_HEAD(devices); + char *str; +- int r; ++ int i, r; + + if (!create) + return 0; +@@ -286,6 +292,17 @@ static int __init dm_init_init(void) + DMINFO("waiting for all devices to be available before creating mapped devices"); + wait_for_device_probe(); + ++ for (i = 0; i < ARRAY_SIZE(waitfor); i++) { ++ if (waitfor[i]) { ++ DMINFO("waiting for device %s ...", waitfor[i]); ++ while (!dm_get_dev_t(waitfor[i])) ++ msleep(5); ++ } ++ } ++ ++ if (waitfor[0]) ++ DMINFO("all devices available"); ++ + list_for_each_entry(dev, &devices, list) { + if (dm_early_create(&dev->dmi, dev->table, + dev->target_args_array)) +@@ -301,3 +318,6 @@ late_initcall(dm_init_init); + + module_param(create, charp, 0); + MODULE_PARM_DESC(create, "Create a mapped device in early boot"); ++ ++module_param_array(waitfor, charp, NULL, 0); ++MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables"); diff --git a/queue-5.15/fs-dlm-return-positive-pid-value-for-f_getlk.patch b/queue-5.15/fs-dlm-return-positive-pid-value-for-f_getlk.patch new file mode 100644 index 00000000000..e41a88a3395 --- /dev/null +++ b/queue-5.15/fs-dlm-return-positive-pid-value-for-f_getlk.patch @@ -0,0 +1,36 @@ +From 92655fbda5c05950a411eaabc19e025e86e2a291 Mon Sep 17 00:00:00 2001 +From: Alexander Aring +Date: Fri, 19 May 2023 11:21:24 -0400 +Subject: fs: dlm: return positive pid value for F_GETLK + +From: Alexander Aring + +commit 92655fbda5c05950a411eaabc19e025e86e2a291 upstream. + +The GETLK pid values have all been negated since commit 9d5b86ac13c5 +("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks"). 
+Revert this for local pids, and leave in place negative pids for remote +owners. + +Cc: stable@vger.kernel.org +Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Signed-off-by: Greg Kroah-Hartman +--- + fs/dlm/plock.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/dlm/plock.c ++++ b/fs/dlm/plock.c +@@ -363,7 +363,9 @@ int dlm_posix_get(dlm_lockspace_t *locks + locks_init_lock(fl); + fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; + fl->fl_flags = FL_POSIX; +- fl->fl_pid = -op->info.pid; ++ fl->fl_pid = op->info.pid; ++ if (op->info.nodeid != dlm_our_nodeid()) ++ fl->fl_pid = -fl->fl_pid; + fl->fl_start = op->info.start; + fl->fl_end = op->info.end; + rv = 0; diff --git a/queue-5.15/md-raid0-add-discard-support-for-the-original-layout.patch b/queue-5.15/md-raid0-add-discard-support-for-the-original-layout.patch new file mode 100644 index 00000000000..09a59549ab8 --- /dev/null +++ b/queue-5.15/md-raid0-add-discard-support-for-the-original-layout.patch @@ -0,0 +1,203 @@ +From e836007089ba8fdf24e636ef2b007651fb4582e6 Mon Sep 17 00:00:00 2001 +From: Jason Baron +Date: Fri, 23 Jun 2023 14:05:23 -0400 +Subject: md/raid0: add discard support for the 'original' layout + +From: Jason Baron + +commit e836007089ba8fdf24e636ef2b007651fb4582e6 upstream. + +We've found that using raid0 with the 'original' layout and discard +enabled with different disk sizes (such that at least two zones are +created) can result in data corruption. This is due to the fact that +the discard handling in 'raid0_handle_discard()' assumes the 'alternate' +layout. We've seen this corruption using ext4 but other filesystems are +likely susceptible as well. 
+ +More specifically, while multiple zones are necessary to create the +corruption, the corruption may not occur with multiple zones if they +layout in such a way the layout matches what the 'alternate' layout +would have produced. Thus, not all raid0 devices with the 'original' +layout, different size disks and discard enabled will encounter this +corruption. + +The 3.14 kernel inadvertently changed the raid0 disk layout for different +size disks. Thus, running a pre-3.14 kernel and post-3.14 kernel on the +same raid0 array could corrupt data. This led to the creation of the +'original' layout (to match the pre-3.14 layout) and the 'alternate' layout +(to match the post 3.14 layout) in the 5.4 kernel time frame and an option +to tell the kernel which layout to use (since it couldn't be autodetected). +However, when the 'original' layout was added back to 5.4 discard support +for the 'original' layout was not added, leading to this issue. + +I've been able to reliably reproduce the corruption with the following +test case: + +1. create raid0 array with different size disks using original layout +2. mkfs +3. mount -o discard +4. create lots of files +5. remove 1/2 the files +6. fstrim -a (or just the mount point for the raid0 array) +7. umount +8. fsck -fn /dev/md0 (spews all sorts of corruptions) + +Let's fix this by adding proper discard support to the 'original' layout. +The fix 'maps' the 'original' layout disks to the order in which they are +read/written such that we can compare the disks in the same way that the +current 'alternate' layout does. A 'disk_shift' field is added to +'struct strip_zone'. This could be computed on the fly in +raid0_handle_discard() but by adding this field, we save some computation +in the discard path. + +Note we could also potentially fix this by re-ordering the disks in the +zones that follow the first one, and then always reading/writing them using +the 'alternate' layout. 
However, that is seen as a more substantial change, +and we are attempting the least invasive fix at this time to remedy the +corruption. + +I've verified the change using the reproducer mentioned above. Typically, +the corruption is seen after less than 3 iterations, while the patch has +run 500+ iterations. + +Cc: NeilBrown +Cc: Song Liu +Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") +Cc: stable@vger.kernel.org +Signed-off-by: Jason Baron +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230623180523.1901230-1-jbaron@akamai.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid0.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++------- + drivers/md/raid0.h | 1 + 2 files changed, 55 insertions(+), 8 deletions(-) + +--- a/drivers/md/raid0.c ++++ b/drivers/md/raid0.c +@@ -274,6 +274,18 @@ static int create_strip_zones(struct mdd + goto abort; + } + ++ if (conf->layout == RAID0_ORIG_LAYOUT) { ++ for (i = 1; i < conf->nr_strip_zones; i++) { ++ sector_t first_sector = conf->strip_zone[i-1].zone_end; ++ ++ sector_div(first_sector, mddev->chunk_sectors); ++ zone = conf->strip_zone + i; ++ /* disk_shift is first disk index used in the zone */ ++ zone->disk_shift = sector_div(first_sector, ++ zone->nb_dev); ++ } ++ } ++ + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); + *private_conf = conf; + +@@ -444,6 +456,20 @@ exit_acct_set: + return ret; + } + ++/* ++ * Convert disk_index to the disk order in which it is read/written. ++ * For example, if we have 4 disks, they are numbered 0,1,2,3. If we ++ * write the disks starting at disk 3, then the read/write order would ++ * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() ++ * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map ++ * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in ++ * that 'output' space to understand the read/write disk ordering. 
++ */ ++static int map_disk_shift(int disk_index, int num_disks, int disk_shift) ++{ ++ return ((disk_index + num_disks - disk_shift) % num_disks); ++} ++ + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) + { + struct r0conf *conf = mddev->private; +@@ -457,7 +483,9 @@ static void raid0_handle_discard(struct + sector_t end_disk_offset; + unsigned int end_disk_index; + unsigned int disk; ++ sector_t orig_start, orig_end; + ++ orig_start = start; + zone = find_zone(conf, &start); + + if (bio_end_sector(bio) > zone->zone_end) { +@@ -471,6 +499,7 @@ static void raid0_handle_discard(struct + } else + end = bio_end_sector(bio); + ++ orig_end = end; + if (zone != conf->strip_zone) + end = end - zone[-1].zone_end; + +@@ -482,13 +511,26 @@ static void raid0_handle_discard(struct + last_stripe_index = end; + sector_div(last_stripe_index, stripe_size); + +- start_disk_index = (int)(start - first_stripe_index * stripe_size) / +- mddev->chunk_sectors; ++ /* In the first zone the original and alternate layouts are the same */ ++ if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { ++ sector_div(orig_start, mddev->chunk_sectors); ++ start_disk_index = sector_div(orig_start, zone->nb_dev); ++ start_disk_index = map_disk_shift(start_disk_index, ++ zone->nb_dev, ++ zone->disk_shift); ++ sector_div(orig_end, mddev->chunk_sectors); ++ end_disk_index = sector_div(orig_end, zone->nb_dev); ++ end_disk_index = map_disk_shift(end_disk_index, ++ zone->nb_dev, zone->disk_shift); ++ } else { ++ start_disk_index = (int)(start - first_stripe_index * stripe_size) / ++ mddev->chunk_sectors; ++ end_disk_index = (int)(end - last_stripe_index * stripe_size) / ++ mddev->chunk_sectors; ++ } + start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % + mddev->chunk_sectors) + + first_stripe_index * mddev->chunk_sectors; +- end_disk_index = (int)(end - last_stripe_index * stripe_size) / +- mddev->chunk_sectors; + end_disk_offset = ((int)(end - 
last_stripe_index * stripe_size) % + mddev->chunk_sectors) + + last_stripe_index * mddev->chunk_sectors; +@@ -496,18 +538,22 @@ static void raid0_handle_discard(struct + for (disk = 0; disk < zone->nb_dev; disk++) { + sector_t dev_start, dev_end; + struct md_rdev *rdev; ++ int compare_disk; ++ ++ compare_disk = map_disk_shift(disk, zone->nb_dev, ++ zone->disk_shift); + +- if (disk < start_disk_index) ++ if (compare_disk < start_disk_index) + dev_start = (first_stripe_index + 1) * + mddev->chunk_sectors; +- else if (disk > start_disk_index) ++ else if (compare_disk > start_disk_index) + dev_start = first_stripe_index * mddev->chunk_sectors; + else + dev_start = start_disk_offset; + +- if (disk < end_disk_index) ++ if (compare_disk < end_disk_index) + dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; +- else if (disk > end_disk_index) ++ else if (compare_disk > end_disk_index) + dev_end = last_stripe_index * mddev->chunk_sectors; + else + dev_end = end_disk_offset; +--- a/drivers/md/raid0.h ++++ b/drivers/md/raid0.h +@@ -6,6 +6,7 @@ struct strip_zone { + sector_t zone_end; /* Start of the next zone (in sectors) */ + sector_t dev_start; /* Zone offset in real dev (in sectors) */ + int nb_dev; /* # of devices attached to the zone */ ++ int disk_shift; /* start disk for the original layout */ + }; + + /* Linux 3.14 (20d0189b101) made an unintended change to diff --git a/queue-5.15/series b/queue-5.15/series index 4a947046505..60ea0038a58 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -481,3 +481,6 @@ pci-rockchip-set-address-alignment-for-endpoint-mode.patch misc-pci_endpoint_test-free-irqs-before-removing-the-device.patch misc-pci_endpoint_test-re-init-completion-for-every-test.patch mfd-pm8008-fix-module-autoloading.patch +md-raid0-add-discard-support-for-the-original-layout.patch +dm-init-add-dm-mod.waitfor-to-wait-for-asynchronously-probed-block-devices.patch +fs-dlm-return-positive-pid-value-for-f_getlk.patch -- 2.47.3