From: Greg Kroah-Hartman Date: Mon, 2 Jan 2023 10:56:04 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v6.0.17~17 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6a5ad0879b28e70790bb3c9aa020d7e1a02895bc;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch md-fix-a-crash-in-mempool_free.patch mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch pnode-terminate-at-peers-of-source.patch --- diff --git a/queue-4.9/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch b/queue-4.9/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch new file mode 100644 index 00000000000..c2be88fbb6e --- /dev/null +++ b/queue-4.9/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch @@ -0,0 +1,145 @@ +From 8508fa2e7472f673edbeedf1b1d2b7a6bb898ecc Mon Sep 17 00:00:00 2001 +From: Artem Egorkine +Date: Sun, 25 Dec 2022 12:57:27 +0200 +Subject: ALSA: line6: correct midi status byte when receiving data from podxt + +From: Artem Egorkine + +commit 8508fa2e7472f673edbeedf1b1d2b7a6bb898ecc upstream. + +A PODxt device sends 0xb2, 0xc2 or 0xf2 as a status byte for MIDI +messages over USB that should otherwise have a 0xb0, 0xc0 or 0xf0 +status byte. This is usually corrected by the driver on other OSes. + +This fixes MIDI sysex messages sent by PODxt. + +[ tiwai: fixed white spaces ] + +Signed-off-by: Artem Egorkine +Cc: +Link: https://lore.kernel.org/r/20221225105728.1153989-1-arteme@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/line6/driver.c | 3 ++- + sound/usb/line6/midi.c | 3 ++- + sound/usb/line6/midibuf.c | 25 +++++++++++++++++-------- + sound/usb/line6/midibuf.h | 5 ++++- + sound/usb/line6/pod.c | 3 ++- + 5 files changed, 27 insertions(+), 12 deletions(-) + +--- a/sound/usb/line6/driver.c ++++ b/sound/usb/line6/driver.c +@@ -304,7 +304,8 @@ static void line6_data_received(struct u + for (;;) { + done = + line6_midibuf_read(mb, line6->buffer_message, +- LINE6_MIDI_MESSAGE_MAXLEN); ++ LINE6_MIDI_MESSAGE_MAXLEN, ++ LINE6_MIDIBUF_READ_RX); + + if (done <= 0) + break; +--- a/sound/usb/line6/midi.c ++++ b/sound/usb/line6/midi.c +@@ -60,7 +60,8 @@ static void line6_midi_transmit(struct s + + for (;;) { + done = line6_midibuf_read(mb, chunk, +- LINE6_FALLBACK_MAXPACKETSIZE); ++ LINE6_FALLBACK_MAXPACKETSIZE, ++ LINE6_MIDIBUF_READ_TX); + + if (done == 0) + break; +--- a/sound/usb/line6/midibuf.c ++++ b/sound/usb/line6/midibuf.c +@@ -13,6 +13,7 @@ + + #include "midibuf.h" + ++ + static int midibuf_message_length(unsigned char code) + { + int message_length; +@@ -24,12 +25,7 @@ static int midibuf_message_length(unsign + + message_length = length[(code >> 4) - 8]; + } else { +- /* +- Note that according to the MIDI specification 0xf2 is +- the "Song Position Pointer", but this is used by Line 6 +- to send sysex messages to the host. +- */ +- static const int length[] = { -1, 2, -1, 2, -1, -1, 1, 1, 1, 1, ++ static const int length[] = { -1, 2, 2, 2, -1, -1, 1, 1, 1, -1, + 1, 1, 1, -1, 1, 1 + }; + message_length = length[code & 0x0f]; +@@ -129,7 +125,7 @@ int line6_midibuf_write(struct midi_buff + } + + int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, +- int length) ++ int length, int read_type) + { + int bytes_used; + int length1, length2; +@@ -152,9 +148,22 @@ int line6_midibuf_read(struct midi_buffe + + length1 = this->size - this->pos_read; + +- /* check MIDI command length */ + command = this->buf[this->pos_read]; ++ /* ++ PODxt always has status byte lower nibble set to 0010, ++ when it means to send 0000, so we correct if here so ++ that control/program changes come on channel 1 and ++ sysex message status byte is correct ++ */ ++ if (read_type == LINE6_MIDIBUF_READ_RX) { ++ if (command == 0xb2 || command == 0xc2 || command == 0xf2) { ++ unsigned char fixed = command & 0xf0; ++ this->buf[this->pos_read] = fixed; ++ command = fixed; ++ } ++ } + ++ /* check MIDI command length */ + if (command & 0x80) { + midi_length = midibuf_message_length(command); + this->command_prev = command; +--- a/sound/usb/line6/midibuf.h ++++ b/sound/usb/line6/midibuf.h +@@ -12,6 +12,9 @@ + #ifndef MIDIBUF_H + #define MIDIBUF_H + ++#define LINE6_MIDIBUF_READ_TX 0 ++#define LINE6_MIDIBUF_READ_RX 1 ++ + struct midi_buffer { + unsigned char *buf; + int size; +@@ -27,7 +30,7 @@ extern void line6_midibuf_destroy(struct + extern int line6_midibuf_ignore(struct midi_buffer *mb, int length); + extern int line6_midibuf_init(struct midi_buffer *mb, int size, int split); + extern int line6_midibuf_read(struct midi_buffer *mb, unsigned char *data, +- int length); ++ int length, int read_type); + extern void line6_midibuf_reset(struct midi_buffer *mb); + extern int line6_midibuf_write(struct midi_buffer *mb, unsigned char *data, + int length); +--- a/sound/usb/line6/pod.c ++++ b/sound/usb/line6/pod.c +@@ -169,8 +169,9 @@ static struct line6_pcm_properties pod_p + .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ + }; + ++ + static const char pod_version_header[] = { +- 0xf2, 0x7e, 0x7f, 0x06, 0x02 ++ 0xf0, 0x7e, 0x7f, 0x06, 0x02 + }; + + /* forward declarations: */ diff --git a/queue-4.9/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch b/queue-4.9/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch new file mode 100644 index 00000000000..564a65d5cff --- /dev/null +++ b/queue-4.9/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch @@ -0,0 +1,34 @@ +From b8800d324abb50160560c636bfafe2c81001b66c Mon Sep 17 00:00:00 2001 +From: Artem Egorkine +Date: Sun, 25 Dec 2022 12:57:28 +0200 +Subject: ALSA: line6: fix stack overflow in line6_midi_transmit + +From: Artem Egorkine + +commit b8800d324abb50160560c636bfafe2c81001b66c upstream. + +Correctly calculate available space including the size of the chunk +buffer. This fixes a buffer overflow when multiple MIDI sysex +messages are sent to a PODxt device. + +Signed-off-by: Artem Egorkine +Cc: +Link: https://lore.kernel.org/r/20221225105728.1153989-2-arteme@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/line6/midi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/sound/usb/line6/midi.c ++++ b/sound/usb/line6/midi.c +@@ -48,7 +48,8 @@ static void line6_midi_transmit(struct s + int req, done; + + for (;;) { +- req = min(line6_midibuf_bytes_free(mb), line6->max_packet_size); ++ req = min3(line6_midibuf_bytes_free(mb), line6->max_packet_size, ++ LINE6_FALLBACK_MAXPACKETSIZE); + done = snd_rawmidi_transmit_peek(substream, chunk, req); + + if (done == 0) diff --git a/queue-4.9/md-fix-a-crash-in-mempool_free.patch b/queue-4.9/md-fix-a-crash-in-mempool_free.patch new file mode 100644 index 00000000000..51544853dfc --- /dev/null +++ b/queue-4.9/md-fix-a-crash-in-mempool_free.patch @@ -0,0 +1,109 @@ +From 341097ee53573e06ab9fc675d96a052385b851fa Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 4 Nov 2022 09:53:38 -0400 +Subject: md: fix a crash in mempool_free + +From: Mikulas Patocka + +commit 341097ee53573e06ab9fc675d96a052385b851fa upstream. + +There's a crash in mempool_free when running the lvm test +shell/lvchange-rebuild-raid.sh. + +The reason for the crash is this: +* super_written calls atomic_dec_and_test(&mddev->pending_writes) and + wake_up(&mddev->sb_wait). Then it calls rdev_dec_pending(rdev, mddev) + and bio_put(bio). +* so, the process that waited on sb_wait and that is woken up is racing + with bio_put(bio). +* if the process wins the race, it calls bioset_exit before bio_put(bio) + is executed. +* bio_put(bio) attempts to free a bio into a destroyed bio set - causing + a crash in mempool_free. + +We fix this bug by moving bio_put before atomic_dec_and_test. + +We also move rdev_dec_pending before atomic_dec_and_test as suggested by +Neil Brown. + +The function md_end_flush has a similar bug - we must call bio_put before +we decrement the number of in-progress bios. + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + #PF: supervisor write access in kernel mode + #PF: error_code(0x0002) - not-present page + PGD 11557f0067 P4D 11557f0067 PUD 0 + Oops: 0002 [#1] PREEMPT SMP + CPU: 0 PID: 73 Comm: kworker/0:1 Not tainted 6.1.0-rc3 #5 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 + Workqueue: kdelayd flush_expired_bios [dm_delay] + RIP: 0010:mempool_free+0x47/0x80 + Code: 48 89 ef 5b 5d ff e0 f3 c3 48 89 f7 e8 32 45 3f 00 48 63 53 08 48 89 c6 3b 53 04 7d 2d 48 8b 43 10 8d 4a 01 48 89 df 89 4b 08 <48> 89 2c d0 e8 b0 45 3f 00 48 8d 7b 30 5b 5d 31 c9 ba 01 00 00 00 + RSP: 0018:ffff88910036bda8 EFLAGS: 00010093 + RAX: 0000000000000000 RBX: ffff8891037b65d8 RCX: 0000000000000001 + RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff8891037b65d8 + RBP: ffff8891447ba240 R08: 0000000000012908 R09: 00000000003d0900 + R10: 0000000000000000 R11: 0000000000173544 R12: ffff889101a14000 + R13: ffff8891562ac300 R14: ffff889102b41440 R15: ffffe8ffffa00d05 + FS: 0000000000000000(0000) GS:ffff88942fa00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000001102e99000 CR4: 00000000000006b0 + Call Trace: + + clone_endio+0xf4/0x1c0 [dm_mod] + clone_endio+0xf4/0x1c0 [dm_mod] + __submit_bio+0x76/0x120 + submit_bio_noacct_nocheck+0xb6/0x2a0 + flush_expired_bios+0x28/0x2f [dm_delay] + process_one_work+0x1b4/0x300 + worker_thread+0x45/0x3e0 + ? rescuer_thread+0x380/0x380 + kthread+0xc2/0x100 + ? kthread_complete_and_exit+0x20/0x20 + ret_from_fork+0x1f/0x30 + + Modules linked in: brd dm_delay dm_raid dm_mod af_packet uvesafb cfbfillrect cfbimgblt cn cfbcopyarea fb font fbdev tun autofs4 binfmt_misc configfs ipv6 virtio_rng virtio_balloon rng_core virtio_net pcspkr net_failover failover qemu_fw_cfg button mousedev raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod sd_mod t10_pi crc64_rocksoft crc64 virtio_scsi scsi_mod evdev psmouse bsg scsi_common [last unloaded: brd] + CR2: 0000000000000000 + ---[ end trace 0000000000000000 ]--- + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -360,13 +360,14 @@ static void md_end_flush(struct bio *bio + struct md_rdev *rdev = bio->bi_private; + struct mddev *mddev = rdev->mddev; + ++ bio_put(bio); ++ + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pre-request flush has finished */ + queue_work(md_wq, &mddev->flush_work); + } +- bio_put(bio); + } + + static void md_submit_flush_data(struct work_struct *ws); +@@ -725,10 +726,12 @@ static void super_written(struct bio *bi + md_error(mddev, rdev); + } + ++ bio_put(bio); ++ ++ rdev_dec_pending(rdev, mddev); ++ + if (atomic_dec_and_test(&mddev->pending_writes)) + wake_up(&mddev->sb_wait); +- rdev_dec_pending(rdev, mddev); +- bio_put(bio); + } + + void md_super_write(struct mddev *mddev, struct md_rdev *rdev, diff --git a/queue-4.9/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch b/queue-4.9/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch new file mode 100644 index 00000000000..6b604f5a4a7 --- /dev/null +++ b/queue-4.9/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch @@ -0,0 +1,64 @@ +From 4a44cd249604e29e7b90ae796d7692f5773dd348 Mon Sep 17 00:00:00 2001 +From: Deren Wu +Date: Sun, 4 Dec 2022 16:24:16 +0800 +Subject: mmc: vub300: fix warning - do not call blocking ops when !TASK_RUNNING + +From: Deren Wu + +commit 4a44cd249604e29e7b90ae796d7692f5773dd348 upstream. + +vub300_enable_sdio_irq() works with mutex and need TASK_RUNNING here. +Ensure that we mark current as TASK_RUNNING for sleepable context. + +[ 77.554641] do not call blocking ops when !TASK_RUNNING; state=1 set at [] sdio_irq_thread+0x17d/0x5b0 +[ 77.554652] WARNING: CPU: 2 PID: 1983 at kernel/sched/core.c:9813 __might_sleep+0x116/0x160 +[ 77.554905] CPU: 2 PID: 1983 Comm: ksdioirqd/mmc1 Tainted: G OE 6.1.0-rc5 #1 +[ 77.554910] Hardware name: Intel(R) Client Systems NUC8i7BEH/NUC8BEB, BIOS BECFL357.86A.0081.2020.0504.1834 05/04/2020 +[ 77.554912] RIP: 0010:__might_sleep+0x116/0x160 +[ 77.554920] RSP: 0018:ffff888107b7fdb8 EFLAGS: 00010282 +[ 77.554923] RAX: 0000000000000000 RBX: ffff888118c1b740 RCX: 0000000000000000 +[ 77.554926] RDX: 0000000000000001 RSI: 0000000000000004 RDI: ffffed1020f6ffa9 +[ 77.554928] RBP: ffff888107b7fde0 R08: 0000000000000001 R09: ffffed1043ea60ba +[ 77.554930] R10: ffff88821f5305cb R11: ffffed1043ea60b9 R12: ffffffff93aa3a60 +[ 77.554932] R13: 000000000000011b R14: 7fffffffffffffff R15: ffffffffc0558660 +[ 77.554934] FS: 0000000000000000(0000) GS:ffff88821f500000(0000) knlGS:0000000000000000 +[ 77.554937] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 77.554939] CR2: 00007f8a44010d68 CR3: 000000024421a003 CR4: 00000000003706e0 +[ 77.554942] Call Trace: +[ 77.554944] +[ 77.554952] mutex_lock+0x78/0xf0 +[ 77.554973] vub300_enable_sdio_irq+0x103/0x3c0 [vub300] +[ 77.554981] sdio_irq_thread+0x25c/0x5b0 +[ 77.555006] kthread+0x2b8/0x370 +[ 77.555017] ret_from_fork+0x1f/0x30 +[ 77.555023] +[ 77.555025] ---[ end trace 0000000000000000 ]--- + +Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") +Signed-off-by: Deren Wu +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/87dc45b122d26d63c80532976813c9365d7160b3.1670140888.git.deren.wu@mediatek.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/vub300.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/mmc/host/vub300.c ++++ b/drivers/mmc/host/vub300.c +@@ -2056,6 +2056,7 @@ static void vub300_enable_sdio_irq(struc + return; + kref_get(&vub300->kref); + if (enable) { ++ set_current_state(TASK_RUNNING); + mutex_lock(&vub300->irq_mutex); + if (vub300->irqs_queued) { + vub300->irqs_queued -= 1; +@@ -2071,6 +2072,7 @@ static void vub300_enable_sdio_irq(struc + vub300_queue_poll_work(vub300, 0); + } + mutex_unlock(&vub300->irq_mutex); ++ set_current_state(TASK_INTERRUPTIBLE); + } else { + vub300->irq_enabled = 0; + } diff --git a/queue-4.9/pnode-terminate-at-peers-of-source.patch b/queue-4.9/pnode-terminate-at-peers-of-source.patch new file mode 100644 index 00000000000..9b9888b5f0c --- /dev/null +++ b/queue-4.9/pnode-terminate-at-peers-of-source.patch @@ -0,0 +1,639 @@ +From 11933cf1d91d57da9e5c53822a540bbdc2656c16 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Sat, 17 Dec 2022 22:28:40 +0100 +Subject: pnode: terminate at peers of source + +From: Christian Brauner + +commit 11933cf1d91d57da9e5c53822a540bbdc2656c16 upstream. + +The propagate_mnt() function handles mount propagation when creating +mounts and propagates the source mount tree @source_mnt to all +applicable nodes of the destination propagation mount tree headed by +@dest_mnt. + +Unfortunately it contains a bug where it fails to terminate at peers of +@source_mnt when looking up copies of the source mount that become +masters for copies of the source mount tree mounted on top of slaves in +the destination propagation tree causing a NULL dereference. + +Once the mechanics of the bug are understood it's easy to trigger. +Because of unprivileged user namespaces it is available to unprivileged +users. + +While fixing this bug we've gotten confused multiple times due to +unclear terminology or missing concepts. So let's start this with some +clarifications: + +* The terms "master" or "peer" denote a shared mount. A shared mount + belongs to a peer group. + +* A peer group is a set of shared mounts that propagate to each other. + They are identified by a peer group id. The peer group id is available + in @shared_mnt->mnt_group_id. + Shared mounts within the same peer group have the same peer group id. + The peers in a peer group can be reached via @shared_mnt->mnt_share. + +* The terms "slave mount" or "dependent mount" denote a mount that + receives propagation from a peer in a peer group. IOW, shared mounts + may have slave mounts and slave mounts have shared mounts as their + master. Slave mounts of a given peer in a peer group are listed on + that peers slave list available at @shared_mnt->mnt_slave_list. + +* The term "master mount" denotes a mount in a peer group. IOW, it + denotes a shared mount or a peer mount in a peer group. The term + "master mount" - or "master" for short - is mostly used when talking + in the context of slave mounts that receive propagation from a master + mount. A master mount of a slave identifies the closest peer group a + slave mount receives propagation from. The master mount of a slave can + be identified via @slave_mount->mnt_master. Different slaves may point + to different masters in the same peer group. + +* Multiple peers in a peer group can have non-empty ->mnt_slave_lists. + Non-empty ->mnt_slave_lists of peers don't intersect. Consequently, to + ensure all slave mounts of a peer group are visited the + ->mnt_slave_lists of all peers in a peer group have to be walked. + +* Slave mounts point to a peer in the closest peer group they receive + propagation from via @slave_mnt->mnt_master (see above). Together with + these peers they form a propagation group (see below). The closest + peer group can thus be identified through the peer group id + @slave_mnt->mnt_master->mnt_group_id of the peer/master that a slave + mount receives propagation from. + +* A shared-slave mount is a slave mount to a peer group pg1 while also + a peer in another peer group pg2. IOW, a peer group may receive + propagation from another peer group. + + If a peer group pg1 is a slave to another peer group pg2 then all + peers in peer group pg1 point to the same peer in peer group pg2 via + ->mnt_master. IOW, all peers in peer group pg1 appear on the same + ->mnt_slave_list. IOW, they cannot be slaves to different peer groups. + +* A pure slave mount is a slave mount that is a slave to a peer group + but is not a peer in another peer group. + +* A propagation group denotes the set of mounts consisting of a single + peer group pg1 and all slave mounts and shared-slave mounts that point + to a peer in that peer group via ->mnt_master. IOW, all slave mounts + such that @slave_mnt->mnt_master->mnt_group_id is equal to + @shared_mnt->mnt_group_id. + + The concept of a propagation group makes it easier to talk about a + single propagation level in a propagation tree. + + For example, in propagate_mnt() the immediate peers of @dest_mnt and + all slaves of @dest_mnt's peer group form a propagation group propg1. + So a shared-slave mount that is a slave in propg1 and that is a peer + in another peer group pg2 forms another propagation group propg2 + together with all slaves that point to that shared-slave mount in + their ->mnt_master. + +* A propagation tree refers to all mounts that receive propagation + starting from a specific shared mount. + + For example, for propagate_mnt() @dest_mnt is the start of a + propagation tree. The propagation tree ecompasses all mounts that + receive propagation from @dest_mnt's peer group down to the leafs. + +With that out of the way let's get to the actual algorithm. + +We know that @dest_mnt is guaranteed to be a pure shared mount or a +shared-slave mount. This is guaranteed by a check in +attach_recursive_mnt(). So propagate_mnt() will first propagate the +source mount tree to all peers in @dest_mnt's peer group: + +for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { + ret = propagate_one(n); + if (ret) + goto out; +} + +Notice, that the peer propagation loop of propagate_mnt() doesn't +propagate @dest_mnt itself. @dest_mnt is mounted directly in +attach_recursive_mnt() after we propagated to the destination +propagation tree. + +The mount that will be mounted on top of @dest_mnt is @source_mnt. This +copy was created earlier even before we entered attach_recursive_mnt() +and doesn't concern us a lot here. + +It's just important to notice that when propagate_mnt() is called +@source_mnt will not yet have been mounted on top of @dest_mnt. Thus, +@source_mnt->mnt_parent will either still point to @source_mnt or - in +the case @source_mnt is moved and thus already attached - still to its +former parent. + +For each peer @m in @dest_mnt's peer group propagate_one() will create a +new copy of the source mount tree and mount that copy @child on @m such +that @child->mnt_parent points to @m after propagate_one() returns. + +propagate_one() will stash the last destination propagation node @m in +@last_dest and the last copy it created for the source mount tree in +@last_source. + +Hence, if we call into propagate_one() again for the next destination +propagation node @m, @last_dest will point to the previous destination +propagation node and @last_source will point to the previous copy of the +source mount tree and mounted on @last_dest. + +Each new copy of the source mount tree is created from the previous copy +of the source mount tree. This will become important later. + +The peer loop in propagate_mnt() is straightforward. We iterate through +the peers copying and updating @last_source and @last_dest as we go +through them and mount each copy of the source mount tree @child on a +peer @m in @dest_mnt's peer group. + +After propagate_mnt() handled the peers in @dest_mnt's peer group +propagate_mnt() will propagate the source mount tree down the +propagation tree that @dest_mnt's peer group propagates to: + +for (m = next_group(dest_mnt, dest_mnt); m; + m = next_group(m, dest_mnt)) { + /* everything in that slave group */ + n = m; + do { + ret = propagate_one(n); + if (ret) + goto out; + n = next_peer(n); + } while (n != m); +} + +The next_group() helper will recursively walk the destination +propagation tree, descending into each propagation group of the +propagation tree. + +The important part is that it takes care to propagate the source mount +tree to all peers in the peer group of a propagation group before it +propagates to the slaves to those peers in the propagation group. IOW, +it creates and mounts copies of the source mount tree that become +masters before it creates and mounts copies of the source mount tree +that become slaves to these masters. + +It is important to remember that propagating the source mount tree to +each mount @m in the destination propagation tree simply means that we +create and mount new copies @child of the source mount tree on @m such +that @child->mnt_parent points to @m. + +Since we know that each node @m in the destination propagation tree +headed by @dest_mnt's peer group will be overmounted with a copy of the +source mount tree and since we know that the propagation properties of +each copy of the source mount tree we create and mount at @m will mostly +mirror the propagation properties of @m. We can use that information to +create and mount the copies of the source mount tree that become masters +before their slaves. + +The easy case is always when @m and @last_dest are peers in a peer group +of a given propagation group. In that case we know that we can simply +copy @last_source without having to figure out what the master for the +new copy @child of the source mount tree needs to be as we've done that +in a previous call to propagate_one(). + +The hard case is when we're dealing with a slave mount or a shared-slave +mount @m in a destination propagation group that we need to create and +mount a copy of the source mount tree on. + +For each propagation group in the destination propagation tree we +propagate the source mount tree to we want to make sure that the copies +@child of the source mount tree we create and mount on slaves @m pick an +ealier copy of the source mount tree that we mounted on a master @m of +the destination propagation group as their master. This is a mouthful +but as far as we can tell that's the core of it all. + +But, if we keep track of the masters in the destination propagation tree +@m we can use the information to find the correct master for each copy +of the source mount tree we create and mount at the slaves in the +destination propagation tree @m. + +Let's walk through the base case as that's still fairly easy to grasp. + +If we're dealing with the first slave in the propagation group that +@dest_mnt is in then we don't yet have marked any masters in the +destination propagation tree. + +We know the master for the first slave to @dest_mnt's peer group is +simple @dest_mnt. So we expect this algorithm to yield a copy of the +source mount tree that was mounted on a peer in @dest_mnt's peer group +as the master for the copy of the source mount tree we want to mount at +the first slave @m: + +for (n = m; ; n = p) { + p = n->mnt_master; + if (p == dest_master || IS_MNT_MARKED(p)) + break; +} + +For the first slave we walk the destination propagation tree all the way +up to a peer in @dest_mnt's peer group. IOW, the propagation hierarchy +can be walked by walking up the @mnt->mnt_master hierarchy of the +destination propagation tree @m. We will ultimately find a peer in +@dest_mnt's peer group and thus ultimately @dest_mnt->mnt_master. + +Btw, here the assumption we listed at the beginning becomes important. +Namely, that peers in a peer group pg1 that are slaves in another peer +group pg2 appear on the same ->mnt_slave_list. IOW, all slaves who are +peers in peer group pg1 point to the same peer in peer group pg2 via +their ->mnt_master. Otherwise the termination condition in the code +above would be wrong and next_group() would be broken too. + +So the first iteration sets: + +n = m; +p = n->mnt_master; + +such that @p now points to a peer or @dest_mnt itself. We walk up one +more level since we don't have any marked mounts. So we end up with: + +n = dest_mnt; +p = dest_mnt->mnt_master; + +If @dest_mnt's peer group is not slave to another peer group then @p is +now NULL. If @dest_mnt's peer group is a slave to another peer group +then @p now points to @dest_mnt->mnt_master points which is a master +outside the propagation tree we're dealing with. + +Now we need to figure out the master for the copy of the source mount +tree we're about to create and mount on the first slave of @dest_mnt's +peer group: + +do { + struct mount *parent = last_source->mnt_parent; + if (last_source == first_source) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) + break; + last_source = last_source->mnt_master; +} while (!done); + +We know that @last_source->mnt_parent points to @last_dest and +@last_dest is the last peer in @dest_mnt's peer group we propagated to +in the peer loop in propagate_mnt(). + +Consequently, @last_source is the last copy we created and mount on that +last peer in @dest_mnt's peer group. So @last_source is the master we +want to pick. + +We know that @last_source->mnt_parent->mnt_master points to +@last_dest->mnt_master. We also know that @last_dest->mnt_master is +either NULL or points to a master outside of the destination propagation +tree and so does @p. Hence: + +done = parent->mnt_master == p; + +is trivially true in the base condition. + +We also know that for the first slave mount of @dest_mnt's peer group +that @last_dest either points @dest_mnt itself because it was +initialized to: + +last_dest = dest_mnt; + +at the beginning of propagate_mnt() or it will point to a peer of +@dest_mnt in its peer group. In both cases it is guaranteed that on the +first iteration @n and @parent are peers (Please note the check for +peers here as that's important.): + +if (done && peers(n, parent)) + break; + +So, as we expected, we select @last_source, which referes to the last +copy of the source mount tree we mounted on the last peer in @dest_mnt's +peer group, as the master of the first slave in @dest_mnt's peer group. +The rest is taken care of by clone_mnt(last_source, ...). We'll skip +over that part otherwise this becomes a blogpost. + +At the end of propagate_mnt() we now mark @m->mnt_master as the first +master in the destination propagation tree that is distinct from +@dest_mnt->mnt_master. IOW, we mark @dest_mnt itself as a master. + +By marking @dest_mnt or one of it's peers we are able to easily find it +again when we later lookup masters for other copies of the source mount +tree we mount copies of the source mount tree on slaves @m to +@dest_mnt's peer group. This, in turn allows us to find the master we +selected for the copies of the source mount tree we mounted on master in +the destination propagation tree again. + +The important part is to realize that the code makes use of the fact +that the last copy of the source mount tree stashed in @last_source was +mounted on top of the previous destination propagation node @last_dest. +What this means is that @last_source allows us to walk the destination +propagation hierarchy the same way each destination propagation node @m +does. + +If we take @last_source, which is the copy of @source_mnt we have +mounted on @last_dest in the previous iteration of propagate_one(), then +we know @last_source->mnt_parent points to @last_dest but we also know +that as we walk through the destination propagation tree that +@last_source->mnt_master will point to an earlier copy of the source +mount tree we mounted one an earlier destination propagation node @m. + +IOW, @last_source->mnt_parent will be our hook into the destination +propagation tree and each consecutive @last_source->mnt_master will lead +us to an earlier propagation node @m via +@last_source->mnt_master->mnt_parent. + +Hence, by walking up @last_source->mnt_master, each of which is mounted +on a node that is a master @m in the destination propagation tree we can +also walk up the destination propagation hierarchy. + +So, for each new destination propagation node @m we use the previous +copy of @last_source and the fact it's mounted on the previous +propagation node @last_dest via @last_source->mnt_master->mnt_parent to +determine what the master of the new copy of @last_source needs to be. + +The goal is to find the _closest_ master that the new copy of the source +mount tree we are about to create and mount on a slave @m in the +destination propagation tree needs to pick. IOW, we want to find a +suitable master in the propagation group. + +As the propagation structure of the source mount propagation tree we +create mirrors the propagation structure of the destination propagation +tree we can find @m's closest master - i.e., a marked master - which is +a peer in the closest peer group that @m receives propagation from. We +store that closest master of @m in @p as before and record the slave to +that master in @n + +We then search for this master @p via @last_source by walking up the +master hierarchy starting from the last copy of the source mount tree +stored in @last_source that we created and mounted on the previous +destination propagation node @m. + +We will try to find the master by walking @last_source->mnt_master and +by comparing @last_source->mnt_master->mnt_parent->mnt_master to @p. If +we find @p then we can figure out what earlier copy of the source mount +tree needs to be the master for the new copy of the source mount tree +we're about to create and mount at the current destination propagation +node @m. + +If @last_source->mnt_master->mnt_parent and @n are peers then we know +that the closest master they receive propagation from is +@last_source->mnt_master->mnt_parent->mnt_master. If not then the +closest immediate peer group that they receive propagation from must be +one level higher up. + +This builds on the earlier clarification at the beginning that all peers +in a peer group which are slaves of other peer groups all point to the +same ->mnt_master, i.e., appear on the same ->mnt_slave_list, of the +closest peer group that they receive propagation from. + +However, terminating the walk has corner cases. + +If the closest marked master for a given destination node @m cannot be +found by walking up the master hierarchy via @last_source->mnt_master +then we need to terminate the walk when we encounter @source_mnt again. + +This isn't an arbitrary termination. It simply means that the new copy +of the source mount tree we're about to create has a copy of the source +mount tree we created and mounted on a peer in @dest_mnt's peer group as +its master. IOW, @source_mnt is the peer in the closest peer group that +the new copy of the source mount tree receives propagation from. + +We absolutely have to stop @source_mnt because @last_source->mnt_master +either points outside the propagation hierarchy we're dealing with or it +is NULL because @source_mnt isn't a shared-slave. + +So continuing the walk past @source_mnt would cause a NULL dereference +via @last_source->mnt_master->mnt_parent. And so we have to stop the +walk when we encounter @source_mnt again. + +One scenario where this can happen is when we first handled a series of +slaves of @dest_mnt's peer group and then encounter peers in a new peer +group that is a slave to @dest_mnt's peer group. We handle them and then +we encounter another slave mount to @dest_mnt that is a pure slave to +@dest_mnt's peer group. That pure slave will have a peer in @dest_mnt's +peer group as its master. Consequently, the new copy of the source mount +tree will need to have @source_mnt as it's master. So we walk the +propagation hierarchy all the way up to @source_mnt based on +@last_source->mnt_master. + +So terminate on @source_mnt, easy peasy. Except, that the check misses +something that the rest of the algorithm already handles. + +If @dest_mnt has peers in it's peer group the peer loop in +propagate_mnt(): + +for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { + ret = propagate_one(n); + if (ret) + goto out; +} + +will consecutively update @last_source with each previous copy of the +source mount tree we created and mounted at the previous peer in +@dest_mnt's peer group. So after that loop terminates @last_source will +point to whatever copy of the source mount tree was created and mounted +on the last peer in @dest_mnt's peer group. + +Furthermore, if there is even a single additional peer in @dest_mnt's +peer group then @last_source will __not__ point to @source_mnt anymore. +Because, as we mentioned above, @dest_mnt isn't even handled in this +loop but directly in attach_recursive_mnt(). So it can't even accidently +come last in that peer loop. + +So the first time we handle a slave mount @m of @dest_mnt's peer group +the copy of the source mount tree we create will make the __last copy of +the source mount tree we created and mounted on the last peer in +@dest_mnt's peer group the master of the new copy of the source mount +tree we create and mount on the first slave of @dest_mnt's peer group__. + +But this means that the termination condition that checks for +@source_mnt is wrong. The @source_mnt cannot be found anymore by +propagate_one(). Instead it will find the last copy of the source mount +tree we created and mounted for the last peer of @dest_mnt's peer group +again. And that is a peer of @source_mnt not @source_mnt itself. + +IOW, we fail to terminate the loop correctly and ultimately dereference +@last_source->mnt_master->mnt_parent. When @source_mnt's peer group +isn't slave to another peer group then @last_source->mnt_master is NULL +causing the splat below. + +For example, assume @dest_mnt is a pure shared mount and has three peers +in its peer group: + +=================================================================================== + mount-id mount-parent-id peer-group-id +=================================================================================== +(@dest_mnt) mnt_master[216] 309 297 shared:216 + \ + (@source_mnt) mnt_master[218]: 609 609 shared:218 + +(1) mnt_master[216]: 607 605 shared:216 + \ + (P1) mnt_master[218]: 624 607 shared:218 + +(2) mnt_master[216]: 576 574 shared:216 + \ + (P2) mnt_master[218]: 625 576 shared:218 + +(3) mnt_master[216]: 545 543 shared:216 + \ + (P3) mnt_master[218]: 626 545 shared:218 + +After this sequence has been processed @last_source will point to (P3), +the copy generated for the third peer in @dest_mnt's peer group we +handled. So the copy of the source mount tree (P4) we create and mount +on the first slave of @dest_mnt's peer group: + +=================================================================================== + mount-id mount-parent-id peer-group-id +=================================================================================== + mnt_master[216] 309 297 shared:216 + / + / +(S0) mnt_slave 483 481 master:216 + \ + \ (P3) mnt_master[218] 626 545 shared:218 + \ / + \/ + (P4) mnt_slave 627 483 master:218 + +will pick the last copy of the source mount tree (P3) as master, not (S0). + +When walking the propagation hierarchy via @last_source's master +hierarchy we encounter (P3) but not (S0), i.e., @source_mnt. + +We can fix this in multiple ways: + +(1) By setting @last_source to @source_mnt after we processed the peers + in @dest_mnt's peer group right after the peer loop in + propagate_mnt(). + +(2) By changing the termination condition that relies on finding exactly + @source_mnt to finding a peer of @source_mnt. + +(3) By only moving @last_source when we actually venture into a new peer + group or some clever variant thereof. + +The first two options are minimally invasive and what we want as a fix. +The third option is more intrusive but something we'd like to explore in +the near future. + +This passes all LTP tests and specifically the mount propagation +testsuite part of it. It also holds up against all known reproducers of +this issues. + +Final words. +First, this is a clever but __worringly__ underdocumented algorithm. +There isn't a single detailed comment to be found in next_group(), +propagate_one() or anywhere else in that file for that matter. This has +been a giant pain to understand and work through and a bug like this is +insanely difficult to fix without a detailed understanding of what's +happening. Let's not talk about the amount of time that was sunk into +fixing this. + +Second, all the cool kids with access to +unshare --mount --user --map-root --propagation=unchanged +are going to have a lot of fun. IOW, triggerable by unprivileged users +while namespace_lock() lock is held. + +[ 115.848393] BUG: kernel NULL pointer dereference, address: 0000000000000010 +[ 115.848967] #PF: supervisor read access in kernel mode +[ 115.849386] #PF: error_code(0x0000) - not-present page +[ 115.849803] PGD 0 P4D 0 +[ 115.850012] Oops: 0000 [#1] PREEMPT SMP PTI +[ 115.850354] CPU: 0 PID: 15591 Comm: mount Not tainted 6.1.0-rc7 #3 +[ 115.850851] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS +VirtualBox 12/01/2006 +[ 115.851510] RIP: 0010:propagate_one.part.0+0x7f/0x1a0 +[ 115.851924] Code: 75 eb 4c 8b 05 c2 25 37 02 4c 89 ca 48 8b 4a 10 +49 39 d0 74 1e 48 3b 81 e0 00 00 00 74 26 48 8b 92 e0 00 00 00 be 01 +00 00 00 <48> 8b 4a 10 49 39 d0 75 e2 40 84 f6 74 38 4c 89 05 84 25 37 +02 4d +[ 115.853441] RSP: 0018:ffffb8d5443d7d50 EFLAGS: 00010282 +[ 115.853865] RAX: ffff8e4d87c41c80 RBX: ffff8e4d88ded780 RCX: ffff8e4da4333a00 +[ 115.854458] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8e4d88ded780 +[ 115.855044] RBP: ffff8e4d88ded780 R08: ffff8e4da4338000 R09: ffff8e4da43388c0 +[ 115.855693] R10: 0000000000000002 R11: ffffb8d540158000 R12: ffffb8d5443d7da8 +[ 115.856304] R13: ffff8e4d88ded780 R14: 0000000000000000 R15: 0000000000000000 +[ 115.856859] FS: 00007f92c90c9800(0000) GS:ffff8e4dfdc00000(0000) +knlGS:0000000000000000 +[ 115.857531] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 115.858006] CR2: 0000000000000010 CR3: 0000000022f4c002 CR4: 00000000000706f0 +[ 115.858598] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 115.859393] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 115.860099] Call Trace: +[ 115.860358] +[ 115.860535] propagate_mnt+0x14d/0x190 +[ 115.860848] attach_recursive_mnt+0x274/0x3e0 +[ 115.861212] path_mount+0x8c8/0xa60 +[ 115.861503] __x64_sys_mount+0xf6/0x140 +[ 115.861819] do_syscall_64+0x5b/0x80 +[ 115.862117] ? do_faccessat+0x123/0x250 +[ 115.862435] ? syscall_exit_to_user_mode+0x17/0x40 +[ 115.862826] ? do_syscall_64+0x67/0x80 +[ 115.863133] ? syscall_exit_to_user_mode+0x17/0x40 +[ 115.863527] ? do_syscall_64+0x67/0x80 +[ 115.863835] ? do_syscall_64+0x67/0x80 +[ 115.864144] ? do_syscall_64+0x67/0x80 +[ 115.864452] ? exc_page_fault+0x70/0x170 +[ 115.864775] entry_SYSCALL_64_after_hwframe+0x63/0xcd +[ 115.865187] RIP: 0033:0x7f92c92b0ebe +[ 115.865480] Code: 48 8b 0d 75 4f 0c 00 f7 d8 64 89 01 48 83 c8 ff +c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 +00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 42 4f 0c 00 f7 d8 64 89 +01 48 +[ 115.866984] RSP: 002b:00007fff000aa728 EFLAGS: 00000246 ORIG_RAX: +00000000000000a5 +[ 115.867607] RAX: ffffffffffffffda RBX: 000055a77888d6b0 RCX: 00007f92c92b0ebe +[ 115.868240] RDX: 000055a77888d8e0 RSI: 000055a77888e6e0 RDI: 000055a77888e620 +[ 115.868823] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 +[ 115.869403] R10: 0000000000001000 R11: 0000000000000246 R12: 000055a77888e620 +[ 115.869994] R13: 000055a77888d8e0 R14: 00000000ffffffff R15: 00007f92c93e4076 +[ 115.870581] +[ 115.870763] Modules linked in: nft_fib_inet nft_fib_ipv4 +nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 +nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 +nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink qrtr snd_intel8x0 +sunrpc snd_ac97_codec ac97_bus snd_pcm snd_timer intel_rapl_msr +intel_rapl_common snd vboxguest intel_powerclamp video rapl joydev +soundcore i2c_piix4 wmi fuse zram xfs vmwgfx crct10dif_pclmul +crc32_pclmul crc32c_intel polyval_clmulni polyval_generic +drm_ttm_helper ttm e1000 ghash_clmulni_intel serio_raw ata_generic +pata_acpi scsi_dh_rdac scsi_dh_emc scsi_dh_alua dm_multipath +[ 115.875288] CR2: 0000000000000010 +[ 115.875641] ---[ end trace 0000000000000000 ]--- +[ 115.876135] RIP: 0010:propagate_one.part.0+0x7f/0x1a0 +[ 115.876551] Code: 75 eb 4c 8b 05 c2 25 37 02 4c 89 ca 48 8b 4a 10 +49 39 d0 74 1e 48 3b 81 e0 00 00 00 74 26 48 8b 92 e0 00 00 00 be 01 +00 00 00 <48> 8b 4a 10 49 39 d0 75 e2 40 84 f6 74 38 4c 89 05 84 25 37 +02 4d +[ 115.878086] RSP: 0018:ffffb8d5443d7d50 EFLAGS: 00010282 +[ 115.878511] RAX: ffff8e4d87c41c80 RBX: ffff8e4d88ded780 RCX: ffff8e4da4333a00 +[ 115.879128] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8e4d88ded780 +[ 115.879715] RBP: ffff8e4d88ded780 R08: ffff8e4da4338000 R09: ffff8e4da43388c0 +[ 115.880359] R10: 0000000000000002 R11: ffffb8d540158000 R12: ffffb8d5443d7da8 +[ 115.880962] R13: ffff8e4d88ded780 R14: 0000000000000000 R15: 0000000000000000 +[ 115.881548] FS: 00007f92c90c9800(0000) GS:ffff8e4dfdc00000(0000) +knlGS:0000000000000000 +[ 115.882234] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 115.882713] CR2: 0000000000000010 CR3: 0000000022f4c002 CR4: 00000000000706f0 +[ 115.883314] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 115.883966] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: f2ebb3a921c1 ("smarter propagate_mnt()") +Fixes: 5ec0811d3037 ("propogate_mnt: Handle the first propogated copy being a slave") +Cc: +Reported-by: Ditang Chen +Signed-off-by: Seth Forshee (Digital Ocean) +Signed-off-by: Christian Brauner (Microsoft) +Signed-off-by: Greg Kroah-Hartman +--- +If there are no big objections I'll get this to Linus rather sooner than later. +--- + fs/pnode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -247,7 +247,7 @@ static int propagate_one(struct mount *m + } + do { + struct mount *parent = last_source->mnt_parent; +- if (last_source == first_source) ++ if (peers(last_source, first_source)) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) diff --git a/queue-4.9/series b/queue-4.9/series index 662d7d69fe5..ebe8529b04c 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -218,3 +218,8 @@ gcov-add-support-for-checksum-field.patch powerpc-rtas-avoid-scheduling-in-rtas_os_term.patch hid-plantronics-additional-pids-for-double-volume-ke.patch hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch +alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch +alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch +pnode-terminate-at-peers-of-source.patch +md-fix-a-crash-in-mempool_free.patch +mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch