From: Greg Kroah-Hartman Date: Mon, 2 Jan 2023 10:56:40 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v6.0.17~13 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=18e2c70b8b987b5693ee9ceff53168099ad6e06d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch binfmt-fix-error-return-code-in-load_elf_fdpic_binary.patch f2fs-should-put-a-page-when-checking-the-summary-info.patch md-fix-a-crash-in-mempool_free.patch mm-compaction-fix-fast_isolate_around-to-stay-within-boundaries.patch mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch ovl-use-ovl-mounter-s-fsuid-and-fsgid-in-ovl_link.patch pnode-terminate-at-peers-of-source.patch --- diff --git a/queue-5.10/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch b/queue-5.10/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch new file mode 100644 index 00000000000..f040eefe6f7 --- /dev/null +++ b/queue-5.10/alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch @@ -0,0 +1,145 @@ +From 8508fa2e7472f673edbeedf1b1d2b7a6bb898ecc Mon Sep 17 00:00:00 2001 +From: Artem Egorkine +Date: Sun, 25 Dec 2022 12:57:27 +0200 +Subject: ALSA: line6: correct midi status byte when receiving data from podxt + +From: Artem Egorkine + +commit 8508fa2e7472f673edbeedf1b1d2b7a6bb898ecc upstream. + +A PODxt device sends 0xb2, 0xc2 or 0xf2 as a status byte for MIDI +messages over USB that should otherwise have a 0xb0, 0xc0 or 0xf0 +status byte. This is usually corrected by the driver on other OSes. + +This fixes MIDI sysex messages sent by PODxt. 
+ +[ tiwai: fixed white spaces ] + +Signed-off-by: Artem Egorkine +Cc: +Link: https://lore.kernel.org/r/20221225105728.1153989-1-arteme@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/line6/driver.c | 3 ++- + sound/usb/line6/midi.c | 3 ++- + sound/usb/line6/midibuf.c | 25 +++++++++++++++++-------- + sound/usb/line6/midibuf.h | 5 ++++- + sound/usb/line6/pod.c | 3 ++- + 5 files changed, 27 insertions(+), 12 deletions(-) + +--- a/sound/usb/line6/driver.c ++++ b/sound/usb/line6/driver.c +@@ -304,7 +304,8 @@ static void line6_data_received(struct u + for (;;) { + done = + line6_midibuf_read(mb, line6->buffer_message, +- LINE6_MIDI_MESSAGE_MAXLEN); ++ LINE6_MIDI_MESSAGE_MAXLEN, ++ LINE6_MIDIBUF_READ_RX); + + if (done <= 0) + break; +--- a/sound/usb/line6/midi.c ++++ b/sound/usb/line6/midi.c +@@ -56,7 +56,8 @@ static void line6_midi_transmit(struct s + + for (;;) { + done = line6_midibuf_read(mb, chunk, +- LINE6_FALLBACK_MAXPACKETSIZE); ++ LINE6_FALLBACK_MAXPACKETSIZE, ++ LINE6_MIDIBUF_READ_TX); + + if (done == 0) + break; +--- a/sound/usb/line6/midibuf.c ++++ b/sound/usb/line6/midibuf.c +@@ -9,6 +9,7 @@ + + #include "midibuf.h" + ++ + static int midibuf_message_length(unsigned char code) + { + int message_length; +@@ -20,12 +21,7 @@ static int midibuf_message_length(unsign + + message_length = length[(code >> 4) - 8]; + } else { +- /* +- Note that according to the MIDI specification 0xf2 is +- the "Song Position Pointer", but this is used by Line 6 +- to send sysex messages to the host. 
+- */ +- static const int length[] = { -1, 2, -1, 2, -1, -1, 1, 1, 1, 1, ++ static const int length[] = { -1, 2, 2, 2, -1, -1, 1, 1, 1, -1, + 1, 1, 1, -1, 1, 1 + }; + message_length = length[code & 0x0f]; +@@ -125,7 +121,7 @@ int line6_midibuf_write(struct midi_buff + } + + int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, +- int length) ++ int length, int read_type) + { + int bytes_used; + int length1, length2; +@@ -148,9 +144,22 @@ int line6_midibuf_read(struct midi_buffe + + length1 = this->size - this->pos_read; + +- /* check MIDI command length */ + command = this->buf[this->pos_read]; ++ /* ++ PODxt always has status byte lower nibble set to 0010, ++ when it means to send 0000, so we correct if here so ++ that control/program changes come on channel 1 and ++ sysex message status byte is correct ++ */ ++ if (read_type == LINE6_MIDIBUF_READ_RX) { ++ if (command == 0xb2 || command == 0xc2 || command == 0xf2) { ++ unsigned char fixed = command & 0xf0; ++ this->buf[this->pos_read] = fixed; ++ command = fixed; ++ } ++ } + ++ /* check MIDI command length */ + if (command & 0x80) { + midi_length = midibuf_message_length(command); + this->command_prev = command; +--- a/sound/usb/line6/midibuf.h ++++ b/sound/usb/line6/midibuf.h +@@ -8,6 +8,9 @@ + #ifndef MIDIBUF_H + #define MIDIBUF_H + ++#define LINE6_MIDIBUF_READ_TX 0 ++#define LINE6_MIDIBUF_READ_RX 1 ++ + struct midi_buffer { + unsigned char *buf; + int size; +@@ -23,7 +26,7 @@ extern void line6_midibuf_destroy(struct + extern int line6_midibuf_ignore(struct midi_buffer *mb, int length); + extern int line6_midibuf_init(struct midi_buffer *mb, int size, int split); + extern int line6_midibuf_read(struct midi_buffer *mb, unsigned char *data, +- int length); ++ int length, int read_type); + extern void line6_midibuf_reset(struct midi_buffer *mb); + extern int line6_midibuf_write(struct midi_buffer *mb, unsigned char *data, + int length); +--- a/sound/usb/line6/pod.c ++++ b/sound/usb/line6/pod.c +@@ 
-159,8 +159,9 @@ static struct line6_pcm_properties pod_p + .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ + }; + ++ + static const char pod_version_header[] = { +- 0xf2, 0x7e, 0x7f, 0x06, 0x02 ++ 0xf0, 0x7e, 0x7f, 0x06, 0x02 + }; + + static char *pod_alloc_sysex_buffer(struct usb_line6_pod *pod, int code, diff --git a/queue-5.10/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch b/queue-5.10/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch new file mode 100644 index 00000000000..98524c8106e --- /dev/null +++ b/queue-5.10/alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch @@ -0,0 +1,34 @@ +From b8800d324abb50160560c636bfafe2c81001b66c Mon Sep 17 00:00:00 2001 +From: Artem Egorkine +Date: Sun, 25 Dec 2022 12:57:28 +0200 +Subject: ALSA: line6: fix stack overflow in line6_midi_transmit + +From: Artem Egorkine + +commit b8800d324abb50160560c636bfafe2c81001b66c upstream. + +Correctly calculate available space including the size of the chunk +buffer. This fixes a buffer overflow when multiple MIDI sysex +messages are sent to a PODxt device. 
+ +Signed-off-by: Artem Egorkine +Cc: +Link: https://lore.kernel.org/r/20221225105728.1153989-2-arteme@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/line6/midi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/sound/usb/line6/midi.c ++++ b/sound/usb/line6/midi.c +@@ -44,7 +44,8 @@ static void line6_midi_transmit(struct s + int req, done; + + for (;;) { +- req = min(line6_midibuf_bytes_free(mb), line6->max_packet_size); ++ req = min3(line6_midibuf_bytes_free(mb), line6->max_packet_size, ++ LINE6_FALLBACK_MAXPACKETSIZE); + done = snd_rawmidi_transmit_peek(substream, chunk, req); + + if (done == 0) diff --git a/queue-5.10/binfmt-fix-error-return-code-in-load_elf_fdpic_binary.patch b/queue-5.10/binfmt-fix-error-return-code-in-load_elf_fdpic_binary.patch new file mode 100644 index 00000000000..c294865066c --- /dev/null +++ b/queue-5.10/binfmt-fix-error-return-code-in-load_elf_fdpic_binary.patch @@ -0,0 +1,36 @@ +From e7f703ff2507f4e9f496da96cd4b78fd3026120c Mon Sep 17 00:00:00 2001 +From: Wang Yufen +Date: Fri, 2 Dec 2022 09:41:01 +0800 +Subject: binfmt: Fix error return code in load_elf_fdpic_binary() + +From: Wang Yufen + +commit e7f703ff2507f4e9f496da96cd4b78fd3026120c upstream. + +Fix to return a negative error code from create_elf_fdpic_tables() +instead of 0. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: stable@vger.kernel.org +Signed-off-by: Wang Yufen +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/1669945261-30271-1-git-send-email-wangyufen@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/binfmt_elf_fdpic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/binfmt_elf_fdpic.c ++++ b/fs/binfmt_elf_fdpic.c +@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct + current->mm->start_stack = current->mm->start_brk + stack_size; + #endif + +- if (create_elf_fdpic_tables(bprm, current->mm, +- &exec_params, &interp_params) < 0) ++ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, ++ &interp_params); ++ if (retval < 0) + goto error; + + kdebug("- start_code %lx", current->mm->start_code); diff --git a/queue-5.10/f2fs-should-put-a-page-when-checking-the-summary-info.patch b/queue-5.10/f2fs-should-put-a-page-when-checking-the-summary-info.patch new file mode 100644 index 00000000000..420aec9a430 --- /dev/null +++ b/queue-5.10/f2fs-should-put-a-page-when-checking-the-summary-info.patch @@ -0,0 +1,31 @@ +From c3db3c2fd9992c08f49aa93752d3c103c3a4f6aa Mon Sep 17 00:00:00 2001 +From: Pavel Machek +Date: Mon, 24 Oct 2022 19:30:12 +0200 +Subject: f2fs: should put a page when checking the summary info + +From: Pavel Machek + +commit c3db3c2fd9992c08f49aa93752d3c103c3a4f6aa upstream. + +The commit introduces another bug. 
+ +Cc: stable@vger.kernel.org +Fixes: c6ad7fd16657e ("f2fs: fix to do sanity check on summary info") +Signed-off-by: Pavel Machek +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/gc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/f2fs/gc.c ++++ b/fs/f2fs/gc.c +@@ -1008,6 +1008,7 @@ static bool is_alive(struct f2fs_sb_info + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", + ofs_in_node, dni->ino, dni->nid, max_addrs); ++ f2fs_put_page(node_page, 1); + return false; + } + diff --git a/queue-5.10/md-fix-a-crash-in-mempool_free.patch b/queue-5.10/md-fix-a-crash-in-mempool_free.patch new file mode 100644 index 00000000000..39fcba3c93e --- /dev/null +++ b/queue-5.10/md-fix-a-crash-in-mempool_free.patch @@ -0,0 +1,109 @@ +From 341097ee53573e06ab9fc675d96a052385b851fa Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 4 Nov 2022 09:53:38 -0400 +Subject: md: fix a crash in mempool_free + +From: Mikulas Patocka + +commit 341097ee53573e06ab9fc675d96a052385b851fa upstream. + +There's a crash in mempool_free when running the lvm test +shell/lvchange-rebuild-raid.sh. + +The reason for the crash is this: +* super_written calls atomic_dec_and_test(&mddev->pending_writes) and + wake_up(&mddev->sb_wait). Then it calls rdev_dec_pending(rdev, mddev) + and bio_put(bio). +* so, the process that waited on sb_wait and that is woken up is racing + with bio_put(bio). +* if the process wins the race, it calls bioset_exit before bio_put(bio) + is executed. +* bio_put(bio) attempts to free a bio into a destroyed bio set - causing + a crash in mempool_free. + +We fix this bug by moving bio_put before atomic_dec_and_test. + +We also move rdev_dec_pending before atomic_dec_and_test as suggested by +Neil Brown. + +The function md_end_flush has a similar bug - we must call bio_put before +we decrement the number of in-progress bios. 
+ + BUG: kernel NULL pointer dereference, address: 0000000000000000 + #PF: supervisor write access in kernel mode + #PF: error_code(0x0002) - not-present page + PGD 11557f0067 P4D 11557f0067 PUD 0 + Oops: 0002 [#1] PREEMPT SMP + CPU: 0 PID: 73 Comm: kworker/0:1 Not tainted 6.1.0-rc3 #5 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 + Workqueue: kdelayd flush_expired_bios [dm_delay] + RIP: 0010:mempool_free+0x47/0x80 + Code: 48 89 ef 5b 5d ff e0 f3 c3 48 89 f7 e8 32 45 3f 00 48 63 53 08 48 89 c6 3b 53 04 7d 2d 48 8b 43 10 8d 4a 01 48 89 df 89 4b 08 <48> 89 2c d0 e8 b0 45 3f 00 48 8d 7b 30 5b 5d 31 c9 ba 01 00 00 00 + RSP: 0018:ffff88910036bda8 EFLAGS: 00010093 + RAX: 0000000000000000 RBX: ffff8891037b65d8 RCX: 0000000000000001 + RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff8891037b65d8 + RBP: ffff8891447ba240 R08: 0000000000012908 R09: 00000000003d0900 + R10: 0000000000000000 R11: 0000000000173544 R12: ffff889101a14000 + R13: ffff8891562ac300 R14: ffff889102b41440 R15: ffffe8ffffa00d05 + FS: 0000000000000000(0000) GS:ffff88942fa00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000001102e99000 CR4: 00000000000006b0 + Call Trace: + + clone_endio+0xf4/0x1c0 [dm_mod] + clone_endio+0xf4/0x1c0 [dm_mod] + __submit_bio+0x76/0x120 + submit_bio_noacct_nocheck+0xb6/0x2a0 + flush_expired_bios+0x28/0x2f [dm_delay] + process_one_work+0x1b4/0x300 + worker_thread+0x45/0x3e0 + ? rescuer_thread+0x380/0x380 + kthread+0xc2/0x100 + ? 
kthread_complete_and_exit+0x20/0x20 + ret_from_fork+0x1f/0x30 + + Modules linked in: brd dm_delay dm_raid dm_mod af_packet uvesafb cfbfillrect cfbimgblt cn cfbcopyarea fb font fbdev tun autofs4 binfmt_misc configfs ipv6 virtio_rng virtio_balloon rng_core virtio_net pcspkr net_failover failover qemu_fw_cfg button mousedev raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod sd_mod t10_pi crc64_rocksoft crc64 virtio_scsi scsi_mod evdev psmouse bsg scsi_common [last unloaded: brd] + CR2: 0000000000000000 + ---[ end trace 0000000000000000 ]--- + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -555,13 +555,14 @@ static void md_end_flush(struct bio *bio + struct md_rdev *rdev = bio->bi_private; + struct mddev *mddev = rdev->mddev; + ++ bio_put(bio); ++ + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pre-request flush has finished */ + queue_work(md_wq, &mddev->flush_work); + } +- bio_put(bio); + } + + static void md_submit_flush_data(struct work_struct *ws); +@@ -966,10 +967,12 @@ static void super_written(struct bio *bi + } else + clear_bit(LastDev, &rdev->flags); + ++ bio_put(bio); ++ ++ rdev_dec_pending(rdev, mddev); ++ + if (atomic_dec_and_test(&mddev->pending_writes)) + wake_up(&mddev->sb_wait); +- rdev_dec_pending(rdev, mddev); +- bio_put(bio); + } + + void md_super_write(struct mddev *mddev, struct md_rdev *rdev, diff --git a/queue-5.10/mm-compaction-fix-fast_isolate_around-to-stay-within-boundaries.patch b/queue-5.10/mm-compaction-fix-fast_isolate_around-to-stay-within-boundaries.patch new file mode 100644 index 00000000000..d523969774b --- /dev/null +++ b/queue-5.10/mm-compaction-fix-fast_isolate_around-to-stay-within-boundaries.patch @@ -0,0 
+1,114 @@ +From be21b32afe470c5ae98e27e49201158a47032942 Mon Sep 17 00:00:00 2001 +From: NARIBAYASHI Akira +Date: Wed, 26 Oct 2022 20:24:38 +0900 +Subject: mm, compaction: fix fast_isolate_around() to stay within boundaries + +From: NARIBAYASHI Akira + +commit be21b32afe470c5ae98e27e49201158a47032942 upstream. + +Depending on the memory configuration, isolate_freepages_block() may scan +pages out of the target range and causes panic. + +Panic can occur on systems with multiple zones in a single pageblock. + +The reason it is rare is that it only happens in special +configurations. Depending on how many similar systems there are, it +may be a good idea to fix this problem for older kernels as well. + +The problem is that pfn as argument of fast_isolate_around() could be out +of the target range. Therefore we should consider the case where pfn < +start_pfn, and also the case where end_pfn < pfn. + +This problem should have been addressd by the commit 6e2b7044c199 ("mm, +compaction: make fast_isolate_freepages() stay within zone") but there was +an oversight. + + Case1: pfn < start_pfn + + + | node X's zone | node Y's zone + +-----------------+------------------------------... + pageblock ^ ^ ^ + +-----------+-----------+-----------+-----------+... + ^ ^ ^ + ^ ^ end_pfn + ^ start_pfn = cc->zone->zone_start_pfn + pfn + <---------> scanned range by "Scan After" + + Case2: end_pfn < pfn + + + | node X's zone | node Y's zone + +-----------------+------------------------------... + pageblock ^ ^ ^ + +-----------+-----------+-----------+-----------+... + ^ ^ ^ + ^ ^ pfn + ^ end_pfn + start_pfn + <---------> scanned range by "Scan Before" + +It seems that there is no good reason to skip nr_isolated pages just after +given pfn. So let perform simple scan from start to end instead of +dividing the scan into "Before" and "After". 
+ +Link: https://lkml.kernel.org/r/20221026112438.236336-1-a.naribayashi@fujitsu.com +Fixes: 6e2b7044c199 ("mm, compaction: make fast_isolate_freepages() stay within zone"). +Signed-off-by: NARIBAYASHI Akira +Cc: David Rientjes +Cc: Mel Gorman +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/compaction.c | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -1245,7 +1245,7 @@ move_freelist_tail(struct list_head *fre + } + + static void +-fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated) ++fast_isolate_around(struct compact_control *cc, unsigned long pfn) + { + unsigned long start_pfn, end_pfn; + struct page *page; +@@ -1266,21 +1266,13 @@ fast_isolate_around(struct compact_contr + if (!page) + return; + +- /* Scan before */ +- if (start_pfn != pfn) { +- isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false); +- if (cc->nr_freepages >= cc->nr_migratepages) +- return; +- } +- +- /* Scan after */ +- start_pfn = pfn + nr_isolated; +- if (start_pfn < end_pfn) +- isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); ++ isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); + + /* Skip this pageblock in the future as it's full or nearly full */ + if (cc->nr_freepages < cc->nr_migratepages) + set_pageblock_skip(page); ++ ++ return; + } + + /* Search orders in round-robin fashion */ +@@ -1456,7 +1448,7 @@ fast_isolate_freepages(struct compact_co + return cc->free_pfn; + + low_pfn = page_to_pfn(page); +- fast_isolate_around(cc, low_pfn, nr_isolated); ++ fast_isolate_around(cc, low_pfn); + return low_pfn; + } + diff --git a/queue-5.10/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch b/queue-5.10/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch new file mode 100644 index 00000000000..038e0b442cd --- 
/dev/null +++ b/queue-5.10/mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch @@ -0,0 +1,64 @@ +From 4a44cd249604e29e7b90ae796d7692f5773dd348 Mon Sep 17 00:00:00 2001 +From: Deren Wu +Date: Sun, 4 Dec 2022 16:24:16 +0800 +Subject: mmc: vub300: fix warning - do not call blocking ops when !TASK_RUNNING + +From: Deren Wu + +commit 4a44cd249604e29e7b90ae796d7692f5773dd348 upstream. + +vub300_enable_sdio_irq() works with mutex and need TASK_RUNNING here. +Ensure that we mark current as TASK_RUNNING for sleepable context. + +[ 77.554641] do not call blocking ops when !TASK_RUNNING; state=1 set at [] sdio_irq_thread+0x17d/0x5b0 +[ 77.554652] WARNING: CPU: 2 PID: 1983 at kernel/sched/core.c:9813 __might_sleep+0x116/0x160 +[ 77.554905] CPU: 2 PID: 1983 Comm: ksdioirqd/mmc1 Tainted: G OE 6.1.0-rc5 #1 +[ 77.554910] Hardware name: Intel(R) Client Systems NUC8i7BEH/NUC8BEB, BIOS BECFL357.86A.0081.2020.0504.1834 05/04/2020 +[ 77.554912] RIP: 0010:__might_sleep+0x116/0x160 +[ 77.554920] RSP: 0018:ffff888107b7fdb8 EFLAGS: 00010282 +[ 77.554923] RAX: 0000000000000000 RBX: ffff888118c1b740 RCX: 0000000000000000 +[ 77.554926] RDX: 0000000000000001 RSI: 0000000000000004 RDI: ffffed1020f6ffa9 +[ 77.554928] RBP: ffff888107b7fde0 R08: 0000000000000001 R09: ffffed1043ea60ba +[ 77.554930] R10: ffff88821f5305cb R11: ffffed1043ea60b9 R12: ffffffff93aa3a60 +[ 77.554932] R13: 000000000000011b R14: 7fffffffffffffff R15: ffffffffc0558660 +[ 77.554934] FS: 0000000000000000(0000) GS:ffff88821f500000(0000) knlGS:0000000000000000 +[ 77.554937] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 77.554939] CR2: 00007f8a44010d68 CR3: 000000024421a003 CR4: 00000000003706e0 +[ 77.554942] Call Trace: +[ 77.554944] +[ 77.554952] mutex_lock+0x78/0xf0 +[ 77.554973] vub300_enable_sdio_irq+0x103/0x3c0 [vub300] +[ 77.554981] sdio_irq_thread+0x25c/0x5b0 +[ 77.555006] kthread+0x2b8/0x370 +[ 77.555017] ret_from_fork+0x1f/0x30 +[ 77.555023] +[ 77.555025] ---[ end trace 0000000000000000 ]--- + 
+Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") +Signed-off-by: Deren Wu +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/87dc45b122d26d63c80532976813c9365d7160b3.1670140888.git.deren.wu@mediatek.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/vub300.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/mmc/host/vub300.c ++++ b/drivers/mmc/host/vub300.c +@@ -2049,6 +2049,7 @@ static void vub300_enable_sdio_irq(struc + return; + kref_get(&vub300->kref); + if (enable) { ++ set_current_state(TASK_RUNNING); + mutex_lock(&vub300->irq_mutex); + if (vub300->irqs_queued) { + vub300->irqs_queued -= 1; +@@ -2064,6 +2065,7 @@ static void vub300_enable_sdio_irq(struc + vub300_queue_poll_work(vub300, 0); + } + mutex_unlock(&vub300->irq_mutex); ++ set_current_state(TASK_INTERRUPTIBLE); + } else { + vub300->irq_enabled = 0; + } diff --git a/queue-5.10/ovl-use-ovl-mounter-s-fsuid-and-fsgid-in-ovl_link.patch b/queue-5.10/ovl-use-ovl-mounter-s-fsuid-and-fsgid-in-ovl_link.patch new file mode 100644 index 00000000000..e7c52a471ca --- /dev/null +++ b/queue-5.10/ovl-use-ovl-mounter-s-fsuid-and-fsgid-in-ovl_link.patch @@ -0,0 +1,117 @@ +From 5b0db51215e895a361bc63132caa7cca36a53d6a Mon Sep 17 00:00:00 2001 +From: Zhang Tianci +Date: Thu, 1 Sep 2022 16:29:29 +0800 +Subject: ovl: Use ovl mounter's fsuid and fsgid in ovl_link() + +From: Zhang Tianci + +commit 5b0db51215e895a361bc63132caa7cca36a53d6a upstream. + +There is a wrong case of link() on overlay: + $ mkdir /lower /fuse /merge + $ mount -t fuse /fuse + $ mkdir /fuse/upper /fuse/work + $ mount -t overlay /merge -o lowerdir=/lower,upperdir=/fuse/upper,\ + workdir=work + $ touch /merge/file + $ chown bin.bin /merge/file // the file's caller becomes "bin" + $ ln /merge/file /merge/lnkfile + +Then we will get an error(EACCES) because fuse daemon checks the link()'s +caller is "bin", it denied this request. 
+ +In the changing history of ovl_link(), there are two key commits: + +The first is commit bb0d2b8ad296 ("ovl: fix sgid on directory") which +overrides the cred's fsuid/fsgid using the new inode. The new inode's +owner is initialized by inode_init_owner(), and inode->fsuid is +assigned to the current user. So the override fsuid becomes the +current user. We know link() is actually modifying the directory, so +the caller must have the MAY_WRITE permission on the directory. The +current caller may should have this permission. This is acceptable +to use the caller's fsuid. + +The second is commit 51f7e52dc943 ("ovl: share inode for hard link") +which removed the inode creation in ovl_link(). This commit move +inode_init_owner() into ovl_create_object(), so the ovl_link() just +give the old inode to ovl_create_or_link(). Then the override fsuid +becomes the old inode's fsuid, neither the caller nor the overlay's +mounter! So this is incorrect. + +Fix this bug by using ovl mounter's fsuid/fsgid to do underlying +fs's link(). 
+ +Link: https://lore.kernel.org/all/20220817102952.xnvesg3a7rbv576x@wittgenstein/T +Link: https://lore.kernel.org/lkml/20220825130552.29587-1-zhangtianci.1997@bytedance.com/t +Signed-off-by: Zhang Tianci +Signed-off-by: Jiachen Zhang +Reviewed-by: Christian Brauner (Microsoft) +Fixes: 51f7e52dc943 ("ovl: share inode for hard link") +Cc: # v4.8 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/overlayfs/dir.c | 46 ++++++++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 16 deletions(-) + +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -586,28 +586,42 @@ static int ovl_create_or_link(struct den + goto out_revert_creds; + } + +- err = -ENOMEM; +- override_cred = prepare_creds(); +- if (override_cred) { ++ if (!attr->hardlink) { ++ err = -ENOMEM; ++ override_cred = prepare_creds(); ++ if (!override_cred) ++ goto out_revert_creds; ++ /* ++ * In the creation cases(create, mkdir, mknod, symlink), ++ * ovl should transfer current's fs{u,g}id to underlying ++ * fs. Because underlying fs want to initialize its new ++ * inode owner using current's fs{u,g}id. And in this ++ * case, the @inode is a new inode that is initialized ++ * in inode_init_owner() to current's fs{u,g}id. So use ++ * the inode's i_{u,g}id to override the cred's fs{u,g}id. ++ * ++ * But in the other hardlink case, ovl_link() does not ++ * create a new inode, so just use the ovl mounter's ++ * fs{u,g}id. 
++ */ + override_cred->fsuid = inode->i_uid; + override_cred->fsgid = inode->i_gid; +- if (!attr->hardlink) { +- err = security_dentry_create_files_as(dentry, +- attr->mode, &dentry->d_name, old_cred, +- override_cred); +- if (err) { +- put_cred(override_cred); +- goto out_revert_creds; +- } ++ err = security_dentry_create_files_as(dentry, ++ attr->mode, &dentry->d_name, old_cred, ++ override_cred); ++ if (err) { ++ put_cred(override_cred); ++ goto out_revert_creds; + } + put_cred(override_creds(override_cred)); + put_cred(override_cred); +- +- if (!ovl_dentry_is_whiteout(dentry)) +- err = ovl_create_upper(dentry, inode, attr); +- else +- err = ovl_create_over_whiteout(dentry, inode, attr); + } ++ ++ if (!ovl_dentry_is_whiteout(dentry)) ++ err = ovl_create_upper(dentry, inode, attr); ++ else ++ err = ovl_create_over_whiteout(dentry, inode, attr); ++ + out_revert_creds: + revert_creds(old_cred); + return err; diff --git a/queue-5.10/pnode-terminate-at-peers-of-source.patch b/queue-5.10/pnode-terminate-at-peers-of-source.patch new file mode 100644 index 00000000000..27fd80ecadc --- /dev/null +++ b/queue-5.10/pnode-terminate-at-peers-of-source.patch @@ -0,0 +1,639 @@ +From 11933cf1d91d57da9e5c53822a540bbdc2656c16 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Sat, 17 Dec 2022 22:28:40 +0100 +Subject: pnode: terminate at peers of source + +From: Christian Brauner + +commit 11933cf1d91d57da9e5c53822a540bbdc2656c16 upstream. + +The propagate_mnt() function handles mount propagation when creating +mounts and propagates the source mount tree @source_mnt to all +applicable nodes of the destination propagation mount tree headed by +@dest_mnt. + +Unfortunately it contains a bug where it fails to terminate at peers of +@source_mnt when looking up copies of the source mount that become +masters for copies of the source mount tree mounted on top of slaves in +the destination propagation tree causing a NULL dereference. 
+ +Once the mechanics of the bug are understood it's easy to trigger. +Because of unprivileged user namespaces it is available to unprivileged +users. + +While fixing this bug we've gotten confused multiple times due to +unclear terminology or missing concepts. So let's start this with some +clarifications: + +* The terms "master" or "peer" denote a shared mount. A shared mount + belongs to a peer group. + +* A peer group is a set of shared mounts that propagate to each other. + They are identified by a peer group id. The peer group id is available + in @shared_mnt->mnt_group_id. + Shared mounts within the same peer group have the same peer group id. + The peers in a peer group can be reached via @shared_mnt->mnt_share. + +* The terms "slave mount" or "dependent mount" denote a mount that + receives propagation from a peer in a peer group. IOW, shared mounts + may have slave mounts and slave mounts have shared mounts as their + master. Slave mounts of a given peer in a peer group are listed on + that peers slave list available at @shared_mnt->mnt_slave_list. + +* The term "master mount" denotes a mount in a peer group. IOW, it + denotes a shared mount or a peer mount in a peer group. The term + "master mount" - or "master" for short - is mostly used when talking + in the context of slave mounts that receive propagation from a master + mount. A master mount of a slave identifies the closest peer group a + slave mount receives propagation from. The master mount of a slave can + be identified via @slave_mount->mnt_master. Different slaves may point + to different masters in the same peer group. + +* Multiple peers in a peer group can have non-empty ->mnt_slave_lists. + Non-empty ->mnt_slave_lists of peers don't intersect. Consequently, to + ensure all slave mounts of a peer group are visited the + ->mnt_slave_lists of all peers in a peer group have to be walked. 
+ +* Slave mounts point to a peer in the closest peer group they receive + propagation from via @slave_mnt->mnt_master (see above). Together with + these peers they form a propagation group (see below). The closest + peer group can thus be identified through the peer group id + @slave_mnt->mnt_master->mnt_group_id of the peer/master that a slave + mount receives propagation from. + +* A shared-slave mount is a slave mount to a peer group pg1 while also + a peer in another peer group pg2. IOW, a peer group may receive + propagation from another peer group. + + If a peer group pg1 is a slave to another peer group pg2 then all + peers in peer group pg1 point to the same peer in peer group pg2 via + ->mnt_master. IOW, all peers in peer group pg1 appear on the same + ->mnt_slave_list. IOW, they cannot be slaves to different peer groups. + +* A pure slave mount is a slave mount that is a slave to a peer group + but is not a peer in another peer group. + +* A propagation group denotes the set of mounts consisting of a single + peer group pg1 and all slave mounts and shared-slave mounts that point + to a peer in that peer group via ->mnt_master. IOW, all slave mounts + such that @slave_mnt->mnt_master->mnt_group_id is equal to + @shared_mnt->mnt_group_id. + + The concept of a propagation group makes it easier to talk about a + single propagation level in a propagation tree. + + For example, in propagate_mnt() the immediate peers of @dest_mnt and + all slaves of @dest_mnt's peer group form a propagation group propg1. + So a shared-slave mount that is a slave in propg1 and that is a peer + in another peer group pg2 forms another propagation group propg2 + together with all slaves that point to that shared-slave mount in + their ->mnt_master. + +* A propagation tree refers to all mounts that receive propagation + starting from a specific shared mount. + + For example, for propagate_mnt() @dest_mnt is the start of a + propagation tree. 
The propagation tree ecompasses all mounts that + receive propagation from @dest_mnt's peer group down to the leafs. + +With that out of the way let's get to the actual algorithm. + +We know that @dest_mnt is guaranteed to be a pure shared mount or a +shared-slave mount. This is guaranteed by a check in +attach_recursive_mnt(). So propagate_mnt() will first propagate the +source mount tree to all peers in @dest_mnt's peer group: + +for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { + ret = propagate_one(n); + if (ret) + goto out; +} + +Notice, that the peer propagation loop of propagate_mnt() doesn't +propagate @dest_mnt itself. @dest_mnt is mounted directly in +attach_recursive_mnt() after we propagated to the destination +propagation tree. + +The mount that will be mounted on top of @dest_mnt is @source_mnt. This +copy was created earlier even before we entered attach_recursive_mnt() +and doesn't concern us a lot here. + +It's just important to notice that when propagate_mnt() is called +@source_mnt will not yet have been mounted on top of @dest_mnt. Thus, +@source_mnt->mnt_parent will either still point to @source_mnt or - in +the case @source_mnt is moved and thus already attached - still to its +former parent. + +For each peer @m in @dest_mnt's peer group propagate_one() will create a +new copy of the source mount tree and mount that copy @child on @m such +that @child->mnt_parent points to @m after propagate_one() returns. + +propagate_one() will stash the last destination propagation node @m in +@last_dest and the last copy it created for the source mount tree in +@last_source. + +Hence, if we call into propagate_one() again for the next destination +propagation node @m, @last_dest will point to the previous destination +propagation node and @last_source will point to the previous copy of the +source mount tree and mounted on @last_dest. + +Each new copy of the source mount tree is created from the previous copy +of the source mount tree. 
This will become important later. + +The peer loop in propagate_mnt() is straightforward. We iterate through +the peers copying and updating @last_source and @last_dest as we go +through them and mount each copy of the source mount tree @child on a +peer @m in @dest_mnt's peer group. + +After propagate_mnt() handled the peers in @dest_mnt's peer group +propagate_mnt() will propagate the source mount tree down the +propagation tree that @dest_mnt's peer group propagates to: + +for (m = next_group(dest_mnt, dest_mnt); m; + m = next_group(m, dest_mnt)) { + /* everything in that slave group */ + n = m; + do { + ret = propagate_one(n); + if (ret) + goto out; + n = next_peer(n); + } while (n != m); +} + +The next_group() helper will recursively walk the destination +propagation tree, descending into each propagation group of the +propagation tree. + +The important part is that it takes care to propagate the source mount +tree to all peers in the peer group of a propagation group before it +propagates to the slaves to those peers in the propagation group. IOW, +it creates and mounts copies of the source mount tree that become +masters before it creates and mounts copies of the source mount tree +that become slaves to these masters. + +It is important to remember that propagating the source mount tree to +each mount @m in the destination propagation tree simply means that we +create and mount new copies @child of the source mount tree on @m such +that @child->mnt_parent points to @m. + +Since we know that each node @m in the destination propagation tree +headed by @dest_mnt's peer group will be overmounted with a copy of the +source mount tree and since we know that the propagation properties of +each copy of the source mount tree we create and mount at @m will mostly +mirror the propagation properties of @m. We can use that information to +create and mount the copies of the source mount tree that become masters +before their slaves. 
+
+The easy case is always when @m and @last_dest are peers in a peer group
+of a given propagation group. In that case we know that we can simply
+copy @last_source without having to figure out what the master for the
+new copy @child of the source mount tree needs to be as we've done that
+in a previous call to propagate_one().
+
+The hard case is when we're dealing with a slave mount or a shared-slave
+mount @m in a destination propagation group that we need to create and
+mount a copy of the source mount tree on.
+
+For each propagation group in the destination propagation tree we
+propagate the source mount tree to we want to make sure that the copies
+@child of the source mount tree we create and mount on slaves @m pick an
+earlier copy of the source mount tree that we mounted on a master @m of
+the destination propagation group as their master. This is a mouthful
+but as far as we can tell that's the core of it all.
+
+But, if we keep track of the masters in the destination propagation tree
+@m we can use the information to find the correct master for each copy
+of the source mount tree we create and mount at the slaves in the
+destination propagation tree @m.
+
+Let's walk through the base case as that's still fairly easy to grasp.
+
+If we're dealing with the first slave in the propagation group that
+@dest_mnt is in then we don't yet have marked any masters in the
+destination propagation tree.
+
+We know the master for the first slave to @dest_mnt's peer group is
+simply @dest_mnt. So we expect this algorithm to yield a copy of the
+source mount tree that was mounted on a peer in @dest_mnt's peer group
+as the master for the copy of the source mount tree we want to mount at
+the first slave @m:
+
+for (n = m; ; n = p) {
+ p = n->mnt_master;
+ if (p == dest_master || IS_MNT_MARKED(p))
+ break;
+}
+
+For the first slave we walk the destination propagation tree all the way
+up to a peer in @dest_mnt's peer group. 
IOW, the propagation hierarchy
+can be walked by walking up the @mnt->mnt_master hierarchy of the
+destination propagation tree @m. We will ultimately find a peer in
+@dest_mnt's peer group and thus ultimately @dest_mnt->mnt_master.
+
+Btw, here the assumption we listed at the beginning becomes important.
+Namely, that peers in a peer group pg1 that are slaves in another peer
+group pg2 appear on the same ->mnt_slave_list. IOW, all slaves who are
+peers in peer group pg1 point to the same peer in peer group pg2 via
+their ->mnt_master. Otherwise the termination condition in the code
+above would be wrong and next_group() would be broken too.
+
+So the first iteration sets:
+
+n = m;
+p = n->mnt_master;
+
+such that @p now points to a peer or @dest_mnt itself. We walk up one
+more level since we don't have any marked mounts. So we end up with:
+
+n = dest_mnt;
+p = dest_mnt->mnt_master;
+
+If @dest_mnt's peer group is not slave to another peer group then @p is
+now NULL. If @dest_mnt's peer group is a slave to another peer group
+then @p now points to @dest_mnt->mnt_master which is a master
+outside the propagation tree we're dealing with.
+
+Now we need to figure out the master for the copy of the source mount
+tree we're about to create and mount on the first slave of @dest_mnt's
+peer group:
+
+do {
+ struct mount *parent = last_source->mnt_parent;
+ if (last_source == first_source)
+ break;
+ done = parent->mnt_master == p;
+ if (done && peers(n, parent))
+ break;
+ last_source = last_source->mnt_master;
+} while (!done);
+
+We know that @last_source->mnt_parent points to @last_dest and
+@last_dest is the last peer in @dest_mnt's peer group we propagated to
+in the peer loop in propagate_mnt().
+
+Consequently, @last_source is the last copy we created and mounted on that
+last peer in @dest_mnt's peer group. So @last_source is the master we
+want to pick.
+
+We know that @last_source->mnt_parent->mnt_master points to
+@last_dest->mnt_master. 
We also know that @last_dest->mnt_master is
+either NULL or points to a master outside of the destination propagation
+tree and so does @p. Hence:
+
+done = parent->mnt_master == p;
+
+is trivially true in the base condition.
+
+We also know that for the first slave mount of @dest_mnt's peer group
+that @last_dest either points to @dest_mnt itself because it was
+initialized to:
+
+last_dest = dest_mnt;
+
+at the beginning of propagate_mnt() or it will point to a peer of
+@dest_mnt in its peer group. In both cases it is guaranteed that on the
+first iteration @n and @parent are peers (Please note the check for
+peers here as that's important.):
+
+if (done && peers(n, parent))
+ break;
+
+So, as we expected, we select @last_source, which refers to the last
+copy of the source mount tree we mounted on the last peer in @dest_mnt's
+peer group, as the master of the first slave in @dest_mnt's peer group.
+The rest is taken care of by clone_mnt(last_source, ...). We'll skip
+over that part otherwise this becomes a blogpost.
+
+At the end of propagate_mnt() we now mark @m->mnt_master as the first
+master in the destination propagation tree that is distinct from
+@dest_mnt->mnt_master. IOW, we mark @dest_mnt itself as a master.
+
+By marking @dest_mnt or one of its peers we are able to easily find it
+again when we later lookup masters for other copies of the source mount
+tree we mount copies of the source mount tree on slaves @m to
+@dest_mnt's peer group. This, in turn allows us to find the master we
+selected for the copies of the source mount tree we mounted on masters in
+the destination propagation tree again.
+
+The important part is to realize that the code makes use of the fact
+that the last copy of the source mount tree stashed in @last_source was
+mounted on top of the previous destination propagation node @last_dest. 
+
+What this means is that @last_source allows us to walk the destination
+propagation hierarchy the same way each destination propagation node @m
+does.
+
+If we take @last_source, which is the copy of @source_mnt we have
+mounted on @last_dest in the previous iteration of propagate_one(), then
+we know @last_source->mnt_parent points to @last_dest but we also know
+that as we walk through the destination propagation tree that
+@last_source->mnt_master will point to an earlier copy of the source
+mount tree we mounted on an earlier destination propagation node @m.
+
+IOW, @last_source->mnt_parent will be our hook into the destination
+propagation tree and each consecutive @last_source->mnt_master will lead
+us to an earlier propagation node @m via
+@last_source->mnt_master->mnt_parent.
+
+Hence, by walking up @last_source->mnt_master, each of which is mounted
+on a node that is a master @m in the destination propagation tree we can
+also walk up the destination propagation hierarchy.
+
+So, for each new destination propagation node @m we use the previous
+copy of @last_source and the fact it's mounted on the previous
+propagation node @last_dest via @last_source->mnt_master->mnt_parent to
+determine what the master of the new copy of @last_source needs to be.
+
+The goal is to find the _closest_ master that the new copy of the source
+mount tree we are about to create and mount on a slave @m in the
+destination propagation tree needs to pick. IOW, we want to find a
+suitable master in the propagation group.
+
+As the propagation structure of the source mount propagation tree we
+create mirrors the propagation structure of the destination propagation
+tree we can find @m's closest master - i.e., a marked master - which is
+a peer in the closest peer group that @m receives propagation from. 
We +store that closest master of @m in @p as before and record the slave to +that master in @n + +We then search for this master @p via @last_source by walking up the +master hierarchy starting from the last copy of the source mount tree +stored in @last_source that we created and mounted on the previous +destination propagation node @m. + +We will try to find the master by walking @last_source->mnt_master and +by comparing @last_source->mnt_master->mnt_parent->mnt_master to @p. If +we find @p then we can figure out what earlier copy of the source mount +tree needs to be the master for the new copy of the source mount tree +we're about to create and mount at the current destination propagation +node @m. + +If @last_source->mnt_master->mnt_parent and @n are peers then we know +that the closest master they receive propagation from is +@last_source->mnt_master->mnt_parent->mnt_master. If not then the +closest immediate peer group that they receive propagation from must be +one level higher up. + +This builds on the earlier clarification at the beginning that all peers +in a peer group which are slaves of other peer groups all point to the +same ->mnt_master, i.e., appear on the same ->mnt_slave_list, of the +closest peer group that they receive propagation from. + +However, terminating the walk has corner cases. + +If the closest marked master for a given destination node @m cannot be +found by walking up the master hierarchy via @last_source->mnt_master +then we need to terminate the walk when we encounter @source_mnt again. + +This isn't an arbitrary termination. It simply means that the new copy +of the source mount tree we're about to create has a copy of the source +mount tree we created and mounted on a peer in @dest_mnt's peer group as +its master. IOW, @source_mnt is the peer in the closest peer group that +the new copy of the source mount tree receives propagation from. 
+
+We absolutely have to stop at @source_mnt because @last_source->mnt_master
+either points outside the propagation hierarchy we're dealing with or it
+is NULL because @source_mnt isn't a shared-slave.
+
+So continuing the walk past @source_mnt would cause a NULL dereference
+via @last_source->mnt_master->mnt_parent. And so we have to stop the
+walk when we encounter @source_mnt again.
+
+One scenario where this can happen is when we first handled a series of
+slaves of @dest_mnt's peer group and then encounter peers in a new peer
+group that is a slave to @dest_mnt's peer group. We handle them and then
+we encounter another slave mount to @dest_mnt that is a pure slave to
+@dest_mnt's peer group. That pure slave will have a peer in @dest_mnt's
+peer group as its master. Consequently, the new copy of the source mount
+tree will need to have @source_mnt as its master. So we walk the
+propagation hierarchy all the way up to @source_mnt based on
+@last_source->mnt_master.
+
+So terminate on @source_mnt, easy peasy. Except, that the check misses
+something that the rest of the algorithm already handles.
+
+If @dest_mnt has peers in its peer group the peer loop in
+propagate_mnt():
+
+for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
+ ret = propagate_one(n);
+ if (ret)
+ goto out;
+}
+
+will consecutively update @last_source with each previous copy of the
+source mount tree we created and mounted at the previous peer in
+@dest_mnt's peer group. So after that loop terminates @last_source will
+point to whatever copy of the source mount tree was created and mounted
+on the last peer in @dest_mnt's peer group.
+
+Furthermore, if there is even a single additional peer in @dest_mnt's
+peer group then @last_source will __not__ point to @source_mnt anymore.
+Because, as we mentioned above, @dest_mnt isn't even handled in this
+loop but directly in attach_recursive_mnt(). So it can't even accidentally
+come last in that peer loop. 
+ +So the first time we handle a slave mount @m of @dest_mnt's peer group +the copy of the source mount tree we create will make the __last copy of +the source mount tree we created and mounted on the last peer in +@dest_mnt's peer group the master of the new copy of the source mount +tree we create and mount on the first slave of @dest_mnt's peer group__. + +But this means that the termination condition that checks for +@source_mnt is wrong. The @source_mnt cannot be found anymore by +propagate_one(). Instead it will find the last copy of the source mount +tree we created and mounted for the last peer of @dest_mnt's peer group +again. And that is a peer of @source_mnt not @source_mnt itself. + +IOW, we fail to terminate the loop correctly and ultimately dereference +@last_source->mnt_master->mnt_parent. When @source_mnt's peer group +isn't slave to another peer group then @last_source->mnt_master is NULL +causing the splat below. + +For example, assume @dest_mnt is a pure shared mount and has three peers +in its peer group: + +=================================================================================== + mount-id mount-parent-id peer-group-id +=================================================================================== +(@dest_mnt) mnt_master[216] 309 297 shared:216 + \ + (@source_mnt) mnt_master[218]: 609 609 shared:218 + +(1) mnt_master[216]: 607 605 shared:216 + \ + (P1) mnt_master[218]: 624 607 shared:218 + +(2) mnt_master[216]: 576 574 shared:216 + \ + (P2) mnt_master[218]: 625 576 shared:218 + +(3) mnt_master[216]: 545 543 shared:216 + \ + (P3) mnt_master[218]: 626 545 shared:218 + +After this sequence has been processed @last_source will point to (P3), +the copy generated for the third peer in @dest_mnt's peer group we +handled. 
So the copy of the source mount tree (P4) we create and mount
+on the first slave of @dest_mnt's peer group:
+
+===================================================================================
+ mount-id mount-parent-id peer-group-id
+===================================================================================
+ mnt_master[216] 309 297 shared:216
+ /
+ /
+(S0) mnt_slave 483 481 master:216
+ \
+ \ (P3) mnt_master[218] 626 545 shared:218
+ \ /
+ \/
+ (P4) mnt_slave 627 483 master:218
+
+will pick the last copy of the source mount tree (P3) as master, not (S0).
+
+When walking the propagation hierarchy via @last_source's master
+hierarchy we encounter (P3) but not (S0), i.e., @source_mnt.
+
+We can fix this in multiple ways:
+
+(1) By setting @last_source to @source_mnt after we processed the peers
+ in @dest_mnt's peer group right after the peer loop in
+ propagate_mnt().
+
+(2) By changing the termination condition that relies on finding exactly
+ @source_mnt to finding a peer of @source_mnt.
+
+(3) By only moving @last_source when we actually venture into a new peer
+ group or some clever variant thereof.
+
+The first two options are minimally invasive and what we want as a fix.
+The third option is more intrusive but something we'd like to explore in
+the near future.
+
+This passes all LTP tests and specifically the mount propagation
+testsuite part of it. It also holds up against all known reproducers of
+this issue.
+
+Final words.
+First, this is a clever but __worryingly__ underdocumented algorithm.
+There isn't a single detailed comment to be found in next_group(),
+propagate_one() or anywhere else in that file for that matter. This has
+been a giant pain to understand and work through and a bug like this is
+insanely difficult to fix without a detailed understanding of what's
+happening. Let's not talk about the amount of time that was sunk into
+fixing this. 
+ +Second, all the cool kids with access to +unshare --mount --user --map-root --propagation=unchanged +are going to have a lot of fun. IOW, triggerable by unprivileged users +while namespace_lock() lock is held. + +[ 115.848393] BUG: kernel NULL pointer dereference, address: 0000000000000010 +[ 115.848967] #PF: supervisor read access in kernel mode +[ 115.849386] #PF: error_code(0x0000) - not-present page +[ 115.849803] PGD 0 P4D 0 +[ 115.850012] Oops: 0000 [#1] PREEMPT SMP PTI +[ 115.850354] CPU: 0 PID: 15591 Comm: mount Not tainted 6.1.0-rc7 #3 +[ 115.850851] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS +VirtualBox 12/01/2006 +[ 115.851510] RIP: 0010:propagate_one.part.0+0x7f/0x1a0 +[ 115.851924] Code: 75 eb 4c 8b 05 c2 25 37 02 4c 89 ca 48 8b 4a 10 +49 39 d0 74 1e 48 3b 81 e0 00 00 00 74 26 48 8b 92 e0 00 00 00 be 01 +00 00 00 <48> 8b 4a 10 49 39 d0 75 e2 40 84 f6 74 38 4c 89 05 84 25 37 +02 4d +[ 115.853441] RSP: 0018:ffffb8d5443d7d50 EFLAGS: 00010282 +[ 115.853865] RAX: ffff8e4d87c41c80 RBX: ffff8e4d88ded780 RCX: ffff8e4da4333a00 +[ 115.854458] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8e4d88ded780 +[ 115.855044] RBP: ffff8e4d88ded780 R08: ffff8e4da4338000 R09: ffff8e4da43388c0 +[ 115.855693] R10: 0000000000000002 R11: ffffb8d540158000 R12: ffffb8d5443d7da8 +[ 115.856304] R13: ffff8e4d88ded780 R14: 0000000000000000 R15: 0000000000000000 +[ 115.856859] FS: 00007f92c90c9800(0000) GS:ffff8e4dfdc00000(0000) +knlGS:0000000000000000 +[ 115.857531] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 115.858006] CR2: 0000000000000010 CR3: 0000000022f4c002 CR4: 00000000000706f0 +[ 115.858598] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 115.859393] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 115.860099] Call Trace: +[ 115.860358] +[ 115.860535] propagate_mnt+0x14d/0x190 +[ 115.860848] attach_recursive_mnt+0x274/0x3e0 +[ 115.861212] path_mount+0x8c8/0xa60 +[ 115.861503] __x64_sys_mount+0xf6/0x140 +[ 
115.861819] do_syscall_64+0x5b/0x80 +[ 115.862117] ? do_faccessat+0x123/0x250 +[ 115.862435] ? syscall_exit_to_user_mode+0x17/0x40 +[ 115.862826] ? do_syscall_64+0x67/0x80 +[ 115.863133] ? syscall_exit_to_user_mode+0x17/0x40 +[ 115.863527] ? do_syscall_64+0x67/0x80 +[ 115.863835] ? do_syscall_64+0x67/0x80 +[ 115.864144] ? do_syscall_64+0x67/0x80 +[ 115.864452] ? exc_page_fault+0x70/0x170 +[ 115.864775] entry_SYSCALL_64_after_hwframe+0x63/0xcd +[ 115.865187] RIP: 0033:0x7f92c92b0ebe +[ 115.865480] Code: 48 8b 0d 75 4f 0c 00 f7 d8 64 89 01 48 83 c8 ff +c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 +00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 42 4f 0c 00 f7 d8 64 89 +01 48 +[ 115.866984] RSP: 002b:00007fff000aa728 EFLAGS: 00000246 ORIG_RAX: +00000000000000a5 +[ 115.867607] RAX: ffffffffffffffda RBX: 000055a77888d6b0 RCX: 00007f92c92b0ebe +[ 115.868240] RDX: 000055a77888d8e0 RSI: 000055a77888e6e0 RDI: 000055a77888e620 +[ 115.868823] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 +[ 115.869403] R10: 0000000000001000 R11: 0000000000000246 R12: 000055a77888e620 +[ 115.869994] R13: 000055a77888d8e0 R14: 00000000ffffffff R15: 00007f92c93e4076 +[ 115.870581] +[ 115.870763] Modules linked in: nft_fib_inet nft_fib_ipv4 +nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 +nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 +nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink qrtr snd_intel8x0 +sunrpc snd_ac97_codec ac97_bus snd_pcm snd_timer intel_rapl_msr +intel_rapl_common snd vboxguest intel_powerclamp video rapl joydev +soundcore i2c_piix4 wmi fuse zram xfs vmwgfx crct10dif_pclmul +crc32_pclmul crc32c_intel polyval_clmulni polyval_generic +drm_ttm_helper ttm e1000 ghash_clmulni_intel serio_raw ata_generic +pata_acpi scsi_dh_rdac scsi_dh_emc scsi_dh_alua dm_multipath +[ 115.875288] CR2: 0000000000000010 +[ 115.875641] ---[ end trace 0000000000000000 ]--- +[ 115.876135] RIP: 
0010:propagate_one.part.0+0x7f/0x1a0 +[ 115.876551] Code: 75 eb 4c 8b 05 c2 25 37 02 4c 89 ca 48 8b 4a 10 +49 39 d0 74 1e 48 3b 81 e0 00 00 00 74 26 48 8b 92 e0 00 00 00 be 01 +00 00 00 <48> 8b 4a 10 49 39 d0 75 e2 40 84 f6 74 38 4c 89 05 84 25 37 +02 4d +[ 115.878086] RSP: 0018:ffffb8d5443d7d50 EFLAGS: 00010282 +[ 115.878511] RAX: ffff8e4d87c41c80 RBX: ffff8e4d88ded780 RCX: ffff8e4da4333a00 +[ 115.879128] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8e4d88ded780 +[ 115.879715] RBP: ffff8e4d88ded780 R08: ffff8e4da4338000 R09: ffff8e4da43388c0 +[ 115.880359] R10: 0000000000000002 R11: ffffb8d540158000 R12: ffffb8d5443d7da8 +[ 115.880962] R13: ffff8e4d88ded780 R14: 0000000000000000 R15: 0000000000000000 +[ 115.881548] FS: 00007f92c90c9800(0000) GS:ffff8e4dfdc00000(0000) +knlGS:0000000000000000 +[ 115.882234] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 115.882713] CR2: 0000000000000010 CR3: 0000000022f4c002 CR4: 00000000000706f0 +[ 115.883314] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 115.883966] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: f2ebb3a921c1 ("smarter propagate_mnt()") +Fixes: 5ec0811d3037 ("propogate_mnt: Handle the first propogated copy being a slave") +Cc: +Reported-by: Ditang Chen +Signed-off-by: Seth Forshee (Digital Ocean) +Signed-off-by: Christian Brauner (Microsoft) +Signed-off-by: Greg Kroah-Hartman +--- +If there are no big objections I'll get this to Linus rather sooner than later. 
+--- + fs/pnode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -244,7 +244,7 @@ static int propagate_one(struct mount *m + } + do { + struct mount *parent = last_source->mnt_parent; +- if (last_source == first_source) ++ if (peers(last_source, first_source)) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) diff --git a/queue-5.10/series b/queue-5.10/series index 4ea5e3bc002..054d328a242 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -584,3 +584,12 @@ hid-multitouch-fix-asus-expertbook-p2-p2451fa-trackp.patch hid-plantronics-additional-pids-for-double-volume-ke.patch pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch +binfmt-fix-error-return-code-in-load_elf_fdpic_binary.patch +ovl-use-ovl-mounter-s-fsuid-and-fsgid-in-ovl_link.patch +alsa-line6-correct-midi-status-byte-when-receiving-data-from-podxt.patch +alsa-line6-fix-stack-overflow-in-line6_midi_transmit.patch +pnode-terminate-at-peers-of-source.patch +md-fix-a-crash-in-mempool_free.patch +mm-compaction-fix-fast_isolate_around-to-stay-within-boundaries.patch +f2fs-should-put-a-page-when-checking-the-summary-info.patch +mmc-vub300-fix-warning-do-not-call-blocking-ops-when-task_running.patch