From: Greg Kroah-Hartman Date: Mon, 26 Jul 2021 08:53:55 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v4.4.277~42 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6d44859a71b73070beac7a7fd1d57350b8dba696;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: btrfs-check-for-missing-device-in-btrfs_trim_fs.patch bus-mhi-core-validate-channel-id-when-processing-command-completions.patch firmware-efi-tell-memblock-about-efi-iomem-reservations.patch io_uring-explicitly-count-entries-for-poll-reqs.patch io_uring-remove-double-poll-entry-on-arm-failure.patch ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch posix-cpu-timers-fix-rearm-racing-against-process-tick.patch selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch tracing-histogram-rename-cpu-to-common_cpu.patch tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch userfaultfd-do-not-untag-user-pointers.patch --- diff --git a/queue-5.10/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch b/queue-5.10/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch new file mode 100644 index 00000000000..47f84df51a2 --- /dev/null +++ b/queue-5.10/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch @@ -0,0 +1,80 @@ +From 16a200f66ede3f9afa2e51d90ade017aaa18d213 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Sun, 4 Jul 2021 19:14:39 +0800 +Subject: btrfs: check for missing device in btrfs_trim_fs + +From: Anand Jain + +commit 16a200f66ede3f9afa2e51d90ade017aaa18d213 upstream. 
+ +A fstrim on a degraded raid1 can trigger the following null pointer +dereference: + + BTRFS info (device loop0): allowing degraded mounts + BTRFS info (device loop0): disk space caching is enabled + BTRFS info (device loop0): has skinny extents + BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing + BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing + BTRFS info (device loop0): enabling ssd optimizations + BUG: kernel NULL pointer dereference, address: 0000000000000620 + PGD 0 P4D 0 + Oops: 0000 [#1] SMP NOPTI + CPU: 0 PID: 4574 Comm: fstrim Not tainted 5.13.0-rc7+ #31 + Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 + RIP: 0010:btrfs_trim_fs+0x199/0x4a0 [btrfs] + RSP: 0018:ffff959541797d28 EFLAGS: 00010293 + RAX: 0000000000000000 RBX: ffff946f84eca508 RCX: a7a67937adff8608 + RDX: ffff946e8122d000 RSI: 0000000000000000 RDI: ffffffffc02fdbf0 + RBP: ffff946ea4615000 R08: 0000000000000001 R09: 0000000000000000 + R10: 0000000000000000 R11: ffff946e8122d960 R12: 0000000000000000 + R13: ffff959541797db8 R14: ffff946e8122d000 R15: ffff959541797db8 + FS: 00007f55917a5080(0000) GS:ffff946f9bc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000620 CR3: 000000002d2c8001 CR4: 00000000000706f0 + Call Trace: + btrfs_ioctl_fitrim+0x167/0x260 [btrfs] + btrfs_ioctl+0x1c00/0x2fe0 [btrfs] + ? selinux_file_ioctl+0x140/0x240 + ? syscall_trace_enter.constprop.0+0x188/0x240 + ? __x64_sys_ioctl+0x83/0xb0 + __x64_sys_ioctl+0x83/0xb0 + +Reproducer: + + $ mkfs.btrfs -fq -d raid1 -m raid1 /dev/loop0 /dev/loop1 + $ mount /dev/loop0 /btrfs + $ umount /btrfs + $ btrfs dev scan --forget + $ mount -o degraded /dev/loop0 /btrfs + + $ fstrim /btrfs + +The reason is we call btrfs_trim_free_extents() for the missing device, +which uses device->bdev (NULL for missing device) to find if the device +supports discard. 
+ +Fix is to check if the device is missing before calling +btrfs_trim_free_extents(). + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -5883,6 +5883,9 @@ int btrfs_trim_fs(struct btrfs_fs_info * + mutex_lock(&fs_info->fs_devices->device_list_mutex); + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ++ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) ++ continue; ++ + ret = btrfs_trim_free_extents(device, &group_trimmed); + if (ret) { + dev_failed++; diff --git a/queue-5.10/bus-mhi-core-validate-channel-id-when-processing-command-completions.patch b/queue-5.10/bus-mhi-core-validate-channel-id-when-processing-command-completions.patch new file mode 100644 index 00000000000..d1dc9eb1769 --- /dev/null +++ b/queue-5.10/bus-mhi-core-validate-channel-id-when-processing-command-completions.patch @@ -0,0 +1,56 @@ +From 546362a9ef2ef40b57c6605f14e88ced507f8dd0 Mon Sep 17 00:00:00 2001 +From: Bhaumik Bhatt +Date: Fri, 16 Jul 2021 13:21:05 +0530 +Subject: bus: mhi: core: Validate channel ID when processing command completions + +From: Bhaumik Bhatt + +commit 546362a9ef2ef40b57c6605f14e88ced507f8dd0 upstream. + +MHI reads the channel ID from the event ring element sent by the +device which can be any value between 0 and 255. In order to +prevent any out of bound accesses, add a check against the maximum +number of channels supported by the controller and those channels +not configured yet so as to skip processing of that event ring +element. 
+ +Link: https://lore.kernel.org/r/1624558141-11045-1-git-send-email-bbhatt@codeaurora.org +Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device") +Cc: stable@vger.kernel.org #5.10 +Reviewed-by: Hemant Kumar +Reviewed-by: Manivannan Sadhasivam +Reviewed-by: Jeffrey Hugo +Signed-off-by: Bhaumik Bhatt +Signed-off-by: Manivannan Sadhasivam +Link: https://lore.kernel.org/r/20210716075106.49938-3-manivannan.sadhasivam@linaro.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bus/mhi/core/main.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/bus/mhi/core/main.c ++++ b/drivers/bus/mhi/core/main.c +@@ -706,11 +706,18 @@ static void mhi_process_cmd_completion(s + cmd_pkt = mhi_to_virtual(mhi_ring, ptr); + + chan = MHI_TRE_GET_CMD_CHID(cmd_pkt); +- mhi_chan = &mhi_cntrl->mhi_chan[chan]; +- write_lock_bh(&mhi_chan->lock); +- mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); +- complete(&mhi_chan->completion); +- write_unlock_bh(&mhi_chan->lock); ++ ++ if (chan < mhi_cntrl->max_chan && ++ mhi_cntrl->mhi_chan[chan].configured) { ++ mhi_chan = &mhi_cntrl->mhi_chan[chan]; ++ write_lock_bh(&mhi_chan->lock); ++ mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); ++ complete(&mhi_chan->completion); ++ write_unlock_bh(&mhi_chan->lock); ++ } else { ++ dev_err(&mhi_cntrl->mhi_dev->dev, ++ "Completion packet for invalid channel ID: %d\n", chan); ++ } + + mhi_del_ring_element(mhi_cntrl, mhi_ring); + } diff --git a/queue-5.10/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch b/queue-5.10/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch new file mode 100644 index 00000000000..907fee2ea65 --- /dev/null +++ b/queue-5.10/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch @@ -0,0 +1,66 @@ +From 2bab693a608bdf614b9fcd44083c5100f34b9f77 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 13 Jul 2021 19:43:26 +0100 +Subject: firmware/efi: Tell memblock about EFI iomem reservations + +From: 
Marc Zyngier + +commit 2bab693a608bdf614b9fcd44083c5100f34b9f77 upstream. + +kexec_load_file() relies on the memblock infrastructure to avoid +stamping over regions of memory that are essential to the survival +of the system. + +However, nobody seems to agree how to flag these regions as reserved, +and (for example) EFI only publishes its reservations in /proc/iomem +for the benefit of the traditional, userspace based kexec tool. + +On arm64 platforms with GICv3, this can result in the payload being +placed at the location of the LPI tables. Shock, horror! + +Let's augment the EFI reservation code with a memblock_reserve() call, +protecting our dear tables from the secondary kernel invasion. + +Reported-by: Moritz Fischer +Tested-by: Moritz Fischer +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Cc: Ard Biesheuvel +Cc: James Morse +Cc: Catalin Marinas +Cc: Will Deacon +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/efi.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -896,6 +896,7 @@ static int __init efi_memreserve_map_roo + static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) + { + struct resource *res, *parent; ++ int ret; + + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) +@@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_ad + + /* we expect a conflict with a 'System RAM' region */ + parent = request_resource_conflict(&iomem_resource, res); +- return parent ? request_resource(parent, res) : 0; ++ ret = parent ? request_resource(parent, res) : 0; ++ ++ /* ++ * Given that efi_mem_reserve_iomem() can be called at any ++ * time, only call memblock_reserve() if the architecture ++ * keeps the infrastructure around. 
++ */ ++ if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret) ++ memblock_reserve(addr, size); ++ ++ return ret; + } + + int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) diff --git a/queue-5.10/io_uring-explicitly-count-entries-for-poll-reqs.patch b/queue-5.10/io_uring-explicitly-count-entries-for-poll-reqs.patch new file mode 100644 index 00000000000..cba0ad700fe --- /dev/null +++ b/queue-5.10/io_uring-explicitly-count-entries-for-poll-reqs.patch @@ -0,0 +1,74 @@ +From 68b11e8b1562986c134764433af64e97d30c9fc0 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 20 Jul 2021 10:50:43 +0100 +Subject: io_uring: explicitly count entries for poll reqs + +From: Pavel Begunkov + +commit 68b11e8b1562986c134764433af64e97d30c9fc0 upstream. + +If __io_queue_proc() fails to add a second poll entry, e.g. kmalloc() +failed, but it goes on with a third waitqueue, it may succeed and +overwrite the error status. Count the number of poll entries we added, +so we can set pt->error to zero at the beginning and find out when the +mentioned scenario happens. + +Cc: stable@vger.kernel.org +Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/9d6b9e561f88bcc0163623b74a76c39f712151c3.1626774457.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -4916,6 +4916,7 @@ static int io_connect(struct io_kiocb *r + struct io_poll_table { + struct poll_table_struct pt; + struct io_kiocb *req; ++ int nr_entries; + int error; + }; + +@@ -5098,11 +5099,11 @@ static void __io_queue_proc(struct io_po + struct io_kiocb *req = pt->req; + + /* +- * If poll->head is already set, it's because the file being polled +- * uses multiple waitqueues for poll handling (eg one for read, one +- * for write). 
Setup a separate io_poll_iocb if this happens. ++ * The file being polled uses multiple waitqueues for poll handling ++ * (e.g. one for read, one for write). Setup a separate io_poll_iocb ++ * if this happens. + */ +- if (unlikely(poll->head)) { ++ if (unlikely(pt->nr_entries)) { + struct io_poll_iocb *poll_one = poll; + + /* already have a 2nd entry, fail a third attempt */ +@@ -5124,7 +5125,7 @@ static void __io_queue_proc(struct io_po + *poll_ptr = poll; + } + +- pt->error = 0; ++ pt->nr_entries++; + poll->head = head; + + if (poll->events & EPOLLEXCLUSIVE) +@@ -5210,9 +5211,12 @@ static __poll_t __io_arm_poll_handler(st + + ipt->pt._key = mask; + ipt->req = req; +- ipt->error = -EINVAL; ++ ipt->error = 0; ++ ipt->nr_entries = 0; + + mask = vfs_poll(req->file, &ipt->pt) & poll->events; ++ if (unlikely(!ipt->nr_entries) && !ipt->error) ++ ipt->error = -EINVAL; + + spin_lock_irq(&ctx->completion_lock); + if (likely(poll->head)) { diff --git a/queue-5.10/io_uring-remove-double-poll-entry-on-arm-failure.patch b/queue-5.10/io_uring-remove-double-poll-entry-on-arm-failure.patch new file mode 100644 index 00000000000..e2c3d78a9f1 --- /dev/null +++ b/queue-5.10/io_uring-remove-double-poll-entry-on-arm-failure.patch @@ -0,0 +1,46 @@ +From 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 20 Jul 2021 10:50:44 +0100 +Subject: io_uring: remove double poll entry on arm failure + +From: Pavel Begunkov + +commit 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e upstream. + +__io_queue_proc() can enqueue both poll entries and still fail +afterwards, so the callers trying to cancel it should also try to remove +the second poll entry (if any). 
+ +For example, it may leave the request alive referencing a io_uring +context but not accessible for cancellation: + +[ 282.599913][ T1620] task:iou-sqp-23145 state:D stack:28720 pid:23155 ppid: 8844 flags:0x00004004 +[ 282.609927][ T1620] Call Trace: +[ 282.613711][ T1620] __schedule+0x93a/0x26f0 +[ 282.634647][ T1620] schedule+0xd3/0x270 +[ 282.638874][ T1620] io_uring_cancel_generic+0x54d/0x890 +[ 282.660346][ T1620] io_sq_thread+0xaac/0x1250 +[ 282.696394][ T1620] ret_from_fork+0x1f/0x30 + +Cc: stable@vger.kernel.org +Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") +Reported-and-tested-by: syzbot+ac957324022b7132accf@syzkaller.appspotmail.com +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/0ec1228fc5eda4cb524eeda857da8efdc43c331c.1626774457.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -5219,6 +5219,8 @@ static __poll_t __io_arm_poll_handler(st + ipt->error = -EINVAL; + + spin_lock_irq(&ctx->completion_lock); ++ if (ipt->error) ++ io_poll_remove_double(req); + if (likely(poll->head)) { + spin_lock(&poll->head->lock); + if (unlikely(list_empty(&poll->wait.entry))) { diff --git a/queue-5.10/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch b/queue-5.10/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch new file mode 100644 index 00000000000..2c8104c28fe --- /dev/null +++ b/queue-5.10/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch @@ -0,0 +1,55 @@ +From 09cfae9f13d51700b0fecf591dcd658fc5375428 Mon Sep 17 00:00:00 2001 +From: Markus Boehme +Date: Tue, 20 Jul 2021 16:26:19 -0700 +Subject: ixgbe: Fix packet corruption due to missing DMA sync + +From: Markus Boehme + +commit 09cfae9f13d51700b0fecf591dcd658fc5375428 upstream. 
+ +When receiving a packet with multiple fragments, hardware may still +touch the first fragment until the entire packet has been received. The +driver therefore keeps the first fragment mapped for DMA until end of +packet has been asserted, and delays its dma_sync call until then. + +The driver tries to fit multiple receive buffers on one page. When using +3K receive buffers (e.g. using Jumbo frames and legacy-rx is turned +off/build_skb is being used) on an architecture with 4K pages, the +driver allocates an order 1 compound page and uses one page per receive +buffer. To determine the correct offset for a delayed DMA sync of the +first fragment of a multi-fragment packet, the driver then cannot just +use PAGE_MASK on the DMA address but has to construct a mask based on +the actual size of the backing page. + +Using PAGE_MASK in the 3K RX buffer/4K page architecture configuration +will always sync the first page of a compound page. With the SWIOTLB +enabled this can lead to corrupted packets (zeroed out first fragment, +re-used garbage from another packet) and various consequences, such as +slow/stalling data transfers and connection resets. For example, testing +on a link with MTU exceeding 3058 bytes on a host with SWIOTLB enabled +(e.g. "iommu=soft swiotlb=262144,force") TCP transfers quickly fizzle +out without this patch. + +Cc: stable@vger.kernel.org +Fixes: 0c5661ecc5dd7 ("ixgbe: fix crash in build_skb Rx code path") +Signed-off-by: Markus Boehme +Tested-by: Tony Brelinski +Signed-off-by: Tony Nguyen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct i + struct sk_buff *skb) + { + if (ring_uses_build_skb(rx_ring)) { +- unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; ++ unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; ++ unsigned long offset = (unsigned long)(skb->data) & mask; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, diff --git a/queue-5.10/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch b/queue-5.10/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch new file mode 100644 index 00000000000..d233006b8b4 --- /dev/null +++ b/queue-5.10/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch @@ -0,0 +1,82 @@ +From 8d4abca95ecc82fc8c41912fa0085281f19cc29f Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Mon, 19 Apr 2021 18:43:32 -0500 +Subject: media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf() + +From: Gustavo A. R. Silva + +commit 8d4abca95ecc82fc8c41912fa0085281f19cc29f upstream. 
+ +Fix an 11-year old bug in ngene_command_config_free_buf() while +addressing the following warnings caught with -Warray-bounds: + +arch/alpha/include/asm/string.h:22:16: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] +arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] + +The problem is that the original code is trying to copy 6 bytes of +data into a one-byte size member _config_ of the wrong structure +FW_CONFIGURE_BUFFERS, in a single call to memcpy(). This causes a +legitimate compiler warning because memcpy() overruns the length +of &com.cmd.ConfigureBuffers.config. It seems that the right +structure is FW_CONFIGURE_FREE_BUFFERS, instead, because it contains +6 more members apart from the header _hdr_. Also, the name of +the function ngene_command_config_free_buf() suggests that the actual +intention is to ConfigureFreeBuffers, instead of ConfigureBuffers +(which takes place in the function ngene_command_config_buf(), above). + +Fix this by enclosing those 6 members of struct FW_CONFIGURE_FREE_BUFFERS +into new struct config, and use &com.cmd.ConfigureFreeBuffers.config as +the destination address, instead of &com.cmd.ConfigureBuffers.config, +when calling memcpy(). + +This also helps with the ongoing efforts to globally enable +-Warray-bounds and get us closer to being able to tighten the +FORTIFY_SOURCE routines on memcpy(). + +Link: https://github.com/KSPP/linux/issues/109 +Fixes: dae52d009fc9 ("V4L/DVB: ngene: Initial check-in") +Cc: stable@vger.kernel.org +Reported-by: kernel test robot +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. 
Silva +Link: https://lore.kernel.org/linux-hardening/20210420001631.GA45456@embeddedor/ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/pci/ngene/ngene-core.c | 2 +- + drivers/media/pci/ngene/ngene.h | 14 ++++++++------ + 2 files changed, 9 insertions(+), 7 deletions(-) + +--- a/drivers/media/pci/ngene/ngene-core.c ++++ b/drivers/media/pci/ngene/ngene-core.c +@@ -385,7 +385,7 @@ static int ngene_command_config_free_buf + + com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER; + com.cmd.hdr.Length = 6; +- memcpy(&com.cmd.ConfigureBuffers.config, config, 6); ++ memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6); + com.in_len = 6; + com.out_len = 0; + +--- a/drivers/media/pci/ngene/ngene.h ++++ b/drivers/media/pci/ngene/ngene.h +@@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS { + + struct FW_CONFIGURE_FREE_BUFFERS { + struct FW_HEADER hdr; +- u8 UVI1_BufferLength; +- u8 UVI2_BufferLength; +- u8 TVO_BufferLength; +- u8 AUD1_BufferLength; +- u8 AUD2_BufferLength; +- u8 TVA_BufferLength; ++ struct { ++ u8 UVI1_BufferLength; ++ u8 UVI2_BufferLength; ++ u8 TVO_BufferLength; ++ u8 AUD1_BufferLength; ++ u8 AUD2_BufferLength; ++ u8 TVA_BufferLength; ++ } __packed config; + } __attribute__ ((__packed__)); + + struct FW_CONFIGURE_UART { diff --git a/queue-5.10/posix-cpu-timers-fix-rearm-racing-against-process-tick.patch b/queue-5.10/posix-cpu-timers-fix-rearm-racing-against-process-tick.patch new file mode 100644 index 00000000000..85675c183aa --- /dev/null +++ b/queue-5.10/posix-cpu-timers-fix-rearm-racing-against-process-tick.patch @@ -0,0 +1,73 @@ +From 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Thu, 3 Jun 2021 01:15:59 +0200 +Subject: posix-cpu-timers: Fix rearm racing against process tick + +From: Frederic Weisbecker + +commit 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 upstream. 
+ +Since the process wide cputime counter is started locklessly from +posix_cpu_timer_rearm(), it can be concurrently stopped by operations +on other timers from the same thread group, such as in the following +unlucky scenario: + + CPU 0 CPU 1 + ----- ----- + timer_settime(TIMER B) + posix_cpu_timer_rearm(TIMER A) + cpu_clock_sample_group() + (pct->timers_active already true) + + handle_posix_cpu_timers() + check_process_timers() + stop_process_timers() + pct->timers_active = false + arm_timer(TIMER A) + + tick -> run_posix_cpu_timers() + // sees !pct->timers_active, ignore + // our TIMER A + +Fix this with simply locking process wide cputime counting start and +timer arm in the same block. + +Acked-by: Peter Zijlstra (Intel) +Signed-off-by: Frederic Weisbecker +Fixes: 60f2ceaa8111 ("posix-cpu-timers: Remove unnecessary locking around cpu_clock_sample_group") +Cc: stable@vger.kernel.org +Cc: Oleg Nesterov +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Eric W. Biederman +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/posix-cpu-timers.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct + if (!p) + goto out; + ++ /* Protect timer list r/w in arm_timer() */ ++ sighand = lock_task_sighand(p, &flags); ++ if (unlikely(sighand == NULL)) ++ goto out; ++ + /* + * Fetch the current sample and update the timer's expiry time. + */ +@@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct + + bump_cpu_timer(timer, now); + +- /* Protect timer list r/w in arm_timer() */ +- sighand = lock_task_sighand(p, &flags); +- if (unlikely(sighand == NULL)) +- goto out; +- + /* + * Now re-arm for the new expiry time. 
+ */ diff --git a/queue-5.10/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch b/queue-5.10/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch new file mode 100644 index 00000000000..b802709a77e --- /dev/null +++ b/queue-5.10/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch @@ -0,0 +1,56 @@ +From 0db282ba2c12c1515d490d14a1ff696643ab0f1b Mon Sep 17 00:00:00 2001 +From: Peter Collingbourne +Date: Fri, 23 Jul 2021 15:50:04 -0700 +Subject: selftest: use mmap instead of posix_memalign to allocate memory + +From: Peter Collingbourne + +commit 0db282ba2c12c1515d490d14a1ff696643ab0f1b upstream. + +This test passes pointers obtained from anon_allocate_area to the +userfaultfd and mremap APIs. This causes a problem if the system +allocator returns tagged pointers because with the tagged address ABI +the kernel rejects tagged addresses passed to these APIs, which would +end up causing the test to fail. To make this test compatible with such +system allocators, stop using the system allocator to allocate memory in +anon_allocate_area, and instead just use mmap. 
+ +Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com +Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241 +Fixes: c47174fc362a ("userfaultfd: selftest") +Co-developed-by: Lokesh Gidra +Signed-off-by: Lokesh Gidra +Signed-off-by: Peter Collingbourne +Reviewed-by: Catalin Marinas +Cc: Vincenzo Frascino +Cc: Dave Martin +Cc: Will Deacon +Cc: Andrea Arcangeli +Cc: Alistair Delva +Cc: William McVicker +Cc: Evgenii Stepanov +Cc: Mitch Phillips +Cc: Andrey Konovalov +Cc: [5.4] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/vm/userfaultfd.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/vm/userfaultfd.c ++++ b/tools/testing/selftests/vm/userfaultfd.c +@@ -180,8 +180,10 @@ static int anon_release_pages(char *rel_ + + static void anon_allocate_area(void **alloc_area) + { +- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { +- fprintf(stderr, "out of memory\n"); ++ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, ++ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ++ if (*alloc_area == MAP_FAILED) ++ fprintf(stderr, "mmap of anonymous memory failed"); + *alloc_area = NULL; + } + } diff --git a/queue-5.10/series b/queue-5.10/series index 63f9b819c8d..ccf5c14b826 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -132,3 +132,17 @@ usb-gadget-fix-unbalanced-pm_runtime_enable-in-tegra_xudc_probe.patch usb-dwc2-gadget-fix-goutnak-flow-for-slave-mode.patch usb-dwc2-gadget-fix-sending-zero-length-packet-in-ddma-mode.patch usb-typec-stusb160x-register-role-switch-before-interrupt-registration.patch +firmware-efi-tell-memblock-about-efi-iomem-reservations.patch +tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch +tracing-histogram-rename-cpu-to-common_cpu.patch +tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch 
+tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch +btrfs-check-for-missing-device-in-btrfs_trim_fs.patch +media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch +ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch +bus-mhi-core-validate-channel-id-when-processing-command-completions.patch +posix-cpu-timers-fix-rearm-racing-against-process-tick.patch +selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch +io_uring-explicitly-count-entries-for-poll-reqs.patch +io_uring-remove-double-poll-entry-on-arm-failure.patch +userfaultfd-do-not-untag-user-pointers.patch diff --git a/queue-5.10/tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch b/queue-5.10/tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch new file mode 100644 index 00000000000..f6c364603f3 --- /dev/null +++ b/queue-5.10/tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch @@ -0,0 +1,120 @@ +From 352384d5c84ebe40fa77098cc234fe173247d8ef Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Thu, 22 Jul 2021 21:52:18 -0400 +Subject: tracepoints: Update static_call before tp_funcs when adding a tracepoint + +From: Steven Rostedt (VMware) + +commit 352384d5c84ebe40fa77098cc234fe173247d8ef upstream. + +Because of the significant overhead that retpolines pose on indirect +calls, the tracepoint code was updated to use the new "static_calls" that +can modify the running code to directly call a function instead of using +an indirect caller, and this function can be changed at runtime. + +In the tracepoint code that calls all the registered callbacks that are +attached to a tracepoint, the following is done: + + it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); + if (it_func_ptr) { + __data = (it_func_ptr)->data; + static_call(tp_func_##name)(__data, args); + } + +If there's just a single callback, the static_call is updated to just call +that callback directly. 
Once another handler is added, then the static +caller is updated to call the iterator, that simply loops over all the +funcs in the array and calls each of the callbacks like the old method +using indirect calling. + +The issue was discovered with a race between updating the funcs array and +updating the static_call. The funcs array was updated first and then the +static_call was updated. This is not an issue as long as the first element +in the old array is the same as the first element in the new array. But +that assumption is incorrect, because callbacks also have a priority +field, and if there's a callback added that has a higher priority than the +callback on the old array, then it will become the first callback in the +new array. This means that it is possible to call the old callback with +the new callback data element, which can cause a kernel panic. + + static_call = callback1() + funcs[] = {callback1,data1}; + callback2 has higher priority than callback1 + + CPU 1 CPU 2 + ----- ----- + + new_funcs = {callback2,data2}, + {callback1,data1} + + rcu_assign_pointer(tp->funcs, new_funcs); + + /* + * Now tp->funcs has the new array + * but the static_call still calls callback1 + */ + + it_func_ptr = tp->funcs [ new_funcs ] + data = it_func_ptr->data [ data2 ] + static_call(callback1, data); + + /* Now callback1 is called with + * callback2's data */ + + [ KERNEL PANIC ] + + update_static_call(iterator); + +To prevent this from happening, always switch the static_call to the +iterator before assigning the tp->funcs to the new array. The iterator will +always properly match the callback with its data. + +To trigger this bug: + + In one terminal: + + while :; do hackbench 50; done + + In another terminal + + echo 1 > /sys/kernel/tracing/events/sched/sched_waking/enable + while :; do + echo 1 > /sys/kernel/tracing/set_event_pid; + sleep 0.5 + echo 0 > /sys/kernel/tracing/set_event_pid; + sleep 0.5 + done + +And it doesn't take long to crash. 
This is because the set_event_pid adds +a callback to the sched_waking tracepoint with a high priority, which will +be called before the sched_waking trace event callback is called. + +Note, the removal to a single callback updates the array first, before +changing the static_call to single callback, which is the proper order as +the first element in the array is the same as what the static_call is +being changed to. + +Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ + +Cc: stable@vger.kernel.org +Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()") +Reported-by: Stefan Metzmacher +tested-by: Stefan Metzmacher +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/tracepoint.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/tracepoint.c ++++ b/kernel/tracepoint.c +@@ -320,8 +320,8 @@ static int tracepoint_add_func(struct tr + * a pointer to it. This array is referenced by __DO_TRACE from + * include/linux/tracepoint.h using rcu_dereference_sched(). + */ +- rcu_assign_pointer(tp->funcs, tp_funcs); + tracepoint_update_call(tp, tp_funcs, false); ++ rcu_assign_pointer(tp->funcs, tp_funcs); + static_key_enable(&tp->key); + + release_probes(old); diff --git a/queue-5.10/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch b/queue-5.10/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch new file mode 100644 index 00000000000..eeeaf7e70ae --- /dev/null +++ b/queue-5.10/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch @@ -0,0 +1,102 @@ +From 67f0d6d9883c13174669f88adac4f0ee656cc16a Mon Sep 17 00:00:00 2001 +From: Haoran Luo +Date: Wed, 21 Jul 2021 14:12:07 +0000 +Subject: tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop. + +From: Haoran Luo + +commit 67f0d6d9883c13174669f88adac4f0ee656cc16a upstream. 
+ +The "rb_per_cpu_empty()" misinterprets the condition (as not-empty) when +"head_page" and "commit_page" of "struct ring_buffer_per_cpu" point to +the same buffer page, whose "buffer_data_page" is empty and "read" field +is non-zero. + +An error scenario could be constructed as follows (kernel perspective): + +1. All pages in the buffer have been accessed by reader(s) so that all of +them will have a non-zero "read" field. + +2. Read and clear all buffer pages so that "rb_num_of_entries()" will +return 0, indicating there's no more data to read. It is also required +that the "read_page", "commit_page" and "tail_page" point to the same +page, while "head_page" is the next page of them. + +3. Invoke "ring_buffer_lock_reserve()" with a large enough "length" +so that it shoots past the end of the current tail buffer page. Now the +"head_page", "commit_page" and "tail_page" point to the same page. + +4. Discard the current event with "ring_buffer_discard_commit()", so that +"head_page", "commit_page" and "tail_page" point to a page whose buffer +data page is now empty. + +When the error scenario has been constructed, "tracing_read_pipe" will +be trapped inside a deadloop: "trace_empty()" returns 0 since +"rb_per_cpu_empty()" returns 0 when it hits the CPU containing such a +constructed ring buffer. Then "trace_find_next_entry_inc()" always +returns NULL since "rb_num_of_entries()" reports there are no more entries +to read. Finally "trace_seq_to_user()" returns "-EBUSY", sending +"tracing_read_pipe" back to the start of the "waitagain" loop.
+ +I've also written a proof-of-concept script to construct the scenario +and trigger the bug automatically, you can use it to trace and validate +my reasoning above: + + https://github.com/aegistudio/RingBufferDetonator.git + +Tests has been carried out on linux kernel 5.14-rc2 +(2734d6c1b1a089fb593ef6a23d4b70903526fe0c), my fixed version +of kernel (for testing whether my update fixes the bug) and +some older kernels (for range of affected kernels). Test result is +also attached to the proof-of-concept repository. + +Link: https://lore.kernel.org/linux-trace-devel/YPaNxsIlb2yjSi5Y@aegistudio/ +Link: https://lore.kernel.org/linux-trace-devel/YPgrN85WL9VyrZ55@aegistudio + +Cc: stable@vger.kernel.org +Fixes: bf41a158cacba ("ring-buffer: make reentrant") +Suggested-by: Linus Torvalds +Signed-off-by: Haoran Luo +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 28 ++++++++++++++++++++++++---- + 1 file changed, 24 insertions(+), 4 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3649,10 +3649,30 @@ static bool rb_per_cpu_empty(struct ring + if (unlikely(!head)) + return true; + +- return reader->read == rb_page_commit(reader) && +- (commit == reader || +- (commit == head && +- head->read == rb_page_commit(commit))); ++ /* Reader should exhaust content in reader page */ ++ if (reader->read != rb_page_commit(reader)) ++ return false; ++ ++ /* ++ * If writers are committing on the reader page, knowing all ++ * committed content has been read, the ring buffer is empty. ++ */ ++ if (commit == reader) ++ return true; ++ ++ /* ++ * If writers are committing on a page other than reader page ++ * and head page, there should always be content to read. 
++ */ ++ if (commit != head) ++ return false; ++ ++ /* ++ * Writers are committing on the head page, we just need ++ * to care about there're committed data, and the reader will ++ * swap reader page with head page when it is to read data. ++ */ ++ return rb_page_commit(commit) == 0; + } + + /** diff --git a/queue-5.10/tracing-histogram-rename-cpu-to-common_cpu.patch b/queue-5.10/tracing-histogram-rename-cpu-to-common_cpu.patch new file mode 100644 index 00000000000..2baafd3f006 --- /dev/null +++ b/queue-5.10/tracing-histogram-rename-cpu-to-common_cpu.patch @@ -0,0 +1,152 @@ +From 1e3bac71c5053c99d438771fc9fa5082ae5d90aa Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 21 Jul 2021 11:00:53 -0400 +Subject: tracing/histogram: Rename "cpu" to "common_cpu" + +From: Steven Rostedt (VMware) + +commit 1e3bac71c5053c99d438771fc9fa5082ae5d90aa upstream. + +Currently the histogram logic allows the user to write "cpu" in as an +event field, and it will record the CPU that the event happened on. + +The problem with this is that there's a lot of events that have "cpu" +as a real field, and using "cpu" as the CPU it ran on, makes it +impossible to run histograms on the "cpu" field of events. + +For example, if I want to have a histogram on the count of the +workqueue_queue_work event on its cpu field, running: + + ># echo 'hist:keys=cpu' > events/workqueue/workqueue_queue_work/trigger + +Gives a misleading and wrong result. + +Change the command to "common_cpu" as no event should have "common_*" +fields as that's a reserved name for fields used by all events. And +this makes sense here as common_cpu would be a field used by all events. 
+ +Now we can even do: + + ># echo 'hist:keys=common_cpu,cpu if cpu < 100' > events/workqueue/workqueue_queue_work/trigger + ># cat events/workqueue/workqueue_queue_work/hist + # event histogram + # + # trigger info: hist:keys=common_cpu,cpu:vals=hitcount:sort=hitcount:size=2048 if cpu < 100 [active] + # + + { common_cpu: 0, cpu: 2 } hitcount: 1 + { common_cpu: 0, cpu: 4 } hitcount: 1 + { common_cpu: 7, cpu: 7 } hitcount: 1 + { common_cpu: 0, cpu: 7 } hitcount: 1 + { common_cpu: 0, cpu: 1 } hitcount: 1 + { common_cpu: 0, cpu: 6 } hitcount: 2 + { common_cpu: 0, cpu: 5 } hitcount: 2 + { common_cpu: 1, cpu: 1 } hitcount: 4 + { common_cpu: 6, cpu: 6 } hitcount: 4 + { common_cpu: 5, cpu: 5 } hitcount: 14 + { common_cpu: 4, cpu: 4 } hitcount: 26 + { common_cpu: 0, cpu: 0 } hitcount: 39 + { common_cpu: 2, cpu: 2 } hitcount: 184 + +Now for backward compatibility, I added a trick. If "cpu" is used, and +the field is not found, it will fall back to "common_cpu" and work as +it did before. This way, it will still work for old programs that use +"cpu" to get the actual CPU, but if the event has a "cpu" as a field, it +will get that event's "cpu" field, which is probably what it wants +anyway. + +I updated the tracefs/README to include documentation about both the +common_timestamp and the common_cpu. This way, if that text is present in +the README, then an application can know that common_cpu is supported over +just plain "cpu". 
+ +Link: https://lkml.kernel.org/r/20210721110053.26b4f641@oasis.local.home + +Cc: Namhyung Kim +Cc: Ingo Molnar +Cc: Andrew Morton +Cc: stable@vger.kernel.org +Fixes: 8b7622bf94a44 ("tracing: Add cpu field for hist triggers") +Reviewed-by: Tom Zanussi +Reviewed-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/trace/histogram.rst | 2 +- + kernel/trace/trace.c | 4 ++++ + kernel/trace/trace_events_hist.c | 22 ++++++++++++++++------ + 3 files changed, 21 insertions(+), 7 deletions(-) + +--- a/Documentation/trace/histogram.rst ++++ b/Documentation/trace/histogram.rst +@@ -191,7 +191,7 @@ Documentation written by Tom Zanussi + with the event, in nanoseconds. May be + modified by .usecs to have timestamps + interpreted as microseconds. +- cpu int the cpu on which the event occurred. ++ common_cpu int the cpu on which the event occurred. + ====================== ==== ======================================= + + Extended error information +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5241,6 +5241,10 @@ static const char readme_msg[] = + "\t [:name=histname1]\n" + "\t [:<handler>.<action>]\n" + "\t [if <filter>]\n\n" ++ "\t Note, special fields can be used as well:\n" ++ "\t common_timestamp - to record current timestamp\n" ++ "\t common_cpu - to record the CPU the event happened on\n" ++ "\n" + "\t When a matching event is hit, an entry is added to a hash\n" + "\t table using the key(s) and value(s) named, and the value of a\n" + "\t sum called 'hitcount' is incremented.
Keys and values\n" +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -1095,7 +1095,7 @@ static const char *hist_field_name(struc + field->flags & HIST_FIELD_FL_ALIAS) + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_CPU) +- field_name = "cpu"; ++ field_name = "common_cpu"; + else if (field->flags & HIST_FIELD_FL_EXPR || + field->flags & HIST_FIELD_FL_VAR_REF) { + if (field->system) { +@@ -1975,14 +1975,24 @@ parse_field(struct hist_trigger_data *hi + hist_data->enable_timestamps = true; + if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) + hist_data->attrs->ts_in_usecs = true; +- } else if (strcmp(field_name, "cpu") == 0) ++ } else if (strcmp(field_name, "common_cpu") == 0) + *flags |= HIST_FIELD_FL_CPU; + else { + field = trace_find_event_field(file->event_call, field_name); + if (!field || !field->size) { +- hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); +- field = ERR_PTR(-EINVAL); +- goto out; ++ /* ++ * For backward compatibility, if field_name ++ * was "cpu", then we treat this the same as ++ * common_cpu. 
++ */ ++ if (strcmp(field_name, "cpu") == 0) { ++ *flags |= HIST_FIELD_FL_CPU; ++ } else { ++ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, ++ errpos(field_name)); ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } + } + } + out: +@@ -5057,7 +5067,7 @@ static void hist_field_print(struct seq_ + seq_printf(m, "%s=", hist_field->var.name); + + if (hist_field->flags & HIST_FIELD_FL_CPU) +- seq_puts(m, "cpu"); ++ seq_puts(m, "common_cpu"); + else if (field_name) { + if (hist_field->flags & HIST_FIELD_FL_VAR_REF || + hist_field->flags & HIST_FIELD_FL_ALIAS) diff --git a/queue-5.10/tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch b/queue-5.10/tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch new file mode 100644 index 00000000000..1a612ed0b60 --- /dev/null +++ b/queue-5.10/tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch @@ -0,0 +1,98 @@ +From 3b13911a2fd0dd0146c9777a254840c5466cf120 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 21 Jul 2021 19:10:08 -0400 +Subject: tracing: Synthetic event field_pos is an index not a boolean + +From: Steven Rostedt (VMware) + +commit 3b13911a2fd0dd0146c9777a254840c5466cf120 upstream. 
+ +Performing the following: + + ># echo 'wakeup_lat s32 pid; u64 delta; char wake_comm[]' > synthetic_events + ># echo 'hist:keys=pid:__arg__1=common_timestamp.usecs' > events/sched/sched_waking/trigger + ># echo 'hist:keys=next_pid:pid=next_pid,delta=common_timestamp.usecs-$__arg__1:onmatch(sched.sched_waking).trace(wakeup_lat,$pid,$delta,prev_comm)'\ + > events/sched/sched_switch/trigger + ># echo 1 > events/synthetic/enable + +Crashed the kernel: + + BUG: kernel NULL pointer dereference, address: 000000000000001b + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP + CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.13.0-rc5-test+ #104 + Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 + RIP: 0010:strlen+0x0/0x20 + Code: f6 82 80 2b 0b bc 20 74 11 0f b6 50 01 48 83 c0 01 f6 82 80 2b 0b bc + 20 75 ef c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <80> 3f 00 74 10 + 48 89 f8 48 83 c0 01 80 38 9 f8 c3 31 + RSP: 0018:ffffaa75000d79d0 EFLAGS: 00010046 + RAX: 0000000000000002 RBX: ffff9cdb55575270 RCX: 0000000000000000 + RDX: ffff9cdb58c7a320 RSI: ffffaa75000d7b40 RDI: 000000000000001b + RBP: ffffaa75000d7b40 R08: ffff9cdb40a4f010 R09: ffffaa75000d7ab8 + R10: ffff9cdb4398c700 R11: 0000000000000008 R12: ffff9cdb58c7a320 + R13: ffff9cdb55575270 R14: ffff9cdb58c7a000 R15: 0000000000000018 + FS: 0000000000000000(0000) GS:ffff9cdb5aa00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000001b CR3: 00000000c0612006 CR4: 00000000001706e0 + Call Trace: + trace_event_raw_event_synth+0x90/0x1d0 + action_trace+0x5b/0x70 + event_hist_trigger+0x4bd/0x4e0 + ? cpumask_next_and+0x20/0x30 + ? update_sd_lb_stats.constprop.0+0xf6/0x840 + ? __lock_acquire.constprop.0+0x125/0x550 + ? find_held_lock+0x32/0x90 + ? sched_clock_cpu+0xe/0xd0 + ? lock_release+0x155/0x440 + ? update_load_avg+0x8c/0x6f0 + ? 
enqueue_entity+0x18a/0x920 + ? __rb_reserve_next+0xe5/0x460 + ? ring_buffer_lock_reserve+0x12a/0x3f0 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x1ae/0x240 + trace_event_raw_event_sched_switch+0x114/0x170 + __traceiter_sched_switch+0x39/0x50 + __schedule+0x431/0xb00 + schedule_idle+0x28/0x40 + do_idle+0x198/0x2e0 + cpu_startup_entry+0x19/0x20 + secondary_startup_64_no_verify+0xc2/0xcb + +The reason is that the dynamic events array keeps track of the field +position of the fields array, via the field_pos variable in the +synth_field structure. Unfortunately, that field is a boolean for some +reason, which means any field_pos greater than 1 will be a bug (in this +case it was 2). + +Link: https://lkml.kernel.org/r/20210721191008.638bce34@oasis.local.home + +Cc: Masami Hiramatsu +Cc: Namhyung Kim +Cc: Ingo Molnar +Cc: Andrew Morton +Cc: stable@vger.kernel.org +Fixes: bd82631d7ccdc ("tracing: Add support for dynamic strings to synthetic events") +Reviewed-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_synth.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/trace_synth.h ++++ b/kernel/trace/trace_synth.h +@@ -14,10 +14,10 @@ struct synth_field { + char *name; + size_t size; + unsigned int offset; ++ unsigned int field_pos; + bool is_signed; + bool is_string; + bool is_dynamic; +- bool field_pos; + }; + + struct synth_event { diff --git a/queue-5.10/userfaultfd-do-not-untag-user-pointers.patch b/queue-5.10/userfaultfd-do-not-untag-user-pointers.patch new file mode 100644 index 00000000000..c3fbf06ff95 --- /dev/null +++ b/queue-5.10/userfaultfd-do-not-untag-user-pointers.patch @@ -0,0 +1,196 @@ +From e71e2ace5721a8b921dca18b045069e7bb411277 Mon Sep 17 00:00:00 2001 +From: Peter Collingbourne +Date: Fri, 23 Jul 2021 15:50:01 -0700 +Subject: userfaultfd: do not untag user pointers + +From: Peter Collingbourne + +commit e71e2ace5721a8b921dca18b045069e7bb411277 
upstream. + +Patch series "userfaultfd: do not untag user pointers", v5. + +If a user program uses userfaultfd on ranges of heap memory, it may end +up passing a tagged pointer to the kernel in the range.start field of +the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable +allocator, or on Android if using the Tagged Pointers feature for MTE +readiness [1]. + +When a fault subsequently occurs, the tag is stripped from the fault +address returned to the application in the fault.address field of struct +uffd_msg. However, from the application's perspective, the tagged +address *is* the memory address, so if the application is unaware of +memory tags, it may get confused by receiving an address that is, from +its point of view, outside of the bounds of the allocation. We observed +this behavior in the kselftest for userfaultfd [2] but other +applications could have the same problem. + +Address this by not untagging pointers passed to the userfaultfd ioctls. +Instead, let the system call fail. Also change the kselftest to use +mmap so that it doesn't encounter this problem. + +[1] https://source.android.com/devices/tech/debug/tagged-pointers +[2] tools/testing/selftests/vm/userfaultfd.c + +This patch (of 2): + +Do not untag pointers passed to the userfaultfd ioctls. Instead, let +the system call fail. This will provide an early indication of problems +with tag-unaware userspace code instead of letting the code get confused +later, and is consistent with how we decided to handle brk/mmap/mremap +in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in +brk()/mmap()/mremap()"), as well as being consistent with the existing +tagged address ABI documentation relating to how ioctl arguments are +handled. + +The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag +user pointers") plus some fixups to some additional calls to +validate_range that have appeared since then. 
+ +[1] https://source.android.com/devices/tech/debug/tagged-pointers +[2] tools/testing/selftests/vm/userfaultfd.c + +Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com +Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com +Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b +Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") +Signed-off-by: Peter Collingbourne +Reviewed-by: Andrey Konovalov +Reviewed-by: Catalin Marinas +Cc: Alistair Delva +Cc: Andrea Arcangeli +Cc: Dave Martin +Cc: Evgenii Stepanov +Cc: Lokesh Gidra +Cc: Mitch Phillips +Cc: Vincenzo Frascino +Cc: Will Deacon +Cc: William McVicker +Cc: <stable@vger.kernel.org> [5.4] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arm64/tagged-address-abi.rst | 26 ++++++++++++++++++-------- + fs/userfaultfd.c | 24 +++++++++++------------- + 2 files changed, 29 insertions(+), 21 deletions(-) + +--- a/Documentation/arm64/tagged-address-abi.rst ++++ b/Documentation/arm64/tagged-address-abi.rst +@@ -45,14 +45,24 @@ how the user addresses are used by the k + + 1. User addresses not accessed by the kernel but used for address space + management (e.g. ``mprotect()``, ``madvise()``). The use of valid +- tagged pointers in this context is allowed with the exception of +- ``brk()``, ``mmap()`` and the ``new_address`` argument to +- ``mremap()`` as these have the potential to alias with existing +- user addresses. +- +- NOTE: This behaviour changed in v5.6 and so some earlier kernels may +- incorrectly accept valid tagged pointers for the ``brk()``, +- ``mmap()`` and ``mremap()`` system calls. ++ tagged pointers in this context is allowed with these exceptions: ++ ++ - ``brk()``, ``mmap()`` and the ``new_address`` argument to ++ ``mremap()`` as these have the potential to alias with existing ++ user addresses.
++ ++ NOTE: This behaviour changed in v5.6 and so some earlier kernels may ++ incorrectly accept valid tagged pointers for the ``brk()``, ++ ``mmap()`` and ``mremap()`` system calls. ++ ++ - The ``range.start``, ``start`` and ``dst`` arguments to the ++ ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from ++ ``userfaultfd()``, as fault addresses subsequently obtained by reading ++ the file descriptor will be untagged, which may otherwise confuse ++ tag-unaware programs. ++ ++ NOTE: This behaviour changed in v5.14 and so some earlier kernels may ++ incorrectly accept valid tagged pointers for this system call. + + 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI + relaxation is disabled by default and the application thread needs to +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1228,23 +1228,21 @@ static __always_inline void wake_userfau + } + + static __always_inline int validate_range(struct mm_struct *mm, +- __u64 *start, __u64 len) ++ __u64 start, __u64 len) + { + __u64 task_size = mm->task_size; + +- *start = untagged_addr(*start); +- +- if (*start & ~PAGE_MASK) ++ if (start & ~PAGE_MASK) + return -EINVAL; + if (len & ~PAGE_MASK) + return -EINVAL; + if (!len) + return -EINVAL; +- if (*start < mmap_min_addr) ++ if (start < mmap_min_addr) + return -EINVAL; +- if (*start >= task_size) ++ if (start >= task_size) + return -EINVAL; +- if (len > task_size - *start) ++ if (len > task_size - start) + return -EINVAL; + return 0; + } +@@ -1290,7 +1288,7 @@ static int userfaultfd_register(struct u + if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) + vm_flags |= VM_UFFD_WP; + +- ret = validate_range(mm, &uffdio_register.range.start, ++ ret = validate_range(mm, uffdio_register.range.start, + uffdio_register.range.len); + if (ret) + goto out; +@@ -1490,7 +1488,7 @@ static int userfaultfd_unregister(struct + if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) + goto out; + +- ret = validate_range(mm, 
&uffdio_unregister.start, ++ ret = validate_range(mm, uffdio_unregister.start, + uffdio_unregister.len); + if (ret) + goto out; +@@ -1639,7 +1637,7 @@ static int userfaultfd_wake(struct userf + if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); ++ ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); + if (ret) + goto out; + +@@ -1679,7 +1677,7 @@ static int userfaultfd_copy(struct userf + sizeof(uffdio_copy)-sizeof(__s64))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); ++ ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); + if (ret) + goto out; + /* +@@ -1736,7 +1734,7 @@ static int userfaultfd_zeropage(struct u + sizeof(uffdio_zeropage)-sizeof(__s64))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, ++ ret = validate_range(ctx->mm, uffdio_zeropage.range.start, + uffdio_zeropage.range.len); + if (ret) + goto out; +@@ -1786,7 +1784,7 @@ static int userfaultfd_writeprotect(stru + sizeof(struct uffdio_writeprotect))) + return -EFAULT; + +- ret = validate_range(ctx->mm, &uffdio_wp.range.start, ++ ret = validate_range(ctx->mm, uffdio_wp.range.start, + uffdio_wp.range.len); + if (ret) + return ret;