From: Greg Kroah-Hartman Date: Mon, 26 Jul 2021 08:53:54 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v4.4.277~43 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=16e5159f6d50e153bc492b05e3a7774ddfd6865b;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: btrfs-check-for-missing-device-in-btrfs_trim_fs.patch firmware-efi-tell-memblock-about-efi-iomem-reservations.patch ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch tracing-histogram-rename-cpu-to-common_cpu.patch userfaultfd-do-not-untag-user-pointers.patch --- diff --git a/queue-5.4/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch b/queue-5.4/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch new file mode 100644 index 00000000000..c4b72e8f6de --- /dev/null +++ b/queue-5.4/btrfs-check-for-missing-device-in-btrfs_trim_fs.patch @@ -0,0 +1,80 @@ +From 16a200f66ede3f9afa2e51d90ade017aaa18d213 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Sun, 4 Jul 2021 19:14:39 +0800 +Subject: btrfs: check for missing device in btrfs_trim_fs + +From: Anand Jain + +commit 16a200f66ede3f9afa2e51d90ade017aaa18d213 upstream. 
+ +A fstrim on a degraded raid1 can trigger the following null pointer +dereference: + + BTRFS info (device loop0): allowing degraded mounts + BTRFS info (device loop0): disk space caching is enabled + BTRFS info (device loop0): has skinny extents + BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing + BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing + BTRFS info (device loop0): enabling ssd optimizations + BUG: kernel NULL pointer dereference, address: 0000000000000620 + PGD 0 P4D 0 + Oops: 0000 [#1] SMP NOPTI + CPU: 0 PID: 4574 Comm: fstrim Not tainted 5.13.0-rc7+ #31 + Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 + RIP: 0010:btrfs_trim_fs+0x199/0x4a0 [btrfs] + RSP: 0018:ffff959541797d28 EFLAGS: 00010293 + RAX: 0000000000000000 RBX: ffff946f84eca508 RCX: a7a67937adff8608 + RDX: ffff946e8122d000 RSI: 0000000000000000 RDI: ffffffffc02fdbf0 + RBP: ffff946ea4615000 R08: 0000000000000001 R09: 0000000000000000 + R10: 0000000000000000 R11: ffff946e8122d960 R12: 0000000000000000 + R13: ffff959541797db8 R14: ffff946e8122d000 R15: ffff959541797db8 + FS: 00007f55917a5080(0000) GS:ffff946f9bc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000620 CR3: 000000002d2c8001 CR4: 00000000000706f0 + Call Trace: + btrfs_ioctl_fitrim+0x167/0x260 [btrfs] + btrfs_ioctl+0x1c00/0x2fe0 [btrfs] + ? selinux_file_ioctl+0x140/0x240 + ? syscall_trace_enter.constprop.0+0x188/0x240 + ? __x64_sys_ioctl+0x83/0xb0 + __x64_sys_ioctl+0x83/0xb0 + +Reproducer: + + $ mkfs.btrfs -fq -d raid1 -m raid1 /dev/loop0 /dev/loop1 + $ mount /dev/loop0 /btrfs + $ umount /btrfs + $ btrfs dev scan --forget + $ mount -o degraded /dev/loop0 /btrfs + + $ fstrim /btrfs + +The reason is we call btrfs_trim_free_extents() for the missing device, +which uses device->bdev (NULL for missing device) to find if the device +supports discard. 
+ +Fix is to check if the device is missing before calling +btrfs_trim_free_extents(). + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -5768,6 +5768,9 @@ int btrfs_trim_fs(struct btrfs_fs_info * + mutex_lock(&fs_info->fs_devices->device_list_mutex); + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ++ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) ++ continue; ++ + ret = btrfs_trim_free_extents(device, &group_trimmed); + if (ret) { + dev_failed++; diff --git a/queue-5.4/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch b/queue-5.4/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch new file mode 100644 index 00000000000..debaa681720 --- /dev/null +++ b/queue-5.4/firmware-efi-tell-memblock-about-efi-iomem-reservations.patch @@ -0,0 +1,66 @@ +From 2bab693a608bdf614b9fcd44083c5100f34b9f77 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 13 Jul 2021 19:43:26 +0100 +Subject: firmware/efi: Tell memblock about EFI iomem reservations + +From: Marc Zyngier + +commit 2bab693a608bdf614b9fcd44083c5100f34b9f77 upstream. + +kexec_load_file() relies on the memblock infrastructure to avoid +stamping over regions of memory that are essential to the survival +of the system. + +However, nobody seems to agree how to flag these regions as reserved, +and (for example) EFI only publishes its reservations in /proc/iomem +for the benefit of the traditional, userspace based kexec tool. + +On arm64 platforms with GICv3, this can result in the payload being +placed at the location of the LPI tables. Shock, horror! 
+ +Let's augment the EFI reservation code with a memblock_reserve() call, +protecting our dear tables from the secondary kernel invasion. + +Reported-by: Moritz Fischer +Tested-by: Moritz Fischer +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Cc: Ard Biesheuvel +Cc: James Morse +Cc: Catalin Marinas +Cc: Will Deacon +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/efi.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -975,6 +975,7 @@ static int __init efi_memreserve_map_roo + static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) + { + struct resource *res, *parent; ++ int ret; + + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) +@@ -987,7 +988,17 @@ static int efi_mem_reserve_iomem(phys_ad + + /* we expect a conflict with a 'System RAM' region */ + parent = request_resource_conflict(&iomem_resource, res); +- return parent ? request_resource(parent, res) : 0; ++ ret = parent ? request_resource(parent, res) : 0; ++ ++ /* ++ * Given that efi_mem_reserve_iomem() can be called at any ++ * time, only call memblock_reserve() if the architecture ++ * keeps the infrastructure around. 
++ */ ++ if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret) ++ memblock_reserve(addr, size); ++ ++ return ret; + } + + int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) diff --git a/queue-5.4/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch b/queue-5.4/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch new file mode 100644 index 00000000000..7734bb9f55d --- /dev/null +++ b/queue-5.4/ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch @@ -0,0 +1,55 @@ +From 09cfae9f13d51700b0fecf591dcd658fc5375428 Mon Sep 17 00:00:00 2001 +From: Markus Boehme +Date: Tue, 20 Jul 2021 16:26:19 -0700 +Subject: ixgbe: Fix packet corruption due to missing DMA sync + +From: Markus Boehme + +commit 09cfae9f13d51700b0fecf591dcd658fc5375428 upstream. + +When receiving a packet with multiple fragments, hardware may still +touch the first fragment until the entire packet has been received. The +driver therefore keeps the first fragment mapped for DMA until end of +packet has been asserted, and delays its dma_sync call until then. + +The driver tries to fit multiple receive buffers on one page. When using +3K receive buffers (e.g. using Jumbo frames and legacy-rx is turned +off/build_skb is being used) on an architecture with 4K pages, the +driver allocates an order 1 compound page and uses one page per receive +buffer. To determine the correct offset for a delayed DMA sync of the +first fragment of a multi-fragment packet, the driver then cannot just +use PAGE_MASK on the DMA address but has to construct a mask based on +the actual size of the backing page. + +Using PAGE_MASK in the 3K RX buffer/4K page architecture configuration +will always sync the first page of a compound page. With the SWIOTLB +enabled this can lead to corrupted packets (zeroed out first fragment, +re-used garbage from another packet) and various consequences, such as +slow/stalling data transfers and connection resets. 
For example, testing +on a link with MTU exceeding 3058 bytes on a host with SWIOTLB enabled +(e.g. "iommu=soft swiotlb=262144,force") TCP transfers quickly fizzle +out without this patch. + +Cc: stable@vger.kernel.org +Fixes: 0c5661ecc5dd7 ("ixgbe: fix crash in build_skb Rx code path") +Signed-off-by: Markus Boehme +Tested-by: Tony Brelinski +Signed-off-by: Tony Nguyen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -1827,7 +1827,8 @@ static void ixgbe_dma_sync_frag(struct i + struct sk_buff *skb) + { + if (ring_uses_build_skb(rx_ring)) { +- unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; ++ unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; ++ unsigned long offset = (unsigned long)(skb->data) & mask; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, diff --git a/queue-5.4/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch b/queue-5.4/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch new file mode 100644 index 00000000000..d233006b8b4 --- /dev/null +++ b/queue-5.4/media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch @@ -0,0 +1,82 @@ +From 8d4abca95ecc82fc8c41912fa0085281f19cc29f Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Mon, 19 Apr 2021 18:43:32 -0500 +Subject: media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf() + +From: Gustavo A. R. Silva + +commit 8d4abca95ecc82fc8c41912fa0085281f19cc29f upstream. 
+ +Fix an 11-year-old bug in ngene_command_config_free_buf() while +addressing the following warnings caught with -Warray-bounds: + +arch/alpha/include/asm/string.h:22:16: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] +arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] + +The problem is that the original code is trying to copy 6 bytes of +data into a one-byte size member _config_ of the wrong structure +FW_CONFIGURE_BUFFERS, in a single call to memcpy(). This causes a +legitimate compiler warning because memcpy() overruns the length +of &com.cmd.ConfigureBuffers.config. It seems that the right +structure is FW_CONFIGURE_FREE_BUFFERS, instead, because it contains +6 more members apart from the header _hdr_. Also, the name of +the function ngene_command_config_free_buf() suggests that the actual +intention is to ConfigureFreeBuffers, instead of ConfigureBuffers +(which takes place in the function ngene_command_config_buf(), above). + +Fix this by enclosing those 6 members of struct FW_CONFIGURE_FREE_BUFFERS +into new struct config, and use &com.cmd.ConfigureFreeBuffers.config as +the destination address, instead of &com.cmd.ConfigureBuffers.config, +when calling memcpy(). + +This also helps with the ongoing efforts to globally enable +-Warray-bounds and get us closer to being able to tighten the +FORTIFY_SOURCE routines on memcpy(). + +Link: https://github.com/KSPP/linux/issues/109 +Fixes: dae52d009fc9 ("V4L/DVB: ngene: Initial check-in") +Cc: stable@vger.kernel.org +Reported-by: kernel test robot +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. 
Silva +Link: https://lore.kernel.org/linux-hardening/20210420001631.GA45456@embeddedor/ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/pci/ngene/ngene-core.c | 2 +- + drivers/media/pci/ngene/ngene.h | 14 ++++++++------ + 2 files changed, 9 insertions(+), 7 deletions(-) + +--- a/drivers/media/pci/ngene/ngene-core.c ++++ b/drivers/media/pci/ngene/ngene-core.c +@@ -385,7 +385,7 @@ static int ngene_command_config_free_buf + + com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER; + com.cmd.hdr.Length = 6; +- memcpy(&com.cmd.ConfigureBuffers.config, config, 6); ++ memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6); + com.in_len = 6; + com.out_len = 0; + +--- a/drivers/media/pci/ngene/ngene.h ++++ b/drivers/media/pci/ngene/ngene.h +@@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS { + + struct FW_CONFIGURE_FREE_BUFFERS { + struct FW_HEADER hdr; +- u8 UVI1_BufferLength; +- u8 UVI2_BufferLength; +- u8 TVO_BufferLength; +- u8 AUD1_BufferLength; +- u8 AUD2_BufferLength; +- u8 TVA_BufferLength; ++ struct { ++ u8 UVI1_BufferLength; ++ u8 UVI2_BufferLength; ++ u8 TVO_BufferLength; ++ u8 AUD1_BufferLength; ++ u8 AUD2_BufferLength; ++ u8 TVA_BufferLength; ++ } __packed config; + } __attribute__ ((__packed__)); + + struct FW_CONFIGURE_UART { diff --git a/queue-5.4/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch b/queue-5.4/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch new file mode 100644 index 00000000000..9c441dbc632 --- /dev/null +++ b/queue-5.4/selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch @@ -0,0 +1,56 @@ +From 0db282ba2c12c1515d490d14a1ff696643ab0f1b Mon Sep 17 00:00:00 2001 +From: Peter Collingbourne +Date: Fri, 23 Jul 2021 15:50:04 -0700 +Subject: selftest: use mmap instead of posix_memalign to allocate memory + +From: Peter Collingbourne + +commit 0db282ba2c12c1515d490d14a1ff696643ab0f1b upstream. + +This test passes pointers obtained from anon_allocate_area to the +userfaultfd and mremap APIs. 
This causes a problem if the system +allocator returns tagged pointers because with the tagged address ABI +the kernel rejects tagged addresses passed to these APIs, which would +end up causing the test to fail. To make this test compatible with such +system allocators, stop using the system allocator to allocate memory in +anon_allocate_area, and instead just use mmap. + +Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com +Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241 +Fixes: c47174fc362a ("userfaultfd: selftest") +Co-developed-by: Lokesh Gidra +Signed-off-by: Lokesh Gidra +Signed-off-by: Peter Collingbourne +Reviewed-by: Catalin Marinas +Cc: Vincenzo Frascino +Cc: Dave Martin +Cc: Will Deacon +Cc: Andrea Arcangeli +Cc: Alistair Delva +Cc: William McVicker +Cc: Evgenii Stepanov +Cc: Mitch Phillips +Cc: Andrey Konovalov +Cc: [5.4] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/vm/userfaultfd.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/vm/userfaultfd.c ++++ b/tools/testing/selftests/vm/userfaultfd.c +@@ -139,8 +139,10 @@ static int anon_release_pages(char *rel_ + + static void anon_allocate_area(void **alloc_area) + { +- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { +- fprintf(stderr, "out of memory\n"); ++ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, ++ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ++ if (*alloc_area == MAP_FAILED) { ++ fprintf(stderr, "mmap of anonymous memory failed"); + *alloc_area = NULL; + } + } diff --git a/queue-5.4/series b/queue-5.4/series index 14ed0ef8328..5a72f71c8c3 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -86,3 +86,11 @@ usb-serial-option-add-support-for-u-blox-lara-r6-family.patch usb-serial-cp210x-fix-comments-for-ge-cs1000.patch usb-serial-cp210x-add-id-for-cel-em3588-usb-zigbee-stick.patch 
usb-dwc2-gadget-fix-sending-zero-length-packet-in-ddma-mode.patch +firmware-efi-tell-memblock-about-efi-iomem-reservations.patch +tracing-histogram-rename-cpu-to-common_cpu.patch +tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch +btrfs-check-for-missing-device-in-btrfs_trim_fs.patch +media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch +ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch +selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch +userfaultfd-do-not-untag-user-pointers.patch diff --git a/queue-5.4/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch b/queue-5.4/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch new file mode 100644 index 00000000000..11c0fd01f79 --- /dev/null +++ b/queue-5.4/tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch @@ -0,0 +1,102 @@ +From 67f0d6d9883c13174669f88adac4f0ee656cc16a Mon Sep 17 00:00:00 2001 +From: Haoran Luo +Date: Wed, 21 Jul 2021 14:12:07 +0000 +Subject: tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop. + +From: Haoran Luo + +commit 67f0d6d9883c13174669f88adac4f0ee656cc16a upstream. + +The "rb_per_cpu_empty()" misinterprets the condition (as not-empty) when +"head_page" and "commit_page" of "struct ring_buffer_per_cpu" point to +the same buffer page, whose "buffer_data_page" is empty and "read" field +is non-zero. + +An error scenario could be constructed as follows (kernel perspective): + +1. All pages in the buffer have been accessed by reader(s) so that all of +them will have non-zero "read" field. + +2. Read and clear all buffer pages so that "rb_num_of_entries()" will +return 0 rendering there's no more data to read. It is also required +that the "read_page", "commit_page" and "tail_page" point to the same +page, while "head_page" is the next page of them. + +3. Invoke "ring_buffer_lock_reserve()" with large enough "length" +so that it shoots past the end of current tail buffer page. 
Now the +"head_page", "commit_page" and "tail_page" point to the same page. + +4. Discard current event with "ring_buffer_discard_commit()", so that +"head_page", "commit_page" and "tail_page" point to a page whose buffer +data page is now empty. + +When the error scenario has been constructed, "tracing_read_pipe" will +be trapped inside a deadloop: "trace_empty()" returns 0 since +"rb_per_cpu_empty()" returns 0 when it hits the CPU containing such +constructed ring buffer. Then "trace_find_next_entry_inc()" always +returns NULL since "rb_num_of_entries()" reports there's no more entry +to read. Finally "trace_seq_to_user()" returns "-EBUSY" spanking +"tracing_read_pipe" back to the start of the "waitagain" loop. + +I've also written a proof-of-concept script to construct the scenario +and trigger the bug automatically, you can use it to trace and validate +my reasoning above: + + https://github.com/aegistudio/RingBufferDetonator.git + +Tests have been carried out on linux kernel 5.14-rc2 +(2734d6c1b1a089fb593ef6a23d4b70903526fe0c), my fixed version +of kernel (for testing whether my update fixes the bug) and +some older kernels (for range of affected kernels). Test result is +also attached to the proof-of-concept repository. 
+ +Link: https://lore.kernel.org/linux-trace-devel/YPaNxsIlb2yjSi5Y@aegistudio/ +Link: https://lore.kernel.org/linux-trace-devel/YPgrN85WL9VyrZ55@aegistudio + +Cc: stable@vger.kernel.org +Fixes: bf41a158cacba ("ring-buffer: make reentrant") +Suggested-by: Linus Torvalds +Signed-off-by: Haoran Luo +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 28 ++++++++++++++++++++++++---- + 1 file changed, 24 insertions(+), 4 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3221,10 +3221,30 @@ static bool rb_per_cpu_empty(struct ring + if (unlikely(!head)) + return true; + +- return reader->read == rb_page_commit(reader) && +- (commit == reader || +- (commit == head && +- head->read == rb_page_commit(commit))); ++ /* Reader should exhaust content in reader page */ ++ if (reader->read != rb_page_commit(reader)) ++ return false; ++ ++ /* ++ * If writers are committing on the reader page, knowing all ++ * committed content has been read, the ring buffer is empty. ++ */ ++ if (commit == reader) ++ return true; ++ ++ /* ++ * If writers are committing on a page other than reader page ++ * and head page, there should always be content to read. ++ */ ++ if (commit != head) ++ return false; ++ ++ /* ++ * Writers are committing on the head page, we just need ++ * to care about there're committed data, and the reader will ++ * swap reader page with head page when it is to read data. 
++ */ ++ return rb_page_commit(commit) == 0; + } + + /** diff --git a/queue-5.4/tracing-histogram-rename-cpu-to-common_cpu.patch b/queue-5.4/tracing-histogram-rename-cpu-to-common_cpu.patch new file mode 100644 index 00000000000..a7fa68fc66a --- /dev/null +++ b/queue-5.4/tracing-histogram-rename-cpu-to-common_cpu.patch @@ -0,0 +1,152 @@ +From 1e3bac71c5053c99d438771fc9fa5082ae5d90aa Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 21 Jul 2021 11:00:53 -0400 +Subject: tracing/histogram: Rename "cpu" to "common_cpu" + +From: Steven Rostedt (VMware) + +commit 1e3bac71c5053c99d438771fc9fa5082ae5d90aa upstream. + +Currently the histogram logic allows the user to write "cpu" in as an +event field, and it will record the CPU that the event happened on. + +The problem with this is that there's a lot of events that have "cpu" +as a real field, and using "cpu" as the CPU it ran on, makes it +impossible to run histograms on the "cpu" field of events. + +For example, if I want to have a histogram on the count of the +workqueue_queue_work event on its cpu field, running: + + ># echo 'hist:keys=cpu' > events/workqueue/workqueue_queue_work/trigger + +Gives a misleading and wrong result. + +Change the command to "common_cpu" as no event should have "common_*" +fields as that's a reserved name for fields used by all events. And +this makes sense here as common_cpu would be a field used by all events. 
+ +Now we can even do: + + ># echo 'hist:keys=common_cpu,cpu if cpu < 100' > events/workqueue/workqueue_queue_work/trigger + ># cat events/workqueue/workqueue_queue_work/hist + # event histogram + # + # trigger info: hist:keys=common_cpu,cpu:vals=hitcount:sort=hitcount:size=2048 if cpu < 100 [active] + # + + { common_cpu: 0, cpu: 2 } hitcount: 1 + { common_cpu: 0, cpu: 4 } hitcount: 1 + { common_cpu: 7, cpu: 7 } hitcount: 1 + { common_cpu: 0, cpu: 7 } hitcount: 1 + { common_cpu: 0, cpu: 1 } hitcount: 1 + { common_cpu: 0, cpu: 6 } hitcount: 2 + { common_cpu: 0, cpu: 5 } hitcount: 2 + { common_cpu: 1, cpu: 1 } hitcount: 4 + { common_cpu: 6, cpu: 6 } hitcount: 4 + { common_cpu: 5, cpu: 5 } hitcount: 14 + { common_cpu: 4, cpu: 4 } hitcount: 26 + { common_cpu: 0, cpu: 0 } hitcount: 39 + { common_cpu: 2, cpu: 2 } hitcount: 184 + +Now for backward compatibility, I added a trick. If "cpu" is used, and +the field is not found, it will fall back to "common_cpu" and work as +it did before. This way, it will still work for old programs that use +"cpu" to get the actual CPU, but if the event has a "cpu" as a field, it +will get that event's "cpu" field, which is probably what it wants +anyway. + +I updated the tracefs/README to include documentation about both the +common_timestamp and the common_cpu. This way, if that text is present in +the README, then an application can know that common_cpu is supported over +just plain "cpu". 
+ +Link: https://lkml.kernel.org/r/20210721110053.26b4f641@oasis.local.home + +Cc: Namhyung Kim +Cc: Ingo Molnar +Cc: Andrew Morton +Cc: stable@vger.kernel.org +Fixes: 8b7622bf94a44 ("tracing: Add cpu field for hist triggers") +Reviewed-by: Tom Zanussi +Reviewed-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/trace/histogram.rst | 2 +- + kernel/trace/trace.c | 4 ++++ + kernel/trace/trace_events_hist.c | 22 ++++++++++++++++------ + 3 files changed, 21 insertions(+), 7 deletions(-) + +--- a/Documentation/trace/histogram.rst ++++ b/Documentation/trace/histogram.rst +@@ -191,7 +191,7 @@ Documentation written by Tom Zanussi + with the event, in nanoseconds. May be + modified by .usecs to have timestamps + interpreted as microseconds. +- cpu int the cpu on which the event occurred. ++ common_cpu int the cpu on which the event occurred. + ====================== ==== ======================================= + + Extended error information +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4975,6 +4975,10 @@ static const char readme_msg[] = + "\t [:name=histname1]\n" + "\t [:.]\n" + "\t [if ]\n\n" ++ "\t Note, special fields can be used as well:\n" ++ "\t common_timestamp - to record current timestamp\n" ++ "\t common_cpu - to record the CPU the event happened on\n" ++ "\n" + "\t When a matching event is hit, an entry is added to a hash\n" + "\t table using the key(s) and value(s) named, and the value of a\n" + "\t sum called 'hitcount' is incremented. 
Keys and values\n" +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2001,7 +2001,7 @@ static const char *hist_field_name(struc + field->flags & HIST_FIELD_FL_ALIAS) + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_CPU) +- field_name = "cpu"; ++ field_name = "common_cpu"; + else if (field->flags & HIST_FIELD_FL_EXPR || + field->flags & HIST_FIELD_FL_VAR_REF) { + if (field->system) { +@@ -2873,14 +2873,24 @@ parse_field(struct hist_trigger_data *hi + hist_data->enable_timestamps = true; + if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) + hist_data->attrs->ts_in_usecs = true; +- } else if (strcmp(field_name, "cpu") == 0) ++ } else if (strcmp(field_name, "common_cpu") == 0) + *flags |= HIST_FIELD_FL_CPU; + else { + field = trace_find_event_field(file->event_call, field_name); + if (!field || !field->size) { +- hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); +- field = ERR_PTR(-EINVAL); +- goto out; ++ /* ++ * For backward compatibility, if field_name ++ * was "cpu", then we treat this the same as ++ * common_cpu. 
++ */ ++ if (strcmp(field_name, "cpu") == 0) { ++ *flags |= HIST_FIELD_FL_CPU; ++ } else { ++ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, ++ errpos(field_name)); ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } + } + } + out: +@@ -5641,7 +5651,7 @@ static void hist_field_print(struct seq_ + seq_printf(m, "%s=", hist_field->var.name); + + if (hist_field->flags & HIST_FIELD_FL_CPU) +- seq_puts(m, "cpu"); ++ seq_puts(m, "common_cpu"); + else if (field_name) { + if (hist_field->flags & HIST_FIELD_FL_VAR_REF || + hist_field->flags & HIST_FIELD_FL_ALIAS) diff --git a/queue-5.4/userfaultfd-do-not-untag-user-pointers.patch b/queue-5.4/userfaultfd-do-not-untag-user-pointers.patch new file mode 100644 index 00000000000..7b3c41b4d0d --- /dev/null +++ b/queue-5.4/userfaultfd-do-not-untag-user-pointers.patch @@ -0,0 +1,187 @@ +From e71e2ace5721a8b921dca18b045069e7bb411277 Mon Sep 17 00:00:00 2001 +From: Peter Collingbourne +Date: Fri, 23 Jul 2021 15:50:01 -0700 +Subject: userfaultfd: do not untag user pointers + +From: Peter Collingbourne + +commit e71e2ace5721a8b921dca18b045069e7bb411277 upstream. + +Patch series "userfaultfd: do not untag user pointers", v5. + +If a user program uses userfaultfd on ranges of heap memory, it may end +up passing a tagged pointer to the kernel in the range.start field of +the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable +allocator, or on Android if using the Tagged Pointers feature for MTE +readiness [1]. + +When a fault subsequently occurs, the tag is stripped from the fault +address returned to the application in the fault.address field of struct +uffd_msg. However, from the application's perspective, the tagged +address *is* the memory address, so if the application is unaware of +memory tags, it may get confused by receiving an address that is, from +its point of view, outside of the bounds of the allocation. We observed +this behavior in the kselftest for userfaultfd [2] but other +applications could have the same problem. 
+ +Address this by not untagging pointers passed to the userfaultfd ioctls. +Instead, let the system call fail. Also change the kselftest to use +mmap so that it doesn't encounter this problem. + +[1] https://source.android.com/devices/tech/debug/tagged-pointers +[2] tools/testing/selftests/vm/userfaultfd.c + +This patch (of 2): + +Do not untag pointers passed to the userfaultfd ioctls. Instead, let +the system call fail. This will provide an early indication of problems +with tag-unaware userspace code instead of letting the code get confused +later, and is consistent with how we decided to handle brk/mmap/mremap +in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in +brk()/mmap()/mremap()"), as well as being consistent with the existing +tagged address ABI documentation relating to how ioctl arguments are +handled. + +The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag +user pointers") plus some fixups to some additional calls to +validate_range that have appeared since then. 
+ +[1] https://source.android.com/devices/tech/debug/tagged-pointers +[2] tools/testing/selftests/vm/userfaultfd.c + +Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com +Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com +Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b +Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") +Signed-off-by: Peter Collingbourne +Reviewed-by: Andrey Konovalov +Reviewed-by: Catalin Marinas +Cc: Alistair Delva +Cc: Andrea Arcangeli +Cc: Dave Martin +Cc: Evgenii Stepanov +Cc: Lokesh Gidra +Cc: Mitch Phillips +Cc: Vincenzo Frascino +Cc: Will Deacon +Cc: William McVicker +Cc: [5.4] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arm64/tagged-address-abi.rst | 26 ++++++++++++++++++-------- + fs/userfaultfd.c | 22 ++++++++++------------ + 2 files changed, 28 insertions(+), 20 deletions(-) + +--- a/Documentation/arm64/tagged-address-abi.rst ++++ b/Documentation/arm64/tagged-address-abi.rst +@@ -45,14 +45,24 @@ how the user addresses are used by the k + + 1. User addresses not accessed by the kernel but used for address space + management (e.g. ``mprotect()``, ``madvise()``). The use of valid +- tagged pointers in this context is allowed with the exception of +- ``brk()``, ``mmap()`` and the ``new_address`` argument to +- ``mremap()`` as these have the potential to alias with existing +- user addresses. +- +- NOTE: This behaviour changed in v5.6 and so some earlier kernels may +- incorrectly accept valid tagged pointers for the ``brk()``, +- ``mmap()`` and ``mremap()`` system calls. ++ tagged pointers in this context is allowed with these exceptions: ++ ++ - ``brk()``, ``mmap()`` and the ``new_address`` argument to ++ ``mremap()`` as these have the potential to alias with existing ++ user addresses. 
++ ++ NOTE: This behaviour changed in v5.6 and so some earlier kernels may ++ incorrectly accept valid tagged pointers for the ``brk()``, ++ ``mmap()`` and ``mremap()`` system calls. ++ ++ - The ``range.start``, ``start`` and ``dst`` arguments to the ++ ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from ++ ``userfaultfd()``, as fault addresses subsequently obtained by reading ++ the file descriptor will be untagged, which may otherwise confuse ++ tag-unaware programs. ++ ++ NOTE: This behaviour changed in v5.14 and so some earlier kernels may ++ incorrectly accept valid tagged pointers for this system call. + + 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI + relaxation is disabled by default and the application thread needs to +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1272,23 +1272,21 @@ static __always_inline void wake_userfau + } + + static __always_inline int validate_range(struct mm_struct *mm, +- __u64 *start, __u64 len) ++ __u64 start, __u64 len) + { + __u64 task_size = mm->task_size; + +- *start = untagged_addr(*start); +- +- if (*start & ~PAGE_MASK) ++ if (start & ~PAGE_MASK) + return -EINVAL; + if (len & ~PAGE_MASK) + return -EINVAL; + if (!len) + return -EINVAL; +- if (*start < mmap_min_addr) ++ if (start < mmap_min_addr) + return -EINVAL; +- if (*start >= task_size) ++ if (start >= task_size) + return -EINVAL; +- if (len > task_size - *start) ++ if (len > task_size - start) + return -EINVAL; + return 0; + } +@@ -1338,7 +1336,7 @@ static int userfaultfd_register(struct u + goto out; + } + +- ret = validate_range(mm, &uffdio_register.range.start, ++ ret = validate_range(mm, uffdio_register.range.start, + uffdio_register.range.len); + if (ret) + goto out; +@@ -1527,7 +1525,7 @@ static int userfaultfd_unregister(struct + if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) + goto out; + +- ret = validate_range(mm, &uffdio_unregister.start, ++ ret = validate_range(mm, 
uffdio_unregister.start, + uffdio_unregister.len); + if (ret) + goto out; +@@ -1678,7 +1676,7 @@ static int userfaultfd_wake(struct userf + if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); ++ ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); + if (ret) + goto out; + +@@ -1718,7 +1716,7 @@ static int userfaultfd_copy(struct userf + sizeof(uffdio_copy)-sizeof(__s64))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); ++ ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); + if (ret) + goto out; + /* +@@ -1774,7 +1772,7 @@ static int userfaultfd_zeropage(struct u + sizeof(uffdio_zeropage)-sizeof(__s64))) + goto out; + +- ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, ++ ret = validate_range(ctx->mm, uffdio_zeropage.range.start, + uffdio_zeropage.range.len); + if (ret) + goto out;