--- /dev/null
+From 16a200f66ede3f9afa2e51d90ade017aaa18d213 Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Sun, 4 Jul 2021 19:14:39 +0800
+Subject: btrfs: check for missing device in btrfs_trim_fs
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 16a200f66ede3f9afa2e51d90ade017aaa18d213 upstream.
+
+A fstrim on a degraded raid1 can trigger the following null pointer
+dereference:
+
+ BTRFS info (device loop0): allowing degraded mounts
+ BTRFS info (device loop0): disk space caching is enabled
+ BTRFS info (device loop0): has skinny extents
+ BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing
+ BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing
+ BTRFS info (device loop0): enabling ssd optimizations
+ BUG: kernel NULL pointer dereference, address: 0000000000000620
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP NOPTI
+ CPU: 0 PID: 4574 Comm: fstrim Not tainted 5.13.0-rc7+ #31
+ Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+ RIP: 0010:btrfs_trim_fs+0x199/0x4a0 [btrfs]
+ RSP: 0018:ffff959541797d28 EFLAGS: 00010293
+ RAX: 0000000000000000 RBX: ffff946f84eca508 RCX: a7a67937adff8608
+ RDX: ffff946e8122d000 RSI: 0000000000000000 RDI: ffffffffc02fdbf0
+ RBP: ffff946ea4615000 R08: 0000000000000001 R09: 0000000000000000
+ R10: 0000000000000000 R11: ffff946e8122d960 R12: 0000000000000000
+ R13: ffff959541797db8 R14: ffff946e8122d000 R15: ffff959541797db8
+ FS: 00007f55917a5080(0000) GS:ffff946f9bc00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000620 CR3: 000000002d2c8001 CR4: 00000000000706f0
+ Call Trace:
+ btrfs_ioctl_fitrim+0x167/0x260 [btrfs]
+ btrfs_ioctl+0x1c00/0x2fe0 [btrfs]
+ ? selinux_file_ioctl+0x140/0x240
+ ? syscall_trace_enter.constprop.0+0x188/0x240
+ ? __x64_sys_ioctl+0x83/0xb0
+ __x64_sys_ioctl+0x83/0xb0
+
+Reproducer:
+
+ $ mkfs.btrfs -fq -d raid1 -m raid1 /dev/loop0 /dev/loop1
+ $ mount /dev/loop0 /btrfs
+ $ umount /btrfs
+ $ btrfs dev scan --forget
+ $ mount -o degraded /dev/loop0 /btrfs
+
+ $ fstrim /btrfs
+
+The reason is that we call btrfs_trim_free_extents() for the missing
+device, which dereferences device->bdev (NULL for a missing device) to
+find out whether the device supports discard.
+
+Fix this by checking whether the device is missing before calling
+btrfs_trim_free_extents().
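+
+For illustration, a minimal userspace sketch of the failing pattern,
+using hypothetical simplified types rather than the kernel's own:
+
+  #include <stdbool.h>
+  #include <stddef.h>
+  #include <stdio.h>
+
+  struct block_device { unsigned int discard_granularity; };
+  struct btrfs_device { struct block_device *bdev; unsigned long dev_state; };
+
+  #define DEV_STATE_MISSING 0UL
+
+  static bool device_missing(const struct btrfs_device *dev)
+  {
+          return dev->dev_state & (1UL << DEV_STATE_MISSING);
+  }
+
+  int main(void)
+  {
+          struct btrfs_device dev = {
+                  .bdev = NULL,               /* missing device */
+                  .dev_state = 1UL << DEV_STATE_MISSING,
+          };
+
+          if (device_missing(&dev)) {         /* the added check */
+                  printf("skipping missing device\n");
+                  return 0;
+          }
+          /* without the check, this dereferences the NULL bdev */
+          printf("granularity: %u\n", dev.bdev->discard_granularity);
+          return 0;
+  }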
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -5883,6 +5883,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ devices = &fs_info->fs_devices->devices;
+ list_for_each_entry(device, devices, dev_list) {
++ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
++ continue;
++
+ ret = btrfs_trim_free_extents(device, &group_trimmed);
+ if (ret) {
+ dev_failed++;
--- /dev/null
+From 546362a9ef2ef40b57c6605f14e88ced507f8dd0 Mon Sep 17 00:00:00 2001
+From: Bhaumik Bhatt <bbhatt@codeaurora.org>
+Date: Fri, 16 Jul 2021 13:21:05 +0530
+Subject: bus: mhi: core: Validate channel ID when processing command completions
+
+From: Bhaumik Bhatt <bbhatt@codeaurora.org>
+
+commit 546362a9ef2ef40b57c6605f14e88ced507f8dd0 upstream.
+
+MHI reads the channel ID from the event ring element sent by the
+device, which can be any value between 0 and 255. To prevent
+out-of-bounds accesses, validate the channel ID against the maximum
+number of channels supported by the controller, and skip processing of
+the event ring element if the channel is out of range or not yet
+configured.
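+
+As an illustration, a minimal userspace sketch of validating a
+device-supplied index before using it (hypothetical types, not the
+MHI code itself):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  #define MAX_CHAN 8
+
+  struct chan { bool configured; int ccs; };
+  static struct chan chans[MAX_CHAN];
+
+  static void process_cmd_completion(unsigned int chan, int ev_code)
+  {
+          /* chan comes from the device and may be anything in 0..255 */
+          if (chan >= MAX_CHAN || !chans[chan].configured) {
+                  fprintf(stderr, "completion for invalid channel %u\n", chan);
+                  return;
+          }
+          chans[chan].ccs = ev_code;
+  }
+
+  int main(void)
+  {
+          chans[3].configured = true;
+          process_cmd_completion(3, 1);       /* valid, processed */
+          process_cmd_completion(200, 1);     /* out of bounds, skipped */
+          return 0;
+  }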
+
+Link: https://lore.kernel.org/r/1624558141-11045-1-git-send-email-bbhatt@codeaurora.org
+Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device")
+Cc: stable@vger.kernel.org #5.10
+Reviewed-by: Hemant Kumar <hemantk@codeaurora.org>
+Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Signed-off-by: Bhaumik Bhatt <bbhatt@codeaurora.org>
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20210716075106.49938-3-manivannan.sadhasivam@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bus/mhi/core/main.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/drivers/bus/mhi/core/main.c
++++ b/drivers/bus/mhi/core/main.c
+@@ -706,11 +706,18 @@ static void mhi_process_cmd_completion(s
+ cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
+
+ chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
+- mhi_chan = &mhi_cntrl->mhi_chan[chan];
+- write_lock_bh(&mhi_chan->lock);
+- mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
+- complete(&mhi_chan->completion);
+- write_unlock_bh(&mhi_chan->lock);
++
++ if (chan < mhi_cntrl->max_chan &&
++ mhi_cntrl->mhi_chan[chan].configured) {
++ mhi_chan = &mhi_cntrl->mhi_chan[chan];
++ write_lock_bh(&mhi_chan->lock);
++ mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
++ complete(&mhi_chan->completion);
++ write_unlock_bh(&mhi_chan->lock);
++ } else {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Completion packet for invalid channel ID: %d\n", chan);
++ }
+
+ mhi_del_ring_element(mhi_cntrl, mhi_ring);
+ }
--- /dev/null
+From 2bab693a608bdf614b9fcd44083c5100f34b9f77 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Tue, 13 Jul 2021 19:43:26 +0100
+Subject: firmware/efi: Tell memblock about EFI iomem reservations
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 2bab693a608bdf614b9fcd44083c5100f34b9f77 upstream.
+
+kexec_file_load() relies on the memblock infrastructure to avoid
+stamping over regions of memory that are essential to the survival
+of the system.
+
+However, nobody seems to agree on how to flag these regions as reserved,
+and (for example) EFI only publishes its reservations in /proc/iomem
+for the benefit of the traditional, userspace-based kexec tool.
+
+On arm64 platforms with GICv3, this can result in the payload being
+placed at the location of the LPI tables. Shock, horror!
+
+Let's augment the EFI reservation code with a memblock_reserve() call,
+protecting our dear tables from the secondary kernel invasion.
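+
+Conceptually (a hypothetical sketch, not the kernel code): a
+reservation has to land in both bookkeeping structures, because
+kexec_file_load() consults only memblock, while the userspace kexec
+tool reads /proc/iomem:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  struct region { uint64_t start, size; };
+
+  static struct region iomem[16];    /* stands in for the resource tree */
+  static struct region memblk[16];   /* stands in for memblock.reserved */
+  static int n_iomem, n_memblk;
+
+  static void reserve(uint64_t addr, uint64_t size)
+  {
+          iomem[n_iomem++] = (struct region){ addr, size };
+          /* the fix: mirror the reservation into memblock as well */
+          memblk[n_memblk++] = (struct region){ addr, size };
+  }
+
+  int main(void)
+  {
+          reserve(0x80000000, 0x10000);  /* e.g. the LPI tables */
+          printf("iomem: %d, memblock: %d\n", n_iomem, n_memblk);
+          return 0;
+  }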
+
+Reported-by: Moritz Fischer <mdf@kernel.org>
+Tested-by: Moritz Fischer <mdf@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: James Morse <james.morse@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/efi.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -896,6 +896,7 @@ static int __init efi_memreserve_map_roo
+ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
+ {
+ struct resource *res, *parent;
++ int ret;
+
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+ if (!res)
+@@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_ad
+
+ /* we expect a conflict with a 'System RAM' region */
+ parent = request_resource_conflict(&iomem_resource, res);
+- return parent ? request_resource(parent, res) : 0;
++ ret = parent ? request_resource(parent, res) : 0;
++
++ /*
++ * Given that efi_mem_reserve_iomem() can be called at any
++ * time, only call memblock_reserve() if the architecture
++ * keeps the infrastructure around.
++ */
++ if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret)
++ memblock_reserve(addr, size);
++
++ return ret;
+ }
+
+ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
--- /dev/null
+From 68b11e8b1562986c134764433af64e97d30c9fc0 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 20 Jul 2021 10:50:43 +0100
+Subject: io_uring: explicitly count entries for poll reqs
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 68b11e8b1562986c134764433af64e97d30c9fc0 upstream.
+
+If __io_queue_proc() fails to add a second poll entry, e.g. because
+kmalloc() failed, but the file then polls a third waitqueue, that
+attempt may succeed and overwrite the error status. Count the number of
+poll entries we added, so we can set pt->error to zero at the beginning
+and detect when the mentioned scenario happens.
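+
+A minimal userspace mock of the overwrite problem (hypothetical, not
+the io_uring code; 'ok' stands in for kmalloc() success):
+
+  #include <stdio.h>
+
+  struct table { int nr_entries; int error; };
+
+  static void queue_proc_buggy(struct table *pt, int ok)
+  {
+          if (!ok) { pt->error = -12 /* -ENOMEM */; return; }
+          pt->error = 0;        /* a later success hides the error */
+          pt->nr_entries++;
+  }
+
+  static void queue_proc_fixed(struct table *pt, int ok)
+  {
+          if (!ok) { pt->error = -12; return; }
+          pt->nr_entries++;     /* error, once set, stays visible */
+  }
+
+  int main(void)
+  {
+          struct table buggy = {0, 0}, fixed = {0, 0};
+          int outcomes[] = { 1, 0, 1 };  /* 2nd waitqueue fails, 3rd is ok */
+
+          for (int i = 0; i < 3; i++) {
+                  queue_proc_buggy(&buggy, outcomes[i]);
+                  queue_proc_fixed(&fixed, outcomes[i]);
+          }
+          printf("buggy: error=%d (lost), fixed: error=%d entries=%d\n",
+                 buggy.error, fixed.error, fixed.nr_entries);
+          return 0;
+  }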
+
+Cc: stable@vger.kernel.org
+Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/9d6b9e561f88bcc0163623b74a76c39f712151c3.1626774457.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -4916,6 +4916,7 @@ static int io_connect(struct io_kiocb *r
+ struct io_poll_table {
+ struct poll_table_struct pt;
+ struct io_kiocb *req;
++ int nr_entries;
+ int error;
+ };
+
+@@ -5098,11 +5099,11 @@ static void __io_queue_proc(struct io_po
+ struct io_kiocb *req = pt->req;
+
+ /*
+- * If poll->head is already set, it's because the file being polled
+- * uses multiple waitqueues for poll handling (eg one for read, one
+- * for write). Setup a separate io_poll_iocb if this happens.
++ * The file being polled uses multiple waitqueues for poll handling
++ * (e.g. one for read, one for write). Setup a separate io_poll_iocb
++ * if this happens.
+ */
+- if (unlikely(poll->head)) {
++ if (unlikely(pt->nr_entries)) {
+ struct io_poll_iocb *poll_one = poll;
+
+ /* already have a 2nd entry, fail a third attempt */
+@@ -5124,7 +5125,7 @@ static void __io_queue_proc(struct io_po
+ *poll_ptr = poll;
+ }
+
+- pt->error = 0;
++ pt->nr_entries++;
+ poll->head = head;
+
+ if (poll->events & EPOLLEXCLUSIVE)
+@@ -5210,9 +5211,12 @@ static __poll_t __io_arm_poll_handler(st
+
+ ipt->pt._key = mask;
+ ipt->req = req;
+- ipt->error = -EINVAL;
++ ipt->error = 0;
++ ipt->nr_entries = 0;
+
+ mask = vfs_poll(req->file, &ipt->pt) & poll->events;
++ if (unlikely(!ipt->nr_entries) && !ipt->error)
++ ipt->error = -EINVAL;
+
+ spin_lock_irq(&ctx->completion_lock);
+ if (likely(poll->head)) {
--- /dev/null
+From 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 20 Jul 2021 10:50:44 +0100
+Subject: io_uring: remove double poll entry on arm failure
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e upstream.
+
+__io_queue_proc() can enqueue both poll entries and still fail
+afterwards, so the callers trying to cancel it should also try to remove
+the second poll entry (if any).
+
+For example, it may leave the request alive, referencing an io_uring
+context, but not accessible for cancellation:
+
+[ 282.599913][ T1620] task:iou-sqp-23145 state:D stack:28720 pid:23155 ppid: 8844 flags:0x00004004
+[ 282.609927][ T1620] Call Trace:
+[ 282.613711][ T1620] __schedule+0x93a/0x26f0
+[ 282.634647][ T1620] schedule+0xd3/0x270
+[ 282.638874][ T1620] io_uring_cancel_generic+0x54d/0x890
+[ 282.660346][ T1620] io_sq_thread+0xaac/0x1250
+[ 282.696394][ T1620] ret_from_fork+0x1f/0x30
+
+Cc: stable@vger.kernel.org
+Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users")
+Reported-and-tested-by: syzbot+ac957324022b7132accf@syzkaller.appspotmail.com
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/0ec1228fc5eda4cb524eeda857da8efdc43c331c.1626774457.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5219,6 +5219,8 @@ static __poll_t __io_arm_poll_handler(st
+ ipt->error = -EINVAL;
+
+ spin_lock_irq(&ctx->completion_lock);
++ if (ipt->error)
++ io_poll_remove_double(req);
+ if (likely(poll->head)) {
+ spin_lock(&poll->head->lock);
+ if (unlikely(list_empty(&poll->wait.entry))) {
--- /dev/null
+From 09cfae9f13d51700b0fecf591dcd658fc5375428 Mon Sep 17 00:00:00 2001
+From: Markus Boehme <markubo@amazon.com>
+Date: Tue, 20 Jul 2021 16:26:19 -0700
+Subject: ixgbe: Fix packet corruption due to missing DMA sync
+
+From: Markus Boehme <markubo@amazon.com>
+
+commit 09cfae9f13d51700b0fecf591dcd658fc5375428 upstream.
+
+When receiving a packet with multiple fragments, hardware may still
+touch the first fragment until the entire packet has been received. The
+driver therefore keeps the first fragment mapped for DMA until end of
+packet has been asserted, and delays its dma_sync call until then.
+
+The driver tries to fit multiple receive buffers on one page. When using
+3K receive buffers (e.g. when using jumbo frames with legacy-rx turned
+off, so build_skb is used) on an architecture with 4K pages, the driver
+allocates an order-1 compound page and uses one page per receive buffer.
+To determine the correct offset for a delayed DMA sync of the first
+fragment of a multi-fragment packet, the driver then cannot just use
+PAGE_MASK on the DMA address but has to construct a mask based on the
+actual size of the backing page.
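+
+A worked example with assumed values (4K base pages, an order-1
+compound page of 8K starting at 0x10000, skb->data at 0x11040 in the
+second 4K page):
+
+  #include <stdio.h>
+
+  #define PAGE_SIZE 4096UL
+  #define PAGE_MASK (~(PAGE_SIZE - 1))
+
+  int main(void)
+  {
+          unsigned long pg_size = 2 * PAGE_SIZE;     /* 8K compound page */
+          unsigned long data = 0x11040;              /* skb->data */
+
+          unsigned long bad  = data & ~PAGE_MASK;    /* offset in 4K page */
+          unsigned long good = data & (pg_size - 1); /* offset in mapping */
+
+          printf("PAGE_MASK:   0x%lx (syncs the wrong 4K page)\n", bad);
+          printf("pg_size - 1: 0x%lx (correct offset)\n", good);
+          return 0;
+  }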
+
+Using PAGE_MASK in the 3K RX buffer/4K page architecture configuration
+will always sync the first page of a compound page. With the SWIOTLB
+enabled this can lead to corrupted packets (zeroed out first fragment,
+re-used garbage from another packet) and various consequences, such as
+slow/stalling data transfers and connection resets. For example, testing
+on a link with MTU exceeding 3058 bytes on a host with SWIOTLB enabled
+(e.g. "iommu=soft swiotlb=262144,force") TCP transfers quickly fizzle
+out without this patch.
+
+Cc: stable@vger.kernel.org
+Fixes: 0c5661ecc5dd7 ("ixgbe: fix crash in build_skb Rx code path")
+Signed-off-by: Markus Boehme <markubo@amazon.com>
+Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct i
+ struct sk_buff *skb)
+ {
+ if (ring_uses_build_skb(rx_ring)) {
+- unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
++ unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1;
++ unsigned long offset = (unsigned long)(skb->data) & mask;
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ IXGBE_CB(skb)->dma,
--- /dev/null
+From 8d4abca95ecc82fc8c41912fa0085281f19cc29f Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Date: Mon, 19 Apr 2021 18:43:32 -0500
+Subject: media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf()
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+commit 8d4abca95ecc82fc8c41912fa0085281f19cc29f upstream.
+
+Fix an 11-year old bug in ngene_command_config_free_buf() while
+addressing the following warnings caught with -Warray-bounds:
+
+arch/alpha/include/asm/string.h:22:16: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds]
+arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds]
+
+The problem is that the original code tries to copy 6 bytes of data
+into _config_, a one-byte member of the wrong structure,
+FW_CONFIGURE_BUFFERS, in a single call to memcpy(). This causes a
+legitimate compiler warning because memcpy() overruns the length of
+&com.cmd.ConfigureBuffers.config. The right structure seems to be
+FW_CONFIGURE_FREE_BUFFERS instead, because it contains 6 more members
+apart from the header _hdr_. Also, the name of the function
+ngene_command_config_free_buf() suggests that the actual intention is
+to ConfigureFreeBuffers, instead of ConfigureBuffers (which takes place
+in the function ngene_command_config_buf(), above).
+
+Fix this by enclosing those 6 members of struct FW_CONFIGURE_FREE_BUFFERS
+in a new struct config, and using &com.cmd.ConfigureFreeBuffers.config
+as the destination address, instead of &com.cmd.ConfigureBuffers.config,
+when calling memcpy().
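+
+A simplified mock (hypothetical types) showing why the nested struct
+makes the 6-byte memcpy() bounds-correct:
+
+  #include <assert.h>
+  #include <stdint.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  struct hdr { uint8_t opcode, length; };
+
+  struct configure_free_buffers {   /* fixed layout: 6 named bytes */
+          struct hdr hdr;
+          struct {
+                  uint8_t uvi1, uvi2, tvo, aud1, aud2, tva;
+          } __attribute__((packed)) config;
+  } __attribute__((packed));
+
+  int main(void)
+  {
+          uint8_t src[6] = { 1, 2, 3, 4, 5, 6 };
+          struct configure_free_buffers cmd;
+
+          /* the copy length now equals the destination size exactly */
+          assert(sizeof(cmd.config) == sizeof(src));
+          memcpy(&cmd.config, src, sizeof(cmd.config));
+
+          printf("copied %zu bytes into a %zu-byte member\n",
+                 sizeof(src), sizeof(cmd.config));
+          return 0;
+  }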
+
+This also helps with the ongoing efforts to globally enable
+-Warray-bounds and get us closer to being able to tighten the
+FORTIFY_SOURCE routines on memcpy().
+
+Link: https://github.com/KSPP/linux/issues/109
+Fixes: dae52d009fc9 ("V4L/DVB: ngene: Initial check-in")
+Cc: stable@vger.kernel.org
+Reported-by: kernel test robot <lkp@intel.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Link: https://lore.kernel.org/linux-hardening/20210420001631.GA45456@embeddedor/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/media/pci/ngene/ngene-core.c | 2 +-
+ drivers/media/pci/ngene/ngene.h | 14 ++++++++------
+ 2 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/drivers/media/pci/ngene/ngene-core.c
++++ b/drivers/media/pci/ngene/ngene-core.c
+@@ -385,7 +385,7 @@ static int ngene_command_config_free_buf
+
+ com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER;
+ com.cmd.hdr.Length = 6;
+- memcpy(&com.cmd.ConfigureBuffers.config, config, 6);
++ memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6);
+ com.in_len = 6;
+ com.out_len = 0;
+
+--- a/drivers/media/pci/ngene/ngene.h
++++ b/drivers/media/pci/ngene/ngene.h
+@@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS {
+
+ struct FW_CONFIGURE_FREE_BUFFERS {
+ struct FW_HEADER hdr;
+- u8 UVI1_BufferLength;
+- u8 UVI2_BufferLength;
+- u8 TVO_BufferLength;
+- u8 AUD1_BufferLength;
+- u8 AUD2_BufferLength;
+- u8 TVA_BufferLength;
++ struct {
++ u8 UVI1_BufferLength;
++ u8 UVI2_BufferLength;
++ u8 TVO_BufferLength;
++ u8 AUD1_BufferLength;
++ u8 AUD2_BufferLength;
++ u8 TVA_BufferLength;
++ } __packed config;
+ } __attribute__ ((__packed__));
+
+ struct FW_CONFIGURE_UART {
--- /dev/null
+From 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Thu, 3 Jun 2021 01:15:59 +0200
+Subject: posix-cpu-timers: Fix rearm racing against process tick
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 upstream.
+
+Since the process-wide cputime counter is started locklessly from
+posix_cpu_timer_rearm(), it can be concurrently stopped by operations
+on other timers from the same thread group, as in the following
+unlucky scenario:
+
+ CPU 0 CPU 1
+ ----- -----
+ timer_settime(TIMER B)
+ posix_cpu_timer_rearm(TIMER A)
+ cpu_clock_sample_group()
+ (pct->timers_active already true)
+
+ handle_posix_cpu_timers()
+ check_process_timers()
+ stop_process_timers()
+ pct->timers_active = false
+ arm_timer(TIMER A)
+
+ tick -> run_posix_cpu_timers()
+ // sees !pct->timers_active, ignore
+ // our TIMER A
+
+Fix this by simply starting the process-wide cputime counter and
+arming the timer within the same locked block.
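+
+A hypothetical userspace sketch of the ordering (a pthread mutex
+standing in for the sighand lock; not the kernel code):
+
+  #include <pthread.h>
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  static pthread_mutex_t sighand = PTHREAD_MUTEX_INITIALIZER;
+  static bool timers_active;
+  static bool timer_armed;
+
+  static void rearm_buggy(void)
+  {
+          timers_active = true;   /* started locklessly */
+          /* window: another thread holding the lock may run
+           * stop_process_timers() here and clear timers_active */
+          pthread_mutex_lock(&sighand);
+          timer_armed = true;     /* armed, but ticks would ignore it */
+          pthread_mutex_unlock(&sighand);
+  }
+
+  static void rearm_fixed(void)
+  {
+          pthread_mutex_lock(&sighand);
+          timers_active = true;   /* start and arm in the same block */
+          timer_armed = true;
+          pthread_mutex_unlock(&sighand);
+  }
+
+  int main(void)
+  {
+          rearm_buggy();          /* demonstrates the racy ordering */
+          rearm_fixed();
+          printf("active=%d armed=%d\n", timers_active, timer_armed);
+          return 0;
+  }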
+
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Fixes: 60f2ceaa8111 ("posix-cpu-timers: Remove unnecessary locking around cpu_clock_sample_group")
+Cc: stable@vger.kernel.org
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/posix-cpu-timers.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/time/posix-cpu-timers.c
++++ b/kernel/time/posix-cpu-timers.c
+@@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct
+ if (!p)
+ goto out;
+
++ /* Protect timer list r/w in arm_timer() */
++ sighand = lock_task_sighand(p, &flags);
++ if (unlikely(sighand == NULL))
++ goto out;
++
+ /*
+ * Fetch the current sample and update the timer's expiry time.
+ */
+@@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct
+
+ bump_cpu_timer(timer, now);
+
+- /* Protect timer list r/w in arm_timer() */
+- sighand = lock_task_sighand(p, &flags);
+- if (unlikely(sighand == NULL))
+- goto out;
+-
+ /*
+ * Now re-arm for the new expiry time.
+ */
--- /dev/null
+From 0db282ba2c12c1515d490d14a1ff696643ab0f1b Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne <pcc@google.com>
+Date: Fri, 23 Jul 2021 15:50:04 -0700
+Subject: selftest: use mmap instead of posix_memalign to allocate memory
+
+From: Peter Collingbourne <pcc@google.com>
+
+commit 0db282ba2c12c1515d490d14a1ff696643ab0f1b upstream.
+
+This test passes pointers obtained from anon_allocate_area to the
+userfaultfd and mremap APIs. This causes a problem if the system
+allocator returns tagged pointers, because with the tagged address ABI
+the kernel rejects tagged addresses passed to these APIs, which ends up
+causing the test to fail. To make this test compatible with such system
+allocators, stop using the system allocator to allocate memory in
+anon_allocate_area, and instead just use mmap.
+
+Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com
+Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241
+Fixes: c47174fc362a ("userfaultfd: selftest")
+Co-developed-by: Lokesh Gidra <lokeshgidra@google.com>
+Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
+Signed-off-by: Peter Collingbourne <pcc@google.com>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Cc: Dave Martin <Dave.Martin@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Alistair Delva <adelva@google.com>
+Cc: William McVicker <willmcvicker@google.com>
+Cc: Evgenii Stepanov <eugenis@google.com>
+Cc: Mitch Phillips <mitchp@google.com>
+Cc: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: <stable@vger.kernel.org> [5.4]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/vm/userfaultfd.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/tools/testing/selftests/vm/userfaultfd.c
++++ b/tools/testing/selftests/vm/userfaultfd.c
+@@ -180,8 +180,10 @@ static int anon_release_pages(char *rel_
+
+ static void anon_allocate_area(void **alloc_area)
+ {
+- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
+- fprintf(stderr, "out of memory\n");
++ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
++ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
++	if (*alloc_area == MAP_FAILED) {
++ fprintf(stderr, "mmap of anonymous memory failed");
+ *alloc_area = NULL;
+ }
+ }
usb-dwc2-gadget-fix-goutnak-flow-for-slave-mode.patch
usb-dwc2-gadget-fix-sending-zero-length-packet-in-ddma-mode.patch
usb-typec-stusb160x-register-role-switch-before-interrupt-registration.patch
+firmware-efi-tell-memblock-about-efi-iomem-reservations.patch
+tracepoints-update-static_call-before-tp_funcs-when-adding-a-tracepoint.patch
+tracing-histogram-rename-cpu-to-common_cpu.patch
+tracing-fix-bug-in-rb_per_cpu_empty-that-might-cause-deadloop.patch
+tracing-synthetic-event-field_pos-is-an-index-not-a-boolean.patch
+btrfs-check-for-missing-device-in-btrfs_trim_fs.patch
+media-ngene-fix-out-of-bounds-bug-in-ngene_command_config_free_buf.patch
+ixgbe-fix-packet-corruption-due-to-missing-dma-sync.patch
+bus-mhi-core-validate-channel-id-when-processing-command-completions.patch
+posix-cpu-timers-fix-rearm-racing-against-process-tick.patch
+selftest-use-mmap-instead-of-posix_memalign-to-allocate-memory.patch
+io_uring-explicitly-count-entries-for-poll-reqs.patch
+io_uring-remove-double-poll-entry-on-arm-failure.patch
+userfaultfd-do-not-untag-user-pointers.patch
--- /dev/null
+From 352384d5c84ebe40fa77098cc234fe173247d8ef Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Thu, 22 Jul 2021 21:52:18 -0400
+Subject: tracepoints: Update static_call before tp_funcs when adding a tracepoint
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 352384d5c84ebe40fa77098cc234fe173247d8ef upstream.
+
+Because of the significant overhead that retpolines pose on indirect
+calls, the tracepoint code was updated to use the new "static_calls" that
+can modify the running code to directly call a function instead of using
+an indirect caller, and this function can be changed at runtime.
+
+In the tracepoint code that calls all the registered callbacks that are
+attached to a tracepoint, the following is done:
+
+ it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs);
+ if (it_func_ptr) {
+ __data = (it_func_ptr)->data;
+ static_call(tp_func_##name)(__data, args);
+ }
+
+If there's just a single callback, the static_call is updated to call
+that callback directly. Once another handler is added, the static
+caller is updated to call the iterator, which simply loops over all the
+funcs in the array and calls each of the callbacks like the old method,
+using indirect calling.
+
+The issue was discovered with a race between updating the funcs array and
+updating the static_call. The funcs array was updated first and then the
+static_call was updated. This is not an issue as long as the first element
+in the old array is the same as the first element in the new array. But
+that assumption is incorrect, because callbacks also have a priority
+field, and if there's a callback added that has a higher priority than the
+callback on the old array, then it will become the first callback in the
+new array. This means that it is possible to call the old callback with
+the new callback data element, which can cause a kernel panic.
+
+ static_call = callback1()
+ funcs[] = {callback1,data1};
+ callback2 has higher priority than callback1
+
+ CPU 1 CPU 2
+ ----- -----
+
+ new_funcs = {callback2,data2},
+ {callback1,data1}
+
+ rcu_assign_pointer(tp->funcs, new_funcs);
+
+ /*
+ * Now tp->funcs has the new array
+ * but the static_call still calls callback1
+ */
+
+ it_func_ptr = tp->funcs [ new_funcs ]
+ data = it_func_ptr->data [ data2 ]
+ static_call(callback1, data);
+
+ /* Now callback1 is called with
+ * callback2's data */
+
+ [ KERNEL PANIC ]
+
+ update_static_call(iterator);
+
+To prevent this from happening, always switch the static_call to the
+iterator before assigning the tp->funcs to the new array. The iterator will
+always properly match the callback with its data.
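+
+A hypothetical userspace mock of the ordering (stdatomic standing in
+for static_call updates and RCU publication; not the kernel code):
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  typedef void (*cb_t)(void *data);
+  struct tp_func { cb_t func; void *data; };
+
+  static void callback1(void *d) { printf("callback1(%s)\n", (char *)d); }
+  static void callback2(void *d) { printf("callback2(%s)\n", (char *)d); }
+
+  static struct tp_func funcs_one[] = { { callback1, "data1" }, { 0, 0 } };
+  static struct tp_func funcs_two[] = {
+          { callback2, "data2" },        /* higher priority: now first */
+          { callback1, "data1" },
+          { 0, 0 },
+  };
+
+  static struct tp_func *_Atomic funcs = funcs_one;
+  static _Atomic cb_t fast_call = callback1;
+
+  static void iterator(void *unused)
+  {
+          (void)unused;
+          for (struct tp_func *f = atomic_load(&funcs); f->func; f++)
+                  f->func(f->data);
+  }
+
+  static void do_trace(void)
+  {
+          /* mirrors __DO_TRACE: data comes from funcs[0], the function
+           * from the separately updated fast path */
+          struct tp_func *f = atomic_load(&funcs);
+          atomic_load(&fast_call)(f[0].data);
+  }
+
+  int main(void)
+  {
+          do_trace();                       /* callback1(data1) */
+          /* fixed order: retarget the fast path to the iterator
+           * *before* publishing the reordered array */
+          atomic_store(&fast_call, iterator);
+          atomic_store(&funcs, funcs_two);
+          do_trace();                       /* iterator runs both */
+          return 0;
+  }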
+
+To trigger this bug:
+
+ In one terminal:
+
+ while :; do hackbench 50; done
+
+ In another terminal
+
+ echo 1 > /sys/kernel/tracing/events/sched/sched_waking/enable
+ while :; do
+ echo 1 > /sys/kernel/tracing/set_event_pid;
+ sleep 0.5
+ echo 0 > /sys/kernel/tracing/set_event_pid;
+ sleep 0.5
+ done
+
+And it doesn't take long to crash. This is because the set_event_pid adds
+a callback to the sched_waking tracepoint with a high priority, which will
+be called before the sched_waking trace event callback is called.
+
+Note, removal down to a single callback updates the array first, before
+changing the static_call to that single callback, which is the proper
+order, as the first element in the array is the same as what the
+static_call is being changed to.
+
+Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/
+
+Cc: stable@vger.kernel.org
+Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()")
+Reported-by: Stefan Metzmacher <metze@samba.org>
+Tested-by: Stefan Metzmacher <metze@samba.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/tracepoint.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/tracepoint.c
++++ b/kernel/tracepoint.c
+@@ -320,8 +320,8 @@ static int tracepoint_add_func(struct tr
+ * a pointer to it. This array is referenced by __DO_TRACE from
+ * include/linux/tracepoint.h using rcu_dereference_sched().
+ */
+- rcu_assign_pointer(tp->funcs, tp_funcs);
+ tracepoint_update_call(tp, tp_funcs, false);
++ rcu_assign_pointer(tp->funcs, tp_funcs);
+ static_key_enable(&tp->key);
+
+ release_probes(old);
--- /dev/null
+From 67f0d6d9883c13174669f88adac4f0ee656cc16a Mon Sep 17 00:00:00 2001
+From: Haoran Luo <www@aegistudio.net>
+Date: Wed, 21 Jul 2021 14:12:07 +0000
+Subject: tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop.
+
+From: Haoran Luo <www@aegistudio.net>
+
+commit 67f0d6d9883c13174669f88adac4f0ee656cc16a upstream.
+
+The "rb_per_cpu_empty()" function misinterprets the condition (as
+not-empty) when "head_page" and "commit_page" of "struct
+ring_buffer_per_cpu" point to the same buffer page whose
+"buffer_data_page" is empty and whose "read" field is non-zero.
+
+An error scenario can be constructed as follows (kernel perspective):
+
+1. All pages in the buffer have been accessed by reader(s) so that all
+of them have a non-zero "read" field.
+
+2. Read and clear all buffer pages so that "rb_num_of_entries()" returns
+0, indicating there is no more data to read. It is also required that
+"read_page", "commit_page" and "tail_page" point to the same page, while
+"head_page" is the page right after them.
+
+3. Invoke "ring_buffer_lock_reserve()" with a "length" large enough to
+shoot past the end of the current tail buffer page. Now "head_page",
+"commit_page" and "tail_page" all point to the same page.
+
+4. Discard the current event with "ring_buffer_discard_commit()", so
+that "head_page", "commit_page" and "tail_page" point to a page whose
+buffer data page is now empty.
+
+Once the error scenario has been constructed, "tracing_read_pipe" is
+trapped inside a deadloop: "trace_empty()" returns 0 since
+"rb_per_cpu_empty()" returns 0 when it hits the CPU containing such a
+constructed ring buffer. Then "trace_find_next_entry_inc()" always
+returns NULL since "rb_num_of_entries()" reports there are no more
+entries to read. Finally, "trace_seq_to_user()" returns "-EBUSY",
+sending "tracing_read_pipe" back to the start of the "waitagain" loop.
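+
+The predicate change can be mocked in userspace (hypothetical
+simplified pages with just "read" and "commit" counters):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  struct page { unsigned read, commit; };
+
+  static bool old_empty(struct page *reader, struct page *head,
+                        struct page *commit)
+  {
+          return reader->read == reader->commit &&
+                 (commit == reader ||
+                  (commit == head && head->read == commit->commit));
+  }
+
+  static bool new_empty(struct page *reader, struct page *head,
+                        struct page *commit)
+  {
+          if (reader->read != reader->commit)
+                  return false;          /* reader page not exhausted */
+          if (commit == reader)
+                  return true;           /* all committed data was read */
+          if (commit != head)
+                  return false;          /* data between head and commit */
+          return commit->commit == 0;    /* head page: empty iff nothing
+                                            was committed */
+  }
+
+  int main(void)
+  {
+          /* step 4 of the scenario: head == commit, its data page is
+           * empty (commit == 0) but it was read before (read != 0) */
+          struct page reader = { .read = 8, .commit = 8 };
+          struct page head   = { .read = 8, .commit = 0 };
+
+          printf("old: %s, new: %s\n",
+                 old_empty(&reader, &head, &head) ? "empty" : "NOT empty",
+                 new_empty(&reader, &head, &head) ? "empty" : "NOT empty");
+          return 0;
+  }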
+
+I've also written a proof-of-concept script that constructs the
+scenario and triggers the bug automatically; you can use it to trace
+and validate my reasoning above:
+
+ https://github.com/aegistudio/RingBufferDetonator.git
+
+Tests have been carried out on Linux kernel 5.14-rc2
+(2734d6c1b1a089fb593ef6a23d4b70903526fe0c), on my fixed version of the
+kernel (to check whether the update fixes the bug) and on some older
+kernels (to determine the range of affected kernels). Test results are
+also attached to the proof-of-concept repository.
+
+Link: https://lore.kernel.org/linux-trace-devel/YPaNxsIlb2yjSi5Y@aegistudio/
+Link: https://lore.kernel.org/linux-trace-devel/YPgrN85WL9VyrZ55@aegistudio
+
+Cc: stable@vger.kernel.org
+Fixes: bf41a158cacba ("ring-buffer: make reentrant")
+Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Signed-off-by: Haoran Luo <www@aegistudio.net>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c | 28 ++++++++++++++++++++++++----
+ 1 file changed, 24 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -3649,10 +3649,30 @@ static bool rb_per_cpu_empty(struct ring
+ if (unlikely(!head))
+ return true;
+
+- return reader->read == rb_page_commit(reader) &&
+- (commit == reader ||
+- (commit == head &&
+- head->read == rb_page_commit(commit)));
++ /* Reader should exhaust content in reader page */
++ if (reader->read != rb_page_commit(reader))
++ return false;
++
++ /*
++ * If writers are committing on the reader page, knowing all
++ * committed content has been read, the ring buffer is empty.
++ */
++ if (commit == reader)
++ return true;
++
++ /*
++ * If writers are committing on a page other than reader page
++ * and head page, there should always be content to read.
++ */
++ if (commit != head)
++ return false;
++
++ /*
++	 * Writers are committing on the head page; we just need to care
++	 * about whether there is committed data, and the reader will swap
++	 * the reader page with the head page when it is about to read.
++ */
++ return rb_page_commit(commit) == 0;
+ }
+
+ /**
--- /dev/null
+From 1e3bac71c5053c99d438771fc9fa5082ae5d90aa Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Wed, 21 Jul 2021 11:00:53 -0400
+Subject: tracing/histogram: Rename "cpu" to "common_cpu"
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 1e3bac71c5053c99d438771fc9fa5082ae5d90aa upstream.
+
+Currently the histogram logic allows the user to write "cpu" in as an
+event field, and it will record the CPU that the event happened on.
+
+The problem with this is that there are a lot of events that have "cpu"
+as a real field, and using "cpu" for the CPU the event ran on makes it
+impossible to run histograms on the "cpu" field of those events.
+For example, if I want to have a histogram on the count of the
+workqueue_queue_work event on its cpu field, running:
+
+ ># echo 'hist:keys=cpu' > events/workqueue/workqueue_queue_work/trigger
+
+Gives a misleading and wrong result.
+
+Change the name to "common_cpu", as no event should have "common_*"
+fields: that prefix is reserved for fields used by all events. The name
+also makes sense here, as common_cpu is indeed a field used by all
+events.
+
+Now we can even do:
+
+ ># echo 'hist:keys=common_cpu,cpu if cpu < 100' > events/workqueue/workqueue_queue_work/trigger
+ ># cat events/workqueue/workqueue_queue_work/hist
+ # event histogram
+ #
+ # trigger info: hist:keys=common_cpu,cpu:vals=hitcount:sort=hitcount:size=2048 if cpu < 100 [active]
+ #
+
+ { common_cpu: 0, cpu: 2 } hitcount: 1
+ { common_cpu: 0, cpu: 4 } hitcount: 1
+ { common_cpu: 7, cpu: 7 } hitcount: 1
+ { common_cpu: 0, cpu: 7 } hitcount: 1
+ { common_cpu: 0, cpu: 1 } hitcount: 1
+ { common_cpu: 0, cpu: 6 } hitcount: 2
+ { common_cpu: 0, cpu: 5 } hitcount: 2
+ { common_cpu: 1, cpu: 1 } hitcount: 4
+ { common_cpu: 6, cpu: 6 } hitcount: 4
+ { common_cpu: 5, cpu: 5 } hitcount: 14
+ { common_cpu: 4, cpu: 4 } hitcount: 26
+ { common_cpu: 0, cpu: 0 } hitcount: 39
+ { common_cpu: 2, cpu: 2 } hitcount: 184
+
+Now, for backward compatibility, I added a trick. If "cpu" is used and
+the field is not found, it will fall back to "common_cpu" and work as
+it did before. This way, it will still work for old programs that use
+"cpu" to get the actual CPU, but if the event has a "cpu" field, it
+will get that event's "cpu" field, which is probably what it wants
+anyway.
+
+I updated the tracefs/README to include documentation about both the
+common_timestamp and the common_cpu. This way, if that text is present in
+the README, then an application can know that common_cpu is supported over
+just plain "cpu".
+
+Link: https://lkml.kernel.org/r/20210721110053.26b4f641@oasis.local.home
+
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: 8b7622bf94a44 ("tracing: Add cpu field for hist triggers")
+Reviewed-by: Tom Zanussi <zanussi@kernel.org>
+Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/trace/histogram.rst | 2 +-
+ kernel/trace/trace.c | 4 ++++
+ kernel/trace/trace_events_hist.c | 22 ++++++++++++++++------
+ 3 files changed, 21 insertions(+), 7 deletions(-)
+
+--- a/Documentation/trace/histogram.rst
++++ b/Documentation/trace/histogram.rst
+@@ -191,7 +191,7 @@ Documentation written by Tom Zanussi
+ with the event, in nanoseconds. May be
+ modified by .usecs to have timestamps
+ interpreted as microseconds.
+- cpu int the cpu on which the event occurred.
++ common_cpu int the cpu on which the event occurred.
+ ====================== ==== =======================================
+
+ Extended error information
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5241,6 +5241,10 @@ static const char readme_msg[] =
+ "\t [:name=histname1]\n"
+ "\t [:<handler>.<action>]\n"
+ "\t [if <filter>]\n\n"
++ "\t Note, special fields can be used as well:\n"
++ "\t common_timestamp - to record current timestamp\n"
++ "\t common_cpu - to record the CPU the event happened on\n"
++ "\n"
+ "\t When a matching event is hit, an entry is added to a hash\n"
+ "\t table using the key(s) and value(s) named, and the value of a\n"
+ "\t sum called 'hitcount' is incremented. Keys and values\n"
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -1095,7 +1095,7 @@ static const char *hist_field_name(struc
+ field->flags & HIST_FIELD_FL_ALIAS)
+ field_name = hist_field_name(field->operands[0], ++level);
+ else if (field->flags & HIST_FIELD_FL_CPU)
+- field_name = "cpu";
++ field_name = "common_cpu";
+ else if (field->flags & HIST_FIELD_FL_EXPR ||
+ field->flags & HIST_FIELD_FL_VAR_REF) {
+ if (field->system) {
+@@ -1975,14 +1975,24 @@ parse_field(struct hist_trigger_data *hi
+ hist_data->enable_timestamps = true;
+ if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ hist_data->attrs->ts_in_usecs = true;
+- } else if (strcmp(field_name, "cpu") == 0)
++ } else if (strcmp(field_name, "common_cpu") == 0)
+ *flags |= HIST_FIELD_FL_CPU;
+ else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+- hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name));
+- field = ERR_PTR(-EINVAL);
+- goto out;
++ /*
++ * For backward compatibility, if field_name
++ * was "cpu", then we treat this the same as
++ * common_cpu.
++ */
++ if (strcmp(field_name, "cpu") == 0) {
++ *flags |= HIST_FIELD_FL_CPU;
++ } else {
++ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
++ errpos(field_name));
++ field = ERR_PTR(-EINVAL);
++ goto out;
++ }
+ }
+ }
+ out:
+@@ -5057,7 +5067,7 @@ static void hist_field_print(struct seq_
+ seq_printf(m, "%s=", hist_field->var.name);
+
+ if (hist_field->flags & HIST_FIELD_FL_CPU)
+- seq_puts(m, "cpu");
++ seq_puts(m, "common_cpu");
+ else if (field_name) {
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
+ hist_field->flags & HIST_FIELD_FL_ALIAS)
--- /dev/null
+From 3b13911a2fd0dd0146c9777a254840c5466cf120 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Wed, 21 Jul 2021 19:10:08 -0400
+Subject: tracing: Synthetic event field_pos is an index not a boolean
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 3b13911a2fd0dd0146c9777a254840c5466cf120 upstream.
+
+Performing the following:
+
+ ># echo 'wakeup_lat s32 pid; u64 delta; char wake_comm[]' > synthetic_events
+ ># echo 'hist:keys=pid:__arg__1=common_timestamp.usecs' > events/sched/sched_waking/trigger
+ ># echo 'hist:keys=next_pid:pid=next_pid,delta=common_timestamp.usecs-$__arg__1:onmatch(sched.sched_waking).trace(wakeup_lat,$pid,$delta,prev_comm)'\
+ > events/sched/sched_switch/trigger
+ ># echo 1 > events/synthetic/enable
+
+Crashed the kernel:
+
+ BUG: kernel NULL pointer dereference, address: 000000000000001b
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] PREEMPT SMP
+ CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.13.0-rc5-test+ #104
+ Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
+ RIP: 0010:strlen+0x0/0x20
+ Code: f6 82 80 2b 0b bc 20 74 11 0f b6 50 01 48 83 c0 01 f6 82 80 2b 0b bc
+ 20 75 ef c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <80> 3f 00 74 10
+ 48 89 f8 48 83 c0 01 80 38 9 f8 c3 31
+ RSP: 0018:ffffaa75000d79d0 EFLAGS: 00010046
+ RAX: 0000000000000002 RBX: ffff9cdb55575270 RCX: 0000000000000000
+ RDX: ffff9cdb58c7a320 RSI: ffffaa75000d7b40 RDI: 000000000000001b
+ RBP: ffffaa75000d7b40 R08: ffff9cdb40a4f010 R09: ffffaa75000d7ab8
+ R10: ffff9cdb4398c700 R11: 0000000000000008 R12: ffff9cdb58c7a320
+ R13: ffff9cdb55575270 R14: ffff9cdb58c7a000 R15: 0000000000000018
+ FS: 0000000000000000(0000) GS:ffff9cdb5aa00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000000000001b CR3: 00000000c0612006 CR4: 00000000001706e0
+ Call Trace:
+ trace_event_raw_event_synth+0x90/0x1d0
+ action_trace+0x5b/0x70
+ event_hist_trigger+0x4bd/0x4e0
+ ? cpumask_next_and+0x20/0x30
+ ? update_sd_lb_stats.constprop.0+0xf6/0x840
+ ? __lock_acquire.constprop.0+0x125/0x550
+ ? find_held_lock+0x32/0x90
+ ? sched_clock_cpu+0xe/0xd0
+ ? lock_release+0x155/0x440
+ ? update_load_avg+0x8c/0x6f0
+ ? enqueue_entity+0x18a/0x920
+ ? __rb_reserve_next+0xe5/0x460
+ ? ring_buffer_lock_reserve+0x12a/0x3f0
+ event_triggers_call+0x52/0xe0
+ trace_event_buffer_commit+0x1ae/0x240
+ trace_event_raw_event_sched_switch+0x114/0x170
+ __traceiter_sched_switch+0x39/0x50
+ __schedule+0x431/0xb00
+ schedule_idle+0x28/0x40
+ do_idle+0x198/0x2e0
+ cpu_startup_entry+0x19/0x20
+ secondary_startup_64_no_verify+0xc2/0xcb
+
+The reason is that the synthetic event code keeps track of each field's
+position in the fields array via the field_pos member of the
+synth_field structure. Unfortunately, that member is a boolean for some
+reason, so any field_pos greater than 1 collapses to 1 and becomes a
+bug (in this case it was 2).
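+
+The bug class is easy to demonstrate in isolation (a minimal sketch,
+not the tracing code):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          bool bool_pos = 2;             /* third field, index 2 */
+          unsigned int uint_pos = 2;
+
+          /* prints "bool: 1, unsigned int: 2" -- the bool collapses
+           * every non-zero index to 1, pointing at the wrong field */
+          printf("bool: %d, unsigned int: %u\n", bool_pos, uint_pos);
+          return 0;
+  }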
+
+Link: https://lkml.kernel.org/r/20210721191008.638bce34@oasis.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: bd82631d7ccdc ("tracing: Add support for dynamic strings to synthetic events")
+Reviewed-by: Tom Zanussi <zanussi@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_synth.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_synth.h
++++ b/kernel/trace/trace_synth.h
+@@ -14,10 +14,10 @@ struct synth_field {
+ char *name;
+ size_t size;
+ unsigned int offset;
++ unsigned int field_pos;
+ bool is_signed;
+ bool is_string;
+ bool is_dynamic;
+- bool field_pos;
+ };
+
+ struct synth_event {
--- /dev/null
+From e71e2ace5721a8b921dca18b045069e7bb411277 Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne <pcc@google.com>
+Date: Fri, 23 Jul 2021 15:50:01 -0700
+Subject: userfaultfd: do not untag user pointers
+
+From: Peter Collingbourne <pcc@google.com>
+
+commit e71e2ace5721a8b921dca18b045069e7bb411277 upstream.
+
+Patch series "userfaultfd: do not untag user pointers", v5.
+
+If a user program uses userfaultfd on ranges of heap memory, it may end
+up passing a tagged pointer to the kernel in the range.start field of
+the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable
+allocator, or on Android if using the Tagged Pointers feature for MTE
+readiness [1].
+
+When a fault subsequently occurs, the tag is stripped from the fault
+address returned to the application in the fault.address field of struct
+uffd_msg. However, from the application's perspective, the tagged
+address *is* the memory address, so if the application is unaware of
+memory tags, it may get confused by receiving an address that is, from
+its point of view, outside of the bounds of the allocation. We observed
+this behavior in the kselftest for userfaultfd [2] but other
+applications could have the same problem.
+
+Address this by not untagging pointers passed to the userfaultfd ioctls.
+Instead, let the system call fail. Also change the kselftest to use
+mmap so that it doesn't encounter this problem.
+
+[1] https://source.android.com/devices/tech/debug/tagged-pointers
+[2] tools/testing/selftests/vm/userfaultfd.c
+
+This patch (of 2):
+
+Do not untag pointers passed to the userfaultfd ioctls. Instead, let
+the system call fail. This will provide an early indication of problems
+with tag-unaware userspace code instead of letting the code get confused
+later, and is consistent with how we decided to handle brk/mmap/mremap
+in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in
+brk()/mmap()/mremap()"), as well as being consistent with the existing
+tagged address ABI documentation relating to how ioctl arguments are
+handled.
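+
+For illustration, a simplified sketch of arm64 top-byte tagging (the
+real untagged_addr() is more involved; the values here are made up):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* a tag lives in bits 63:56; untagging clears them */
+  static uint64_t untag(uint64_t addr)
+  {
+          return addr & ~(0xffULL << 56);
+  }
+
+  int main(void)
+  {
+          uint64_t addr = 0x0000aaaabbbb0000ULL;
+          uint64_t tagged = addr | (0x2aULL << 56);
+
+          printf("tagged:   0x%016llx\n", (unsigned long long)tagged);
+          printf("untagged: 0x%016llx\n", (unsigned long long)untag(tagged));
+          /* the old code untagged range.start before validating it; the
+           * fix validates the address as passed, so a tagged start now
+           * fails the task_size check with -EINVAL instead of silently
+           * registering a different range */
+          return 0;
+  }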
+
+The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag
+user pointers") plus fixups for the additional calls to
+validate_range() that have appeared since then.
+
+[1] https://source.android.com/devices/tech/debug/tagged-pointers
+[2] tools/testing/selftests/vm/userfaultfd.c
+
+Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com
+Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com
+Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b
+Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI")
+Signed-off-by: Peter Collingbourne <pcc@google.com>
+Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Alistair Delva <adelva@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Dave Martin <Dave.Martin@arm.com>
+Cc: Evgenii Stepanov <eugenis@google.com>
+Cc: Lokesh Gidra <lokeshgidra@google.com>
+Cc: Mitch Phillips <mitchp@google.com>
+Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: William McVicker <willmcvicker@google.com>
+Cc: <stable@vger.kernel.org> [5.4]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/arm64/tagged-address-abi.rst | 26 ++++++++++++++++++--------
+ fs/userfaultfd.c | 24 +++++++++++-------------
+ 2 files changed, 29 insertions(+), 21 deletions(-)
+
+--- a/Documentation/arm64/tagged-address-abi.rst
++++ b/Documentation/arm64/tagged-address-abi.rst
+@@ -45,14 +45,24 @@ how the user addresses are used by the k
+
+ 1. User addresses not accessed by the kernel but used for address space
+ management (e.g. ``mprotect()``, ``madvise()``). The use of valid
+- tagged pointers in this context is allowed with the exception of
+- ``brk()``, ``mmap()`` and the ``new_address`` argument to
+- ``mremap()`` as these have the potential to alias with existing
+- user addresses.
+-
+- NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+- incorrectly accept valid tagged pointers for the ``brk()``,
+- ``mmap()`` and ``mremap()`` system calls.
++ tagged pointers in this context is allowed with these exceptions:
++
++ - ``brk()``, ``mmap()`` and the ``new_address`` argument to
++ ``mremap()`` as these have the potential to alias with existing
++ user addresses.
++
++ NOTE: This behaviour changed in v5.6 and so some earlier kernels may
++ incorrectly accept valid tagged pointers for the ``brk()``,
++ ``mmap()`` and ``mremap()`` system calls.
++
++ - The ``range.start``, ``start`` and ``dst`` arguments to the
++ ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
++ ``userfaultfd()``, as fault addresses subsequently obtained by reading
++ the file descriptor will be untagged, which may otherwise confuse
++ tag-unaware programs.
++
++ NOTE: This behaviour changed in v5.14 and so some earlier kernels may
++ incorrectly accept valid tagged pointers for this system call.
+
+ 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
+ relaxation is disabled by default and the application thread needs to
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1228,23 +1228,21 @@ static __always_inline void wake_userfau
+ }
+
+ static __always_inline int validate_range(struct mm_struct *mm,
+- __u64 *start, __u64 len)
++ __u64 start, __u64 len)
+ {
+ __u64 task_size = mm->task_size;
+
+- *start = untagged_addr(*start);
+-
+- if (*start & ~PAGE_MASK)
++ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ if (len & ~PAGE_MASK)
+ return -EINVAL;
+ if (!len)
+ return -EINVAL;
+- if (*start < mmap_min_addr)
++ if (start < mmap_min_addr)
+ return -EINVAL;
+- if (*start >= task_size)
++ if (start >= task_size)
+ return -EINVAL;
+- if (len > task_size - *start)
++ if (len > task_size - start)
+ return -EINVAL;
+ return 0;
+ }
+@@ -1290,7 +1288,7 @@ static int userfaultfd_register(struct u
+ if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)
+ vm_flags |= VM_UFFD_WP;
+
+- ret = validate_range(mm, &uffdio_register.range.start,
++ ret = validate_range(mm, uffdio_register.range.start,
+ uffdio_register.range.len);
+ if (ret)
+ goto out;
+@@ -1490,7 +1488,7 @@ static int userfaultfd_unregister(struct
+ if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
+ goto out;
+
+- ret = validate_range(mm, &uffdio_unregister.start,
++ ret = validate_range(mm, uffdio_unregister.start,
+ uffdio_unregister.len);
+ if (ret)
+ goto out;
+@@ -1639,7 +1637,7 @@ static int userfaultfd_wake(struct userf
+ if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
+ goto out;
+
+- ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
++ ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
+ if (ret)
+ goto out;
+
+@@ -1679,7 +1677,7 @@ static int userfaultfd_copy(struct userf
+ sizeof(uffdio_copy)-sizeof(__s64)))
+ goto out;
+
+- ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
++ ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
+ if (ret)
+ goto out;
+ /*
+@@ -1736,7 +1734,7 @@ static int userfaultfd_zeropage(struct u
+ sizeof(uffdio_zeropage)-sizeof(__s64)))
+ goto out;
+
+- ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
++ ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
+ uffdio_zeropage.range.len);
+ if (ret)
+ goto out;
+@@ -1786,7 +1784,7 @@ static int userfaultfd_writeprotect(stru
+ sizeof(struct uffdio_writeprotect)))
+ return -EFAULT;
+
+- ret = validate_range(ctx->mm, &uffdio_wp.range.start,
++ ret = validate_range(ctx->mm, uffdio_wp.range.start,
+ uffdio_wp.range.len);
+ if (ret)
+ return ret;