]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.9
authorSasha Levin <sashal@kernel.org>
Tue, 10 Nov 2020 15:06:09 +0000 (10:06 -0500)
committerSasha Levin <sashal@kernel.org>
Tue, 10 Nov 2020 15:06:09 +0000 (10:06 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
18 files changed:
queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch [new file with mode: 0644]
queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch [new file with mode: 0644]
queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch [new file with mode: 0644]
queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch [new file with mode: 0644]
queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch [new file with mode: 0644]
queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch [new file with mode: 0644]
queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch [new file with mode: 0644]
queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch [new file with mode: 0644]
queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch [new file with mode: 0644]
queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch [new file with mode: 0644]
queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch [new file with mode: 0644]
queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch [new file with mode: 0644]
queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch [new file with mode: 0644]
queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch [new file with mode: 0644]
queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch [new file with mode: 0644]
queue-4.9/series [new file with mode: 0644]
queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch [new file with mode: 0644]
queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch [new file with mode: 0644]

diff --git a/queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch b/queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch
new file mode 100644 (file)
index 0000000..27b5460
--- /dev/null
@@ -0,0 +1,37 @@
+From 45b673adb012c0e7669c555540e1d9bd40805a71 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Nov 2020 13:18:07 +0300
+Subject: ALSA: hda: prevent undefined shift in snd_hdac_ext_bus_get_link()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 158e1886b6262c1d1c96a18c85fac5219b8bf804 ]
+
+This is harmless, but the "addr" comes from the user and it could lead
+to a negative shift or to shift wrapping if it's too high.
+
+Fixes: 0b00a5615dc4 ("ALSA: hdac_ext: add hdac extended controller")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Link: https://lore.kernel.org/r/20201103101807.GC1127762@mwanda
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/hda/ext/hdac_ext_controller.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
+index 261469188566c..49d42971d90da 100644
+--- a/sound/hda/ext/hdac_ext_controller.c
++++ b/sound/hda/ext/hdac_ext_controller.c
+@@ -155,6 +155,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus,
+               return NULL;
+       if (ebus->idx != bus_idx)
+               return NULL;
++      if (addr < 0 || addr > 31)
++              return NULL;
+       list_for_each_entry(hlink, &ebus->hlink_list, list) {
+               for (i = 0; i < HDA_MAX_CODECS; i++) {
+-- 
+2.27.0
+
diff --git a/queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch b/queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch
new file mode 100644 (file)
index 0000000..28c5fbb
--- /dev/null
@@ -0,0 +1,98 @@
+From d021b0ca798d44f45f4377a1120ea6a3a9ee2ebf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Sep 2020 17:27:29 +0900
+Subject: btrfs: reschedule when cloning lots of extents
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+[ Upstream commit 6b613cc97f0ace77f92f7bc112b8f6ad3f52baf8 ]
+
+We have several occurrences of a soft lockup from fstest's generic/175
+testcase, which look more or less like this one:
+
+  watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [xfs_io:10030]
+  Kernel panic - not syncing: softlockup: hung tasks
+  CPU: 0 PID: 10030 Comm: xfs_io Tainted: G             L    5.9.0-rc5+ #768
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014
+  Call Trace:
+   <IRQ>
+   dump_stack+0x77/0xa0
+   panic+0xfa/0x2cb
+   watchdog_timer_fn.cold+0x85/0xa5
+   ? lockup_detector_update_enable+0x50/0x50
+   __hrtimer_run_queues+0x99/0x4c0
+   ? recalibrate_cpu_khz+0x10/0x10
+   hrtimer_run_queues+0x9f/0xb0
+   update_process_times+0x28/0x80
+   tick_handle_periodic+0x1b/0x60
+   __sysvec_apic_timer_interrupt+0x76/0x210
+   asm_call_on_stack+0x12/0x20
+   </IRQ>
+   sysvec_apic_timer_interrupt+0x7f/0x90
+   asm_sysvec_apic_timer_interrupt+0x12/0x20
+  RIP: 0010:btrfs_tree_unlock+0x91/0x1a0 [btrfs]
+  RSP: 0018:ffffc90007123a58 EFLAGS: 00000282
+  RAX: ffff8881cea2fbe0 RBX: ffff8881cea2fbe0 RCX: 0000000000000000
+  RDX: ffff8881d23fd200 RSI: ffffffff82045220 RDI: ffff8881cea2fba0
+  RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000032
+  R10: 0000160000000000 R11: 0000000000001000 R12: 0000000000001000
+  R13: ffff8882357fd5b0 R14: ffff88816fa76e70 R15: ffff8881cea2fad0
+   ? btrfs_tree_unlock+0x15b/0x1a0 [btrfs]
+   btrfs_release_path+0x67/0x80 [btrfs]
+   btrfs_insert_replace_extent+0x177/0x2c0 [btrfs]
+   btrfs_replace_file_extents+0x472/0x7c0 [btrfs]
+   btrfs_clone+0x9ba/0xbd0 [btrfs]
+   btrfs_clone_files.isra.0+0xeb/0x140 [btrfs]
+   ? file_update_time+0xcd/0x120
+   btrfs_remap_file_range+0x322/0x3b0 [btrfs]
+   do_clone_file_range+0xb7/0x1e0
+   vfs_clone_file_range+0x30/0xa0
+   ioctl_file_clone+0x8a/0xc0
+   do_vfs_ioctl+0x5b2/0x6f0
+   __x64_sys_ioctl+0x37/0xa0
+   do_syscall_64+0x33/0x40
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  RIP: 0033:0x7f87977fc247
+  RSP: 002b:00007ffd51a2f6d8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
+  RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f87977fc247
+  RDX: 00007ffd51a2f710 RSI: 000000004020940d RDI: 0000000000000003
+  RBP: 0000000000000004 R08: 00007ffd51a79080 R09: 0000000000000000
+  R10: 00005621f11352f2 R11: 0000000000000206 R12: 0000000000000000
+  R13: 0000000000000000 R14: 00005621f128b958 R15: 0000000080000000
+  Kernel Offset: disabled
+  ---[ end Kernel panic - not syncing: softlockup: hung tasks ]---
+
+All of these lockup reports have the call chain btrfs_clone_files() ->
+btrfs_clone() in common. btrfs_clone_files() calls btrfs_clone() with
+both source and destination extents locked and loops over the source
+extent to create the clones.
+
+Conditionally reschedule in the btrfs_clone() loop, to give some time back
+to other processes.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 981091bd6c3c4..ebca009030c3a 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3854,6 +3854,8 @@ process_slot:
+                       ret = -EINTR;
+                       goto out;
+               }
++
++              cond_resched();
+       }
+       ret = 0;
+-- 
+2.27.0
+
diff --git a/queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch b/queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch
new file mode 100644 (file)
index 0000000..4352dff
--- /dev/null
@@ -0,0 +1,192 @@
+From af859eca833c6eb04908ddd3bb232731f8c6cb3a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Sep 2020 08:09:01 -0400
+Subject: btrfs: sysfs: init devices outside of the chunk_mutex
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit ca10845a56856fff4de3804c85e6424d0f6d0cde ]
+
+While running btrfs/061, btrfs/073, btrfs/078, or btrfs/178 we hit the
+following lockdep splat:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.9.0-rc3+ #4 Not tainted
+  ------------------------------------------------------
+  kswapd0/100 is trying to acquire lock:
+  ffff96ecc22ef4a0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x330
+
+  but task is already holding lock:
+  ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #3 (fs_reclaim){+.+.}-{0:0}:
+        fs_reclaim_acquire+0x65/0x80
+        slab_pre_alloc_hook.constprop.0+0x20/0x200
+        kmem_cache_alloc+0x37/0x270
+        alloc_inode+0x82/0xb0
+        iget_locked+0x10d/0x2c0
+        kernfs_get_inode+0x1b/0x130
+        kernfs_get_tree+0x136/0x240
+        sysfs_get_tree+0x16/0x40
+        vfs_get_tree+0x28/0xc0
+        path_mount+0x434/0xc00
+        __x64_sys_mount+0xe3/0x120
+        do_syscall_64+0x33/0x40
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #2 (kernfs_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x7e/0x7e0
+        kernfs_add_one+0x23/0x150
+        kernfs_create_link+0x63/0xa0
+        sysfs_do_create_link_sd+0x5e/0xd0
+        btrfs_sysfs_add_devices_dir+0x81/0x130
+        btrfs_init_new_device+0x67f/0x1250
+        btrfs_ioctl+0x1ef/0x2e20
+        __x64_sys_ioctl+0x83/0xb0
+        do_syscall_64+0x33/0x40
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x7e/0x7e0
+        btrfs_chunk_alloc+0x125/0x3a0
+        find_free_extent+0xdf6/0x1210
+        btrfs_reserve_extent+0xb3/0x1b0
+        btrfs_alloc_tree_block+0xb0/0x310
+        alloc_tree_block_no_bg_flush+0x4a/0x60
+        __btrfs_cow_block+0x11a/0x530
+        btrfs_cow_block+0x104/0x220
+        btrfs_search_slot+0x52e/0x9d0
+        btrfs_insert_empty_items+0x64/0xb0
+        btrfs_insert_delayed_items+0x90/0x4f0
+        btrfs_commit_inode_delayed_items+0x93/0x140
+        btrfs_log_inode+0x5de/0x2020
+        btrfs_log_inode_parent+0x429/0xc90
+        btrfs_log_new_name+0x95/0x9b
+        btrfs_rename2+0xbb9/0x1800
+        vfs_rename+0x64f/0x9f0
+        do_renameat2+0x320/0x4e0
+        __x64_sys_rename+0x1f/0x30
+        do_syscall_64+0x33/0x40
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #0 (&delayed_node->mutex){+.+.}-{3:3}:
+        __lock_acquire+0x119c/0x1fc0
+        lock_acquire+0xa7/0x3d0
+        __mutex_lock+0x7e/0x7e0
+        __btrfs_release_delayed_node.part.0+0x3f/0x330
+        btrfs_evict_inode+0x24c/0x500
+        evict+0xcf/0x1f0
+        dispose_list+0x48/0x70
+        prune_icache_sb+0x44/0x50
+        super_cache_scan+0x161/0x1e0
+        do_shrink_slab+0x178/0x3c0
+        shrink_slab+0x17c/0x290
+        shrink_node+0x2b2/0x6d0
+        balance_pgdat+0x30a/0x670
+        kswapd+0x213/0x4c0
+        kthread+0x138/0x160
+        ret_from_fork+0x1f/0x30
+
+  other info that might help us debug this:
+
+  Chain exists of:
+    &delayed_node->mutex --> kernfs_mutex --> fs_reclaim
+
+   Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+    lock(fs_reclaim);
+                                lock(kernfs_mutex);
+                                lock(fs_reclaim);
+    lock(&delayed_node->mutex);
+
+   *** DEADLOCK ***
+
+  3 locks held by kswapd0/100:
+   #0: ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30
+   #1: ffffffff8dd65c50 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x115/0x290
+   #2: ffff96ed2ade30e0 (&type->s_umount_key#36){++++}-{3:3}, at: super_cache_scan+0x38/0x1e0
+
+  stack backtrace:
+  CPU: 0 PID: 100 Comm: kswapd0 Not tainted 5.9.0-rc3+ #4
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
+  Call Trace:
+   dump_stack+0x8b/0xb8
+   check_noncircular+0x12d/0x150
+   __lock_acquire+0x119c/0x1fc0
+   lock_acquire+0xa7/0x3d0
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x330
+   __mutex_lock+0x7e/0x7e0
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x330
+   ? __btrfs_release_delayed_node.part.0+0x3f/0x330
+   ? lock_acquire+0xa7/0x3d0
+   ? find_held_lock+0x2b/0x80
+   __btrfs_release_delayed_node.part.0+0x3f/0x330
+   btrfs_evict_inode+0x24c/0x500
+   evict+0xcf/0x1f0
+   dispose_list+0x48/0x70
+   prune_icache_sb+0x44/0x50
+   super_cache_scan+0x161/0x1e0
+   do_shrink_slab+0x178/0x3c0
+   shrink_slab+0x17c/0x290
+   shrink_node+0x2b2/0x6d0
+   balance_pgdat+0x30a/0x670
+   kswapd+0x213/0x4c0
+   ? _raw_spin_unlock_irqrestore+0x41/0x50
+   ? add_wait_queue_exclusive+0x70/0x70
+   ? balance_pgdat+0x670/0x670
+   kthread+0x138/0x160
+   ? kthread_create_worker_on_cpu+0x40/0x40
+   ret_from_fork+0x1f/0x30
+
+This happens because we are holding the chunk_mutex at the time of
+adding in a new device.  However we only need to hold the
+device_list_mutex, as we're going to iterate over the fs_devices
+devices.  Move the sysfs init stuff outside of the chunk_mutex to get
+rid of this lockdep splat.
+
+CC: stable@vger.kernel.org # 4.4.x: f3cd2c58110dad14e: btrfs: sysfs, rename device_link add/remove functions
+CC: stable@vger.kernel.org # 4.4.x
+Reported-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index c31b02692f706..56ae889fb44f2 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2431,9 +2431,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+       btrfs_set_super_num_devices(root->fs_info->super_copy,
+                                   tmp + 1);
+-      /* add sysfs device entry */
+-      btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
+-
+       /*
+        * we've got more storage, clear any full flags on the space
+        * infos
+@@ -2441,6 +2438,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+       btrfs_clear_space_info_full(root->fs_info);
+       unlock_chunks(root);
++
++      /* Add sysfs device entry */
++      btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
++
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+       if (seeding_dev) {
+-- 
+2.27.0
+
diff --git a/queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch b/queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch
new file mode 100644 (file)
index 0000000..6b0440a
--- /dev/null
@@ -0,0 +1,99 @@
+From a613b860c1bb953b039ddfe313f03fffc64aedb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2019 09:39:02 +0100
+Subject: can: can_create_echo_skb(): fix echo skb generation: always use
+ skb_clone()
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 ]
+
+All user space generated SKBs are owned by a socket (unless injected into the
+kernel via AF_PACKET). If a socket is closed, all associated skbs will be cleaned
+up.
+
+This leads to a problem when a CAN driver calls can_put_echo_skb() on a
+unshared SKB. If the socket is closed prior to the TX complete handler,
+can_get_echo_skb() and the subsequent delivering of the echo SKB to all
+registered callbacks, a SKB with a refcount of 0 is delivered.
+
+To avoid the problem, in can_get_echo_skb() the original SKB is now always
+cloned, regardless of shared SKB or not. If the process exits it can now
+safely discard its SKBs, without disturbing the delivery of the echo SKB.
+
+The problem shows up in the j1939 stack, when it clones the incoming skb, which
+detects the already 0 refcount.
+
+We can easily reproduce this with following example:
+
+testj1939 -B -r can0: &
+cansend can0 1823ff40#0123
+
+WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174
+refcount_t: addition on 0; use-after-free.
+Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan
+CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1
+Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
+Backtrace:
+[<c010f570>] (dump_backtrace) from [<c010f90c>] (show_stack+0x20/0x24)
+[<c010f8ec>] (show_stack) from [<c0c3e1a4>] (dump_stack+0x8c/0xa0)
+[<c0c3e118>] (dump_stack) from [<c0127fec>] (__warn+0xe0/0x108)
+[<c0127f0c>] (__warn) from [<c01283c8>] (warn_slowpath_fmt+0xa8/0xcc)
+[<c0128324>] (warn_slowpath_fmt) from [<c0539c0c>] (refcount_warn_saturate+0x108/0x174)
+[<c0539b04>] (refcount_warn_saturate) from [<c0ad2cac>] (j1939_can_recv+0x20c/0x210)
+[<c0ad2aa0>] (j1939_can_recv) from [<c0ac9dc8>] (can_rcv_filter+0xb4/0x268)
+[<c0ac9d14>] (can_rcv_filter) from [<c0aca2cc>] (can_receive+0xb0/0xe4)
+[<c0aca21c>] (can_receive) from [<c0aca348>] (can_rcv+0x48/0x98)
+[<c0aca300>] (can_rcv) from [<c09b1fdc>] (__netif_receive_skb_one_core+0x64/0x88)
+[<c09b1f78>] (__netif_receive_skb_one_core) from [<c09b2070>] (__netif_receive_skb+0x38/0x94)
+[<c09b2038>] (__netif_receive_skb) from [<c09b2130>] (netif_receive_skb_internal+0x64/0xf8)
+[<c09b20cc>] (netif_receive_skb_internal) from [<c09b21f8>] (netif_receive_skb+0x34/0x19c)
+[<c09b21c4>] (netif_receive_skb) from [<c0791278>] (can_rx_offload_napi_poll+0x58/0xb4)
+
+Fixes: 0ae89beb283a ("can: add destructor for self generated skbs")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de
+Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/can/skb.h | 20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
+index 51bb6532785c3..1a2111c775ae1 100644
+--- a/include/linux/can/skb.h
++++ b/include/linux/can/skb.h
+@@ -60,21 +60,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
+  */
+ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
+ {
+-      if (skb_shared(skb)) {
+-              struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
++      struct sk_buff *nskb;
+-              if (likely(nskb)) {
+-                      can_skb_set_owner(nskb, skb->sk);
+-                      consume_skb(skb);
+-                      return nskb;
+-              } else {
+-                      kfree_skb(skb);
+-                      return NULL;
+-              }
++      nskb = skb_clone(skb, GFP_ATOMIC);
++      if (unlikely(!nskb)) {
++              kfree_skb(skb);
++              return NULL;
+       }
+-      /* we can assume to have an unshared skb with proper owner */
+-      return skb;
++      can_skb_set_owner(nskb, skb->sk);
++      consume_skb(skb);
++      return nskb;
+ }
+ #endif /* !_CAN_SKB_H */
+-- 
+2.27.0
+
diff --git a/queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch b/queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch
new file mode 100644 (file)
index 0000000..ae7637d
--- /dev/null
@@ -0,0 +1,49 @@
+From e0be8a352fdb30df35547909136e2ce7ec21caa5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Oct 2020 08:44:43 +0200
+Subject: can: dev: __can_get_echo_skb(): fix real payload length return value
+ for RTR frames
+
+From: Oliver Hartkopp <socketcan@hartkopp.net>
+
+[ Upstream commit ed3320cec279407a86bc4c72edc4a39eb49165ec ]
+
+The can_get_echo_skb() function returns the number of received bytes to
+be used for netdev statistics. In the case of RTR frames we get a valid
+(potential non-zero) data length value which has to be passed for further
+operations. But on the wire RTR frames have no payload length. Therefore
+the value to be used in the statistics has to be zero for RTR frames.
+
+Reported-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Link: https://lore.kernel.org/r/20201020064443.80164-1-socketcan@hartkopp.net
+Fixes: cf5046b309b3 ("can: dev: let can_get_echo_skb() return dlc of CAN frame")
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/dev.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index aa2158fabf2ac..617eb75c7c0ce 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -469,9 +469,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
+                */
+               struct sk_buff *skb = priv->echo_skb[idx];
+               struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+-              u8 len = cf->len;
+-              *len_ptr = len;
++              /* get the real payload length for netdev statistics */
++              if (cf->can_id & CAN_RTR_FLAG)
++                      *len_ptr = 0;
++              else
++                      *len_ptr = cf->len;
++
+               priv->echo_skb[idx] = NULL;
+               return skb;
+-- 
+2.27.0
+
diff --git a/queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch b/queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch
new file mode 100644 (file)
index 0000000..c62f47d
--- /dev/null
@@ -0,0 +1,67 @@
+From 1c1f66058cf76240b374c6d0d6c77bec7e0a38ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 3 Oct 2020 00:41:45 +0900
+Subject: can: dev: can_get_echo_skb(): prevent call to kfree_skb() in hard IRQ
+ context
+
+From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+
+[ Upstream commit 2283f79b22684d2812e5c76fc2280aae00390365 ]
+
+If a driver calls can_get_echo_skb() during a hardware IRQ (which is often, but
+not always, the case), the 'WARN_ON(in_irq)' in
+net/core/skbuff.c#skb_release_head_state() might be triggered, under network
+congestion circumstances, together with the potential risk of a NULL pointer
+dereference.
+
+The root cause of this issue is the call to kfree_skb() instead of
+dev_kfree_skb_irq() in net/core/dev.c#enqueue_to_backlog().
+
+This patch prevents the skb to be freed within the call to netif_rx() by
+incrementing its reference count with skb_get(). The skb is finally freed by
+one of the in-irq-context safe functions: dev_consume_skb_any() or
+dev_kfree_skb_any(). The "any" version is used because some drivers might call
+can_get_echo_skb() in a normal context.
+
+The reason for this issue to occur is that initially, in the core network
+stack, loopback skb were not supposed to be received in hardware IRQ context.
+The CAN stack is an exception.
+
+This bug was previously reported back in 2017 in [1] but the proposed patch
+never got accepted.
+
+While [1] directly modifies net/core/dev.c, we try to propose here a
+smoother modification local to CAN network stack (the assumption
+behind is that only CAN devices are affected by this issue).
+
+[1] http://lore.kernel.org/r/57a3ffb6-3309-3ad5-5a34-e93c3fe3614d@cetitec.com
+
+Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+Link: https://lore.kernel.org/r/20201002154219.4887-2-mailhol.vincent@wanadoo.fr
+Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface")
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/dev.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index ffc5467a1ec2b..aa2158fabf2ac 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -496,7 +496,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+       if (!skb)
+               return 0;
+-      netif_rx(skb);
++      skb_get(skb);
++      if (netif_rx(skb) == NET_RX_SUCCESS)
++              dev_consume_skb_any(skb);
++      else
++              dev_kfree_skb_any(skb);
+       return len;
+ }
+-- 
+2.27.0
+
diff --git a/queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch b/queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch
new file mode 100644 (file)
index 0000000..77ccbae
--- /dev/null
@@ -0,0 +1,129 @@
+From a2e39bc3dd0ccdc1358d1b941e93b090c316dab7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Aug 2020 17:06:04 +0300
+Subject: can: peak_usb: add range checking in decode operations
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit a6921dd524fe31d1f460c161d3526a407533b6db ]
+
+These values come from skb->data so Smatch considers them untrusted.  I
+believe Smatch is correct but I don't have a way to test this.
+
+The usb_if->dev[] array has 2 elements but the index is in the 0-15
+range without checks.  The cfd->len can be up to 255 but the maximum
+valid size is CANFD_MAX_DLEN (64) so that could lead to memory
+corruption.
+
+Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Link: https://lore.kernel.org/r/20200813140604.GA456946@mwanda
+Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 48 +++++++++++++++++-----
+ 1 file changed, 37 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+index 40647b837b31f..d314e73f3d061 100644
+--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
++++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+@@ -475,12 +475,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
+                                    struct pucan_msg *rx_msg)
+ {
+       struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg;
+-      struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)];
+-      struct net_device *netdev = dev->netdev;
++      struct peak_usb_device *dev;
++      struct net_device *netdev;
+       struct canfd_frame *cfd;
+       struct sk_buff *skb;
+       const u16 rx_msg_flags = le16_to_cpu(rm->flags);
++      if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev))
++              return -ENOMEM;
++
++      dev = usb_if->dev[pucan_msg_get_channel(rm)];
++      netdev = dev->netdev;
++
+       if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) {
+               /* CANFD frame case */
+               skb = alloc_canfd_skb(netdev, &cfd);
+@@ -527,15 +533,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
+                                    struct pucan_msg *rx_msg)
+ {
+       struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg;
+-      struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
+-      struct pcan_usb_fd_device *pdev =
+-                      container_of(dev, struct pcan_usb_fd_device, dev);
++      struct pcan_usb_fd_device *pdev;
+       enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
+       enum can_state rx_state, tx_state;
+-      struct net_device *netdev = dev->netdev;
++      struct peak_usb_device *dev;
++      struct net_device *netdev;
+       struct can_frame *cf;
+       struct sk_buff *skb;
++      if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev))
++              return -ENOMEM;
++
++      dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
++      pdev = container_of(dev, struct pcan_usb_fd_device, dev);
++      netdev = dev->netdev;
++
+       /* nothing should be sent while in BUS_OFF state */
+       if (dev->can.state == CAN_STATE_BUS_OFF)
+               return 0;
+@@ -588,9 +600,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if,
+                                   struct pucan_msg *rx_msg)
+ {
+       struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg;
+-      struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)];
+-      struct pcan_usb_fd_device *pdev =
+-                      container_of(dev, struct pcan_usb_fd_device, dev);
++      struct pcan_usb_fd_device *pdev;
++      struct peak_usb_device *dev;
++
++      if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev))
++              return -EINVAL;
++
++      dev = usb_if->dev[pucan_ermsg_get_channel(er)];
++      pdev = container_of(dev, struct pcan_usb_fd_device, dev);
+       /* keep a trace of tx and rx error counters for later use */
+       pdev->bec.txerr = er->tx_err_cnt;
+@@ -604,11 +621,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if,
+                                     struct pucan_msg *rx_msg)
+ {
+       struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg;
+-      struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)];
+-      struct net_device *netdev = dev->netdev;
++      struct peak_usb_device *dev;
++      struct net_device *netdev;
+       struct can_frame *cf;
+       struct sk_buff *skb;
++      if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev))
++              return -EINVAL;
++
++      dev = usb_if->dev[pufd_omsg_get_channel(ov)];
++      netdev = dev->netdev;
++
+       /* allocate an skb to store the error frame */
+       skb = alloc_can_err_skb(netdev, &cf);
+       if (!skb)
+@@ -726,6 +749,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev,
+       u16 tx_msg_size, tx_msg_flags;
+       u8 can_dlc;
++      if (cfd->len > CANFD_MAX_DLEN)
++              return -EINVAL;
++
+       tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4);
+       tx_msg->size = cpu_to_le16(tx_msg_size);
+       tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX);
+-- 
+2.27.0
+
diff --git a/queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch b/queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch
new file mode 100644 (file)
index 0000000..f79bec0
--- /dev/null
@@ -0,0 +1,96 @@
+From ab3b057ca307fc3bfae115aac0e84b38420500a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Oct 2020 10:56:31 +0200
+Subject: can: peak_usb: peak_usb_get_ts_time(): fix timestamp wrapping
+
+From: Stephane Grosjean <s.grosjean@peak-system.com>
+
+[ Upstream commit ecc7b4187dd388549544195fb13a11b4ea8e6a84 ]
+
+Fabian Inostroza <fabianinostrozap@gmail.com> has discovered a potential
+problem in the hardware timestamp reporting from the PCAN-USB USB CAN interface
+(only), related to the fact that a timestamp of an event may precede the
+timestamp used for synchronization when both records are part of the same USB
+packet. However, this case was used to detect the wrapping of the time counter.
+
+This patch details and fixes the two identified cases where this problem can
+occur.
+
+Reported-by: Fabian Inostroza <fabianinostrozap@gmail.com>
+Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
+Link: https://lore.kernel.org/r/20201014085631.15128-1-s.grosjean@peak-system.com
+Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core")
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/usb/peak_usb/pcan_usb_core.c | 51 ++++++++++++++++++--
+ 1 file changed, 46 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+index 6cd4317fe94df..74b37309efab7 100644
+--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
++++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+@@ -152,14 +152,55 @@ void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
+       /* protect from getting timeval before setting now */
+       if (time_ref->tv_host.tv_sec > 0) {
+               u64 delta_us;
++              s64 delta_ts = 0;
++
++              /* General case: dev_ts_1 < dev_ts_2 < ts, with:
++               *
++               * - dev_ts_1 = previous sync timestamp
++               * - dev_ts_2 = last sync timestamp
++               * - ts = event timestamp
++               * - ts_period = known sync period (theoretical)
++               *             ~ dev_ts2 - dev_ts1
++               * *but*:
++               *
++               * - time counters wrap (see adapter->ts_used_bits)
++               * - sometimes, dev_ts_1 < ts < dev_ts2
++               *
++               * "normal" case (sync time counters increase):
++               * must take into account case when ts wraps (tsw)
++               *
++               *      < ts_period > <          >
++               *     |             |            |
++               *  ---+--------+----+-------0-+--+-->
++               *     ts_dev_1 |    ts_dev_2  |
++               *              ts             tsw
++               */
++              if (time_ref->ts_dev_1 < time_ref->ts_dev_2) {
++                      /* case when event time (tsw) wraps */
++                      if (ts < time_ref->ts_dev_1)
++                              delta_ts = 1 << time_ref->adapter->ts_used_bits;
++
++              /* Otherwise, sync time counter (ts_dev_2) has wrapped:
++               * handle case when event time (tsn) hasn't.
++               *
++               *      < ts_period > <          >
++               *     |             |            |
++               *  ---+--------+--0-+---------+--+-->
++               *     ts_dev_1 |    ts_dev_2  |
++               *              tsn            ts
++               */
++              } else if (time_ref->ts_dev_1 < ts) {
++                      delta_ts = -(1 << time_ref->adapter->ts_used_bits);
++              }
+-              delta_us = ts - time_ref->ts_dev_2;
+-              if (ts < time_ref->ts_dev_2)
+-                      delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1;
++              /* add delay between last sync and event timestamps */
++              delta_ts += (signed int)(ts - time_ref->ts_dev_2);
+-              delta_us += time_ref->ts_total;
++              /* add time from beginning to last sync */
++              delta_ts += time_ref->ts_total;
+-              delta_us *= time_ref->adapter->us_per_ts_scale;
++              /* convert ticks number into microseconds */
++              delta_us = delta_ts * time_ref->adapter->us_per_ts_scale;
+               delta_us >>= time_ref->adapter->us_per_ts_shift;
+               *tv = time_ref->tv_host_0;
+-- 
+2.27.0
+
diff --git a/queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch b/queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch
new file mode 100644 (file)
index 0000000..78024df
--- /dev/null
@@ -0,0 +1,37 @@
+From a934424c7b0a0ff8782e435f26c944a53a2b97ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Oct 2020 21:41:44 +0100
+Subject: genirq: Let GENERIC_IRQ_IPI select IRQ_DOMAIN_HIERARCHY
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 151a535171be6ff824a0a3875553ea38570f4c05 ]
+
+kernel/irq/ipi.c otherwise fails to compile if nothing else
+selects it.
+
+Fixes: 379b656446a3 ("genirq: Add GENERIC_IRQ_IPI Kconfig symbol")
+Reported-by: Pavel Machek <pavel@ucw.cz>
+Tested-by: Pavel Machek <pavel@ucw.cz>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20201015101222.GA32747@amd
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
+index 3bbfd6a9c4756..bb3a46cbe034c 100644
+--- a/kernel/irq/Kconfig
++++ b/kernel/irq/Kconfig
+@@ -67,6 +67,7 @@ config IRQ_DOMAIN_HIERARCHY
+ # Generic IRQ IPI support
+ config GENERIC_IRQ_IPI
+       bool
++      select IRQ_DOMAIN_HIERARCHY
+ # Generic MSI interrupt support
+ config GENERIC_MSI_IRQ
+-- 
+2.27.0
+
diff --git a/queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch b/queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch
new file mode 100644 (file)
index 0000000..9f5f1d1
--- /dev/null
@@ -0,0 +1,42 @@
+From bcf2329e3d2cb07ec6ef7355dd07b32fa00178c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Oct 2020 10:52:29 -0400
+Subject: gfs2: Wake up when sd_glock_disposal becomes zero
+
+From: Alexander Aring <aahringo@redhat.com>
+
+[ Upstream commit da7d554f7c62d0c17c1ac3cc2586473c2d99f0bd ]
+
+Commit fc0e38dae645 ("GFS2: Fix glock deallocation race") fixed a
+sd_glock_disposal accounting bug by adding a missing atomic_dec
+statement, but it failed to wake up sd_glock_wait when that decrement
+causes sd_glock_disposal to reach zero.  As a consequence,
+gfs2_gl_hash_clear can now run into a 10-minute timeout instead of
+being woken up.  Add the missing wakeup.
+
+Fixes: fc0e38dae645 ("GFS2: Fix glock deallocation race")
+Cc: stable@vger.kernel.org # v2.6.39+
+Signed-off-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/glock.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
+index efd44d5645d83..f19e49a5d032b 100644
+--- a/fs/gfs2/glock.c
++++ b/fs/gfs2/glock.c
+@@ -758,7 +758,8 @@ again:
+       }
+       kfree(gl->gl_lksb.sb_lvbptr);
+       kmem_cache_free(cachep, gl);
+-      atomic_dec(&sdp->sd_glock_disposal);
++      if (atomic_dec_and_test(&sdp->sd_glock_disposal))
++              wake_up(&sdp->sd_glock_wait);
+       *glp = tmp;
+       return ret;
+-- 
+2.27.0
+
diff --git a/queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch b/queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch
new file mode 100644 (file)
index 0000000..2046292
--- /dev/null
@@ -0,0 +1,72 @@
+From 96e7deb0a438672dc1ad9ef1d86e43d3ebd30126 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Nov 2020 17:07:40 -0800
+Subject: mm: mempolicy: fix potential pte_unmap_unlock pte error
+
+From: Shijie Luo <luoshijie1@huawei.com>
+
+[ Upstream commit 3f08842098e842c51e3b97d0dcdebf810b32558e ]
+
+When flags in queue_pages_pte_range don't have MPOL_MF_MOVE or
+MPOL_MF_MOVE_ALL bits, code breaks and passing origin pte - 1 to
+pte_unmap_unlock seems like not a good idea.
+
+queue_pages_pte_range can run in MPOL_MF_MOVE_ALL mode which doesn't
+migrate misplaced pages but returns with EIO when encountering such a
+page.  Since commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return
+-EIO when MPOL_MF_STRICT is specified") and early break on the first pte
+in the range results in pte_unmap_unlock on an underflow pte.  This can
+lead to lockups later on when somebody tries to lock the pte resp.
+page_table_lock again.
+
+Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified")
+Signed-off-by: Shijie Luo <luoshijie1@huawei.com>
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Feilong Lin <linfeilong@huawei.com>
+Cc: Shijie Luo <luoshijie1@huawei.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201019074853.50856-1-luoshijie1@huawei.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index a2be65bf5d8cc..2f443767fd1b4 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -487,7 +487,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+       struct queue_pages *qp = walk->private;
+       unsigned long flags = qp->flags;
+       int nid, ret;
+-      pte_t *pte;
++      pte_t *pte, *mapped_pte;
+       spinlock_t *ptl;
+       if (pmd_trans_huge(*pmd)) {
+@@ -515,7 +515,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+       if (pmd_trans_unstable(pmd))
+               return 0;
+ retry:
+-      pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
++      mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               if (!pte_present(*pte))
+                       continue;
+@@ -554,7 +554,7 @@ retry:
+               } else
+                       break;
+       }
+-      pte_unmap_unlock(pte - 1, ptl);
++      pte_unmap_unlock(mapped_pte, ptl);
+       cond_resched();
+       return addr != end ? -EIO : 0;
+ }
+-- 
+2.27.0
+
diff --git a/queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch b/queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch
new file mode 100644 (file)
index 0000000..d288d13
--- /dev/null
@@ -0,0 +1,94 @@
+From cb457f51b18f48e226ab3701d5d507383ab2e2c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Oct 2020 09:05:35 +0200
+Subject: net: xfrm: fix a race condition during allocing spi
+
+From: zhuoliang zhang <zhuoliang.zhang@mediatek.com>
+
+[ Upstream commit a779d91314ca7208b7feb3ad817b62904397c56d ]
+
+we found that the following race condition exists in
+xfrm_alloc_userspi flow:
+
+user thread                                    state_hash_work thread
+----                                           ----
+xfrm_alloc_userspi()
+ __find_acq_core()
+   /*alloc new xfrm_state:x*/
+   xfrm_state_alloc()
+   /*schedule state_hash_work thread*/
+   xfrm_hash_grow_check()                     xfrm_hash_resize()
+ xfrm_alloc_spi                                  /*hold lock*/
+      x->id.spi = htonl(spi)                     spin_lock_bh(&net->xfrm.xfrm_state_lock)
+      /*waiting lock release*/                     xfrm_hash_transfer()
+      spin_lock_bh(&net->xfrm.xfrm_state_lock)      /*add x into hlist:net->xfrm.state_byspi*/
+                                                       hlist_add_head_rcu(&x->byspi)
+                                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock)
+
+    /*add x into hlist:net->xfrm.state_byspi 2 times*/
+    hlist_add_head_rcu(&x->byspi)
+
+1. a new state x is alloced in xfrm_state_alloc() and added into the bydst hlist
+in  __find_acq_core() on the LHS;
+2. on the RHS, state_hash_work thread travels the old bydst and transfers every xfrm_state
+(include x) into the new bydst hlist and new byspi hlist;
+3. user thread on the LHS gets the lock and adds x into the new byspi hlist again.
+
+So the same xfrm_state (x) is added into the same list_hash
+(net->xfrm.state_byspi) 2 times that makes the list_hash become
+an infinite loop.
+
+To fix the race, x->id.spi = htonl(spi) in the xfrm_alloc_spi() is moved
+to the back of spin_lock_bh, so that the state_hash_work thread no longer adds x
+whose id.spi is zero into the hash_list.
+
+Fixes: f034b5d4efdf ("[XFRM]: Dynamic xfrm_state hash table sizing.")
+Signed-off-by: zhuoliang zhang <zhuoliang.zhang@mediatek.com>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_state.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index 0eb85765d35a1..4d19f2ff6e052 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -1591,6 +1591,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
+       int err = -ENOENT;
+       __be32 minspi = htonl(low);
+       __be32 maxspi = htonl(high);
++      __be32 newspi = 0;
+       u32 mark = x->mark.v & x->mark.m;
+       spin_lock_bh(&x->lock);
+@@ -1609,21 +1610,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
+                       xfrm_state_put(x0);
+                       goto unlock;
+               }
+-              x->id.spi = minspi;
++              newspi = minspi;
+       } else {
+               u32 spi = 0;
+               for (h = 0; h < high-low+1; h++) {
+                       spi = low + prandom_u32()%(high-low+1);
+                       x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+                       if (x0 == NULL) {
+-                              x->id.spi = htonl(spi);
++                              newspi = htonl(spi);
+                               break;
+                       }
+                       xfrm_state_put(x0);
+               }
+       }
+-      if (x->id.spi) {
++      if (newspi) {
+               spin_lock_bh(&net->xfrm.xfrm_state_lock);
++              x->id.spi = newspi;
+               h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
+               spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+-- 
+2.27.0
+
diff --git a/queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch b/queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch
new file mode 100644 (file)
index 0000000..14499dc
--- /dev/null
@@ -0,0 +1,36 @@
+From a7e6a07cd2426880a60fe78f90059292dd54273d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Nov 2020 00:31:03 +0100
+Subject: perf tools: Add missing swap for ino_generation
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit fe01adb72356a4e2f8735e4128af85921ca98fa1 ]
+
+We are missing swap for ino_generation field.
+
+Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support")
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/session.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
+index 7e0573e55a356..89808ab008ad2 100644
+--- a/tools/perf/util/session.c
++++ b/tools/perf/util/session.c
+@@ -482,6 +482,7 @@ static void perf_event__mmap2_swap(union perf_event *event,
+       event->mmap2.maj   = bswap_32(event->mmap2.maj);
+       event->mmap2.min   = bswap_32(event->mmap2.min);
+       event->mmap2.ino   = bswap_64(event->mmap2.ino);
++      event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+       if (sample_id_all) {
+               void *data = &event->mmap2.filename;
+-- 
+2.27.0
+
diff --git a/queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch b/queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch
new file mode 100644 (file)
index 0000000..b81018c
--- /dev/null
@@ -0,0 +1,48 @@
+From dc3de59a16a47f376ed42255deb9b06c4ef32417 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Nov 2020 22:27:27 +0100
+Subject: regulator: defer probe when trying to get voltage from unresolved
+ supply
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+
+[ Upstream commit cf1ad559a20d1930aa7b47a52f54e1f8718de301 ]
+
+regulator_get_voltage_rdev() is called in regulator probe() when
+applying machine constraints.  The "fixed" commit exposed the problem
+that non-bypassed regulators can forward the request to its parent
+(like bypassed ones) supply. Return -EPROBE_DEFER when the supply
+is expected but not resolved yet.
+
+Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Reported-by: Ondřej Jirman <megous@megous.com>
+Reported-by: Corentin Labbe <clabbe.montjoie@gmail.com>
+Tested-by: Ondřej Jirman <megous@megous.com>
+Link: https://lore.kernel.org/r/a9041d68b4d35e4a2dd71629c8a6422662acb5ee.1604351936.git.mirq-linux@rere.qmqm.pl
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/core.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
+index 0f730e4bf6bcb..0caf751d85ded 100644
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -3185,6 +3185,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
+               ret = rdev->desc->fixed_uV;
+       } else if (rdev->supply) {
+               ret = _regulator_get_voltage(rdev->supply->rdev);
++      } else if (rdev->supply_name) {
++              return -EPROBE_DEFER;
+       } else {
+               return -EINVAL;
+       }
+-- 
+2.27.0
+
diff --git a/queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch b/queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch
new file mode 100644 (file)
index 0000000..722fd5f
--- /dev/null
@@ -0,0 +1,120 @@
+From 485529432128dddadf79e4ceb17d2c9b11a94c27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Nov 2020 15:31:27 -0500
+Subject: ring-buffer: Fix recursion protection transitions between interrupt
+ context
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+[ Upstream commit b02414c8f045ab3b9afc816c3735bc98c5c3d262 ]
+
+The recursion protection of the ring buffer depends on preempt_count() to be
+correct. But it is possible that the ring buffer gets called after an
+interrupt comes in but before it updates the preempt_count(). This will
+trigger a false positive in the recursion code.
+
+Use the same trick from the ftrace function callback recursion code which
+uses a "transition" bit that gets set, to allow for a single recursion
+to handle transitions between contexts.
+
+Cc: stable@vger.kernel.org
+Fixes: 567cd4da54ff4 ("ring-buffer: User context bit recursion checking")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 54 +++++++++++++++++++++++++++++++-------
+ 1 file changed, 44 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index fb2aa2430edcc..55f60d2edc3fb 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -416,14 +416,16 @@ struct rb_event_info {
+ /*
+  * Used for which event context the event is in.
+- *  NMI     = 0
+- *  IRQ     = 1
+- *  SOFTIRQ = 2
+- *  NORMAL  = 3
++ *  TRANSITION = 0
++ *  NMI     = 1
++ *  IRQ     = 2
++ *  SOFTIRQ = 3
++ *  NORMAL  = 4
+  *
+  * See trace_recursive_lock() comment below for more details.
+  */
+ enum {
++      RB_CTX_TRANSITION,
+       RB_CTX_NMI,
+       RB_CTX_IRQ,
+       RB_CTX_SOFTIRQ,
+@@ -2579,10 +2581,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+  * a bit of overhead in something as critical as function tracing,
+  * we use a bitmask trick.
+  *
+- *  bit 0 =  NMI context
+- *  bit 1 =  IRQ context
+- *  bit 2 =  SoftIRQ context
+- *  bit 3 =  normal context.
++ *  bit 1 =  NMI context
++ *  bit 2 =  IRQ context
++ *  bit 3 =  SoftIRQ context
++ *  bit 4 =  normal context.
+  *
+  * This works because this is the order of contexts that can
+  * preempt other contexts. A SoftIRQ never preempts an IRQ
+@@ -2605,6 +2607,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+  * The least significant bit can be cleared this way, and it
+  * just so happens that it is the same bit corresponding to
+  * the current context.
++ *
++ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
++ * is set when a recursion is detected at the current context, and if
++ * the TRANSITION bit is already set, it will fail the recursion.
++ * This is needed because there's a lag between the changing of
++ * interrupt context and updating the preempt count. In this case,
++ * a false positive will be found. To handle this, one extra recursion
++ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
++ * bit is already set, then it is considered a recursion and the function
++ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
++ *
++ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
++ * to be cleared. Even if it wasn't the context that set it. That is,
++ * if an interrupt comes in while NORMAL bit is set and the ring buffer
++ * is called before preempt_count() is updated, since the check will
++ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
++ * NMI then comes in, it will set the NMI bit, but when the NMI code
++ * does the trace_recursive_unlock() it will clear the TRANSTION bit
++ * and leave the NMI bit set. But this is fine, because the interrupt
++ * code that set the TRANSITION bit will then clear the NMI bit when it
++ * calls trace_recursive_unlock(). If another NMI comes in, it will
++ * set the TRANSITION bit and continue.
++ *
++ * Note: The TRANSITION bit only handles a single transition between context.
+  */
+ static __always_inline int
+@@ -2623,8 +2649,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
+       } else
+               bit = RB_CTX_NORMAL;
+-      if (unlikely(val & (1 << bit)))
+-              return 1;
++      if (unlikely(val & (1 << bit))) {
++              /*
++               * It is possible that this was called by transitioning
++               * between interrupt context, and preempt_count() has not
++               * been updated yet. In this case, use the TRANSITION bit.
++               */
++              bit = RB_CTX_TRANSITION;
++              if (val & (1 << bit))
++                      return 1;
++      }
+       val |= (1 << bit);
+       cpu_buffer->current_context = val;
+-- 
+2.27.0
+
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644 (file)
index 0000000..3b5971c
--- /dev/null
@@ -0,0 +1,17 @@
+regulator-defer-probe-when-trying-to-get-voltage-fro.patch
+ring-buffer-fix-recursion-protection-transitions-bet.patch
+gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch
+mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch
+time-prevent-undefined-behaviour-in-timespec64_to_ns.patch
+btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch
+btrfs-reschedule-when-cloning-lots-of-extents.patch
+genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch
+net-xfrm-fix-a-race-condition-during-allocing-spi.patch
+perf-tools-add-missing-swap-for-ino_generation.patch
+alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch
+can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch
+can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch
+can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch
+can-peak_usb-add-range-checking-in-decode-operations.patch
+can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch
+xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch
diff --git a/queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch b/queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch
new file mode 100644 (file)
index 0000000..3eb729f
--- /dev/null
@@ -0,0 +1,59 @@
+From 1352df9b2a0d6898ff7777618c1fc1b7284958bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Sep 2020 17:30:13 +0800
+Subject: time: Prevent undefined behaviour in timespec64_to_ns()
+
+From: Zeng Tao <prime.zeng@hisilicon.com>
+
+[ Upstream commit cb47755725da7b90fecbb2aa82ac3b24a7adb89b ]
+
+UBSAN reports:
+
+Undefined behaviour in ./include/linux/time64.h:127:27
+signed integer overflow:
+17179869187 * 1000000000 cannot be represented in type 'long long int'
+Call Trace:
+ timespec64_to_ns include/linux/time64.h:127 [inline]
+ set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180
+ do_setitimer+0x8e/0x740 kernel/time/itimer.c:245
+ __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336
+ do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295
+
+Commit bd40a175769d ("y2038: itimer: change implementation to timespec64")
+replaced the original conversion which handled time clamping correctly with
+timespec64_to_ns() which has no overflow protection.
+
+Fix it in timespec64_to_ns() as this is not necessarily limited to the
+usage in itimers.
+
+[ tglx: Added comment and adjusted the fixes tag ]
+
+Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64")
+Signed-off-by: Zeng Tao <prime.zeng@hisilicon.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/time64.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/include/linux/time64.h b/include/linux/time64.h
+index 980c71b3001a5..2a45b8c87edbf 100644
+--- a/include/linux/time64.h
++++ b/include/linux/time64.h
+@@ -188,6 +188,10 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts)
+  */
+ static inline s64 timespec64_to_ns(const struct timespec64 *ts)
+ {
++      /* Prevent multiplication overflow */
++      if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
++              return KTIME_MAX;
++
+       return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+ }
+-- 
+2.27.0
+
diff --git a/queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch b/queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch
new file mode 100644 (file)
index 0000000..eb5a084
--- /dev/null
@@ -0,0 +1,70 @@
+From a8a60e1eee8daa4747040e6dd5dd96726d958f6a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Oct 2020 14:30:48 -0700
+Subject: xfs: flush new eof page on truncate to avoid post-eof corruption
+
+From: Brian Foster <bfoster@redhat.com>
+
+[ Upstream commit 869ae85dae64b5540e4362d7fe4cd520e10ec05c ]
+
+It is possible to expose non-zeroed post-EOF data in XFS if the new
+EOF page is dirty, backed by an unwritten block and the truncate
+happens to race with writeback. iomap_truncate_page() will not zero
+the post-EOF portion of the page if the underlying block is
+unwritten. The subsequent call to truncate_setsize() will, but
+doesn't dirty the page. Therefore, if writeback happens to complete
+after iomap_truncate_page() (so it still sees the unwritten block)
+but before truncate_setsize(), the cached page becomes inconsistent
+with the on-disk block. A mapped read after the associated page is
+reclaimed or invalidated exposes non-zero post-EOF data.
+
+For example, consider the following sequence when run on a kernel
+modified to explicitly flush the new EOF page within the race
+window:
+
+$ xfs_io -fc "falloc 0 4k" -c fsync /mnt/file
+$ xfs_io -c "pwrite 0 4k" -c "truncate 1k" /mnt/file
+  ...
+$ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file
+00000400:  00 00 00 00 00 00 00 00  ........
+$ umount /mnt/; mount <dev> /mnt/
+$ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file
+00000400:  cd cd cd cd cd cd cd cd  ........
+
+Update xfs_setattr_size() to explicitly flush the new EOF page prior
+to the page truncate to ensure iomap has the latest state of the
+underlying block.
+
+Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_iops.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
+index 7bfddcd32d73e..0d587657056d8 100644
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -864,6 +864,16 @@ xfs_setattr_size(
+       if (newsize > oldsize) {
+               error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
+       } else {
++              /*
++               * iomap won't detect a dirty page over an unwritten block (or a
++               * cow block over a hole) and subsequently skips zeroing the
++               * newly post-EOF portion of the page. Flush the new EOF to
++               * convert the block before the pagecache truncate.
++               */
++              error = filemap_write_and_wait_range(inode->i_mapping, newsize,
++                                                   newsize);
++              if (error)
++                      return error;
+               error = iomap_truncate_page(inode, newsize, &did_zeroing,
+                               &xfs_iomap_ops);
+       }
+-- 
+2.27.0
+