From 509869ed8de0c8e5914e2a360855aeb47ca3acb8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 10 Nov 2020 10:06:09 -0500 Subject: [PATCH] Fixes for 4.9 Signed-off-by: Sasha Levin --- ...-undefined-shift-in-snd_hdac_ext_bus.patch | 37 ++++ ...chedule-when-cloning-lots-of-extents.patch | 98 +++++++++ ...t-devices-outside-of-the-chunk_mutex.patch | 192 ++++++++++++++++++ ...cho_skb-fix-echo-skb-generation-alwa.patch | 99 +++++++++ ...t_echo_skb-fix-real-payload-length-r.patch | 49 +++++ ...echo_skb-prevent-call-to-kfree_skb-i.patch | 67 ++++++ ...-range-checking-in-decode-operations.patch | 129 ++++++++++++ ...k_usb_get_ts_time-fix-timestamp-wrap.patch | 96 +++++++++ ...ic_irq_ipi-select-irq_domain_hierarc.patch | 37 ++++ ...-when-sd_glock_disposal-becomes-zero.patch | 42 ++++ ...-potential-pte_unmap_unlock-pte-erro.patch | 72 +++++++ ...a-race-condition-during-allocing-spi.patch | 94 +++++++++ ...-add-missing-swap-for-ino_generation.patch | 36 ++++ ...probe-when-trying-to-get-voltage-fro.patch | 48 +++++ ...recursion-protection-transitions-bet.patch | 120 +++++++++++ queue-4.9/series | 17 ++ ...efined-behaviour-in-timespec64_to_ns.patch | 59 ++++++ ...f-page-on-truncate-to-avoid-post-eof.patch | 70 +++++++ 18 files changed, 1362 insertions(+) create mode 100644 queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch create mode 100644 queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch create mode 100644 queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch create mode 100644 queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch create mode 100644 queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch create mode 100644 queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch create mode 100644 queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch create mode 100644 queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch create mode 100644 
queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch create mode 100644 queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch create mode 100644 queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch create mode 100644 queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch create mode 100644 queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch create mode 100644 queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch create mode 100644 queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch create mode 100644 queue-4.9/series create mode 100644 queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch create mode 100644 queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch diff --git a/queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch b/queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch new file mode 100644 index 00000000000..27b5460053c --- /dev/null +++ b/queue-4.9/alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch @@ -0,0 +1,37 @@ +From 45b673adb012c0e7669c555540e1d9bd40805a71 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Nov 2020 13:18:07 +0300 +Subject: ALSA: hda: prevent undefined shift in snd_hdac_ext_bus_get_link() + +From: Dan Carpenter + +[ Upstream commit 158e1886b6262c1d1c96a18c85fac5219b8bf804 ] + +This is harmless, but the "addr" comes from the user and it could lead +to a negative shift or to shift wrapping if it's too high. 
+ +Fixes: 0b00a5615dc4 ("ALSA: hdac_ext: add hdac extended controller") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/20201103101807.GC1127762@mwanda +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/hda/ext/hdac_ext_controller.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c +index 261469188566c..49d42971d90da 100644 +--- a/sound/hda/ext/hdac_ext_controller.c ++++ b/sound/hda/ext/hdac_ext_controller.c +@@ -155,6 +155,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus, + return NULL; + if (ebus->idx != bus_idx) + return NULL; ++ if (addr < 0 || addr > 31) ++ return NULL; + + list_for_each_entry(hlink, &ebus->hlink_list, list) { + for (i = 0; i < HDA_MAX_CODECS; i++) { +-- +2.27.0 + diff --git a/queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch b/queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch new file mode 100644 index 00000000000..28c5fbbe903 --- /dev/null +++ b/queue-4.9/btrfs-reschedule-when-cloning-lots-of-extents.patch @@ -0,0 +1,98 @@ +From d021b0ca798d44f45f4377a1120ea6a3a9ee2ebf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Sep 2020 17:27:29 +0900 +Subject: btrfs: reschedule when cloning lots of extents + +From: Johannes Thumshirn + +[ Upstream commit 6b613cc97f0ace77f92f7bc112b8f6ad3f52baf8 ] + +We have several occurrences of a soft lockup from fstest's generic/175 +testcase, which look more or less like this one: + + watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [xfs_io:10030] + Kernel panic - not syncing: softlockup: hung tasks + CPU: 0 PID: 10030 Comm: xfs_io Tainted: G L 5.9.0-rc5+ #768 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014 + Call Trace: + + dump_stack+0x77/0xa0 + panic+0xfa/0x2cb + watchdog_timer_fn.cold+0x85/0xa5 + ? 
lockup_detector_update_enable+0x50/0x50 + __hrtimer_run_queues+0x99/0x4c0 + ? recalibrate_cpu_khz+0x10/0x10 + hrtimer_run_queues+0x9f/0xb0 + update_process_times+0x28/0x80 + tick_handle_periodic+0x1b/0x60 + __sysvec_apic_timer_interrupt+0x76/0x210 + asm_call_on_stack+0x12/0x20 + + sysvec_apic_timer_interrupt+0x7f/0x90 + asm_sysvec_apic_timer_interrupt+0x12/0x20 + RIP: 0010:btrfs_tree_unlock+0x91/0x1a0 [btrfs] + RSP: 0018:ffffc90007123a58 EFLAGS: 00000282 + RAX: ffff8881cea2fbe0 RBX: ffff8881cea2fbe0 RCX: 0000000000000000 + RDX: ffff8881d23fd200 RSI: ffffffff82045220 RDI: ffff8881cea2fba0 + RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000032 + R10: 0000160000000000 R11: 0000000000001000 R12: 0000000000001000 + R13: ffff8882357fd5b0 R14: ffff88816fa76e70 R15: ffff8881cea2fad0 + ? btrfs_tree_unlock+0x15b/0x1a0 [btrfs] + btrfs_release_path+0x67/0x80 [btrfs] + btrfs_insert_replace_extent+0x177/0x2c0 [btrfs] + btrfs_replace_file_extents+0x472/0x7c0 [btrfs] + btrfs_clone+0x9ba/0xbd0 [btrfs] + btrfs_clone_files.isra.0+0xeb/0x140 [btrfs] + ? file_update_time+0xcd/0x120 + btrfs_remap_file_range+0x322/0x3b0 [btrfs] + do_clone_file_range+0xb7/0x1e0 + vfs_clone_file_range+0x30/0xa0 + ioctl_file_clone+0x8a/0xc0 + do_vfs_ioctl+0x5b2/0x6f0 + __x64_sys_ioctl+0x37/0xa0 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x7f87977fc247 + RSP: 002b:00007ffd51a2f6d8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 + RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f87977fc247 + RDX: 00007ffd51a2f710 RSI: 000000004020940d RDI: 0000000000000003 + RBP: 0000000000000004 R08: 00007ffd51a79080 R09: 0000000000000000 + R10: 00005621f11352f2 R11: 0000000000000206 R12: 0000000000000000 + R13: 0000000000000000 R14: 00005621f128b958 R15: 0000000080000000 + Kernel Offset: disabled + ---[ end Kernel panic - not syncing: softlockup: hung tasks ]--- + +All of these lockup reports have the call chain btrfs_clone_files() -> +btrfs_clone() in common. 
btrfs_clone_files() calls btrfs_clone() with +both source and destination extents locked and loops over the source +extent to create the clones. + +Conditionally reschedule in the btrfs_clone() loop, to give some time back +to other processes. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Johannes Thumshirn +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ioctl.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 981091bd6c3c4..ebca009030c3a 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3854,6 +3854,8 @@ process_slot: + ret = -EINTR; + goto out; + } ++ ++ cond_resched(); + } + ret = 0; + +-- +2.27.0 + diff --git a/queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch b/queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch new file mode 100644 index 00000000000..4352dff5b76 --- /dev/null +++ b/queue-4.9/btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch @@ -0,0 +1,192 @@ +From af859eca833c6eb04908ddd3bb232731f8c6cb3a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Sep 2020 08:09:01 -0400 +Subject: btrfs: sysfs: init devices outside of the chunk_mutex + +From: Josef Bacik + +[ Upstream commit ca10845a56856fff4de3804c85e6424d0f6d0cde ] + +While running btrfs/061, btrfs/073, btrfs/078, or btrfs/178 we hit the +following lockdep splat: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.9.0-rc3+ #4 Not tainted + ------------------------------------------------------ + kswapd0/100 is trying to acquire lock: + ffff96ecc22ef4a0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x330 + + but task is already holding lock: + ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #3 (fs_reclaim){+.+.}-{0:0}: + fs_reclaim_acquire+0x65/0x80 + slab_pre_alloc_hook.constprop.0+0x20/0x200 + kmem_cache_alloc+0x37/0x270 + alloc_inode+0x82/0xb0 + iget_locked+0x10d/0x2c0 + kernfs_get_inode+0x1b/0x130 + kernfs_get_tree+0x136/0x240 + sysfs_get_tree+0x16/0x40 + vfs_get_tree+0x28/0xc0 + path_mount+0x434/0xc00 + __x64_sys_mount+0xe3/0x120 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #2 (kernfs_mutex){+.+.}-{3:3}: + __mutex_lock+0x7e/0x7e0 + kernfs_add_one+0x23/0x150 + kernfs_create_link+0x63/0xa0 + sysfs_do_create_link_sd+0x5e/0xd0 + btrfs_sysfs_add_devices_dir+0x81/0x130 + btrfs_init_new_device+0x67f/0x1250 + btrfs_ioctl+0x1ef/0x2e20 + __x64_sys_ioctl+0x83/0xb0 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}: + __mutex_lock+0x7e/0x7e0 + btrfs_chunk_alloc+0x125/0x3a0 + find_free_extent+0xdf6/0x1210 + btrfs_reserve_extent+0xb3/0x1b0 + btrfs_alloc_tree_block+0xb0/0x310 + alloc_tree_block_no_bg_flush+0x4a/0x60 + __btrfs_cow_block+0x11a/0x530 + btrfs_cow_block+0x104/0x220 + btrfs_search_slot+0x52e/0x9d0 + btrfs_insert_empty_items+0x64/0xb0 + btrfs_insert_delayed_items+0x90/0x4f0 + btrfs_commit_inode_delayed_items+0x93/0x140 + btrfs_log_inode+0x5de/0x2020 + btrfs_log_inode_parent+0x429/0xc90 + btrfs_log_new_name+0x95/0x9b + btrfs_rename2+0xbb9/0x1800 + vfs_rename+0x64f/0x9f0 + do_renameat2+0x320/0x4e0 + __x64_sys_rename+0x1f/0x30 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #0 (&delayed_node->mutex){+.+.}-{3:3}: + __lock_acquire+0x119c/0x1fc0 + lock_acquire+0xa7/0x3d0 + __mutex_lock+0x7e/0x7e0 + __btrfs_release_delayed_node.part.0+0x3f/0x330 + btrfs_evict_inode+0x24c/0x500 + evict+0xcf/0x1f0 + dispose_list+0x48/0x70 + prune_icache_sb+0x44/0x50 + super_cache_scan+0x161/0x1e0 + do_shrink_slab+0x178/0x3c0 + shrink_slab+0x17c/0x290 + shrink_node+0x2b2/0x6d0 + 
balance_pgdat+0x30a/0x670 + kswapd+0x213/0x4c0 + kthread+0x138/0x160 + ret_from_fork+0x1f/0x30 + + other info that might help us debug this: + + Chain exists of: + &delayed_node->mutex --> kernfs_mutex --> fs_reclaim + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(fs_reclaim); + lock(kernfs_mutex); + lock(fs_reclaim); + lock(&delayed_node->mutex); + + *** DEADLOCK *** + + 3 locks held by kswapd0/100: + #0: ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 + #1: ffffffff8dd65c50 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x115/0x290 + #2: ffff96ed2ade30e0 (&type->s_umount_key#36){++++}-{3:3}, at: super_cache_scan+0x38/0x1e0 + + stack backtrace: + CPU: 0 PID: 100 Comm: kswapd0 Not tainted 5.9.0-rc3+ #4 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 + Call Trace: + dump_stack+0x8b/0xb8 + check_noncircular+0x12d/0x150 + __lock_acquire+0x119c/0x1fc0 + lock_acquire+0xa7/0x3d0 + ? __btrfs_release_delayed_node.part.0+0x3f/0x330 + __mutex_lock+0x7e/0x7e0 + ? __btrfs_release_delayed_node.part.0+0x3f/0x330 + ? __btrfs_release_delayed_node.part.0+0x3f/0x330 + ? lock_acquire+0xa7/0x3d0 + ? find_held_lock+0x2b/0x80 + __btrfs_release_delayed_node.part.0+0x3f/0x330 + btrfs_evict_inode+0x24c/0x500 + evict+0xcf/0x1f0 + dispose_list+0x48/0x70 + prune_icache_sb+0x44/0x50 + super_cache_scan+0x161/0x1e0 + do_shrink_slab+0x178/0x3c0 + shrink_slab+0x17c/0x290 + shrink_node+0x2b2/0x6d0 + balance_pgdat+0x30a/0x670 + kswapd+0x213/0x4c0 + ? _raw_spin_unlock_irqrestore+0x41/0x50 + ? add_wait_queue_exclusive+0x70/0x70 + ? balance_pgdat+0x670/0x670 + kthread+0x138/0x160 + ? kthread_create_worker_on_cpu+0x40/0x40 + ret_from_fork+0x1f/0x30 + +This happens because we are holding the chunk_mutex at the time of +adding in a new device. However we only need to hold the +device_list_mutex, as we're going to iterate over the fs_devices +devices. 
Move the sysfs init stuff outside of the chunk_mutex to get +rid of this lockdep splat. + +CC: stable@vger.kernel.org # 4.4.x: f3cd2c58110dad14e: btrfs: sysfs, rename device_link add/remove functions +CC: stable@vger.kernel.org # 4.4.x +Reported-by: David Sterba +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/volumes.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index c31b02692f706..56ae889fb44f2 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -2431,9 +2431,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) + btrfs_set_super_num_devices(root->fs_info->super_copy, + tmp + 1); + +- /* add sysfs device entry */ +- btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device); +- + /* + * we've got more storage, clear any full flags on the space + * infos +@@ -2441,6 +2438,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) + btrfs_clear_space_info_full(root->fs_info); + + unlock_chunks(root); ++ ++ /* Add sysfs device entry */ ++ btrfs_sysfs_add_device_link(fs_info->fs_devices, device); ++ + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + + if (seeding_dev) { +-- +2.27.0 + diff --git a/queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch b/queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch new file mode 100644 index 00000000000..6b0440afe90 --- /dev/null +++ b/queue-4.9/can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch @@ -0,0 +1,99 @@ +From a613b860c1bb953b039ddfe313f03fffc64aedb1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2019 09:39:02 +0100 +Subject: can: can_create_echo_skb(): fix echo skb generation: always use + skb_clone() + +From: Oleksij Rempel + +[ Upstream commit 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 ] + +All user space generated SKBs are owned by a socket 
(unless injected into the +kernel via AF_PACKET). If a socket is closed, all associated skbs will be cleaned +up. + +This leads to a problem when a CAN driver calls can_put_echo_skb() on an +unshared SKB. If the socket is closed prior to the TX complete handler, +can_get_echo_skb() and the subsequent delivering of the echo SKB to all +registered callbacks, a SKB with a refcount of 0 is delivered. + +To avoid the problem, in can_get_echo_skb() the original SKB is now always +cloned, regardless of shared SKB or not. If the process exits it can now +safely discard its SKBs, without disturbing the delivery of the echo SKB. + +The problem shows up in the j1939 stack, when it clones the incoming skb, which +detects the already 0 refcount. + +We can easily reproduce this with following example: + +testj1939 -B -r can0: & +cansend can0 1823ff40#0123 + +WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174 +refcount_t: addition on 0; use-after-free. +Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan +CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1 +Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) +Backtrace: +[] (dump_backtrace) from [] (show_stack+0x20/0x24) +[] (show_stack) from [] (dump_stack+0x8c/0xa0) +[] (dump_stack) from [] (__warn+0xe0/0x108) +[] (__warn) from [] (warn_slowpath_fmt+0xa8/0xcc) +[] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x108/0x174) +[] (refcount_warn_saturate) from [] (j1939_can_recv+0x20c/0x210) +[] (j1939_can_recv) from [] (can_rcv_filter+0xb4/0x268) +[] (can_rcv_filter) from [] (can_receive+0xb0/0xe4) +[] (can_receive) from [] (can_rcv+0x48/0x98) +[] (can_rcv) from [] (__netif_receive_skb_one_core+0x64/0x88) +[] (__netif_receive_skb_one_core) from [] (__netif_receive_skb+0x38/0x94) +[] (__netif_receive_skb) from [] (netif_receive_skb_internal+0x64/0xf8) +[] (netif_receive_skb_internal) from [] (netif_receive_skb+0x34/0x19c) +[] (netif_receive_skb) 
from [] (can_rx_offload_napi_poll+0x58/0xb4) + +Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") +Signed-off-by: Oleksij Rempel +Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de +Acked-by: Oliver Hartkopp +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + include/linux/can/skb.h | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h +index 51bb6532785c3..1a2111c775ae1 100644 +--- a/include/linux/can/skb.h ++++ b/include/linux/can/skb.h +@@ -60,21 +60,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) + */ + static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) + { +- if (skb_shared(skb)) { +- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); ++ struct sk_buff *nskb; + +- if (likely(nskb)) { +- can_skb_set_owner(nskb, skb->sk); +- consume_skb(skb); +- return nskb; +- } else { +- kfree_skb(skb); +- return NULL; +- } ++ nskb = skb_clone(skb, GFP_ATOMIC); ++ if (unlikely(!nskb)) { ++ kfree_skb(skb); ++ return NULL; + } + +- /* we can assume to have an unshared skb with proper owner */ +- return skb; ++ can_skb_set_owner(nskb, skb->sk); ++ consume_skb(skb); ++ return nskb; + } + + #endif /* !_CAN_SKB_H */ +-- +2.27.0 + diff --git a/queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch b/queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch new file mode 100644 index 00000000000..ae7637d1e76 --- /dev/null +++ b/queue-4.9/can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch @@ -0,0 +1,49 @@ +From e0be8a352fdb30df35547909136e2ce7ec21caa5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Oct 2020 08:44:43 +0200 +Subject: can: dev: __can_get_echo_skb(): fix real payload length return value + for RTR frames + +From: Oliver Hartkopp + +[ Upstream commit ed3320cec279407a86bc4c72edc4a39eb49165ec ] + +The can_get_echo_skb() function 
returns the number of received bytes to +be used for netdev statistics. In the case of RTR frames we get a valid +(potential non-zero) data length value which has to be passed for further +operations. But on the wire RTR frames have no payload length. Therefore +the value to be used in the statistics has to be zero for RTR frames. + +Reported-by: Vincent Mailhol +Signed-off-by: Oliver Hartkopp +Link: https://lore.kernel.org/r/20201020064443.80164-1-socketcan@hartkopp.net +Fixes: cf5046b309b3 ("can: dev: let can_get_echo_skb() return dlc of CAN frame") +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + drivers/net/can/dev.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c +index aa2158fabf2ac..617eb75c7c0ce 100644 +--- a/drivers/net/can/dev.c ++++ b/drivers/net/can/dev.c +@@ -469,9 +469,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; +- u8 len = cf->len; + +- *len_ptr = len; ++ /* get the real payload length for netdev statistics */ ++ if (cf->can_id & CAN_RTR_FLAG) ++ *len_ptr = 0; ++ else ++ *len_ptr = cf->len; ++ + priv->echo_skb[idx] = NULL; + + return skb; +-- +2.27.0 + diff --git a/queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch b/queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch new file mode 100644 index 00000000000..c62f47d4ef4 --- /dev/null +++ b/queue-4.9/can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch @@ -0,0 +1,67 @@ +From 1c1f66058cf76240b374c6d0d6c77bec7e0a38ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 3 Oct 2020 00:41:45 +0900 +Subject: can: dev: can_get_echo_skb(): prevent call to kfree_skb() in hard IRQ + context + +From: Vincent Mailhol + +[ Upstream commit 2283f79b22684d2812e5c76fc2280aae00390365 ] + +If a driver calls can_get_echo_skb() during 
a hardware IRQ (which is often, but +not always, the case), the 'WARN_ON(in_irq)' in +net/core/skbuff.c#skb_release_head_state() might be triggered, under network +congestion circumstances, together with the potential risk of a NULL pointer +dereference. + +The root cause of this issue is the call to kfree_skb() instead of +dev_kfree_skb_irq() in net/core/dev.c#enqueue_to_backlog(). + +This patch prevents the skb from being freed within the call to netif_rx() by +incrementing its reference count with skb_get(). The skb is finally freed by +one of the in-irq-context safe functions: dev_consume_skb_any() or +dev_kfree_skb_any(). The "any" version is used because some drivers might call +can_get_echo_skb() in a normal context. + +The reason for this issue to occur is that initially, in the core network +stack, loopback skb were not supposed to be received in hardware IRQ context. +The CAN stack is an exception. + +This bug was previously reported back in 2017 in [1] but the proposed patch +never got accepted. + +While [1] directly modifies net/core/dev.c, we try to propose here a +smoother modification local to CAN network stack (the assumption +behind is that only CAN devices are affected by this issue). 
+ +[1] http://lore.kernel.org/r/57a3ffb6-3309-3ad5-5a34-e93c3fe3614d@cetitec.com + +Signed-off-by: Vincent Mailhol +Link: https://lore.kernel.org/r/20201002154219.4887-2-mailhol.vincent@wanadoo.fr +Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + drivers/net/can/dev.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c +index ffc5467a1ec2b..aa2158fabf2ac 100644 +--- a/drivers/net/can/dev.c ++++ b/drivers/net/can/dev.c +@@ -496,7 +496,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) + if (!skb) + return 0; + +- netif_rx(skb); ++ skb_get(skb); ++ if (netif_rx(skb) == NET_RX_SUCCESS) ++ dev_consume_skb_any(skb); ++ else ++ dev_kfree_skb_any(skb); + + return len; + } +-- +2.27.0 + diff --git a/queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch b/queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch new file mode 100644 index 00000000000..77ccbae2349 --- /dev/null +++ b/queue-4.9/can-peak_usb-add-range-checking-in-decode-operations.patch @@ -0,0 +1,129 @@ +From a2e39bc3dd0ccdc1358d1b941e93b090c316dab7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Aug 2020 17:06:04 +0300 +Subject: can: peak_usb: add range checking in decode operations + +From: Dan Carpenter + +[ Upstream commit a6921dd524fe31d1f460c161d3526a407533b6db ] + +These values come from skb->data so Smatch considers them untrusted. I +believe Smatch is correct but I don't have a way to test this. + +The usb_if->dev[] array has 2 elements but the index is in the 0-15 +range without checks. The cfd->len can be up to 255 but the maximum +valid size is CANFD_MAX_DLEN (64) so that could lead to memory +corruption. 
+ +Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/20200813140604.GA456946@mwanda +Acked-by: Stephane Grosjean +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 48 +++++++++++++++++----- + 1 file changed, 37 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +index 40647b837b31f..d314e73f3d061 100644 +--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c ++++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +@@ -475,12 +475,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, + struct pucan_msg *rx_msg) + { + struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg; +- struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)]; +- struct net_device *netdev = dev->netdev; ++ struct peak_usb_device *dev; ++ struct net_device *netdev; + struct canfd_frame *cfd; + struct sk_buff *skb; + const u16 rx_msg_flags = le16_to_cpu(rm->flags); + ++ if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev)) ++ return -ENOMEM; ++ ++ dev = usb_if->dev[pucan_msg_get_channel(rm)]; ++ netdev = dev->netdev; ++ + if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) { + /* CANFD frame case */ + skb = alloc_canfd_skb(netdev, &cfd); +@@ -527,15 +533,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, + struct pucan_msg *rx_msg) + { + struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg; +- struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; +- struct pcan_usb_fd_device *pdev = +- container_of(dev, struct pcan_usb_fd_device, dev); ++ struct pcan_usb_fd_device *pdev; + enum can_state new_state = CAN_STATE_ERROR_ACTIVE; + enum can_state rx_state, tx_state; +- struct net_device *netdev = dev->netdev; ++ struct peak_usb_device *dev; ++ struct net_device *netdev; + struct can_frame *cf; + 
struct sk_buff *skb; + ++ if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev)) ++ return -ENOMEM; ++ ++ dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; ++ pdev = container_of(dev, struct pcan_usb_fd_device, dev); ++ netdev = dev->netdev; ++ + /* nothing should be sent while in BUS_OFF state */ + if (dev->can.state == CAN_STATE_BUS_OFF) + return 0; +@@ -588,9 +600,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if, + struct pucan_msg *rx_msg) + { + struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg; +- struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)]; +- struct pcan_usb_fd_device *pdev = +- container_of(dev, struct pcan_usb_fd_device, dev); ++ struct pcan_usb_fd_device *pdev; ++ struct peak_usb_device *dev; ++ ++ if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev)) ++ return -EINVAL; ++ ++ dev = usb_if->dev[pucan_ermsg_get_channel(er)]; ++ pdev = container_of(dev, struct pcan_usb_fd_device, dev); + + /* keep a trace of tx and rx error counters for later use */ + pdev->bec.txerr = er->tx_err_cnt; +@@ -604,11 +621,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if, + struct pucan_msg *rx_msg) + { + struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg; +- struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)]; +- struct net_device *netdev = dev->netdev; ++ struct peak_usb_device *dev; ++ struct net_device *netdev; + struct can_frame *cf; + struct sk_buff *skb; + ++ if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev)) ++ return -EINVAL; ++ ++ dev = usb_if->dev[pufd_omsg_get_channel(ov)]; ++ netdev = dev->netdev; ++ + /* allocate an skb to store the error frame */ + skb = alloc_can_err_skb(netdev, &cf); + if (!skb) +@@ -726,6 +749,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev, + u16 tx_msg_size, tx_msg_flags; + u8 can_dlc; + ++ if (cfd->len > CANFD_MAX_DLEN) ++ return -EINVAL; ++ + tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + 
cfd->len, 4); + tx_msg->size = cpu_to_le16(tx_msg_size); + tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX); +-- +2.27.0 + diff --git a/queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch b/queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch new file mode 100644 index 00000000000..f79bec0956f --- /dev/null +++ b/queue-4.9/can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch @@ -0,0 +1,96 @@ +From ab3b057ca307fc3bfae115aac0e84b38420500a0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Oct 2020 10:56:31 +0200 +Subject: can: peak_usb: peak_usb_get_ts_time(): fix timestamp wrapping + +From: Stephane Grosjean + +[ Upstream commit ecc7b4187dd388549544195fb13a11b4ea8e6a84 ] + +Fabian Inostroza has discovered a potential +problem in the hardware timestamp reporting from the PCAN-USB USB CAN interface +(only), related to the fact that a timestamp of an event may precede the +timestamp used for synchronization when both records are part of the same USB +packet. However, this case was used to detect the wrapping of the time counter. + +This patch details and fixes the two identified cases where this problem can +occur. 
+ +Reported-by: Fabian Inostroza +Signed-off-by: Stephane Grosjean +Link: https://lore.kernel.org/r/20201014085631.15128-1-s.grosjean@peak-system.com +Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + drivers/net/can/usb/peak_usb/pcan_usb_core.c | 51 ++++++++++++++++++-- + 1 file changed, 46 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c +index 6cd4317fe94df..74b37309efab7 100644 +--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c ++++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c +@@ -152,14 +152,55 @@ void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts, + /* protect from getting timeval before setting now */ + if (time_ref->tv_host.tv_sec > 0) { + u64 delta_us; ++ s64 delta_ts = 0; ++ ++ /* General case: dev_ts_1 < dev_ts_2 < ts, with: ++ * ++ * - dev_ts_1 = previous sync timestamp ++ * - dev_ts_2 = last sync timestamp ++ * - ts = event timestamp ++ * - ts_period = known sync period (theoretical) ++ * ~ dev_ts2 - dev_ts1 ++ * *but*: ++ * ++ * - time counters wrap (see adapter->ts_used_bits) ++ * - sometimes, dev_ts_1 < ts < dev_ts2 ++ * ++ * "normal" case (sync time counters increase): ++ * must take into account case when ts wraps (tsw) ++ * ++ * < ts_period > < > ++ * | | | ++ * ---+--------+----+-------0-+--+--> ++ * ts_dev_1 | ts_dev_2 | ++ * ts tsw ++ */ ++ if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { ++ /* case when event time (tsw) wraps */ ++ if (ts < time_ref->ts_dev_1) ++ delta_ts = 1 << time_ref->adapter->ts_used_bits; ++ ++ /* Otherwise, sync time counter (ts_dev_2) has wrapped: ++ * handle case when event time (tsn) hasn't. 
++ * ++ * < ts_period > < > ++ * | | | ++ * ---+--------+--0-+---------+--+--> ++ * ts_dev_1 | ts_dev_2 | ++ * tsn ts ++ */ ++ } else if (time_ref->ts_dev_1 < ts) { ++ delta_ts = -(1 << time_ref->adapter->ts_used_bits); ++ } + +- delta_us = ts - time_ref->ts_dev_2; +- if (ts < time_ref->ts_dev_2) +- delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1; ++ /* add delay between last sync and event timestamps */ ++ delta_ts += (signed int)(ts - time_ref->ts_dev_2); + +- delta_us += time_ref->ts_total; ++ /* add time from beginning to last sync */ ++ delta_ts += time_ref->ts_total; + +- delta_us *= time_ref->adapter->us_per_ts_scale; ++ /* convert ticks number into microseconds */ ++ delta_us = delta_ts * time_ref->adapter->us_per_ts_scale; + delta_us >>= time_ref->adapter->us_per_ts_shift; + + *tv = time_ref->tv_host_0; +-- +2.27.0 + diff --git a/queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch b/queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch new file mode 100644 index 00000000000..78024df6a2a --- /dev/null +++ b/queue-4.9/genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch @@ -0,0 +1,37 @@ +From a934424c7b0a0ff8782e435f26c944a53a2b97ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Oct 2020 21:41:44 +0100 +Subject: genirq: Let GENERIC_IRQ_IPI select IRQ_DOMAIN_HIERARCHY + +From: Marc Zyngier + +[ Upstream commit 151a535171be6ff824a0a3875553ea38570f4c05 ] + +kernel/irq/ipi.c otherwise fails to compile if nothing else +selects it. 
+ +Fixes: 379b656446a3 ("genirq: Add GENERIC_IRQ_IPI Kconfig symbol") +Reported-by: Pavel Machek +Tested-by: Pavel Machek +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20201015101222.GA32747@amd +Signed-off-by: Sasha Levin +--- + kernel/irq/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig +index 3bbfd6a9c4756..bb3a46cbe034c 100644 +--- a/kernel/irq/Kconfig ++++ b/kernel/irq/Kconfig +@@ -67,6 +67,7 @@ config IRQ_DOMAIN_HIERARCHY + # Generic IRQ IPI support + config GENERIC_IRQ_IPI + bool ++ select IRQ_DOMAIN_HIERARCHY + + # Generic MSI interrupt support + config GENERIC_MSI_IRQ +-- +2.27.0 + diff --git a/queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch b/queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch new file mode 100644 index 00000000000..9f5f1d1afed --- /dev/null +++ b/queue-4.9/gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch @@ -0,0 +1,42 @@ +From bcf2329e3d2cb07ec6ef7355dd07b32fa00178c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Oct 2020 10:52:29 -0400 +Subject: gfs2: Wake up when sd_glock_disposal becomes zero + +From: Alexander Aring + +[ Upstream commit da7d554f7c62d0c17c1ac3cc2586473c2d99f0bd ] + +Commit fc0e38dae645 ("GFS2: Fix glock deallocation race") fixed a +sd_glock_disposal accounting bug by adding a missing atomic_dec +statement, but it failed to wake up sd_glock_wait when that decrement +causes sd_glock_disposal to reach zero. As a consequence, +gfs2_gl_hash_clear can now run into a 10-minute timeout instead of +being woken up. Add the missing wakeup. 
+ +Fixes: fc0e38dae645 ("GFS2: Fix glock deallocation race") +Cc: stable@vger.kernel.org # v2.6.39+ +Signed-off-by: Alexander Aring +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Sasha Levin +--- + fs/gfs2/glock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c +index efd44d5645d83..f19e49a5d032b 100644 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -758,7 +758,8 @@ again: + } + kfree(gl->gl_lksb.sb_lvbptr); + kmem_cache_free(cachep, gl); +- atomic_dec(&sdp->sd_glock_disposal); ++ if (atomic_dec_and_test(&sdp->sd_glock_disposal)) ++ wake_up(&sdp->sd_glock_wait); + *glp = tmp; + + return ret; +-- +2.27.0 + diff --git a/queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch b/queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch new file mode 100644 index 00000000000..204629234ef --- /dev/null +++ b/queue-4.9/mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch @@ -0,0 +1,72 @@ +From 96e7deb0a438672dc1ad9ef1d86e43d3ebd30126 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Nov 2020 17:07:40 -0800 +Subject: mm: mempolicy: fix potential pte_unmap_unlock pte error + +From: Shijie Luo + +[ Upstream commit 3f08842098e842c51e3b97d0dcdebf810b32558e ] + +When flags in queue_pages_pte_range don't have MPOL_MF_MOVE or +MPOL_MF_MOVE_ALL bits, code breaks and passing origin pte - 1 to +pte_unmap_unlock seems like not a good idea. + +queue_pages_pte_range can run in MPOL_MF_MOVE_ALL mode which doesn't +migrate misplaced pages but returns with EIO when encountering such a +page. Since commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return +-EIO when MPOL_MF_STRICT is specified") an early break on the first pte +in the range results in pte_unmap_unlock on an underflow pte. This can +lead to lockups later on when somebody tries to lock the pte resp. +page_table_lock again.
+ +Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") +Signed-off-by: Shijie Luo +Signed-off-by: Miaohe Lin +Signed-off-by: Andrew Morton +Reviewed-by: Oscar Salvador +Acked-by: Michal Hocko +Cc: Miaohe Lin +Cc: Feilong Lin +Cc: Shijie Luo +Cc: +Link: https://lkml.kernel.org/r/20201019074853.50856-1-luoshijie1@huawei.com +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/mempolicy.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index a2be65bf5d8cc..2f443767fd1b4 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -487,7 +487,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + struct queue_pages *qp = walk->private; + unsigned long flags = qp->flags; + int nid, ret; +- pte_t *pte; ++ pte_t *pte, *mapped_pte; + spinlock_t *ptl; + + if (pmd_trans_huge(*pmd)) { +@@ -515,7 +515,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + if (pmd_trans_unstable(pmd)) + return 0; + retry: +- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); ++ mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + if (!pte_present(*pte)) + continue; +@@ -554,7 +554,7 @@ retry: + } else + break; + } +- pte_unmap_unlock(pte - 1, ptl); ++ pte_unmap_unlock(mapped_pte, ptl); + cond_resched(); + return addr != end ? 
-EIO : 0; + } +-- +2.27.0 + diff --git a/queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch b/queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch new file mode 100644 index 00000000000..d288d13df81 --- /dev/null +++ b/queue-4.9/net-xfrm-fix-a-race-condition-during-allocing-spi.patch @@ -0,0 +1,94 @@ +From cb457f51b18f48e226ab3701d5d507383ab2e2c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Oct 2020 09:05:35 +0200 +Subject: net: xfrm: fix a race condition during allocing spi + +From: zhuoliang zhang + +[ Upstream commit a779d91314ca7208b7feb3ad817b62904397c56d ] + +we found that the following race condition exists in +xfrm_alloc_userspi flow: + +user thread state_hash_work thread +---- ---- +xfrm_alloc_userspi() + __find_acq_core() + /*alloc new xfrm_state:x*/ + xfrm_state_alloc() + /*schedule state_hash_work thread*/ + xfrm_hash_grow_check() xfrm_hash_resize() + xfrm_alloc_spi /*hold lock*/ + x->id.spi = htonl(spi) spin_lock_bh(&net->xfrm.xfrm_state_lock) + /*waiting lock release*/ xfrm_hash_transfer() + spin_lock_bh(&net->xfrm.xfrm_state_lock) /*add x into hlist:net->xfrm.state_byspi*/ + hlist_add_head_rcu(&x->byspi) + spin_unlock_bh(&net->xfrm.xfrm_state_lock) + + /*add x into hlist:net->xfrm.state_byspi 2 times*/ + hlist_add_head_rcu(&x->byspi) + +1. a new state x is alloced in xfrm_state_alloc() and added into the bydst hlist +in __find_acq_core() on the LHS; +2. on the RHS, state_hash_work thread travels the old bydst and transfers every xfrm_state +(including x) into the new bydst hlist and new byspi hlist; +3. user thread on the LHS gets the lock and adds x into the new byspi hlist again. + +So the same xfrm_state (x) is added into the same list_hash +(net->xfrm.state_byspi) 2 times that makes the list_hash become +an infinite loop.
+ +To fix the race, x->id.spi = htonl(spi) in the xfrm_alloc_spi() is moved +to the back of spin_lock_bh, so that state_hash_work thread no longer adds x +whose id.spi is zero into the hash_list. + +Fixes: f034b5d4efdf ("[XFRM]: Dynamic xfrm_state hash table sizing.") +Signed-off-by: zhuoliang zhang +Acked-by: Herbert Xu +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_state.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index 0eb85765d35a1..4d19f2ff6e052 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -1591,6 +1591,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) + int err = -ENOENT; + __be32 minspi = htonl(low); + __be32 maxspi = htonl(high); ++ __be32 newspi = 0; + u32 mark = x->mark.v & x->mark.m; + + spin_lock_bh(&x->lock); +@@ -1609,21 +1610,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) + xfrm_state_put(x0); + goto unlock; + } +- x->id.spi = minspi; ++ newspi = minspi; + } else { + u32 spi = 0; + for (h = 0; h < high-low+1; h++) { + spi = low + prandom_u32()%(high-low+1); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); + if (x0 == NULL) { +- x->id.spi = htonl(spi); ++ newspi = htonl(spi); + break; + } + xfrm_state_put(x0); + } + } +- if (x->id.spi) { ++ if (newspi) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); ++ x->id.spi = newspi; + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); +-- +2.27.0 + diff --git a/queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch b/queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch new file mode 100644 index 00000000000..14499dc2b7f --- /dev/null +++ b/queue-4.9/perf-tools-add-missing-swap-for-ino_generation.patch @@ -0,0 +1,36 @@ +From
a7e6a07cd2426880a60fe78f90059292dd54273d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Nov 2020 00:31:03 +0100 +Subject: perf tools: Add missing swap for ino_generation + +From: Jiri Olsa + +[ Upstream commit fe01adb72356a4e2f8735e4128af85921ca98fa1 ] + +We are missing swap for ino_generation field. + +Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support") +Signed-off-by: Jiri Olsa +Acked-by: Namhyung Kim +Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/session.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c +index 7e0573e55a356..89808ab008ad2 100644 +--- a/tools/perf/util/session.c ++++ b/tools/perf/util/session.c +@@ -482,6 +482,7 @@ static void perf_event__mmap2_swap(union perf_event *event, + event->mmap2.maj = bswap_32(event->mmap2.maj); + event->mmap2.min = bswap_32(event->mmap2.min); + event->mmap2.ino = bswap_64(event->mmap2.ino); ++ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); + + if (sample_id_all) { + void *data = &event->mmap2.filename; +-- +2.27.0 + diff --git a/queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch b/queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch new file mode 100644 index 00000000000..b81018c243f --- /dev/null +++ b/queue-4.9/regulator-defer-probe-when-trying-to-get-voltage-fro.patch @@ -0,0 +1,48 @@ +From dc3de59a16a47f376ed42255deb9b06c4ef32417 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Nov 2020 22:27:27 +0100 +Subject: regulator: defer probe when trying to get voltage from unresolved + supply +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michał Mirosław + +[ Upstream commit cf1ad559a20d1930aa7b47a52f54e1f8718de301 ] + +regulator_get_voltage_rdev() is called in regulator probe() when +applying machine 
constraints. The "fixed" commit exposed the problem +that non-bypassed regulators can forward the request to its parent +(like bypassed ones) supply. Return -EPROBE_DEFER when the supply +is expected but not resolved yet. + +Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") +Cc: stable@vger.kernel.org +Signed-off-by: Michał Mirosław +Reported-by: Ondřej Jirman +Reported-by: Corentin Labbe +Tested-by: Ondřej Jirman +Link: https://lore.kernel.org/r/a9041d68b4d35e4a2dd71629c8a6422662acb5ee.1604351936.git.mirq-linux@rere.qmqm.pl +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/regulator/core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c +index 0f730e4bf6bcb..0caf751d85ded 100644 +--- a/drivers/regulator/core.c ++++ b/drivers/regulator/core.c +@@ -3185,6 +3185,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) + ret = rdev->desc->fixed_uV; + } else if (rdev->supply) { + ret = _regulator_get_voltage(rdev->supply->rdev); ++ } else if (rdev->supply_name) { ++ return -EPROBE_DEFER; + } else { + return -EINVAL; + } +-- +2.27.0 + diff --git a/queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch b/queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch new file mode 100644 index 00000000000..722fd5fe04a --- /dev/null +++ b/queue-4.9/ring-buffer-fix-recursion-protection-transitions-bet.patch @@ -0,0 +1,120 @@ +From 485529432128dddadf79e4ceb17d2c9b11a94c27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Nov 2020 15:31:27 -0500 +Subject: ring-buffer: Fix recursion protection transitions between interrupt + context + +From: Steven Rostedt (VMware) + +[ Upstream commit b02414c8f045ab3b9afc816c3735bc98c5c3d262 ] + +The recursion protection of the ring buffer depends on preempt_count() to be +correct. 
But it is possible that the ring buffer gets called after an +interrupt comes in but before it updates the preempt_count(). This will +trigger a false positive in the recursion code. + +Use the same trick from the ftrace function callback recursion code which +uses a "transition" bit that gets set, to allow for a single recursion +to handle transitions between contexts. + +Cc: stable@vger.kernel.org +Fixes: 567cd4da54ff4 ("ring-buffer: User context bit recursion checking") +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sasha Levin +--- + kernel/trace/ring_buffer.c | 54 +++++++++++++++++++++++++++++++------- + 1 file changed, 44 insertions(+), 10 deletions(-) + +diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c +index fb2aa2430edcc..55f60d2edc3fb 100644 +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -416,14 +416,16 @@ struct rb_event_info { + + /* + * Used for which event context the event is in. +- * NMI = 0 +- * IRQ = 1 +- * SOFTIRQ = 2 +- * NORMAL = 3 ++ * TRANSITION = 0 ++ * NMI = 1 ++ * IRQ = 2 ++ * SOFTIRQ = 3 ++ * NORMAL = 4 + * + * See trace_recursive_lock() comment below for more details. + */ + enum { ++ RB_CTX_TRANSITION, + RB_CTX_NMI, + RB_CTX_IRQ, + RB_CTX_SOFTIRQ, +@@ -2579,10 +2581,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. + * +- * bit 0 = NMI context +- * bit 1 = IRQ context +- * bit 2 = SoftIRQ context +- * bit 3 = normal context. ++ * bit 1 = NMI context ++ * bit 2 = IRQ context ++ * bit 3 = SoftIRQ context ++ * bit 4 = normal context. + * + * This works because this is the order of contexts that can + * preempt other contexts.
A SoftIRQ never preempts an IRQ +@@ -2605,6 +2607,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. ++ * ++ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit ++ * is set when a recursion is detected at the current context, and if ++ * the TRANSITION bit is already set, it will fail the recursion. ++ * This is needed because there's a lag between the changing of ++ * interrupt context and updating the preempt count. In this case, ++ * a false positive will be found. To handle this, one extra recursion ++ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION ++ * bit is already set, then it is considered a recursion and the function ++ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. ++ * ++ * On the trace_recursive_unlock(), the TRANSITION bit will be the first ++ * to be cleared. Even if it wasn't the context that set it. That is, ++ * if an interrupt comes in while NORMAL bit is set and the ring buffer ++ * is called before preempt_count() is updated, since the check will ++ * be on the NORMAL bit, the TRANSITION bit will then be set. If an ++ * NMI then comes in, it will set the NMI bit, but when the NMI code ++ * does the trace_recursive_unlock() it will clear the TRANSTION bit ++ * and leave the NMI bit set. But this is fine, because the interrupt ++ * code that set the TRANSITION bit will then clear the NMI bit when it ++ * calls trace_recursive_unlock(). If another NMI comes in, it will ++ * set the TRANSITION bit and continue. ++ * ++ * Note: The TRANSITION bit only handles a single transition between context. 
+ */ + + static __always_inline int +@@ -2623,8 +2649,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) + } else + bit = RB_CTX_NORMAL; + +- if (unlikely(val & (1 << bit))) +- return 1; ++ if (unlikely(val & (1 << bit))) { ++ /* ++ * It is possible that this was called by transitioning ++ * between interrupt context, and preempt_count() has not ++ * been updated yet. In this case, use the TRANSITION bit. ++ */ ++ bit = RB_CTX_TRANSITION; ++ if (val & (1 << bit)) ++ return 1; ++ } + + val |= (1 << bit); + cpu_buffer->current_context = val; +-- +2.27.0 + diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..3b5971c2895 --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,17 @@ +regulator-defer-probe-when-trying-to-get-voltage-fro.patch +ring-buffer-fix-recursion-protection-transitions-bet.patch +gfs2-wake-up-when-sd_glock_disposal-becomes-zero.patch +mm-mempolicy-fix-potential-pte_unmap_unlock-pte-erro.patch +time-prevent-undefined-behaviour-in-timespec64_to_ns.patch +btrfs-sysfs-init-devices-outside-of-the-chunk_mutex.patch +btrfs-reschedule-when-cloning-lots-of-extents.patch +genirq-let-generic_irq_ipi-select-irq_domain_hierarc.patch +net-xfrm-fix-a-race-condition-during-allocing-spi.patch +perf-tools-add-missing-swap-for-ino_generation.patch +alsa-hda-prevent-undefined-shift-in-snd_hdac_ext_bus.patch +can-dev-can_get_echo_skb-prevent-call-to-kfree_skb-i.patch +can-dev-__can_get_echo_skb-fix-real-payload-length-r.patch +can-can_create_echo_skb-fix-echo-skb-generation-alwa.patch +can-peak_usb-add-range-checking-in-decode-operations.patch +can-peak_usb-peak_usb_get_ts_time-fix-timestamp-wrap.patch +xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch diff --git a/queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch b/queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch new file mode 100644 index 00000000000..3eb729faceb --- /dev/null +++ 
b/queue-4.9/time-prevent-undefined-behaviour-in-timespec64_to_ns.patch @@ -0,0 +1,59 @@ +From 1352df9b2a0d6898ff7777618c1fc1b7284958bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Sep 2020 17:30:13 +0800 +Subject: time: Prevent undefined behaviour in timespec64_to_ns() + +From: Zeng Tao + +[ Upstream commit cb47755725da7b90fecbb2aa82ac3b24a7adb89b ] + +UBSAN reports: + +Undefined behaviour in ./include/linux/time64.h:127:27 +signed integer overflow: +17179869187 * 1000000000 cannot be represented in type 'long long int' +Call Trace: + timespec64_to_ns include/linux/time64.h:127 [inline] + set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180 + do_setitimer+0x8e/0x740 kernel/time/itimer.c:245 + __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336 + do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295 + +Commit bd40a175769d ("y2038: itimer: change implementation to timespec64") +replaced the original conversion which handled time clamping correctly with +timespec64_to_ns() which has no overflow protection. + +Fix it in timespec64_to_ns() as this is not necessarily limited to the +usage in itimers. 
+ +[ tglx: Added comment and adjusted the fixes tag ] + +Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64") +Signed-off-by: Zeng Tao +Signed-off-by: Thomas Gleixner +Reviewed-by: Arnd Bergmann +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com +Signed-off-by: Sasha Levin +--- + include/linux/time64.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/include/linux/time64.h b/include/linux/time64.h +index 980c71b3001a5..2a45b8c87edbf 100644 +--- a/include/linux/time64.h ++++ b/include/linux/time64.h +@@ -188,6 +188,10 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts) + */ + static inline s64 timespec64_to_ns(const struct timespec64 *ts) + { ++ /* Prevent multiplication overflow */ ++ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) ++ return KTIME_MAX; ++ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; + } + +-- +2.27.0 + diff --git a/queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch b/queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch new file mode 100644 index 00000000000..eb5a0845c13 --- /dev/null +++ b/queue-4.9/xfs-flush-new-eof-page-on-truncate-to-avoid-post-eof.patch @@ -0,0 +1,70 @@ +From a8a60e1eee8daa4747040e6dd5dd96726d958f6a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Oct 2020 14:30:48 -0700 +Subject: xfs: flush new eof page on truncate to avoid post-eof corruption + +From: Brian Foster + +[ Upstream commit 869ae85dae64b5540e4362d7fe4cd520e10ec05c ] + +It is possible to expose non-zeroed post-EOF data in XFS if the new +EOF page is dirty, backed by an unwritten block and the truncate +happens to race with writeback. iomap_truncate_page() will not zero +the post-EOF portion of the page if the underlying block is +unwritten. The subsequent call to truncate_setsize() will, but +doesn't dirty the page. 
Therefore, if writeback happens to complete +after iomap_truncate_page() (so it still sees the unwritten block) +but before truncate_setsize(), the cached page becomes inconsistent +with the on-disk block. A mapped read after the associated page is +reclaimed or invalidated exposes non-zero post-EOF data. + +For example, consider the following sequence when run on a kernel +modified to explicitly flush the new EOF page within the race +window: + +$ xfs_io -fc "falloc 0 4k" -c fsync /mnt/file +$ xfs_io -c "pwrite 0 4k" -c "truncate 1k" /mnt/file + ... +$ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file +00000400: 00 00 00 00 00 00 00 00 ........ +$ umount /mnt/; mount /mnt/ +$ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file +00000400: cd cd cd cd cd cd cd cd ........ + +Update xfs_setattr_size() to explicitly flush the new EOF page prior +to the page truncate to ensure iomap has the latest state of the +underlying block. + +Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path") +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_iops.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c +index 7bfddcd32d73e..0d587657056d8 100644 +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -864,6 +864,16 @@ xfs_setattr_size( + if (newsize > oldsize) { + error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); + } else { ++ /* ++ * iomap won't detect a dirty page over an unwritten block (or a ++ * cow block over a hole) and subsequently skips zeroing the ++ * newly post-EOF portion of the page. Flush the new EOF to ++ * convert the block before the pagecache truncate. ++ */ ++ error = filemap_write_and_wait_range(inode->i_mapping, newsize, ++ newsize); ++ if (error) ++ return error; + error = iomap_truncate_page(inode, newsize, &did_zeroing, + &xfs_iomap_ops); + } +-- +2.27.0 + -- 2.47.3