From 3d7bf2481b4256e635ebaf3094d618a7ab47a3c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Oct 2019 13:56:10 +0200 Subject: [PATCH] 4.9-stable patches added patches: btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch btrfs-relinquish-cpus-in-btrfs_compare_trees.patch i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch memcg-kmem-do-not-fail-__gfp_nofail-charges.patch ovl-filter-of-trusted-xattr-results-in-audit.patch --- ...when-using-the-tree-modification-log.patch | 99 +++++++++++++++++++ ...linquish-cpus-in-btrfs_compare_trees.patch | 69 +++++++++++++ ...ble-bit-in-i40e_sync_filters_subtask.patch | 74 ++++++++++++++ ...mem-do-not-fail-__gfp_nofail-charges.patch | 87 ++++++++++++++++ ...er-of-trusted-xattr-results-in-audit.patch | 41 ++++++++ queue-4.9/series | 5 + 6 files changed, 375 insertions(+) create mode 100644 queue-4.9/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch create mode 100644 queue-4.9/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch create mode 100644 queue-4.9/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch create mode 100644 queue-4.9/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch create mode 100644 queue-4.9/ovl-filter-of-trusted-xattr-results-in-audit.patch diff --git a/queue-4.9/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch b/queue-4.9/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch new file mode 100644 index 00000000000..00955367e6a --- /dev/null +++ b/queue-4.9/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch @@ -0,0 +1,99 @@ +From efad8a853ad2057f96664328a0d327a05ce39c76 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 12 Aug 2019 19:14:29 +0100 +Subject: Btrfs: fix use-after-free when using the tree modification log + +From: Filipe Manana + +commit efad8a853ad2057f96664328a0d327a05ce39c76 upstream. 
+ +At ctree.c:get_old_root(), we are accessing a root's header owner field +after we have freed the respective extent buffer. This results in a +use-after-free that can lead to crashes, and when CONFIG_DEBUG_PAGEALLOC +is set, results in a stack trace like the following: + + [ 3876.799331] stack segment: 0000 [#1] SMP DEBUG_PAGEALLOC PTI + [ 3876.799363] CPU: 0 PID: 15436 Comm: pool Not tainted 5.3.0-rc3-btrfs-next-54 #1 + [ 3876.799385] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 + [ 3876.799433] RIP: 0010:btrfs_search_old_slot+0x652/0xd80 [btrfs] + (...) + [ 3876.799502] RSP: 0018:ffff9f08c1a2f9f0 EFLAGS: 00010286 + [ 3876.799518] RAX: ffff8dd300000000 RBX: ffff8dd85a7a9348 RCX: 000000038da26000 + [ 3876.799538] RDX: 0000000000000000 RSI: ffffe522ce368980 RDI: 0000000000000246 + [ 3876.799559] RBP: dae1922adadad000 R08: 0000000008020000 R09: ffffe522c0000000 + [ 3876.799579] R10: ffff8dd57fd788c8 R11: 000000007511b030 R12: ffff8dd781ddc000 + [ 3876.799599] R13: ffff8dd9e6240578 R14: ffff8dd6896f7a88 R15: ffff8dd688cf90b8 + [ 3876.799620] FS: 00007f23ddd97700(0000) GS:ffff8dda20200000(0000) knlGS:0000000000000000 + [ 3876.799643] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 3876.799660] CR2: 00007f23d4024000 CR3: 0000000710bb0005 CR4: 00000000003606f0 + [ 3876.799682] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [ 3876.799703] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [ 3876.799723] Call Trace: + [ 3876.799735] ? do_raw_spin_unlock+0x49/0xc0 + [ 3876.799749] ? _raw_spin_unlock+0x24/0x30 + [ 3876.799779] resolve_indirect_refs+0x1eb/0xc80 [btrfs] + [ 3876.799810] find_parent_nodes+0x38d/0x1180 [btrfs] + [ 3876.799841] btrfs_check_shared+0x11a/0x1d0 [btrfs] + [ 3876.799870] ? 
extent_fiemap+0x598/0x6e0 [btrfs] + [ 3876.799895] extent_fiemap+0x598/0x6e0 [btrfs] + [ 3876.799913] do_vfs_ioctl+0x45a/0x700 + [ 3876.799926] ksys_ioctl+0x70/0x80 + [ 3876.799938] ? trace_hardirqs_off_thunk+0x1a/0x20 + [ 3876.799953] __x64_sys_ioctl+0x16/0x20 + [ 3876.799965] do_syscall_64+0x62/0x220 + [ 3876.799977] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [ 3876.799993] RIP: 0033:0x7f23e0013dd7 + (...) + [ 3876.800056] RSP: 002b:00007f23ddd96ca8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + [ 3876.800078] RAX: ffffffffffffffda RBX: 00007f23d80210f8 RCX: 00007f23e0013dd7 + [ 3876.800099] RDX: 00007f23d80210f8 RSI: 00000000c020660b RDI: 0000000000000003 + [ 3876.800626] RBP: 000055fa2a2a2440 R08: 0000000000000000 R09: 00007f23ddd96d7c + [ 3876.801143] R10: 00007f23d8022000 R11: 0000000000000246 R12: 00007f23ddd96d80 + [ 3876.801662] R13: 00007f23ddd96d78 R14: 00007f23d80210f0 R15: 00007f23ddd96d80 + (...) + [ 3876.805107] ---[ end trace e53161e179ef04f9 ]--- + +Fix that by saving the root's header owner field into a local variable +before freeing the root's extent buffer, and then use that local variable +when needed. 
+ +Fixes: 30b0463a9394d9 ("Btrfs: fix accessing the root pointer in tree mod log functions") +CC: stable@vger.kernel.org # 3.10+ +Reviewed-by: Nikolay Borisov +Reviewed-by: Anand Jain +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1406,6 +1406,7 @@ get_old_root(struct btrfs_root *root, u6 + struct tree_mod_elem *tm; + struct extent_buffer *eb = NULL; + struct extent_buffer *eb_root; ++ u64 eb_root_owner = 0; + struct extent_buffer *old; + struct tree_mod_root *old_root = NULL; + u64 old_generation = 0; +@@ -1439,6 +1440,7 @@ get_old_root(struct btrfs_root *root, u6 + free_extent_buffer(old); + } + } else if (old_root) { ++ eb_root_owner = btrfs_header_owner(eb_root); + btrfs_tree_read_unlock(eb_root); + free_extent_buffer(eb_root); + eb = alloc_dummy_extent_buffer(root->fs_info, logical, +@@ -1457,7 +1459,7 @@ get_old_root(struct btrfs_root *root, u6 + if (old_root) { + btrfs_set_header_bytenr(eb, eb->start); + btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); +- btrfs_set_header_owner(eb, btrfs_header_owner(eb_root)); ++ btrfs_set_header_owner(eb, eb_root_owner); + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); + } diff --git a/queue-4.9/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch b/queue-4.9/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch new file mode 100644 index 00000000000..4a51e6470e8 --- /dev/null +++ b/queue-4.9/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch @@ -0,0 +1,69 @@ +From 6af112b11a4bc1b560f60a618ac9c1dcefe9836e Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Wed, 4 Sep 2019 19:33:58 +0300 +Subject: btrfs: Relinquish CPUs in btrfs_compare_trees + +From: Nikolay Borisov + +commit 6af112b11a4bc1b560f60a618ac9c1dcefe9836e upstream. 
+ +When doing any form of incremental send, the parent and the child trees +need to be compared via btrfs_compare_trees. This can result in long +loop chains without ever relinquishing the CPU. This causes the softlockup +detector to trigger when comparing trees with a lot of items. Example +report: + +watchdog: BUG: soft lockup - CPU#0 stuck for 24s! [snapperd:16153] +CPU: 0 PID: 16153 Comm: snapperd Not tainted 5.2.9-1-default #1 openSUSE Tumbleweed (unreleased) +Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +pstate: 40000005 (nZcv daif -PAN -UAO) +pc : __ll_sc_arch_atomic_sub_return+0x14/0x20 +lr : btrfs_release_extent_buffer_pages+0xe0/0x1e8 [btrfs] +sp : ffff00001273b7e0 +Call trace: + __ll_sc_arch_atomic_sub_return+0x14/0x20 + release_extent_buffer+0xdc/0x120 [btrfs] + free_extent_buffer.part.0+0xb0/0x118 [btrfs] + free_extent_buffer+0x24/0x30 [btrfs] + btrfs_release_path+0x4c/0xa0 [btrfs] + btrfs_free_path.part.0+0x20/0x40 [btrfs] + btrfs_free_path+0x24/0x30 [btrfs] + get_inode_info+0xa8/0xf8 [btrfs] + finish_inode_if_needed+0xe0/0x6d8 [btrfs] + changed_cb+0x9c/0x410 [btrfs] + btrfs_compare_trees+0x284/0x648 [btrfs] + send_subvol+0x33c/0x520 [btrfs] + btrfs_ioctl_send+0x8a0/0xaf0 [btrfs] + btrfs_ioctl+0x199c/0x2288 [btrfs] + do_vfs_ioctl+0x4b0/0x820 + ksys_ioctl+0x84/0xb8 + __arm64_sys_ioctl+0x28/0x38 + el0_svc_common.constprop.0+0x7c/0x188 + el0_svc_handler+0x34/0x90 + el0_svc+0x8/0xc + +Fix this by adding a call to cond_resched at the beginning of the main +loop in btrfs_compare_trees. 
+ +Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Johannes Thumshirn +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -5467,6 +5467,7 @@ int btrfs_compare_trees(struct btrfs_roo + advance_left = advance_right = 0; + + while (1) { ++ cond_resched(); + if (advance_left && !left_end_reached) { + ret = tree_advance(left_root, left_path, &left_level, + left_root_level, diff --git a/queue-4.9/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch b/queue-4.9/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch new file mode 100644 index 00000000000..bfbd562af85 --- /dev/null +++ b/queue-4.9/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch @@ -0,0 +1,74 @@ +From a7542b87607560d0b89e7ff81d870bd6ff8835cb Mon Sep 17 00:00:00 2001 +From: Stefan Assmann +Date: Wed, 21 Aug 2019 16:09:29 +0200 +Subject: i40e: check __I40E_VF_DISABLE bit in i40e_sync_filters_subtask + +From: Stefan Assmann + +commit a7542b87607560d0b89e7ff81d870bd6ff8835cb upstream. + +While testing VF spawn/destroy the following panic occurred. + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000029 +[...] +Workqueue: i40e i40e_service_task [i40e] +RIP: 0010:i40e_sync_vsi_filters+0x6fd/0xc60 [i40e] +[...] +Call Trace: + ? __switch_to_asm+0x35/0x70 + ? __switch_to_asm+0x41/0x70 + ? __switch_to_asm+0x35/0x70 + ? _cond_resched+0x15/0x30 + i40e_sync_filters_subtask+0x56/0x70 [i40e] + i40e_service_task+0x382/0x11b0 [i40e] + ? __switch_to_asm+0x41/0x70 + ? __switch_to_asm+0x41/0x70 + process_one_work+0x1a7/0x3b0 + worker_thread+0x30/0x390 + ? create_worker+0x1a0/0x1a0 + kthread+0x112/0x130 + ? 
kthread_bind+0x30/0x30 + ret_from_fork+0x35/0x40 + +Investigation revealed a race where pf->vf[vsi->vf_id].trusted may get +accessed by the watchdog via i40e_sync_filters_subtask() although +i40e_free_vfs() already free'd pf->vf. +To avoid this the call to i40e_sync_vsi_filters() in +i40e_sync_filters_subtask() needs to be guarded by __I40E_VF_DISABLE, +which is also used by i40e_free_vfs(). + +Note: put the __I40E_VF_DISABLE check after the +__I40E_MACVLAN_SYNC_PENDING check as the latter is more likely to +trigger. + +CC: stable@vger.kernel.org +Signed-off-by: Stefan Assmann +Tested-by: Andrew Bowers +Signed-off-by: Jeff Kirsher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -2645,6 +2645,10 @@ static void i40e_restore_vlan(struct i40 + + if (!vsi->netdev) + return; ++ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) { ++ set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); ++ return; ++ } + + i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features); + +@@ -7164,6 +7168,7 @@ static void i40e_sync_udp_filters_subtas + } + } + } ++ clear_bit(__I40E_VF_DISABLE, pf->state); + } + + /** diff --git a/queue-4.9/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch b/queue-4.9/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch new file mode 100644 index 00000000000..93d1706b5c7 --- /dev/null +++ b/queue-4.9/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch @@ -0,0 +1,87 @@ +From e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Wed, 25 Sep 2019 16:45:53 -0700 +Subject: memcg, kmem: do not fail __GFP_NOFAIL charges + +From: Michal Hocko + +commit e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 upstream. 
+ +Thomas has noticed the following NULL ptr dereference when using cgroup +v1 kmem limit: +BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 +PGD 0 +P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 3 PID: 16923 Comm: gtk-update-icon Not tainted 4.19.51 #42 +Hardware name: Gigabyte Technology Co., Ltd. Z97X-Gaming G1/Z97X-Gaming G1, BIOS F9 07/31/2015 +RIP: 0010:create_empty_buffers+0x24/0x100 +Code: cd 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 d4 ba 01 00 00 00 55 53 48 89 fb e8 97 fe ff ff 48 89 c5 48 89 c2 eb 03 48 89 ca <48> 8b 4a 08 4c 09 22 48 85 c9 75 f1 48 89 6a 08 48 8b 43 18 48 8d +RSP: 0018:ffff927ac1b37bf8 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: fffff2d4429fd740 RCX: 0000000100097149 +RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff9075a99fbe00 +RBP: 0000000000000000 R08: fffff2d440949cc8 R09: 00000000000960c0 +R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff907601f18360 R14: 0000000000002000 R15: 0000000000001000 +FS: 00007fb55b288bc0(0000) GS:ffff90761f8c0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000008 CR3: 000000007aebc002 CR4: 00000000001606e0 +Call Trace: + create_page_buffers+0x4d/0x60 + __block_write_begin_int+0x8e/0x5a0 + ? ext4_inode_attach_jinode.part.82+0xb0/0xb0 + ? jbd2__journal_start+0xd7/0x1f0 + ext4_da_write_begin+0x112/0x3d0 + generic_perform_write+0xf1/0x1b0 + ? file_update_time+0x70/0x140 + __generic_file_write_iter+0x141/0x1a0 + ext4_file_write_iter+0xef/0x3b0 + __vfs_write+0x17e/0x1e0 + vfs_write+0xa5/0x1a0 + ksys_write+0x57/0xd0 + do_syscall_64+0x55/0x160 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Tetsuo then noticed that this is because the __memcg_kmem_charge_memcg +fails __GFP_NOFAIL charge when the kmem limit is reached. This is a wrong +behavior because nofail allocations are not allowed to fail. Normal +charge path simply forces the charge even if that means to cross the +limit. 
Kmem accounting should be doing the same. + +Link: http://lkml.kernel.org/r/20190906125608.32129-1-mhocko@kernel.org +Signed-off-by: Michal Hocko +Reported-by: Thomas Lindroth +Debugged-by: Tetsuo Handa +Cc: Johannes Weiner +Cc: Vladimir Davydov +Cc: Andrey Ryabinin +Cc: Thomas Lindroth +Cc: Shakeel Butt +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2325,6 +2325,16 @@ int memcg_kmem_charge_memcg(struct page + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) { ++ ++ /* ++ * Enforce __GFP_NOFAIL allocation because callers are not ++ * prepared to see failures and likely do not have any failure ++ * handling code. ++ */ ++ if (gfp & __GFP_NOFAIL) { ++ page_counter_charge(&memcg->kmem, nr_pages); ++ return 0; ++ } + cancel_charge(memcg, nr_pages); + return -ENOMEM; + } diff --git a/queue-4.9/ovl-filter-of-trusted-xattr-results-in-audit.patch b/queue-4.9/ovl-filter-of-trusted-xattr-results-in-audit.patch new file mode 100644 index 00000000000..93a338e5fe0 --- /dev/null +++ b/queue-4.9/ovl-filter-of-trusted-xattr-results-in-audit.patch @@ -0,0 +1,41 @@ +From 5c2e9f346b815841f9bed6029ebcb06415caf640 Mon Sep 17 00:00:00 2001 +From: Mark Salyzyn +Date: Thu, 29 Aug 2019 11:30:14 -0700 +Subject: ovl: filter of trusted xattr results in audit + +From: Mark Salyzyn + +commit 5c2e9f346b815841f9bed6029ebcb06415caf640 upstream. + +When filtering xattr list for reading, presence of trusted xattr +results in a security audit log. However, if there is other content +no errno will be set, and if there isn't, the errno will be -ENODATA +and not -EPERM as is usually associated with a lack of capability. +The check does not block the request to list the xattrs present. + +Switch to ns_capable_noaudit to reflect a more appropriate check. 
+ +Signed-off-by: Mark Salyzyn +Cc: linux-security-module@vger.kernel.org +Cc: kernel-team@android.com +Cc: stable@vger.kernel.org # v3.18+ +Fixes: a082c6f680da ("ovl: filter trusted xattr for non-admin") +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman + +--- + fs/overlayfs/inode.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -234,7 +234,8 @@ static bool ovl_can_list(const char *s) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ +- return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); ++ return !ovl_is_private_xattr(s) && ++ ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); + } + + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) diff --git a/queue-4.9/series b/queue-4.9/series index 598494ad1fb..e8e2d1988b0 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -111,3 +111,8 @@ asoc-intel-fix-use-of-potentially-uninitialized-variable.patch arm-zynq-use-memcpy_toio-instead-of-memcpy-on-smp-bring-up.patch alarmtimer-use-eopnotsupp-instead-of-enotsupp.patch regulator-defer-init-completion-for-a-while-after-late_initcall.patch +memcg-kmem-do-not-fail-__gfp_nofail-charges.patch +i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch +ovl-filter-of-trusted-xattr-results-in-audit.patch +btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch +btrfs-relinquish-cpus-in-btrfs_compare_trees.patch -- 2.47.2