--- /dev/null
+From efad8a853ad2057f96664328a0d327a05ce39c76 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 12 Aug 2019 19:14:29 +0100
+Subject: Btrfs: fix use-after-free when using the tree modification log
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit efad8a853ad2057f96664328a0d327a05ce39c76 upstream.
+
+At ctree.c:get_old_root(), we are accessing a root's header owner field
+after we have freed the respective extent buffer. This results in a
+use-after-free that can lead to crashes, and when CONFIG_DEBUG_PAGEALLOC
+is set, results in a stack trace like the following:
+
+ [ 3876.799331] stack segment: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
+ [ 3876.799363] CPU: 0 PID: 15436 Comm: pool Not tainted 5.3.0-rc3-btrfs-next-54 #1
+ [ 3876.799385] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ [ 3876.799433] RIP: 0010:btrfs_search_old_slot+0x652/0xd80 [btrfs]
+ (...)
+ [ 3876.799502] RSP: 0018:ffff9f08c1a2f9f0 EFLAGS: 00010286
+ [ 3876.799518] RAX: ffff8dd300000000 RBX: ffff8dd85a7a9348 RCX: 000000038da26000
+ [ 3876.799538] RDX: 0000000000000000 RSI: ffffe522ce368980 RDI: 0000000000000246
+ [ 3876.799559] RBP: dae1922adadad000 R08: 0000000008020000 R09: ffffe522c0000000
+ [ 3876.799579] R10: ffff8dd57fd788c8 R11: 000000007511b030 R12: ffff8dd781ddc000
+ [ 3876.799599] R13: ffff8dd9e6240578 R14: ffff8dd6896f7a88 R15: ffff8dd688cf90b8
+ [ 3876.799620] FS: 00007f23ddd97700(0000) GS:ffff8dda20200000(0000) knlGS:0000000000000000
+ [ 3876.799643] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [ 3876.799660] CR2: 00007f23d4024000 CR3: 0000000710bb0005 CR4: 00000000003606f0
+ [ 3876.799682] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [ 3876.799703] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [ 3876.799723] Call Trace:
+ [ 3876.799735] ? do_raw_spin_unlock+0x49/0xc0
+ [ 3876.799749] ? _raw_spin_unlock+0x24/0x30
+ [ 3876.799779] resolve_indirect_refs+0x1eb/0xc80 [btrfs]
+ [ 3876.799810] find_parent_nodes+0x38d/0x1180 [btrfs]
+ [ 3876.799841] btrfs_check_shared+0x11a/0x1d0 [btrfs]
+ [ 3876.799870] ? extent_fiemap+0x598/0x6e0 [btrfs]
+ [ 3876.799895] extent_fiemap+0x598/0x6e0 [btrfs]
+ [ 3876.799913] do_vfs_ioctl+0x45a/0x700
+ [ 3876.799926] ksys_ioctl+0x70/0x80
+ [ 3876.799938] ? trace_hardirqs_off_thunk+0x1a/0x20
+ [ 3876.799953] __x64_sys_ioctl+0x16/0x20
+ [ 3876.799965] do_syscall_64+0x62/0x220
+ [ 3876.799977] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [ 3876.799993] RIP: 0033:0x7f23e0013dd7
+ (...)
+ [ 3876.800056] RSP: 002b:00007f23ddd96ca8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+ [ 3876.800078] RAX: ffffffffffffffda RBX: 00007f23d80210f8 RCX: 00007f23e0013dd7
+ [ 3876.800099] RDX: 00007f23d80210f8 RSI: 00000000c020660b RDI: 0000000000000003
+ [ 3876.800626] RBP: 000055fa2a2a2440 R08: 0000000000000000 R09: 00007f23ddd96d7c
+ [ 3876.801143] R10: 00007f23d8022000 R11: 0000000000000246 R12: 00007f23ddd96d80
+ [ 3876.801662] R13: 00007f23ddd96d78 R14: 00007f23d80210f0 R15: 00007f23ddd96d80
+ (...)
+ [ 3876.805107] ---[ end trace e53161e179ef04f9 ]---
+
+Fix that by saving the root's header owner field into a local variable
+before freeing the root's extent buffer, and then use that local variable
+when needed.
+
+Fixes: 30b0463a9394d9 ("Btrfs: fix accessing the root pointer in tree mod log functions")
+CC: stable@vger.kernel.org # 3.10+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1414,6 +1414,7 @@ get_old_root(struct btrfs_root *root, u6
+ struct tree_mod_elem *tm;
+ struct extent_buffer *eb = NULL;
+ struct extent_buffer *eb_root;
++ u64 eb_root_owner = 0;
+ struct extent_buffer *old;
+ struct tree_mod_root *old_root = NULL;
+ u64 old_generation = 0;
+@@ -1448,6 +1449,7 @@ get_old_root(struct btrfs_root *root, u6
+ free_extent_buffer(old);
+ }
+ } else if (old_root) {
++ eb_root_owner = btrfs_header_owner(eb_root);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
+ eb = alloc_dummy_extent_buffer(fs_info, logical);
+@@ -1465,7 +1467,7 @@ get_old_root(struct btrfs_root *root, u6
+ if (old_root) {
+ btrfs_set_header_bytenr(eb, eb->start);
+ btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
+- btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
++ btrfs_set_header_owner(eb, eb_root_owner);
+ btrfs_set_header_level(eb, old_root->level);
+ btrfs_set_header_generation(eb, old_generation);
+ }
--- /dev/null
+From bab32fc069ce8829c416e8737c119f62a57970f9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 16 Sep 2019 20:02:38 +0800
+Subject: btrfs: qgroup: Fix the wrong target io_tree when freeing reserved data space
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bab32fc069ce8829c416e8737c119f62a57970f9 upstream.
+
+[BUG]
+Under the following case with qgroup enabled, if some error happened
+after we have reserved delalloc space, then in error handling path, we
+could cause qgroup data space leakage:
+
+From btrfs_truncate_block() in inode.c:
+
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ block_start, blocksize);
+ if (ret)
+ goto out;
+
+ again:
+ page = find_or_create_page(mapping, index, mask);
+ if (!page) {
+ btrfs_delalloc_release_space(inode, data_reserved,
+ block_start, blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+[CAUSE]
+In the above case, btrfs_delalloc_reserve_space() will call
+btrfs_qgroup_reserve_data() and mark the io_tree range with
+EXTENT_QGROUP_RESERVED flag.
+
+In the error handling path, we have the following call stack:
+btrfs_delalloc_release_space()
+|- btrfs_free_reserved_data_space()
+ |- btrfs_qgroup_free_data()
+ |- __btrfs_qgroup_release_data(reserved=@reserved, free=1)
+ |- qgroup_free_reserved_data(reserved=@reserved)
+ |- clear_record_extent_bits();
+ |- freed += changeset.bytes_changed;
+
+However, due to a completion bug, qgroup_free_reserved_data() will clear
+the EXTENT_QGROUP_RESERVED flag in BTRFS_I(inode)->io_failure_tree, rather
+than the correct BTRFS_I(inode)->io_tree.
+Since io_failure_tree is never marked with that flag,
+btrfs_qgroup_free_data() will not free any data reserved space at all,
+causing a leakage.
+
+This type of error handling can only be triggered by errors outside of
+qgroup code. So EDQUOT error from qgroup can't trigger it.
+
+[FIX]
+Fix the wrong target io_tree.
+
+Reported-by: Josef Bacik <josef@toxicpanda.com>
+Fixes: bc42bda22345 ("btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges")
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2951,7 +2951,7 @@ static int qgroup_free_reserved_data(str
+ * EXTENT_QGROUP_RESERVED, we won't double free.
+ * So not need to rush.
+ */
+- ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
++ ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
+ free_start, free_start + free_len - 1,
+ EXTENT_QGROUP_RESERVED, &changeset);
+ if (ret < 0)
--- /dev/null
+From 6af112b11a4bc1b560f60a618ac9c1dcefe9836e Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Wed, 4 Sep 2019 19:33:58 +0300
+Subject: btrfs: Relinquish CPUs in btrfs_compare_trees
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit 6af112b11a4bc1b560f60a618ac9c1dcefe9836e upstream.
+
+When doing any form of incremental send the parent and the child trees
+need to be compared via btrfs_compare_trees. This can result in long
+loop chains without ever relinquishing the CPU. This causes the softlockup
+detector to trigger when comparing trees with a lot of items. Example
+report:
+
+watchdog: BUG: soft lockup - CPU#0 stuck for 24s! [snapperd:16153]
+CPU: 0 PID: 16153 Comm: snapperd Not tainted 5.2.9-1-default #1 openSUSE Tumbleweed (unreleased)
+Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+pstate: 40000005 (nZcv daif -PAN -UAO)
+pc : __ll_sc_arch_atomic_sub_return+0x14/0x20
+lr : btrfs_release_extent_buffer_pages+0xe0/0x1e8 [btrfs]
+sp : ffff00001273b7e0
+Call trace:
+ __ll_sc_arch_atomic_sub_return+0x14/0x20
+ release_extent_buffer+0xdc/0x120 [btrfs]
+ free_extent_buffer.part.0+0xb0/0x118 [btrfs]
+ free_extent_buffer+0x24/0x30 [btrfs]
+ btrfs_release_path+0x4c/0xa0 [btrfs]
+ btrfs_free_path.part.0+0x20/0x40 [btrfs]
+ btrfs_free_path+0x24/0x30 [btrfs]
+ get_inode_info+0xa8/0xf8 [btrfs]
+ finish_inode_if_needed+0xe0/0x6d8 [btrfs]
+ changed_cb+0x9c/0x410 [btrfs]
+ btrfs_compare_trees+0x284/0x648 [btrfs]
+ send_subvol+0x33c/0x520 [btrfs]
+ btrfs_ioctl_send+0x8a0/0xaf0 [btrfs]
+ btrfs_ioctl+0x199c/0x2288 [btrfs]
+ do_vfs_ioctl+0x4b0/0x820
+ ksys_ioctl+0x84/0xb8
+ __arm64_sys_ioctl+0x28/0x38
+ el0_svc_common.constprop.0+0x7c/0x188
+ el0_svc_handler+0x34/0x90
+ el0_svc+0x8/0xc
+
+Fix this by adding a call to cond_resched at the beginning of the main
+loop in btrfs_compare_trees.
+
+Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -5494,6 +5494,7 @@ int btrfs_compare_trees(struct btrfs_roo
+ advance_left = advance_right = 0;
+
+ while (1) {
++ cond_resched();
+ if (advance_left && !left_end_reached) {
+ ret = tree_advance(fs_info, left_path, &left_level,
+ left_root_level,
--- /dev/null
+From f0b444b349e33ae0d3dd93e25ca365482a5d17d4 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Thu, 12 Sep 2019 13:54:27 -0400
+Subject: gfs2: clear buf_in_tr when ending a transaction in sweep_bh_for_rgrps
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit f0b444b349e33ae0d3dd93e25ca365482a5d17d4 upstream.
+
+In function sweep_bh_for_rgrps, which is a helper for punch_hole,
+it uses variable buf_in_tr to keep track of when it needs to commit
+pending block frees on a partial delete that overflows the
+transaction created for the delete. The problem is that the
+variable was initialized at the start of function sweep_bh_for_rgrps
+but it was never cleared, even when starting a new transaction.
+
+This patch reinitializes the variable when the transaction is
+ended, so the next transaction starts out with it cleared.
+
+Fixes: d552a2b9b33e ("GFS2: Non-recursive delete")
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/bmap.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/gfs2/bmap.c
++++ b/fs/gfs2/bmap.c
+@@ -1078,6 +1078,7 @@ out_unlock:
+ gfs2_dinode_out(ip, dibh->b_data);
+ up_write(&ip->i_rw_mutex);
+ gfs2_trans_end(sdp);
++ buf_in_tr = false;
+ }
+ gfs2_glock_dq_uninit(rd_gh);
+ cond_resched();
--- /dev/null
+From a7542b87607560d0b89e7ff81d870bd6ff8835cb Mon Sep 17 00:00:00 2001
+From: Stefan Assmann <sassmann@kpanic.de>
+Date: Wed, 21 Aug 2019 16:09:29 +0200
+Subject: i40e: check __I40E_VF_DISABLE bit in i40e_sync_filters_subtask
+
+From: Stefan Assmann <sassmann@kpanic.de>
+
+commit a7542b87607560d0b89e7ff81d870bd6ff8835cb upstream.
+
+While testing VF spawn/destroy the following panic occurred.
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000029
+[...]
+Workqueue: i40e i40e_service_task [i40e]
+RIP: 0010:i40e_sync_vsi_filters+0x6fd/0xc60 [i40e]
+[...]
+Call Trace:
+ ? __switch_to_asm+0x35/0x70
+ ? __switch_to_asm+0x41/0x70
+ ? __switch_to_asm+0x35/0x70
+ ? _cond_resched+0x15/0x30
+ i40e_sync_filters_subtask+0x56/0x70 [i40e]
+ i40e_service_task+0x382/0x11b0 [i40e]
+ ? __switch_to_asm+0x41/0x70
+ ? __switch_to_asm+0x41/0x70
+ process_one_work+0x1a7/0x3b0
+ worker_thread+0x30/0x390
+ ? create_worker+0x1a0/0x1a0
+ kthread+0x112/0x130
+ ? kthread_bind+0x30/0x30
+ ret_from_fork+0x35/0x40
+
+Investigation revealed a race where pf->vf[vsi->vf_id].trusted may get
+accessed by the watchdog via i40e_sync_filters_subtask() although
+i40e_free_vfs() already free'd pf->vf.
+To avoid this the call to i40e_sync_vsi_filters() in
+i40e_sync_filters_subtask() needs to be guarded by __I40E_VF_DISABLE,
+which is also used by i40e_free_vfs().
+
+Note: put the __I40E_VF_DISABLE check after the
+__I40E_MACVLAN_SYNC_PENDING check as the latter is more likely to
+trigger.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
+Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -2536,6 +2536,10 @@ void i40e_vlan_stripping_disable(struct
+ /* Don't modify stripping options if a port VLAN is active */
+ if (vsi->info.pvid)
+ return;
++ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) {
++ set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
++ return;
++ }
+
+ if ((vsi->info.valid_sections &
+ cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
+@@ -7596,6 +7600,7 @@ static void i40e_sync_udp_filters_subtas
+ }
+ }
+ }
++ clear_bit(__I40E_VF_DISABLE, pf->state);
+ }
+
+ /**
--- /dev/null
+From e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 25 Sep 2019 16:45:53 -0700
+Subject: memcg, kmem: do not fail __GFP_NOFAIL charges
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 upstream.
+
+Thomas has noticed the following NULL ptr dereference when using cgroup
+v1 kmem limit:
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
+PGD 0
+P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 3 PID: 16923 Comm: gtk-update-icon Not tainted 4.19.51 #42
+Hardware name: Gigabyte Technology Co., Ltd. Z97X-Gaming G1/Z97X-Gaming G1, BIOS F9 07/31/2015
+RIP: 0010:create_empty_buffers+0x24/0x100
+Code: cd 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 d4 ba 01 00 00 00 55 53 48 89 fb e8 97 fe ff ff 48 89 c5 48 89 c2 eb 03 48 89 ca <48> 8b 4a 08 4c 09 22 48 85 c9 75 f1 48 89 6a 08 48 8b 43 18 48 8d
+RSP: 0018:ffff927ac1b37bf8 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: fffff2d4429fd740 RCX: 0000000100097149
+RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff9075a99fbe00
+RBP: 0000000000000000 R08: fffff2d440949cc8 R09: 00000000000960c0
+R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000
+R13: ffff907601f18360 R14: 0000000000002000 R15: 0000000000001000
+FS: 00007fb55b288bc0(0000) GS:ffff90761f8c0000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000007aebc002 CR4: 00000000001606e0
+Call Trace:
+ create_page_buffers+0x4d/0x60
+ __block_write_begin_int+0x8e/0x5a0
+ ? ext4_inode_attach_jinode.part.82+0xb0/0xb0
+ ? jbd2__journal_start+0xd7/0x1f0
+ ext4_da_write_begin+0x112/0x3d0
+ generic_perform_write+0xf1/0x1b0
+ ? file_update_time+0x70/0x140
+ __generic_file_write_iter+0x141/0x1a0
+ ext4_file_write_iter+0xef/0x3b0
+ __vfs_write+0x17e/0x1e0
+ vfs_write+0xa5/0x1a0
+ ksys_write+0x57/0xd0
+ do_syscall_64+0x55/0x160
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Tetsuo then noticed that this is because the __memcg_kmem_charge_memcg
+fails a __GFP_NOFAIL charge when the kmem limit is reached. This is wrong
+behavior because nofail allocations are not allowed to fail. Normal
+charge path simply forces the charge even if that means to cross the
+limit. Kmem accounting should be doing the same.
+
+Link: http://lkml.kernel.org/r/20190906125608.32129-1-mhocko@kernel.org
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
+Debugged-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Thomas Lindroth <thomas.lindroth@gmail.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2352,6 +2352,16 @@ int memcg_kmem_charge_memcg(struct page
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
+ !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
++
++ /*
++ * Enforce __GFP_NOFAIL allocation because callers are not
++ * prepared to see failures and likely do not have any failure
++ * handling code.
++ */
++ if (gfp & __GFP_NOFAIL) {
++ page_counter_charge(&memcg->kmem, nr_pages);
++ return 0;
++ }
+ cancel_charge(memcg, nr_pages);
+ return -ENOMEM;
+ }
--- /dev/null
+From f9c645621a28e37813a1de96d9cbd89cde94a1e4 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Date: Mon, 23 Sep 2019 15:37:08 -0700
+Subject: memcg, oom: don't require __GFP_FS when invoking memcg OOM killer
+
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+
+commit f9c645621a28e37813a1de96d9cbd89cde94a1e4 upstream.
+
+Masoud Sharbiani noticed that commit 29ef680ae7c21110 ("memcg, oom: move
+out_of_memory back to the charge path") broke memcg OOM called from
+__xfs_filemap_fault() path. It turned out that try_charge() is retrying
+forever without making forward progress because mem_cgroup_oom(GFP_NOFS)
+cannot invoke the OOM killer due to commit 3da88fb3bacfaa33 ("mm, oom:
+move GFP_NOFS check to out_of_memory").
+
+Allowing forced charge due to being unable to invoke memcg OOM killer will
+lead to global OOM situation. Also, just returning -ENOMEM will be risky
+because OOM path is lost and some paths (e.g. get_user_pages()) will leak
+-ENOMEM. Therefore, invoking memcg OOM killer (despite GFP_NOFS) will be
+the only choice we can choose for now.
+
+Until 29ef680ae7c21110, we were able to invoke memcg OOM killer when
+GFP_KERNEL reclaim failed [1]. But since 29ef680ae7c21110, we need to
+invoke memcg OOM killer when GFP_NOFS reclaim failed [2]. Although in the
+past we did invoke memcg OOM killer for GFP_NOFS [3], we might get
+premature memcg OOM reports due to this patch.
+
+[1]
+
+ leaker invoked oom-killer: gfp_mask=0x6200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, oom_score_adj=0
+ CPU: 0 PID: 2746 Comm: leaker Not tainted 4.18.0+ #19
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+ dump_stack+0x63/0x88
+ dump_header+0x67/0x27a
+ ? mem_cgroup_scan_tasks+0x91/0xf0
+ oom_kill_process+0x210/0x410
+ out_of_memory+0x10a/0x2c0
+ mem_cgroup_out_of_memory+0x46/0x80
+ mem_cgroup_oom_synchronize+0x2e4/0x310
+ ? high_work_func+0x20/0x20
+ pagefault_out_of_memory+0x31/0x76
+ mm_fault_error+0x55/0x115
+ ? handle_mm_fault+0xfd/0x220
+ __do_page_fault+0x433/0x4e0
+ do_page_fault+0x22/0x30
+ ? page_fault+0x8/0x30
+ page_fault+0x1e/0x30
+ RIP: 0033:0x4009f0
+ Code: 03 00 00 00 e8 71 fd ff ff 48 83 f8 ff 49 89 c6 74 74 48 89 c6 bf c0 0c 40 00 31 c0 e8 69 fd ff ff 45 85 ff 7e 21 31 c9 66 90 <41> 0f be 14 0e 01 d3 f7 c1 ff 0f 00 00 75 05 41 c6 04 0e 2a 48 83
+ RSP: 002b:00007ffe29ae96f0 EFLAGS: 00010206
+ RAX: 000000000000001b RBX: 0000000000000000 RCX: 0000000001ce1000
+ RDX: 0000000000000000 RSI: 000000007fffffe5 RDI: 0000000000000000
+ RBP: 000000000000000c R08: 0000000000000000 R09: 00007f94be09220d
+ R10: 0000000000000002 R11: 0000000000000246 R12: 00000000000186a0
+ R13: 0000000000000003 R14: 00007f949d845000 R15: 0000000002800000
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 158965
+ memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 2016kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:844KB rss:521136KB rss_huge:0KB shmem:0KB mapped_file:0KB dirty:132KB writeback:0KB inactive_anon:0KB active_anon:521224KB inactive_file:1012KB active_file:8KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 2746 (leaker) score 998 or sacrifice child
+ Killed process 2746 (leaker) total-vm:536704kB, anon-rss:521176kB, file-rss:1208kB, shmem-rss:0kB
+ oom_reaper: reaped process 2746 (leaker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
+
+[2]
+
+ leaker invoked oom-killer: gfp_mask=0x600040(GFP_NOFS), nodemask=(null), order=0, oom_score_adj=0
+ CPU: 1 PID: 2746 Comm: leaker Not tainted 4.18.0+ #20
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+ dump_stack+0x63/0x88
+ dump_header+0x67/0x27a
+ ? mem_cgroup_scan_tasks+0x91/0xf0
+ oom_kill_process+0x210/0x410
+ out_of_memory+0x109/0x2d0
+ mem_cgroup_out_of_memory+0x46/0x80
+ try_charge+0x58d/0x650
+ ? __radix_tree_replace+0x81/0x100
+ mem_cgroup_try_charge+0x7a/0x100
+ __add_to_page_cache_locked+0x92/0x180
+ add_to_page_cache_lru+0x4d/0xf0
+ iomap_readpages_actor+0xde/0x1b0
+ ? iomap_zero_range_actor+0x1d0/0x1d0
+ iomap_apply+0xaf/0x130
+ iomap_readpages+0x9f/0x150
+ ? iomap_zero_range_actor+0x1d0/0x1d0
+ xfs_vm_readpages+0x18/0x20 [xfs]
+ read_pages+0x60/0x140
+ __do_page_cache_readahead+0x193/0x1b0
+ ondemand_readahead+0x16d/0x2c0
+ page_cache_async_readahead+0x9a/0xd0
+ filemap_fault+0x403/0x620
+ ? alloc_set_pte+0x12c/0x540
+ ? _cond_resched+0x14/0x30
+ __xfs_filemap_fault+0x66/0x180 [xfs]
+ xfs_filemap_fault+0x27/0x30 [xfs]
+ __do_fault+0x19/0x40
+ __handle_mm_fault+0x8e8/0xb60
+ handle_mm_fault+0xfd/0x220
+ __do_page_fault+0x238/0x4e0
+ do_page_fault+0x22/0x30
+ ? page_fault+0x8/0x30
+ page_fault+0x1e/0x30
+ RIP: 0033:0x4009f0
+ Code: 03 00 00 00 e8 71 fd ff ff 48 83 f8 ff 49 89 c6 74 74 48 89 c6 bf c0 0c 40 00 31 c0 e8 69 fd ff ff 45 85 ff 7e 21 31 c9 66 90 <41> 0f be 14 0e 01 d3 f7 c1 ff 0f 00 00 75 05 41 c6 04 0e 2a 48 83
+ RSP: 002b:00007ffda45c9290 EFLAGS: 00010206
+ RAX: 000000000000001b RBX: 0000000000000000 RCX: 0000000001a1e000
+ RDX: 0000000000000000 RSI: 000000007fffffe5 RDI: 0000000000000000
+ RBP: 000000000000000c R08: 0000000000000000 R09: 00007f6d061ff20d
+ R10: 0000000000000002 R11: 0000000000000246 R12: 00000000000186a0
+ R13: 0000000000000003 R14: 00007f6ce59b2000 R15: 0000000002800000
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 7221
+ memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 1944kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:3632KB rss:518232KB rss_huge:0KB shmem:0KB mapped_file:0KB dirty:0KB writeback:0KB inactive_anon:0KB active_anon:518408KB inactive_file:3908KB active_file:12KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 2746 (leaker) score 992 or sacrifice child
+ Killed process 2746 (leaker) total-vm:536704kB, anon-rss:518264kB, file-rss:1188kB, shmem-rss:0kB
+ oom_reaper: reaped process 2746 (leaker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
+
+[3]
+
+ leaker invoked oom-killer: gfp_mask=0x50, order=0, oom_score_adj=0
+ leaker cpuset=/ mems_allowed=0
+ CPU: 1 PID: 3206 Comm: leaker Not tainted 3.10.0-957.27.2.el7.x86_64 #1
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+ [<ffffffffaf364147>] dump_stack+0x19/0x1b
+ [<ffffffffaf35eb6a>] dump_header+0x90/0x229
+ [<ffffffffaedbb456>] ? find_lock_task_mm+0x56/0xc0
+ [<ffffffffaee32a38>] ? try_get_mem_cgroup_from_mm+0x28/0x60
+ [<ffffffffaedbb904>] oom_kill_process+0x254/0x3d0
+ [<ffffffffaee36c36>] mem_cgroup_oom_synchronize+0x546/0x570
+ [<ffffffffaee360b0>] ? mem_cgroup_charge_common+0xc0/0xc0
+ [<ffffffffaedbc194>] pagefault_out_of_memory+0x14/0x90
+ [<ffffffffaf35d072>] mm_fault_error+0x6a/0x157
+ [<ffffffffaf3717c8>] __do_page_fault+0x3c8/0x4f0
+ [<ffffffffaf371925>] do_page_fault+0x35/0x90
+ [<ffffffffaf36d768>] page_fault+0x28/0x30
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 20628
+ memory+swap: usage 524288kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 0kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:840KB rss:523448KB rss_huge:0KB mapped_file:0KB swap:0KB inactive_anon:0KB active_anon:523448KB inactive_file:464KB active_file:376KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 3206 (leaker) score 970 or sacrifice child
+ Killed process 3206 (leaker) total-vm:536692kB, anon-rss:523304kB, file-rss:412kB, shmem-rss:0kB
+
+Bisected by Masoud Sharbiani.
+
+Link: http://lkml.kernel.org/r/cbe54ed1-b6ba-a056-8899-2dc42526371d@i-love.sakura.ne.jp
+Fixes: 3da88fb3bacfaa33 ("mm, oom: move GFP_NOFS check to out_of_memory") [necessary after 29ef680ae7c21110]
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Masoud Sharbiani <msharbiani@apple.com>
+Tested-by: Masoud Sharbiani <msharbiani@apple.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org> [4.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/oom_kill.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -1050,9 +1050,10 @@ bool out_of_memory(struct oom_control *o
+ * The OOM killer does not compensate for IO-less reclaim.
+ * pagefault_out_of_memory lost its gfp context so we have to
+ * make sure exclude 0 mask - all other users should have at least
+- * ___GFP_DIRECT_RECLAIM to get here.
++ * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
++ * invoke the OOM killer even if it is a GFP_NOFS allocation.
+ */
+- if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
++ if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
+ return true;
+
+ /*
--- /dev/null
+From 5c2e9f346b815841f9bed6029ebcb06415caf640 Mon Sep 17 00:00:00 2001
+From: Mark Salyzyn <salyzyn@android.com>
+Date: Thu, 29 Aug 2019 11:30:14 -0700
+Subject: ovl: filter of trusted xattr results in audit
+
+From: Mark Salyzyn <salyzyn@android.com>
+
+commit 5c2e9f346b815841f9bed6029ebcb06415caf640 upstream.
+
+When filtering xattr list for reading, presence of trusted xattr
+results in a security audit log. However, if there is other content
+no errno will be set, and if there isn't, the errno will be -ENODATA
+and not -EPERM as is usually associated with a lack of capability.
+The check does not block the request to list the xattrs present.
+
+Switch to ns_capable_noaudit to reflect a more appropriate check.
+
+Signed-off-by: Mark Salyzyn <salyzyn@android.com>
+Cc: linux-security-module@vger.kernel.org
+Cc: kernel-team@android.com
+Cc: stable@vger.kernel.org # v3.18+
+Fixes: a082c6f680da ("ovl: filter trusted xattr for non-admin")
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -265,7 +265,8 @@ static bool ovl_can_list(const char *s)
+ return true;
+
+ /* Never list trusted.overlay, list other trusted for superuser only */
+- return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
++ return !ovl_is_private_xattr(s) &&
++ ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ }
+
+ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
arm64-dts-rockchip-limit-clock-rate-of-mmc-controllers-for-rk3328.patch
alarmtimer-use-eopnotsupp-instead-of-enotsupp.patch
regulator-defer-init-completion-for-a-while-after-late_initcall.patch
+gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch
+memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch
+memcg-kmem-do-not-fail-__gfp_nofail-charges.patch
+i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch
+ovl-filter-of-trusted-xattr-results-in-audit.patch
+btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch
+btrfs-relinquish-cpus-in-btrfs_compare_trees.patch
+btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch