--- /dev/null
+From cbcf01128d0a92e131bd09f1688fe032480b65ca Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Wed, 28 Jul 2021 14:47:20 +0200
+Subject: af_unix: fix garbage collect vs MSG_PEEK
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit cbcf01128d0a92e131bd09f1688fe032480b65ca upstream.
+
+unix_gc() assumes that candidate sockets can never gain an external
+reference (i.e. be installed into an fd) while the unix_gc_lock is
+held. Except for MSG_PEEK this is guaranteed by modifying the inflight
+count under the unix_gc_lock.
+
+MSG_PEEK does not touch any variable protected by unix_gc_lock (the
+file count is not so protected), yet it still needs to be serialized
+with garbage collection.
+Do this by locking/unlocking unix_gc_lock:
+
+ 1) increment file count
+
+ 2) lock/unlock barrier to make sure incremented file count is visible
+    to garbage collection
+
+ 3) install file into fd
+
+Unlike smp_mb(), this lock/unlock pair acts as a barrier that guarantees
+garbage collection runs either completely before or completely after it.
+
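+A minimal sketch of the resulting ordering on the MSG_PEEK receive path
+(restating the hunk below; the fd install itself is done later by the
+existing scm_recv()/scm_detach_fds() step):
+
+  static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+  {
+          /* 1) take extra file references for the peeked SCM_RIGHTS */
+          scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+          /* 2) barrier: wait for any in-progress gc to finish; a gc
+           *    started after this sees the elevated file count and
+           *    will not treat these sockets as candidates
+           */
+          spin_lock(&unix_gc_lock);
+          spin_unlock(&unix_gc_lock);
+  }
+
+  /* 3) later, scm_recv() -> scm_detach_fds() installs the files into
+   *    the receiving process's fd table
+   */
+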
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 49 insertions(+), 2 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1521,6 +1521,53 @@ out:
+ return err;
+ }
+
++static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
++{
++ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
++
++ /*
++ * Garbage collection of unix sockets starts by selecting a set of
++ * candidate sockets which have reference only from being in flight
++ * (total_refs == inflight_refs). This condition is checked once during
++ * the candidate collection phase, and candidates are marked as such, so
++ * that non-candidates can later be ignored. While inflight_refs is
++ * protected by unix_gc_lock, total_refs (file count) is not, hence this
++ * is an instantaneous decision.
++ *
++ * Once a candidate, however, the socket must not be reinstalled into a
++ * file descriptor while the garbage collection is in progress.
++ *
++ * If the above conditions are met, then the directed graph of
++ * candidates (*) does not change while unix_gc_lock is held.
++ *
++ * Any operations that changes the file count through file descriptors
++ * (dup, close, sendmsg) does not change the graph since candidates are
++ * not installed in fds.
++ *
++ * Dequeing a candidate via recvmsg would install it into an fd, but
++ * that takes unix_gc_lock to decrement the inflight count, so it's
++ * serialized with garbage collection.
++ *
++ * MSG_PEEK is special in that it does not change the inflight count,
++ * yet does install the socket into an fd. The following lock/unlock
++ * pair is to ensure serialization with garbage collection. It must be
++ * done between incrementing the file count and installing the file into
++ * an fd.
++ *
++ * If garbage collection starts after the barrier provided by the
++ * lock/unlock, then it will see the elevated refcount and not mark this
++ * as a candidate. If a garbage collection is already in progress
++ * before the file count was incremented, then the lock/unlock pair will
++ * ensure that garbage collection is finished before progressing to
++ * installing the fd.
++ *
++ * (*) A -> B where B is on the queue of A or B is on the queue of C
++ * which is on the queue of listening socket A.
++ */
++ spin_lock(&unix_gc_lock);
++ spin_unlock(&unix_gc_lock);
++}
++
+ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+ {
+ int err = 0;
+@@ -2170,7 +2217,7 @@ static int unix_dgram_recvmsg(struct soc
+ sk_peek_offset_fwd(sk, size);
+
+ if (UNIXCB(skb).fp)
+- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
++ unix_peek_fds(&scm, skb);
+ }
+ err = (flags & MSG_TRUNC) ? skb->len - skip : size;
+
+@@ -2413,7 +2460,7 @@ unlock:
+ /* It is questionable, see note in unix_dgram_recvmsg.
+ */
+ if (UNIXCB(skb).fp)
+- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
++ unix_peek_fds(&scm, skb);
+
+ sk_peek_offset_fwd(sk, chunk);
+
--- /dev/null
+From 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec Mon Sep 17 00:00:00 2001
+From: Paul Gortmaker <paul.gortmaker@windriver.com>
+Date: Wed, 16 Jun 2021 08:51:57 -0400
+Subject: cgroup1: fix leaked context root causing sporadic NULL deref in LTP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paul Gortmaker <paul.gortmaker@windriver.com>
+
+commit 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec upstream.
+
+Richard reported sporadic (roughly one in 10 or so) null dereferences and
+other strange behaviour for a set of automated LTP tests. Things like:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000008
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] PREEMPT SMP PTI
+ CPU: 0 PID: 1516 Comm: umount Not tainted 5.10.0-yocto-standard #1
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:kernfs_sop_show_path+0x1b/0x60
+
+...or these others:
+
+ RIP: 0010:do_mkdirat+0x6a/0xf0
+ RIP: 0010:d_alloc_parallel+0x98/0x510
+ RIP: 0010:do_readlinkat+0x86/0x120
+
+There were other less common instances of some kind of a general scribble
+but the common theme was mount and cgroup and a dubious dentry triggering
+the NULL dereference. I was only able to reproduce it under qemu by
+replicating Richard's setup as closely as possible - I never did get it
+to happen on bare metal, even while keeping everything else the same.
+
+In commit 71d883c37e8d ("cgroup_do_mount(): massage calling conventions")
+we see this as a part of the overall change:
+
+ --------------
+ struct cgroup_subsys *ss;
+ - struct dentry *dentry;
+
+ [...]
+
+ - dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root,
+ - CGROUP_SUPER_MAGIC, ns);
+
+ [...]
+
+ - if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+ - struct super_block *sb = dentry->d_sb;
+ - dput(dentry);
+ + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns);
+ + if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+ + struct super_block *sb = fc->root->d_sb;
+ + dput(fc->root);
+ deactivate_locked_super(sb);
+ msleep(10);
+ return restart_syscall();
+ }
+ --------------
+
+In changing from the local "*dentry" variable to using fc->root, we now
+export/leave that dentry pointer in the file context after doing the dput()
+in the unlikely "is_dying" case. With LTP doing a crazy amount of back to
+back mount/unmount [testcases/bin/cgroup_regression_5_1.sh] the unlikely
+becomes slightly likely and then bad things happen.
+
+A fix would be to not leave the stale reference in fc->root as follows:
+
+ --------------
+ dput(fc->root);
+ + fc->root = NULL;
+ deactivate_locked_super(sb);
+ --------------
+
+...but then we are just open-coding a duplicate of fc_drop_locked() so we
+simply use that instead.
+
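+For reference, fc_drop_locked() in fs/fs_context.c is roughly the
+following, i.e. exactly the dput-clear-deactivate sequence sketched
+above:
+
+  void fc_drop_locked(struct fs_context *fc)
+  {
+          struct super_block *sb = fc->root->d_sb;
+
+          dput(fc->root);
+          fc->root = NULL;        /* don't leave a stale pointer behind */
+          deactivate_locked_super(sb);
+  }
+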
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Zefan Li <lizefan.x@bytedance.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: stable@vger.kernel.org # v5.1+
+Reported-by: Richard Purdie <richard.purdie@linuxfoundation.org>
+Fixes: 71d883c37e8d ("cgroup_do_mount(): massage calling conventions")
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/internal.h | 1 -
+ include/linux/fs_context.h | 1 +
+ kernel/cgroup/cgroup-v1.c | 4 +---
+ 3 files changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -64,7 +64,6 @@ extern void __init chrdev_init(void);
+ */
+ extern const struct fs_context_operations legacy_fs_context_ops;
+ extern int parse_monolithic_mount_data(struct fs_context *, void *);
+-extern void fc_drop_locked(struct fs_context *);
+ extern void vfs_clean_context(struct fs_context *fc);
+ extern int finish_clean_context(struct fs_context *fc);
+
+--- a/include/linux/fs_context.h
++++ b/include/linux/fs_context.h
+@@ -139,6 +139,7 @@ extern int vfs_parse_fs_string(struct fs
+ extern int generic_parse_monolithic(struct fs_context *fc, void *data);
+ extern int vfs_get_tree(struct fs_context *fc);
+ extern void put_fs_context(struct fs_context *fc);
++extern void fc_drop_locked(struct fs_context *fc);
+
+ /*
+ * sget() wrappers to be called from the ->get_tree() op.
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -1225,9 +1225,7 @@ int cgroup1_get_tree(struct fs_context *
+ ret = cgroup_do_get_tree(fc);
+
+ if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
+- struct super_block *sb = fc->root->d_sb;
+- dput(fc->root);
+- deactivate_locked_super(sb);
++ fc_drop_locked(fc);
+ ret = 1;
+ }
+
--- /dev/null
+From b97f074583736c42fb36f2da1164e28c73758912 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 25 Feb 2021 17:41:32 +0200
+Subject: KVM: x86: determine if an exception has an error code only when injecting it.
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit b97f074583736c42fb36f2da1164e28c73758912 upstream.
+
+A page fault can be queued while the vCPU is in real paged mode on AMD,
+and the AMD manual asks the user to always intercept it (otherwise the
+result is undefined). The resulting VM exit does have an error code.
+
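+In other words, the check that drops the error code when the vCPU is not
+in protected mode moves from the point where the exception is queued to
+the point where it is injected, when the vCPU's mode is actually known.
+Restated from the hunks below:
+
+  static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+  {
+          /* decide at injection time, not at queueing time */
+          if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+                  vcpu->arch.exception.error_code = false;
+          kvm_x86_ops.queue_exception(vcpu);
+  }
+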
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210225154135.405125-2-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Zubin Mithra <zsm@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -541,8 +541,6 @@ static void kvm_multiple_exception(struc
+
+ if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
+ queue:
+- if (has_error && !is_protmode(vcpu))
+- has_error = false;
+ if (reinject) {
+ /*
+ * On vmentry, vcpu->arch.exception.pending is only
+@@ -8265,6 +8263,13 @@ static void update_cr8_intercept(struct
+ kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
+ }
+
++static void kvm_inject_exception(struct kvm_vcpu *vcpu)
++{
++ if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
++ vcpu->arch.exception.error_code = false;
++ kvm_x86_ops.queue_exception(vcpu);
++}
++
+ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
+ {
+ int r;
+@@ -8273,7 +8278,7 @@ static void inject_pending_event(struct
+ /* try to reinject previous events if any */
+
+ if (vcpu->arch.exception.injected) {
+- kvm_x86_ops.queue_exception(vcpu);
++ kvm_inject_exception(vcpu);
+ can_inject = false;
+ }
+ /*
+@@ -8336,7 +8341,7 @@ static void inject_pending_event(struct
+ }
+ }
+
+- kvm_x86_ops.queue_exception(vcpu);
++ kvm_inject_exception(vcpu);
+ can_inject = false;
+ }
+
tools-allow-proper-cc-cxx-...-override-with-llvm-1-in-makefile.include.patch
io_uring-fix-link-timeout-refs.patch
+kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch
+af_unix-fix-garbage-collect-vs-msg_peek.patch
+workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch
+cgroup1-fix-leaked-context-root-causing-sporadic-null-deref-in-ltp.patch
--- /dev/null
+From b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 Mon Sep 17 00:00:00 2001
+From: Yang Yingliang <yangyingliang@huawei.com>
+Date: Wed, 14 Jul 2021 17:19:33 +0800
+Subject: workqueue: fix UAF in pwq_unbound_release_workfn()
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+commit b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 upstream.
+
+I got a UAF report when doing fuzz test:
+
+[ 152.880091][ T8030] ==================================================================
+[ 152.881240][ T8030] BUG: KASAN: use-after-free in pwq_unbound_release_workfn+0x50/0x190
+[ 152.882442][ T8030] Read of size 4 at addr ffff88810d31bd00 by task kworker/3:2/8030
+[ 152.883578][ T8030]
+[ 152.883932][ T8030] CPU: 3 PID: 8030 Comm: kworker/3:2 Not tainted 5.13.0+ #249
+[ 152.885014][ T8030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+[ 152.886442][ T8030] Workqueue: events pwq_unbound_release_workfn
+[ 152.887358][ T8030] Call Trace:
+[ 152.887837][ T8030] dump_stack_lvl+0x75/0x9b
+[ 152.888525][ T8030] ? pwq_unbound_release_workfn+0x50/0x190
+[ 152.889371][ T8030] print_address_description.constprop.10+0x48/0x70
+[ 152.890326][ T8030] ? pwq_unbound_release_workfn+0x50/0x190
+[ 152.891163][ T8030] ? pwq_unbound_release_workfn+0x50/0x190
+[ 152.891999][ T8030] kasan_report.cold.15+0x82/0xdb
+[ 152.892740][ T8030] ? pwq_unbound_release_workfn+0x50/0x190
+[ 152.893594][ T8030] __asan_load4+0x69/0x90
+[ 152.894243][ T8030] pwq_unbound_release_workfn+0x50/0x190
+[ 152.895057][ T8030] process_one_work+0x47b/0x890
+[ 152.895778][ T8030] worker_thread+0x5c/0x790
+[ 152.896439][ T8030] ? process_one_work+0x890/0x890
+[ 152.897163][ T8030] kthread+0x223/0x250
+[ 152.897747][ T8030] ? set_kthread_struct+0xb0/0xb0
+[ 152.898471][ T8030] ret_from_fork+0x1f/0x30
+[ 152.899114][ T8030]
+[ 152.899446][ T8030] Allocated by task 8884:
+[ 152.900084][ T8030] kasan_save_stack+0x21/0x50
+[ 152.900769][ T8030] __kasan_kmalloc+0x88/0xb0
+[ 152.901416][ T8030] __kmalloc+0x29c/0x460
+[ 152.902014][ T8030] alloc_workqueue+0x111/0x8e0
+[ 152.902690][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0
+[ 152.903459][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0
+[ 152.904198][ T8030] scrub_workers_get+0x1e8/0x490
+[ 152.904929][ T8030] btrfs_scrub_dev+0x1b9/0x9c0
+[ 152.905599][ T8030] btrfs_ioctl+0x122c/0x4e50
+[ 152.906247][ T8030] __x64_sys_ioctl+0x137/0x190
+[ 152.906916][ T8030] do_syscall_64+0x34/0xb0
+[ 152.907535][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae
+[ 152.908365][ T8030]
+[ 152.908688][ T8030] Freed by task 8884:
+[ 152.909243][ T8030] kasan_save_stack+0x21/0x50
+[ 152.909893][ T8030] kasan_set_track+0x20/0x30
+[ 152.910541][ T8030] kasan_set_free_info+0x24/0x40
+[ 152.911265][ T8030] __kasan_slab_free+0xf7/0x140
+[ 152.911964][ T8030] kfree+0x9e/0x3d0
+[ 152.912501][ T8030] alloc_workqueue+0x7d7/0x8e0
+[ 152.913182][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0
+[ 152.913949][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0
+[ 152.914703][ T8030] scrub_workers_get+0x1e8/0x490
+[ 152.915402][ T8030] btrfs_scrub_dev+0x1b9/0x9c0
+[ 152.916077][ T8030] btrfs_ioctl+0x122c/0x4e50
+[ 152.916729][ T8030] __x64_sys_ioctl+0x137/0x190
+[ 152.917414][ T8030] do_syscall_64+0x34/0xb0
+[ 152.918034][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae
+[ 152.918872][ T8030]
+[ 152.919203][ T8030] The buggy address belongs to the object at ffff88810d31bc00
+[ 152.919203][ T8030] which belongs to the cache kmalloc-512 of size 512
+[ 152.921155][ T8030] The buggy address is located 256 bytes inside of
+[ 152.921155][ T8030] 512-byte region [ffff88810d31bc00, ffff88810d31be00)
+[ 152.922993][ T8030] The buggy address belongs to the page:
+[ 152.923800][ T8030] page:ffffea000434c600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d318
+[ 152.925249][ T8030] head:ffffea000434c600 order:2 compound_mapcount:0 compound_pincount:0
+[ 152.926399][ T8030] flags: 0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff)
+[ 152.927515][ T8030] raw: 057ff00000010200 dead000000000100 dead000000000122 ffff888009c42c80
+[ 152.928716][ T8030] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
+[ 152.929890][ T8030] page dumped because: kasan: bad access detected
+[ 152.930759][ T8030]
+[ 152.931076][ T8030] Memory state around the buggy address:
+[ 152.931851][ T8030] ffff88810d31bc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 152.932967][ T8030] ffff88810d31bc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 152.934068][ T8030] >ffff88810d31bd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 152.935189][ T8030] ^
+[ 152.935763][ T8030] ffff88810d31bd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 152.936847][ T8030] ffff88810d31be00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 152.937940][ T8030] ==================================================================
+
+If apply_wqattrs_prepare() fails in alloc_workqueue(), it will call
+put_pwq(), which queues work to call pwq_unbound_release_workfn(), which
+in turn uses the 'wq'. But the 'wq' allocated in alloc_workqueue() is
+freed in the error path when apply_wqattrs_prepare() fails, so this
+leads to a use-after-free:
+
+CPU0 CPU1
+alloc_workqueue()
+alloc_and_link_pwqs()
+apply_wqattrs_prepare() fails
+apply_wqattrs_cleanup()
+schedule_work(&pwq->unbound_release_work)
+kfree(wq)
+ worker_thread()
+ pwq_unbound_release_workfn() <- trigger uaf here
+
+If apply_wqattrs_prepare() fails, the new pwq is not linked and does not
+hold any reference to the 'wq', so the 'wq' is invalid to access from
+the worker. Fix this by checking whether the pwq is linked before
+touching 'wq'.
+
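+The shape of the fix (restated from the hunk below): the part of
+pwq_unbound_release_workfn() that dereferences 'wq' now runs only when
+the pwq was actually linked, while the pool/pwq teardown that does not
+need 'wq' still runs unconditionally:
+
+  /* an unlinked pwq holds no reference on wq; don't touch it */
+  if (!list_empty(&pwq->pwqs_node)) {
+          mutex_lock(&wq->mutex);
+          list_del_rcu(&pwq->pwqs_node);
+          is_last = list_empty(&wq->pwqs);
+          mutex_unlock(&wq->mutex);
+  }
+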
+Fixes: 2d5f0764b526 ("workqueue: split apply_workqueue_attrs() into 3 stages")
+Cc: stable@vger.kernel.org # v4.2+
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Tested-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -3670,15 +3670,21 @@ static void pwq_unbound_release_workfn(s
+ unbound_release_work);
+ struct workqueue_struct *wq = pwq->wq;
+ struct worker_pool *pool = pwq->pool;
+- bool is_last;
++ bool is_last = false;
+
+- if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+- return;
++ /*
++ * when @pwq is not linked, it doesn't hold any reference to the
++ * @wq, and @wq is invalid to access.
++ */
++ if (!list_empty(&pwq->pwqs_node)) {
++ if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
++ return;
+
+- mutex_lock(&wq->mutex);
+- list_del_rcu(&pwq->pwqs_node);
+- is_last = list_empty(&wq->pwqs);
+- mutex_unlock(&wq->mutex);
++ mutex_lock(&wq->mutex);
++ list_del_rcu(&pwq->pwqs_node);
++ is_last = list_empty(&wq->pwqs);
++ mutex_unlock(&wq->mutex);
++ }
+
+ mutex_lock(&wq_pool_mutex);
+ put_unbound_pool(pool);