4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)
diff --git a/queue-4.19/af_unix-fix-garbage-collect-vs-msg_peek.patch b/queue-4.19/af_unix-fix-garbage-collect-vs-msg_peek.patch

new file mode 100644 (file)

index 0000000..ae9e8ae
--- /dev/null
+++ b/queue-4.19/af_unix-fix-garbage-collect-vs-msg_peek.patch
@@ -0,0 +1,110 @@
+From cbcf01128d0a92e131bd09f1688fe032480b65ca Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Wed, 28 Jul 2021 14:47:20 +0200
+Subject: af_unix: fix garbage collect vs MSG_PEEK
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit cbcf01128d0a92e131bd09f1688fe032480b65ca upstream.
+
+unix_gc() assumes that candidate sockets can never gain an external
+reference (i.e.  be installed into an fd) while the unix_gc_lock is
+held.  Except for MSG_PEEK this is guaranteed by modifying inflight
+count under the unix_gc_lock.
+
+MSG_PEEK does not touch any variable protected by unix_gc_lock (file
+count is not), yet it needs to be serialized with garbage collection.
+Do this by locking/unlocking unix_gc_lock:
+
+ 1) increment file count
+
+ 2) lock/unlock barrier to make sure incremented file count is visible
+    to garbage collection
+
+ 3) install file into fd
+
+This is a lock barrier (unlike smp_mb()) that ensures that garbage
+collection is run completely before or completely after the barrier.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 49 insertions(+), 2 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1517,6 +1517,53 @@ out:
+       return err;
+ }
+ 
++static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
++{
++      scm->fp = scm_fp_dup(UNIXCB(skb).fp);
++
++      /*
++       * Garbage collection of unix sockets starts by selecting a set of
++       * candidate sockets which have reference only from being in flight
++       * (total_refs == inflight_refs).  This condition is checked once during
++       * the candidate collection phase, and candidates are marked as such, so
++       * that non-candidates can later be ignored.  While inflight_refs is
++       * protected by unix_gc_lock, total_refs (file count) is not, hence this
++       * is an instantaneous decision.
++       *
++       * Once a candidate, however, the socket must not be reinstalled into a
++       * file descriptor while the garbage collection is in progress.
++       *
++       * If the above conditions are met, then the directed graph of
++       * candidates (*) does not change while unix_gc_lock is held.
++       *
++       * Any operation that changes the file count through file descriptors
++       * (dup, close, sendmsg) does not change the graph since candidates are
++       * not installed in fds.
++       *
++       * Dequeuing a candidate via recvmsg would install it into an fd, but
++       * that takes unix_gc_lock to decrement the inflight count, so it's
++       * serialized with garbage collection.
++       *
++       * MSG_PEEK is special in that it does not change the inflight count,
++       * yet does install the socket into an fd.  The following lock/unlock
++       * pair is to ensure serialization with garbage collection.  It must be
++       * done between incrementing the file count and installing the file into
++       * an fd.
++       *
++       * If garbage collection starts after the barrier provided by the
++       * lock/unlock, then it will see the elevated refcount and not mark this
++       * as a candidate.  If a garbage collection is already in progress
++       * before the file count was incremented, then the lock/unlock pair will
++       * ensure that garbage collection is finished before progressing to
++       * installing the fd.
++       *
++       * (*) A -> B where B is on the queue of A or B is on the queue of C
++       * which is on the queue of listening socket A.
++       */
++      spin_lock(&unix_gc_lock);
++      spin_unlock(&unix_gc_lock);
++}
++
+ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+ {
+       int err = 0;
+@@ -2142,7 +2189,7 @@ static int unix_dgram_recvmsg(struct soc
+               sk_peek_offset_fwd(sk, size);
+ 
+               if (UNIXCB(skb).fp)
+-                      scm.fp = scm_fp_dup(UNIXCB(skb).fp);
++                      unix_peek_fds(&scm, skb);
+       }
+       err = (flags & MSG_TRUNC) ? skb->len - skip : size;
+ 
+@@ -2383,7 +2430,7 @@ unlock:
+                       /* It is questionable, see note in unix_dgram_recvmsg.
+                        */
+                       if (UNIXCB(skb).fp)
+-                              scm.fp = scm_fp_dup(UNIXCB(skb).fp);
++                              unix_peek_fds(&scm, skb);
+ 
+                       sk_peek_offset_fwd(sk, chunk);
+ 
diff --git a/queue-4.19/kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch b/queue-4.19/kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch

new file mode 100644 (file)

index 0000000..6c9f0d1
--- /dev/null
+++ b/queue-4.19/kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch
@@ -0,0 +1,67 @@
+From b97f074583736c42fb36f2da1164e28c73758912 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 25 Feb 2021 17:41:32 +0200
+Subject: KVM: x86: determine if an exception has an error code only when injecting it.
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit b97f074583736c42fb36f2da1164e28c73758912 upstream.
+
+A page fault can be queued while the vCPU is in paged real mode on AMD, and
+the AMD manual asks the user to always intercept it
+(otherwise the result is undefined).
+The resulting VM exit does have an error code.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210225154135.405125-2-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Zubin Mithra <zsm@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -416,8 +416,6 @@ static void kvm_multiple_exception(struc
+ 
+       if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
+       queue:
+-              if (has_error && !is_protmode(vcpu))
+-                      has_error = false;
+               if (reinject) {
+                       /*
+                        * On vmentry, vcpu->arch.exception.pending is only
+@@ -7114,6 +7112,13 @@ static void update_cr8_intercept(struct
+       kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+ }
+ 
++static void kvm_inject_exception(struct kvm_vcpu *vcpu)
++{
++       /* Not in protected mode: clear the flag, not the error code. */
++       vcpu->arch.exception.has_error_code &= !!is_protmode(vcpu);
++       kvm_x86_ops->queue_exception(vcpu);
++}
++
+ static int inject_pending_event(struct kvm_vcpu *vcpu)
+ {
+       int r;
+@@ -7121,7 +7126,7 @@ static int inject_pending_event(struct k
+       /* try to reinject previous events if any */
+ 
+       if (vcpu->arch.exception.injected)
+-              kvm_x86_ops->queue_exception(vcpu);
++              kvm_inject_exception(vcpu);
+       /*
+        * Do not inject an NMI or interrupt if there is a pending
+        * exception.  Exceptions and interrupts are recognized at
+@@ -7175,7 +7180,7 @@ static int inject_pending_event(struct k
+                       kvm_update_dr7(vcpu);
+               }
+ 
+-              kvm_x86_ops->queue_exception(vcpu);
++              kvm_inject_exception(vcpu);
+       }
+ 
+       /* Don't consider new event if we re-injected an event */
diff --git a/queue-4.19/net-split-out-functions-related-to-registering-inflight-socket-files.patch b/queue-4.19/net-split-out-functions-related-to-registering-inflight-socket-files.patch

new file mode 100644 (file)

index 0000000..e52aa5a
--- /dev/null
+++ b/queue-4.19/net-split-out-functions-related-to-registering-inflight-socket-files.patch
@@ -0,0 +1,402 @@
+From f4e65870e5cede5ca1ec0006b6c9803994e5f7b8 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 8 Feb 2019 09:01:44 -0700
+Subject: net: split out functions related to registering inflight socket files
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit f4e65870e5cede5ca1ec0006b6c9803994e5f7b8 upstream.
+
+We need this functionality for the io_uring file registration, but
+we cannot rely on it since CONFIG_UNIX can be modular. Move the helpers
+to a separate file, that's always builtin to the kernel if CONFIG_UNIX is
+m/y.
+
+No functional changes in this patch, just moving code around.
+
+Reviewed-by: Hannes Reinecke <hare@suse.com>
+Acked-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[ backported to older kernels to get access to unix_gc_lock - gregkh ]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/af_unix.h |    1 
+ net/Makefile          |    2 
+ net/unix/Kconfig      |    5 +
+ net/unix/Makefile     |    2 
+ net/unix/af_unix.c    |   63 ---------------------
+ net/unix/garbage.c    |   68 ----------------------
+ net/unix/scm.c        |  148 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ net/unix/scm.h        |   10 +++
+ 8 files changed, 171 insertions(+), 128 deletions(-)
+ create mode 100644 net/unix/scm.c
+ create mode 100644 net/unix/scm.h
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -10,6 +10,7 @@
+ 
+ void unix_inflight(struct user_struct *user, struct file *fp);
+ void unix_notinflight(struct user_struct *user, struct file *fp);
++void unix_destruct_scm(struct sk_buff *skb);
+ void unix_gc(void);
+ void wait_for_unix_gc(void);
+ struct sock *unix_get_socket(struct file *filp);
+--- a/net/Makefile
++++ b/net/Makefile
+@@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER)              += netfilter/
+ obj-$(CONFIG_INET)            += ipv4/
+ obj-$(CONFIG_TLS)             += tls/
+ obj-$(CONFIG_XFRM)            += xfrm/
+-obj-$(CONFIG_UNIX)            += unix/
++obj-$(CONFIG_UNIX_SCM)                += unix/
+ obj-$(CONFIG_NET)             += ipv6/
+ obj-$(CONFIG_BPFILTER)                += bpfilter/
+ obj-$(CONFIG_PACKET)          += packet/
+--- a/net/unix/Kconfig
++++ b/net/unix/Kconfig
+@@ -19,6 +19,11 @@ config UNIX
+ 
+         Say Y unless you know what you are doing.
+ 
++config UNIX_SCM
++      bool
++      depends on UNIX
++      default y
++
+ config UNIX_DIAG
+       tristate "UNIX: socket monitoring interface"
+       depends on UNIX
+--- a/net/unix/Makefile
++++ b/net/unix/Makefile
+@@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL)        += sysctl_net_unix
+ 
+ obj-$(CONFIG_UNIX_DIAG)       += unix_diag.o
+ unix_diag-y           := diag.o
++
++obj-$(CONFIG_UNIX_SCM)        += scm.o
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -119,6 +119,8 @@
+ #include <linux/freezer.h>
+ #include <linux/file.h>
+ 
++#include "scm.h"
++
+ struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
+ EXPORT_SYMBOL_GPL(unix_socket_table);
+ DEFINE_SPINLOCK(unix_table_lock);
+@@ -1515,67 +1517,6 @@ out:
+       return err;
+ }
+ 
+-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+-{
+-      int i;
+-
+-      scm->fp = UNIXCB(skb).fp;
+-      UNIXCB(skb).fp = NULL;
+-
+-      for (i = scm->fp->count-1; i >= 0; i--)
+-              unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+-}
+-
+-static void unix_destruct_scm(struct sk_buff *skb)
+-{
+-      struct scm_cookie scm;
+-      memset(&scm, 0, sizeof(scm));
+-      scm.pid  = UNIXCB(skb).pid;
+-      if (UNIXCB(skb).fp)
+-              unix_detach_fds(&scm, skb);
+-
+-      /* Alas, it calls VFS */
+-      /* So fscking what? fput() had been SMP-safe since the last Summer */
+-      scm_destroy(&scm);
+-      sock_wfree(skb);
+-}
+-
+-/*
+- * The "user->unix_inflight" variable is protected by the garbage
+- * collection lock, and we just read it locklessly here. If you go
+- * over the limit, there might be a tiny race in actually noticing
+- * it across threads. Tough.
+- */
+-static inline bool too_many_unix_fds(struct task_struct *p)
+-{
+-      struct user_struct *user = current_user();
+-
+-      if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+-              return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+-      return false;
+-}
+-
+-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+-{
+-      int i;
+-
+-      if (too_many_unix_fds(current))
+-              return -ETOOMANYREFS;
+-
+-      /*
+-       * Need to duplicate file references for the sake of garbage
+-       * collection.  Otherwise a socket in the fps might become a
+-       * candidate for GC while the skb is not yet queued.
+-       */
+-      UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+-      if (!UNIXCB(skb).fp)
+-              return -ENOMEM;
+-
+-      for (i = scm->fp->count - 1; i >= 0; i--)
+-              unix_inflight(scm->fp->user, scm->fp->fp[i]);
+-      return 0;
+-}
+-
+ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+ {
+       int err = 0;
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -86,77 +86,13 @@
+ #include <net/scm.h>
+ #include <net/tcp_states.h>
+ 
++#include "scm.h"
++
+ /* Internal data structures and random procedures: */
+ 
+-static LIST_HEAD(gc_inflight_list);
+ static LIST_HEAD(gc_candidates);
+-static DEFINE_SPINLOCK(unix_gc_lock);
+ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+ 
+-unsigned int unix_tot_inflight;
+-
+-struct sock *unix_get_socket(struct file *filp)
+-{
+-      struct sock *u_sock = NULL;
+-      struct inode *inode = file_inode(filp);
+-
+-      /* Socket ? */
+-      if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+-              struct socket *sock = SOCKET_I(inode);
+-              struct sock *s = sock->sk;
+-
+-              /* PF_UNIX ? */
+-              if (s && sock->ops && sock->ops->family == PF_UNIX)
+-                      u_sock = s;
+-      }
+-      return u_sock;
+-}
+-
+-/* Keep the number of times in flight count for the file
+- * descriptor if it is for an AF_UNIX socket.
+- */
+-
+-void unix_inflight(struct user_struct *user, struct file *fp)
+-{
+-      struct sock *s = unix_get_socket(fp);
+-
+-      spin_lock(&unix_gc_lock);
+-
+-      if (s) {
+-              struct unix_sock *u = unix_sk(s);
+-
+-              if (atomic_long_inc_return(&u->inflight) == 1) {
+-                      BUG_ON(!list_empty(&u->link));
+-                      list_add_tail(&u->link, &gc_inflight_list);
+-              } else {
+-                      BUG_ON(list_empty(&u->link));
+-              }
+-              unix_tot_inflight++;
+-      }
+-      user->unix_inflight++;
+-      spin_unlock(&unix_gc_lock);
+-}
+-
+-void unix_notinflight(struct user_struct *user, struct file *fp)
+-{
+-      struct sock *s = unix_get_socket(fp);
+-
+-      spin_lock(&unix_gc_lock);
+-
+-      if (s) {
+-              struct unix_sock *u = unix_sk(s);
+-
+-              BUG_ON(!atomic_long_read(&u->inflight));
+-              BUG_ON(list_empty(&u->link));
+-
+-              if (atomic_long_dec_and_test(&u->inflight))
+-                      list_del_init(&u->link);
+-              unix_tot_inflight--;
+-      }
+-      user->unix_inflight--;
+-      spin_unlock(&unix_gc_lock);
+-}
+-
+ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
+                         struct sk_buff_head *hitlist)
+ {
+--- /dev/null
++++ b/net/unix/scm.c
+@@ -0,0 +1,148 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/socket.h>
++#include <linux/net.h>
++#include <linux/fs.h>
++#include <net/af_unix.h>
++#include <net/scm.h>
++#include <linux/init.h>
++
++#include "scm.h"
++
++unsigned int unix_tot_inflight;
++EXPORT_SYMBOL(unix_tot_inflight);
++
++LIST_HEAD(gc_inflight_list);
++EXPORT_SYMBOL(gc_inflight_list);
++
++DEFINE_SPINLOCK(unix_gc_lock);
++EXPORT_SYMBOL(unix_gc_lock);
++
++struct sock *unix_get_socket(struct file *filp)
++{
++      struct sock *u_sock = NULL;
++      struct inode *inode = file_inode(filp);
++
++      /* Socket ? */
++      if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
++              struct socket *sock = SOCKET_I(inode);
++              struct sock *s = sock->sk;
++
++              /* PF_UNIX ? */
++              if (s && sock->ops && sock->ops->family == PF_UNIX)
++                      u_sock = s;
++      }
++      return u_sock;
++}
++EXPORT_SYMBOL(unix_get_socket);
++
++/* Keep the number of times in flight count for the file
++ * descriptor if it is for an AF_UNIX socket.
++ */
++void unix_inflight(struct user_struct *user, struct file *fp)
++{
++      struct sock *s = unix_get_socket(fp);
++
++      spin_lock(&unix_gc_lock);
++
++      if (s) {
++              struct unix_sock *u = unix_sk(s);
++
++              if (atomic_long_inc_return(&u->inflight) == 1) {
++                      BUG_ON(!list_empty(&u->link));
++                      list_add_tail(&u->link, &gc_inflight_list);
++              } else {
++                      BUG_ON(list_empty(&u->link));
++              }
++              unix_tot_inflight++;
++      }
++      user->unix_inflight++;
++      spin_unlock(&unix_gc_lock);
++}
++
++void unix_notinflight(struct user_struct *user, struct file *fp)
++{
++      struct sock *s = unix_get_socket(fp);
++
++      spin_lock(&unix_gc_lock);
++
++      if (s) {
++              struct unix_sock *u = unix_sk(s);
++
++              BUG_ON(!atomic_long_read(&u->inflight));
++              BUG_ON(list_empty(&u->link));
++
++              if (atomic_long_dec_and_test(&u->inflight))
++                      list_del_init(&u->link);
++              unix_tot_inflight--;
++      }
++      user->unix_inflight--;
++      spin_unlock(&unix_gc_lock);
++}
++
++/*
++ * The "user->unix_inflight" variable is protected by the garbage
++ * collection lock, and we just read it locklessly here. If you go
++ * over the limit, there might be a tiny race in actually noticing
++ * it across threads. Tough.
++ */
++static inline bool too_many_unix_fds(struct task_struct *p)
++{
++      struct user_struct *user = current_user();
++
++      if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
++              return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
++      return false;
++}
++
++int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
++{
++      int i;
++
++      if (too_many_unix_fds(current))
++              return -ETOOMANYREFS;
++
++      /*
++       * Need to duplicate file references for the sake of garbage
++       * collection.  Otherwise a socket in the fps might become a
++       * candidate for GC while the skb is not yet queued.
++       */
++      UNIXCB(skb).fp = scm_fp_dup(scm->fp);
++      if (!UNIXCB(skb).fp)
++              return -ENOMEM;
++
++      for (i = scm->fp->count - 1; i >= 0; i--)
++              unix_inflight(scm->fp->user, scm->fp->fp[i]);
++      return 0;
++}
++EXPORT_SYMBOL(unix_attach_fds);
++
++void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
++{
++      int i;
++
++      scm->fp = UNIXCB(skb).fp;
++      UNIXCB(skb).fp = NULL;
++
++      for (i = scm->fp->count-1; i >= 0; i--)
++              unix_notinflight(scm->fp->user, scm->fp->fp[i]);
++}
++EXPORT_SYMBOL(unix_detach_fds);
++
++void unix_destruct_scm(struct sk_buff *skb)
++{
++      struct scm_cookie scm;
++
++      memset(&scm, 0, sizeof(scm));
++      scm.pid  = UNIXCB(skb).pid;
++      if (UNIXCB(skb).fp)
++              unix_detach_fds(&scm, skb);
++
++      /* Alas, it calls VFS */
++      /* So fscking what? fput() had been SMP-safe since the last Summer */
++      scm_destroy(&scm);
++      sock_wfree(skb);
++}
++EXPORT_SYMBOL(unix_destruct_scm);
+--- /dev/null
++++ b/net/unix/scm.h
+@@ -0,0 +1,10 @@
++#ifndef NET_UNIX_SCM_H
++#define NET_UNIX_SCM_H
++
++extern struct list_head gc_inflight_list;
++extern spinlock_t unix_gc_lock;
++
++int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
++void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
++
++#endif
diff --git a/queue-4.19/series b/queue-4.19/series

index 1ce8881b30fd9dd5360ef3acf5261fdc526ad888..265fc417792c8f097ae8d325f7ba089f92df6008 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -1,2 +1,6 @@
  selftest-fix-build-error-in-tools-testing-selftests-vm-userfaultfd.c.patch
  iio-dac-ds4422-ds4424-drop-of_node-check.patch
+kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch
+net-split-out-functions-related-to-registering-inflight-socket-files.patch
+af_unix-fix-garbage-collect-vs-msg_peek.patch
+workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch
diff --git a/queue-4.19/workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch b/queue-4.19/workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch

new file mode 100644 (file)

index 0000000..4581da3
--- /dev/null
+++ b/queue-4.19/workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch
@@ -0,0 +1,149 @@
+From b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 Mon Sep 17 00:00:00 2001
+From: Yang Yingliang <yangyingliang@huawei.com>
+Date: Wed, 14 Jul 2021 17:19:33 +0800
+Subject: workqueue: fix UAF in pwq_unbound_release_workfn()
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+commit b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 upstream.
+
+I got a UAF report when doing fuzz test:
+
+[  152.880091][ T8030] ==================================================================
+[  152.881240][ T8030] BUG: KASAN: use-after-free in pwq_unbound_release_workfn+0x50/0x190
+[  152.882442][ T8030] Read of size 4 at addr ffff88810d31bd00 by task kworker/3:2/8030
+[  152.883578][ T8030]
+[  152.883932][ T8030] CPU: 3 PID: 8030 Comm: kworker/3:2 Not tainted 5.13.0+ #249
+[  152.885014][ T8030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+[  152.886442][ T8030] Workqueue: events pwq_unbound_release_workfn
+[  152.887358][ T8030] Call Trace:
+[  152.887837][ T8030]  dump_stack_lvl+0x75/0x9b
+[  152.888525][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
+[  152.889371][ T8030]  print_address_description.constprop.10+0x48/0x70
+[  152.890326][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
+[  152.891163][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
+[  152.891999][ T8030]  kasan_report.cold.15+0x82/0xdb
+[  152.892740][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
+[  152.893594][ T8030]  __asan_load4+0x69/0x90
+[  152.894243][ T8030]  pwq_unbound_release_workfn+0x50/0x190
+[  152.895057][ T8030]  process_one_work+0x47b/0x890
+[  152.895778][ T8030]  worker_thread+0x5c/0x790
+[  152.896439][ T8030]  ? process_one_work+0x890/0x890
+[  152.897163][ T8030]  kthread+0x223/0x250
+[  152.897747][ T8030]  ? set_kthread_struct+0xb0/0xb0
+[  152.898471][ T8030]  ret_from_fork+0x1f/0x30
+[  152.899114][ T8030]
+[  152.899446][ T8030] Allocated by task 8884:
+[  152.900084][ T8030]  kasan_save_stack+0x21/0x50
+[  152.900769][ T8030]  __kasan_kmalloc+0x88/0xb0
+[  152.901416][ T8030]  __kmalloc+0x29c/0x460
+[  152.902014][ T8030]  alloc_workqueue+0x111/0x8e0
+[  152.902690][ T8030]  __btrfs_alloc_workqueue+0x11e/0x2a0
+[  152.903459][ T8030]  btrfs_alloc_workqueue+0x6d/0x1d0
+[  152.904198][ T8030]  scrub_workers_get+0x1e8/0x490
+[  152.904929][ T8030]  btrfs_scrub_dev+0x1b9/0x9c0
+[  152.905599][ T8030]  btrfs_ioctl+0x122c/0x4e50
+[  152.906247][ T8030]  __x64_sys_ioctl+0x137/0x190
+[  152.906916][ T8030]  do_syscall_64+0x34/0xb0
+[  152.907535][ T8030]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+[  152.908365][ T8030]
+[  152.908688][ T8030] Freed by task 8884:
+[  152.909243][ T8030]  kasan_save_stack+0x21/0x50
+[  152.909893][ T8030]  kasan_set_track+0x20/0x30
+[  152.910541][ T8030]  kasan_set_free_info+0x24/0x40
+[  152.911265][ T8030]  __kasan_slab_free+0xf7/0x140
+[  152.911964][ T8030]  kfree+0x9e/0x3d0
+[  152.912501][ T8030]  alloc_workqueue+0x7d7/0x8e0
+[  152.913182][ T8030]  __btrfs_alloc_workqueue+0x11e/0x2a0
+[  152.913949][ T8030]  btrfs_alloc_workqueue+0x6d/0x1d0
+[  152.914703][ T8030]  scrub_workers_get+0x1e8/0x490
+[  152.915402][ T8030]  btrfs_scrub_dev+0x1b9/0x9c0
+[  152.916077][ T8030]  btrfs_ioctl+0x122c/0x4e50
+[  152.916729][ T8030]  __x64_sys_ioctl+0x137/0x190
+[  152.917414][ T8030]  do_syscall_64+0x34/0xb0
+[  152.918034][ T8030]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+[  152.918872][ T8030]
+[  152.919203][ T8030] The buggy address belongs to the object at ffff88810d31bc00
+[  152.919203][ T8030]  which belongs to the cache kmalloc-512 of size 512
+[  152.921155][ T8030] The buggy address is located 256 bytes inside of
+[  152.921155][ T8030]  512-byte region [ffff88810d31bc00, ffff88810d31be00)
+[  152.922993][ T8030] The buggy address belongs to the page:
+[  152.923800][ T8030] page:ffffea000434c600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d318
+[  152.925249][ T8030] head:ffffea000434c600 order:2 compound_mapcount:0 compound_pincount:0
+[  152.926399][ T8030] flags: 0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff)
+[  152.927515][ T8030] raw: 057ff00000010200 dead000000000100 dead000000000122 ffff888009c42c80
+[  152.928716][ T8030] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
+[  152.929890][ T8030] page dumped because: kasan: bad access detected
+[  152.930759][ T8030]
+[  152.931076][ T8030] Memory state around the buggy address:
+[  152.931851][ T8030]  ffff88810d31bc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  152.932967][ T8030]  ffff88810d31bc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  152.934068][ T8030] >ffff88810d31bd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  152.935189][ T8030]                    ^
+[  152.935763][ T8030]  ffff88810d31bd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  152.936847][ T8030]  ffff88810d31be00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  152.937940][ T8030] ==================================================================
+
+If apply_wqattrs_prepare() fails in alloc_workqueue(), it will call put_pwq()
+which invokes a work queue to call pwq_unbound_release_workfn() and use the 'wq'.
+The 'wq' allocated in alloc_workqueue() will be freed in the error path when
+apply_wqattrs_prepare() fails. So it will lead to a UAF.
+
+CPU0                                          CPU1
+alloc_workqueue()
+alloc_and_link_pwqs()
+apply_wqattrs_prepare() fails
+apply_wqattrs_cleanup()
+schedule_work(&pwq->unbound_release_work)
+kfree(wq)
+                                              worker_thread()
+                                              pwq_unbound_release_workfn() <- trigger uaf here
+
+If apply_wqattrs_prepare() fails, the new pwq is not linked and doesn't
+hold any reference to the 'wq', so 'wq' is invalid to access in the worker.
+Fix this by checking whether the pwq is linked before accessing 'wq'.
+
+Fixes: 2d5f0764b526 ("workqueue: split apply_workqueue_attrs() into 3 stages")
+Cc: stable@vger.kernel.org # v4.2+
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Tested-by: Pavel Skripkin <paskripkin@gmail.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -3498,15 +3498,21 @@ static void pwq_unbound_release_workfn(s
+                                                 unbound_release_work);
+       struct workqueue_struct *wq = pwq->wq;
+       struct worker_pool *pool = pwq->pool;
+-      bool is_last;
++      bool is_last = false;
+ 
+-      if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+-              return;
++      /*
++       * when @pwq is not linked, it doesn't hold any reference to the
++       * @wq, and @wq is invalid to access.
++       */
++      if (!list_empty(&pwq->pwqs_node)) {
++              if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
++                      return;
+ 
+-      mutex_lock(&wq->mutex);
+-      list_del_rcu(&pwq->pwqs_node);
+-      is_last = list_empty(&wq->pwqs);
+-      mutex_unlock(&wq->mutex);
++              mutex_lock(&wq->mutex);
++              list_del_rcu(&pwq->pwqs_node);
++              is_last = list_empty(&wq->pwqs);
++              mutex_unlock(&wq->mutex);
++      }
+ 
+       mutex_lock(&wq_pool_mutex);
+       put_unbound_pool(pool);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 29 Jul 2021 11:28:14 +0000 (13:28 +0200)
queue-4.19/af_unix-fix-garbage-collect-vs-msg_peek.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/kvm-x86-determine-if-an-exception-has-an-error-code-only-when-injecting-it.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-split-out-functions-related-to-registering-inflight-socket-files.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history
queue-4.19/workqueue-fix-uaf-in-pwq_unbound_release_workfn.patch	[new file with mode: 0644]	patch \| blob