--- /dev/null
+From 520495fe96d74e05db585fc748351e0504d8f40d Mon Sep 17 00:00:00 2001
+From: Cannon Matthews <cannonmatthews@google.com>
+Date: Tue, 3 Jul 2018 17:02:43 -0700
+Subject: mm: hugetlb: yield when prepping struct pages
+
+From: Cannon Matthews <cannonmatthews@google.com>
+
+commit 520495fe96d74e05db585fc748351e0504d8f40d upstream.
+
+When booting with very large numbers of gigantic (i.e. 1G) pages, the
+operations in the loop of gather_bootmem_prealloc, and specifically
+prep_compound_gigantic_page, take a very long time, and can cause a
+softlockup if enough pages are requested at boot.
+
+For example, booting with 3844 1G pages requires prepping
+(set_compound_head, init the count) over 1 billion 4K tail pages, which
+takes considerable time.
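+
+As a rough check on that number (an illustration only, not part of the
+patch): with 4K base pages, each 1G page is backed by 2^18 struct pages,
+so a trivial userspace calculation like the sketch below puts 3844
+gigantic pages at just over a billion pages to prep.
+
+  /* Illustration: how many 4K pages sit behind 3844 gigantic pages? */
+  #include <stdio.h>
+
+  int main(void)
+  {
+  	unsigned long long gigantic = 3844;
+  	unsigned long long per_1g = 1ULL << 18;	/* 1G / 4K = 262144 */
+
+  	/* One of these per 1G page is the head; the rest are tails. */
+  	printf("%llu pages to prep\n", gigantic * per_1g);
+  	return 0;
+  }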
+
+Add a cond_resched() to the outer loop in gather_bootmem_prealloc() to
+prevent this lockup.
+
+Tested: Booted with softlockup_panic=1 hugepagesz=1G hugepages=3844 and
+no softlockup is reported, and the hugepages are reported as
+successfully set up.
+
+Link: http://lkml.kernel.org/r/20180627214447.260804-1-cannonmatthews@google.com
+Signed-off-by: Cannon Matthews <cannonmatthews@google.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Andres Lagar-Cavilla <andreslc@google.com>
+Cc: Peter Feiner <pfeiner@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2159,6 +2159,7 @@ static void __init gather_bootmem_preall
+ 		 */
+ 		if (hstate_is_gigantic(h))
+ 			adjust_managed_page_count(page, 1 << h->order);
++		cond_resched();
+ 	}
+ }
+
--- /dev/null
+From 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Mon, 25 Jun 2018 16:25:44 +0200
+Subject: scsi: sg: mitigate read/write abuse
+
+From: Jann Horn <jannh@google.com>
+
+commit 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd upstream.
+
+As Al Viro noted in commit 128394eff343 ("sg_write()/bsg_write() is not fit
+to be called under KERNEL_DS"), sg improperly accesses userspace memory
+outside the provided buffer, permitting kernel memory corruption via
+splice(). It does this not only on ->write() but also on ->read().
+
+As a band-aid, make sure that the ->read() and ->write() handlers cannot
+be called in weird contexts (kernel context or credentials different from
+the file opener), as is done for ib_safe_file_access().
+
+If someone needs to use these interfaces from different security contexts,
+a new interface should be written that goes through the ->ioctl() handler.
+
+I've mostly copypasted ib_safe_file_access() over as sg_safe_file_access()
+because I couldn't find a good common header - please tell me if you know a
+better way.
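+
+For context, the legacy interface being restricted is the sg v3
+asynchronous path: userspace queues a command by write()ing a struct
+sg_io_hdr to the device and later collects the result with read(). The
+sketch below is an illustration of that usage, not part of this patch;
+the device node /dev/sg0 and permission to open it are assumptions.
+Callers like this keep working as long as they run with the credentials
+of the process that opened the file and are not in a kernel context.
+
+  /* Sketch: TEST UNIT READY through the legacy write()/read() interface. */
+  #include <fcntl.h>
+  #include <scsi/sg.h>
+  #include <stdio.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+  	unsigned char cdb[6] = { 0 };		/* TEST UNIT READY */
+  	unsigned char sense[32];
+  	struct sg_io_hdr hdr;
+  	int fd = open("/dev/sg0", O_RDWR);	/* assumed device node */
+
+  	if (fd < 0)
+  		return 1;
+  	memset(&hdr, 0, sizeof(hdr));
+  	hdr.interface_id = 'S';			/* sg v3 interface */
+  	hdr.dxfer_direction = SG_DXFER_NONE;
+  	hdr.cmd_len = sizeof(cdb);
+  	hdr.cmdp = cdb;
+  	hdr.mx_sb_len = sizeof(sense);
+  	hdr.sbp = sense;
+  	hdr.timeout = 5000;			/* milliseconds */
+
+  	/* Queue the request asynchronously ... */
+  	if (write(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+  		return 1;
+  	/* ... and collect the completion later. */
+  	if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+  		return 1;
+  	printf("SCSI status 0x%x\n", hdr.status);
+  	close(fd);
+  	return 0;
+  }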
+
+[mkp: s/_safe_/_check_/]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Douglas Gilbert <dgilbert@interlog.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sg.c | 42 ++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 40 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/sg.c
++++ b/drivers/scsi/sg.c
+@@ -51,6 +51,7 @@ static int sg_version_num = 30536; /* 2
+ #include <linux/atomic.h>
+ #include <linux/ratelimit.h>
+ #include <linux/uio.h>
++#include <linux/cred.h> /* for sg_check_file_access() */
+
+ #include "scsi.h"
+ #include <scsi/scsi_dbg.h>
+@@ -210,6 +211,33 @@ static void sg_device_destroy(struct kre
+ 	sdev_prefix_printk(prefix, (sdp)->device,		\
+ 			   (sdp)->disk->disk_name, fmt, ##a)
+
++/*
++ * The SCSI interfaces that use read() and write() as an asynchronous variant of
++ * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways
++ * to trigger read() and write() calls from various contexts with elevated
++ * privileges. This can lead to kernel memory corruption (e.g. if these
++ * interfaces are called through splice()) and privilege escalation inside
++ * userspace (e.g. if a process with access to such a device passes a file
++ * descriptor to a SUID binary as stdin/stdout/stderr).
++ *
++ * This function provides protection for the legacy API by restricting the
++ * calling context.
++ */
++static int sg_check_file_access(struct file *filp, const char *caller)
++{
++	if (filp->f_cred != current_real_cred()) {
++		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
++			caller, task_tgid_vnr(current), current->comm);
++		return -EPERM;
++	}
++	if (uaccess_kernel()) {
++		pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n",
++			caller, task_tgid_vnr(current), current->comm);
++		return -EACCES;
++	}
++	return 0;
++}
++
+ static int sg_allow_access(struct file *filp, unsigned char *cmd)
+ {
+ 	struct sg_fd *sfp = filp->private_data;
+@@ -394,6 +422,14 @@ sg_read(struct file *filp, char __user *
+ 	struct sg_header *old_hdr = NULL;
+ 	int retval = 0;
+
++	/*
++	 * This could cause a response to be stranded. Close the associated
++	 * file descriptor to free up any resources being held.
++	 */
++	retval = sg_check_file_access(filp, __func__);
++	if (retval)
++		return retval;
++
+ 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
+ 		return -ENXIO;
+ 	SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp,
+@@ -581,9 +617,11 @@ sg_write(struct file *filp, const char _
+ 	struct sg_header old_hdr;
+ 	sg_io_hdr_t *hp;
+ 	unsigned char cmnd[SG_MAX_CDB_SIZE];
++	int retval;
+
+-	if (unlikely(uaccess_kernel()))
+-		return -EINVAL;
++	retval = sg_check_file_access(filp, __func__);
++	if (retval)
++		return retval;
+
+ 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
+ 		return -ENXIO;
--- /dev/null
+From 1fe4293f4b8de75824935f8d8e9a99c7fc6873da Mon Sep 17 00:00:00 2001
+From: Changbin Du <changbin.du@intel.com>
+Date: Wed, 31 Jan 2018 23:48:49 +0800
+Subject: tracing: Fix missing return symbol in function_graph output
+
+From: Changbin Du <changbin.du@intel.com>
+
+commit 1fe4293f4b8de75824935f8d8e9a99c7fc6873da upstream.
+
+The function_graph tracer does not show the interrupt return marker for the
+leaf entry. On leaf entries, we see an unbalanced interrupt marker (the
+interrupt was entered, but never left).
+
+Before:
+ 1) | SyS_write() {
+ 1) | __fdget_pos() {
+ 1) 0.061 us | __fget_light();
+ 1) 0.289 us | }
+ 1) | vfs_write() {
+ 1) 0.049 us | rw_verify_area();
+ 1) + 15.424 us | __vfs_write();
+ 1) ==========> |
+ 1) 6.003 us | smp_apic_timer_interrupt();
+ 1) 0.055 us | __fsnotify_parent();
+ 1) 0.073 us | fsnotify();
+ 1) + 23.665 us | }
+ 1) + 24.501 us | }
+
+After:
+ 0) | SyS_write() {
+ 0) | __fdget_pos() {
+ 0) 0.052 us | __fget_light();
+ 0) 0.328 us | }
+ 0) | vfs_write() {
+ 0) 0.057 us | rw_verify_area();
+ 0) | __vfs_write() {
+ 0) ==========> |
+ 0) 8.548 us | smp_apic_timer_interrupt();
+ 0) <========== |
+ 0) + 36.507 us | } /* __vfs_write */
+ 0) 0.049 us | __fsnotify_parent();
+ 0) 0.066 us | fsnotify();
+ 0) + 50.064 us | }
+ 0) + 50.952 us | }
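+
+For reference, output like the above can be collected by turning on the
+function_graph tracer through tracefs. The sketch below is an
+illustration only (it is not part of this patch); it assumes tracefs is
+reachable at /sys/kernel/debug/tracing and that it runs as root.
+
+  /* Illustration: enable function_graph and dump a chunk of the trace. */
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  #define TRACEFS "/sys/kernel/debug/tracing/"	/* assumed mount point */
+
+  static int echo_to(const char *path, const char *val)
+  {
+  	int fd = open(path, O_WRONLY);
+  	ssize_t n;
+
+  	if (fd < 0)
+  		return -1;
+  	n = write(fd, val, strlen(val));
+  	close(fd);
+  	return n < 0 ? -1 : 0;
+  }
+
+  int main(void)
+  {
+  	char buf[4096];
+  	ssize_t n;
+  	int fd;
+
+  	if (echo_to(TRACEFS "current_tracer", "function_graph"))
+  		return 1;
+  	sleep(1);				/* let some interrupts land */
+
+  	fd = open(TRACEFS "trace", O_RDONLY);
+  	while (fd >= 0 && (n = read(fd, buf, sizeof(buf))) > 0)
+  		fwrite(buf, 1, n, stdout);	/* look for ==========> markers */
+  	if (fd >= 0)
+  		close(fd);
+
+  	echo_to(TRACEFS "current_tracer", "nop");
+  	return 0;
+  }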
+
+Link: http://lkml.kernel.org/r/1517413729-20411-1-git-send-email-changbin.du@intel.com
+
+Cc: stable@vger.kernel.org
+Fixes: f8b755ac8e0cc ("tracing/function-graph-tracer: Output arrows signal on hardirq call/return")
+Signed-off-by: Changbin Du <changbin.du@intel.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_functions_graph.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_functions_graph.c
++++ b/kernel/trace/trace_functions_graph.c
+@@ -831,6 +831,7 @@ print_graph_entry_leaf(struct trace_iter
+ 	struct ftrace_graph_ret *graph_ret;
+ 	struct ftrace_graph_ent *call;
+ 	unsigned long long duration;
++	int cpu = iter->cpu;
+ 	int i;
+
+ 	graph_ret = &ret_entry->ret;
+@@ -839,7 +840,6 @@ print_graph_entry_leaf(struct trace_iter
+
+ 	if (data) {
+ 		struct fgraph_cpu_data *cpu_data;
+-		int cpu = iter->cpu;
+
+ 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
+
+@@ -869,6 +869,9 @@ print_graph_entry_leaf(struct trace_iter
+
+ 	trace_seq_printf(s, "%ps();\n", (void *)call->func);
+
++	print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
++			cpu, iter->ent->pid, flags);
++
+ 	return trace_handle_return(s);
+ }
+
--- /dev/null
+From 1e2c043628c7736dd56536d16c0ce009bc834ae7 Mon Sep 17 00:00:00 2001
+From: Janosch Frank <frankja@linux.ibm.com>
+Date: Tue, 3 Jul 2018 17:02:39 -0700
+Subject: userfaultfd: hugetlbfs: fix userfaultfd_huge_must_wait() pte access
+
+From: Janosch Frank <frankja@linux.ibm.com>
+
+commit 1e2c043628c7736dd56536d16c0ce009bc834ae7 upstream.
+
+Use huge_ptep_get() to translate huge ptes to normal ptes so we can
+check them with the huge_pte_* functions. Otherwise some architectures
+will check the wrong values and will not wait for userspace to bring in
+the memory.
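+
+The reason the helper matters: on most architectures huge_ptep_get() is
+essentially a plain dereference, roughly the generic fallback sketched
+below (shown for illustration, not quoted verbatim), while architectures
+such as s390 override it to convert the underlying, differently formatted
+entry into a pte first. Checking *ptep directly skips that conversion,
+which is how the wrong bits ended up being tested.
+
+  /* Roughly the asm-generic fallback; architectures may override this. */
+  static inline pte_t huge_ptep_get(pte_t *ptep)
+  {
+  	return *ptep;
+  }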
+
+Link: http://lkml.kernel.org/r/20180626132421.78084-1-frankja@linux.ibm.com
+Fixes: 369cd2121be4 ("userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges")
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/userfaultfd.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -220,24 +220,26 @@ static inline bool userfaultfd_huge_must
+ 					      unsigned long reason)
+ {
+ 	struct mm_struct *mm = ctx->mm;
+-	pte_t *pte;
++	pte_t *ptep, pte;
+ 	bool ret = true;
+
+ 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+
+-	pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
+-	if (!pte)
++	ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
++
++	if (!ptep)
+ 		goto out;
+
+ 	ret = false;
++	pte = huge_ptep_get(ptep);
+
+ 	/*
+ 	 * Lockless access: we're in a wait_event so it's ok if it
+ 	 * changes under us.
+ 	 */
+-	if (huge_pte_none(*pte))
++	if (huge_pte_none(pte))
+ 		ret = true;
+-	if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
++	if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
+ 		ret = true;
+ out:
+ 	return ret;