git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 Nov 2020 16:26:16 +0000 (17:26 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 Nov 2020 16:26:16 +0000 (17:26 +0100)
added patches:
bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch
compiler.h-fix-barrier_data-on-clang.patch
futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch
hugetlbfs-fix-anon-huge-page-migration-race.patch
jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch
mei-protect-mei_cl_mtu-from-null-dereference.patch
mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch
mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch
mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch
mm-slub-fix-panic-in-slab_alloc_node.patch
mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch
ocfs2-initialize-ip_next_orphan.patch
reboot-fix-overflow-parsing-reboot-cpu-number.patch
revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch
virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch
xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch

17 files changed:
queue-5.9/bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch [new file with mode: 0644]
queue-5.9/compiler.h-fix-barrier_data-on-clang.patch [new file with mode: 0644]
queue-5.9/futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch [new file with mode: 0644]
queue-5.9/hugetlbfs-fix-anon-huge-page-migration-race.patch [new file with mode: 0644]
queue-5.9/jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch [new file with mode: 0644]
queue-5.9/mei-protect-mei_cl_mtu-from-null-dereference.patch [new file with mode: 0644]
queue-5.9/mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch [new file with mode: 0644]
queue-5.9/mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch [new file with mode: 0644]
queue-5.9/mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch [new file with mode: 0644]
queue-5.9/mm-slub-fix-panic-in-slab_alloc_node.patch [new file with mode: 0644]
queue-5.9/mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch [new file with mode: 0644]
queue-5.9/ocfs2-initialize-ip_next_orphan.patch [new file with mode: 0644]
queue-5.9/reboot-fix-overflow-parsing-reboot-cpu-number.patch [new file with mode: 0644]
queue-5.9/revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch [new file with mode: 0644]
queue-5.9/series
queue-5.9/virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch [new file with mode: 0644]
queue-5.9/xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch [new file with mode: 0644]

diff --git a/queue-5.9/bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch b/queue-5.9/bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch
new file mode 100644 (file)
index 0000000..246c25f
--- /dev/null
@@ -0,0 +1,55 @@
+From 50b8a742850fce7293bed45753152c425f7e931b Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 13 Nov 2020 02:27:31 +0900
+Subject: bootconfig: Extend the magic check range to the preceding 3 bytes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 50b8a742850fce7293bed45753152c425f7e931b upstream.
+
+Since Grub may align the size of the initrd to 4 bytes when the user
+passes the initrd from cpio, we have to check the preceding 3 bytes as well.
+
+Link: https://lkml.kernel.org/r/160520205132.303174.4876760192433315429.stgit@devnote2
+
+Cc: stable@vger.kernel.org
+Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
+Reported-by: Chen Yu <yu.chen.surf@gmail.com>
+Tested-by: Chen Yu <yu.chen.surf@gmail.com>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ init/main.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/init/main.c
++++ b/init/main.c
+@@ -267,14 +267,24 @@ static void * __init get_boot_config_fro
+       u32 size, csum;
+       char *data;
+       u32 *hdr;
++      int i;
+       if (!initrd_end)
+               return NULL;
+       data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+-      if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+-              return NULL;
++      /*
++       * Since Grub may align the size of initrd to 4, we must
++       * check the preceding 3 bytes as well.
++       */
++      for (i = 0; i < 4; i++) {
++              if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
++                      goto found;
++              data--;
++      }
++      return NULL;
++found:
+       hdr = (u32 *)(data - 8);
+       size = hdr[0];
+       csum = hdr[1];
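
For illustration, a minimal user-space sketch of the widened scan this patch implements (BOOTCONFIG_MAGIC and its length match the kernel definitions; the buffer below is contrived):

    #include <stdio.h>
    #include <string.h>

    #define BOOTCONFIG_MAGIC     "#BOOTCONFIG\n"
    #define BOOTCONFIG_MAGIC_LEN (sizeof(BOOTCONFIG_MAGIC) - 1)

    /* The magic may sit at the very end of the initrd, or up to 3 bytes
     * earlier when Grub pads the size to a 4-byte boundary. */
    static const char *find_bootconfig_magic(const char *buf, size_t len)
    {
        const char *data = buf + len - BOOTCONFIG_MAGIC_LEN;
        int i;

        for (i = 0; i < 4; i++) {
            if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
                return data;
            data--;
        }
        return NULL;
    }

    int main(void)
    {
        /* payload, the 12-byte magic, then two NUL padding bytes */
        char initrd[] = "payload#BOOTCONFIG\n\0";
        size_t len = sizeof(initrd);

        printf("magic %s\n", find_bootconfig_magic(initrd, len) ? "found" : "missing");
        return 0;
    }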
diff --git a/queue-5.9/compiler.h-fix-barrier_data-on-clang.patch b/queue-5.9/compiler.h-fix-barrier_data-on-clang.patch
new file mode 100644 (file)
index 0000000..1e47727
--- /dev/null
@@ -0,0 +1,131 @@
+From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001
+From: Arvind Sankar <nivedita@alum.mit.edu>
+Date: Fri, 13 Nov 2020 22:51:59 -0800
+Subject: compiler.h: fix barrier_data() on clang
+
+From: Arvind Sankar <nivedita@alum.mit.edu>
+
+commit 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 upstream.
+
+Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
+mutually exclusive") neglected to copy barrier_data() from
+compiler-gcc.h into compiler-clang.h.
+
+The definition in compiler-gcc.h was really to work around clang's more
+aggressive optimization, so this broke barrier_data() on clang, and
+consequently memzero_explicit() as well.
+
+For example, this results in at least the memzero_explicit() call in
+lib/crypto/sha256.c:sha256_transform() being optimized away by clang.
+
+Fix this by moving the definition of barrier_data() into compiler.h.
+
+Also move the gcc/clang definition of barrier() into compiler.h:
+__memory_barrier() is icc-specific (and barrier() is already defined
+using it in compiler-intel.h), so it doesn't belong in compiler.h.
+
+[rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled]
+
+Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org
+Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
+Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/asm-generic/barrier.h  |    1 +
+ include/linux/compiler-clang.h |    6 ------
+ include/linux/compiler-gcc.h   |   19 -------------------
+ include/linux/compiler.h       |   18 ++++++++++++++++--
+ 4 files changed, 17 insertions(+), 27 deletions(-)
+
+--- a/include/asm-generic/barrier.h
++++ b/include/asm-generic/barrier.h
+@@ -13,6 +13,7 @@
+ #ifndef __ASSEMBLY__
++#include <linux/compiler.h>
+ #include <asm/rwonce.h>
+ #ifndef nop
+--- a/include/linux/compiler-clang.h
++++ b/include/linux/compiler-clang.h
+@@ -52,12 +52,6 @@
+ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+ #endif
+-/* The following are for compatibility with GCC, from compiler-gcc.h,
+- * and may be redefined here because they should not be shared with other
+- * compilers, like ICC.
+- */
+-#define barrier() __asm__ __volatile__("" : : : "memory")
+-
+ #if __has_feature(shadow_call_stack)
+ # define __noscs      __attribute__((__no_sanitize__("shadow-call-stack")))
+ #endif
+--- a/include/linux/compiler-gcc.h
++++ b/include/linux/compiler-gcc.h
+@@ -15,25 +15,6 @@
+ # error Sorry, your compiler is too old - please upgrade it.
+ #endif
+-/* Optimization barrier */
+-
+-/* The "volatile" is due to gcc bugs */
+-#define barrier() __asm__ __volatile__("": : :"memory")
+-/*
+- * This version is i.e. to prevent dead stores elimination on @ptr
+- * where gcc and llvm may behave differently when otherwise using
+- * normal barrier(): while gcc behavior gets along with a normal
+- * barrier(), llvm needs an explicit input variable to be assumed
+- * clobbered. The issue is as follows: while the inline asm might
+- * access any memory it wants, the compiler could have fit all of
+- * @ptr into memory registers instead, and since @ptr never escaped
+- * from that, it proved that the inline asm wasn't touching any of
+- * it. This version works well with both compilers, i.e. we're telling
+- * the compiler that the inline asm absolutely may see the contents
+- * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
+- */
+-#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+-
+ /*
+  * This macro obfuscates arithmetic on a variable address so that gcc
+  * shouldn't recognize the original var, and make assumptions about it.
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_
+ /* Optimization barrier */
+ #ifndef barrier
+-# define barrier() __memory_barrier()
++/* The "volatile" is due to gcc bugs */
++# define barrier() __asm__ __volatile__("": : :"memory")
+ #endif
+ #ifndef barrier_data
+-# define barrier_data(ptr) barrier()
++/*
++ * This version is i.e. to prevent dead stores elimination on @ptr
++ * where gcc and llvm may behave differently when otherwise using
++ * normal barrier(): while gcc behavior gets along with a normal
++ * barrier(), llvm needs an explicit input variable to be assumed
++ * clobbered. The issue is as follows: while the inline asm might
++ * access any memory it wants, the compiler could have fit all of
++ * @ptr into memory registers instead, and since @ptr never escaped
++ * from that, it proved that the inline asm wasn't touching any of
++ * it. This version works well with both compilers, i.e. we're telling
++ * the compiler that the inline asm absolutely may see the contents
++ * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
++ */
++# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+ #endif
+ /* workaround for GCC PR82365 if needed */
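
A user-space sketch of the pattern barrier_data() protects, assuming clang at -O2 (the macro body is the one the patch moves into compiler.h; wipe_secret() is a hypothetical example):

    #include <stdio.h>
    #include <string.h>

    /* The "r"(ptr) input forces the compiler to assume the asm may read
     * *ptr, so the preceding memset() cannot be treated as a dead store. */
    #define barrier_data(ptr) __asm__ __volatile__("" : : "r"(ptr) : "memory")

    static void wipe_secret(void)
    {
        char key[32] = "not a real key";

        /* ... use key ... */

        memset(key, 0, sizeof(key));
        barrier_data(key);  /* without this, clang may elide the memset */
    }

    int main(void)
    {
        wipe_secret();
        return 0;
    }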
diff --git a/queue-5.9/futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch b/queue-5.9/futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch
new file mode 100644 (file)
index 0000000..20d8798
--- /dev/null
@@ -0,0 +1,49 @@
+From 1e106aa3509b86738769775969822ffc1ec21bf4 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 6 Nov 2020 11:52:05 +0300
+Subject: futex: Don't enable IRQs unconditionally in put_pi_state()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 1e106aa3509b86738769775969822ffc1ec21bf4 upstream.
+
+The exit_pi_state_list() function calls put_pi_state() with IRQs disabled
+and is not expecting that IRQs will be enabled inside the function.
+
+Use the _irqsave() variant so that IRQs are restored to the original state
+instead of being enabled unconditionally.
+
+Fixes: 153fbd1226fb ("futex: Fix more put_pi_state() vs. exit_pi_state_list() races")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201106085205.GA1159983@mwanda
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi
+        */
+       if (pi_state->owner) {
+               struct task_struct *owner;
++              unsigned long flags;
+-              raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++              raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
+               owner = pi_state->owner;
+               if (owner) {
+                       raw_spin_lock(&owner->pi_lock);
+@@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi
+                       raw_spin_unlock(&owner->pi_lock);
+               }
+               rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+-              raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++              raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
+       }
+       if (current->pi_state_cache) {
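
The distinction the fix relies on, as a kernel-style sketch (not standalone-compilable; struct foo is hypothetical):

    /* The _irq variants unconditionally re-enable interrupts on unlock;
     * the _irqsave/_irqrestore variants restore whatever state the caller
     * had.  Only the latter is safe in a function that may be entered,
     * like put_pi_state(), with IRQs already disabled. */
    static void helper(struct foo *f)
    {
        unsigned long flags;

        raw_spin_lock_irqsave(&f->lock, flags);      /* saves IRQ state */
        /* ... critical section ... */
        raw_spin_unlock_irqrestore(&f->lock, flags); /* restores, never enables */
    }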
diff --git a/queue-5.9/hugetlbfs-fix-anon-huge-page-migration-race.patch b/queue-5.9/hugetlbfs-fix-anon-huge-page-migration-race.patch
new file mode 100644 (file)
index 0000000..b5fa07b
--- /dev/null
@@ -0,0 +1,319 @@
+From 336bf30eb76580b579dc711ded5d599d905c0217 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Fri, 13 Nov 2020 22:52:16 -0800
+Subject: hugetlbfs: fix anon huge page migration race
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 336bf30eb76580b579dc711ded5d599d905c0217 upstream.
+
+Qian Cai reported the following BUG in [1]
+
+  LTP: starting move_pages12
+  BUG: unable to handle page fault for address: ffffffffffffffe0
+  ...
+  RIP: 0010:anon_vma_interval_tree_iter_first+0xa2/0x170 avc_start_pgoff at mm/interval_tree.c:63
+  Call Trace:
+    rmap_walk_anon+0x141/0xa30 rmap_walk_anon at mm/rmap.c:1864
+    try_to_unmap+0x209/0x2d0 try_to_unmap at mm/rmap.c:1763
+    migrate_pages+0x1005/0x1fb0
+    move_pages_and_store_status.isra.47+0xd7/0x1a0
+    __x64_sys_move_pages+0xa5c/0x1100
+    do_syscall_64+0x5f/0x310
+    entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Hugh Dickins diagnosed this as a migration bug caused by code introduced
+to use i_mmap_rwsem for pmd sharing synchronization.  Specifically, the
+routine unmap_and_move_huge_page() is always passing the TTU_RMAP_LOCKED
+flag to try_to_unmap() while holding i_mmap_rwsem.  This is wrong for
+anon pages as the anon_vma_lock should be held in this case.  Further
+analysis suggested that i_mmap_rwsem was not required to be held at all
+when calling try_to_unmap for anon pages as an anon page could never be
+part of a shared pmd mapping.
+
+Discussion also revealed that the hack in hugetlb_page_mapping_lock_write
+to drop page lock and acquire i_mmap_rwsem is wrong.  There is no way to
+keep mapping valid while dropping page lock.
+
+This patch does the following:
+
+ - Do not take i_mmap_rwsem and set TTU_RMAP_LOCKED for anon pages when
+   calling try_to_unmap.
+
+ - Remove the hacky code in hugetlb_page_mapping_lock_write. The routine
+   will now simply do a 'trylock' while still holding the page lock. If
+   the trylock fails, it will return NULL. This could impact the
+   callers:
+
+    - migration calling code will receive -EAGAIN and retry up to the
+      hard coded limit (10).
+
+    - memory error code will treat the page as BUSY.  This will force
+      killing (SIGKILL) of any mapping tasks instead of sending SIGBUS.
+
+   Do note that this change in behavior only happens when there is a
+   race. None of the standard kernel testing suites actually hit this
+   race, but it is possible.
+
+[1] https://lore.kernel.org/lkml/20200708012044.GC992@lca.pw/
+[2] https://lore.kernel.org/linux-mm/alpine.LSU.2.11.2010071833100.2214@eggly.anvils/
+
+Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
+Reported-by: Qian Cai <cai@lca.pw>
+Suggested-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201105195058.78401-1-mike.kravetz@oracle.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c        |   90 ++--------------------------------------------------
+ mm/memory-failure.c |   36 +++++++++-----------
+ mm/migrate.c        |   46 ++++++++++++++------------
+ mm/rmap.c           |    5 --
+ 4 files changed, 48 insertions(+), 129 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1579,103 +1579,23 @@ int PageHeadHuge(struct page *page_head)
+ }
+ /*
+- * Find address_space associated with hugetlbfs page.
+- * Upon entry page is locked and page 'was' mapped although mapped state
+- * could change.  If necessary, use anon_vma to find vma and associated
+- * address space.  The returned mapping may be stale, but it can not be
+- * invalid as page lock (which is held) is required to destroy mapping.
+- */
+-static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)
+-{
+-      struct anon_vma *anon_vma;
+-      pgoff_t pgoff_start, pgoff_end;
+-      struct anon_vma_chain *avc;
+-      struct address_space *mapping = page_mapping(hpage);
+-
+-      /* Simple file based mapping */
+-      if (mapping)
+-              return mapping;
+-
+-      /*
+-       * Even anonymous hugetlbfs mappings are associated with an
+-       * underlying hugetlbfs file (see hugetlb_file_setup in mmap
+-       * code).  Find a vma associated with the anonymous vma, and
+-       * use the file pointer to get address_space.
+-       */
+-      anon_vma = page_lock_anon_vma_read(hpage);
+-      if (!anon_vma)
+-              return mapping;  /* NULL */
+-
+-      /* Use first found vma */
+-      pgoff_start = page_to_pgoff(hpage);
+-      pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
+-      anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
+-                                      pgoff_start, pgoff_end) {
+-              struct vm_area_struct *vma = avc->vma;
+-
+-              mapping = vma->vm_file->f_mapping;
+-              break;
+-      }
+-
+-      anon_vma_unlock_read(anon_vma);
+-      return mapping;
+-}
+-
+-/*
+  * Find and lock address space (mapping) in write mode.
+  *
+- * Upon entry, the page is locked which allows us to find the mapping
+- * even in the case of an anon page.  However, locking order dictates
+- * the i_mmap_rwsem be acquired BEFORE the page lock.  This is hugetlbfs
+- * specific.  So, we first try to lock the sema while still holding the
+- * page lock.  If this works, great!  If not, then we need to drop the
+- * page lock and then acquire i_mmap_rwsem and reacquire page lock.  Of
+- * course, need to revalidate state along the way.
++ * Upon entry, the page is locked which means that page_mapping() is
++ * stable.  Due to locking order, we can only trylock_write.  If we can
++ * not get the lock, simply return NULL to caller.
+  */
+ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
+ {
+-      struct address_space *mapping, *mapping2;
++      struct address_space *mapping = page_mapping(hpage);
+-      mapping = _get_hugetlb_page_mapping(hpage);
+-retry:
+       if (!mapping)
+               return mapping;
+-      /*
+-       * If no contention, take lock and return
+-       */
+       if (i_mmap_trylock_write(mapping))
+               return mapping;
+-      /*
+-       * Must drop page lock and wait on mapping sema.
+-       * Note:  Once page lock is dropped, mapping could become invalid.
+-       * As a hack, increase map count until we lock page again.
+-       */
+-      atomic_inc(&hpage->_mapcount);
+-      unlock_page(hpage);
+-      i_mmap_lock_write(mapping);
+-      lock_page(hpage);
+-      atomic_add_negative(-1, &hpage->_mapcount);
+-
+-      /* verify page is still mapped */
+-      if (!page_mapped(hpage)) {
+-              i_mmap_unlock_write(mapping);
+-              return NULL;
+-      }
+-
+-      /*
+-       * Get address space again and verify it is the same one
+-       * we locked.  If not, drop lock and retry.
+-       */
+-      mapping2 = _get_hugetlb_page_mapping(hpage);
+-      if (mapping2 != mapping) {
+-              i_mmap_unlock_write(mapping);
+-              mapping = mapping2;
+-              goto retry;
+-      }
+-
+-      return mapping;
++      return NULL;
+ }
+ pgoff_t __basepage_index(struct page *page)
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1031,27 +1031,25 @@ static bool hwpoison_user_mappings(struc
+       if (!PageHuge(hpage)) {
+               unmap_success = try_to_unmap(hpage, ttu);
+       } else {
+-              /*
+-               * For hugetlb pages, try_to_unmap could potentially call
+-               * huge_pmd_unshare.  Because of this, take semaphore in
+-               * write mode here and set TTU_RMAP_LOCKED to indicate we
+-               * have taken the lock at this higer level.
+-               *
+-               * Note that the call to hugetlb_page_mapping_lock_write
+-               * is necessary even if mapping is already set.  It handles
+-               * ugliness of potentially having to drop page lock to obtain
+-               * i_mmap_rwsem.
+-               */
+-              mapping = hugetlb_page_mapping_lock_write(hpage);
+-
+-              if (mapping) {
+-                      unmap_success = try_to_unmap(hpage,
++              if (!PageAnon(hpage)) {
++                      /*
++                       * For hugetlb pages in shared mappings, try_to_unmap
++                       * could potentially call huge_pmd_unshare.  Because of
++                       * this, take semaphore in write mode here and set
++                       * TTU_RMAP_LOCKED to indicate we have taken the lock
++                       * at this higer level.
++                       */
++                      mapping = hugetlb_page_mapping_lock_write(hpage);
++                      if (mapping) {
++                              unmap_success = try_to_unmap(hpage,
+                                                    ttu|TTU_RMAP_LOCKED);
+-                      i_mmap_unlock_write(mapping);
++                              i_mmap_unlock_write(mapping);
++                      } else {
++                              pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
++                              unmap_success = false;
++                      }
+               } else {
+-                      pr_info("Memory failure: %#lx: could not find mapping for mapped huge page\n",
+-                              pfn);
+-                      unmap_success = false;
++                      unmap_success = try_to_unmap(hpage, ttu);
+               }
+       }
+       if (!unmap_success)
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1333,34 +1333,38 @@ static int unmap_and_move_huge_page(new_
+               goto put_anon;
+       if (page_mapped(hpage)) {
+-              /*
+-               * try_to_unmap could potentially call huge_pmd_unshare.
+-               * Because of this, take semaphore in write mode here and
+-               * set TTU_RMAP_LOCKED to let lower levels know we have
+-               * taken the lock.
+-               */
+-              mapping = hugetlb_page_mapping_lock_write(hpage);
+-              if (unlikely(!mapping))
+-                      goto unlock_put_anon;
+-
+-              try_to_unmap(hpage,
+-                      TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
+-                      TTU_RMAP_LOCKED);
++              bool mapping_locked = false;
++              enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK|
++                                      TTU_IGNORE_ACCESS;
++
++              if (!PageAnon(hpage)) {
++                      /*
++                       * In shared mappings, try_to_unmap could potentially
++                       * call huge_pmd_unshare.  Because of this, take
++                       * semaphore in write mode here and set TTU_RMAP_LOCKED
++                       * to let lower levels know we have taken the lock.
++                       */
++                      mapping = hugetlb_page_mapping_lock_write(hpage);
++                      if (unlikely(!mapping))
++                              goto unlock_put_anon;
++
++                      mapping_locked = true;
++                      ttu |= TTU_RMAP_LOCKED;
++              }
++
++              try_to_unmap(hpage, ttu);
+               page_was_mapped = 1;
+-              /*
+-               * Leave mapping locked until after subsequent call to
+-               * remove_migration_ptes()
+-               */
++
++              if (mapping_locked)
++                      i_mmap_unlock_write(mapping);
+       }
+       if (!page_mapped(hpage))
+               rc = move_to_new_page(new_hpage, hpage, mode);
+-      if (page_was_mapped) {
++      if (page_was_mapped)
+               remove_migration_ptes(hpage,
+-                      rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true);
+-              i_mmap_unlock_write(mapping);
+-      }
++                      rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
+ unlock_put_anon:
+       unlock_page(new_hpage);
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1413,9 +1413,6 @@ static bool try_to_unmap_one(struct page
+               /*
+                * If sharing is possible, start and end will be adjusted
+                * accordingly.
+-               *
+-               * If called for a huge page, caller must hold i_mmap_rwsem
+-               * in write mode as it is possible to call huge_pmd_unshare.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &range.start,
+                                                    &range.end);
+@@ -1462,7 +1459,7 @@ static bool try_to_unmap_one(struct page
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
+-              if (PageHuge(page)) {
++              if (PageHuge(page) && !PageAnon(page)) {
+                       /*
+                        * To call huge_pmd_unshare, i_mmap_rwsem must be
+                        * held in write mode.  Caller needs to explicitly
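
Condensed, the locking rule the patch establishes looks like this (kernel-style sketch; error handling and the surrounding migration logic are elided):

    /* Only file-backed hugetlb mappings can reach huge_pmd_unshare(), so
     * only they need i_mmap_rwsem held in write mode across try_to_unmap();
     * anon huge pages use the normal anon_vma locking inside rmap. */
    enum ttu_flags ttu = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
    struct address_space *mapping = NULL;

    if (!PageAnon(hpage)) {
        mapping = hugetlb_page_mapping_lock_write(hpage); /* trylock only */
        if (!mapping)
            return -EAGAIN;          /* migration retries, up to 10 times */
        ttu |= TTU_RMAP_LOCKED;
    }
    try_to_unmap(hpage, ttu);
    if (mapping)
        i_mmap_unlock_write(mapping);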
diff --git a/queue-5.9/jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch b/queue-5.9/jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch
new file mode 100644 (file)
index 0000000..9fe5ffe
--- /dev/null
@@ -0,0 +1,50 @@
+From 05d5233df85e9621597c5838e95235107eb624a2 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 7 Nov 2020 00:00:49 -0500
+Subject: jbd2: fix up sparse warnings in checkpoint code
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 05d5233df85e9621597c5838e95235107eb624a2 upstream.
+
+Add missing __acquires() and __releases() annotations.  Also, in a
+"this should never happen" WARN_ON check, if it *does* actually
+happen, we need to release j_state_lock since this function is always
+supposed to release that lock.  Otherwise, things will quickly grind
+to a halt after the WARN_ON trips.
+
+Fixes: 96f1e0974575 ("jbd2: avoid long hold times of j_state_lock...")
+Cc: stable@kernel.org
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/checkpoint.c  |    2 ++
+ fs/jbd2/transaction.c |    4 +++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct j
+  * for a checkpoint to free up some space in the log.
+  */
+ void __jbd2_log_wait_for_space(journal_t *journal)
++__acquires(&journal->j_state_lock)
++__releases(&journal->j_state_lock)
+ {
+       int nblocks, space_left;
+       /* assert_spin_locked(&journal->j_state_lock); */
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -195,8 +195,10 @@ static void wait_transaction_switching(j
+       DEFINE_WAIT(wait);
+       if (WARN_ON(!journal->j_running_transaction ||
+-                  journal->j_running_transaction->t_state != T_SWITCH))
++                  journal->j_running_transaction->t_state != T_SWITCH)) {
++              read_unlock(&journal->j_state_lock);
+               return;
++      }
+       prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+                       TASK_UNINTERRUPTIBLE);
+       read_unlock(&journal->j_state_lock);
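
A minimal sketch of the sparse annotation idiom used above (the function and lock here are hypothetical):

    /* __releases() tells sparse the function is entered with the lock held
     * and must drop it on every exit path -- which is why the WARN_ON
     * early return in wait_transaction_switching() also needs its own
     * read_unlock(). */
    static void finish_and_unlock(rwlock_t *lock)
    __releases(lock)
    {
        /* ... work done under the lock ... */
        read_unlock(lock);
    }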
diff --git a/queue-5.9/mei-protect-mei_cl_mtu-from-null-dereference.patch b/queue-5.9/mei-protect-mei_cl_mtu-from-null-dereference.patch
new file mode 100644 (file)
index 0000000..c2e6c77
--- /dev/null
@@ -0,0 +1,41 @@
+From bcbc0b2e275f0a797de11a10eff495b4571863fc Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Thu, 29 Oct 2020 11:54:42 +0200
+Subject: mei: protect mei_cl_mtu from null dereference
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit bcbc0b2e275f0a797de11a10eff495b4571863fc upstream.
+
+A receive callback is queued while the client is still connected,
+but can still be called after the client has been disconnected.  Upon
+disconnect, cl->me_cl is set to NULL, hence we need to check that
+the ME client is not NULL in mei_cl_mtu to avoid a
+NULL dereference.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Link: https://lore.kernel.org/r/20201029095444.957924-2-tomas.winkler@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/client.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/misc/mei/client.h
++++ b/drivers/misc/mei/client.h
+@@ -164,11 +164,11 @@ static inline u8 mei_cl_me_id(const stru
+  *
+  * @cl: host client
+  *
+- * Return: mtu
++ * Return: mtu or 0 if client is not connected
+  */
+ static inline size_t mei_cl_mtu(const struct mei_cl *cl)
+ {
+-      return cl->me_cl->props.max_msg_length;
++      return cl->me_cl ? cl->me_cl->props.max_msg_length : 0;
+ }
+ /**
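
With the 0 sentinel in place, callers can fail gracefully; a hypothetical caller-side fragment (not taken from the driver):

    /* An MTU of 0 now means the ME client is gone (cl->me_cl == NULL),
     * so a transmit path can bail out instead of dereferencing a stale
     * pointer. */
    size_t mtu = mei_cl_mtu(cl);

    if (!mtu)
        return -ENODEV;  /* client disconnected under us */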
diff --git a/queue-5.9/mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch b/queue-5.9/mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch
new file mode 100644 (file)
index 0000000..7cf7363
--- /dev/null
@@ -0,0 +1,86 @@
+From 38935861d85a4d9a353d1dd5a156c97700e2765d Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 13 Nov 2020 22:51:40 -0800
+Subject: mm/compaction: count pages and stop correctly during page isolation
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit 38935861d85a4d9a353d1dd5a156c97700e2765d upstream.
+
+In isolate_migratepages_block, when cc->alloc_contig is true, we are
+able to isolate compound pages.  But nr_migratepages and nr_isolated did
+not count compound pages correctly, causing us to isolate more pages
+than we thought.
+
+So count compound pages as the number of base pages they contain.
+Otherwise, we might be trapped in the too_many_isolated() while loop,
+since the number of actually isolated pages can go up to
+COMPACT_CLUSTER_MAX*512=16384 (where COMPACT_CLUSTER_MAX is 32),
+because we stop isolation only after cc->nr_migratepages reaches
+COMPACT_CLUSTER_MAX.
+
+In addition, after we fix the issue above, cc->nr_migratepages could
+never be equal to COMPACT_CLUSTER_MAX if compound pages are isolated,
+thus page isolation could not stop as we intended.  Change the isolation
+stop condition to '>='.
+
+The issue can be triggered as follows:
+
+In a system with 16GB memory and an 8GB CMA region reserved by
+hugetlb_cma, if we first allocate 10GB THPs and mlock them (so some THPs
+are allocated in the CMA region and mlocked), reserving 6 1GB hugetlb
+pages via /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages will
+get stuck (looping in too_many_isolated function) until we kill either
+task.  With the patch applied, oom will kill the application with 10GB
+THPs and let hugetlb page reservation finish.
+
+[ziy@nvidia.com: v3]
+
+Link: https://lkml.kernel.org/r/20201030183809.3616803-1-zi.yan@sent.com
+Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201029200435.3386066-1-zi.yan@sent.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1013,8 +1013,8 @@ isolate_migratepages_block(struct compac
+ isolate_success:
+               list_add(&page->lru, &cc->migratepages);
+-              cc->nr_migratepages++;
+-              nr_isolated++;
++              cc->nr_migratepages += compound_nr(page);
++              nr_isolated += compound_nr(page);
+               /*
+                * Avoid isolating too much unless this block is being
+@@ -1022,7 +1022,7 @@ isolate_success:
+                * or a lock is contended. For contention, isolate quickly to
+                * potentially remove one source of contention.
+                */
+-              if (cc->nr_migratepages == COMPACT_CLUSTER_MAX &&
++              if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
+                   !cc->rescan && !cc->contended) {
+                       ++low_pfn;
+                       break;
+@@ -1133,7 +1133,7 @@ isolate_migratepages_range(struct compac
+               if (!pfn)
+                       break;
+-              if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
++              if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
+                       break;
+       }
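
A self-contained illustration of why the old '==' termination check broke once compound pages were counted as base pages:

    #include <stdio.h>

    int main(void)
    {
        const unsigned int COMPACT_CLUSTER_MAX = 32;
        unsigned int nr_migratepages = 0;

        /* Isolating one 2MB THP adds 512 base pages at once, jumping
         * over the threshold instead of ever landing exactly on it. */
        nr_migratepages += 512;

        printf("'==' stops isolation: %d\n", nr_migratepages == COMPACT_CLUSTER_MAX);
        printf("'>=' stops isolation: %d\n", nr_migratepages >= COMPACT_CLUSTER_MAX);
        return 0;
    }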
diff --git a/queue-5.9/mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch b/queue-5.9/mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch
new file mode 100644 (file)
index 0000000..c2757b4
--- /dev/null
@@ -0,0 +1,53 @@
+From d20bdd571ee5c9966191568527ecdb1bd4b52368 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 13 Nov 2020 22:51:43 -0800
+Subject: mm/compaction: stop isolation if too many pages are isolated and we have pages to migrate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit d20bdd571ee5c9966191568527ecdb1bd4b52368 upstream.
+
+In isolate_migratepages_block, if we have too many isolated pages and
+nr_migratepages is not zero, we should try to migrate what we have
+without wasting time on isolating.
+
+In theory it's possible that multiple parallel compactions will cause
+too_many_isolated() to become true even if each has isolated less than
+COMPACT_CLUSTER_MAX, and loop forever in the while loop.  Bailing
+immediately prevents that.
+
+[vbabka@suse.cz: changelog addition]
+
+Fixes: 1da2f328fa64 (“mm,thp,compaction,cma: allow THP migration for CMA allocations”)
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Link: https://lkml.kernel.org/r/20201030183809.3616803-2-zi.yan@sent.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -818,6 +818,10 @@ isolate_migratepages_block(struct compac
+        * delay for some time until fewer pages are isolated
+        */
+       while (unlikely(too_many_isolated(pgdat))) {
++              /* stop isolation if there are still pages not migrated */
++              if (cc->nr_migratepages)
++                      return 0;
++
+               /* async migration should just abort */
+               if (cc->mode == MIGRATE_ASYNC)
+                       return 0;
diff --git a/queue-5.9/mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch b/queue-5.9/mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch
new file mode 100644 (file)
index 0000000..d1cd832
--- /dev/null
@@ -0,0 +1,66 @@
+From 96e1fac162cc0086c50b2b14062112adb2ba640e Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@ziepe.ca>
+Date: Fri, 13 Nov 2020 22:51:56 -0800
+Subject: mm/gup: use unpin_user_pages() in __gup_longterm_locked()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 96e1fac162cc0086c50b2b14062112adb2ba640e upstream.
+
+When FOLL_PIN is passed to __get_user_pages() the page list must be put
+back using unpin_user_pages() otherwise the page pin reference persists
+in a corrupted state.
+
+There are two places in the unwind of __gup_longterm_locked() that put
+the pages back without checking.  Normally on error this function would
+return the partial page list making this the caller's responsibility,
+but in these two cases the caller is not allowed to see these pages at
+all.
+
+Fixes: 3faa52c03f44 ("mm/gup: track FOLL_PIN pages")
+Reported-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/0-v2-3ae7d9d162e2+2a7-gup_cma_fix_jgg@nvidia.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/gup.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1637,8 +1637,11 @@ check_again:
+               /*
+                * drop the above get_user_pages reference.
+                */
+-              for (i = 0; i < nr_pages; i++)
+-                      put_page(pages[i]);
++              if (gup_flags & FOLL_PIN)
++                      unpin_user_pages(pages, nr_pages);
++              else
++                      for (i = 0; i < nr_pages; i++)
++                              put_page(pages[i]);
+               if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
+                       (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+@@ -1718,8 +1721,11 @@ static long __gup_longterm_locked(struct
+                       goto out;
+               if (check_dax_vmas(vmas_tmp, rc)) {
+-                      for (i = 0; i < rc; i++)
+-                              put_page(pages[i]);
++                      if (gup_flags & FOLL_PIN)
++                              unpin_user_pages(pages, rc);
++                      else
++                              for (i = 0; i < rc; i++)
++                                      put_page(pages[i]);
+                       rc = -EOPNOTSUPP;
+                       goto out;
+               }
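
The pairing rule the fix enforces, as a kernel-style sketch (start and the page count are placeholders):

    /* Pages obtained under FOLL_PIN (e.g. via pin_user_pages()) carry a
     * pin count on top of the refcount, so they must be released with
     * unpin_user_pages(); a bare put_page() loop leaves the pin behind. */
    struct page *pages[16];
    long rc = pin_user_pages(start, 16, FOLL_WRITE, pages, NULL);

    if (rc > 0)
        unpin_user_pages(pages, rc);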
diff --git a/queue-5.9/mm-slub-fix-panic-in-slab_alloc_node.patch b/queue-5.9/mm-slub-fix-panic-in-slab_alloc_node.patch
new file mode 100644 (file)
index 0000000..acb04c3
--- /dev/null
@@ -0,0 +1,126 @@
+From 22e4663e916321b72972c69ca0c6b962f529bd78 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 13 Nov 2020 22:51:53 -0800
+Subject: mm/slub: fix panic in slab_alloc_node()
+
+From: Laurent Dufour <ldufour@linux.ibm.com>
+
+commit 22e4663e916321b72972c69ca0c6b962f529bd78 upstream.
+
+While doing a memory hot-unplug operation on a PowerPC VM running 1024
+CPUs with 11TB of RAM, I hit the following panic:
+
+    BUG: Kernel NULL pointer dereference on read at 0x00000007
+    Faulting instruction address: 0xc000000000456048
+    Oops: Kernel access of bad area, sig: 11 [#2]
+    LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS= 2048 NUMA pSeries
+    Modules linked in: rpadlpar_io rpaphp
+    CPU: 160 PID: 1 Comm: systemd Tainted: G      D           5.9.0 #1
+    NIP:  c000000000456048 LR: c000000000455fd4 CTR: c00000000047b350
+    REGS: c00006028d1b77a0 TRAP: 0300   Tainted: G      D            (5.9.0)
+    MSR:  8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 24004228  XER: 00000000
+    CFAR: c00000000000f1b0 DAR: 0000000000000007 DSISR: 40000000 IRQMASK: 0
+    GPR00: c000000000455fd4 c00006028d1b7a30 c000000001bec800 0000000000000000
+    GPR04: 0000000000000dc0 0000000000000000 00000000000374ef c00007c53df99320
+    GPR08: 000007c53c980000 0000000000000000 000007c53c980000 0000000000000000
+    GPR12: 0000000000004400 c00000001e8e4400 0000000000000000 0000000000000f6a
+    GPR16: 0000000000000000 c000000001c25930 c000000001d62528 00000000000000c1
+    GPR20: c000000001d62538 c00006be469e9000 0000000fffffffe0 c0000000003c0ff8
+    GPR24: 0000000000000018 0000000000000000 0000000000000dc0 0000000000000000
+    GPR28: c00007c513755700 c000000001c236a4 c00007bc4001f800 0000000000000001
+    NIP [c000000000456048] __kmalloc_node+0x108/0x790
+    LR [c000000000455fd4] __kmalloc_node+0x94/0x790
+    Call Trace:
+      kvmalloc_node+0x58/0x110
+      mem_cgroup_css_online+0x10c/0x270
+      online_css+0x48/0xd0
+      cgroup_apply_control_enable+0x2c4/0x470
+      cgroup_mkdir+0x408/0x5f0
+      kernfs_iop_mkdir+0x90/0x100
+      vfs_mkdir+0x138/0x250
+      do_mkdirat+0x154/0x1c0
+      system_call_exception+0xf8/0x200
+      system_call_common+0xf0/0x27c
+    Instruction dump:
+    e93e0000 e90d0030 39290008 7cc9402a e94d0030 e93e0000 7ce95214 7f89502a
+    2fbc0000 419e0018 41920230 e9270010 <89290007> 7f994800 419e0220 7ee6bb78
+
+This pointing to the following code:
+
+    mm/slub.c:2851
+            if (unlikely(!object || !node_match(page, node))) {
+    c000000000456038:       00 00 bc 2f     cmpdi   cr7,r28,0
+    c00000000045603c:       18 00 9e 41     beq     cr7,c000000000456054 <__kmalloc_node+0x114>
+    node_match():
+    mm/slub.c:2491
+            if (node != NUMA_NO_NODE && page_to_nid(page) != node)
+    c000000000456040:       30 02 92 41     beq     cr4,c000000000456270 <__kmalloc_node+0x330>
+    page_to_nid():
+    include/linux/mm.h:1294
+    c000000000456044:       10 00 27 e9     ld      r9,16(r7)
+    c000000000456048:       07 00 29 89     lbz     r9,7(r9)   <<<< r9 = NULL
+    node_match():
+    mm/slub.c:2491
+    c00000000045604c:       00 48 99 7f     cmpw    cr7,r25,r9
+    c000000000456050:       20 02 9e 41     beq     cr7,c000000000456270 <__kmalloc_node+0x330>
+
+The panic occurred in slab_alloc_node() when checking for the page's node:
+
+       object = c->freelist;
+       page = c->page;
+       if (unlikely(!object || !node_match(page, node))) {
+               object = __slab_alloc(s, gfpflags, node, addr, c);
+               stat(s, ALLOC_SLOWPATH);
+
+The issue is that object is not NULL while page is NULL, which is odd
+but may happen if the cache flush happened after loading object but
+before loading page.  Thus checking for the page pointer is required too.
+
+The cache flush is done through an inter processor interrupt when a
+piece of memory is off-lined.  That interrupt is triggered when a memory
+hot-unplug operation is initiated and offline_pages() is calling the
+slub's MEM_GOING_OFFLINE callback slab_mem_going_offline_callback()
+which is calling flush_cpu_slab().  If that interrupt is caught between
+the reading of c->freelist and the reading of c->page, this could lead
+to such a situation.  That situation is expected and the later call to
+this_cpu_cmpxchg_double() will detect the change to c->freelist and redo
+the whole operation.
+
+In commit 6159d0f5c03e ("mm/slub.c: page is always non-NULL in
+node_match()") check on the page pointer has been removed assuming that
+page is always valid when it is called.  It happens that this is not
+true in that particular case, so check for page before calling
+node_match() here.
+
+Fixes: 6159d0f5c03e ("mm/slub.c: page is always non-NULL in node_match()")
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Christoph Lameter <cl@linux.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: Scott Cheloha <cheloha@linux.ibm.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201027190406.33283-1-ldufour@linux.ibm.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2848,7 +2848,7 @@ redo:
+       object = c->freelist;
+       page = c->page;
+-      if (unlikely(!object || !node_match(page, node))) {
++      if (unlikely(!object || !page || !node_match(page, node))) {
+               object = __slab_alloc(s, gfpflags, node, addr, c);
+               stat(s, ALLOC_SLOWPATH);
+       } else {
diff --git a/queue-5.9/mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch b/queue-5.9/mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch
new file mode 100644 (file)
index 0000000..5400b77
--- /dev/null
@@ -0,0 +1,59 @@
+From 2da9f6305f306ffbbb44790675799328fb73119d Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 13 Nov 2020 22:51:46 -0800
+Subject: mm/vmscan: fix NR_ISOLATED_FILE corruption on 64-bit
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 2da9f6305f306ffbbb44790675799328fb73119d upstream.
+
+Previously the negated unsigned long would be cast back to signed long
+which would have the correct negative value.  After commit 730ec8c01a2b
+("mm/vmscan.c: change prototype for shrink_page_list"), the large
+unsigned int converts to a large positive signed long.
+
+Symptoms include CMA allocations hanging forever holding the cma_mutex
+due to alloc_contig_range->...->isolate_migratepages_block waiting
+forever in "while (unlikely(too_many_isolated(pgdat)))".
+
+[akpm@linux-foundation.org: fix -stat.nr_lazyfree_fail as well, per Michal]
+
+Fixes: 730ec8c01a2b ("mm/vmscan.c: change prototype for shrink_page_list")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vaneet Narang <v.narang@samsung.com>
+Cc: Maninder Singh <maninder1.s@samsung.com>
+Cc: Amit Sahrawat <a.sahrawat@samsung.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201029032320.1448441-1-npiggin@gmail.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1514,7 +1514,8 @@ unsigned int reclaim_clean_pages_from_li
+       nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
+                       TTU_IGNORE_ACCESS, &stat, true);
+       list_splice(&clean_pages, page_list);
+-      mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -nr_reclaimed);
++      mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
++                          -(long)nr_reclaimed);
+       /*
+        * Since lazyfree pages are isolated from file LRU from the beginning,
+        * they will rotate back to anonymous LRU in the end if it failed to
+@@ -1524,7 +1525,7 @@ unsigned int reclaim_clean_pages_from_li
+       mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON,
+                           stat.nr_lazyfree_fail);
+       mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
+-                          -stat.nr_lazyfree_fail);
++                          -(long)stat.nr_lazyfree_fail);
+       return nr_reclaimed;
+ }
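
The promotion bug is easy to reproduce in isolation; a self-contained demonstration for 64-bit (LP64) systems:

    #include <stdio.h>

    int main(void)
    {
        unsigned int nr_reclaimed = 5;

        /* -nr_reclaimed wraps in 32-bit unsigned arithmetic and then
         * widens to a huge positive long; casting first negates properly. */
        long bad  = -nr_reclaimed;         /* 4294967291 */
        long good = -(long)nr_reclaimed;   /* -5 */

        printf("bad=%ld good=%ld\n", bad, good);
        return 0;
    }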
diff --git a/queue-5.9/ocfs2-initialize-ip_next_orphan.patch b/queue-5.9/ocfs2-initialize-ip_next_orphan.patch
new file mode 100644 (file)
index 0000000..fa20adb
--- /dev/null
@@ -0,0 +1,93 @@
+From f5785283dd64867a711ca1fb1f5bb172f252ecdf Mon Sep 17 00:00:00 2001
+From: Wengang Wang <wen.gang.wang@oracle.com>
+Date: Fri, 13 Nov 2020 22:52:23 -0800
+Subject: ocfs2: initialize ip_next_orphan
+
+From: Wengang Wang <wen.gang.wang@oracle.com>
+
+commit f5785283dd64867a711ca1fb1f5bb172f252ecdf upstream.
+
+Though the problem was found on an older 4.1.12 kernel, I think upstream
+has the same issue.
+
+In one node in the cluster, there is the following callback trace:
+
+   # cat /proc/21473/stack
+   __ocfs2_cluster_lock.isra.36+0x336/0x9e0 [ocfs2]
+   ocfs2_inode_lock_full_nested+0x121/0x520 [ocfs2]
+   ocfs2_evict_inode+0x152/0x820 [ocfs2]
+   evict+0xae/0x1a0
+   iput+0x1c6/0x230
+   ocfs2_orphan_filldir+0x5d/0x100 [ocfs2]
+   ocfs2_dir_foreach_blk+0x490/0x4f0 [ocfs2]
+   ocfs2_dir_foreach+0x29/0x30 [ocfs2]
+   ocfs2_recover_orphans+0x1b6/0x9a0 [ocfs2]
+   ocfs2_complete_recovery+0x1de/0x5c0 [ocfs2]
+   process_one_work+0x169/0x4a0
+   worker_thread+0x5b/0x560
+   kthread+0xcb/0xf0
+   ret_from_fork+0x61/0x90
+
+The above stack is not reasonable; the final iput() shouldn't happen in
+the ocfs2_orphan_filldir() function.  Looking at the code,
+
+  2067         /* Skip inodes which are already added to recover list, since dio may
+  2068          * happen concurrently with unlink/rename */
+  2069         if (OCFS2_I(iter)->ip_next_orphan) {
+  2070                 iput(iter);
+  2071                 return 0;
+  2072         }
+  2073
+
+The logic thinks the inode is already in the recover list on seeing
+ip_next_orphan is non-NULL, so it skips this inode after dropping the
+reference which was incremented in ocfs2_iget().
+
+However, if the inode were already in the recover list, it would have
+another reference, and the iput() at line 2070 would not be the final
+iput() (dropping the last reference).  So I don't think the inode is
+really in the recover list (no vmcore to confirm).
+
+Note that ocfs2_queue_orphans(), though it does not show up in the call
+trace, is holding the cluster lock on the orphan directory when looking
+up unlinked inodes.  The on-disk inode eviction could involve a lot of
+I/O, which may take a long time to finish.  That means this node could
+hold the cluster lock for a very long time, which can cause lock requests
+(from other nodes) on the orphan directory to hang for a long time.
+
+Looking more at ip_next_orphan, I found it is not initialized when
+allocating a new ocfs2_inode_info structure.
+
+This caused the reflink operations from some nodes to hang for a very
+long time waiting for the cluster lock on the orphan directory.
+
+Fix: initialize ip_next_orphan as NULL.
+
+Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201109171746.27884-1-wen.gang.wang@oracle.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/super.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *
+       oi->ip_blkno = 0ULL;
+       oi->ip_clusters = 0;
++      oi->ip_next_orphan = NULL;
+       ocfs2_resv_init_once(&oi->ip_la_data_resv);
diff --git a/queue-5.9/reboot-fix-overflow-parsing-reboot-cpu-number.patch b/queue-5.9/reboot-fix-overflow-parsing-reboot-cpu-number.patch
new file mode 100644 (file)
index 0000000..efade51
--- /dev/null
@@ -0,0 +1,74 @@
+From df5b0ab3e08a156701b537809914b339b0daa526 Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 13 Nov 2020 22:52:07 -0800
+Subject: reboot: fix overflow parsing reboot cpu number
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit df5b0ab3e08a156701b537809914b339b0daa526 upstream.
+
+Limit the CPU number to num_possible_cpus(), because setting it to a
+value lower than INT_MAX but higher than NR_CPUS produces the following
+error on reboot and shutdown:
+
+    BUG: unable to handle page fault for address: ffffffff90ab1bb0
+    #PF: supervisor read access in kernel mode
+    #PF: error_code(0x0000) - not-present page
+    PGD 1c09067 P4D 1c09067 PUD 1c0a063 PMD 0
+    Oops: 0000 [#1] SMP
+    CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 5.9.0-rc8-kvm #110
+    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
+    RIP: 0010:migrate_to_reboot_cpu+0xe/0x60
+    Code: ea ea 00 48 89 fa 48 c7 c7 30 57 f1 81 e9 fa ef ff ff 66 2e 0f 1f 84 00 00 00 00 00 53 8b 1d d5 ea ea 00 e8 14 33 fe ff 89 da <48> 0f a3 15 ea fc bd 00 48 89 d0 73 29 89 c2 c1 e8 06 65 48 8b 3c
+    RSP: 0018:ffffc90000013e08 EFLAGS: 00010246
+    RAX: ffff88801f0a0000 RBX: 0000000077359400 RCX: 0000000000000000
+    RDX: 0000000077359400 RSI: 0000000000000002 RDI: ffffffff81c199e0
+    RBP: ffffffff81c1e3c0 R08: ffff88801f41f000 R09: ffffffff81c1e348
+    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+    R13: 00007f32bedf8830 R14: 00000000fee1dead R15: 0000000000000000
+    FS:  00007f32bedf8980(0000) GS:ffff88801f480000(0000) knlGS:0000000000000000
+    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+    CR2: ffffffff90ab1bb0 CR3: 000000001d057000 CR4: 00000000000006a0
+    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+    Call Trace:
+      __do_sys_reboot.cold+0x34/0x5b
+      do_syscall_64+0x2d/0x40
+
+Fixes: 1b3a5d02ee07 ("reboot: move arch/x86 reboot= handling to generic kernel")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Fabian Frederick <fabf@skynet.be>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Robin Holt <robinmholt@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201103214025.116799-3-mcroce@linux.microsoft.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/reboot.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/reboot.c
++++ b/kernel/reboot.c
+@@ -558,6 +558,13 @@ static int __init reboot_setup(char *str
+                               reboot_cpu = simple_strtoul(str+3, NULL, 0);
+                       else
+                               *mode = REBOOT_SOFT;
++                      if (reboot_cpu >= num_possible_cpus()) {
++                              pr_err("Ignoring the CPU number in reboot= option. "
++                                     "CPU %d exceeds possible cpu number %d\n",
++                                     reboot_cpu, num_possible_cpus());
++                              reboot_cpu = 0;
++                              break;
++                      }
+                       break;
+               case 'g':
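
A user-space sketch of the added bound check (num_possible_cpus() is stubbed here; the clamp-to-0 fallback mirrors the patch):

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned int num_possible_cpus(void) { return 8; }  /* stub */

    /* A CPU number below INT_MAX but above the possible-CPU count would
     * index past cpu_possible_mask at reboot time, so clamp it to 0. */
    static int parse_reboot_cpu(const char *arg)
    {
        unsigned int cpu = strtoul(arg, NULL, 0);

        if (cpu >= num_possible_cpus()) {
            fprintf(stderr, "Ignoring CPU %u: exceeds possible cpu number %u\n",
                    cpu, num_possible_cpus());
            return 0;
        }
        return (int)cpu;
    }

    int main(void)
    {
        printf("%d %d\n", parse_reboot_cpu("3"), parse_reboot_cpu("370000000"));
        return 0;
    }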
diff --git a/queue-5.9/revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch b/queue-5.9/revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch
new file mode 100644 (file)
index 0000000..0bb1379
--- /dev/null
@@ -0,0 +1,86 @@
+From 8b92c4ff4423aa9900cf838d3294fcade4dbda35 Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 13 Nov 2020 22:52:02 -0800
+Subject: Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint"
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit 8b92c4ff4423aa9900cf838d3294fcade4dbda35 upstream.
+
+Patch series "fix parsing of reboot= cmdline", v3.
+
+The parsing of the reboot= cmdline has two major errors:
+
+ - a missing bound check can crash the system on reboot
+
+ - parsing of the cpu number only works if specified last
+
+Fix both.
+
+This patch (of 2):
+
+This reverts commit 616feab753972b97.
+
+kstrtoint() and simple_strtoul() have a subtle difference which makes
+them non-interchangeable: if a non-digit character is found during
+parsing, the former returns an error, while the latter just stops
+parsing, e.g. simple_strtoul("123xyx") = 123.
+
+The kernel cmdline reboot= argument allows specifying the CPU used for
+rebooting, with the syntax `s####` among the other flags, e.g.
+"reboot=warm,s31,force". With kstrtoint(), if this flag is not the
+last one given, it and all subsequent flags are silently ignored.
+
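+To illustrate the difference on the example above (a minimal sketch;
+the "31,force" substring is what the 's' handler would see):
+
+	int cpu, rc;
+
+	/* stops at the first non-digit: cpu == 31 */
+	cpu = simple_strtoul("31,force", NULL, 0);
+
+	/* rejects the whole string: rc < 0 (-EINVAL), cpu untouched */
+	rc = kstrtoint("31,force", 0, &cpu);
+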
+Fixes: 616feab75397 ("kernel/reboot.c: convert simple_strtoul to kstrtoint")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Robin Holt <robinmholt@gmail.com>
+Cc: Fabian Frederick <fabf@skynet.be>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201103214025.116799-2-mcroce@linux.microsoft.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/reboot.c |   21 +++++++--------------
+ 1 file changed, 7 insertions(+), 14 deletions(-)
+
+--- a/kernel/reboot.c
++++ b/kernel/reboot.c
+@@ -551,22 +551,15 @@ static int __init reboot_setup(char *str
+                       break;
+               case 's':
+-              {
+-                      int rc;
+-
+-                      if (isdigit(*(str+1))) {
+-                              rc = kstrtoint(str+1, 0, &reboot_cpu);
+-                              if (rc)
+-                                      return rc;
+-                      } else if (str[1] == 'm' && str[2] == 'p' &&
+-                                 isdigit(*(str+3))) {
+-                              rc = kstrtoint(str+3, 0, &reboot_cpu);
+-                              if (rc)
+-                                      return rc;
+-                      } else
++                      if (isdigit(*(str+1)))
++                              reboot_cpu = simple_strtoul(str+1, NULL, 0);
++                      else if (str[1] == 'm' && str[2] == 'p' &&
++                                                      isdigit(*(str+3)))
++                              reboot_cpu = simple_strtoul(str+3, NULL, 0);
++                      else
+                               *mode = REBOOT_SOFT;
+                       break;
+-              }
++
+               case 'g':
+                       *mode = REBOOT_GPIO;
+                       break;
diff --git a/queue-5.9/series b/queue-5.9/series
index d5fbe708955960d897f28c93fdfac1a9a4898758..bac52db0db94f07095993af330624108a9ea3b0f 100644 (file)
@@ -200,3 +200,19 @@ uio-fix-use-after-free-in-uio_unregister_device.patch
 revert-usb-musb-convert-to-devm_platform_ioremap_resource_byname.patch
 usb-cdc-acm-add-disable_echo-for-renesas-usb-download-mode.patch
 usb-typec-ucsi-report-power-supply-changes.patch
+xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch
+virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch
+mei-protect-mei_cl_mtu-from-null-dereference.patch
+futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch
+jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch
+bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch
+mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch
+mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch
+mm-slub-fix-panic-in-slab_alloc_node.patch
+mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch
+mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch
+compiler.h-fix-barrier_data-on-clang.patch
+revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch
+reboot-fix-overflow-parsing-reboot-cpu-number.patch
+hugetlbfs-fix-anon-huge-page-migration-race.patch
+ocfs2-initialize-ip_next_orphan.patch
diff --git a/queue-5.9/virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch b/queue-5.9/virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch
new file mode 100644 (file)
index 0000000..3b0c7d3
--- /dev/null
@@ -0,0 +1,83 @@
+From 9d516aa82b7d4fbe7f6303348697960ba03a530b Mon Sep 17 00:00:00 2001
+From: Alexander Lobakin <alobakin@pm.me>
+Date: Wed, 4 Nov 2020 15:31:36 +0000
+Subject: virtio: virtio_console: fix DMA memory allocation for rproc serial
+
+From: Alexander Lobakin <alobakin@pm.me>
+
+commit 9d516aa82b7d4fbe7f6303348697960ba03a530b upstream.
+
+Since commit 086d08725d34 ("remoteproc: create vdev subdevice with
+specific dma memory pool"), every remoteproc has a DMA subdevice
+("remoteprocX#vdevYbuffer") for each virtio device, which inherits
+DMA capabilities from the corresponding platform device. This made it
+possible to associate a different DMA pool with each vdev, and required
+virtio drivers to perform DMA operations with the parent device
+(vdev->dev.parent) instead of the grandparent (vdev->dev.parent->parent).
+
+virtio_rpmsg_bus was already changed in the same merge cycle with
+commit d999b622fcfb ("rpmsg: virtio: allocate buffer from parent"),
+but virtio_console was not. In fact, operations using the grandparent
+worked fine while the grandparent was the platform device, but since
+commit c774ad010873 ("remoteproc: Fix and restore the parenting
+hierarchy for vdev") this was changed, and now the grandparent device
+is the remoteproc device without any DMA capabilities.
+So, starting with v5.8-rc1, the following warning is observed:
+
+[    2.483925] ------------[ cut here ]------------
+[    2.489148] WARNING: CPU: 3 PID: 101 at kernel/dma/mapping.c:427 0x80e7eee8
+[    2.489152] Modules linked in: virtio_console(+)
+[    2.503737]  virtio_rpmsg_bus rpmsg_core
+[    2.508903]
+[    2.528898] <Other modules, stack and call trace here>
+[    2.913043]
+[    2.914907] ---[ end trace 93ac8746beab612c ]---
+[    2.920102] virtio-ports vport1p0: Error allocating inbufs
+
+kernel/dma/mapping.c:427 is:
+
+WARN_ON_ONCE(!dev->coherent_dma_mask);
+
+obviously because the grandparent is now the remoteproc device without
+any DMA caps:
+
+[    3.104943] Parent: remoteproc0#vdev1buffer, grandparent: remoteproc0
+
+Fix this the same way as was done for virtio_rpmsg_bus, using just the
+parent device (vdev->dev.parent, "remoteprocX#vdevYbuffer") for DMA
+operations.
+This now also allows reserving DMA pools/buffers for rproc serial
+via Device Tree.
+
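+For illustration, a sketch of the fixed allocation path (condensed
+from the hunk below; the surrounding port_buffer fields and buf_size
+are as used by virtio_console's alloc_buf()):
+
+	/* the DMA-capable ancestor is now the parent, not the grandparent */
+	buf->dev = vdev->dev.parent;	/* remoteprocX#vdevYbuffer */
+	if (!buf->dev)
+		goto free_buf;
+	get_device(buf->dev);		/* keep it alive with the buffer */
+	buf->buf = dma_alloc_coherent(buf->dev, buf_size,
+				      &buf->dma, GFP_KERNEL);
+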
+Fixes: c774ad010873 ("remoteproc: Fix and restore the parenting hierarchy for vdev")
+Cc: stable@vger.kernel.org # 5.1+
+Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Alexander Lobakin <alobakin@pm.me>
+Date: Thu, 5 Nov 2020 11:10:24 +0800
+Link: https://lore.kernel.org/r/AOKowLclCbOCKxyiJ71WeNyuAAj2q8EUtxrXbyky5E@cp7-web-042.plabs.ch
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/char/virtio_console.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/virtio_console.c
++++ b/drivers/char/virtio_console.c
+@@ -435,12 +435,12 @@ static struct port_buffer *alloc_buf(str
+               /*
+                * Allocate DMA memory from ancestor. When a virtio
+                * device is created by remoteproc, the DMA memory is
+-               * associated with the grandparent device:
+-               * vdev => rproc => platform-dev.
++               * associated with the parent device:
++               * virtioY => remoteprocX#vdevYbuffer.
+                */
+-              if (!vdev->dev.parent || !vdev->dev.parent->parent)
++              buf->dev = vdev->dev.parent;
++              if (!buf->dev)
+                       goto free_buf;
+-              buf->dev = vdev->dev.parent->parent;
+               /* Increase device refcnt to avoid freeing it */
+               get_device(buf->dev);
diff --git a/queue-5.9/xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch b/queue-5.9/xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch
new file mode 100644 (file)
index 0000000..40f4c78
--- /dev/null
@@ -0,0 +1,40 @@
+From 76255470ffa2795a44032e8b3c1ced11d81aa2db Mon Sep 17 00:00:00 2001
+From: Zhang Qilong <zhangqilong3@huawei.com>
+Date: Fri, 6 Nov 2020 20:22:21 +0800
+Subject: xhci: hisilicon: fix reference leak in xhci_histb_probe
+
+From: Zhang Qilong <zhangqilong3@huawei.com>
+
+commit 76255470ffa2795a44032e8b3c1ced11d81aa2db upstream.
+
+pm_runtime_get_sync() first increments the PM usage count and then
+resumes the device. We should decrease the usage count whether probe
+succeeds or fails (the device's runtime PM may have an error, the
+device may be in an inaccessible state, or some other error state).
+If we do not call a put operation to decrease the usage count, it
+results in a reference leak in xhci_histb_probe. Moreover, the
+device can then never enter the idle state and will stay busy or in
+another non-idle state. Fix this by jumping to the error handling
+branch.
+
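+For reference, the general shape of the fix (a hypothetical probe
+skeleton, not the exact xhci-histb code):
+
+	static int foo_probe(struct device *dev)
+	{
+		int ret;
+
+		pm_runtime_enable(dev);
+		ret = pm_runtime_get_sync(dev); /* bumps usage count even on failure */
+		if (ret < 0)
+			goto disable_pm;
+
+		/* later setup steps also jump to disable_pm on error */
+		return 0;
+
+	disable_pm:
+		pm_runtime_put_noidle(dev);	/* drop the count taken above */
+		pm_runtime_disable(dev);
+		return ret;
+	}
+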
+Fixes: c508f41da0788 ("xhci: hisilicon: support HiSilicon STB xHCI host controller")
+Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
+Link: https://lore.kernel.org/r/20201106122221.2304528-1-zhangqilong3@huawei.com
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci-histb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/usb/host/xhci-histb.c
++++ b/drivers/usb/host/xhci-histb.c
+@@ -240,7 +240,7 @@ static int xhci_histb_probe(struct platf
+       /* Initialize dma_mask and coherent_dma_mask to 32-bits */
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+       if (ret)
+-              return ret;
++              goto disable_pm;
+       hcd = usb_create_hcd(driver, dev, dev_name(dev));
+       if (!hcd) {