--- /dev/null
+From 50b8a742850fce7293bed45753152c425f7e931b Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 13 Nov 2020 02:27:31 +0900
+Subject: bootconfig: Extend the magic check range to the preceding 3 bytes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 50b8a742850fce7293bed45753152c425f7e931b upstream.
+
+Since Grub may align the size of the initrd to 4 bytes if the user
+passes an initrd from cpio, we have to check the preceding 3 bytes
+as well.
+
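+For illustration (assuming the upstream magic string "#BOOTCONFIG\n",
+which is BOOTCONFIG_MAGIC_LEN bytes long): when Grub appends 1-3
+alignment bytes, the magic no longer ends exactly at initrd_end, so
+the check has to scan backwards over 4 candidate offsets:
+
+  ... bootconfig data | #BOOTCONFIG\n | 0-3 pad bytes |
+                                                      ^ initrd_end
+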
+Link: https://lkml.kernel.org/r/160520205132.303174.4876760192433315429.stgit@devnote2
+
+Cc: stable@vger.kernel.org
+Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
+Reported-by: Chen Yu <yu.chen.surf@gmail.com>
+Tested-by: Chen Yu <yu.chen.surf@gmail.com>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ init/main.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/init/main.c
++++ b/init/main.c
+@@ -267,14 +267,24 @@ static void * __init get_boot_config_fro
+ u32 size, csum;
+ char *data;
+ u32 *hdr;
++ int i;
+
+ if (!initrd_end)
+ return NULL;
+
+ data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+- if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+- return NULL;
++ /*
++ * Since Grub may align the size of initrd to 4, we must
++ * check the preceding 3 bytes as well.
++ */
++ for (i = 0; i < 4; i++) {
++ if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
++ goto found;
++ data--;
++ }
++ return NULL;
+
++found:
+ hdr = (u32 *)(data - 8);
+ size = hdr[0];
+ csum = hdr[1];
--- /dev/null
+From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001
+From: Arvind Sankar <nivedita@alum.mit.edu>
+Date: Fri, 13 Nov 2020 22:51:59 -0800
+Subject: compiler.h: fix barrier_data() on clang
+
+From: Arvind Sankar <nivedita@alum.mit.edu>
+
+commit 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 upstream.
+
+Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
+mutually exclusive") neglected to copy barrier_data() from
+compiler-gcc.h into compiler-clang.h.
+
+The definition in compiler-gcc.h was really to work around clang's more
+aggressive optimization, so this broke barrier_data() on clang, and
+consequently memzero_explicit() as well.
+
+For example, this results in at least the memzero_explicit() call in
+lib/crypto/sha256.c:sha256_transform() being optimized away by clang.
+
+Fix this by moving the definition of barrier_data() into compiler.h.
+
+Also move the gcc/clang definition of barrier() into compiler.h:
+__memory_barrier() is icc-specific (and barrier() is already defined
+using it in compiler-intel.h), so it doesn't belong in compiler.h.
+
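+For reference, the construct barrier_data() exists to protect looks
+roughly like the kernel's memzero_explicit() (a sketch):
+
+  void memzero_explicit(void *s, size_t count)
+  {
+          memset(s, 0, count);
+          /*
+           * The "r"(ptr) input tells the compiler the asm may read *s,
+           * so the memset cannot be elided as a dead store; a plain
+           * barrier() is not enough for clang.
+           */
+          barrier_data(s);
+  }
+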
+[rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled]
+
+Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org
+Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
+Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/asm-generic/barrier.h | 1 +
+ include/linux/compiler-clang.h | 6 ------
+ include/linux/compiler-gcc.h | 19 -------------------
+ include/linux/compiler.h | 18 ++++++++++++++++--
+ 4 files changed, 17 insertions(+), 27 deletions(-)
+
+--- a/include/asm-generic/barrier.h
++++ b/include/asm-generic/barrier.h
+@@ -13,6 +13,7 @@
+
+ #ifndef __ASSEMBLY__
+
++#include <linux/compiler.h>
+ #include <asm/rwonce.h>
+
+ #ifndef nop
+--- a/include/linux/compiler-clang.h
++++ b/include/linux/compiler-clang.h
+@@ -52,12 +52,6 @@
+ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+ #endif
+
+-/* The following are for compatibility with GCC, from compiler-gcc.h,
+- * and may be redefined here because they should not be shared with other
+- * compilers, like ICC.
+- */
+-#define barrier() __asm__ __volatile__("" : : : "memory")
+-
+ #if __has_feature(shadow_call_stack)
+ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+ #endif
+--- a/include/linux/compiler-gcc.h
++++ b/include/linux/compiler-gcc.h
+@@ -15,25 +15,6 @@
+ # error Sorry, your compiler is too old - please upgrade it.
+ #endif
+
+-/* Optimization barrier */
+-
+-/* The "volatile" is due to gcc bugs */
+-#define barrier() __asm__ __volatile__("": : :"memory")
+-/*
+- * This version is i.e. to prevent dead stores elimination on @ptr
+- * where gcc and llvm may behave differently when otherwise using
+- * normal barrier(): while gcc behavior gets along with a normal
+- * barrier(), llvm needs an explicit input variable to be assumed
+- * clobbered. The issue is as follows: while the inline asm might
+- * access any memory it wants, the compiler could have fit all of
+- * @ptr into memory registers instead, and since @ptr never escaped
+- * from that, it proved that the inline asm wasn't touching any of
+- * it. This version works well with both compilers, i.e. we're telling
+- * the compiler that the inline asm absolutely may see the contents
+- * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
+- */
+-#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+-
+ /*
+ * This macro obfuscates arithmetic on a variable address so that gcc
+ * shouldn't recognize the original var, and make assumptions about it.
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_
+
+ /* Optimization barrier */
+ #ifndef barrier
+-# define barrier() __memory_barrier()
++/* The "volatile" is due to gcc bugs */
++# define barrier() __asm__ __volatile__("": : :"memory")
+ #endif
+
+ #ifndef barrier_data
+-# define barrier_data(ptr) barrier()
++/*
++ * This version is i.e. to prevent dead stores elimination on @ptr
++ * where gcc and llvm may behave differently when otherwise using
++ * normal barrier(): while gcc behavior gets along with a normal
++ * barrier(), llvm needs an explicit input variable to be assumed
++ * clobbered. The issue is as follows: while the inline asm might
++ * access any memory it wants, the compiler could have fit all of
++ * @ptr into memory registers instead, and since @ptr never escaped
++ * from that, it proved that the inline asm wasn't touching any of
++ * it. This version works well with both compilers, i.e. we're telling
++ * the compiler that the inline asm absolutely may see the contents
++ * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
++ */
++# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+ #endif
+
+ /* workaround for GCC PR82365 if needed */
--- /dev/null
+From 1e106aa3509b86738769775969822ffc1ec21bf4 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 6 Nov 2020 11:52:05 +0300
+Subject: futex: Don't enable IRQs unconditionally in put_pi_state()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 1e106aa3509b86738769775969822ffc1ec21bf4 upstream.
+
+The exit_pi_state_list() function calls put_pi_state() with IRQs
+disabled and does not expect IRQs to be enabled inside the function.
+
+Use the _irqsave() variant so that IRQs are restored to the original state
+instead of being enabled unconditionally.
+
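+The difference, in sketch form:
+
+  raw_spin_lock_irq(&lock);                  /* disables IRQs */
+  raw_spin_unlock_irq(&lock);                /* unconditionally re-enables them */
+
+  raw_spin_lock_irqsave(&lock, flags);       /* saves the current IRQ state */
+  raw_spin_unlock_irqrestore(&lock, flags);  /* restores it, so IRQs stay
+                                                disabled for callers that had
+                                                them disabled */
+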
+Fixes: 153fbd1226fb ("futex: Fix more put_pi_state() vs. exit_pi_state_list() races")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201106085205.GA1159983@mwanda
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi
+ */
+ if (pi_state->owner) {
+ struct task_struct *owner;
++ unsigned long flags;
+
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
+ owner = pi_state->owner;
+ if (owner) {
+ raw_spin_lock(&owner->pi_lock);
+@@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi
+ raw_spin_unlock(&owner->pi_lock);
+ }
+ rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
+ }
+
+ if (current->pi_state_cache) {
--- /dev/null
+From 336bf30eb76580b579dc711ded5d599d905c0217 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Fri, 13 Nov 2020 22:52:16 -0800
+Subject: hugetlbfs: fix anon huge page migration race
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 336bf30eb76580b579dc711ded5d599d905c0217 upstream.
+
+Qian Cai reported the following BUG in [1]
+
+ LTP: starting move_pages12
+ BUG: unable to handle page fault for address: ffffffffffffffe0
+ ...
+ RIP: 0010:anon_vma_interval_tree_iter_first+0xa2/0x170 avc_start_pgoff at mm/interval_tree.c:63
+ Call Trace:
+ rmap_walk_anon+0x141/0xa30 rmap_walk_anon at mm/rmap.c:1864
+ try_to_unmap+0x209/0x2d0 try_to_unmap at mm/rmap.c:1763
+ migrate_pages+0x1005/0x1fb0
+ move_pages_and_store_status.isra.47+0xd7/0x1a0
+ __x64_sys_move_pages+0xa5c/0x1100
+ do_syscall_64+0x5f/0x310
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Hugh Dickins diagnosed this as a migration bug caused by code introduced
+to use i_mmap_rwsem for pmd sharing synchronization. Specifically, the
+routine unmap_and_move_huge_page() is always passing the TTU_RMAP_LOCKED
+flag to try_to_unmap() while holding i_mmap_rwsem. This is wrong for
+anon pages as the anon_vma_lock should be held in this case. Further
+analysis suggested that i_mmap_rwsem was not required to be held at all
+when calling try_to_unmap for anon pages as an anon page could never be
+part of a shared pmd mapping.
+
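+In sketch form, the locking rule that falls out of this analysis
+(as applied in unmap_and_move_huge_page()):
+
+  if (!PageAnon(hpage)) {
+          /* shared mapping: huge_pmd_unshare is possible */
+          mapping = hugetlb_page_mapping_lock_write(hpage);
+          if (unlikely(!mapping))
+                  goto unlock_put_anon;   /* trylock failed */
+          ttu |= TTU_RMAP_LOCKED;
+  }
+  try_to_unmap(hpage, ttu);
+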
+Discussion also revealed that the hack in hugetlb_page_mapping_lock_write
+to drop page lock and acquire i_mmap_rwsem is wrong. There is no way to
+keep mapping valid while dropping page lock.
+
+This patch does the following:
+
+ - Do not take i_mmap_rwsem and set TTU_RMAP_LOCKED for anon pages when
+ calling try_to_unmap.
+
+ - Remove the hacky code in hugetlb_page_mapping_lock_write. The routine
+ will now simply do a 'trylock' while still holding the page lock. If
+ the trylock fails, it will return NULL. This could impact the
+ callers:
+
+ - migration calling code will receive -EAGAIN and retry up to the
+ hard coded limit (10).
+
+ - memory error code will treat the page as BUSY. This will force
+    killing (SIGKILL) of any mapping tasks instead of SIGBUS.
+
+ Do note that this change in behavior only happens when there is a
+ race. None of the standard kernel testing suites actually hit this
+ race, but it is possible.
+
+[1] https://lore.kernel.org/lkml/20200708012044.GC992@lca.pw/
+[2] https://lore.kernel.org/linux-mm/alpine.LSU.2.11.2010071833100.2214@eggly.anvils/
+
+Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
+Reported-by: Qian Cai <cai@lca.pw>
+Suggested-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201105195058.78401-1-mike.kravetz@oracle.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 90 ++--------------------------------------------------
+ mm/memory-failure.c | 36 +++++++++-----------
+ mm/migrate.c | 46 ++++++++++++++------------
+ mm/rmap.c | 5 --
+ 4 files changed, 48 insertions(+), 129 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1579,103 +1579,23 @@ int PageHeadHuge(struct page *page_head)
+ }
+
+ /*
+- * Find address_space associated with hugetlbfs page.
+- * Upon entry page is locked and page 'was' mapped although mapped state
+- * could change. If necessary, use anon_vma to find vma and associated
+- * address space. The returned mapping may be stale, but it can not be
+- * invalid as page lock (which is held) is required to destroy mapping.
+- */
+-static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)
+-{
+- struct anon_vma *anon_vma;
+- pgoff_t pgoff_start, pgoff_end;
+- struct anon_vma_chain *avc;
+- struct address_space *mapping = page_mapping(hpage);
+-
+- /* Simple file based mapping */
+- if (mapping)
+- return mapping;
+-
+- /*
+- * Even anonymous hugetlbfs mappings are associated with an
+- * underlying hugetlbfs file (see hugetlb_file_setup in mmap
+- * code). Find a vma associated with the anonymous vma, and
+- * use the file pointer to get address_space.
+- */
+- anon_vma = page_lock_anon_vma_read(hpage);
+- if (!anon_vma)
+- return mapping; /* NULL */
+-
+- /* Use first found vma */
+- pgoff_start = page_to_pgoff(hpage);
+- pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
+- anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
+- pgoff_start, pgoff_end) {
+- struct vm_area_struct *vma = avc->vma;
+-
+- mapping = vma->vm_file->f_mapping;
+- break;
+- }
+-
+- anon_vma_unlock_read(anon_vma);
+- return mapping;
+-}
+-
+-/*
+ * Find and lock address space (mapping) in write mode.
+ *
+- * Upon entry, the page is locked which allows us to find the mapping
+- * even in the case of an anon page. However, locking order dictates
+- * the i_mmap_rwsem be acquired BEFORE the page lock. This is hugetlbfs
+- * specific. So, we first try to lock the sema while still holding the
+- * page lock. If this works, great! If not, then we need to drop the
+- * page lock and then acquire i_mmap_rwsem and reacquire page lock. Of
+- * course, need to revalidate state along the way.
++ * Upon entry, the page is locked which means that page_mapping() is
++ * stable. Due to locking order, we can only trylock_write. If we can
++ * not get the lock, simply return NULL to caller.
+ */
+ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
+ {
+- struct address_space *mapping, *mapping2;
++ struct address_space *mapping = page_mapping(hpage);
+
+- mapping = _get_hugetlb_page_mapping(hpage);
+-retry:
+ if (!mapping)
+ return mapping;
+
+- /*
+- * If no contention, take lock and return
+- */
+ if (i_mmap_trylock_write(mapping))
+ return mapping;
+
+- /*
+- * Must drop page lock and wait on mapping sema.
+- * Note: Once page lock is dropped, mapping could become invalid.
+- * As a hack, increase map count until we lock page again.
+- */
+- atomic_inc(&hpage->_mapcount);
+- unlock_page(hpage);
+- i_mmap_lock_write(mapping);
+- lock_page(hpage);
+- atomic_add_negative(-1, &hpage->_mapcount);
+-
+- /* verify page is still mapped */
+- if (!page_mapped(hpage)) {
+- i_mmap_unlock_write(mapping);
+- return NULL;
+- }
+-
+- /*
+- * Get address space again and verify it is the same one
+- * we locked. If not, drop lock and retry.
+- */
+- mapping2 = _get_hugetlb_page_mapping(hpage);
+- if (mapping2 != mapping) {
+- i_mmap_unlock_write(mapping);
+- mapping = mapping2;
+- goto retry;
+- }
+-
+- return mapping;
++ return NULL;
+ }
+
+ pgoff_t __basepage_index(struct page *page)
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1031,27 +1031,25 @@ static bool hwpoison_user_mappings(struc
+ if (!PageHuge(hpage)) {
+ unmap_success = try_to_unmap(hpage, ttu);
+ } else {
+- /*
+- * For hugetlb pages, try_to_unmap could potentially call
+- * huge_pmd_unshare. Because of this, take semaphore in
+- * write mode here and set TTU_RMAP_LOCKED to indicate we
+- * have taken the lock at this higer level.
+- *
+- * Note that the call to hugetlb_page_mapping_lock_write
+- * is necessary even if mapping is already set. It handles
+- * ugliness of potentially having to drop page lock to obtain
+- * i_mmap_rwsem.
+- */
+- mapping = hugetlb_page_mapping_lock_write(hpage);
+-
+- if (mapping) {
+- unmap_success = try_to_unmap(hpage,
++ if (!PageAnon(hpage)) {
++ /*
++ * For hugetlb pages in shared mappings, try_to_unmap
++ * could potentially call huge_pmd_unshare. Because of
++ * this, take semaphore in write mode here and set
++ * TTU_RMAP_LOCKED to indicate we have taken the lock
++ * at this higer level.
++ */
++ mapping = hugetlb_page_mapping_lock_write(hpage);
++ if (mapping) {
++ unmap_success = try_to_unmap(hpage,
+ ttu|TTU_RMAP_LOCKED);
+- i_mmap_unlock_write(mapping);
++ i_mmap_unlock_write(mapping);
++ } else {
++ pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
++ unmap_success = false;
++ }
+ } else {
+- pr_info("Memory failure: %#lx: could not find mapping for mapped huge page\n",
+- pfn);
+- unmap_success = false;
++ unmap_success = try_to_unmap(hpage, ttu);
+ }
+ }
+ if (!unmap_success)
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1333,34 +1333,38 @@ static int unmap_and_move_huge_page(new_
+ goto put_anon;
+
+ if (page_mapped(hpage)) {
+- /*
+- * try_to_unmap could potentially call huge_pmd_unshare.
+- * Because of this, take semaphore in write mode here and
+- * set TTU_RMAP_LOCKED to let lower levels know we have
+- * taken the lock.
+- */
+- mapping = hugetlb_page_mapping_lock_write(hpage);
+- if (unlikely(!mapping))
+- goto unlock_put_anon;
+-
+- try_to_unmap(hpage,
+- TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
+- TTU_RMAP_LOCKED);
++ bool mapping_locked = false;
++ enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK|
++ TTU_IGNORE_ACCESS;
++
++ if (!PageAnon(hpage)) {
++ /*
++ * In shared mappings, try_to_unmap could potentially
++ * call huge_pmd_unshare. Because of this, take
++ * semaphore in write mode here and set TTU_RMAP_LOCKED
++ * to let lower levels know we have taken the lock.
++ */
++ mapping = hugetlb_page_mapping_lock_write(hpage);
++ if (unlikely(!mapping))
++ goto unlock_put_anon;
++
++ mapping_locked = true;
++ ttu |= TTU_RMAP_LOCKED;
++ }
++
++ try_to_unmap(hpage, ttu);
+ page_was_mapped = 1;
+- /*
+- * Leave mapping locked until after subsequent call to
+- * remove_migration_ptes()
+- */
++
++ if (mapping_locked)
++ i_mmap_unlock_write(mapping);
+ }
+
+ if (!page_mapped(hpage))
+ rc = move_to_new_page(new_hpage, hpage, mode);
+
+- if (page_was_mapped) {
++ if (page_was_mapped)
+ remove_migration_ptes(hpage,
+- rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true);
+- i_mmap_unlock_write(mapping);
+- }
++ rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
+
+ unlock_put_anon:
+ unlock_page(new_hpage);
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1413,9 +1413,6 @@ static bool try_to_unmap_one(struct page
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+- *
+- * If called for a huge page, caller must hold i_mmap_rwsem
+- * in write mode as it is possible to call huge_pmd_unshare.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
+@@ -1462,7 +1459,7 @@ static bool try_to_unmap_one(struct page
+ subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ address = pvmw.address;
+
+- if (PageHuge(page)) {
++ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
--- /dev/null
+From 05d5233df85e9621597c5838e95235107eb624a2 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 7 Nov 2020 00:00:49 -0500
+Subject: jbd2: fix up sparse warnings in checkpoint code
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 05d5233df85e9621597c5838e95235107eb624a2 upstream.
+
+Add missing __acquires() and __releases() annotations. Also, in a
+"this should never happen" WARN_ON check, if it *does* actually
+happen, we need to release j_state_lock since this function is always
+supposed to release that lock. Otherwise, things will quickly grind
+to a halt after the WARN_ON trips.
+
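+For reference, the annotations tell sparse how the lock state changes
+across the function boundary (a sketch, with a hypothetical example()):
+
+  void example(journal_t *journal)
+  __acquires(&journal->j_state_lock)   /* returns with the lock held */
+  __releases(&journal->j_state_lock)   /* entered with the lock held */
+  {
+          /* may drop and re-take j_state_lock internally */
+  }
+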
+Fixes: 96f1e0974575 ("jbd2: avoid long hold times of j_state_lock...")
+Cc: stable@kernel.org
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/checkpoint.c | 2 ++
+ fs/jbd2/transaction.c | 4 +++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct j
+ * for a checkpoint to free up some space in the log.
+ */
+ void __jbd2_log_wait_for_space(journal_t *journal)
++__acquires(&journal->j_state_lock)
++__releases(&journal->j_state_lock)
+ {
+ int nblocks, space_left;
+ /* assert_spin_locked(&journal->j_state_lock); */
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -195,8 +195,10 @@ static void wait_transaction_switching(j
+ DEFINE_WAIT(wait);
+
+ if (WARN_ON(!journal->j_running_transaction ||
+- journal->j_running_transaction->t_state != T_SWITCH))
++ journal->j_running_transaction->t_state != T_SWITCH)) {
++ read_unlock(&journal->j_state_lock);
+ return;
++ }
+ prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ read_unlock(&journal->j_state_lock);
--- /dev/null
+From bcbc0b2e275f0a797de11a10eff495b4571863fc Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Thu, 29 Oct 2020 11:54:42 +0200
+Subject: mei: protect mei_cl_mtu from null dereference
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit bcbc0b2e275f0a797de11a10eff495b4571863fc upstream.
+
+A receive callback is queued while the client is still connected
+but can still be called after the client has been disconnected. Upon
+disconnect cl->me_cl is set to NULL, hence we need to check that
+the ME client is not NULL in mei_cl_mtu to avoid a
+null dereference.
+
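+The race, in sketch form:
+
+  rx callback (queued earlier)        disconnect path
+  ----------------------------        ---------------
+                                      cl->me_cl = NULL;
+  mei_cl_mtu(cl)
+    -> cl->me_cl->props...            /* NULL dereference without the check */
+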
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Link: https://lore.kernel.org/r/20201029095444.957924-2-tomas.winkler@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/client.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/misc/mei/client.h
++++ b/drivers/misc/mei/client.h
+@@ -164,11 +164,11 @@ static inline u8 mei_cl_me_id(const stru
+ *
+ * @cl: host client
+ *
+- * Return: mtu
++ * Return: mtu or 0 if client is not connected
+ */
+ static inline size_t mei_cl_mtu(const struct mei_cl *cl)
+ {
+- return cl->me_cl->props.max_msg_length;
++ return cl->me_cl ? cl->me_cl->props.max_msg_length : 0;
+ }
+
+ /**
--- /dev/null
+From 38935861d85a4d9a353d1dd5a156c97700e2765d Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 13 Nov 2020 22:51:40 -0800
+Subject: mm/compaction: count pages and stop correctly during page isolation
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit 38935861d85a4d9a353d1dd5a156c97700e2765d upstream.
+
+In isolate_migratepages_block, when cc->alloc_contig is true, we are
+able to isolate compound pages. But nr_migratepages and nr_isolated did
+not count compound pages correctly, causing us to isolate more pages
+than we thought.
+
+So count compound pages as the number of base pages they contain.
+Otherwise, we might be trapped in the too_many_isolated while loop,
+since the actual number of isolated pages can go up to
+COMPACT_CLUSTER_MAX*512=16384 (where COMPACT_CLUSTER_MAX is 32),
+because we only stop isolation after cc->nr_migratepages reaches
+COMPACT_CLUSTER_MAX.
+
+In addition, after we fix the issue above, cc->nr_migratepages could
+never be equal to COMPACT_CLUSTER_MAX if compound pages are isolated,
+thus page isolation could not stop as we intended. Change the isolation
+stop condition to '>='.
+
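+Worked example (2MB THPs, so compound_nr(page) == 512, and
+COMPACT_CLUSTER_MAX == 32): previously, isolating 32 THPs bumped
+cc->nr_migratepages by only 32 while actually isolating 32*512 = 16384
+base pages; with the fix, the first THP already adds 512, and the '>='
+check stops isolation right there.
+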
+The issue can be triggered as follows:
+
+In a system with 16GB memory and an 8GB CMA region reserved by
+hugetlb_cma, if we first allocate 10GB THPs and mlock them (so some THPs
+are allocated in the CMA region and mlocked), reserving 6 1GB hugetlb
+pages via /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages will
+get stuck (looping in too_many_isolated function) until we kill either
+task. With the patch applied, oom will kill the application with 10GB
+THPs and let hugetlb page reservation finish.
+
+[ziy@nvidia.com: v3]
+
+Link: https://lkml.kernel.org/r/20201030183809.3616803-1-zi.yan@sent.com
+Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201029200435.3386066-1-zi.yan@sent.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1013,8 +1013,8 @@ isolate_migratepages_block(struct compac
+
+ isolate_success:
+ list_add(&page->lru, &cc->migratepages);
+- cc->nr_migratepages++;
+- nr_isolated++;
++ cc->nr_migratepages += compound_nr(page);
++ nr_isolated += compound_nr(page);
+
+ /*
+ * Avoid isolating too much unless this block is being
+@@ -1022,7 +1022,7 @@ isolate_success:
+ * or a lock is contended. For contention, isolate quickly to
+ * potentially remove one source of contention.
+ */
+- if (cc->nr_migratepages == COMPACT_CLUSTER_MAX &&
++ if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
+ !cc->rescan && !cc->contended) {
+ ++low_pfn;
+ break;
+@@ -1133,7 +1133,7 @@ isolate_migratepages_range(struct compac
+ if (!pfn)
+ break;
+
+- if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
++ if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
+ break;
+ }
+
--- /dev/null
+From d20bdd571ee5c9966191568527ecdb1bd4b52368 Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Fri, 13 Nov 2020 22:51:43 -0800
+Subject: mm/compaction: stop isolation if too many pages are isolated and we have pages to migrate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit d20bdd571ee5c9966191568527ecdb1bd4b52368 upstream.
+
+In isolate_migratepages_block, if we have too many isolated pages and
+nr_migratepages is not zero, we should try to migrate what we have
+without wasting time on isolating.
+
+In theory it's possible that multiple parallel compactions will cause
+too_many_isolated() to become true even if each has isolated less than
+COMPACT_CLUSTER_MAX, and loop forever in the while loop. Bailing
+immediately prevents that.
+
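+In sketch form, the patched loop head:
+
+  while (unlikely(too_many_isolated(pgdat))) {
+          /* migrate what we already have instead of spinning here */
+          if (cc->nr_migratepages)
+                  return 0;
+          ...
+  }
+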
+[vbabka@suse.cz: changelog addition]
+
+Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations")
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Link: https://lkml.kernel.org/r/20201030183809.3616803-2-zi.yan@sent.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -818,6 +818,10 @@ isolate_migratepages_block(struct compac
+ * delay for some time until fewer pages are isolated
+ */
+ while (unlikely(too_many_isolated(pgdat))) {
++ /* stop isolation if there are still pages not migrated */
++ if (cc->nr_migratepages)
++ return 0;
++
+ /* async migration should just abort */
+ if (cc->mode == MIGRATE_ASYNC)
+ return 0;
--- /dev/null
+From 96e1fac162cc0086c50b2b14062112adb2ba640e Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@ziepe.ca>
+Date: Fri, 13 Nov 2020 22:51:56 -0800
+Subject: mm/gup: use unpin_user_pages() in __gup_longterm_locked()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 96e1fac162cc0086c50b2b14062112adb2ba640e upstream.
+
+When FOLL_PIN is passed to __get_user_pages() the page list must be put
+back using unpin_user_pages() otherwise the page pin reference persists
+in a corrupted state.
+
+There are two places in the unwind of __gup_longterm_locked() that put
+the pages back without checking. Normally on error this function would
+return the partial page list making this the caller's responsibility,
+but in these two cases the caller is not allowed to see these pages at
+all.
+
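+The rule being enforced, in sketch form (FOLL_PIN is set internally by
+the pin_user_pages() API family):
+
+  nr = pin_user_pages(start, nr_pages, gup_flags, pages, NULL);
+  ...
+  unpin_user_pages(pages, nr);    /* matching unpin, not put_page() */
+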
+Fixes: 3faa52c03f44 ("mm/gup: track FOLL_PIN pages")
+Reported-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/0-v2-3ae7d9d162e2+2a7-gup_cma_fix_jgg@nvidia.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/gup.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1637,8 +1637,11 @@ check_again:
+ /*
+ * drop the above get_user_pages reference.
+ */
+- for (i = 0; i < nr_pages; i++)
+- put_page(pages[i]);
++ if (gup_flags & FOLL_PIN)
++ unpin_user_pages(pages, nr_pages);
++ else
++ for (i = 0; i < nr_pages; i++)
++ put_page(pages[i]);
+
+ if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+@@ -1718,8 +1721,11 @@ static long __gup_longterm_locked(struct
+ goto out;
+
+ if (check_dax_vmas(vmas_tmp, rc)) {
+- for (i = 0; i < rc; i++)
+- put_page(pages[i]);
++ if (gup_flags & FOLL_PIN)
++ unpin_user_pages(pages, rc);
++ else
++ for (i = 0; i < rc; i++)
++ put_page(pages[i]);
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
--- /dev/null
+From 22e4663e916321b72972c69ca0c6b962f529bd78 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 13 Nov 2020 22:51:53 -0800
+Subject: mm/slub: fix panic in slab_alloc_node()
+
+From: Laurent Dufour <ldufour@linux.ibm.com>
+
+commit 22e4663e916321b72972c69ca0c6b962f529bd78 upstream.
+
+While doing memory hot-unplug operation on a PowerPC VM running 1024 CPUs
+with 11TB of ram, I hit the following panic:
+
+ BUG: Kernel NULL pointer dereference on read at 0x00000007
+ Faulting instruction address: 0xc000000000456048
+ Oops: Kernel access of bad area, sig: 11 [#2]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS= 2048 NUMA pSeries
+ Modules linked in: rpadlpar_io rpaphp
+ CPU: 160 PID: 1 Comm: systemd Tainted: G D 5.9.0 #1
+ NIP: c000000000456048 LR: c000000000455fd4 CTR: c00000000047b350
+ REGS: c00006028d1b77a0 TRAP: 0300 Tainted: G D (5.9.0)
+ MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24004228 XER: 00000000
+ CFAR: c00000000000f1b0 DAR: 0000000000000007 DSISR: 40000000 IRQMASK: 0
+ GPR00: c000000000455fd4 c00006028d1b7a30 c000000001bec800 0000000000000000
+ GPR04: 0000000000000dc0 0000000000000000 00000000000374ef c00007c53df99320
+ GPR08: 000007c53c980000 0000000000000000 000007c53c980000 0000000000000000
+ GPR12: 0000000000004400 c00000001e8e4400 0000000000000000 0000000000000f6a
+ GPR16: 0000000000000000 c000000001c25930 c000000001d62528 00000000000000c1
+ GPR20: c000000001d62538 c00006be469e9000 0000000fffffffe0 c0000000003c0ff8
+ GPR24: 0000000000000018 0000000000000000 0000000000000dc0 0000000000000000
+ GPR28: c00007c513755700 c000000001c236a4 c00007bc4001f800 0000000000000001
+ NIP [c000000000456048] __kmalloc_node+0x108/0x790
+ LR [c000000000455fd4] __kmalloc_node+0x94/0x790
+ Call Trace:
+ kvmalloc_node+0x58/0x110
+ mem_cgroup_css_online+0x10c/0x270
+ online_css+0x48/0xd0
+ cgroup_apply_control_enable+0x2c4/0x470
+ cgroup_mkdir+0x408/0x5f0
+ kernfs_iop_mkdir+0x90/0x100
+ vfs_mkdir+0x138/0x250
+ do_mkdirat+0x154/0x1c0
+ system_call_exception+0xf8/0x200
+ system_call_common+0xf0/0x27c
+ Instruction dump:
+ e93e0000 e90d0030 39290008 7cc9402a e94d0030 e93e0000 7ce95214 7f89502a
+ 2fbc0000 419e0018 41920230 e9270010 <89290007> 7f994800 419e0220 7ee6bb78
+
+This pointing to the following code:
+
+ mm/slub.c:2851
+ if (unlikely(!object || !node_match(page, node))) {
+ c000000000456038: 00 00 bc 2f cmpdi cr7,r28,0
+ c00000000045603c: 18 00 9e 41 beq cr7,c000000000456054 <__kmalloc_node+0x114>
+ node_match():
+ mm/slub.c:2491
+ if (node != NUMA_NO_NODE && page_to_nid(page) != node)
+ c000000000456040: 30 02 92 41 beq cr4,c000000000456270 <__kmalloc_node+0x330>
+ page_to_nid():
+ include/linux/mm.h:1294
+ c000000000456044: 10 00 27 e9 ld r9,16(r7)
+ c000000000456048: 07 00 29 89 lbz r9,7(r9) <<<< r9 = NULL
+ node_match():
+ mm/slub.c:2491
+ c00000000045604c: 00 48 99 7f cmpw cr7,r25,r9
+ c000000000456050: 20 02 9e 41 beq cr7,c000000000456270 <__kmalloc_node+0x330>
+
+The panic occurred in slab_alloc_node() when checking for the page's node:
+
+ object = c->freelist;
+ page = c->page;
+ if (unlikely(!object || !node_match(page, node))) {
+ object = __slab_alloc(s, gfpflags, node, addr, c);
+ stat(s, ALLOC_SLOWPATH);
+
+The issue is that object is not NULL while page is NULL, which is odd
+but may happen if the cache flush happened after loading object but
+before loading page. Thus checking for the page pointer is required
+too.
+
+The cache flush is done through an inter processor interrupt when a
+piece of memory is off-lined. That interrupt is triggered when a memory
+hot-unplug operation is initiated and offline_pages() is calling the
+slub's MEM_GOING_OFFLINE callback slab_mem_going_offline_callback()
+which is calling flush_cpu_slab(). If that interrupt is caught between
+the reading of c->freelist and the reading of c->page, this could lead
+to such a situation. That situation is expected and the later call to
+this_cpu_cmpxchg_double() will detect the change to c->freelist and redo
+the whole operation.
+
+In commit 6159d0f5c03e ("mm/slub.c: page is always non-NULL in
+node_match()") the check on the page pointer was removed on the
+assumption that page is always valid when node_match() is called. It
+happens that this is not true in this particular case, so check for
+page before calling node_match() here.
+
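+The race window, annotated (sketch):
+
+  object = c->freelist;   /* the flush_cpu_slab() IPI may land here, */
+  page = c->page;         /* leaving object != NULL but page == NULL */
+  if (unlikely(!object || !page || !node_match(page, node)))
+          object = __slab_alloc(s, gfpflags, node, addr, c);
+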
+Fixes: 6159d0f5c03e ("mm/slub.c: page is always non-NULL in node_match()")
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Christoph Lameter <cl@linux.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: Scott Cheloha <cheloha@linux.ibm.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201027190406.33283-1-ldufour@linux.ibm.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2848,7 +2848,7 @@ redo:
+
+ object = c->freelist;
+ page = c->page;
+- if (unlikely(!object || !node_match(page, node))) {
++ if (unlikely(!object || !page || !node_match(page, node))) {
+ object = __slab_alloc(s, gfpflags, node, addr, c);
+ stat(s, ALLOC_SLOWPATH);
+ } else {
--- /dev/null
+From 2da9f6305f306ffbbb44790675799328fb73119d Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 13 Nov 2020 22:51:46 -0800
+Subject: mm/vmscan: fix NR_ISOLATED_FILE corruption on 64-bit
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 2da9f6305f306ffbbb44790675799328fb73119d upstream.
+
+Previously the negated unsigned long would be cast back to signed long
+which would have the correct negative value. After commit 730ec8c01a2b
+("mm/vmscan.c: change prototype for shrink_page_list"), the large
+unsigned int converts to a large positive signed long.
+
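+The conversion problem, in sketch form (with 64-bit long):
+
+  unsigned long a = 5;    /* type of nr_reclaimed before 730ec8c01a2b */
+  unsigned int b = 5;     /* type of nr_reclaimed after it            */
+
+  (long)-a   /* 0xfffffffffffffffb, read back as -5: correct      */
+  (long)-b   /* 0x00000000fffffffb == 4294967291: large positive  */
+  -(long)b   /* -5: correct again, which is what the fix does     */
+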
+Symptoms include CMA allocations hanging forever holding the cma_mutex
+due to alloc_contig_range->...->isolate_migratepages_block waiting
+forever in "while (unlikely(too_many_isolated(pgdat)))".
+
+[akpm@linux-foundation.org: fix -stat.nr_lazyfree_fail as well, per Michal]
+
+Fixes: 730ec8c01a2b ("mm/vmscan.c: change prototype for shrink_page_list")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vaneet Narang <v.narang@samsung.com>
+Cc: Maninder Singh <maninder1.s@samsung.com>
+Cc: Amit Sahrawat <a.sahrawat@samsung.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201029032320.1448441-1-npiggin@gmail.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1514,7 +1514,8 @@ unsigned int reclaim_clean_pages_from_li
+ nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
+ TTU_IGNORE_ACCESS, &stat, true);
+ list_splice(&clean_pages, page_list);
+- mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -nr_reclaimed);
++ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
++ -(long)nr_reclaimed);
+ /*
+ * Since lazyfree pages are isolated from file LRU from the beginning,
+ * they will rotate back to anonymous LRU in the end if it failed to
+@@ -1524,7 +1525,7 @@ unsigned int reclaim_clean_pages_from_li
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON,
+ stat.nr_lazyfree_fail);
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
+- -stat.nr_lazyfree_fail);
++ -(long)stat.nr_lazyfree_fail);
+ return nr_reclaimed;
+ }
+
--- /dev/null
+From f5785283dd64867a711ca1fb1f5bb172f252ecdf Mon Sep 17 00:00:00 2001
+From: Wengang Wang <wen.gang.wang@oracle.com>
+Date: Fri, 13 Nov 2020 22:52:23 -0800
+Subject: ocfs2: initialize ip_next_orphan
+
+From: Wengang Wang <wen.gang.wang@oracle.com>
+
+commit f5785283dd64867a711ca1fb1f5bb172f252ecdf upstream.
+
+Though the problem was found on an older 4.1.12 kernel, I think
+upstream has the same issue.
+
+In one node in the cluster, there is the following callback trace:
+
+ # cat /proc/21473/stack
+ __ocfs2_cluster_lock.isra.36+0x336/0x9e0 [ocfs2]
+ ocfs2_inode_lock_full_nested+0x121/0x520 [ocfs2]
+ ocfs2_evict_inode+0x152/0x820 [ocfs2]
+ evict+0xae/0x1a0
+ iput+0x1c6/0x230
+ ocfs2_orphan_filldir+0x5d/0x100 [ocfs2]
+ ocfs2_dir_foreach_blk+0x490/0x4f0 [ocfs2]
+ ocfs2_dir_foreach+0x29/0x30 [ocfs2]
+ ocfs2_recover_orphans+0x1b6/0x9a0 [ocfs2]
+ ocfs2_complete_recovery+0x1de/0x5c0 [ocfs2]
+ process_one_work+0x169/0x4a0
+ worker_thread+0x5b/0x560
+ kthread+0xcb/0xf0
+ ret_from_fork+0x61/0x90
+
+The above stack is not reasonable; the final iput shouldn't happen in
+the ocfs2_orphan_filldir() function. Looking at the code,
+
+ 2067 /* Skip inodes which are already added to recover list, since dio may
+ 2068 * happen concurrently with unlink/rename */
+ 2069 if (OCFS2_I(iter)->ip_next_orphan) {
+ 2070 iput(iter);
+ 2071 return 0;
+ 2072 }
+ 2073
+
+The logic thinks the inode is already in the recover list on seeing
+that ip_next_orphan is non-NULL, so it skips this inode after dropping
+the reference which was incremented in ocfs2_iget().
+
+However, if the inode were already in the recover list, it would hold
+another reference and the iput() at line 2070 would not be the final
+iput (dropping the last reference). So I don't think the inode is
+really in the recover list (no vmcore to confirm).
+
+Note that ocfs2_queue_orphans(), though not shown in the call back
+trace, is holding the cluster lock on the orphan directory when looking
+up unlinked inodes. The on-disk inode eviction could involve a lot of
+IO which may need a long time to finish. That means this node could
+hold the cluster lock for a very long time, which can lead to lock
+requests (from other nodes) to the orphan directory hanging for a long
+time.
+
+Looking further at ip_next_orphan, I found it's not initialized when
+allocating a new ocfs2_inode_info structure.
+
+This causes the reflink operations from some nodes to hang for a very
+long time waiting for the cluster lock on the orphan directory.
+
+Fix: initialize ip_next_orphan as NULL.
+
+Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201109171746.27884-1-wen.gang.wang@oracle.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/super.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *
+
+ oi->ip_blkno = 0ULL;
+ oi->ip_clusters = 0;
++ oi->ip_next_orphan = NULL;
+
+ ocfs2_resv_init_once(&oi->ip_la_data_resv);
+
--- /dev/null
+From df5b0ab3e08a156701b537809914b339b0daa526 Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 13 Nov 2020 22:52:07 -0800
+Subject: reboot: fix overflow parsing reboot cpu number
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit df5b0ab3e08a156701b537809914b339b0daa526 upstream.
+
+Limit the CPU number to num_possible_cpus(), because setting it to a
+value lower than INT_MAX but higher than NR_CPUS produces the following
+error on reboot and shutdown:
+
+ BUG: unable to handle page fault for address: ffffffff90ab1bb0
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 1c09067 P4D 1c09067 PUD 1c0a063 PMD 0
+ Oops: 0000 [#1] SMP
+ CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 5.9.0-rc8-kvm #110
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
+ RIP: 0010:migrate_to_reboot_cpu+0xe/0x60
+ Code: ea ea 00 48 89 fa 48 c7 c7 30 57 f1 81 e9 fa ef ff ff 66 2e 0f 1f 84 00 00 00 00 00 53 8b 1d d5 ea ea 00 e8 14 33 fe ff 89 da <48> 0f a3 15 ea fc bd 00 48 89 d0 73 29 89 c2 c1 e8 06 65 48 8b 3c
+ RSP: 0018:ffffc90000013e08 EFLAGS: 00010246
+ RAX: ffff88801f0a0000 RBX: 0000000077359400 RCX: 0000000000000000
+ RDX: 0000000077359400 RSI: 0000000000000002 RDI: ffffffff81c199e0
+ RBP: ffffffff81c1e3c0 R08: ffff88801f41f000 R09: ffffffff81c1e348
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+ R13: 00007f32bedf8830 R14: 00000000fee1dead R15: 0000000000000000
+ FS: 00007f32bedf8980(0000) GS:ffff88801f480000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: ffffffff90ab1bb0 CR3: 000000001d057000 CR4: 00000000000006a0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ __do_sys_reboot.cold+0x34/0x5b
+ do_syscall_64+0x2d/0x40
+
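+A worked example of the added bound check (assuming a 4-CPU machine,
+so num_possible_cpus() == 4):
+
+  /* reboot=warm,s64 */
+  if (reboot_cpu >= num_possible_cpus()) {   /* 64 >= 4 */
+          pr_err(...);
+          reboot_cpu = 0;                    /* fall back to CPU 0 */
+  }
+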
+Fixes: 1b3a5d02ee07 ("reboot: move arch/x86 reboot= handling to generic kernel")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Fabian Frederick <fabf@skynet.be>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Robin Holt <robinmholt@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201103214025.116799-3-mcroce@linux.microsoft.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/reboot.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/reboot.c
++++ b/kernel/reboot.c
+@@ -558,6 +558,13 @@ static int __init reboot_setup(char *str
+ reboot_cpu = simple_strtoul(str+3, NULL, 0);
+ else
+ *mode = REBOOT_SOFT;
++ if (reboot_cpu >= num_possible_cpus()) {
++ pr_err("Ignoring the CPU number in reboot= option. "
++ "CPU %d exceeds possible cpu number %d\n",
++ reboot_cpu, num_possible_cpus());
++ reboot_cpu = 0;
++ break;
++ }
+ break;
+
+ case 'g':
--- /dev/null
+From 8b92c4ff4423aa9900cf838d3294fcade4dbda35 Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 13 Nov 2020 22:52:02 -0800
+Subject: Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint"
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit 8b92c4ff4423aa9900cf838d3294fcade4dbda35 upstream.
+
+Patch series "fix parsing of reboot= cmdline", v3.
+
+The parsing of the reboot= cmdline has two major errors:
+
+ - a missing bound check can crash the system on reboot
+
+ - parsing of the cpu number only works if specified last
+
+Fix both.
+
+This patch (of 2):
+
+This reverts commit 616feab753972b97.
+
+kstrtoint() and simple_strtoul() have a subtle difference which makes
+them non-interchangeable: if a non-digit character is found amid the
+parsing, the former will return an error, while the latter will just
+stop parsing, e.g. simple_strtoul("123xyx") = 123.
+
+The kernel cmdline reboot= argument allows specifying the CPU used for
+rebooting, with the syntax `s####` among the other flags, e.g.
+"reboot=warm,s31,force", so if this flag is not the last one given, it
+is silently ignored, as well as any subsequent flags.
+
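+Concretely, for the "s31,force" tail of the example above:
+
+  kstrtoint("31,force", 0, &cpu)        /* -EINVAL: trailing ",force" */
+  simple_strtoul("31,force", NULL, 0)   /* 31: stops at the comma     */
+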
+Fixes: 616feab75397 ("kernel/reboot.c: convert simple_strtoul to kstrtoint")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Guenter Roeck <linux@roeck-us.net>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Robin Holt <robinmholt@gmail.com>
+Cc: Fabian Frederick <fabf@skynet.be>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201103214025.116799-2-mcroce@linux.microsoft.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/reboot.c | 21 +++++++--------------
+ 1 file changed, 7 insertions(+), 14 deletions(-)
+
+--- a/kernel/reboot.c
++++ b/kernel/reboot.c
+@@ -551,22 +551,15 @@ static int __init reboot_setup(char *str
+ break;
+
+ case 's':
+- {
+- int rc;
+-
+- if (isdigit(*(str+1))) {
+- rc = kstrtoint(str+1, 0, &reboot_cpu);
+- if (rc)
+- return rc;
+- } else if (str[1] == 'm' && str[2] == 'p' &&
+- isdigit(*(str+3))) {
+- rc = kstrtoint(str+3, 0, &reboot_cpu);
+- if (rc)
+- return rc;
+- } else
++ if (isdigit(*(str+1)))
++ reboot_cpu = simple_strtoul(str+1, NULL, 0);
++ else if (str[1] == 'm' && str[2] == 'p' &&
++ isdigit(*(str+3)))
++ reboot_cpu = simple_strtoul(str+3, NULL, 0);
++ else
+ *mode = REBOOT_SOFT;
+ break;
+- }
++
+ case 'g':
+ *mode = REBOOT_GPIO;
+ break;
revert-usb-musb-convert-to-devm_platform_ioremap_resource_byname.patch
usb-cdc-acm-add-disable_echo-for-renesas-usb-download-mode.patch
usb-typec-ucsi-report-power-supply-changes.patch
+xhci-hisilicon-fix-refercence-leak-in-xhci_histb_probe.patch
+virtio-virtio_console-fix-dma-memory-allocation-for-rproc-serial.patch
+mei-protect-mei_cl_mtu-from-null-dereference.patch
+futex-don-t-enable-irqs-unconditionally-in-put_pi_state.patch
+jbd2-fix-up-sparse-warnings-in-checkpoint-code.patch
+bootconfig-extend-the-magic-check-range-to-the-preceding-3-bytes.patch
+mm-compaction-count-pages-and-stop-correctly-during-page-isolation.patch
+mm-compaction-stop-isolation-if-too-many-pages-are-isolated-and-we-have-pages-to-migrate.patch
+mm-slub-fix-panic-in-slab_alloc_node.patch
+mm-vmscan-fix-nr_isolated_file-corruption-on-64-bit.patch
+mm-gup-use-unpin_user_pages-in-__gup_longterm_locked.patch
+compiler.h-fix-barrier_data-on-clang.patch
+revert-kernel-reboot.c-convert-simple_strtoul-to-kstrtoint.patch
+reboot-fix-overflow-parsing-reboot-cpu-number.patch
+hugetlbfs-fix-anon-huge-page-migration-race.patch
+ocfs2-initialize-ip_next_orphan.patch
--- /dev/null
+From 9d516aa82b7d4fbe7f6303348697960ba03a530b Mon Sep 17 00:00:00 2001
+From: Alexander Lobakin <alobakin@pm.me>
+Date: Wed, 4 Nov 2020 15:31:36 +0000
+Subject: virtio: virtio_console: fix DMA memory allocation for rproc serial
+
+From: Alexander Lobakin <alobakin@pm.me>
+
+commit 9d516aa82b7d4fbe7f6303348697960ba03a530b upstream.
+
+Since commit 086d08725d34 ("remoteproc: create vdev subdevice with
+specific dma memory pool"), every remoteproc has a DMA subdevice
+("remoteprocX#vdevYbuffer") for each virtio device, which inherits
+DMA capabilities from the corresponding platform device. This made it
+possible to associate a different DMA pool with each vdev, and required
+virtio drivers to perform DMA operations with the parent device
+(vdev->dev.parent) instead of the grandparent (vdev->dev.parent->parent).
+
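+The resulting device hierarchy, for reference:
+
+  remoteprocX                     <- rproc device, no DMA capabilities
+    remoteprocX#vdevYbuffer       <- DMA subdevice holding the pool
+      virtioY                     <- vdev; DMA must use dev.parent
+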
+virtio_rpmsg_bus was already changed in the same merge cycle with
+commit d999b622fcfb ("rpmsg: virtio: allocate buffer from parent"),
+but virtio_console did not. In fact, operations using the grandparent
+worked fine while the grandparent was the platform device, but since
+commit c774ad010873 ("remoteproc: Fix and restore the parenting
+hierarchy for vdev") this was changed, and now the grandparent device
+is the remoteproc device without any DMA capabilities.
+So, starting with v5.8-rc1 the following warning is observed:
+
+[ 2.483925] ------------[ cut here ]------------
+[ 2.489148] WARNING: CPU: 3 PID: 101 at kernel/dma/mapping.c:427 0x80e7eee8
+[ 2.489152] Modules linked in: virtio_console(+)
+[ 2.503737] virtio_rpmsg_bus rpmsg_core
+[ 2.508903]
+[ 2.528898] <Other modules, stack and call trace here>
+[ 2.913043]
+[ 2.914907] ---[ end trace 93ac8746beab612c ]---
+[ 2.920102] virtio-ports vport1p0: Error allocating inbufs
+
+kernel/dma/mapping.c:427 is:
+
+WARN_ON_ONCE(!dev->coherent_dma_mask);
+
+obviously because the grandparent now is remoteproc dev without any
+DMA caps:
+
+[ 3.104943] Parent: remoteproc0#vdev1buffer, grandparent: remoteproc0
+
+Fix this the same way as it was for virtio_rpmsg_bus, using just the
+parent device (vdev->dev.parent, "remoteprocX#vdevYbuffer") for DMA
+operations.
+This also now makes it possible to reserve DMA pools/buffers for rproc
+serial via Device Tree.
+
+Fixes: c774ad010873 ("remoteproc: Fix and restore the parenting hierarchy for vdev")
+Cc: stable@vger.kernel.org # 5.1+
+Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Alexander Lobakin <alobakin@pm.me>
+Date: Thu, 5 Nov 2020 11:10:24 +0800
+Link: https://lore.kernel.org/r/AOKowLclCbOCKxyiJ71WeNyuAAj2q8EUtxrXbyky5E@cp7-web-042.plabs.ch
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/char/virtio_console.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/virtio_console.c
++++ b/drivers/char/virtio_console.c
+@@ -435,12 +435,12 @@ static struct port_buffer *alloc_buf(str
+ /*
+ * Allocate DMA memory from ancestor. When a virtio
+ * device is created by remoteproc, the DMA memory is
+- * associated with the grandparent device:
+- * vdev => rproc => platform-dev.
++ * associated with the parent device:
++ * virtioY => remoteprocX#vdevYbuffer.
+ */
+- if (!vdev->dev.parent || !vdev->dev.parent->parent)
++ buf->dev = vdev->dev.parent;
++ if (!buf->dev)
+ goto free_buf;
+- buf->dev = vdev->dev.parent->parent;
+
+ /* Increase device refcnt to avoid freeing it */
+ get_device(buf->dev);
--- /dev/null
+From 76255470ffa2795a44032e8b3c1ced11d81aa2db Mon Sep 17 00:00:00 2001
+From: Zhang Qilong <zhangqilong3@huawei.com>
+Date: Fri, 6 Nov 2020 20:22:21 +0800
+Subject: xhci: hisilicon: fix reference leak in xhci_histb_probe
+
+From: Zhang Qilong <zhangqilong3@huawei.com>
+
+commit 76255470ffa2795a44032e8b3c1ced11d81aa2db upstream.
+
+pm_runtime_get_sync() will increment the pm usage count first and
+resume the device later. We should decrease the usage count whether
+it succeeded or failed (maybe the runtime of the device has an
+error, the device is in an inaccessible state, or some other error
+state). If we do not call a put operation to decrease the reference
+count, it will result in a reference leak in xhci_histb_probe.
+Moreover, the device cannot enter the idle state and will always
+stay busy or in some other non-idle state later. So we fix it by
+jumping to the error handling branch.
+
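+The general pattern (a sketch, with a hypothetical setup() step):
+
+  pm_runtime_get_sync(dev);      /* usage count is raised even on failure */
+  ret = setup(dev);
+  if (ret)
+          goto err_put;          /* every error path must balance it */
+  ...
+  err_put:
+          pm_runtime_put_noidle(dev);
+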
+Fixes: c508f41da0788 ("xhci: hisilicon: support HiSilicon STB xHCI host controller")
+Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
+Link: https://lore.kernel.org/r/20201106122221.2304528-1-zhangqilong3@huawei.com
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci-histb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/usb/host/xhci-histb.c
++++ b/drivers/usb/host/xhci-histb.c
+@@ -240,7 +240,7 @@ static int xhci_histb_probe(struct platf
+ /* Initialize dma_mask and coherent_dma_mask to 32-bits */
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+ if (ret)
+- return ret;
++ goto disable_pm;
+
+ hcd = usb_create_hcd(driver, dev, dev_name(dev));
+ if (!hcd) {