--- /dev/null
+From 1642b4450d20e31439c80c28256c8eee08684698 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 30 Dec 2020 21:34:14 +0000
+Subject: io_uring: add a helper for setting a ref node
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 1642b4450d20e31439c80c28256c8eee08684698 upstream.
+
+Setting a new reference node to a file data is not trivial, don't repeat
+it, add and use a helper.
+
+Cc: stable@vger.kernel.org # 5.6+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6991,6 +6991,16 @@ static void io_file_ref_kill(struct perc
+ complete(&data->done);
+ }
+
++static void io_sqe_files_set_node(struct fixed_file_data *file_data,
++ struct fixed_file_ref_node *ref_node)
++{
++ spin_lock(&file_data->lock);
++ file_data->node = ref_node;
++ list_add_tail(&ref_node->node, &file_data->ref_list);
++ spin_unlock(&file_data->lock);
++ percpu_ref_get(&file_data->refs);
++}
++
+ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+ {
+ struct fixed_file_data *data = ctx->file_data;
+@@ -7519,11 +7529,7 @@ static int io_sqe_files_register(struct
+ return PTR_ERR(ref_node);
+ }
+
+- file_data->node = ref_node;
+- spin_lock(&file_data->lock);
+- list_add_tail(&ref_node->node, &file_data->ref_list);
+- spin_unlock(&file_data->lock);
+- percpu_ref_get(&file_data->refs);
++ io_sqe_files_set_node(file_data, ref_node);
+ return ret;
+ out_fput:
+ for (i = 0; i < ctx->nr_user_files; i++) {
+@@ -7679,11 +7685,7 @@ static int __io_sqe_files_update(struct
+
+ if (needs_switch) {
+ percpu_ref_kill(&data->node->refs);
+- spin_lock(&data->lock);
+- list_add_tail(&ref_node->node, &data->ref_list);
+- data->node = ref_node;
+- spin_unlock(&data->lock);
+- percpu_ref_get(&ctx->file_data->refs);
++ io_sqe_files_set_node(data, ref_node);
+ } else
+ destroy_fixed_file_ref_node(ref_node);
+
--- /dev/null
+From 77788775c7132a8d93c6930ab1bd84fc743c7cb7 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 29 Dec 2020 10:50:46 -0700
+Subject: io_uring: don't assume mm is constant across submits
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 77788775c7132a8d93c6930ab1bd84fc743c7cb7 upstream.
+
+If we COW the identity, we assume that ->mm never changes. But this
+isn't true if multiple processes end up sharing the ring. Hence treat
+id->mm like any other process component when it comes to the
+identity mapping. This is pretty trivial, just moving the existing grab
+into io_grab_identity(), and including a check for the match.
+
+Cc: stable@vger.kernel.org # 5.10
+Fixes: 1e6fa5216a0e ("io_uring: COW io_identity on mismatch")
+Reported-by: Christian Brauner <christian.brauner@ubuntu.com>
+Tested-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1369,6 +1369,13 @@ static bool io_grab_identity(struct io_k
+ spin_unlock_irq(&ctx->inflight_lock);
+ req->work.flags |= IO_WQ_WORK_FILES;
+ }
++ if (!(req->work.flags & IO_WQ_WORK_MM) &&
++ (def->work_flags & IO_WQ_WORK_MM)) {
++ if (id->mm != current->mm)
++ return false;
++ mmgrab(id->mm);
++ req->work.flags |= IO_WQ_WORK_MM;
++ }
+
+ return true;
+ }
+@@ -1393,13 +1400,6 @@ static void io_prep_async_work(struct io
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ }
+
+- /* ->mm can never change on us */
+- if (!(req->work.flags & IO_WQ_WORK_MM) &&
+- (def->work_flags & IO_WQ_WORK_MM)) {
+- mmgrab(id->mm);
+- req->work.flags |= IO_WQ_WORK_MM;
+- }
+-
+ /* if we fail grabbing identity, we must COW, regrab, and retry */
+ if (io_grab_identity(req))
+ return;
--- /dev/null
+From 1ffc54220c444774b7f09e6d2121e732f8e19b94 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 30 Dec 2020 21:34:15 +0000
+Subject: io_uring: fix io_sqe_files_unregister() hangs
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 1ffc54220c444774b7f09e6d2121e732f8e19b94 upstream.
+
+io_sqe_files_unregister() uninterruptibly waits for enqueued ref nodes,
+however requests keeping them may never complete, e.g. because of some
+userspace dependency. Make sure it's interruptible otherwise it would
+hang forever.
+
+Cc: stable@vger.kernel.org # 5.6+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/io_uring.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -941,6 +941,10 @@ enum io_mem_account {
+ ACCT_PINNED,
+ };
+
++static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
++static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
++ struct io_ring_ctx *ctx);
++
+ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+ struct io_comp_state *cs);
+ static void io_cqring_fill_event(struct io_kiocb *req, long res);
+@@ -7004,11 +7008,15 @@ static void io_sqe_files_set_node(struct
+ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+ {
+ struct fixed_file_data *data = ctx->file_data;
+- struct fixed_file_ref_node *ref_node = NULL;
++ struct fixed_file_ref_node *backup_node, *ref_node = NULL;
+ unsigned nr_tables, i;
++ int ret;
+
+ if (!data)
+ return -ENXIO;
++ backup_node = alloc_fixed_file_ref_node(ctx);
++ if (!backup_node)
++ return -ENOMEM;
+
+ spin_lock(&data->lock);
+ ref_node = data->node;
+@@ -7020,7 +7028,18 @@ static int io_sqe_files_unregister(struc
+
+ /* wait for all refs nodes to complete */
+ flush_delayed_work(&ctx->file_put_work);
+- wait_for_completion(&data->done);
++ do {
++ ret = wait_for_completion_interruptible(&data->done);
++ if (!ret)
++ break;
++ ret = io_run_task_work_sig();
++ if (ret < 0) {
++ percpu_ref_resurrect(&data->refs);
++ reinit_completion(&data->done);
++ io_sqe_files_set_node(data, backup_node);
++ return ret;
++ }
++ } while (1);
+
+ __io_sqe_files_unregister(ctx);
+ nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
+@@ -7031,6 +7050,7 @@ static int io_sqe_files_unregister(struc
+ kfree(data);
+ ctx->file_data = NULL;
+ ctx->nr_user_files = 0;
++ destroy_fixed_file_ref_node(backup_node);
+ return 0;
+ }
+
--- /dev/null
+From b1b6b5a30dce872f500dc43f067cba8e7f86fc7d Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 30 Dec 2020 21:34:16 +0000
+Subject: kernel/io_uring: cancel io_uring before task works
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit b1b6b5a30dce872f500dc43f067cba8e7f86fc7d upstream.
+
+Cancelling io_uring requests requires either being able to run
+currently enqueued task_works or having task_work shut down by that moment.
+Otherwise io_uring_cancel_files() may be waiting for requests that won't
+ever complete.
+
+Go with the first way and do cancellations before setting PF_EXITING and
+so before putting the task_work infrastructure into a transition state
+where task_work_run() would better not be called.
+
+Cc: stable@vger.kernel.org # 5.5+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/file.c | 2 --
+ kernel/exit.c | 2 ++
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -21,7 +21,6 @@
+ #include <linux/rcupdate.h>
+ #include <linux/close_range.h>
+ #include <net/sock.h>
+-#include <linux/io_uring.h>
+
+ unsigned int sysctl_nr_open __read_mostly = 1024*1024;
+ unsigned int sysctl_nr_open_min = BITS_PER_LONG;
+@@ -453,7 +452,6 @@ void exit_files(struct task_struct *tsk)
+ struct files_struct * files = tsk->files;
+
+ if (files) {
+- io_uring_files_cancel(files);
+ task_lock(tsk);
+ tsk->files = NULL;
+ task_unlock(tsk);
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -63,6 +63,7 @@
+ #include <linux/random.h>
+ #include <linux/rcuwait.h>
+ #include <linux/compat.h>
++#include <linux/io_uring.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/unistd.h>
+@@ -762,6 +763,7 @@ void __noreturn do_exit(long code)
+ schedule();
+ }
+
++ io_uring_files_cancel(tsk->files);
+ exit_signals(tsk); /* sets PF_EXITING */
+
+ /* sync mm's RSS info before statistics gathering */
--- /dev/null
+From f0bb29e8c4076444d32df00c8d32e169ceecf283 Mon Sep 17 00:00:00 2001
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Tue, 29 Dec 2020 15:15:01 -0800
+Subject: lib/zlib: fix inflating zlib streams on s390
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit f0bb29e8c4076444d32df00c8d32e169ceecf283 upstream.
+
+Decompressing zlib streams on s390 fails with "incorrect data check"
+error.
+
+Userspace zlib checks inflate_state.flags in order to byteswap checksums
+only for zlib streams, and s390 hardware inflate code, which was ported
+from there, tries to match this behavior. At the same time, kernel zlib
+does not use inflate_state.flags, so it contains essentially random
+values. For many use cases either zlib stream is zeroed out or checksum
+is not used, so this problem is masked, but at least SquashFS is still
+affected.
+
+Fix by always passing a checksum to and from the hardware as is, which
+matches zlib_inflate()'s expectations.
+
+Link: https://lkml.kernel.org/r/20201215155551.894884-1-iii@linux.ibm.com
+Fixes: 126196100063 ("lib/zlib: add s390 hardware support for kernel zlib_inflate")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
+Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Mikhail Zaslonko <zaslonko@linux.ibm.com>
+Cc: <stable@vger.kernel.org> [5.6+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/zlib_dfltcc/dfltcc_inflate.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/lib/zlib_dfltcc/dfltcc_inflate.c
++++ b/lib/zlib_dfltcc/dfltcc_inflate.c
+@@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate(
+ param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1);
+ if (param->hl)
+ param->nt = 0; /* Honor history for the first block */
+- param->cv = state->flags ? REVERSE(state->check) : state->check;
++ param->cv = state->check;
+
+ /* Inflate */
+ do {
+@@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate(
+ state->bits = param->sbb;
+ state->whave = param->hl;
+ state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
+- state->check = state->flags ? REVERSE(param->cv) : param->cv;
++ state->check = param->cv;
+ if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
+ /* Report an error if stream is corrupted */
+ state->mode = BAD;
--- /dev/null
+From e7dd91c456a8cdbcd7066997d15e36d14276a949 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Tue, 29 Dec 2020 15:14:25 -0800
+Subject: mm/hugetlb: fix deadlock in hugetlb_cow error path
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit e7dd91c456a8cdbcd7066997d15e36d14276a949 upstream.
+
+syzbot reported the deadlock here [1]. The issue is in hugetlb cow
+error handling when there are not enough huge pages for the faulting
+task which took the original reservation. It is possible that other
+(child) tasks could have consumed pages associated with the reservation.
+In this case, we want the task which took the original reservation to
+succeed. So, we unmap any associated pages in children so that they can
+be used by the faulting task that owns the reservation.
+
+The unmapping code needs to hold i_mmap_rwsem in write mode. However,
+due to commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd
+sharing synchronization") we are already holding i_mmap_rwsem in read
+mode when hugetlb_cow is called.
+
+Technically, i_mmap_rwsem does not need to be held in read mode for COW
+mappings as they can not share pmd's. Modifying the fault code to not
+take i_mmap_rwsem in read mode for COW (and other non-sharable) mappings
+is too involved for a stable fix.
+
+Instead, we simply drop the hugetlb_fault_mutex and i_mmap_rwsem before
+unmapping. This is OK as it is technically not needed. They are
+reacquired after unmapping as expected by calling code. Since this is
+done in an uncommon error path, the overhead of dropping and reacquiring
+mutexes is acceptable.
+
+While making changes, remove redundant BUG_ON after unmap_ref_private.
+
+[1] https://lkml.kernel.org/r/000000000000b73ccc05b5cf8558@google.com
+
+Link: https://lkml.kernel.org/r/4c5781b8-3b00-761e-c0c7-c5edebb6ec1a@oracle.com
+Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reported-by: syzbot+5eee4145df3c15e96625@syzkaller.appspotmail.com
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 22 +++++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4106,10 +4106,30 @@ retry_avoidcopy:
+ * may get SIGKILLed if it later faults.
+ */
+ if (outside_reserve) {
++ struct address_space *mapping = vma->vm_file->f_mapping;
++ pgoff_t idx;
++ u32 hash;
++
+ put_page(old_page);
+ BUG_ON(huge_pte_none(pte));
++ /*
++ * Drop hugetlb_fault_mutex and i_mmap_rwsem before
++ * unmapping. unmapping needs to hold i_mmap_rwsem
++ * in write mode. Dropping i_mmap_rwsem in read mode
++ * here is OK as COW mappings do not interact with
++ * PMD sharing.
++ *
++ * Reacquire both after unmap operation.
++ */
++ idx = vma_hugecache_offset(h, vma, haddr);
++ hash = hugetlb_fault_mutex_hash(mapping, idx);
++ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
++ i_mmap_unlock_read(mapping);
++
+ unmap_ref_private(mm, vma, old_page, haddr);
+- BUG_ON(huge_pte_none(pte));
++
++ i_mmap_lock_read(mapping);
++ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ spin_lock(ptl);
+ ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
+ if (likely(ptep &&
--- /dev/null
+From dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Tue, 29 Dec 2020 15:14:37 -0800
+Subject: mm: memmap defer init doesn't work as expected
+
+From: Baoquan He <bhe@redhat.com>
+
+commit dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 upstream.
+
+VMware observed a performance regression during memmap init on their
+platform, and bisected to commit 73a6e474cb376 ("mm: memmap_init:
+iterate over memblock regions rather that check each PFN") causing it.
+
+Before the commit:
+
+ [0.033176] Normal zone: 1445888 pages used for memmap
+ [0.033176] Normal zone: 89391104 pages, LIFO batch:63
+ [0.035851] ACPI: PM-Timer IO Port: 0x448
+
+With commit
+
+ [0.026874] Normal zone: 1445888 pages used for memmap
+ [0.026875] Normal zone: 89391104 pages, LIFO batch:63
+ [2.028450] ACPI: PM-Timer IO Port: 0x448
+
+The root cause is the current memmap defer init doesn't work as expected.
+
+Before, memmap_init_zone() was used to do memmap init of one whole zone,
+to initialize all low zones of one numa node, but defer memmap init of
+the last zone in that numa node. However, since commit 73a6e474cb376,
+function memmap_init() is adapted to iterate over memblock regions
+inside one zone, then call memmap_init_zone() to do memmap init for each
+region.
+
+E.g., on VMware's system, the memory layout is as below; there are two
+memory regions in node 2. The current code will mistakenly initialize the
+whole 1st region [mem 0xab00000000-0xfcffffffff], then defer memmap init and
+initialize only one memory section of the 2nd region [mem
+0x10000000000-0x1033fffffff]. In fact, we expect only one memory
+section's memmap to be initialized. That's why the extra time is
+spent at this point.
+
+[ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff]
+[ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff]
+[ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff]
+[ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff]
+[ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff]
+[ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff]
+
+Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass
+down the real zone end pfn so that defer_init() can use it to judge
+whether deferral needs to be applied zone-wide.
+
+Link: https://lkml.kernel.org/r/20201223080811.16211-1-bhe@redhat.com
+Link: https://lkml.kernel.org/r/20201223080811.16211-2-bhe@redhat.com
+Fixes: commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN")
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Reported-by: Rahul Gopakumar <gopakumarr@vmware.com>
+Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/ia64/mm/init.c | 4 ++--
+ include/linux/mm.h | 5 +++--
+ mm/memory_hotplug.c | 2 +-
+ mm/page_alloc.c | 8 +++++---
+ 4 files changed, 11 insertions(+), 8 deletions(-)
+
+--- a/arch/ia64/mm/init.c
++++ b/arch/ia64/mm/init.c
+@@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end,
+
+ if (map_start < map_end)
+ memmap_init_zone((unsigned long)(map_end - map_start),
+- args->nid, args->zone, page_to_pfn(map_start),
++ args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+ return 0;
+ }
+@@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid
+ unsigned long start_pfn)
+ {
+ if (!vmem_map) {
+- memmap_init_zone(size, nid, zone, start_pfn,
++ memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+ } else {
+ struct page *start;
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2439,8 +2439,9 @@ extern int __meminit __early_pfn_to_nid(
+ #endif
+
+ extern void set_dma_reserve(unsigned long new_dma_reserve);
+-extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+- enum meminit_context, struct vmem_altmap *, int migratetype);
++extern void memmap_init_zone(unsigned long, int, unsigned long,
++ unsigned long, unsigned long, enum meminit_context,
++ struct vmem_altmap *, int migratetype);
+ extern void setup_per_zone_wmarks(void);
+ extern int __meminit init_per_zone_wmark_min(void);
+ extern void mem_init(void);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct
+ * expects the zone spans the pfn range. All the pages in the range
+ * are reserved so nobody should be touching them so we should be safe
+ */
+- memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
++ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
+ MEMINIT_HOTPLUG, altmap, migratetype);
+
+ set_zone_contiguous(zone);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -448,6 +448,8 @@ defer_init(int nid, unsigned long pfn, u
+ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+ return false;
+
++ if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
++ return true;
+ /*
+ * We start only with one section of pages, more pages are added as
+ * needed until the rest of deferred pages are initialized.
+@@ -6050,7 +6052,7 @@ overlap_memmap_init(unsigned long zone,
+ * zone stats (e.g., nr_isolate_pageblock) are touched.
+ */
+ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+- unsigned long start_pfn,
++ unsigned long start_pfn, unsigned long zone_end_pfn,
+ enum meminit_context context,
+ struct vmem_altmap *altmap, int migratetype)
+ {
+@@ -6086,7 +6088,7 @@ void __meminit memmap_init_zone(unsigned
+ if (context == MEMINIT_EARLY) {
+ if (overlap_memmap_init(zone, &pfn))
+ continue;
+- if (defer_init(nid, pfn, end_pfn))
++ if (defer_init(nid, pfn, zone_end_pfn))
+ break;
+ }
+
+@@ -6200,7 +6202,7 @@ void __meminit __weak memmap_init(unsign
+
+ if (end_pfn > start_pfn) {
+ size = end_pfn - start_pfn;
+- memmap_init_zone(size, nid, zone, start_pfn,
++ memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+ }
+ }
--- /dev/null
+From 0e1d9ca1766f5d95fb881f57b6c4a1ffa63d4648 Mon Sep 17 00:00:00 2001
+From: Viresh Kumar <viresh.kumar@linaro.org>
+Date: Mon, 28 Dec 2020 10:51:04 +0530
+Subject: opp: Call the missing clk_put() on error
+
+From: Viresh Kumar <viresh.kumar@linaro.org>
+
+commit 0e1d9ca1766f5d95fb881f57b6c4a1ffa63d4648 upstream.
+
+Fix the clock reference counting by calling the missing clk_put() in the
+error path.
+
+Cc: v5.10 <stable@vger.kernel.org> # v5.10
+Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER")
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/opp/core.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/opp/core.c
++++ b/drivers/opp/core.c
+@@ -1111,7 +1111,7 @@ static struct opp_table *_allocate_opp_t
+ ret = dev_pm_opp_of_find_icc_paths(dev, opp_table);
+ if (ret) {
+ if (ret == -EPROBE_DEFER)
+- goto remove_opp_dev;
++ goto put_clk;
+
+ dev_warn(dev, "%s: Error finding interconnect paths: %d\n",
+ __func__, ret);
+@@ -1125,6 +1125,9 @@ static struct opp_table *_allocate_opp_t
+ list_add(&opp_table->node, &opp_tables);
+ return opp_table;
+
++put_clk:
++ if (!IS_ERR(opp_table->clk))
++ clk_put(opp_table->clk);
+ remove_opp_dev:
+ _remove_opp_dev(opp_dev, opp_table);
+ err:
--- /dev/null
+From 976509bb310b913d30577f15b58bdd30effb0542 Mon Sep 17 00:00:00 2001
+From: Quanyang Wang <quanyang.wang@windriver.com>
+Date: Thu, 24 Dec 2020 18:49:27 +0800
+Subject: opp: fix memory leak in _allocate_opp_table
+
+From: Quanyang Wang <quanyang.wang@windriver.com>
+
+commit 976509bb310b913d30577f15b58bdd30effb0542 upstream.
+
+In function _allocate_opp_table, opp_dev is allocated and referenced
+by opp_table via _add_opp_dev. But in the case that the subsequent calls
+return -EPROBE_DEFER, it will jump to err label and opp_table will be
+freed. Then opp_dev becomes an unreferenced object to cause memory leak.
+So let's call _remove_opp_dev to do the cleanup.
+
+This fixes the following kmemleak report:
+
+unreferenced object 0xffff000801524a00 (size 128):
+ comm "swapper/0", pid 1, jiffies 4294892465 (age 84.616s)
+ hex dump (first 32 bytes):
+ 40 00 56 01 08 00 ff ff 40 00 56 01 08 00 ff ff @.V.....@.V.....
+ b8 52 77 7f 08 00 ff ff 00 3c 4c 00 08 00 ff ff .Rw......<L.....
+ backtrace:
+ [<00000000b1289fb1>] kmemleak_alloc+0x30/0x40
+ [<0000000056da48f0>] kmem_cache_alloc+0x3d4/0x588
+ [<00000000a84b3b0e>] _add_opp_dev+0x2c/0x88
+ [<0000000062a380cd>] _add_opp_table_indexed+0x124/0x268
+ [<000000008b4c8f1f>] dev_pm_opp_of_add_table+0x20/0x1d8
+ [<00000000e5316798>] dev_pm_opp_of_cpumask_add_table+0x48/0xf0
+ [<00000000db0a8ec2>] dt_cpufreq_probe+0x20c/0x448
+ [<0000000030a3a26c>] platform_probe+0x68/0xd8
+ [<00000000c618e78d>] really_probe+0xd0/0x3a0
+ [<00000000642e856f>] driver_probe_device+0x58/0xb8
+ [<00000000f10f5307>] device_driver_attach+0x74/0x80
+ [<0000000004f254b8>] __driver_attach+0x58/0xe0
+ [<0000000009d5d19e>] bus_for_each_dev+0x70/0xc8
+ [<0000000000d22e1c>] driver_attach+0x24/0x30
+ [<0000000001d4e952>] bus_add_driver+0x14c/0x1f0
+ [<0000000089928aaa>] driver_register+0x64/0x120
+
+Cc: v5.10 <stable@vger.kernel.org> # v5.10
+Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER")
+Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
+[ Viresh: Added the stable tag ]
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/opp/core.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/opp/core.c
++++ b/drivers/opp/core.c
+@@ -1102,7 +1102,7 @@ static struct opp_table *_allocate_opp_t
+ if (IS_ERR(opp_table->clk)) {
+ ret = PTR_ERR(opp_table->clk);
+ if (ret == -EPROBE_DEFER)
+- goto err;
++ goto remove_opp_dev;
+
+ dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret);
+ }
+@@ -1111,7 +1111,7 @@ static struct opp_table *_allocate_opp_t
+ ret = dev_pm_opp_of_find_icc_paths(dev, opp_table);
+ if (ret) {
+ if (ret == -EPROBE_DEFER)
+- goto err;
++ goto remove_opp_dev;
+
+ dev_warn(dev, "%s: Error finding interconnect paths: %d\n",
+ __func__, ret);
+@@ -1125,6 +1125,8 @@ static struct opp_table *_allocate_opp_t
+ list_add(&opp_table->node, &opp_tables);
+ return opp_table;
+
++remove_opp_dev:
++ _remove_opp_dev(opp_dev, opp_table);
+ err:
+ kfree(opp_table);
+ return ERR_PTR(ret);
--- /dev/null
+From fa4d0f1992a96f6d7c988ef423e3127e613f6ac9 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Tue, 8 Dec 2020 21:29:44 -0800
+Subject: scsi: block: Fix a race in the runtime power management code
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit fa4d0f1992a96f6d7c988ef423e3127e613f6ac9 upstream.
+
+With the current implementation the following race can happen:
+
+ * blk_pre_runtime_suspend() calls blk_freeze_queue_start() and
+ blk_mq_unfreeze_queue().
+
+ * blk_queue_enter() calls blk_queue_pm_only() and that function returns
+ true.
+
+ * blk_queue_enter() calls blk_pm_request_resume() and that function does
+ not call pm_request_resume() because the queue runtime status is
+ RPM_ACTIVE.
+
+ * blk_pre_runtime_suspend() changes the queue status into RPM_SUSPENDING.
+
+Fix this race by changing the queue runtime status into RPM_SUSPENDING
+before switching q_usage_counter to atomic mode.
+
+Link: https://lore.kernel.org/r/20201209052951.16136-2-bvanassche@acm.org
+Fixes: 986d413b7c15 ("blk-mq: Enable support for runtime power management")
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: stable <stable@vger.kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Acked-by: Alan Stern <stern@rowland.harvard.edu>
+Acked-by: Stanley Chu <stanley.chu@mediatek.com>
+Co-developed-by: Can Guo <cang@codeaurora.org>
+Signed-off-by: Can Guo <cang@codeaurora.org>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-pm.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/block/blk-pm.c
++++ b/block/blk-pm.c
+@@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct reque
+
+ WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
+
++ spin_lock_irq(&q->queue_lock);
++ q->rpm_status = RPM_SUSPENDING;
++ spin_unlock_irq(&q->queue_lock);
++
+ /*
+ * Increase the pm_only counter before checking whether any
+ * non-PM blk_queue_enter() calls are in progress to avoid that any
+@@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct reque
+ /* Switch q_usage_counter back to per-cpu mode. */
+ blk_mq_unfreeze_queue(q);
+
+- spin_lock_irq(&q->queue_lock);
+- if (ret < 0)
++ if (ret < 0) {
++ spin_lock_irq(&q->queue_lock);
++ q->rpm_status = RPM_ACTIVE;
+ pm_runtime_mark_last_busy(q->dev);
+- else
+- q->rpm_status = RPM_SUSPENDING;
+- spin_unlock_irq(&q->queue_lock);
++ spin_unlock_irq(&q->queue_lock);
+
+- if (ret)
+ blk_clear_pm_only(q);
++ }
+
+ return ret;
+ }
io_uring-close-a-small-race-gap-for-files-cancel.patch
jffs2-allow-setting-rp_size-to-zero-during-remountin.patch
jffs2-fix-null-pointer-dereference-in-rp_size-fs-opt.patch
+spi-dw-bt1-fix-undefined-devm_mux_control_get-symbol.patch
+opp-fix-memory-leak-in-_allocate_opp_table.patch
+opp-call-the-missing-clk_put-on-error.patch
+scsi-block-fix-a-race-in-the-runtime-power-management-code.patch
+mm-hugetlb-fix-deadlock-in-hugetlb_cow-error-path.patch
+mm-memmap-defer-init-doesn-t-work-as-expected.patch
+lib-zlib-fix-inflating-zlib-streams-on-s390.patch
+io_uring-don-t-assume-mm-is-constant-across-submits.patch
+io_uring-add-a-helper-for-setting-a-ref-node.patch
+io_uring-fix-io_sqe_files_unregister-hangs.patch
+kernel-io_uring-cancel-io_uring-before-task-works.patch
--- /dev/null
+From 7218838109fef61cdec988ff728e902d434c9cc5 Mon Sep 17 00:00:00 2001
+From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
+Date: Fri, 27 Nov 2020 17:46:11 +0300
+Subject: spi: dw-bt1: Fix undefined devm_mux_control_get symbol
+
+From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
+
+commit 7218838109fef61cdec988ff728e902d434c9cc5 upstream.
+
+I mistakenly added the select attributes to the SPI_DW_BT1_DIRMAP config
+instead of having them defined in SPI_DW_BT1. If the kernel doesn't have
+the MULTIPLEXER and MUX_MMIO configs manually enabled and the
+SPI_DW_BT1_DIRMAP config hasn't been selected, Baikal-T1 SPI device will
+always fail to be probed by the driver. Fix that and the error reported by
+the test robot:
+
+>> ld.lld: error: undefined symbol: devm_mux_control_get
+ >>> referenced by spi-dw-bt1.c
+ >>> spi/spi-dw-bt1.o:(dw_spi_bt1_sys_init) in archive drivers/built-in.a
+
+by moving the MULTIPLEXER/MUX_MMIO configs selection to the SPI_DW_BT1
+config.
+
+Link: https://lore.kernel.org/lkml/202011161745.uYRlekse-lkp@intel.com/
+Link: https://lore.kernel.org/linux-spi/20201116040721.8001-1-rdunlap@infradead.org/
+Fixes: abf00907538e ("spi: dw: Add Baikal-T1 SPI Controller glue driver")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Ramil Zaripov <Ramil.Zaripov@baikalelectronics.ru>
+Link: https://lore.kernel.org/r/20201127144612.4204-1-Sergey.Semin@baikalelectronics.ru
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/Kconfig | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/spi/Kconfig
++++ b/drivers/spi/Kconfig
+@@ -256,6 +256,7 @@ config SPI_DW_BT1
+ tristate "Baikal-T1 SPI driver for DW SPI core"
+ depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+ select MULTIPLEXER
++ select MUX_MMIO
+ help
+ Baikal-T1 SoC is equipped with three DW APB SSI-based MMIO SPI
+ controllers. Two of them are pretty much normal: with IRQ, DMA,
+@@ -269,8 +270,6 @@ config SPI_DW_BT1
+ config SPI_DW_BT1_DIRMAP
+ bool "Directly mapped Baikal-T1 Boot SPI flash support"
+ depends on SPI_DW_BT1
+- select MULTIPLEXER
+- select MUX_MMIO
+ help
+ Directly mapped SPI flash memory is an interface specific to the
+ Baikal-T1 System Boot Controller. It is a 16MB MMIO region, which