--- /dev/null
+From d4d18e3ec6091843f607e8929a56723e28f393a6 Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+Date: Wed, 17 Apr 2019 21:29:29 -0700
+Subject: arm64: mm: Ensure tail of unaligned initrd is reserved
+
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+
+commit d4d18e3ec6091843f607e8929a56723e28f393a6 upstream.
+
+In the event that the start address of the initrd is not aligned, but
+has an aligned size, the base + size will not cover the entire initrd
+image and there is a chance that the kernel will corrupt the tail of the
+image.
+
+By aligning the end of the initrd to a page boundary and then
+subtracting the adjusted start address, the memblock reservation will
+cover all pages that contain the initrd.
+
+Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size")
+Cc: stable@vger.kernel.org
+Acked-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/init.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -406,7 +406,7 @@ void __init arm64_memblock_init(void)
+ * Otherwise, this is a no-op
+ */
+ u64 base = phys_initrd_start & PAGE_MASK;
+- u64 size = PAGE_ALIGN(phys_initrd_size);
++ u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
+
+ /*
+ * We can only add back the initrd memory if we don't end up
--- /dev/null
+From b257b48cd5830c5b1d0c347eb281f9c28056f881 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 15 Apr 2019 14:37:34 +0800
+Subject: crypto: lrw - Fix atomic sleep when walking skcipher
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit b257b48cd5830c5b1d0c347eb281f9c28056f881 upstream.
+
+When we perform a walk in the completion function, we need to ensure
+that it is atomic.
+
+Fixes: ac3c8f36c31d ("crypto: lrw - Do not use auxiliary buffer")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Ondrej Mosnacek <omosnace@redhat.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/lrw.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/crypto/lrw.c
++++ b/crypto/lrw.c
+@@ -212,8 +212,12 @@ static void crypt_done(struct crypto_asy
+ {
+ struct skcipher_request *req = areq->data;
+
+- if (!err)
++ if (!err) {
++ struct rctx *rctx = skcipher_request_ctx(req);
++
++ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = xor_tweak_post(req);
++ }
+
+ skcipher_request_complete(req, err);
+ }
--- /dev/null
+From 44427c0fbc09b448b22410978a4ef6ee37599d25 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 15 Apr 2019 14:35:19 +0800
+Subject: crypto: xts - Fix atomic sleep when walking skcipher
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 44427c0fbc09b448b22410978a4ef6ee37599d25 upstream.
+
+When we perform a walk in the completion function, we need to ensure
+that it is atomic.
+
+Reported-by: syzbot+6f72c20560060c98b566@syzkaller.appspotmail.com
+Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Ondrej Mosnacek <omosnace@redhat.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/xts.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/crypto/xts.c
++++ b/crypto/xts.c
+@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_asy
+ {
+ struct skcipher_request *req = areq->data;
+
+- if (!err)
++ if (!err) {
++ struct rctx *rctx = skcipher_request_ctx(req);
++
++ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = xor_tweak_post(req);
++ }
+
+ skcipher_request_complete(req, err);
+ }
--- /dev/null
+From 102bbe34b31c9159e714432afd64458f6f3876d7 Mon Sep 17 00:00:00 2001
+From: Baolin Wang <baolin.wang@linaro.org>
+Date: Wed, 10 Apr 2019 15:47:54 +0800
+Subject: gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC
+
+From: Baolin Wang <baolin.wang@linaro.org>
+
+commit 102bbe34b31c9159e714432afd64458f6f3876d7 upstream.
+
+When setting the sync EIC to the IRQ_TYPE_EDGE_BOTH type, we missed setting
+the SPRD_EIC_SYNC_INTMODE register to 0, which selects edge signal detection.
+
+This patch fixes the issue.
+
+Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpio/gpio-eic-sprd.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpio/gpio-eic-sprd.c
++++ b/drivers/gpio/gpio-eic-sprd.c
+@@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct
+ irq_set_handler_locked(data, handle_edge_irq);
+ break;
+ case IRQ_TYPE_EDGE_BOTH:
++ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1);
+ irq_set_handler_locked(data, handle_edge_irq);
+ break;
--- /dev/null
+From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001
+From: Josh Collier <josh.d.collier@intel.com>
+Date: Mon, 15 Apr 2019 11:34:22 -0700
+Subject: IB/rdmavt: Fix frwr memory registration
+
+From: Josh Collier <josh.d.collier@intel.com>
+
+commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream.
+
+The current implementation was not properly handling FRWR memory
+registrations. This was uncovered by commit 27f26cec761da ("xprtrdma:
+Plant XID in on-the-wire RDMA offset (FRWR)"), in which xprtrdma, which is
+used for NFS over RDMA, started failing because it was the first ULP to
+modify the ib_mr iova, resulting in the NFS server getting a REMOTE ACCESS
+ERROR when attempting to perform RDMA Writes to the client.
+
+The fix is to properly capture the true iova, offset, and length in the
+call to ib_map_mr_sg, and then update the iova when processing the
+IB_WR_REG_MR work request on the send queue.
+
+Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Josh Collier <josh.d.collier@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/drivers/infiniband/sw/rdmavt/mr.c
++++ b/drivers/infiniband/sw/rdmavt/mr.c
+@@ -611,11 +611,6 @@ static int rvt_set_page(struct ib_mr *ib
+ if (unlikely(mapped_segs == mr->mr.max_segs))
+ return -ENOMEM;
+
+- if (mr->mr.length == 0) {
+- mr->mr.user_base = addr;
+- mr->mr.iova = addr;
+- }
+-
+ m = mapped_segs / RVT_SEGSZ;
+ n = mapped_segs % RVT_SEGSZ;
+ mr->mr.map[m]->segs[n].vaddr = (void *)addr;
+@@ -633,17 +628,24 @@ static int rvt_set_page(struct ib_mr *ib
+ * @sg_nents: number of entries in sg
+ * @sg_offset: offset in bytes into sg
+ *
++ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
++ *
+ * Return: number of sg elements mapped to the memory region
+ */
+ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+ {
+ struct rvt_mr *mr = to_imr(ibmr);
++ int ret;
+
+ mr->mr.length = 0;
+ mr->mr.page_shift = PAGE_SHIFT;
+- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+- rvt_set_page);
++ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
++ mr->mr.user_base = ibmr->iova;
++ mr->mr.iova = ibmr->iova;
++ mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
++ mr->mr.length = (size_t)ibmr->length;
++ return ret;
+ }
+
+ /**
+@@ -674,6 +676,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, s
+ ibmr->rkey = key;
+ mr->mr.lkey = key;
+ mr->mr.access_flags = access;
++ mr->mr.iova = ibmr->iova;
+ atomic_set(&mr->mr.lkey_invalid, 0);
+
+ return 0;
--- /dev/null
+From ae3d6a323347940f0548bbb4b17f0bb2e9164169 Mon Sep 17 00:00:00 2001
+From: YueHaibing <yuehaibing@huawei.com>
+Date: Thu, 25 Apr 2019 22:23:44 -0700
+Subject: lib/Kconfig.debug: fix build error without CONFIG_BLOCK
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+commit ae3d6a323347940f0548bbb4b17f0bb2e9164169 upstream.
+
+If CONFIG_TEST_KMOD is set to M while CONFIG_BLOCK is not set, XFS and
+BTRFS cannot be compiled successfully.
+
+Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com
+Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Joe Lawrence <joe.lawrence@redhat.com>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/Kconfig.debug | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -1952,6 +1952,7 @@ config TEST_KMOD
+ depends on m
+ depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
+ depends on NETDEVICES && NET_CORE && INET # for TUN
++ depends on BLOCK
+ select TEST_LKM
+ select XFS_FS
+ select TUN
--- /dev/null
+From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001
+From: Aurelien Jarno <aurelien@aurel32.net>
+Date: Tue, 9 Apr 2019 16:53:55 +0200
+Subject: MIPS: scall64-o32: Fix indirect syscall number load
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Aurelien Jarno <aurelien@aurel32.net>
+
+commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream.
+
+Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32))
+added indirect syscall detection for O32 processes running on MIPS64,
+but it did not work correctly for big-endian kernels/processes. The
+reason is that the syscall number is loaded from ARG1 using the lw
+instruction even though this is a 64-bit value, so zero is loaded
+instead of the syscall number.
+
+Fix the code by using the ld instruction instead. When running a 32-bit
+process on a 64-bit CPU, the values are properly sign-extended, so this
+ensures that the value passed to syscall_trace_enter is correct.
+
+Recent systemd versions with seccomp enabled whitelist the getpid
+syscall for their internal processes (e.g. systemd-journald), but call
+it through syscall(SYS_getpid). This fix therefore allows O32 big-endian
+systems with a 64-bit kernel to run recent systemd versions.
+
+Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
+Cc: <stable@vger.kernel.org> # v3.15+
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: James Hogan <jhogan@kernel.org>
+Cc: linux-mips@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/kernel/scall64-o32.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -125,7 +125,7 @@ trace_a_syscall:
+ subu t1, v0, __NR_O32_Linux
+ move a1, v0
+ bnez t1, 1f /* __NR_syscall at offset 0 */
+- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
++ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ .set pop
+
+ 1: jal syscall_trace_enter
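
A short illustration of the endianness detail behind this fix (not part of the
patch itself): on a big-endian 64-bit kernel, each o32 argument sits in an
8-byte save slot whose meaningful low word is at the higher address, so a
32-bit lw from the base of the slot reads the zero-filled upper word, while ld
reads the whole doubleword. A minimal user-space sketch, assuming a made-up
save slot holding syscall number 20:

    /* Illustrative sketch: a 32-bit load from the start of a 64-bit save
     * slot returns 0 on big-endian machines but the real value on
     * little-endian ones. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint64_t slot = 20;      /* pretend PT_R4 save slot, e.g. __NR_getpid */
        uint32_t first_word;

        /* "lw a1, PT_R4(sp)" reads only the first 4 bytes of the slot. */
        memcpy(&first_word, &slot, sizeof(first_word));

        /* Prints 0 on big-endian, 20 on little-endian; "ld" would always
         * see 20. */
        printf("%u\n", first_word);
        return 0;
    }
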
--- /dev/null
+From 24512228b7a3f412b5a51f189df302616b021c33 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@techsingularity.net>
+Date: Thu, 25 Apr 2019 22:23:51 -0700
+Subject: mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+commit 24512228b7a3f412b5a51f189df302616b021c33 upstream.
+
+Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small
+amounts of memory when an external fragmentation event occurs") "broke"
+memory management on parisc.
+
+The machine is not NUMA, but the DISCONTIG model creates three pgdats
+for the following ranges even though it is a UMA machine:
+
+ 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB
+ 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB
+ 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB
+
+Mikulas reported:
+
+ With the patch 1c30844d2, the kernel will incorrectly reclaim the
+ first zone when it fills up, ignoring the fact that there are two
+  completely free zones. Basically, it limits cache size to 1GiB.
+
+ For example, if I run:
+ # dd if=/dev/sda of=/dev/null bs=1M count=2048
+
+ - with the proper kernel, there should be "Buffers - 2GiB"
+ when this command finishes. With the patch 1c30844d2, buffers
+ will consume just 1GiB or slightly more, because the kernel was
+ incorrectly reclaiming them.
+
+The page allocator and reclaim makes assumptions that pgdats really
+represent NUMA nodes and zones represent ranges and makes decisions on
+that basis. Watermark boosting for small pgdats leads to unexpected
+results even though this would have behaved reasonably on SPARSEMEM.
+
+DISCONTIG is essentially deprecated and even parisc plans to move to
+SPARSEMEM, so there is no need to be fancy; this patch simply disables
+watermark boosting by default on DISCONTIGMEM.
+
+Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net
+Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Reported-by: Mikulas Patocka <mpatocka@redhat.com>
+Tested-by: Mikulas Patocka <mpatocka@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/sysctl/vm.txt | 16 ++++++++--------
+ mm/page_alloc.c | 13 +++++++++++++
+ 2 files changed, 21 insertions(+), 8 deletions(-)
+
+--- a/Documentation/sysctl/vm.txt
++++ b/Documentation/sysctl/vm.txt
+@@ -866,14 +866,14 @@ The intent is that compaction has less w
+ increase the success rate of future high-order allocations such as SLUB
+ allocations, THP and hugetlbfs pages.
+
+-To make it sensible with respect to the watermark_scale_factor parameter,
+-the unit is in fractions of 10,000. The default value of 15,000 means
+-that up to 150% of the high watermark will be reclaimed in the event of
+-a pageblock being mixed due to fragmentation. The level of reclaim is
+-determined by the number of fragmentation events that occurred in the
+-recent past. If this value is smaller than a pageblock then a pageblocks
+-worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
+-of 0 will disable the feature.
++To make it sensible with respect to the watermark_scale_factor
++parameter, the unit is in fractions of 10,000. The default value of
++15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
++watermark will be reclaimed in the event of a pageblock being mixed due
++to fragmentation. The level of reclaim is determined by the number of
++fragmentation events that occurred in the recent past. If this value is
++smaller than a pageblock then a pageblocks worth of pages will be reclaimed
++(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature.
+
+ =============================================================
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page
+
+ int min_free_kbytes = 1024;
+ int user_min_free_kbytes = -1;
++#ifdef CONFIG_DISCONTIGMEM
++/*
++ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
++ * are not on separate NUMA nodes. Functionally this works but with
++ * watermark_boost_factor, it can reclaim prematurely as the ranges can be
++ * quite small. By default, do not boost watermarks on discontigmem as in
++ * many cases very high-order allocations like THP are likely to be
++ * unsupported and the premature reclaim offsets the advantage of long-term
++ * fragmentation avoidance.
++ */
++int watermark_boost_factor __read_mostly;
++#else
+ int watermark_boost_factor __read_mostly = 15000;
++#endif
+ int watermark_scale_factor = 10;
+
+ static unsigned long nr_kernel_pages __initdata;
--- /dev/null
+From c660133c339f9ab684fdf568c0d51b9ae5e86002 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@mellanox.com>
+Date: Tue, 16 Apr 2019 14:07:25 +0300
+Subject: RDMA/mlx5: Do not allow the user to write to the clock page
+
+From: Jason Gunthorpe <jgg@mellanox.com>
+
+commit c660133c339f9ab684fdf568c0d51b9ae5e86002 upstream.
+
+The intent of this VMA was to be read-only from user space, but the
+VM_MAYWRITE masking was missed, so mprotect could make it writable.
+
+Cc: stable@vger.kernel.org
+Fixes: 5c99eaecb1fc ("IB/mlx5: Mmap the HCA's clock info to user-space")
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Reviewed-by: Haggai Eran <haggaie@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx5/main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -1982,6 +1982,7 @@ static int mlx5_ib_mmap_clock_info_page(
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
++ vma->vm_flags &= ~VM_MAYWRITE;
+
+ if (!dev->mdev->clock_info_page)
+ return -EOPNOTSUPP;
+@@ -2147,6 +2148,7 @@ static int mlx5_ib_mmap(struct ib_uconte
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
++ vma->vm_flags &= ~VM_MAYWRITE;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
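
The VM_MAYWRITE detail is worth spelling out: the VM_WRITE check in the hunk
above only rejects mappings created with PROT_WRITE, while a later
mprotect(PROT_WRITE) succeeds as long as VM_MAYWRITE is still set, which is
what the added line forbids. A rough kernel-side sketch of the resulting
pattern for a permanently read-only mapping (illustrative only;
example_map_read_only is a made-up name, not the driver's code):

    /* Sketch: a user mapping that can never be made writable. */
    static int example_map_read_only(struct vm_area_struct *vma, unsigned long pfn)
    {
        if (vma->vm_flags & VM_WRITE)      /* reject PROT_WRITE at mmap() time */
            return -EPERM;
        vma->vm_flags &= ~VM_MAYWRITE;     /* reject a later mprotect(PROT_WRITE) */

        return remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
                               vma->vm_page_prot);
    }
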
--- /dev/null
+From d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@mellanox.com>
+Date: Tue, 16 Apr 2019 14:07:26 +0300
+Subject: RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages
+
+From: Jason Gunthorpe <jgg@mellanox.com>
+
+commit d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 upstream.
+
+Since mlx5 supports device disassociate, it must use this API for all
+BAR page mmaps; otherwise the pages can remain mapped after the device
+is unplugged, causing a system crash.
+
+Cc: stable@vger.kernel.org
+Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Reviewed-by: Haggai Eran <haggaie@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx5/main.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -2154,14 +2154,12 @@ static int mlx5_ib_mmap(struct ib_uconte
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+- PAGE_SIZE, vma->vm_page_prot))
+- return -EAGAIN;
+- break;
++ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
++ PAGE_SIZE,
++ pgprot_noncached(vma->vm_page_prot));
+ case MLX5_IB_MMAP_CLOCK_INFO:
+ return mlx5_ib_mmap_clock_info_page(dev, vma, context);
+
--- /dev/null
+From 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@mellanox.com>
+Date: Tue, 16 Apr 2019 14:07:28 +0300
+Subject: RDMA/ucontext: Fix regression with disassociate
+
+From: Jason Gunthorpe <jgg@mellanox.com>
+
+commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream.
+
+When this code was consolidated, the intention was that the VMA would
+become backed by anonymous zero pages after the zap_vma_ptes - however, this
+very subtly relied on setting vm_ops = NULL and clearing the VM_SHARED
+bits to transform the VMA into an anonymous VMA. Since the vm_ops was
+removed, this broke.
+
+Now userspace gets a SIGBUS if it touches the vma after disassociation.
+
+Instead of converting the VMA to anonymous, provide a fault handler that
+puts a zeroed page into the VMA when user-space touches it after
+disassociation.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
+Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/uverbs.h | 1
+ drivers/infiniband/core/uverbs_main.c | 52 ++++++++++++++++++++++++++++++++--
+ 2 files changed, 50 insertions(+), 3 deletions(-)
+
+--- a/drivers/infiniband/core/uverbs.h
++++ b/drivers/infiniband/core/uverbs.h
+@@ -160,6 +160,7 @@ struct ib_uverbs_file {
+
+ struct mutex umap_lock;
+ struct list_head umaps;
++ struct page *disassociate_page;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+--- a/drivers/infiniband/core/uverbs_main.c
++++ b/drivers/infiniband/core/uverbs_main.c
+@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
+ put_device(&file->device->dev);
++
++ if (file->disassociate_page)
++ __free_pages(file->disassociate_page, 0);
+ kfree(file);
+ }
+
+@@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_ar
+ kfree(priv);
+ }
+
++/*
++ * Once the zap_vma_ptes has been called touches to the VMA will come here and
++ * we return a dummy writable zero page for all the pfns.
++ */
++static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
++{
++ struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
++ struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
++ vm_fault_t ret = 0;
++
++ if (!priv)
++ return VM_FAULT_SIGBUS;
++
++ /* Read only pages can just use the system zero page. */
++ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
++ vmf->page = ZERO_PAGE(vmf->vm_start);
++ get_page(vmf->page);
++ return 0;
++ }
++
++ mutex_lock(&ufile->umap_lock);
++ if (!ufile->disassociate_page)
++ ufile->disassociate_page =
++ alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
++
++ if (ufile->disassociate_page) {
++ /*
++ * This VMA is forced to always be shared so this doesn't have
++ * to worry about COW.
++ */
++ vmf->page = ufile->disassociate_page;
++ get_page(vmf->page);
++ } else {
++ ret = VM_FAULT_SIGBUS;
++ }
++ mutex_unlock(&ufile->umap_lock);
++
++ return ret;
++}
++
+ static const struct vm_operations_struct rdma_umap_ops = {
+ .open = rdma_umap_open,
+ .close = rdma_umap_close,
++ .fault = rdma_umap_fault,
+ };
+
+ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
+@@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
++ if (!(vma->vm_flags & VM_SHARED))
++ return ERR_PTR(-EINVAL);
++
+ if (vma->vm_end - vma->vm_start != size)
+ return ERR_PTR(-EINVAL);
+
+@@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struc
+ * at a time to get the lock ordering right. Typically there
+ * will only be one mm, so no big deal.
+ */
+- down_write(&mm->mmap_sem);
++ down_read(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
+ mutex_lock(&ufile->umap_lock);
+@@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struc
+
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
+ }
+ mutex_unlock(&ufile->umap_lock);
+ skip_mm:
+- up_write(&mm->mmap_sem);
++ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+ }
--- /dev/null
+From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001
+From: Xie XiuQi <xiexiuqi@huawei.com>
+Date: Sat, 20 Apr 2019 16:34:16 +0800
+Subject: sched/numa: Fix a possible divide-by-zero
+
+From: Xie XiuQi <xiexiuqi@huawei.com>
+
+commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream.
+
+sched_clock_cpu() may not be consistent between CPUs. If a task
+migrates to another CPU, then se.exec_start is set to that CPU's
+rq_clock_task() by update_stats_curr_start(). Specifically, the new
+value might be before the old value due to clock skew.
+
+So then if in numa_get_avg_runtime() the expression:
+
+ 'now - p->last_task_numa_placement'
+
+ends up as -1, then the divider '*period + 1' in task_numa_placement()
+is 0 and things go bang. Similar to update_curr(), check if time goes
+backwards to avoid this.
+
+[ peterz: Wrote new changelog. ]
+[ mingo: Tweaked the code comment. ]
+
+Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: cj.chengjian@huawei.com
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1994,6 +1994,10 @@ static u64 numa_get_avg_runtime(struct t
+ if (p->last_task_numa_placement) {
+ delta = runtime - p->last_sum_exec_runtime;
+ *period = now - p->last_task_numa_placement;
++
++ /* Avoid time going backwards, prevent potential divide error: */
++ if (unlikely((s64)*period < 0))
++ *period = 0;
+ } else {
+ delta = p->se.avg.load_sum;
+ *period = LOAD_AVG_MAX;
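
The divide-by-zero is a consequence of unsigned wrap-around: now and
p->last_task_numa_placement are u64, so when clock skew makes now the smaller
value the subtraction wraps to a huge number and the '*period + 1' divider in
task_numa_placement() overflows back to zero. A minimal stand-alone
illustration with made-up clock values:

    /* Illustrative sketch of the unsigned wrap behind the divide-by-zero. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t now = 1000, last = 1001;  /* skewed clocks: now < last */
        uint64_t period = now - last;      /* wraps to 0xffffffffffffffff */

        printf("period + 1 = %llu\n",
               (unsigned long long)(period + 1));   /* prints 0 */

        /* The fix clamps negative deltas before they reach the divider. */
        if ((int64_t)period < 0)
            period = 0;

        printf("clamped period = %llu\n", (unsigned long long)period);
        return 0;
    }
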
cifs-fix-page-reference-leak-with-readv-writev.patch
cifs-do-not-attempt-cifs-operation-on-smb2-rename-error.patch
tracing-fix-a-memory-leak-by-early-error-exit-in-trace_pid_write.patch
+tracing-fix-buffer_ref-pipe-ops.patch
+crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch
+crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch
+gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch
+zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch
+lib-kconfig.debug-fix-build-error-without-config_block.patch
+mips-scall64-o32-fix-indirect-syscall-number-load.patch
+trace-fix-preempt_enable_no_resched-abuse.patch
+mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch
+arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch
+ib-rdmavt-fix-frwr-memory-registration.patch
+rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch
+rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch
+rdma-ucontext-fix-regression-with-disassociate.patch
+sched-numa-fix-a-possible-divide-by-zero.patch
--- /dev/null
+From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 23 Apr 2019 22:03:18 +0200
+Subject: trace: Fix preempt_enable_no_resched() abuse
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream.
+
+Unless the very next line is schedule(), or implies it, one must not use
+preempt_enable_no_resched(). It can cause a preemption to go missing and
+thereby cause arbitrary delays, breaking the PREEMPT=y invariant.
+
+Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net
+
+Cc: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: the arch/x86 maintainers <x86@kernel.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: stable@vger.kernel.org
+Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ring_buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -776,7 +776,7 @@ u64 ring_buffer_time_stamp(struct ring_b
+
+ preempt_disable_notrace();
+ time = rb_time_stamp(buffer);
+- preempt_enable_no_resched_notrace();
++ preempt_enable_notrace();
+
+ return time;
+ }
--- /dev/null
+From b987222654f84f7b4ca95b3a55eca784cb30235b Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Thu, 4 Apr 2019 23:59:25 +0200
+Subject: tracing: Fix buffer_ref pipe ops
+
+From: Jann Horn <jannh@google.com>
+
+commit b987222654f84f7b4ca95b3a55eca784cb30235b upstream.
+
+This fixes multiple issues in buffer_pipe_buf_ops:
+
+ - The ->steal() handler must not return zero unless the pipe buffer has
+ the only reference to the page. But generic_pipe_buf_steal() assumes
+ that every reference to the pipe is tracked by the page's refcount,
+ which isn't true for these buffers - buffer_pipe_buf_get(), which
+ duplicates a buffer, doesn't touch the page's refcount.
+ Fix it by using generic_pipe_buf_nosteal(), which refuses every
+ attempted theft. It should be easy to actually support ->steal, but the
+ only current users of pipe_buf_steal() are the virtio console and FUSE,
+ and they also only use it as an optimization. So it's probably not worth
+ the effort.
+ - The ->get() and ->release() handlers can be invoked concurrently on pipe
+ buffers backed by the same struct buffer_ref. Make them safe against
+ concurrency by using refcount_t.
+ - The pointers stored in ->private were only zeroed out when the last
+ reference to the buffer_ref was dropped. As far as I know, this
+ shouldn't be necessary anyway, but if we do it, let's always do it.
+
+Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com
+
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: stable@vger.kernel.org
+Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer")
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/splice.c | 4 ++--
+ include/linux/pipe_fs_i.h | 1 +
+ kernel/trace/trace.c | 28 ++++++++++++++--------------
+ 3 files changed, 17 insertions(+), 16 deletions(-)
+
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -333,8 +333,8 @@ const struct pipe_buf_operations default
+ .get = generic_pipe_buf_get,
+ };
+
+-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+- struct pipe_buffer *buf)
++int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
++ struct pipe_buffer *buf)
+ {
+ return 1;
+ }
+--- a/include/linux/pipe_fs_i.h
++++ b/include/linux/pipe_fs_i.h
+@@ -181,6 +181,7 @@ void free_pipe_info(struct pipe_inode_in
+ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+ int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
+ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
++int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
+ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+ void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -6823,19 +6823,23 @@ struct buffer_ref {
+ struct ring_buffer *buffer;
+ void *page;
+ int cpu;
+- int ref;
++ refcount_t refcount;
+ };
+
++static void buffer_ref_release(struct buffer_ref *ref)
++{
++ if (!refcount_dec_and_test(&ref->refcount))
++ return;
++ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
++ kfree(ref);
++}
++
+ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+ {
+ struct buffer_ref *ref = (struct buffer_ref *)buf->private;
+
+- if (--ref->ref)
+- return;
+-
+- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+- kfree(ref);
++ buffer_ref_release(ref);
+ buf->private = 0;
+ }
+
+@@ -6844,7 +6848,7 @@ static void buffer_pipe_buf_get(struct p
+ {
+ struct buffer_ref *ref = (struct buffer_ref *)buf->private;
+
+- ref->ref++;
++ refcount_inc(&ref->refcount);
+ }
+
+ /* Pipe buffer operations for a buffer. */
+@@ -6852,7 +6856,7 @@ static const struct pipe_buf_operations
+ .can_merge = 0,
+ .confirm = generic_pipe_buf_confirm,
+ .release = buffer_pipe_buf_release,
+- .steal = generic_pipe_buf_steal,
++ .steal = generic_pipe_buf_nosteal,
+ .get = buffer_pipe_buf_get,
+ };
+
+@@ -6865,11 +6869,7 @@ static void buffer_spd_release(struct sp
+ struct buffer_ref *ref =
+ (struct buffer_ref *)spd->partial[i].private;
+
+- if (--ref->ref)
+- return;
+-
+- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+- kfree(ref);
++ buffer_ref_release(ref);
+ spd->partial[i].private = 0;
+ }
+
+@@ -6924,7 +6924,7 @@ tracing_buffers_splice_read(struct file
+ break;
+ }
+
+- ref->ref = 1;
++ refcount_set(&ref->refcount, 1);
+ ref->buffer = iter->trace_buffer->buffer;
+ ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
+ if (IS_ERR(ref->page)) {
--- /dev/null
+From e153abc0739ff77bd89c9ba1688cdb963464af97 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= <jglisse@redhat.com>
+Date: Thu, 25 Apr 2019 22:23:41 -0700
+Subject: zram: pass down the bvec we need to read into in the work struct
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jérôme Glisse <jglisse@redhat.com>
+
+commit e153abc0739ff77bd89c9ba1688cdb963464af97 upstream.
+
+When scheduling a work item to read a page, we need to pass down the proper
+bvec struct which points to the page to read into. Before this patch it
+used a randomly initialized bvec (only if PAGE_SIZE != 4096), which is
+wrong.
+
+Note that without this patch, on an arch/kernel where PAGE_SIZE != 4096,
+userspace could read random memory through a zram block device (though
+userspace probably would have no control over the address being read).
+
+Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com
+Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: Nitin Gupta <ngupta@vflare.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -774,18 +774,18 @@ struct zram_work {
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
++ struct bio_vec bvec;
+ };
+
+ #if PAGE_SIZE != 4096
+ static void zram_sync_read(struct work_struct *work)
+ {
+- struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
+
+- read_from_bdev_async(zram, &bvec, entry, bio);
++ read_from_bdev_async(zram, &zw->bvec, entry, bio);
+ }
+
+ /*
+@@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zr
+ {
+ struct zram_work work;
+
++ work.bvec = *bvec;
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;