From: Greg Kroah-Hartman Date: Mon, 29 Apr 2019 09:11:48 +0000 (+0200) Subject: 5.0-stable patches X-Git-Tag: v4.9.172~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a47dbcf137f43b2f161a398bc2d428d7ebaf7e56;p=thirdparty%2Fkernel%2Fstable-queue.git 5.0-stable patches added patches: arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch ib-rdmavt-fix-frwr-memory-registration.patch lib-kconfig.debug-fix-build-error-without-config_block.patch mips-scall64-o32-fix-indirect-syscall-number-load.patch mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch rdma-ucontext-fix-regression-with-disassociate.patch sched-numa-fix-a-possible-divide-by-zero.patch trace-fix-preempt_enable_no_resched-abuse.patch tracing-fix-buffer_ref-pipe-ops.patch zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch --- diff --git a/queue-5.0/arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch b/queue-5.0/arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch new file mode 100644 index 00000000000..4716ecf5200 --- /dev/null +++ b/queue-5.0/arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch @@ -0,0 +1,40 @@ +From d4d18e3ec6091843f607e8929a56723e28f393a6 Mon Sep 17 00:00:00 2001 +From: Bjorn Andersson +Date: Wed, 17 Apr 2019 21:29:29 -0700 +Subject: arm64: mm: Ensure tail of unaligned initrd is reserved + +From: Bjorn Andersson + +commit d4d18e3ec6091843f607e8929a56723e28f393a6 upstream. + +In the event that the start address of the initrd is not aligned, but +has an aligned size, the base + size will not cover the entire initrd +image and there is a chance that the kernel will corrupt the tail of the +image. + +By aligning the end of the initrd to a page boundary and then +subtracting the adjusted start address the memblock reservation will +cover all pages that contains the initrd. + +Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size") +Cc: stable@vger.kernel.org +Acked-by: Will Deacon +Signed-off-by: Bjorn Andersson +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/init.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/mm/init.c ++++ b/arch/arm64/mm/init.c +@@ -406,7 +406,7 @@ void __init arm64_memblock_init(void) + * Otherwise, this is a no-op + */ + u64 base = phys_initrd_start & PAGE_MASK; +- u64 size = PAGE_ALIGN(phys_initrd_size); ++ u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; + + /* + * We can only add back the initrd memory if we don't end up diff --git a/queue-5.0/crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch b/queue-5.0/crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch new file mode 100644 index 00000000000..d95edac866b --- /dev/null +++ b/queue-5.0/crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch @@ -0,0 +1,39 @@ +From b257b48cd5830c5b1d0c347eb281f9c28056f881 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Mon, 15 Apr 2019 14:37:34 +0800 +Subject: crypto: lrw - Fix atomic sleep when walking skcipher + +From: Herbert Xu + +commit b257b48cd5830c5b1d0c347eb281f9c28056f881 upstream. + +When we perform a walk in the completion function, we need to ensure +that it is atomic. 
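+
+(Editorial aside, not part of the upstream commit message: a minimal sketch,
+under stated assumptions, of the pattern the hunk below applies. A crypto
+completion callback can run in softirq, i.e. atomic, context, so the
+CRYPTO_TFM_REQ_MAY_SLEEP hint must be dropped before any further skcipher
+walk is started from it. "example_post_walk" is a made-up stand-in for the
+driver's real post-processing walk, here xor_tweak_post().)
+
+    #include <crypto/internal/skcipher.h>
+
+    /* Stand-in for the driver's post-processing walk over the request. */
+    static int example_post_walk(struct skcipher_request *req)
+    {
+            return 0;
+    }
+
+    static void example_crypt_done(struct crypto_async_request *areq, int err)
+    {
+            struct skcipher_request *req = areq->data;
+
+            if (!err) {
+                    /* Atomic context: the walk must not be allowed to sleep. */
+                    req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+                    err = example_post_walk(req);
+            }
+
+            skcipher_request_complete(req, err);
+    }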
+ +Fixes: ac3c8f36c31d ("crypto: lrw - Do not use auxiliary buffer") +Cc: +Signed-off-by: Herbert Xu +Acked-by: Ondrej Mosnacek +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + crypto/lrw.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/crypto/lrw.c ++++ b/crypto/lrw.c +@@ -212,8 +212,12 @@ static void crypt_done(struct crypto_asy + { + struct skcipher_request *req = areq->data; + +- if (!err) ++ if (!err) { ++ struct rctx *rctx = skcipher_request_ctx(req); ++ ++ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = xor_tweak_post(req); ++ } + + skcipher_request_complete(req, err); + } diff --git a/queue-5.0/crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch b/queue-5.0/crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch new file mode 100644 index 00000000000..4c2656010ec --- /dev/null +++ b/queue-5.0/crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch @@ -0,0 +1,40 @@ +From 44427c0fbc09b448b22410978a4ef6ee37599d25 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Mon, 15 Apr 2019 14:35:19 +0800 +Subject: crypto: xts - Fix atomic sleep when walking skcipher + +From: Herbert Xu + +commit 44427c0fbc09b448b22410978a4ef6ee37599d25 upstream. + +When we perform a walk in the completion function, we need to ensure +that it is atomic. + +Reported-by: syzbot+6f72c20560060c98b566@syzkaller.appspotmail.com +Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer") +Cc: +Signed-off-by: Herbert Xu +Acked-by: Ondrej Mosnacek +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + crypto/xts.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/crypto/xts.c ++++ b/crypto/xts.c +@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_asy + { + struct skcipher_request *req = areq->data; + +- if (!err) ++ if (!err) { ++ struct rctx *rctx = skcipher_request_ctx(req); ++ ++ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = xor_tweak_post(req); ++ } + + skcipher_request_complete(req, err); + } diff --git a/queue-5.0/gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch b/queue-5.0/gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch new file mode 100644 index 00000000000..5e1759d9dc1 --- /dev/null +++ b/queue-5.0/gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch @@ -0,0 +1,34 @@ +From 102bbe34b31c9159e714432afd64458f6f3876d7 Mon Sep 17 00:00:00 2001 +From: Baolin Wang +Date: Wed, 10 Apr 2019 15:47:54 +0800 +Subject: gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC + +From: Baolin Wang + +commit 102bbe34b31c9159e714432afd64458f6f3876d7 upstream. + +When setting sync EIC as IRQ_TYPE_EDGE_BOTH type, we missed to set the +SPRD_EIC_SYNC_INTMODE register to 0, which means detecting edge signals. + +Thus this patch fixes the issue. 
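+
+(Editorial aside, not part of the upstream commit message: a hypothetical
+consumer showing how the broken path is reached. Requesting both edges on a
+sync EIC line makes the irq core call sprd_eic_irq_set_type() with
+IRQ_TYPE_EDGE_BOTH, where, before this fix, the controller was never
+switched into edge-detection mode because the SPRD_EIC_SYNC_INTMODE write
+was missing. All names below are made up for illustration.)
+
+    #include <linux/err.h>
+    #include <linux/gpio/consumer.h>
+    #include <linux/interrupt.h>
+
+    static irqreturn_t example_handler(int irq, void *data)
+    {
+            return IRQ_HANDLED;
+    }
+
+    static int example_setup(struct device *dev)
+    {
+            struct gpio_desc *gpiod = devm_gpiod_get(dev, "example", GPIOD_IN);
+            int irq;
+
+            if (IS_ERR(gpiod))
+                    return PTR_ERR(gpiod);
+
+            irq = gpiod_to_irq(gpiod);
+            if (irq < 0)
+                    return irq;
+
+            /* IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING maps to IRQ_TYPE_EDGE_BOTH. */
+            return devm_request_irq(dev, irq, example_handler,
+                                    IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+                                    "example-eic", NULL);
+    }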
+ +Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") +Cc: +Signed-off-by: Baolin Wang +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpio/gpio-eic-sprd.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpio/gpio-eic-sprd.c ++++ b/drivers/gpio/gpio-eic-sprd.c +@@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct + irq_set_handler_locked(data, handle_edge_irq); + break; + case IRQ_TYPE_EDGE_BOTH: ++ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + irq_set_handler_locked(data, handle_edge_irq); + break; diff --git a/queue-5.0/ib-rdmavt-fix-frwr-memory-registration.patch b/queue-5.0/ib-rdmavt-fix-frwr-memory-registration.patch new file mode 100644 index 00000000000..5ce5a4b3460 --- /dev/null +++ b/queue-5.0/ib-rdmavt-fix-frwr-memory-registration.patch @@ -0,0 +1,83 @@ +From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001 +From: Josh Collier +Date: Mon, 15 Apr 2019 11:34:22 -0700 +Subject: IB/rdmavt: Fix frwr memory registration + +From: Josh Collier + +commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream. + +Current implementation was not properly handling frwr memory +registrations. This was uncovered by commit 27f26cec761das ("xprtrdma: +Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is +used for NFS over RDMA, started failing as it was the first ULP to modify +the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR +when attempting to perform RDMA Writes to the client. + +The fix is to properly capture the true iova, offset, and length in the +call to ib_map_mr_sg, and then update the iova when processing the +IB_WR_REG_MEM on the send queue. + +Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg") +Cc: stable@vger.kernel.org +Reviewed-by: Mike Marciniszyn +Reviewed-by: Dennis Dalessandro +Reviewed-by: Michael J. Ruhl +Signed-off-by: Josh Collier +Signed-off-by: Dennis Dalessandro +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +--- a/drivers/infiniband/sw/rdmavt/mr.c ++++ b/drivers/infiniband/sw/rdmavt/mr.c +@@ -611,11 +611,6 @@ static int rvt_set_page(struct ib_mr *ib + if (unlikely(mapped_segs == mr->mr.max_segs)) + return -ENOMEM; + +- if (mr->mr.length == 0) { +- mr->mr.user_base = addr; +- mr->mr.iova = addr; +- } +- + m = mapped_segs / RVT_SEGSZ; + n = mapped_segs % RVT_SEGSZ; + mr->mr.map[m]->segs[n].vaddr = (void *)addr; +@@ -633,17 +628,24 @@ static int rvt_set_page(struct ib_mr *ib + * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg + * ++ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. 
++ * + * Return: number of sg elements mapped to the memory region + */ + int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) + { + struct rvt_mr *mr = to_imr(ibmr); ++ int ret; + + mr->mr.length = 0; + mr->mr.page_shift = PAGE_SHIFT; +- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, +- rvt_set_page); ++ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); ++ mr->mr.user_base = ibmr->iova; ++ mr->mr.iova = ibmr->iova; ++ mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; ++ mr->mr.length = (size_t)ibmr->length; ++ return ret; + } + + /** +@@ -674,6 +676,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, s + ibmr->rkey = key; + mr->mr.lkey = key; + mr->mr.access_flags = access; ++ mr->mr.iova = ibmr->iova; + atomic_set(&mr->mr.lkey_invalid, 0); + + return 0; diff --git a/queue-5.0/lib-kconfig.debug-fix-build-error-without-config_block.patch b/queue-5.0/lib-kconfig.debug-fix-build-error-without-config_block.patch new file mode 100644 index 00000000000..e486ad0abf6 --- /dev/null +++ b/queue-5.0/lib-kconfig.debug-fix-build-error-without-config_block.patch @@ -0,0 +1,43 @@ +From ae3d6a323347940f0548bbb4b17f0bb2e9164169 Mon Sep 17 00:00:00 2001 +From: YueHaibing +Date: Thu, 25 Apr 2019 22:23:44 -0700 +Subject: lib/Kconfig.debug: fix build error without CONFIG_BLOCK + +From: YueHaibing + +commit ae3d6a323347940f0548bbb4b17f0bb2e9164169 upstream. + +If CONFIG_TEST_KMOD is set to M, while CONFIG_BLOCK is not set, XFS and +BTRFS can not be compiled successly. + +Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com +Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") +Signed-off-by: YueHaibing +Reported-by: Hulk Robot +Reviewed-by: Kees Cook +Cc: Masahiro Yamada +Cc: Petr Mladek +Cc: Andy Shevchenko +Cc: Matthew Wilcox +Cc: Joe Lawrence +Cc: Robin Murphy +Cc: Luis Chamberlain +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + lib/Kconfig.debug | 1 + + 1 file changed, 1 insertion(+) + +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1952,6 +1952,7 @@ config TEST_KMOD + depends on m + depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS + depends on NETDEVICES && NET_CORE && INET # for TUN ++ depends on BLOCK + select TEST_LKM + select XFS_FS + select TUN diff --git a/queue-5.0/mips-scall64-o32-fix-indirect-syscall-number-load.patch b/queue-5.0/mips-scall64-o32-fix-indirect-syscall-number-load.patch new file mode 100644 index 00000000000..d16d1408ff9 --- /dev/null +++ b/queue-5.0/mips-scall64-o32-fix-indirect-syscall-number-load.patch @@ -0,0 +1,53 @@ +From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001 +From: Aurelien Jarno +Date: Tue, 9 Apr 2019 16:53:55 +0200 +Subject: MIPS: scall64-o32: Fix indirect syscall number load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Aurelien Jarno + +commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream. + +Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32)) +added indirect syscall detection for O32 processes running on MIPS64, +but it did not work correctly for big endian kernel/processes. The +reason is that the syscall number is loaded from ARG1 using the lw +instruction while this is a 64-bit value, so zero is loaded instead of +the syscall number. + +Fix the code by using the ld instruction instead. 
When running a 32-bit +processes on a 64 bit CPU, the values are properly sign-extended, so it +ensures the value passed to syscall_trace_enter is correct. + +Recent systemd versions with seccomp enabled whitelist the getpid +syscall for their internal processes (e.g. systemd-journald), but call +it through syscall(SYS_getpid). This fix therefore allows O32 big endian +systems with a 64-bit kernel to run recent systemd versions. + +Signed-off-by: Aurelien Jarno +Cc: # v3.15+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paul Burton +Cc: Ralf Baechle +Cc: James Hogan +Cc: linux-mips@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/kernel/scall64-o32.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/mips/kernel/scall64-o32.S ++++ b/arch/mips/kernel/scall64-o32.S +@@ -125,7 +125,7 @@ trace_a_syscall: + subu t1, v0, __NR_O32_Linux + move a1, v0 + bnez t1, 1f /* __NR_syscall at offset 0 */ +- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ ++ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + .set pop + + 1: jal syscall_trace_enter diff --git a/queue-5.0/mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch b/queue-5.0/mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch new file mode 100644 index 00000000000..06a08746bcc --- /dev/null +++ b/queue-5.0/mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch @@ -0,0 +1,109 @@ +From 24512228b7a3f412b5a51f189df302616b021c33 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 25 Apr 2019 22:23:51 -0700 +Subject: mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model + +From: Mel Gorman + +commit 24512228b7a3f412b5a51f189df302616b021c33 upstream. + +Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small +amounts of memory when an external fragmentation event occurs") "broke" +memory management on parisc. + +The machine is not NUMA but the DISCONTIG model creates three pgdats +even though it's a UMA machine for the following ranges + + 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB + 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB + 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB + +Mikulas reported: + + With the patch 1c30844d2, the kernel will incorrectly reclaim the + first zone when it fills up, ignoring the fact that there are two + completely free zones. Basiscally, it limits cache size to 1GiB. + + For example, if I run: + # dd if=/dev/sda of=/dev/null bs=1M count=2048 + + - with the proper kernel, there should be "Buffers - 2GiB" + when this command finishes. With the patch 1c30844d2, buffers + will consume just 1GiB or slightly more, because the kernel was + incorrectly reclaiming them. + +The page allocator and reclaim makes assumptions that pgdats really +represent NUMA nodes and zones represent ranges and makes decisions on +that basis. Watermark boosting for small pgdats leads to unexpected +results even though this would have behaved reasonably on SPARSEMEM. + +DISCONTIG is essentially deprecated and even parisc plans to move to +SPARSEMEM so there is no need to be fancy, this patch simply disables +watermark boosting by default on DISCONTIGMEM. 
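+
+(Editorial aside, not part of the upstream commit message: a worked example
+of the tunable's unit, based on the documentation text updated below. The
+factor is expressed in fractions of 10,000, so the default of 15,000 allows
+boosting up to 150% of a zone's high watermark, while 0 disables boosting
+entirely, which is what this patch makes the compiled-in default on
+DISCONTIGMEM. The helper below is illustrative only, not kernel code.)
+
+    /* Maximum watermark boost, in pages, for a given high watermark. */
+    static unsigned long example_max_boost(unsigned long high_wmark_pages,
+                                           int watermark_boost_factor)
+    {
+            /* 15000/10000 -> 1.5x the high watermark; 0 -> boosting disabled */
+            return high_wmark_pages * watermark_boost_factor / 10000;
+    }
+
+On configurations that keep the 15,000 default, the same effect can be had
+at runtime with "sysctl vm.watermark_boost_factor=0".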
+ +Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net +Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") +Signed-off-by: Mel Gorman +Reported-by: Mikulas Patocka +Tested-by: Mikulas Patocka +Acked-by: Vlastimil Babka +Cc: James Bottomley +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/sysctl/vm.txt | 16 ++++++++-------- + mm/page_alloc.c | 13 +++++++++++++ + 2 files changed, 21 insertions(+), 8 deletions(-) + +--- a/Documentation/sysctl/vm.txt ++++ b/Documentation/sysctl/vm.txt +@@ -866,14 +866,14 @@ The intent is that compaction has less w + increase the success rate of future high-order allocations such as SLUB + allocations, THP and hugetlbfs pages. + +-To make it sensible with respect to the watermark_scale_factor parameter, +-the unit is in fractions of 10,000. The default value of 15,000 means +-that up to 150% of the high watermark will be reclaimed in the event of +-a pageblock being mixed due to fragmentation. The level of reclaim is +-determined by the number of fragmentation events that occurred in the +-recent past. If this value is smaller than a pageblock then a pageblocks +-worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor +-of 0 will disable the feature. ++To make it sensible with respect to the watermark_scale_factor ++parameter, the unit is in fractions of 10,000. The default value of ++15,000 on !DISCONTIGMEM configurations means that up to 150% of the high ++watermark will be reclaimed in the event of a pageblock being mixed due ++to fragmentation. The level of reclaim is determined by the number of ++fragmentation events that occurred in the recent past. If this value is ++smaller than a pageblock then a pageblocks worth of pages will be reclaimed ++(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. + + ============================================================= + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page + + int min_free_kbytes = 1024; + int user_min_free_kbytes = -1; ++#ifdef CONFIG_DISCONTIGMEM ++/* ++ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges ++ * are not on separate NUMA nodes. Functionally this works but with ++ * watermark_boost_factor, it can reclaim prematurely as the ranges can be ++ * quite small. By default, do not boost watermarks on discontigmem as in ++ * many cases very high-order allocations like THP are likely to be ++ * unsupported and the premature reclaim offsets the advantage of long-term ++ * fragmentation avoidance. ++ */ ++int watermark_boost_factor __read_mostly; ++#else + int watermark_boost_factor __read_mostly = 15000; ++#endif + int watermark_scale_factor = 10; + + static unsigned long nr_kernel_pages __initdata; diff --git a/queue-5.0/rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch b/queue-5.0/rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch new file mode 100644 index 00000000000..35613229127 --- /dev/null +++ b/queue-5.0/rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch @@ -0,0 +1,41 @@ +From c660133c339f9ab684fdf568c0d51b9ae5e86002 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 16 Apr 2019 14:07:25 +0300 +Subject: RDMA/mlx5: Do not allow the user to write to the clock page + +From: Jason Gunthorpe + +commit c660133c339f9ab684fdf568c0d51b9ae5e86002 upstream. 
+ +The intent of this VMA was to be read-only from user space, but the +VM_MAYWRITE masking was missed, so mprotect could make it writable. + +Cc: stable@vger.kernel.org +Fixes: 5c99eaecb1fc ("IB/mlx5: Mmap the HCA's clock info to user-space") +Signed-off-by: Jason Gunthorpe +Reviewed-by: Haggai Eran +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/mlx5/main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -1982,6 +1982,7 @@ static int mlx5_ib_mmap_clock_info_page( + + if (vma->vm_flags & VM_WRITE) + return -EPERM; ++ vma->vm_flags &= ~VM_MAYWRITE; + + if (!dev->mdev->clock_info_page) + return -EOPNOTSUPP; +@@ -2147,6 +2148,7 @@ static int mlx5_ib_mmap(struct ib_uconte + + if (vma->vm_flags & VM_WRITE) + return -EPERM; ++ vma->vm_flags &= ~VM_MAYWRITE; + + /* Don't expose to user-space information it shouldn't have */ + if (PAGE_SIZE > 4096) diff --git a/queue-5.0/rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch b/queue-5.0/rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch new file mode 100644 index 00000000000..c48104d82f8 --- /dev/null +++ b/queue-5.0/rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch @@ -0,0 +1,44 @@ +From d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 16 Apr 2019 14:07:26 +0300 +Subject: RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages + +From: Jason Gunthorpe + +commit d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 upstream. + +Since mlx5 supports device disassociate it must use this API for all +BAR page mmaps, otherwise the pages can remain mapped after the device +is unplugged causing a system crash. + +Cc: stable@vger.kernel.org +Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") +Signed-off-by: Jason Gunthorpe +Reviewed-by: Haggai Eran +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/mlx5/main.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -2154,14 +2154,12 @@ static int mlx5_ib_mmap(struct ib_uconte + if (PAGE_SIZE > 4096) + return -EOPNOTSUPP; + +- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = (dev->mdev->iseg_base + + offsetof(struct mlx5_init_seg, internal_timer_h)) >> + PAGE_SHIFT; +- if (io_remap_pfn_range(vma, vma->vm_start, pfn, +- PAGE_SIZE, vma->vm_page_prot)) +- return -EAGAIN; +- break; ++ return rdma_user_mmap_io(&context->ibucontext, vma, pfn, ++ PAGE_SIZE, ++ pgprot_noncached(vma->vm_page_prot)); + case MLX5_IB_MMAP_CLOCK_INFO: + return mlx5_ib_mmap_clock_info_page(dev, vma, context); + diff --git a/queue-5.0/rdma-ucontext-fix-regression-with-disassociate.patch b/queue-5.0/rdma-ucontext-fix-regression-with-disassociate.patch new file mode 100644 index 00000000000..5d6aabd5c79 --- /dev/null +++ b/queue-5.0/rdma-ucontext-fix-regression-with-disassociate.patch @@ -0,0 +1,139 @@ +From 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 16 Apr 2019 14:07:28 +0300 +Subject: RDMA/ucontext: Fix regression with disassociate + +From: Jason Gunthorpe + +commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream. 
+ +When this code was consolidated the intention was that the VMA would +become backed by anonymous zero pages after the zap_vma_pte - however this +very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED +bits to transform the VMA into an anonymous VMA. Since the vm_ops was +removed this broke. + +Now userspace gets a SIGBUS if it touches the vma after disassociation. + +Instead of converting the VMA to anonymous provide a fault handler that +puts a zero'd page into the VMA when user-space touches it after +disassociation. + +Cc: stable@vger.kernel.org +Suggested-by: Andrea Arcangeli +Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") +Signed-off-by: Jason Gunthorpe +Signed-off-by: Leon Romanovsky +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 52 ++++++++++++++++++++++++++++++++-- + 2 files changed, 50 insertions(+), 3 deletions(-) + +--- a/drivers/infiniband/core/uverbs.h ++++ b/drivers/infiniband/core/uverbs.h +@@ -160,6 +160,7 @@ struct ib_uverbs_file { + + struct mutex umap_lock; + struct list_head umaps; ++ struct page *disassociate_page; + + struct idr idr; + /* spinlock protects write access to idr */ +--- a/drivers/infiniband/core/uverbs_main.c ++++ b/drivers/infiniband/core/uverbs_main.c +@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); + put_device(&file->device->dev); ++ ++ if (file->disassociate_page) ++ __free_pages(file->disassociate_page, 0); + kfree(file); + } + +@@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_ar + kfree(priv); + } + ++/* ++ * Once the zap_vma_ptes has been called touches to the VMA will come here and ++ * we return a dummy writable zero page for all the pfns. ++ */ ++static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) ++{ ++ struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; ++ struct rdma_umap_priv *priv = vmf->vma->vm_private_data; ++ vm_fault_t ret = 0; ++ ++ if (!priv) ++ return VM_FAULT_SIGBUS; ++ ++ /* Read only pages can just use the system zero page. */ ++ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { ++ vmf->page = ZERO_PAGE(vmf->vm_start); ++ get_page(vmf->page); ++ return 0; ++ } ++ ++ mutex_lock(&ufile->umap_lock); ++ if (!ufile->disassociate_page) ++ ufile->disassociate_page = ++ alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); ++ ++ if (ufile->disassociate_page) { ++ /* ++ * This VMA is forced to always be shared so this doesn't have ++ * to worry about COW. ++ */ ++ vmf->page = ufile->disassociate_page; ++ get_page(vmf->page); ++ } else { ++ ret = VM_FAULT_SIGBUS; ++ } ++ mutex_unlock(&ufile->umap_lock); ++ ++ return ret; ++} ++ + static const struct vm_operations_struct rdma_umap_ops = { + .open = rdma_umap_open, + .close = rdma_umap_close, ++ .fault = rdma_umap_fault, + }; + + static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, +@@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_ + struct ib_uverbs_file *ufile = ucontext->ufile; + struct rdma_umap_priv *priv; + ++ if (!(vma->vm_flags & VM_SHARED)) ++ return ERR_PTR(-EINVAL); ++ + if (vma->vm_end - vma->vm_start != size) + return ERR_PTR(-EINVAL); + +@@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struc + * at a time to get the lock ordering right. Typically there + * will only be one mm, so no big deal. 
+ */ +- down_write(&mm->mmap_sem); ++ down_read(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; + mutex_lock(&ufile->umap_lock); +@@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struc + + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); +- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); + } + mutex_unlock(&ufile->umap_lock); + skip_mm: +- up_write(&mm->mmap_sem); ++ up_read(&mm->mmap_sem); + mmput(mm); + } + } diff --git a/queue-5.0/sched-numa-fix-a-possible-divide-by-zero.patch b/queue-5.0/sched-numa-fix-a-possible-divide-by-zero.patch new file mode 100644 index 00000000000..be887849f49 --- /dev/null +++ b/queue-5.0/sched-numa-fix-a-possible-divide-by-zero.patch @@ -0,0 +1,53 @@ +From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001 +From: Xie XiuQi +Date: Sat, 20 Apr 2019 16:34:16 +0800 +Subject: sched/numa: Fix a possible divide-by-zero + +From: Xie XiuQi + +commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream. + +sched_clock_cpu() may not be consistent between CPUs. If a task +migrates to another CPU, then se.exec_start is set to that CPU's +rq_clock_task() by update_stats_curr_start(). Specifically, the new +value might be before the old value due to clock skew. + +So then if in numa_get_avg_runtime() the expression: + + 'now - p->last_task_numa_placement' + +ends up as -1, then the divider '*period + 1' in task_numa_placement() +is 0 and things go bang. Similar to update_curr(), check if time goes +backwards to avoid this. + +[ peterz: Wrote new changelog. ] +[ mingo: Tweaked the code comment. ] + +Signed-off-by: Xie XiuQi +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: cj.chengjian@huawei.com +Cc: +Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1994,6 +1994,10 @@ static u64 numa_get_avg_runtime(struct t + if (p->last_task_numa_placement) { + delta = runtime - p->last_sum_exec_runtime; + *period = now - p->last_task_numa_placement; ++ ++ /* Avoid time going backwards, prevent potential divide error: */ ++ if (unlikely((s64)*period < 0)) ++ *period = 0; + } else { + delta = p->se.avg.load_sum; + *period = LOAD_AVG_MAX; diff --git a/queue-5.0/series b/queue-5.0/series index bcb7950173f..0e28fa524cc 100644 --- a/queue-5.0/series +++ b/queue-5.0/series @@ -9,3 +9,18 @@ cifs-fix-memory-leak-in-smb2_read.patch cifs-fix-page-reference-leak-with-readv-writev.patch cifs-do-not-attempt-cifs-operation-on-smb2-rename-error.patch tracing-fix-a-memory-leak-by-early-error-exit-in-trace_pid_write.patch +tracing-fix-buffer_ref-pipe-ops.patch +crypto-xts-fix-atomic-sleep-when-walking-skcipher.patch +crypto-lrw-fix-atomic-sleep-when-walking-skcipher.patch +gpio-eic-sprd-fix-incorrect-irq-type-setting-for-the-sync-eic.patch +zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch +lib-kconfig.debug-fix-build-error-without-config_block.patch +mips-scall64-o32-fix-indirect-syscall-number-load.patch +trace-fix-preempt_enable_no_resched-abuse.patch +mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model.patch +arm64-mm-ensure-tail-of-unaligned-initrd-is-reserved.patch +ib-rdmavt-fix-frwr-memory-registration.patch +rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch 
+rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch +rdma-ucontext-fix-regression-with-disassociate.patch +sched-numa-fix-a-possible-divide-by-zero.patch diff --git a/queue-5.0/trace-fix-preempt_enable_no_resched-abuse.patch b/queue-5.0/trace-fix-preempt_enable_no_resched-abuse.patch new file mode 100644 index 00000000000..338f15c43ef --- /dev/null +++ b/queue-5.0/trace-fix-preempt_enable_no_resched-abuse.patch @@ -0,0 +1,48 @@ +From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 23 Apr 2019 22:03:18 +0200 +Subject: trace: Fix preempt_enable_no_resched() abuse + +From: Peter Zijlstra + +commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream. + +Unless the very next line is schedule(), or implies it, one must not use +preempt_enable_no_resched(). It can cause a preemption to go missing and +thereby cause arbitrary delays, breaking the PREEMPT=y invariant. + +Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net + +Cc: Waiman Long +Cc: Linus Torvalds +Cc: Ingo Molnar +Cc: Will Deacon +Cc: Thomas Gleixner +Cc: the arch/x86 maintainers +Cc: Davidlohr Bueso +Cc: Tim Chen +Cc: huang ying +Cc: Roman Gushchin +Cc: Alexei Starovoitov +Cc: Daniel Borkmann +Cc: stable@vger.kernel.org +Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ring_buffer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -776,7 +776,7 @@ u64 ring_buffer_time_stamp(struct ring_b + + preempt_disable_notrace(); + time = rb_time_stamp(buffer); +- preempt_enable_no_resched_notrace(); ++ preempt_enable_notrace(); + + return time; + } diff --git a/queue-5.0/tracing-fix-buffer_ref-pipe-ops.patch b/queue-5.0/tracing-fix-buffer_ref-pipe-ops.patch new file mode 100644 index 00000000000..ec894070b8e --- /dev/null +++ b/queue-5.0/tracing-fix-buffer_ref-pipe-ops.patch @@ -0,0 +1,140 @@ +From b987222654f84f7b4ca95b3a55eca784cb30235b Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Thu, 4 Apr 2019 23:59:25 +0200 +Subject: tracing: Fix buffer_ref pipe ops + +From: Jann Horn + +commit b987222654f84f7b4ca95b3a55eca784cb30235b upstream. + +This fixes multiple issues in buffer_pipe_buf_ops: + + - The ->steal() handler must not return zero unless the pipe buffer has + the only reference to the page. But generic_pipe_buf_steal() assumes + that every reference to the pipe is tracked by the page's refcount, + which isn't true for these buffers - buffer_pipe_buf_get(), which + duplicates a buffer, doesn't touch the page's refcount. + Fix it by using generic_pipe_buf_nosteal(), which refuses every + attempted theft. It should be easy to actually support ->steal, but the + only current users of pipe_buf_steal() are the virtio console and FUSE, + and they also only use it as an optimization. So it's probably not worth + the effort. + - The ->get() and ->release() handlers can be invoked concurrently on pipe + buffers backed by the same struct buffer_ref. Make them safe against + concurrency by using refcount_t. + - The pointers stored in ->private were only zeroed out when the last + reference to the buffer_ref was dropped. As far as I know, this + shouldn't be necessary anyway, but if we do it, let's always do it. 
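+
+(Editorial aside, not part of the upstream commit message: a minimal sketch
+of the refcounting pattern the second point above calls for. With a plain
+integer, "if (--ref->ref) return;" is unsafe once ->get() and ->release()
+can run concurrently on pipe buffers sharing one buffer_ref; refcount_t
+makes the decrement and the "was that the last reference?" test a single
+atomic operation. The structure and function names are made up.)
+
+    #include <linux/refcount.h>
+    #include <linux/slab.h>
+
+    struct example_ref {
+            refcount_t refcount;
+            void *payload;
+    };
+
+    static void example_ref_get(struct example_ref *ref)
+    {
+            refcount_inc(&ref->refcount);
+    }
+
+    static void example_ref_put(struct example_ref *ref)
+    {
+            if (!refcount_dec_and_test(&ref->refcount))
+                    return;
+
+            /* The last reference is gone; it is now safe to free. */
+            kfree(ref->payload);
+            kfree(ref);
+    }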
+ +Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com + +Cc: Ingo Molnar +Cc: Masami Hiramatsu +Cc: Al Viro +Cc: stable@vger.kernel.org +Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") +Signed-off-by: Jann Horn +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + fs/splice.c | 4 ++-- + include/linux/pipe_fs_i.h | 1 + + kernel/trace/trace.c | 28 ++++++++++++++-------------- + 3 files changed, 17 insertions(+), 16 deletions(-) + +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -333,8 +333,8 @@ const struct pipe_buf_operations default + .get = generic_pipe_buf_get, + }; + +-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, +- struct pipe_buffer *buf) ++int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) + { + return 1; + } +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -181,6 +181,7 @@ void free_pipe_info(struct pipe_inode_in + void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); ++int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); + void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); + void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6823,19 +6823,23 @@ struct buffer_ref { + struct ring_buffer *buffer; + void *page; + int cpu; +- int ref; ++ refcount_t refcount; + }; + ++static void buffer_ref_release(struct buffer_ref *ref) ++{ ++ if (!refcount_dec_and_test(&ref->refcount)) ++ return; ++ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); ++ kfree(ref); ++} ++ + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + +- if (--ref->ref) +- return; +- +- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); +- kfree(ref); ++ buffer_ref_release(ref); + buf->private = 0; + } + +@@ -6844,7 +6848,7 @@ static void buffer_pipe_buf_get(struct p + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + +- ref->ref++; ++ refcount_inc(&ref->refcount); + } + + /* Pipe buffer operations for a buffer. 
*/ +@@ -6852,7 +6856,7 @@ static const struct pipe_buf_operations + .can_merge = 0, + .confirm = generic_pipe_buf_confirm, + .release = buffer_pipe_buf_release, +- .steal = generic_pipe_buf_steal, ++ .steal = generic_pipe_buf_nosteal, + .get = buffer_pipe_buf_get, + }; + +@@ -6865,11 +6869,7 @@ static void buffer_spd_release(struct sp + struct buffer_ref *ref = + (struct buffer_ref *)spd->partial[i].private; + +- if (--ref->ref) +- return; +- +- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); +- kfree(ref); ++ buffer_ref_release(ref); + spd->partial[i].private = 0; + } + +@@ -6924,7 +6924,7 @@ tracing_buffers_splice_read(struct file + break; + } + +- ref->ref = 1; ++ refcount_set(&ref->refcount, 1); + ref->buffer = iter->trace_buffer->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); + if (IS_ERR(ref->page)) { diff --git a/queue-5.0/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch b/queue-5.0/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch new file mode 100644 index 00000000000..446cc75f738 --- /dev/null +++ b/queue-5.0/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch @@ -0,0 +1,67 @@ +From e153abc0739ff77bd89c9ba1688cdb963464af97 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= +Date: Thu, 25 Apr 2019 22:23:41 -0700 +Subject: zram: pass down the bvec we need to read into in the work struct +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jérôme Glisse + +commit e153abc0739ff77bd89c9ba1688cdb963464af97 upstream. + +When scheduling work item to read page we need to pass down the proper +bvec struct which points to the page to read into. Before this patch it +uses a randomly initialized bvec (only if PAGE_SIZE != 4096) which is +wrong. + +Note that without this patch on arch/kernel where PAGE_SIZE != 4096 +userspace could read random memory through a zram block device (thought +userspace probably would have no control on the address being read). + +Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com +Signed-off-by: Jérôme Glisse +Reviewed-by: Andrew Morton +Reviewed-by: Sergey Senozhatsky +Acked-by: Minchan Kim +Cc: Nitin Gupta +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/zram/zram_drv.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -774,18 +774,18 @@ struct zram_work { + struct zram *zram; + unsigned long entry; + struct bio *bio; ++ struct bio_vec bvec; + }; + + #if PAGE_SIZE != 4096 + static void zram_sync_read(struct work_struct *work) + { +- struct bio_vec bvec; + struct zram_work *zw = container_of(work, struct zram_work, work); + struct zram *zram = zw->zram; + unsigned long entry = zw->entry; + struct bio *bio = zw->bio; + +- read_from_bdev_async(zram, &bvec, entry, bio); ++ read_from_bdev_async(zram, &zw->bvec, entry, bio); + } + + /* +@@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zr + { + struct zram_work work; + ++ work.bvec = *bvec; + work.zram = zram; + work.entry = entry; + work.bio = bio;