From: Greg Kroah-Hartman Date: Mon, 29 Apr 2019 09:11:16 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.9.172~42 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=edf42df85947c687a1a4415609b6004cdec21c4b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: ib-rdmavt-fix-frwr-memory-registration.patch lib-kconfig.debug-fix-build-error-without-config_block.patch mips-scall64-o32-fix-indirect-syscall-number-load.patch sched-numa-fix-a-possible-divide-by-zero.patch trace-fix-preempt_enable_no_resched-abuse.patch tracing-fix-buffer_ref-pipe-ops.patch zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch --- diff --git a/queue-4.14/ib-rdmavt-fix-frwr-memory-registration.patch b/queue-4.14/ib-rdmavt-fix-frwr-memory-registration.patch new file mode 100644 index 00000000000..5ce5a4b3460 --- /dev/null +++ b/queue-4.14/ib-rdmavt-fix-frwr-memory-registration.patch @@ -0,0 +1,83 @@ +From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001 +From: Josh Collier +Date: Mon, 15 Apr 2019 11:34:22 -0700 +Subject: IB/rdmavt: Fix frwr memory registration + +From: Josh Collier + +commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream. + +Current implementation was not properly handling frwr memory +registrations. This was uncovered by commit 27f26cec761das ("xprtrdma: +Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is +used for NFS over RDMA, started failing as it was the first ULP to modify +the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR +when attempting to perform RDMA Writes to the client. + +The fix is to properly capture the true iova, offset, and length in the +call to ib_map_mr_sg, and then update the iova when processing the +IB_WR_REG_MEM on the send queue. + +Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg") +Cc: stable@vger.kernel.org +Reviewed-by: Mike Marciniszyn +Reviewed-by: Dennis Dalessandro +Reviewed-by: Michael J. 
Ruhl +Signed-off-by: Josh Collier +Signed-off-by: Dennis Dalessandro +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +--- a/drivers/infiniband/sw/rdmavt/mr.c ++++ b/drivers/infiniband/sw/rdmavt/mr.c +@@ -611,11 +611,6 @@ static int rvt_set_page(struct ib_mr *ib + if (unlikely(mapped_segs == mr->mr.max_segs)) + return -ENOMEM; + +- if (mr->mr.length == 0) { +- mr->mr.user_base = addr; +- mr->mr.iova = addr; +- } +- + m = mapped_segs / RVT_SEGSZ; + n = mapped_segs % RVT_SEGSZ; + mr->mr.map[m]->segs[n].vaddr = (void *)addr; +@@ -633,17 +628,24 @@ static int rvt_set_page(struct ib_mr *ib + * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg + * ++ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. ++ * + * Return: number of sg elements mapped to the memory region + */ + int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) + { + struct rvt_mr *mr = to_imr(ibmr); ++ int ret; + + mr->mr.length = 0; + mr->mr.page_shift = PAGE_SHIFT; +- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, +- rvt_set_page); ++ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); ++ mr->mr.user_base = ibmr->iova; ++ mr->mr.iova = ibmr->iova; ++ mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; ++ mr->mr.length = (size_t)ibmr->length; ++ return ret; + } + + /** +@@ -674,6 +676,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, s + ibmr->rkey = key; + mr->mr.lkey = key; + mr->mr.access_flags = access; ++ mr->mr.iova = ibmr->iova; + atomic_set(&mr->mr.lkey_invalid, 0); + + return 0; diff --git a/queue-4.14/lib-kconfig.debug-fix-build-error-without-config_block.patch b/queue-4.14/lib-kconfig.debug-fix-build-error-without-config_block.patch new file mode 100644 index 00000000000..b31bb1422e3 --- /dev/null +++ 
b/queue-4.14/lib-kconfig.debug-fix-build-error-without-config_block.patch @@ -0,0 +1,43 @@ +From ae3d6a323347940f0548bbb4b17f0bb2e9164169 Mon Sep 17 00:00:00 2001 +From: YueHaibing +Date: Thu, 25 Apr 2019 22:23:44 -0700 +Subject: lib/Kconfig.debug: fix build error without CONFIG_BLOCK + +From: YueHaibing + +commit ae3d6a323347940f0548bbb4b17f0bb2e9164169 upstream. + +If CONFIG_TEST_KMOD is set to M, while CONFIG_BLOCK is not set, XFS and +BTRFS can not be compiled successfully. + +Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com +Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") +Signed-off-by: YueHaibing +Reported-by: Hulk Robot +Reviewed-by: Kees Cook +Cc: Masahiro Yamada +Cc: Petr Mladek +Cc: Andy Shevchenko +Cc: Matthew Wilcox +Cc: Joe Lawrence +Cc: Robin Murphy +Cc: Luis Chamberlain +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + lib/Kconfig.debug | 1 + + 1 file changed, 1 insertion(+) + +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1884,6 +1884,7 @@ config TEST_KMOD + depends on m + depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS + depends on NETDEVICES && NET_CORE && INET # for TUN ++ depends on BLOCK + select TEST_LKM + select XFS_FS + select TUN diff --git a/queue-4.14/mips-scall64-o32-fix-indirect-syscall-number-load.patch b/queue-4.14/mips-scall64-o32-fix-indirect-syscall-number-load.patch new file mode 100644 index 00000000000..d16d1408ff9 --- /dev/null +++ b/queue-4.14/mips-scall64-o32-fix-indirect-syscall-number-load.patch @@ -0,0 +1,53 @@ +From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001 +From: Aurelien Jarno +Date: Tue, 9 Apr 2019 16:53:55 +0200 +Subject: MIPS: scall64-o32: Fix indirect syscall number load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Aurelien Jarno + +commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream. 
+ +Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32)) +added indirect syscall detection for O32 processes running on MIPS64, +but it did not work correctly for big endian kernel/processes. The +reason is that the syscall number is loaded from ARG1 using the lw +instruction while this is a 64-bit value, so zero is loaded instead of +the syscall number. + +Fix the code by using the ld instruction instead. When running 32-bit +processes on a 64-bit CPU, the values are properly sign-extended, so it +ensures the value passed to syscall_trace_enter is correct. + +Recent systemd versions with seccomp enabled whitelist the getpid +syscall for their internal processes (e.g. systemd-journald), but call +it through syscall(SYS_getpid). This fix therefore allows O32 big endian +systems with a 64-bit kernel to run recent systemd versions. + +Signed-off-by: Aurelien Jarno +Cc: # v3.15+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paul Burton +Cc: Ralf Baechle +Cc: James Hogan +Cc: linux-mips@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/kernel/scall64-o32.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/mips/kernel/scall64-o32.S ++++ b/arch/mips/kernel/scall64-o32.S +@@ -125,7 +125,7 @@ trace_a_syscall: + subu t1, v0, __NR_O32_Linux + move a1, v0 + bnez t1, 1f /* __NR_syscall at offset 0 */ +- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ ++ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + .set pop + + 1: jal syscall_trace_enter diff --git a/queue-4.14/sched-numa-fix-a-possible-divide-by-zero.patch b/queue-4.14/sched-numa-fix-a-possible-divide-by-zero.patch new file mode 100644 index 00000000000..3d51fb588c1 --- /dev/null +++ b/queue-4.14/sched-numa-fix-a-possible-divide-by-zero.patch @@ -0,0 +1,53 @@ +From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001 +From: Xie XiuQi +Date: Sat, 20 Apr 2019 16:34:16 +0800 +Subject: sched/numa: Fix a possible 
divide-by-zero + +From: Xie XiuQi + +commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream. + +sched_clock_cpu() may not be consistent between CPUs. If a task +migrates to another CPU, then se.exec_start is set to that CPU's +rq_clock_task() by update_stats_curr_start(). Specifically, the new +value might be before the old value due to clock skew. + +So then if in numa_get_avg_runtime() the expression: + + 'now - p->last_task_numa_placement' + +ends up as -1, then the divider '*period + 1' in task_numa_placement() +is 0 and things go bang. Similar to update_curr(), check if time goes +backwards to avoid this. + +[ peterz: Wrote new changelog. ] +[ mingo: Tweaked the code comment. ] + +Signed-off-by: Xie XiuQi +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: cj.chengjian@huawei.com +Cc: +Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2026,6 +2026,10 @@ static u64 numa_get_avg_runtime(struct t + if (p->last_task_numa_placement) { + delta = runtime - p->last_sum_exec_runtime; + *period = now - p->last_task_numa_placement; ++ ++ /* Avoid time going backwards, prevent potential divide error: */ ++ if (unlikely((s64)*period < 0)) ++ *period = 0; + } else { + delta = p->se.avg.load_sum / p->se.load.weight; + *period = LOAD_AVG_MAX; diff --git a/queue-4.14/series b/queue-4.14/series index 69b31e5c890..edd9d57fb5f 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -1,3 +1,10 @@ kbuild-simplify-ld-option-implementation.patch cifs-do-not-attempt-cifs-operation-on-smb2-rename-error.patch tracing-fix-a-memory-leak-by-early-error-exit-in-trace_pid_write.patch +tracing-fix-buffer_ref-pipe-ops.patch +zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch 
+lib-kconfig.debug-fix-build-error-without-config_block.patch +mips-scall64-o32-fix-indirect-syscall-number-load.patch +trace-fix-preempt_enable_no_resched-abuse.patch +ib-rdmavt-fix-frwr-memory-registration.patch +sched-numa-fix-a-possible-divide-by-zero.patch diff --git a/queue-4.14/trace-fix-preempt_enable_no_resched-abuse.patch b/queue-4.14/trace-fix-preempt_enable_no_resched-abuse.patch new file mode 100644 index 00000000000..fba7d17883e --- /dev/null +++ b/queue-4.14/trace-fix-preempt_enable_no_resched-abuse.patch @@ -0,0 +1,48 @@ +From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 23 Apr 2019 22:03:18 +0200 +Subject: trace: Fix preempt_enable_no_resched() abuse + +From: Peter Zijlstra + +commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream. + +Unless the very next line is schedule(), or implies it, one must not use +preempt_enable_no_resched(). It can cause a preemption to go missing and +thereby cause arbitrary delays, breaking the PREEMPT=y invariant. 
+ +Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net + +Cc: Waiman Long +Cc: Linus Torvalds +Cc: Ingo Molnar +Cc: Will Deacon +Cc: Thomas Gleixner +Cc: the arch/x86 maintainers +Cc: Davidlohr Bueso +Cc: Tim Chen +Cc: huang ying +Cc: Roman Gushchin +Cc: Alexei Starovoitov +Cc: Daniel Borkmann +Cc: stable@vger.kernel.org +Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ring_buffer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -700,7 +700,7 @@ u64 ring_buffer_time_stamp(struct ring_b + + preempt_disable_notrace(); + time = rb_time_stamp(buffer); +- preempt_enable_no_resched_notrace(); ++ preempt_enable_notrace(); + + return time; + } diff --git a/queue-4.14/tracing-fix-buffer_ref-pipe-ops.patch b/queue-4.14/tracing-fix-buffer_ref-pipe-ops.patch new file mode 100644 index 00000000000..762774a153a --- /dev/null +++ b/queue-4.14/tracing-fix-buffer_ref-pipe-ops.patch @@ -0,0 +1,140 @@ +From b987222654f84f7b4ca95b3a55eca784cb30235b Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Thu, 4 Apr 2019 23:59:25 +0200 +Subject: tracing: Fix buffer_ref pipe ops + +From: Jann Horn + +commit b987222654f84f7b4ca95b3a55eca784cb30235b upstream. + +This fixes multiple issues in buffer_pipe_buf_ops: + + - The ->steal() handler must not return zero unless the pipe buffer has + the only reference to the page. But generic_pipe_buf_steal() assumes + that every reference to the pipe is tracked by the page's refcount, + which isn't true for these buffers - buffer_pipe_buf_get(), which + duplicates a buffer, doesn't touch the page's refcount. + Fix it by using generic_pipe_buf_nosteal(), which refuses every + attempted theft. 
It should be easy to actually support ->steal, but the + only current users of pipe_buf_steal() are the virtio console and FUSE, + and they also only use it as an optimization. So it's probably not worth + the effort. + - The ->get() and ->release() handlers can be invoked concurrently on pipe + buffers backed by the same struct buffer_ref. Make them safe against + concurrency by using refcount_t. + - The pointers stored in ->private were only zeroed out when the last + reference to the buffer_ref was dropped. As far as I know, this + shouldn't be necessary anyway, but if we do it, let's always do it. + +Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com + +Cc: Ingo Molnar +Cc: Masami Hiramatsu +Cc: Al Viro +Cc: stable@vger.kernel.org +Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") +Signed-off-by: Jann Horn +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + fs/splice.c | 4 ++-- + include/linux/pipe_fs_i.h | 1 + + kernel/trace/trace.c | 28 ++++++++++++++-------------- + 3 files changed, 17 insertions(+), 16 deletions(-) + +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -332,8 +332,8 @@ const struct pipe_buf_operations default + .get = generic_pipe_buf_get, + }; + +-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, +- struct pipe_buffer *buf) ++int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) + { + return 1; + } +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -182,6 +182,7 @@ void free_pipe_info(struct pipe_inode_in + void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); ++int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); + void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); 
+ void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6719,19 +6719,23 @@ struct buffer_ref { + struct ring_buffer *buffer; + void *page; + int cpu; +- int ref; ++ refcount_t refcount; + }; + ++static void buffer_ref_release(struct buffer_ref *ref) ++{ ++ if (!refcount_dec_and_test(&ref->refcount)) ++ return; ++ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); ++ kfree(ref); ++} ++ + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + +- if (--ref->ref) +- return; +- +- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); +- kfree(ref); ++ buffer_ref_release(ref); + buf->private = 0; + } + +@@ -6740,7 +6744,7 @@ static void buffer_pipe_buf_get(struct p + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + +- ref->ref++; ++ refcount_inc(&ref->refcount); + } + + /* Pipe buffer operations for a buffer. 
*/ +@@ -6748,7 +6752,7 @@ static const struct pipe_buf_operations + .can_merge = 0, + .confirm = generic_pipe_buf_confirm, + .release = buffer_pipe_buf_release, +- .steal = generic_pipe_buf_steal, ++ .steal = generic_pipe_buf_nosteal, + .get = buffer_pipe_buf_get, + }; + +@@ -6761,11 +6765,7 @@ static void buffer_spd_release(struct sp + struct buffer_ref *ref = + (struct buffer_ref *)spd->partial[i].private; + +- if (--ref->ref) +- return; +- +- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); +- kfree(ref); ++ buffer_ref_release(ref); + spd->partial[i].private = 0; + } + +@@ -6820,7 +6820,7 @@ tracing_buffers_splice_read(struct file + break; + } + +- ref->ref = 1; ++ refcount_set(&ref->refcount, 1); + ref->buffer = iter->trace_buffer->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); + if (IS_ERR(ref->page)) { diff --git a/queue-4.14/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch b/queue-4.14/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch new file mode 100644 index 00000000000..7e21b6bc6aa --- /dev/null +++ b/queue-4.14/zram-pass-down-the-bvec-we-need-to-read-into-in-the-work-struct.patch @@ -0,0 +1,67 @@ +From e153abc0739ff77bd89c9ba1688cdb963464af97 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= +Date: Thu, 25 Apr 2019 22:23:41 -0700 +Subject: zram: pass down the bvec we need to read into in the work struct +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jérôme Glisse + +commit e153abc0739ff77bd89c9ba1688cdb963464af97 upstream. + +When scheduling work item to read page we need to pass down the proper +bvec struct which points to the page to read into. Before this patch it +uses a randomly initialized bvec (only if PAGE_SIZE != 4096) which is +wrong. 
+ +Note that without this patch on arch/kernel where PAGE_SIZE != 4096 +userspace could read random memory through a zram block device (though +userspace probably would have no control on the address being read). + +Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com +Signed-off-by: Jérôme Glisse +Reviewed-by: Andrew Morton +Reviewed-by: Sergey Senozhatsky +Acked-by: Minchan Kim +Cc: Nitin Gupta +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/zram/zram_drv.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -488,18 +488,18 @@ struct zram_work { + struct zram *zram; + unsigned long entry; + struct bio *bio; ++ struct bio_vec bvec; + }; + + #if PAGE_SIZE != 4096 + static void zram_sync_read(struct work_struct *work) + { +- struct bio_vec bvec; + struct zram_work *zw = container_of(work, struct zram_work, work); + struct zram *zram = zw->zram; + unsigned long entry = zw->entry; + struct bio *bio = zw->bio; + +- read_from_bdev_async(zram, &bvec, entry, bio); ++ read_from_bdev_async(zram, &zw->bvec, entry, bio); + } + + /* +@@ -512,6 +512,7 @@ static int read_from_bdev_sync(struct zr + { + struct zram_work work; + ++ work.bvec = *bvec; + work.zram = zram; + work.entry = entry; + work.bio = bio;