From 417f8bc0edcc40e5d9384c3842b962f0aba1d91a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 29 Apr 2019 11:11:01 +0200
Subject: [PATCH] 4.9-stable patches

added patches:
    ib-rdmavt-fix-frwr-memory-registration.patch
    mips-scall64-o32-fix-indirect-syscall-number-load.patch
    sched-numa-fix-a-possible-divide-by-zero.patch
    trace-fix-preempt_enable_no_resched-abuse.patch
---
 ...-rdmavt-fix-frwr-memory-registration.patch | 83 +++++++++++++++++++
 ...o32-fix-indirect-syscall-number-load.patch | 53 ++++++++++++
 ...d-numa-fix-a-possible-divide-by-zero.patch | 53 ++++++++++++
 queue-4.9/series                              |  4 +
 ...-fix-preempt_enable_no_resched-abuse.patch | 48 +++++++++++
 5 files changed, 241 insertions(+)
 create mode 100644 queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch
 create mode 100644 queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch
 create mode 100644 queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch
 create mode 100644 queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch

diff --git a/queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch b/queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch
new file mode 100644
index 00000000000..23c69da31dc
--- /dev/null
+++ b/queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch
@@ -0,0 +1,83 @@
+From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001
+From: Josh Collier
+Date: Mon, 15 Apr 2019 11:34:22 -0700
+Subject: IB/rdmavt: Fix frwr memory registration
+
+From: Josh Collier
+
+commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream.
+
+Current implementation was not properly handling frwr memory
+registrations. This was uncovered by commit 27f26cec761das ("xprtrdma:
+Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is
+used for NFS over RDMA, started failing as it was the first ULP to modify
+the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR
+when attempting to perform RDMA Writes to the client.
+
+The fix is to properly capture the true iova, offset, and length in the
+call to ib_map_mr_sg, and then update the iova when processing the
+IB_WR_REG_MEM on the send queue.
+
+Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mike Marciniszyn
+Reviewed-by: Dennis Dalessandro
+Reviewed-by: Michael J. Ruhl
+Signed-off-by: Josh Collier
+Signed-off-by: Dennis Dalessandro
+Signed-off-by: Jason Gunthorpe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/drivers/infiniband/sw/rdmavt/mr.c
++++ b/drivers/infiniband/sw/rdmavt/mr.c
+@@ -497,11 +497,6 @@ static int rvt_set_page(struct ib_mr *ib
+ if (unlikely(mapped_segs == mr->mr.max_segs))
+ return -ENOMEM;
+
+- if (mr->mr.length == 0) {
+- mr->mr.user_base = addr;
+- mr->mr.iova = addr;
+- }
+-
+ m = mapped_segs / RVT_SEGSZ;
+ n = mapped_segs % RVT_SEGSZ;
+ mr->mr.map[m]->segs[n].vaddr = (void *)addr;
+@@ -518,17 +513,24 @@ static int rvt_set_page(struct ib_mr *ib
+ * @sg_nents: number of entries in sg
+ * @sg_offset: offset in bytes into sg
+ *
++ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
++ *
+ * Return: number of sg elements mapped to the memory region
+ */
+ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+ {
+ struct rvt_mr *mr = to_imr(ibmr);
++ int ret;
+
+ mr->mr.length = 0;
+ mr->mr.page_shift = PAGE_SHIFT;
+- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+- rvt_set_page);
++ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
++ mr->mr.user_base = ibmr->iova;
++ mr->mr.iova = ibmr->iova;
++ mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
++ mr->mr.length = (size_t)ibmr->length;
++ return ret;
+ }
+
+ /**
+@@ -559,6 +561,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, s
+ ibmr->rkey = key;
+ mr->mr.lkey = key;
+ mr->mr.access_flags = access;
++ mr->mr.iova = ibmr->iova;
+ atomic_set(&mr->mr.lkey_invalid, 0);
+
+ return 0;
diff --git a/queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch b/queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch
new file mode 100644
index 00000000000..d16d1408ff9
--- /dev/null
+++ b/queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch
@@ -0,0 +1,53 @@
+From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001
+From: Aurelien Jarno
+Date: Tue, 9 Apr 2019 16:53:55 +0200
+Subject: MIPS: scall64-o32: Fix indirect syscall number load
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Aurelien Jarno
+
+commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream.
+
+Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32))
+added indirect syscall detection for O32 processes running on MIPS64,
+but it did not work correctly for big endian kernel/processes. The
+reason is that the syscall number is loaded from ARG1 using the lw
+instruction while this is a 64-bit value, so zero is loaded instead of
+the syscall number.
+
+Fix the code by using the ld instruction instead. When running a 32-bit
+process on a 64-bit CPU, the values are properly sign-extended, so it
+ensures the value passed to syscall_trace_enter is correct.
+
+Recent systemd versions with seccomp enabled whitelist the getpid
+syscall for their internal processes (e.g. systemd-journald), but call
+it through syscall(SYS_getpid). This fix therefore allows O32 big endian
+systems with a 64-bit kernel to run recent systemd versions.
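+
+A minimal userspace C sketch of the failure mode described above (an
+illustration only, not kernel code and not part of the upstream change;
+the variable names are invented for the example):
+
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    int main(void)
+    {
+        /* Stand-in for the 64-bit PT_R4 save slot holding arg1 of syscall(). */
+        uint64_t saved_arg1 = 4020;   /* e.g. the o32 getpid syscall number */
+        uint32_t low32;
+
+        /* Mimics "lw a1, PT_R4(sp)": only the first 4 bytes of the slot are read. */
+        memcpy(&low32, &saved_arg1, sizeof(low32));
+
+        /* Prints 4020 on little-endian but 0 on big-endian, because the
+         * low-order bytes live at the higher address there. */
+        printf("32-bit load at slot base: %u\n", low32);
+
+        /* Mimics "ld a1, PT_R4(sp)": the whole slot is read, correct either way. */
+        printf("64-bit load: %llu\n", (unsigned long long)saved_arg1);
+        return 0;
+    }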
+
+Signed-off-by: Aurelien Jarno
+Cc: # v3.15+
+Reviewed-by: Philippe Mathieu-Daudé
+Signed-off-by: Paul Burton
+Cc: Ralf Baechle
+Cc: James Hogan
+Cc: linux-mips@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/mips/kernel/scall64-o32.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -125,7 +125,7 @@ trace_a_syscall:
+ subu t1, v0, __NR_O32_Linux
+ move a1, v0
+ bnez t1, 1f /* __NR_syscall at offset 0 */
+- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
++ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ .set pop
+
+ 1: jal syscall_trace_enter
diff --git a/queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch b/queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch
new file mode 100644
index 00000000000..dd5a5d2146e
--- /dev/null
+++ b/queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch
@@ -0,0 +1,53 @@
+From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001
+From: Xie XiuQi
+Date: Sat, 20 Apr 2019 16:34:16 +0800
+Subject: sched/numa: Fix a possible divide-by-zero
+
+From: Xie XiuQi
+
+commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream.
+
+sched_clock_cpu() may not be consistent between CPUs. If a task
+migrates to another CPU, then se.exec_start is set to that CPU's
+rq_clock_task() by update_stats_curr_start(). Specifically, the new
+value might be before the old value due to clock skew.
+
+So then if in numa_get_avg_runtime() the expression:
+
+ 'now - p->last_task_numa_placement'
+
+ends up as -1, then the divider '*period + 1' in task_numa_placement()
+is 0 and things go bang. Similar to update_curr(), check if time goes
+backwards to avoid this.
+
+[ peterz: Wrote new changelog. ]
+[ mingo: Tweaked the code comment. ]
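+
+A standalone C sketch of the guard added below (illustrative only; the
+helper is invented for the example and u64/s64 are typedef'd stand-ins
+for the kernel types):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    typedef uint64_t u64;
+    typedef int64_t s64;
+
+    /* Clock skew across CPUs can make 'now' precede the stored timestamp;
+     * the raw difference then wraps to a huge unsigned value, so clamp it
+     * to 0 before the caller uses 'period + 1' as a divider. */
+    static u64 numa_period(u64 now, u64 last_task_numa_placement)
+    {
+        u64 period = now - last_task_numa_placement;
+
+        if ((s64)period < 0)
+            period = 0;
+
+        return period;
+    }
+
+    int main(void)
+    {
+        /* 'now' sampled on a CPU whose clock lags the one that stored the stamp. */
+        u64 period = numa_period(999, 1000);
+
+        /* Without the clamp, period would be (u64)-1 and 'period + 1' would
+         * wrap to 0; with it, the divider is 1. */
+        printf("divider = %llu\n", (unsigned long long)(period + 1));
+        return 0;
+    }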
+
+Signed-off-by: Xie XiuQi
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: cj.chengjian@huawei.com
+Cc:
+Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched/fair.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1925,6 +1925,10 @@ static u64 numa_get_avg_runtime(struct t
+ if (p->last_task_numa_placement) {
+ delta = runtime - p->last_sum_exec_runtime;
+ *period = now - p->last_task_numa_placement;
++
++ /* Avoid time going backwards, prevent potential divide error: */
++ if (unlikely((s64)*period < 0))
++ *period = 0;
+ } else {
+ delta = p->se.avg.load_sum / p->se.load.weight;
+ *period = LOAD_AVG_MAX;
diff --git a/queue-4.9/series b/queue-4.9/series
index 69b31e5c890..95f2086ea1a 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -1,3 +1,7 @@
 kbuild-simplify-ld-option-implementation.patch
 cifs-do-not-attempt-cifs-operation-on-smb2-rename-error.patch
 tracing-fix-a-memory-leak-by-early-error-exit-in-trace_pid_write.patch
+mips-scall64-o32-fix-indirect-syscall-number-load.patch
+trace-fix-preempt_enable_no_resched-abuse.patch
+ib-rdmavt-fix-frwr-memory-registration.patch
+sched-numa-fix-a-possible-divide-by-zero.patch
diff --git a/queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch b/queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch
new file mode 100644
index 00000000000..09d6e254722
--- /dev/null
+++ b/queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch
@@ -0,0 +1,48 @@
+From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Tue, 23 Apr 2019 22:03:18 +0200
+Subject: trace: Fix preempt_enable_no_resched() abuse
+
+From: Peter Zijlstra
+
+commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream.
+
+Unless the very next line is schedule(), or implies it, one must not use
+preempt_enable_no_resched(). It can cause a preemption to go missing and
+thereby cause arbitrary delays, breaking the PREEMPT=y invariant.
+
+Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net
+
+Cc: Waiman Long
+Cc: Linus Torvalds
+Cc: Ingo Molnar
+Cc: Will Deacon
+Cc: Thomas Gleixner
+Cc: the arch/x86 maintainers
+Cc: Davidlohr Bueso
+Cc: Tim Chen
+Cc: huang ying
+Cc: Roman Gushchin
+Cc: Alexei Starovoitov
+Cc: Daniel Borkmann
+Cc: stable@vger.kernel.org
+Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()")
+Signed-off-by: Peter Zijlstra (Intel)
+Signed-off-by: Steven Rostedt (VMware)
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/trace/ring_buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_b
+
+ preempt_disable_notrace();
+ time = rb_time_stamp(buffer);
+- preempt_enable_no_resched_notrace();
++ preempt_enable_notrace();
+
+ return time;
+ }
-- 
2.47.2
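
A toy single-threaded C model of the invariant the trace patch above relies
on (purely illustrative; the toy_* helpers are invented stand-ins, not the
kernel's preemption API):

    #include <stdbool.h>
    #include <stdio.h>

    static int preempt_count;
    static bool need_resched;

    static void toy_schedule(void)
    {
        need_resched = false;
        puts("schedule()");
    }

    static void toy_preempt_disable(void)
    {
        preempt_count++;
    }

    /* Re-enables preemption and honours a pending reschedule request. */
    static void toy_preempt_enable(void)
    {
        if (--preempt_count == 0 && need_resched)
            toy_schedule();
    }

    /* Re-enables preemption but skips the check: only safe when the caller
     * calls schedule() itself on the very next line. */
    static void toy_preempt_enable_no_resched(void)
    {
        --preempt_count;
    }

    int main(void)
    {
        toy_preempt_disable();
        need_resched = true;              /* wakeup arrives in the critical section */
        toy_preempt_enable_no_resched();  /* request is silently ignored -> delay */
        printf("resched still pending: %d\n", need_resched);

        toy_preempt_disable();
        need_resched = true;
        toy_preempt_enable();             /* reschedules as required */
        printf("resched still pending: %d\n", need_resched);
        return 0;
    }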