4.9-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 29 Apr 2019 09:11:01 +0000 (11:11 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 29 Apr 2019 09:11:01 +0000 (11:11 +0200)
added patches:
ib-rdmavt-fix-frwr-memory-registration.patch
mips-scall64-o32-fix-indirect-syscall-number-load.patch
sched-numa-fix-a-possible-divide-by-zero.patch
trace-fix-preempt_enable_no_resched-abuse.patch

queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch [new file with mode: 0644]
queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch [new file with mode: 0644]
queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch [new file with mode: 0644]

diff --git a/queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch b/queue-4.9/ib-rdmavt-fix-frwr-memory-registration.patch
new file mode 100644 (file)
index 0000000..23c69da
--- /dev/null
@@ -0,0 +1,83 @@
+From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001
+From: Josh Collier <josh.d.collier@intel.com>
+Date: Mon, 15 Apr 2019 11:34:22 -0700
+Subject: IB/rdmavt: Fix frwr memory registration
+
+From: Josh Collier <josh.d.collier@intel.com>
+
+commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream.
+
+Current implementation was not properly handling frwr memory
+registrations. This was uncovered by commit 27f26cec761da ("xprtrdma:
+Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is
+used for NFS over RDMA, started failing as it was the first ULP to modify
+the ib_mr iova, resulting in the NFS server getting a REMOTE ACCESS ERROR
+when attempting to perform RDMA Writes to the client.
+
+The fix is to properly capture the true iova, offset, and length in the
+call to ib_map_mr_sg, and then update the iova when processing the
+IB_WR_REG_MEM on the send queue.
+
+Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Josh Collier <josh.d.collier@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/drivers/infiniband/sw/rdmavt/mr.c
++++ b/drivers/infiniband/sw/rdmavt/mr.c
+@@ -497,11 +497,6 @@ static int rvt_set_page(struct ib_mr *ib
+       if (unlikely(mapped_segs == mr->mr.max_segs))
+               return -ENOMEM;
+-      if (mr->mr.length == 0) {
+-              mr->mr.user_base = addr;
+-              mr->mr.iova = addr;
+-      }
+-
+       m = mapped_segs / RVT_SEGSZ;
+       n = mapped_segs % RVT_SEGSZ;
+       mr->mr.map[m]->segs[n].vaddr = (void *)addr;
+@@ -518,17 +513,24 @@ static int rvt_set_page(struct ib_mr *ib
+  * @sg_nents: number of entries in sg
+  * @sg_offset: offset in bytes into sg
+  *
++ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
++ *
+  * Return: number of sg elements mapped to the memory region
+  */
+ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+                 int sg_nents, unsigned int *sg_offset)
+ {
+       struct rvt_mr *mr = to_imr(ibmr);
++      int ret;
+       mr->mr.length = 0;
+       mr->mr.page_shift = PAGE_SHIFT;
+-      return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+-                            rvt_set_page);
++      ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
++      mr->mr.user_base = ibmr->iova;
++      mr->mr.iova = ibmr->iova;
++      mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
++      mr->mr.length = (size_t)ibmr->length;
++      return ret;
+ }
+ /**
+@@ -559,6 +561,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, s
+       ibmr->rkey = key;
+       mr->mr.lkey = key;
+       mr->mr.access_flags = access;
++      mr->mr.iova = ibmr->iova;
+       atomic_set(&mr->mr.lkey_invalid, 0);
+       return 0;
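
The crux of the fix above is the offset arithmetic: the ULP's iova need not
coincide with the start of the first mapped segment, so the byte offset into
that segment has to be derived rather than assumed zero. A minimal standalone
sketch of that arithmetic (simplified types and hypothetical example values,
not the actual rdmavt structures):

/* Sketch of the iova/offset bookkeeping in the fix above; the
 * addresses are hypothetical example values. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t first_seg_vaddr = 0x10000; /* vaddr of first mapped segment */
	uint64_t iova            = 0x10234; /* iova chosen by the ULP, e.g.
					       xprtrdma planting an XID */

	/* Derive the byte offset into the first segment from the
	 * caller's iova instead of assuming the region starts at 0. */
	uint64_t offset = iova - first_seg_vaddr;

	printf("offset into first segment: 0x%llx\n",
	       (unsigned long long)offset);
	return 0;
}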
diff --git a/queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch b/queue-4.9/mips-scall64-o32-fix-indirect-syscall-number-load.patch
new file mode 100644 (file)
index 0000000..d16d140
--- /dev/null
@@ -0,0 +1,53 @@
+From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001
+From: Aurelien Jarno <aurelien@aurel32.net>
+Date: Tue, 9 Apr 2019 16:53:55 +0200
+Subject: MIPS: scall64-o32: Fix indirect syscall number load
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Aurelien Jarno <aurelien@aurel32.net>
+
+commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream.
+
+Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32))
+added indirect syscall detection for O32 processes running on MIPS64,
+but it did not work correctly for big endian kernel/processes. The
+reason is that the syscall number is loaded from ARG1 using the lw
+instruction while this is a 64-bit value, so zero is loaded instead of
+the syscall number.
+
+Fix the code by using the ld instruction instead. When running a 32-bit
+process on a 64-bit CPU, the values are properly sign-extended, so this
+ensures the value passed to syscall_trace_enter is correct.
+
+Recent systemd versions with seccomp enabled whitelist the getpid
+syscall for their internal processes (e.g. systemd-journald), but call
+it through syscall(SYS_getpid). This fix therefore allows O32 big endian
+systems with a 64-bit kernel to run recent systemd versions.
+
+Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
+Cc: <stable@vger.kernel.org> # v3.15+
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: James Hogan <jhogan@kernel.org>
+Cc: linux-mips@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/kernel/scall64-o32.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -125,7 +125,7 @@ trace_a_syscall:
+       subu    t1, v0,  __NR_O32_Linux
+       move    a1, v0
+       bnez    t1, 1f /* __NR_syscall at offset 0 */
+-      lw      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
++      ld      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+       .set    pop
+ 1:    jal     syscall_trace_enter
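
Why lw loaded zero here on big endian: the o32 syscall number lives in the
low half of a 64-bit stack slot, and a 32-bit load from the start of the slot
picks up the high half instead. A user-space sketch of the effect
(illustrative only, not the kernel code):

/* Sketch: a 32-bit load from the start of a 64-bit slot returns the
 * HIGH word on big endian (zero for a small syscall number) and the
 * LOW word on little endian. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t saved_arg1 = 4020; /* e.g. __NR_getpid for o32 */
	uint32_t word;

	/* Mimics 'lw a1, PT_R4(sp)': read only the first 4 bytes. */
	memcpy(&word, &saved_arg1, sizeof(word));

	/* Big endian: word == 0; little endian: word == 4020.
	 * 'ld' reads the full 8 bytes and is correct on both. */
	printf("32-bit load from slot start: %u\n", (unsigned)word);
	return 0;
}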
diff --git a/queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch b/queue-4.9/sched-numa-fix-a-possible-divide-by-zero.patch
new file mode 100644 (file)
index 0000000..dd5a5d2
--- /dev/null
@@ -0,0 +1,53 @@
+From a860fa7b96e1a1c974556327aa1aee852d434c21 Mon Sep 17 00:00:00 2001
+From: Xie XiuQi <xiexiuqi@huawei.com>
+Date: Sat, 20 Apr 2019 16:34:16 +0800
+Subject: sched/numa: Fix a possible divide-by-zero
+
+From: Xie XiuQi <xiexiuqi@huawei.com>
+
+commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream.
+
+sched_clock_cpu() may not be consistent between CPUs. If a task
+migrates to another CPU, then se.exec_start is set to that CPU's
+rq_clock_task() by update_stats_curr_start(). Specifically, the new
+value might be before the old value due to clock skew.
+
+So then if in numa_get_avg_runtime() the expression:
+
+  'now - p->last_task_numa_placement'
+
+ends up as -1, then the divider '*period + 1' in task_numa_placement()
+is 0 and things go bang. Similar to update_curr(), check if time goes
+backwards to avoid this.
+
+[ peterz: Wrote new changelog. ]
+[ mingo: Tweaked the code comment. ]
+
+Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: cj.chengjian@huawei.com
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1925,6 +1925,10 @@ static u64 numa_get_avg_runtime(struct t
+       if (p->last_task_numa_placement) {
+               delta = runtime - p->last_sum_exec_runtime;
+               *period = now - p->last_task_numa_placement;
++
++              /* Avoid time going backwards, prevent potential divide error: */
++              if (unlikely((s64)*period < 0))
++                      *period = 0;
+       } else {
+               delta = p->se.avg.load_sum / p->se.load.weight;
+               *period = LOAD_AVG_MAX;
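
The failure mode is unsigned wraparound: when now lags last_task_numa_placement,
*period becomes a huge u64 value such as (u64)-1, and the caller's
'*period + 1' divider wraps to zero. A standalone sketch of the guard
(simplified names and values, not the scheduler code):

/* Sketch of the wraparound the fix guards against; 'now' and 'last'
 * are hypothetical clock readings with 'now' behind due to skew. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t now  = 1000;
	uint64_t last = 1001;         /* clock skew: later than 'now' */

	uint64_t period = now - last; /* wraps to 0xffffffffffffffff */

	if ((int64_t)period < 0)      /* the fix: time went backwards */
		period = 0;

	/* Without the clamp, period + 1 == 0 and this divides by zero. */
	uint64_t runtime = 123456;
	printf("avg = %llu\n", (unsigned long long)(runtime / (period + 1)));
	return 0;
}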
diff --git a/queue-4.9/series b/queue-4.9/series
index 69b31e5c890474777dab015fb84883dec4e1ff66..95f2086ea1aa932b15528f907d39b41b4d469083 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -1,3 +1,7 @@
 kbuild-simplify-ld-option-implementation.patch
 cifs-do-not-attempt-cifs-operation-on-smb2-rename-error.patch
 tracing-fix-a-memory-leak-by-early-error-exit-in-trace_pid_write.patch
+mips-scall64-o32-fix-indirect-syscall-number-load.patch
+trace-fix-preempt_enable_no_resched-abuse.patch
+ib-rdmavt-fix-frwr-memory-registration.patch
+sched-numa-fix-a-possible-divide-by-zero.patch
diff --git a/queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch b/queue-4.9/trace-fix-preempt_enable_no_resched-abuse.patch
new file mode 100644 (file)
index 0000000..09d6e25
--- /dev/null
@@ -0,0 +1,48 @@
+From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 23 Apr 2019 22:03:18 +0200
+Subject: trace: Fix preempt_enable_no_resched() abuse
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream.
+
+Unless the very next line is schedule(), or implies it, one must not use
+preempt_enable_no_resched(). It can cause a preemption to go missing and
+thereby cause arbitrary delays, breaking the PREEMPT=y invariant.
+
+Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net
+
+Cc: Waiman Long <longman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: the arch/x86 maintainers <x86@kernel.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: stable@vger.kernel.org
+Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ring_buffer.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_b
+       preempt_disable_notrace();
+       time = rb_time_stamp(buffer);
+-      preempt_enable_no_resched_notrace();
++      preempt_enable_notrace();
+       return time;
+ }
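
The invariant the one-line change restores: a pending preemption request must
be honoured when the preempt count drops back to zero. A user-space analogue
with stubbed primitives (not the kernel API) showing how the no_resched
variant silently drops such a request:

/* User-space analogue with stub implementations; illustrates the rule,
 * not the kernel's actual preemption machinery. */
#include <stdio.h>
#include <stdbool.h>

static int preempt_count;
static bool need_resched;

static void schedule(void) { need_resched = false; puts("scheduled"); }

static void preempt_disable(void) { preempt_count++; }

static void preempt_enable(void)
{
	if (--preempt_count == 0 && need_resched)
		schedule();              /* honours a pending preemption */
}

static void preempt_enable_no_resched(void)
{
	--preempt_count;                 /* pending preemption is ignored */
}

int main(void)
{
	need_resched = true;             /* preemption requested meanwhile */

	preempt_disable();
	preempt_enable_no_resched();     /* bug: request silently dropped
					    unless schedule() follows now */
	printf("need_resched still set: %d\n", need_resched);

	preempt_disable();
	preempt_enable();                /* fix: pending request honoured */
	return 0;
}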