git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 30 Apr 2020 15:09:31 +0000 (17:09 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 30 Apr 2020 15:09:31 +0000 (17:09 +0200)
added patches:
arm-dts-bcm283x-disable-dsi0-node.patch
blk-iocost-fix-error-on-iocost_ioc_vrate_adj.patch
bpf-x86_32-fix-clobbering-of-dst-for-bpf_jset.patch
bpf-x86_32-fix-incorrect-encoding-in-bpf_ldx-zero-extension.patch
bpf-x86_32-fix-logic-error-in-bpf_ldx-zero-extension.patch
cpumap-avoid-warning-when-config_debug_per_cpu_maps-is-enabled.patch
net-mlx5-fix-failing-fw-tracer-allocation-on-s390.patch
net-mlx5e-don-t-trigger-irq-multiple-times-on-xsk-wakeup-to-avoid-wq-overruns.patch
net-mlx5e-get-the-latest-values-from-counters-in-switchdev-mode.patch
netfilter-nat-fix-error-handling-upon-registering-inet-hook.patch
pci-add-acs-quirk-for-zhaoxin-multi-function-devices.patch
pci-add-acs-quirk-for-zhaoxin-root-downstream-ports.patch
pci-avoid-asmedia-xhci-usb-pme-from-d0-defect.patch
pci-move-apex-edge-tpu-class-quirk-to-fix-bar-assignment.patch
perf-core-fix-parent-pid-tid-in-task-exit-events.patch
pm-sleep-core-switch-back-to-async_schedule_dev.patch
s390-pci-do-not-set-affinity-for-floating-irqs.patch
sched-core-fix-reset-on-fork-from-rt-with-uclamp.patch
svcrdma-fix-leak-of-svc_rdma_recv_ctxt-objects.patch
svcrdma-fix-trace-point-use-after-free-race.patch
um-ensure-make-arch-um-mrproper-removes-arch-subarch-include-generated.patch

22 files changed:
queue-5.4/arm-dts-bcm283x-disable-dsi0-node.patch [new file with mode: 0644]
queue-5.4/blk-iocost-fix-error-on-iocost_ioc_vrate_adj.patch [new file with mode: 0644]
queue-5.4/bpf-x86_32-fix-clobbering-of-dst-for-bpf_jset.patch [new file with mode: 0644]
queue-5.4/bpf-x86_32-fix-incorrect-encoding-in-bpf_ldx-zero-extension.patch [new file with mode: 0644]
queue-5.4/bpf-x86_32-fix-logic-error-in-bpf_ldx-zero-extension.patch [new file with mode: 0644]
queue-5.4/cpumap-avoid-warning-when-config_debug_per_cpu_maps-is-enabled.patch [new file with mode: 0644]
queue-5.4/net-mlx5-fix-failing-fw-tracer-allocation-on-s390.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-don-t-trigger-irq-multiple-times-on-xsk-wakeup-to-avoid-wq-overruns.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-get-the-latest-values-from-counters-in-switchdev-mode.patch [new file with mode: 0644]
queue-5.4/netfilter-nat-fix-error-handling-upon-registering-inet-hook.patch [new file with mode: 0644]
queue-5.4/pci-add-acs-quirk-for-zhaoxin-multi-function-devices.patch [new file with mode: 0644]
queue-5.4/pci-add-acs-quirk-for-zhaoxin-root-downstream-ports.patch [new file with mode: 0644]
queue-5.4/pci-avoid-asmedia-xhci-usb-pme-from-d0-defect.patch [new file with mode: 0644]
queue-5.4/pci-move-apex-edge-tpu-class-quirk-to-fix-bar-assignment.patch [new file with mode: 0644]
queue-5.4/perf-core-fix-parent-pid-tid-in-task-exit-events.patch [new file with mode: 0644]
queue-5.4/pm-sleep-core-switch-back-to-async_schedule_dev.patch [new file with mode: 0644]
queue-5.4/s390-pci-do-not-set-affinity-for-floating-irqs.patch [new file with mode: 0644]
queue-5.4/sched-core-fix-reset-on-fork-from-rt-with-uclamp.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/svcrdma-fix-leak-of-svc_rdma_recv_ctxt-objects.patch [new file with mode: 0644]
queue-5.4/svcrdma-fix-trace-point-use-after-free-race.patch [new file with mode: 0644]
queue-5.4/um-ensure-make-arch-um-mrproper-removes-arch-subarch-include-generated.patch [new file with mode: 0644]

diff --git a/queue-5.4/arm-dts-bcm283x-disable-dsi0-node.patch b/queue-5.4/arm-dts-bcm283x-disable-dsi0-node.patch
new file mode 100644 (file)
index 0000000..5ab8fb2
--- /dev/null
@@ -0,0 +1,32 @@
+From 90444b958461a5f8fc299ece0fe17eab15cba1e1 Mon Sep 17 00:00:00 2001
+From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+Date: Wed, 15 Apr 2020 16:42:33 +0200
+Subject: ARM: dts: bcm283x: Disable dsi0 node
+
+From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+
+commit 90444b958461a5f8fc299ece0fe17eab15cba1e1 upstream.
+
+Since its inception the module was meant to be disabled by default, but
+the original commit failed to add the relevant property.
+
+Fixes: 4aba4cf82054 ("ARM: dts: bcm2835: Add the DSI module nodes and clocks")
+Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+Reviewed-by: Eric Anholt <eric@anholt.net>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/bcm283x.dtsi |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/boot/dts/bcm283x.dtsi
++++ b/arch/arm/boot/dts/bcm283x.dtsi
+@@ -488,6 +488,7 @@
+                                            "dsi0_ddr2",
+                                            "dsi0_ddr";
++                      status = "disabled";
+               };
+               thermal: thermal@7e212000 {
diff --git a/queue-5.4/blk-iocost-fix-error-on-iocost_ioc_vrate_adj.patch b/queue-5.4/blk-iocost-fix-error-on-iocost_ioc_vrate_adj.patch
new file mode 100644 (file)
index 0000000..21ad7b8
--- /dev/null
@@ -0,0 +1,79 @@
+From d6c8e949a35d6906d6c03a50e9a9cdf4e494528a Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 21 Apr 2020 09:07:55 -0400
+Subject: blk-iocost: Fix error on iocost_ioc_vrate_adj
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit d6c8e949a35d6906d6c03a50e9a9cdf4e494528a upstream.
+
+Systemtap 4.2 is unable to correctly interpret the "u32 (*missed_ppm)[2]"
+argument of the iocost_ioc_vrate_adj trace entry defined in
+include/trace/events/iocost.h leading to the following error:
+
+  /tmp/stapAcz0G0/stap_c89c58b83cea1724e26395efa9ed4939_6321_aux_6.c:78:8:
+  error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token
+   , u32[]* __tracepoint_arg_missed_ppm
+
+That argument type is indeed rather complex and hard to read. Looking
+at block/blk-iocost.c, it is just a 2-entry u32 array. By simplifying
+the argument to a simple "u32 *missed_ppm" and adjusting the trace
+entry accordingly, the compilation error goes away.
+
+Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
+Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-iocost.c            |    4 ++--
+ include/trace/events/iocost.h |    6 +++---
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -1594,7 +1594,7 @@ skip_surplus_transfers:
+                                     vrate_min, vrate_max);
+               }
+-              trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct,
++              trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct,
+                                          nr_lagging, nr_shortages,
+                                          nr_surpluses);
+@@ -1603,7 +1603,7 @@ skip_surplus_transfers:
+                       ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
+       } else if (ioc->busy_level != prev_busy_level || nr_lagging) {
+               trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
+-                                         &missed_ppm, rq_wait_pct, nr_lagging,
++                                         missed_ppm, rq_wait_pct, nr_lagging,
+                                          nr_shortages, nr_surpluses);
+       }
+--- a/include/trace/events/iocost.h
++++ b/include/trace/events/iocost.h
+@@ -130,7 +130,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_i
+ TRACE_EVENT(iocost_ioc_vrate_adj,
+-      TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 (*missed_ppm)[2],
++      TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm,
+               u32 rq_wait_pct, int nr_lagging, int nr_shortages,
+               int nr_surpluses),
+@@ -155,8 +155,8 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
+               __entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
+               __entry->new_vrate = new_vrate;
+               __entry->busy_level = ioc->busy_level;
+-              __entry->read_missed_ppm = (*missed_ppm)[READ];
+-              __entry->write_missed_ppm = (*missed_ppm)[WRITE];
++              __entry->read_missed_ppm = missed_ppm[READ];
++              __entry->write_missed_ppm = missed_ppm[WRITE];
+               __entry->rq_wait_pct = rq_wait_pct;
+               __entry->nr_lagging = nr_lagging;
+               __entry->nr_shortages = nr_shortages;
diff --git a/queue-5.4/bpf-x86_32-fix-clobbering-of-dst-for-bpf_jset.patch b/queue-5.4/bpf-x86_32-fix-clobbering-of-dst-for-bpf_jset.patch
new file mode 100644 (file)
index 0000000..9b1f6c8
--- /dev/null
@@ -0,0 +1,86 @@
+From 50fe7ebb6475711c15b3397467e6424e20026d94 Mon Sep 17 00:00:00 2001
+From: Luke Nelson <lukenels@cs.washington.edu>
+Date: Wed, 22 Apr 2020 10:36:30 -0700
+Subject: bpf, x86_32: Fix clobbering of dst for BPF_JSET
+
+From: Luke Nelson <lukenels@cs.washington.edu>
+
+commit 50fe7ebb6475711c15b3397467e6424e20026d94 upstream.
+
+The current JIT clobbers the destination register for BPF_JSET BPF_X
+and BPF_K by using "and" and "or" instructions. This is fine when the
+destination register is a temporary loaded from a register stored on
+the stack but not otherwise.
+
+This patch fixes the problem (for both BPF_K and BPF_X) by always loading
+the destination register into temporaries since BPF_JSET should not
+modify the destination register.
+
+This bug may not be currently triggerable as BPF_REG_AX is the only
+register not stored on the stack and the verifier uses it in a limited
+way.
+
+Fixes: 03f5781be2c7b ("bpf, x86_32: add eBPF JIT compiler for ia32")
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Wang YanQing <udknight@gmail.com>
+Link: https://lore.kernel.org/bpf/20200422173630.8351-2-luke.r.nels@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/net/bpf_jit_comp32.c |   22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -2015,8 +2015,8 @@ static int do_jit(struct bpf_prog *bpf_p
+               case BPF_JMP | BPF_JSET | BPF_X:
+               case BPF_JMP32 | BPF_JSET | BPF_X: {
+                       bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
+-                      u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+-                      u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
++                      u8 dreg_lo = IA32_EAX;
++                      u8 dreg_hi = IA32_EDX;
+                       u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+                       u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+@@ -2028,6 +2028,13 @@ static int do_jit(struct bpf_prog *bpf_p
+                                             add_2reg(0x40, IA32_EBP,
+                                                      IA32_EDX),
+                                             STACK_VAR(dst_hi));
++                      } else {
++                              /* mov dreg_lo,dst_lo */
++                              EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
++                              if (is_jmp64)
++                                      /* mov dreg_hi,dst_hi */
++                                      EMIT2(0x89,
++                                            add_2reg(0xC0, dreg_hi, dst_hi));
+                       }
+                       if (sstk) {
+@@ -2052,8 +2059,8 @@ static int do_jit(struct bpf_prog *bpf_p
+               case BPF_JMP | BPF_JSET | BPF_K:
+               case BPF_JMP32 | BPF_JSET | BPF_K: {
+                       bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
+-                      u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+-                      u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
++                      u8 dreg_lo = IA32_EAX;
++                      u8 dreg_hi = IA32_EDX;
+                       u8 sreg_lo = IA32_ECX;
+                       u8 sreg_hi = IA32_EBX;
+                       u32 hi;
+@@ -2066,6 +2073,13 @@ static int do_jit(struct bpf_prog *bpf_p
+                                             add_2reg(0x40, IA32_EBP,
+                                                      IA32_EDX),
+                                             STACK_VAR(dst_hi));
++                      } else {
++                              /* mov dreg_lo,dst_lo */
++                              EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
++                              if (is_jmp64)
++                                      /* mov dreg_hi,dst_hi */
++                                      EMIT2(0x89,
++                                            add_2reg(0xC0, dreg_hi, dst_hi));
+                       }
+                       /* mov ecx,imm32 */
diff --git a/queue-5.4/bpf-x86_32-fix-incorrect-encoding-in-bpf_ldx-zero-extension.patch b/queue-5.4/bpf-x86_32-fix-incorrect-encoding-in-bpf_ldx-zero-extension.patch
new file mode 100644 (file)
index 0000000..019069e
--- /dev/null
@@ -0,0 +1,55 @@
+From 5fa9a98fb10380e48a398998cd36a85e4ef711d6 Mon Sep 17 00:00:00 2001
+From: Luke Nelson <lukenels@cs.washington.edu>
+Date: Wed, 22 Apr 2020 10:36:29 -0700
+Subject: bpf, x86_32: Fix incorrect encoding in BPF_LDX zero-extension
+
+From: Luke Nelson <lukenels@cs.washington.edu>
+
+commit 5fa9a98fb10380e48a398998cd36a85e4ef711d6 upstream.
+
+The current JIT uses the following sequence to zero-extend into the
+upper 32 bits of the destination register for BPF_LDX BPF_{B,H,W},
+when the destination register is not on the stack:
+
+  EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
+
+The problem is that C7 /0 encodes a MOV instruction that requires a 4-byte
+immediate; the current code emits only 1 byte of the immediate. This
+means that the first 3 bytes of the next instruction will be treated as
+the rest of the immediate, breaking the stream of instructions.
+
+This patch fixes the problem by instead emitting "xor dst_hi,dst_hi"
+to clear the upper 32 bits. This fixes the problem and is more efficient
+than using MOV to load a zero immediate.
+
+This bug may not be currently triggerable as BPF_REG_AX is the only
+register not stored on the stack and the verifier uses it in a limited
+way, and the verifier implements a zero-extension optimization. But the
+JIT should avoid emitting incorrect encodings regardless.
+
+Fixes: 03f5781be2c7b ("bpf, x86_32: add eBPF JIT compiler for ia32")
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Reviewed-by: H. Peter Anvin (Intel) <hpa@zytor.com>
+Acked-by: Wang YanQing <udknight@gmail.com>
+Link: https://lore.kernel.org/bpf/20200422173630.8351-1-luke.r.nels@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/net/bpf_jit_comp32.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -1854,7 +1854,9 @@ static int do_jit(struct bpf_prog *bpf_p
+                                             STACK_VAR(dst_hi));
+                                       EMIT(0x0, 4);
+                               } else {
+-                                      EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
++                                      /* xor dst_hi,dst_hi */
++                                      EMIT2(0x33,
++                                            add_2reg(0xC0, dst_hi, dst_hi));
+                               }
+                               break;
+                       case BPF_DW:
diff --git a/queue-5.4/bpf-x86_32-fix-logic-error-in-bpf_ldx-zero-extension.patch b/queue-5.4/bpf-x86_32-fix-logic-error-in-bpf_ldx-zero-extension.patch
new file mode 100644 (file)
index 0000000..c84ad0f
--- /dev/null
@@ -0,0 +1,33 @@
+From 5ca1ca01fae1e90f8d010eb1d83374f28dc11ee6 Mon Sep 17 00:00:00 2001
+From: Wang YanQing <udknight@gmail.com>
+Date: Thu, 23 Apr 2020 13:06:37 +0800
+Subject: bpf, x86_32: Fix logic error in BPF_LDX zero-extension
+
+From: Wang YanQing <udknight@gmail.com>
+
+commit 5ca1ca01fae1e90f8d010eb1d83374f28dc11ee6 upstream.
+
+When verifier_zext is true, we don't need to emit code
+for zero-extension.
+
+Fixes: 836256bf5f37 ("x32: bpf: eliminate zero extension code-gen")
+Signed-off-by: Wang YanQing <udknight@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/bpf/20200423050637.GA4029@udknight
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/net/bpf_jit_comp32.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -1847,7 +1847,7 @@ static int do_jit(struct bpf_prog *bpf_p
+                       case BPF_B:
+                       case BPF_H:
+                       case BPF_W:
+-                              if (!bpf_prog->aux->verifier_zext)
++                              if (bpf_prog->aux->verifier_zext)
+                                       break;
+                               if (dstk) {
+                                       EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
diff --git a/queue-5.4/cpumap-avoid-warning-when-config_debug_per_cpu_maps-is-enabled.patch b/queue-5.4/cpumap-avoid-warning-when-config_debug_per_cpu_maps-is-enabled.patch
new file mode 100644 (file)
index 0000000..7657757
--- /dev/null
@@ -0,0 +1,46 @@
+From bc23d0e3f717ced21fbfacab3ab887d55e5ba367 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 16 Apr 2020 10:31:20 +0200
+Subject: cpumap: Avoid warning when CONFIG_DEBUG_PER_CPU_MAPS is enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+commit bc23d0e3f717ced21fbfacab3ab887d55e5ba367 upstream.
+
+When the kernel is built with CONFIG_DEBUG_PER_CPU_MAPS, the cpumap code
+can trigger a spurious warning if CONFIG_CPUMASK_OFFSTACK is also set. This
+happens because in this configuration, NR_CPUS can be larger than
+nr_cpumask_bits, so the initial check in cpu_map_alloc() is not sufficient
+to guard against hitting the warning in cpumask_check().
+
+Fix this by explicitly checking the supplied key against the
+nr_cpumask_bits variable before calling cpu_possible().
+
+Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Tested-by: Xiumei Mu <xmu@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Song Liu <songliubraving@fb.com>
+Link: https://lore.kernel.org/bpf/20200416083120.453718-1-toke@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/cpumap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -486,7 +486,7 @@ static int cpu_map_update_elem(struct bp
+               return -EOVERFLOW;
+       /* Make sure CPU is a valid possible cpu */
+-      if (!cpu_possible(key_cpu))
++      if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
+               return -ENODEV;
+       if (qsize == 0) {
diff --git a/queue-5.4/net-mlx5-fix-failing-fw-tracer-allocation-on-s390.patch b/queue-5.4/net-mlx5-fix-failing-fw-tracer-allocation-on-s390.patch
new file mode 100644 (file)
index 0000000..7744e05
--- /dev/null
@@ -0,0 +1,56 @@
+From a019b36123aec9700b21ae0724710f62928a8bc1 Mon Sep 17 00:00:00 2001
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+Date: Thu, 9 Apr 2020 09:46:20 +0200
+Subject: net/mlx5: Fix failing fw tracer allocation on s390
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+commit a019b36123aec9700b21ae0724710f62928a8bc1 upstream.
+
+On s390 FORCE_MAX_ZONEORDER is 9 instead of 11, thus a larger kzalloc()
+allocation as done for the firmware tracer will always fail.
+
+Looking at mlx5_fw_tracer_save_trace(), it is actually the driver itself
+that copies the debug data into the trace array and there is no need for
+the allocation to be contiguous in physical memory. We can therefore use
+kvzalloc() instead of kzalloc() and get rid of the large contiguous
+allocation.
+
+Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic")
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+@@ -935,7 +935,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_cr
+               return NULL;
+       }
+-      tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
++      tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL);
+       if (!tracer)
+               return ERR_PTR(-ENOMEM);
+@@ -982,7 +982,7 @@ destroy_workqueue:
+       tracer->dev = NULL;
+       destroy_workqueue(tracer->work_queue);
+ free_tracer:
+-      kfree(tracer);
++      kvfree(tracer);
+       return ERR_PTR(err);
+ }
+@@ -1061,7 +1061,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_
+       mlx5_fw_tracer_destroy_log_buf(tracer);
+       flush_workqueue(tracer->work_queue);
+       destroy_workqueue(tracer->work_queue);
+-      kfree(tracer);
++      kvfree(tracer);
+ }
+ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
diff --git a/queue-5.4/net-mlx5e-don-t-trigger-irq-multiple-times-on-xsk-wakeup-to-avoid-wq-overruns.patch b/queue-5.4/net-mlx5e-don-t-trigger-irq-multiple-times-on-xsk-wakeup-to-avoid-wq-overruns.patch
new file mode 100644 (file)
index 0000000..aebb76b
--- /dev/null
@@ -0,0 +1,111 @@
+From e7e0004abdd6f83ae4be5613b29ed396beff576c Mon Sep 17 00:00:00 2001
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Tue, 11 Feb 2020 16:02:35 +0200
+Subject: net/mlx5e: Don't trigger IRQ multiple times on XSK wakeup to avoid WQ overruns
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+commit e7e0004abdd6f83ae4be5613b29ed396beff576c upstream.
+
+XSK wakeup function triggers NAPI by posting a NOP WQE to a special XSK
+ICOSQ. When the application floods the driver with wakeup requests by
+calling sendto() in a certain pattern that ends up in mlx5e_trigger_irq,
+the XSK ICOSQ may overflow.
+
+Multiple NOPs are not required and won't accelerate the process, so
+avoid posting a second NOP if there is one already on the way. This way
+we also avoid increasing the queue size (which might not help anyway).
+
+Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h        |    3 ++-
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c |    3 +++
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c     |    8 +++++---
+ drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c   |    6 +++++-
+ 4 files changed, 15 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -367,6 +367,7 @@ enum {
+       MLX5E_SQ_STATE_AM,
+       MLX5E_SQ_STATE_TLS,
+       MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
++      MLX5E_SQ_STATE_PENDING_XSK_TX,
+ };
+ struct mlx5e_sq_wqe_info {
+@@ -948,7 +949,7 @@ void mlx5e_page_release_dynamic(struct m
+ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+-void mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
++int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
+ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
+ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+@@ -33,6 +33,9 @@ int mlx5e_xsk_wakeup(struct net_device *
+               if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state)))
+                       return 0;
++              if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->xskicosq.state))
++                      return 0;
++
+               spin_lock(&c->xskicosq_lock);
+               mlx5e_trigger_irq(&c->xskicosq);
+               spin_unlock(&c->xskicosq_lock);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -587,7 +587,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq
+       return !!err;
+ }
+-void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
++int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
+ {
+       struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
+       struct mlx5_cqe64 *cqe;
+@@ -595,11 +595,11 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *
+       int i;
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+-              return;
++              return 0;
+       cqe = mlx5_cqwq_get_cqe(&cq->wq);
+       if (likely(!cqe))
+-              return;
++              return 0;
+       /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+        * otherwise a cq overrun may occur
+@@ -646,6 +646,8 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *
+       sq->cc = sqcc;
+       mlx5_cqwq_update_db_record(&cq->wq);
++
++      return i;
+ }
+ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+@@ -145,7 +145,11 @@ int mlx5e_napi_poll(struct napi_struct *
+       busy |= rq->post_wqes(rq);
+       if (xsk_open) {
+-              mlx5e_poll_ico_cq(&c->xskicosq.cq);
++              if (mlx5e_poll_ico_cq(&c->xskicosq.cq))
++                      /* Don't clear the flag if nothing was polled to prevent
++                       * queueing more WQEs and overflowing XSKICOSQ.
++                       */
++                      clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->xskicosq.state);
+               busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
+               busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
+       }
diff --git a/queue-5.4/net-mlx5e-get-the-latest-values-from-counters-in-switchdev-mode.patch b/queue-5.4/net-mlx5e-get-the-latest-values-from-counters-in-switchdev-mode.patch
new file mode 100644 (file)
index 0000000..b7fb7f2
--- /dev/null
@@ -0,0 +1,53 @@
+From dcdf4ce0ff4ba206fc362e149c8ae81d6a2f849c Mon Sep 17 00:00:00 2001
+From: Zhu Yanjun <yanjunz@mellanox.com>
+Date: Wed, 8 Apr 2020 14:51:52 +0800
+Subject: net/mlx5e: Get the latest values from counters in switchdev mode
+
+From: Zhu Yanjun <yanjunz@mellanox.com>
+
+commit dcdf4ce0ff4ba206fc362e149c8ae81d6a2f849c upstream.
+
+In the switchdev mode, when running "cat
+/sys/class/net/NIC/statistics/tx_packets", the ppcnt register is
+accessed to get the latest values. But currently this command can
+not get the correct values from ppcnt.
+
+From firmware manual, before getting the 802_3 counters, the 802_3
+data layout should be set to the ppcnt register.
+
+When the command "cat /sys/class/net/NIC/statistics/tx_packets" is
+run, before updating 802_3 data layout with ppcnt register, the
+monitor counters are tested. The test result decides whether the
+802_3 data layout is updated or not.
+
+Actually the monitor counters do not support monitoring rx/tx
+stats of 802_3 in switchdev mode. So a change in the rx/tx counters
+will not trigger the monitor counters. So the 802_3 data layout will
+not be updated in ppcnt register. Finally this command can not get
+the latest values from ppcnt register with 802_3 data layout.
+
+Fixes: 5c7e8bbb0257 ("net/mlx5e: Use monitor counters for update stats")
+Signed-off-by: Zhu Yanjun <yanjunz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3579,7 +3579,12 @@ mlx5e_get_stats(struct net_device *dev,
+       struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+       struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+-      if (!mlx5e_monitor_counter_supported(priv)) {
++      /* In switchdev mode, monitor counters doesn't monitor
++       * rx/tx stats of 802_3. The update stats mechanism
++       * should keep the 802_3 layout counters updated
++       */
++      if (!mlx5e_monitor_counter_supported(priv) ||
++          mlx5e_is_uplink_rep(priv)) {
+               /* update HW stats in background for next time */
+               mlx5e_queue_update_stats(priv);
+       }
diff --git a/queue-5.4/netfilter-nat-fix-error-handling-upon-registering-inet-hook.patch b/queue-5.4/netfilter-nat-fix-error-handling-upon-registering-inet-hook.patch
new file mode 100644 (file)
index 0000000..dc626e2
--- /dev/null
@@ -0,0 +1,84 @@
+From b4faef1739dd1f3b3981b8bf173a2266ea86b1eb Mon Sep 17 00:00:00 2001
+From: Hillf Danton <hdanton@sina.com>
+Date: Sat, 18 Apr 2020 16:28:32 +0800
+Subject: netfilter: nat: fix error handling upon registering inet hook
+
+From: Hillf Danton <hdanton@sina.com>
+
+commit b4faef1739dd1f3b3981b8bf173a2266ea86b1eb upstream.
+
+A case of warning was reported by syzbot.
+
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 19934 at net/netfilter/nf_nat_core.c:1106
+nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106
+Kernel panic - not syncing: panic_on_warn set ...
+CPU: 0 PID: 19934 Comm: syz-executor.5 Not tainted 5.6.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x188/0x20d lib/dump_stack.c:118
+ panic+0x2e3/0x75c kernel/panic.c:221
+ __warn.cold+0x2f/0x35 kernel/panic.c:582
+ report_bug+0x27b/0x2f0 lib/bug.c:195
+ fixup_bug arch/x86/kernel/traps.c:175 [inline]
+ fixup_bug arch/x86/kernel/traps.c:170 [inline]
+ do_error_trap+0x12b/0x220 arch/x86/kernel/traps.c:267
+ do_invalid_op+0x32/0x40 arch/x86/kernel/traps.c:286
+ invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
+RIP: 0010:nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106
+Code: ff df 48 c1 ea 03 80 3c 02 00 75 75 48 8b 44 24 10 4c 89 ef 48 c7 00 00 00 00 00 e8 e8 f8 53 fb e9 4d fe ff ff e8 ee 9c 16 fb <0f> 0b e9 41 fe ff ff e8 e2 45 54 fb e9 b5 fd ff ff 48 8b 7c 24 20
+RSP: 0018:ffffc90005487208 EFLAGS: 00010246
+RAX: 0000000000040000 RBX: 0000000000000004 RCX: ffffc9001444a000
+RDX: 0000000000040000 RSI: ffffffff865c94a2 RDI: 0000000000000005
+RBP: ffff88808b5cf000 R08: ffff8880a2620140 R09: fffffbfff14bcd79
+R10: ffffc90005487208 R11: fffffbfff14bcd78 R12: 0000000000000000
+R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000
+ nf_nat_ipv6_unregister_fn net/netfilter/nf_nat_proto.c:1017 [inline]
+ nf_nat_inet_register_fn net/netfilter/nf_nat_proto.c:1038 [inline]
+ nf_nat_inet_register_fn+0xfc/0x140 net/netfilter/nf_nat_proto.c:1023
+ nf_tables_register_hook net/netfilter/nf_tables_api.c:224 [inline]
+ nf_tables_addchain.constprop.0+0x82e/0x13c0 net/netfilter/nf_tables_api.c:1981
+ nf_tables_newchain+0xf68/0x16a0 net/netfilter/nf_tables_api.c:2235
+ nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433
+ nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline]
+ nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561
+ netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
+ netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
+ netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
+ sock_sendmsg_nosec net/socket.c:652 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:672
+ ____sys_sendmsg+0x6bf/0x7e0 net/socket.c:2362
+ ___sys_sendmsg+0x100/0x170 net/socket.c:2416
+ __sys_sendmsg+0xec/0x1b0 net/socket.c:2449
+ do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
+ entry_SYSCALL_64_after_hwframe+0x49/0xb3
+
+and to quiesce it, unregister NFPROTO_IPV6 hook instead of NFPROTO_INET
+in case of failing to register NFPROTO_IPV4 hook.
+
+Reported-by: syzbot <syzbot+33e06702fd6cffc24c40@syzkaller.appspotmail.com>
+Fixes: d164385ec572 ("netfilter: nat: add inet family nat support")
+Cc: Florian Westphal <fw@strlen.de>
+Cc: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Hillf Danton <hdanton@sina.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_nat_proto.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nf_nat_proto.c
++++ b/net/netfilter/nf_nat_proto.c
+@@ -1035,8 +1035,8 @@ int nf_nat_inet_register_fn(struct net *
+       ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
+                                ARRAY_SIZE(nf_nat_ipv4_ops));
+       if (ret)
+-              nf_nat_ipv6_unregister_fn(net, ops);
+-
++              nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
++                                      ARRAY_SIZE(nf_nat_ipv6_ops));
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
diff --git a/queue-5.4/pci-add-acs-quirk-for-zhaoxin-multi-function-devices.patch b/queue-5.4/pci-add-acs-quirk-for-zhaoxin-multi-function-devices.patch
new file mode 100644 (file)
index 0000000..990d832
--- /dev/null
@@ -0,0 +1,35 @@
+From 0325837c51cb7c9a5bd3e354ac0c0cda0667d50e Mon Sep 17 00:00:00 2001
+From: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+Date: Fri, 27 Mar 2020 17:11:47 +0800
+Subject: PCI: Add ACS quirk for Zhaoxin multi-function devices
+
+From: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+
+commit 0325837c51cb7c9a5bd3e354ac0c0cda0667d50e upstream.
+
+Some Zhaoxin endpoints are implemented as multi-function devices without an
+ACS capability, but they actually don't support peer-to-peer transactions.
+Add ACS quirks to declare DMA isolation.
+
+Link: https://lore.kernel.org/r/20200327091148.5190-3-RaymondPang-oc@zhaoxin.com
+Signed-off-by: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/quirks.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -4759,6 +4759,10 @@ static const struct pci_dev_acs_enabled
+       { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
+       /* Amazon Annapurna Labs */
+       { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
++      /* Zhaoxin multi-function devices */
++      { PCI_VENDOR_ID_ZHAOXIN, 0x3038, pci_quirk_mf_endpoint_acs },
++      { PCI_VENDOR_ID_ZHAOXIN, 0x3104, pci_quirk_mf_endpoint_acs },
++      { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
+       { 0 }
+ };
diff --git a/queue-5.4/pci-add-acs-quirk-for-zhaoxin-root-downstream-ports.patch b/queue-5.4/pci-add-acs-quirk-for-zhaoxin-root-downstream-ports.patch
new file mode 100644 (file)
index 0000000..9d57fcc
--- /dev/null
@@ -0,0 +1,64 @@
+From 299bd044a6f332b4a6c8f708575c27cad70a35c1 Mon Sep 17 00:00:00 2001
+From: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+Date: Fri, 27 Mar 2020 17:11:48 +0800
+Subject: PCI: Add ACS quirk for Zhaoxin Root/Downstream Ports
+
+From: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+
+commit 299bd044a6f332b4a6c8f708575c27cad70a35c1 upstream.
+
+Many Zhaoxin Root Ports and Switch Downstream Ports do provide ACS-like
+capability but have no ACS Capability Structure.  Peer-to-Peer transactions
+can be blocked between these ports, so add a quirk so that devices behind
+them can be assigned to different IOMMU groups.
+
+Link: https://lore.kernel.org/r/20200327091148.5190-4-RaymondPang-oc@zhaoxin.com
+Signed-off-by: Raymond Pang <RaymondPang-oc@zhaoxin.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/quirks.c |   25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -4353,6 +4353,29 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_C
+                        quirk_chelsio_T5_disable_root_port_attributes);
+ /*
++ * Many Zhaoxin Root Ports and Switch Downstream Ports have no ACS capability.
++ * But the implementation could block peer-to-peer transactions between them
++ * and provide ACS-like functionality.
++ */
++static int  pci_quirk_zhaoxin_pcie_ports_acs(struct pci_dev *dev, u16 acs_flags)
++{
++      if (!pci_is_pcie(dev) ||
++          ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) &&
++           (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM)))
++              return -ENOTTY;
++
++      switch (dev->device) {
++      case 0x0710 ... 0x071e:
++      case 0x0721:
++      case 0x0723 ... 0x0732:
++              return pci_acs_ctrl_enabled(acs_flags,
++                      PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
++      }
++
++      return false;
++}
++
++/*
+  * AMD has indicated that the devices below do not support peer-to-peer
+  * in any system where they are found in the southbridge with an AMD
+  * IOMMU in the system.  Multifunction devices that do not support
+@@ -4763,6 +4786,8 @@ static const struct pci_dev_acs_enabled
+       { PCI_VENDOR_ID_ZHAOXIN, 0x3038, pci_quirk_mf_endpoint_acs },
+       { PCI_VENDOR_ID_ZHAOXIN, 0x3104, pci_quirk_mf_endpoint_acs },
+       { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
++      /* Zhaoxin Root/Downstream Ports */
++      { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+       { 0 }
+ };
diff --git a/queue-5.4/pci-avoid-asmedia-xhci-usb-pme-from-d0-defect.patch b/queue-5.4/pci-avoid-asmedia-xhci-usb-pme-from-d0-defect.patch
new file mode 100644 (file)
index 0000000..5e0cb66
--- /dev/null
@@ -0,0 +1,50 @@
+From 2880325bda8d53566dcb9725abc929eec871608e Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Date: Fri, 20 Dec 2019 03:20:06 +0800
+Subject: PCI: Avoid ASMedia XHCI USB PME# from D0 defect
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+commit 2880325bda8d53566dcb9725abc929eec871608e upstream.
+
+The ASMedia USB XHCI Controller claims to support generating PME# while
+in D0:
+
+  01:00.0 USB controller: ASMedia Technology Inc. Device 2142 (prog-if 30 [XHCI])
+    Subsystem: SUNIX Co., Ltd. Device 312b
+    Capabilities: [78] Power Management version 3
+      Flags: PMEClk- DSI- D1- D2- AuxCurrent=55mA PME(D0+,D1-,D2-,D3hot-,D3cold-)
+      Status: D0 NoSoftRst+ PME-Enable+ DSel=0 DScale=0 PME-
+
+However PME# only gets asserted when plugging USB 2.0 or USB 1.1 devices,
+but not for USB 3.0 devices.
+
+Remove PCI_PM_CAP_PME_D0 to avoid using PME under D0.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=205919
+Link: https://lore.kernel.org/r/20191219192006.16270-1-kai.heng.feng@canonical.com
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/quirks.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -5490,3 +5490,14 @@ out_disable:
+ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, 0x13b1,
+                             PCI_CLASS_DISPLAY_VGA, 8,
+                             quirk_reset_lenovo_thinkpad_p50_nvgpu);
++
++/*
++ * Device [1b21:2142]
++ * When in D0, PME# doesn't get asserted when plugging USB 3.0 device.
++ */
++static void pci_fixup_no_d0_pme(struct pci_dev *dev)
++{
++      pci_info(dev, "PME# does not work under D0, disabling it\n");
++      dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT);
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x2142, pci_fixup_no_d0_pme);
diff --git a/queue-5.4/pci-move-apex-edge-tpu-class-quirk-to-fix-bar-assignment.patch b/queue-5.4/pci-move-apex-edge-tpu-class-quirk-to-fix-bar-assignment.patch
new file mode 100644 (file)
index 0000000..56faf57
--- /dev/null
@@ -0,0 +1,75 @@
+From 0a8f41023e8a3c100b3dc458ed2da651bf961ead Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 9 Apr 2020 12:43:45 -0500
+Subject: PCI: Move Apex Edge TPU class quirk to fix BAR assignment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 0a8f41023e8a3c100b3dc458ed2da651bf961ead upstream.
+
+Some Google Apex Edge TPU devices have a class code of 0
+(PCI_CLASS_NOT_DEFINED).  This prevents the PCI core from assigning
+resources for the Apex BARs because __dev_sort_resources() ignores
+classless devices, host bridges, and IOAPICs.
+
+On x86, firmware typically assigns those resources, so this was not a
+problem.  But on some architectures, firmware does *not* assign BARs, and
+since the PCI core didn't do it either, the Apex device didn't work
+correctly:
+
+  apex 0000:01:00.0: can't enable device: BAR 0 [mem 0x00000000-0x00003fff 64bit pref] not claimed
+  apex 0000:01:00.0: error enabling PCI device
+
+f390d08d8b87 ("staging: gasket: apex: fixup undefined PCI class") added a
+quirk to fix the class code, but it was in the apex driver, and if the
+driver was built as a module, it was too late to help.
+
+Move the quirk to the PCI core, where it will always run early enough that
+the PCI core will assign resources if necessary.
+
+Link: https://lore.kernel.org/r/CAEzXK1r0Er039iERnc2KJ4jn7ySNUOG9H=Ha8TD8XroVqiZjgg@mail.gmail.com
+Fixes: f390d08d8b87 ("staging: gasket: apex: fixup undefined PCI class")
+Reported-by: Luís Mendes <luis.p.mendes@gmail.com>
+Debugged-by: Luís Mendes <luis.p.mendes@gmail.com>
+Tested-by: Luis Mendes <luis.p.mendes@gmail.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: Todd Poynor <toddpoynor@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/quirks.c                 |    7 +++++++
+ drivers/staging/gasket/apex_driver.c |    7 -------
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -5530,3 +5530,10 @@ static void pci_fixup_no_d0_pme(struct p
+       dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT);
+ }
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x2142, pci_fixup_no_d0_pme);
++
++static void apex_pci_fixup_class(struct pci_dev *pdev)
++{
++      pdev->class = (PCI_CLASS_SYSTEM_OTHER << 8) | pdev->class;
++}
++DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a,
++                             PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class);
+--- a/drivers/staging/gasket/apex_driver.c
++++ b/drivers/staging/gasket/apex_driver.c
+@@ -570,13 +570,6 @@ static const struct pci_device_id apex_p
+       { PCI_DEVICE(APEX_PCI_VENDOR_ID, APEX_PCI_DEVICE_ID) }, { 0 }
+ };
+-static void apex_pci_fixup_class(struct pci_dev *pdev)
+-{
+-      pdev->class = (PCI_CLASS_SYSTEM_OTHER << 8) | pdev->class;
+-}
+-DECLARE_PCI_FIXUP_CLASS_HEADER(APEX_PCI_VENDOR_ID, APEX_PCI_DEVICE_ID,
+-                             PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class);
+-
+ static int apex_pci_probe(struct pci_dev *pci_dev,
+                         const struct pci_device_id *id)
+ {
diff --git a/queue-5.4/perf-core-fix-parent-pid-tid-in-task-exit-events.patch b/queue-5.4/perf-core-fix-parent-pid-tid-in-task-exit-events.patch
new file mode 100644 (file)
index 0000000..d8eb486
--- /dev/null
@@ -0,0 +1,59 @@
+From f3bed55e850926614b9898fe982f66d2541a36a5 Mon Sep 17 00:00:00 2001
+From: Ian Rogers <irogers@google.com>
+Date: Fri, 17 Apr 2020 11:28:42 -0700
+Subject: perf/core: fix parent pid/tid in task exit events
+
+From: Ian Rogers <irogers@google.com>
+
+commit f3bed55e850926614b9898fe982f66d2541a36a5 upstream.
+
+Current logic yields the child task as the parent.
+
+Before:
+$ perf record bash -c "perf list > /dev/null"
+$ perf script -D |grep 'FORK\|EXIT'
+4387036190981094 0x5a70 [0x30]: PERF_RECORD_FORK(10472:10472):(10470:10470)
+4387036606207580 0xf050 [0x30]: PERF_RECORD_EXIT(10472:10472):(10472:10472)
+4387036607103839 0x17150 [0x30]: PERF_RECORD_EXIT(10470:10470):(10470:10470)
+                                                   ^
+  Note the repeated values here -------------------/
+
+After:
+383281514043 0x9d8 [0x30]: PERF_RECORD_FORK(2268:2268):(2266:2266)
+383442003996 0x2180 [0x30]: PERF_RECORD_EXIT(2268:2268):(2266:2266)
+383451297778 0xb70 [0x30]: PERF_RECORD_EXIT(2266:2266):(2265:2265)
+
+Fixes: 94d5d1b2d891 ("perf_counter: Report the cloning task as parent on perf_counter_fork()")
+Reported-by: KP Singh <kpsingh@google.com>
+Signed-off-by: Ian Rogers <irogers@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20200417182842.12522-1-irogers@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7052,10 +7052,17 @@ static void perf_event_task_output(struc
+               goto out;
+       task_event->event_id.pid = perf_event_pid(event, task);
+-      task_event->event_id.ppid = perf_event_pid(event, current);
+-
+       task_event->event_id.tid = perf_event_tid(event, task);
+-      task_event->event_id.ptid = perf_event_tid(event, current);
++
++      if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
++              task_event->event_id.ppid = perf_event_pid(event,
++                                                      task->real_parent);
++              task_event->event_id.ptid = perf_event_pid(event,
++                                                      task->real_parent);
++      } else {  /* PERF_RECORD_FORK */
++              task_event->event_id.ppid = perf_event_pid(event, current);
++              task_event->event_id.ptid = perf_event_tid(event, current);
++      }
+       task_event->event_id.time = perf_event_clock(event);
diff --git a/queue-5.4/pm-sleep-core-switch-back-to-async_schedule_dev.patch b/queue-5.4/pm-sleep-core-switch-back-to-async_schedule_dev.patch
new file mode 100644 (file)
index 0000000..5b88d71
--- /dev/null
@@ -0,0 +1,37 @@
+From 09beebd8f93b3c8bf894e342f0a203a5c612478c Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Date: Tue, 21 Apr 2020 16:21:55 +0800
+Subject: PM: sleep: core: Switch back to async_schedule_dev()
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+commit 09beebd8f93b3c8bf894e342f0a203a5c612478c upstream.
+
+Commit 8b9ec6b73277 ("PM core: Use new async_schedule_dev command")
+introduced a new function for better performance.
+
+However commit f2a424f6c613 ("PM / core: Introduce dpm_async_fn()
+helper") went back to the non-optimized version, async_schedule().
+
+So switch back to async_schedule_dev() to improve performance.
+
+Fixes: f2a424f6c613 ("PM / core: Introduce dpm_async_fn() helper")
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/power/main.c
++++ b/drivers/base/power/main.c
+@@ -726,7 +726,7 @@ static bool dpm_async_fn(struct device *
+       if (is_async(dev)) {
+               get_device(dev);
+-              async_schedule(func, dev);
++              async_schedule_dev(func, dev);
+               return true;
+       }
diff --git a/queue-5.4/s390-pci-do-not-set-affinity-for-floating-irqs.patch b/queue-5.4/s390-pci-do-not-set-affinity-for-floating-irqs.patch
new file mode 100644 (file)
index 0000000..98b3010
--- /dev/null
@@ -0,0 +1,54 @@
+From 86dbf32da150339ca81509fa2eb84c814b55258b Mon Sep 17 00:00:00 2001
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+Date: Thu, 16 Apr 2020 13:44:30 +0200
+Subject: s390/pci: do not set affinity for floating irqs
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+commit 86dbf32da150339ca81509fa2eb84c814b55258b upstream.
+
+With the introduction of CPU directed interrupts, the kernel
+parameter pci=force_floating was introduced to fall back
+to the previous behavior using floating irqs.
+
+However we were still setting the affinity in that case,
+both in __irq_alloc_descs() and via the irq_set_affinity
+callback in struct irq_chip.
+
+For the former only set the affinity in the directed case.
+
+The latter is explicitly set in zpci_directed_irq_init()
+so we can just leave it unset for the floating case.
+
+Fixes: e979ce7bced2 ("s390/pci: provide support for CPU directed interrupts")
+Co-developed-by: Alexander Schmidt <alexs@linux.ibm.com>
+Signed-off-by: Alexander Schmidt <alexs@linux.ibm.com>
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/pci/pci_irq.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/pci/pci_irq.c
++++ b/arch/s390/pci/pci_irq.c
+@@ -115,7 +115,6 @@ static struct irq_chip zpci_irq_chip = {
+       .name = "PCI-MSI",
+       .irq_unmask = pci_msi_unmask_irq,
+       .irq_mask = pci_msi_mask_irq,
+-      .irq_set_affinity = zpci_set_irq_affinity,
+ };
+ static void zpci_handle_cpu_local_irq(bool rescan)
+@@ -276,7 +275,9 @@ int arch_setup_msi_irqs(struct pci_dev *
+               rc = -EIO;
+               if (hwirq - bit >= msi_vecs)
+                       break;
+-              irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
++              irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
++                              (irq_delivery == DIRECTED) ?
++                              msi->affinity : NULL);
+               if (irq < 0)
+                       return -ENOMEM;
+               rc = irq_set_msi_desc(irq, msi);
diff --git a/queue-5.4/sched-core-fix-reset-on-fork-from-rt-with-uclamp.patch b/queue-5.4/sched-core-fix-reset-on-fork-from-rt-with-uclamp.patch
new file mode 100644 (file)
index 0000000..9c1e8ba
--- /dev/null
@@ -0,0 +1,51 @@
+From eaf5a92ebde5bca3bb2565616115bd6d579486cd Mon Sep 17 00:00:00 2001
+From: Quentin Perret <qperret@google.com>
+Date: Thu, 16 Apr 2020 09:59:56 +0100
+Subject: sched/core: Fix reset-on-fork from RT with uclamp
+
+From: Quentin Perret <qperret@google.com>
+
+commit eaf5a92ebde5bca3bb2565616115bd6d579486cd upstream.
+
+uclamp_fork() resets the uclamp values to their default when the
+reset-on-fork flag is set. It also checks whether the task has a RT
+policy, and sets its uclamp.min to 1024 accordingly. However, during
+reset-on-fork, the task's policy is lowered to SCHED_NORMAL right after,
+hence leading to an erroneous uclamp.min setting for the new task if it
+was forked from RT.
+
+Fix this by removing the unnecessary check on rt_task() in
+uclamp_fork() as this doesn't make sense if the reset-on-fork flag is
+set.
+
+Fixes: 1a00d999971c ("sched/uclamp: Set default clamps for RT tasks")
+Reported-by: Chitti Babu Theegala <ctheegal@codeaurora.org>
+Signed-off-by: Quentin Perret <qperret@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Patrick Bellasi <patrick.bellasi@matbug.net>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Link: https://lkml.kernel.org/r/20200416085956.217587-1-qperret@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c |    9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1233,13 +1233,8 @@ static void uclamp_fork(struct task_stru
+               return;
+       for_each_clamp_id(clamp_id) {
+-              unsigned int clamp_value = uclamp_none(clamp_id);
+-
+-              /* By default, RT tasks always get 100% boost */
+-              if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
+-                      clamp_value = uclamp_none(UCLAMP_MAX);
+-
+-              uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
++              uclamp_se_set(&p->uclamp_req[clamp_id],
++                            uclamp_none(clamp_id), false);
+       }
+ }
index 6787040aafa671d71f7bd91efa3be7812d6c34c1..6c0df1a2a223bfdca1b58adf490bf29d223d0ad3 100644 (file)
@@ -23,3 +23,24 @@ i2c-altera-use-proper-variable-to-hold-errno.patch
 rxrpc-fix-data-tx-to-disable-nofrag-for-udp-on-af_inet6-socket.patch
 net-cxgb4-check-the-return-from-t4_query_params-properly.patch
 xfs-acquire-superblock-freeze-protection-on-eofblocks-scans.patch
+svcrdma-fix-trace-point-use-after-free-race.patch
+svcrdma-fix-leak-of-svc_rdma_recv_ctxt-objects.patch
+net-mlx5e-don-t-trigger-irq-multiple-times-on-xsk-wakeup-to-avoid-wq-overruns.patch
+net-mlx5e-get-the-latest-values-from-counters-in-switchdev-mode.patch
+pci-avoid-asmedia-xhci-usb-pme-from-d0-defect.patch
+pci-add-acs-quirk-for-zhaoxin-multi-function-devices.patch
+pci-add-acs-quirk-for-zhaoxin-root-downstream-ports.patch
+pci-move-apex-edge-tpu-class-quirk-to-fix-bar-assignment.patch
+arm-dts-bcm283x-disable-dsi0-node.patch
+cpumap-avoid-warning-when-config_debug_per_cpu_maps-is-enabled.patch
+s390-pci-do-not-set-affinity-for-floating-irqs.patch
+net-mlx5-fix-failing-fw-tracer-allocation-on-s390.patch
+sched-core-fix-reset-on-fork-from-rt-with-uclamp.patch
+perf-core-fix-parent-pid-tid-in-task-exit-events.patch
+netfilter-nat-fix-error-handling-upon-registering-inet-hook.patch
+pm-sleep-core-switch-back-to-async_schedule_dev.patch
+blk-iocost-fix-error-on-iocost_ioc_vrate_adj.patch
+um-ensure-make-arch-um-mrproper-removes-arch-subarch-include-generated.patch
+bpf-x86_32-fix-incorrect-encoding-in-bpf_ldx-zero-extension.patch
+bpf-x86_32-fix-clobbering-of-dst-for-bpf_jset.patch
+bpf-x86_32-fix-logic-error-in-bpf_ldx-zero-extension.patch
diff --git a/queue-5.4/svcrdma-fix-leak-of-svc_rdma_recv_ctxt-objects.patch b/queue-5.4/svcrdma-fix-leak-of-svc_rdma_recv_ctxt-objects.patch
new file mode 100644 (file)
index 0000000..24a8212
--- /dev/null
@@ -0,0 +1,169 @@
+From 23cf1ee1f1869966b75518c59b5cbda4c6c92450 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Tue, 31 Mar 2020 17:02:33 -0400
+Subject: svcrdma: Fix leak of svc_rdma_recv_ctxt objects
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 23cf1ee1f1869966b75518c59b5cbda4c6c92450 upstream.
+
+Utilize the xpo_release_rqst transport method to ensure that each
+rqstp's svc_rdma_recv_ctxt object is released even when the server
+cannot return a Reply for that rqstp.
+
+Without this fix, each RPC whose Reply cannot be sent leaks one
+svc_rdma_recv_ctxt. This is a 2.5KB structure, a 4KB DMA-mapped
+Receive buffer, and any pages that might be part of the Reply
+message.
+
+The leak is infrequent unless the network fabric is unreliable or
+Kerberos is in use, as GSS sequence window overruns, which result
+in connection loss, are more common on fast transports.
+
+Fixes: 3a88092ee319 ("svcrdma: Preserve Receive buffer until svc_rdma_sendto")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/sunrpc/svc_rdma.h          |    1 +
+ net/sunrpc/svc_xprt.c                    |    3 ---
+ net/sunrpc/svcsock.c                     |    4 ++++
+ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  |   22 ++++++++++++++++++++++
+ net/sunrpc/xprtrdma/svc_rdma_sendto.c    |   13 +++----------
+ net/sunrpc/xprtrdma/svc_rdma_transport.c |    5 -----
+ 6 files changed, 30 insertions(+), 18 deletions(-)
+
+--- a/include/linux/sunrpc/svc_rdma.h
++++ b/include/linux/sunrpc/svc_rdma.h
+@@ -162,6 +162,7 @@ extern bool svc_rdma_post_recvs(struct s
+ extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
+                                  struct svc_rdma_recv_ctxt *ctxt);
+ extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
++extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
+ extern int svc_rdma_recvfrom(struct svc_rqst *);
+ /* svc_rdma_rw.c */
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -897,9 +897,6 @@ int svc_send(struct svc_rqst *rqstp)
+       if (!xprt)
+               goto out;
+-      /* release the receive skb before sending the reply */
+-      xprt->xpt_ops->xpo_release_rqst(rqstp);
+-
+       /* calculate over-all length */
+       xb = &rqstp->rq_res;
+       xb->len = xb->head[0].iov_len +
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -605,6 +605,8 @@ svc_udp_sendto(struct svc_rqst *rqstp)
+ {
+       int             error;
++      svc_release_udp_skb(rqstp);
++
+       error = svc_sendto(rqstp, &rqstp->rq_res);
+       if (error == -ECONNREFUSED)
+               /* ICMP error on earlier request. */
+@@ -1137,6 +1139,8 @@ static int svc_tcp_sendto(struct svc_rqs
+       int sent;
+       __be32 reclen;
++      svc_release_skb(rqstp);
++
+       /* Set up the first element of the reply kvec.
+        * Any other kvecs that may be in use have been taken
+        * care of by the server implementation itself.
+--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+@@ -222,6 +222,26 @@ void svc_rdma_recv_ctxt_put(struct svcxp
+               svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+ }
++/**
++ * svc_rdma_release_rqst - Release transport-specific per-rqst resources
++ * @rqstp: svc_rqst being released
++ *
++ * Ensure that the recv_ctxt is released whether or not a Reply
++ * was sent. For example, the client could close the connection,
++ * or svc_process could drop an RPC, before the Reply is sent.
++ */
++void svc_rdma_release_rqst(struct svc_rqst *rqstp)
++{
++      struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
++      struct svc_xprt *xprt = rqstp->rq_xprt;
++      struct svcxprt_rdma *rdma =
++              container_of(xprt, struct svcxprt_rdma, sc_xprt);
++
++      rqstp->rq_xprt_ctxt = NULL;
++      if (ctxt)
++              svc_rdma_recv_ctxt_put(rdma, ctxt);
++}
++
+ static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+                               struct svc_rdma_recv_ctxt *ctxt)
+ {
+@@ -756,6 +776,8 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+       __be32 *p;
+       int ret;
++      rqstp->rq_xprt_ctxt = NULL;
++
+       spin_lock(&rdma_xprt->sc_rq_dto_lock);
+       ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+       if (ctxt) {
+--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+@@ -873,12 +873,7 @@ int svc_rdma_sendto(struct svc_rqst *rqs
+                                     wr_lst, rp_ch);
+       if (ret < 0)
+               goto err1;
+-      ret = 0;
+-
+-out:
+-      rqstp->rq_xprt_ctxt = NULL;
+-      svc_rdma_recv_ctxt_put(rdma, rctxt);
+-      return ret;
++      return 0;
+  err2:
+       if (ret != -E2BIG && ret != -EINVAL)
+@@ -887,14 +882,12 @@ out:
+       ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
+       if (ret < 0)
+               goto err1;
+-      ret = 0;
+-      goto out;
++      return 0;
+  err1:
+       svc_rdma_send_ctxt_put(rdma, sctxt);
+  err0:
+       trace_svcrdma_send_failed(rqstp, ret);
+       set_bit(XPT_CLOSE, &xprt->xpt_flags);
+-      ret = -ENOTCONN;
+-      goto out;
++      return -ENOTCONN;
+ }
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(
+                                       struct sockaddr *sa, int salen,
+                                       int flags);
+ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
+-static void svc_rdma_release_rqst(struct svc_rqst *);
+ static void svc_rdma_detach(struct svc_xprt *xprt);
+ static void svc_rdma_free(struct svc_xprt *xprt);
+ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+@@ -558,10 +557,6 @@ static struct svc_xprt *svc_rdma_accept(
+       return NULL;
+ }
+-static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+-{
+-}
+-
+ /*
+  * When connected, an svc_xprt has at least two references:
+  *
diff --git a/queue-5.4/svcrdma-fix-trace-point-use-after-free-race.patch b/queue-5.4/svcrdma-fix-trace-point-use-after-free-race.patch
new file mode 100644 (file)
index 0000000..cb9fd90
--- /dev/null
@@ -0,0 +1,214 @@
+From e28b4fc652c1830796a4d3e09565f30c20f9a2cf Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Mon, 30 Mar 2020 14:27:37 -0400
+Subject: svcrdma: Fix trace point use-after-free race
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit e28b4fc652c1830796a4d3e09565f30c20f9a2cf upstream.
+
+I hit this while testing nfsd-5.7 with kernel memory debugging
+enabled on my server:
+
+Mar 30 13:21:45 klimt kernel: BUG: unable to handle page fault for address: ffff8887e6c279a8
+Mar 30 13:21:45 klimt kernel: #PF: supervisor read access in kernel mode
+Mar 30 13:21:45 klimt kernel: #PF: error_code(0x0000) - not-present page
+Mar 30 13:21:45 klimt kernel: PGD 3601067 P4D 3601067 PUD 87c519067 PMD 87c3e2067 PTE 800ffff8193d8060
+Mar 30 13:21:45 klimt kernel: Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
+Mar 30 13:21:45 klimt kernel: CPU: 2 PID: 1933 Comm: nfsd Not tainted 5.6.0-rc6-00040-g881e87a3c6f9 #1591
+Mar 30 13:21:45 klimt kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015
+Mar 30 13:21:45 klimt kernel: RIP: 0010:svc_rdma_post_chunk_ctxt+0xab/0x284 [rpcrdma]
+Mar 30 13:21:45 klimt kernel: Code: c1 83 34 02 00 00 29 d0 85 c0 7e 72 48 8b bb a0 02 00 00 48 8d 54 24 08 4c 89 e6 48 8b 07 48 8b 40 20 e8 5a 5c 2b e1 41 89 c6 <8b> 45 20 89 44 24 04 8b 05 02 e9 01 00 85 c0 7e 33 e9 5e 01 00 00
+Mar 30 13:21:45 klimt kernel: RSP: 0018:ffffc90000dfbdd8 EFLAGS: 00010286
+Mar 30 13:21:45 klimt kernel: RAX: 0000000000000000 RBX: ffff8887db8db400 RCX: 0000000000000030
+Mar 30 13:21:45 klimt kernel: RDX: 0000000000000040 RSI: 0000000000000000 RDI: 0000000000000246
+Mar 30 13:21:45 klimt kernel: RBP: ffff8887e6c27988 R08: 0000000000000000 R09: 0000000000000004
+Mar 30 13:21:45 klimt kernel: R10: ffffc90000dfbdd8 R11: 00c068ef00000000 R12: ffff8887eb4e4a80
+Mar 30 13:21:45 klimt kernel: R13: ffff8887db8db634 R14: 0000000000000000 R15: ffff8887fc931000
+Mar 30 13:21:45 klimt kernel: FS:  0000000000000000(0000) GS:ffff88885bd00000(0000) knlGS:0000000000000000
+Mar 30 13:21:45 klimt kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+Mar 30 13:21:45 klimt kernel: CR2: ffff8887e6c279a8 CR3: 000000081b72e002 CR4: 00000000001606e0
+Mar 30 13:21:45 klimt kernel: Call Trace:
+Mar 30 13:21:45 klimt kernel: ? svc_rdma_vec_to_sg+0x7f/0x7f [rpcrdma]
+Mar 30 13:21:45 klimt kernel: svc_rdma_send_write_chunk+0x59/0xce [rpcrdma]
+Mar 30 13:21:45 klimt kernel: svc_rdma_sendto+0xf9/0x3ae [rpcrdma]
+Mar 30 13:21:45 klimt kernel: ? nfsd_destroy+0x51/0x51 [nfsd]
+Mar 30 13:21:45 klimt kernel: svc_send+0x105/0x1e3 [sunrpc]
+Mar 30 13:21:45 klimt kernel: nfsd+0xf2/0x149 [nfsd]
+Mar 30 13:21:45 klimt kernel: kthread+0xf6/0xfb
+Mar 30 13:21:45 klimt kernel: ? kthread_queue_delayed_work+0x74/0x74
+Mar 30 13:21:45 klimt kernel: ret_from_fork+0x3a/0x50
+Mar 30 13:21:45 klimt kernel: Modules linked in: ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue ib_umad ib_ipoib mlx4_ib sb_edac x86_pkg_temp_thermal iTCO_wdt iTCO_vendor_support coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel glue_helper crypto_simd cryptd pcspkr rpcrdma i2c_i801 rdma_ucm lpc_ich mfd_core ib_iser rdma_cm iw_cm ib_cm mei_me raid0 libiscsi mei sg scsi_transport_iscsi ioatdma wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter nfsd nfs_acl lockd auth_rpcgss grace sunrpc ip_tables xfs libcrc32c mlx4_en sd_mod sr_mod cdrom mlx4_core crc32c_intel igb nvme i2c_algo_bit ahci i2c_core libahci nvme_core dca libata t10_pi qedr dm_mirror dm_region_hash dm_log dm_mod dax qede qed crc8 ib_uverbs ib_core
+Mar 30 13:21:45 klimt kernel: CR2: ffff8887e6c279a8
+Mar 30 13:21:45 klimt kernel: ---[ end trace 87971d2ad3429424 ]---
+
+It's absolutely not safe to use resources pointed to by the @send_wr
+argument of ib_post_send() _after_ that function returns. Those
+resources are typically freed by the Send completion handler, which
+can run before ib_post_send() returns.
+
+Thus the trace points currently around ib_post_send() in the
+server's RPC/RDMA transport are a hazard, even when they are
+disabled. Rearrange them so that they touch the Work Request only
+_before_ ib_post_send() is invoked.
+
+Fixes: bd2abef33394 ("svcrdma: Trace key RDMA API events")
+Fixes: 4201c7464753 ("svcrdma: Introduce svc_rdma_send_ctxt")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/trace/events/rpcrdma.h        |   50 ++++++++++++++++++++++++----------
+ net/sunrpc/xprtrdma/svc_rdma_rw.c     |    3 --
+ net/sunrpc/xprtrdma/svc_rdma_sendto.c |   16 ++++++----
+ 3 files changed, 46 insertions(+), 23 deletions(-)
+
+--- a/include/trace/events/rpcrdma.h
++++ b/include/trace/events/rpcrdma.h
+@@ -1638,17 +1638,15 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_eve
+ TRACE_EVENT(svcrdma_post_send,
+       TP_PROTO(
+-              const struct ib_send_wr *wr,
+-              int status
++              const struct ib_send_wr *wr
+       ),
+-      TP_ARGS(wr, status),
++      TP_ARGS(wr),
+       TP_STRUCT__entry(
+               __field(const void *, cqe)
+               __field(unsigned int, num_sge)
+               __field(u32, inv_rkey)
+-              __field(int, status)
+       ),
+       TP_fast_assign(
+@@ -1656,12 +1654,11 @@ TRACE_EVENT(svcrdma_post_send,
+               __entry->num_sge = wr->num_sge;
+               __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ?
+                                       wr->ex.invalidate_rkey : 0;
+-              __entry->status = status;
+       ),
+-      TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d",
++      TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x",
+               __entry->cqe, __entry->num_sge,
+-              __entry->inv_rkey, __entry->status
++              __entry->inv_rkey
+       )
+ );
+@@ -1726,26 +1723,23 @@ TRACE_EVENT(svcrdma_wc_receive,
+ TRACE_EVENT(svcrdma_post_rw,
+       TP_PROTO(
+               const void *cqe,
+-              int sqecount,
+-              int status
++              int sqecount
+       ),
+-      TP_ARGS(cqe, sqecount, status),
++      TP_ARGS(cqe, sqecount),
+       TP_STRUCT__entry(
+               __field(const void *, cqe)
+               __field(int, sqecount)
+-              __field(int, status)
+       ),
+       TP_fast_assign(
+               __entry->cqe = cqe;
+               __entry->sqecount = sqecount;
+-              __entry->status = status;
+       ),
+-      TP_printk("cqe=%p sqecount=%d status=%d",
+-              __entry->cqe, __entry->sqecount, __entry->status
++      TP_printk("cqe=%p sqecount=%d",
++              __entry->cqe, __entry->sqecount
+       )
+ );
+@@ -1841,6 +1835,34 @@ DECLARE_EVENT_CLASS(svcrdma_sendqueue_ev
+ DEFINE_SQ_EVENT(full);
+ DEFINE_SQ_EVENT(retry);
++TRACE_EVENT(svcrdma_sq_post_err,
++      TP_PROTO(
++              const struct svcxprt_rdma *rdma,
++              int status
++      ),
++
++      TP_ARGS(rdma, status),
++
++      TP_STRUCT__entry(
++              __field(int, avail)
++              __field(int, depth)
++              __field(int, status)
++              __string(addr, rdma->sc_xprt.xpt_remotebuf)
++      ),
++
++      TP_fast_assign(
++              __entry->avail = atomic_read(&rdma->sc_sq_avail);
++              __entry->depth = rdma->sc_sq_depth;
++              __entry->status = status;
++              __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
++      ),
++
++      TP_printk("addr=%s sc_sq_avail=%d/%d status=%d",
++              __get_str(addr), __entry->avail, __entry->depth,
++              __entry->status
++      )
++);
++
+ #endif /* _TRACE_RPCRDMA_H */
+ #include <trace/define_trace.h>
+--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
+@@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(stru
+               if (atomic_sub_return(cc->cc_sqecount,
+                                     &rdma->sc_sq_avail) > 0) {
+                       ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+-                      trace_svcrdma_post_rw(&cc->cc_cqe,
+-                                            cc->cc_sqecount, ret);
+                       if (ret)
+                               break;
+                       return 0;
+@@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(stru
+               trace_svcrdma_sq_retry(rdma);
+       } while (1);
++      trace_svcrdma_sq_post_err(rdma, ret);
+       set_bit(XPT_CLOSE, &xprt->xpt_flags);
+       /* If even one was posted, there will be a completion. */
+--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+@@ -306,15 +306,17 @@ int svc_rdma_send(struct svcxprt_rdma *r
+               }
+               svc_xprt_get(&rdma->sc_xprt);
++              trace_svcrdma_post_send(wr);
+               ret = ib_post_send(rdma->sc_qp, wr, NULL);
+-              trace_svcrdma_post_send(wr, ret);
+-              if (ret) {
+-                      set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+-                      svc_xprt_put(&rdma->sc_xprt);
+-                      wake_up(&rdma->sc_send_wait);
+-              }
+-              break;
++              if (ret)
++                      break;
++              return 0;
+       }
++
++      trace_svcrdma_sq_post_err(rdma, ret);
++      set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
++      svc_xprt_put(&rdma->sc_xprt);
++      wake_up(&rdma->sc_send_wait);
+       return ret;
+ }
diff --git a/queue-5.4/um-ensure-make-arch-um-mrproper-removes-arch-subarch-include-generated.patch b/queue-5.4/um-ensure-make-arch-um-mrproper-removes-arch-subarch-include-generated.patch
new file mode 100644 (file)
index 0000000..2ab5c67
--- /dev/null
@@ -0,0 +1,58 @@
+From 63ec90f18204f2fe072df108de8a021b28b1b173 Mon Sep 17 00:00:00 2001
+From: Vitor Massaru Iha <vitor@massaru.org>
+Date: Tue, 21 Apr 2020 21:48:44 -0300
+Subject: um: ensure `make ARCH=um mrproper` removes arch/$(SUBARCH)/include/generated/
+
+From: Vitor Massaru Iha <vitor@massaru.org>
+
+commit 63ec90f18204f2fe072df108de8a021b28b1b173 upstream.
+
+In this workflow:
+
+$ make ARCH=um defconfig && make ARCH=um -j8
+  [snip]
+$ make ARCH=um mrproper
+  [snip]
+$ make ARCH=um defconfig O=./build_um && make ARCH=um -j8 O=./build_um
+  [snip]
+  CC      scripts/mod/empty.o
+In file included from ../include/linux/types.h:6,
+                 from ../include/linux/mod_devicetable.h:12,
+                 from ../scripts/mod/devicetable-offsets.c:3:
+../include/uapi/linux/types.h:5:10: fatal error: asm/types.h: No such file or directory
+    5 | #include <asm/types.h>
+      |          ^~~~~~~~~~~~~
+compilation terminated.
+make[2]: *** [../scripts/Makefile.build:100: scripts/mod/devicetable-offsets.s] Error 1
+make[2]: *** Waiting for unfinished jobs....
+make[1]: *** [/home/iha/sdb/opensource/lkmp/linux-kselftest.git/Makefile:1140: prepare0] Error 2
+make[1]: Leaving directory '/home/iha/sdb/opensource/lkmp/linux-kselftest.git/build_um'
+make: *** [Makefile:180: sub-make] Error 2
+
+The error occurred because the arch/$(SUBARCH)/include/generated files
+weren't properly cleaned by `make ARCH=um mrproper`.
+
+Fixes: a788b2ed81ab ("kbuild: check arch/$(SRCARCH)/include/generated before out-of-tree build")
+Reported-by: Theodore Ts'o <tytso@mit.edu>
+Suggested-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Vitor Massaru Iha <vitor@massaru.org>
+Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
+Tested-by: Brendan Higgins <brendanhiggins@google.com>
+Link: https://groups.google.com/forum/#!msg/kunit-dev/QmA27YEgEgI/hvS1kiz2CwAJ
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/um/Makefile |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -140,6 +140,7 @@ export CFLAGS_vmlinux := $(LINK-y) $(LIN
+ # When cleaning we don't include .config, so we don't include
+ # TT or skas makefiles and don't clean skas_ptregs.h.
+ CLEAN_FILES += linux x.i gmon.out
++MRPROPER_DIRS += arch/$(SUBARCH)/include/generated
+ archclean:
+       @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \