]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.12-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 30 May 2021 14:04:40 +0000 (16:04 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 30 May 2021 14:04:40 +0000 (16:04 +0200)
added patches:
kvm-selftests-fix-32-bit-truncation-of-vm_get_max_gfn.patch
kvm-x86-fix-warning-caused-by-stale-emulation-context.patch
kvm-x86-use-_bitul-macro-in-uapi-headers.patch
revert-net-tipc-fix-a-double-free-in-tipc_sk_mcast_rcv.patch
spi-spi-fsl-dspi-fix-a-resource-leak-in-an-error-handling-path.patch
sunrpc-in-case-of-backlog-hand-free-slots-directly-to-waiting-task.patch
tipc-skb_linearize-the-head-skb-when-reassembling-msgs.patch
tipc-wait-and-exit-until-all-work-queues-are-done.patch

queue-5.12/kvm-selftests-fix-32-bit-truncation-of-vm_get_max_gfn.patch [new file with mode: 0644]
queue-5.12/kvm-x86-fix-warning-caused-by-stale-emulation-context.patch [new file with mode: 0644]
queue-5.12/kvm-x86-use-_bitul-macro-in-uapi-headers.patch [new file with mode: 0644]
queue-5.12/revert-net-tipc-fix-a-double-free-in-tipc_sk_mcast_rcv.patch [new file with mode: 0644]
queue-5.12/series
queue-5.12/spi-spi-fsl-dspi-fix-a-resource-leak-in-an-error-handling-path.patch [new file with mode: 0644]
queue-5.12/sunrpc-in-case-of-backlog-hand-free-slots-directly-to-waiting-task.patch [new file with mode: 0644]
queue-5.12/tipc-skb_linearize-the-head-skb-when-reassembling-msgs.patch [new file with mode: 0644]
queue-5.12/tipc-wait-and-exit-until-all-work-queues-are-done.patch [new file with mode: 0644]

diff --git a/queue-5.12/kvm-selftests-fix-32-bit-truncation-of-vm_get_max_gfn.patch b/queue-5.12/kvm-selftests-fix-32-bit-truncation-of-vm_get_max_gfn.patch
new file mode 100644 (file)
index 0000000..907c521
--- /dev/null
@@ -0,0 +1,118 @@
+From ef4c9f4f654622fa15b7a94a9bd1f19e76bb7feb Mon Sep 17 00:00:00 2001
+From: David Matlack <dmatlack@google.com>
+Date: Fri, 21 May 2021 17:38:28 +0000
+Subject: KVM: selftests: Fix 32-bit truncation of vm_get_max_gfn()
+
+From: David Matlack <dmatlack@google.com>
+
+commit ef4c9f4f654622fa15b7a94a9bd1f19e76bb7feb upstream.
+
+vm_get_max_gfn() casts vm->max_gfn from a uint64_t to an unsigned int,
+which causes the upper 32-bits of the max_gfn to get truncated.
+
+Nobody noticed until now likely because vm_get_max_gfn() is only used
+as a mechanism to create a memslot in an unused region of the guest
+physical address space (the top), and the top of the 32-bit physical
+address space was always good enough.
+
+This fix reveals a bug in memslot_modification_stress_test which was
+trying to create a dummy memslot past the end of guest physical memory.
+Fix that by moving the dummy memslot lower.
+
+Fixes: 52200d0d944e ("KVM: selftests: Remove duplicate guest mode handling")
+Reviewed-by: Venkatesh Srinivas <venkateshs@chromium.org>
+Signed-off-by: David Matlack <dmatlack@google.com>
+Message-Id: <20210521173828.1180619-1-dmatlack@google.com>
+Reviewed-by: Andrew Jones <drjones@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/kvm/include/kvm_util.h                 |    2 -
+ tools/testing/selftests/kvm/lib/kvm_util.c                     |    2 -
+ tools/testing/selftests/kvm/lib/perf_test_util.c               |    4 +-
+ tools/testing/selftests/kvm/memslot_modification_stress_test.c |   18 ++++++----
+ 4 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/tools/testing/selftests/kvm/include/kvm_util.h
++++ b/tools/testing/selftests/kvm/include/kvm_util.h
+@@ -295,7 +295,7 @@ bool vm_is_unrestricted_guest(struct kvm
+ unsigned int vm_get_page_size(struct kvm_vm *vm);
+ unsigned int vm_get_page_shift(struct kvm_vm *vm);
+-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
++uint64_t vm_get_max_gfn(struct kvm_vm *vm);
+ int vm_get_fd(struct kvm_vm *vm);
+ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+--- a/tools/testing/selftests/kvm/lib/kvm_util.c
++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
+@@ -1969,7 +1969,7 @@ unsigned int vm_get_page_shift(struct kv
+       return vm->page_shift;
+ }
+-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
++uint64_t vm_get_max_gfn(struct kvm_vm *vm)
+ {
+       return vm->max_gfn;
+ }
+--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
++++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
+@@ -2,6 +2,7 @@
+ /*
+  * Copyright (C) 2020, Google LLC.
+  */
++#include <inttypes.h>
+ #include "kvm_util.h"
+ #include "perf_test_util.h"
+@@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum
+        */
+       TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+                   "Requested more guest memory than address space allows.\n"
+-                  "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
++                  "    guest pages: %" PRIx64 " max gfn: %" PRIx64
++                  " vcpus: %d wss: %" PRIx64 "]\n",
+                   guest_num_pages, vm_get_max_gfn(vm), vcpus,
+                   vcpu_memory_bytes);
+--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
++++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+@@ -71,14 +71,22 @@ struct memslot_antagonist_args {
+ };
+ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
+-                            uint64_t nr_modifications, uint64_t gpa)
++                             uint64_t nr_modifications)
+ {
++      const uint64_t pages = 1;
++      uint64_t gpa;
+       int i;
++      /*
++       * Add the dummy memslot just below the perf_test_util memslot, which is
++       * at the top of the guest physical address space.
++       */
++      gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
++
+       for (i = 0; i < nr_modifications; i++) {
+               usleep(delay);
+               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
+-                                          DUMMY_MEMSLOT_INDEX, 1, 0);
++                                          DUMMY_MEMSLOT_INDEX, pages, 0);
+               vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
+       }
+@@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode
+       pr_info("Started all vCPUs\n");
+       add_remove_memslot(vm, p->memslot_modification_delay,
+-                         p->nr_memslot_modifications,
+-                         guest_test_phys_mem +
+-                         (guest_percpu_mem_size * nr_vcpus) +
+-                         perf_test_args.host_page_size +
+-                         perf_test_args.guest_page_size);
++                         p->nr_memslot_modifications);
+       run_vcpus = false;
diff --git a/queue-5.12/kvm-x86-fix-warning-caused-by-stale-emulation-context.patch b/queue-5.12/kvm-x86-fix-warning-caused-by-stale-emulation-context.patch
new file mode 100644 (file)
index 0000000..9da77e1
--- /dev/null
@@ -0,0 +1,72 @@
+From da6393cdd8aaa354b3a2437cd73ebb34cac958e3 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpengli@tencent.com>
+Date: Thu, 27 May 2021 17:01:36 -0700
+Subject: KVM: X86: Fix warning caused by stale emulation context
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+commit da6393cdd8aaa354b3a2437cd73ebb34cac958e3 upstream.
+
+Reported by syzkaller:
+
+  WARNING: CPU: 7 PID: 10526 at linux/arch/x86/kvm//x86.c:7621 x86_emulate_instruction+0x41b/0x510 [kvm]
+  RIP: 0010:x86_emulate_instruction+0x41b/0x510 [kvm]
+  Call Trace:
+   kvm_mmu_page_fault+0x126/0x8f0 [kvm]
+   vmx_handle_exit+0x11e/0x680 [kvm_intel]
+   vcpu_enter_guest+0xd95/0x1b40 [kvm]
+   kvm_arch_vcpu_ioctl_run+0x377/0x6a0 [kvm]
+   kvm_vcpu_ioctl+0x389/0x630 [kvm]
+   __x64_sys_ioctl+0x8e/0xd0
+   do_syscall_64+0x3c/0xb0
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Commit 4a1e10d5b5d8 ("KVM: x86: handle hardware breakpoints during emulation")
+added a hardware breakpoint check before emulating the instruction and before parts
+of the emulation context initialization. We don't actually have the EMULTYPE_NO_DECODE
+flag here, and the emulation context will not be reused. Commit c8848cee74ff ("KVM: x86:
+set ctxt->have_exception in x86_decode_insn()") triggers the warning because it
+catches that the stale emulation context has #UD; however, it is not during instruction
+decoding which should result in EMULATION_FAILED. This patch fixes it by moving
+the second part of the emulation context initialization into init_emulate_ctxt(),
+before the hardware breakpoint check. The ctxt->ud will be dropped by a follow-up
+patch.
+
+syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=134683fdd00000
+
+Reported-by: syzbot+71271244f206d17f6441@syzkaller.appspotmail.com
+Fixes: 4a1e10d5b5d8 (KVM: x86: handle hardware breakpoints during emulation)
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <1622160097-37633-1-git-send-email-wanpengli@tencent.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7115,6 +7115,11 @@ static void init_emulate_ctxt(struct kvm
+       BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+       BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
++      ctxt->interruptibility = 0;
++      ctxt->have_exception = false;
++      ctxt->exception.vector = -1;
++      ctxt->perm_ok = false;
++
+       init_decode_cache(ctxt);
+       vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
+ }
+@@ -7450,11 +7455,6 @@ int x86_decode_emulated_instruction(stru
+           kvm_vcpu_check_breakpoint(vcpu, &r))
+               return r;
+-      ctxt->interruptibility = 0;
+-      ctxt->have_exception = false;
+-      ctxt->exception.vector = -1;
+-      ctxt->perm_ok = false;
+-
+       ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
+       r = x86_decode_insn(ctxt, insn, insn_len);
diff --git a/queue-5.12/kvm-x86-use-_bitul-macro-in-uapi-headers.patch b/queue-5.12/kvm-x86-use-_bitul-macro-in-uapi-headers.patch
new file mode 100644 (file)
index 0000000..f9646b7
--- /dev/null
@@ -0,0 +1,64 @@
+From fb1070d18edb37daf3979662975bc54625a19953 Mon Sep 17 00:00:00 2001
+From: Joe Richey <joerichey@google.com>
+Date: Fri, 21 May 2021 01:58:43 -0700
+Subject: KVM: X86: Use _BITUL() macro in UAPI headers
+
+From: Joe Richey <joerichey@google.com>
+
+commit fb1070d18edb37daf3979662975bc54625a19953 upstream.
+
+Replace BIT() in KVM's UAPI header with _BITUL(). BIT() is not defined
+in the UAPI headers and its usage may cause userspace build errors.
+
+Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking")
+Signed-off-by: Joe Richey <joerichey@google.com>
+Message-Id: <20210521085849.37676-3-joerichey94@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/kvm.h       |    5 +++--
+ tools/include/uapi/linux/kvm.h |    5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/include/uapi/linux/kvm.h
++++ b/include/uapi/linux/kvm.h
+@@ -8,6 +8,7 @@
+  * Note: you must update KVM_API_VERSION if you change this interface.
+  */
++#include <linux/const.h>
+ #include <linux/types.h>
+ #include <linux/compiler.h>
+ #include <linux/ioctl.h>
+@@ -1834,8 +1835,8 @@ struct kvm_hyperv_eventfd {
+  * conversion after harvesting an entry.  Also, it must not skip any
+  * dirty bits, so that dirty bits are always harvested in sequence.
+  */
+-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
++#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
++#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
+ #define KVM_DIRTY_GFN_F_MASK            0x3
+ /*
+--- a/tools/include/uapi/linux/kvm.h
++++ b/tools/include/uapi/linux/kvm.h
+@@ -8,6 +8,7 @@
+  * Note: you must update KVM_API_VERSION if you change this interface.
+  */
++#include <linux/const.h>
+ #include <linux/types.h>
+ #include <linux/compiler.h>
+ #include <linux/ioctl.h>
+@@ -1834,8 +1835,8 @@ struct kvm_hyperv_eventfd {
+  * conversion after harvesting an entry.  Also, it must not skip any
+  * dirty bits, so that dirty bits are always harvested in sequence.
+  */
+-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
++#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
++#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
+ #define KVM_DIRTY_GFN_F_MASK            0x3
+ /*
diff --git a/queue-5.12/revert-net-tipc-fix-a-double-free-in-tipc_sk_mcast_rcv.patch b/queue-5.12/revert-net-tipc-fix-a-double-free-in-tipc_sk_mcast_rcv.patch
new file mode 100644 (file)
index 0000000..3485549
--- /dev/null
@@ -0,0 +1,36 @@
+From 75016891357a628d2b8acc09e2b9b2576c18d318 Mon Sep 17 00:00:00 2001
+From: Hoang Le <hoang.h.le@dektech.com.au>
+Date: Fri, 14 May 2021 08:23:03 +0700
+Subject: Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"
+
+From: Hoang Le <hoang.h.le@dektech.com.au>
+
+commit 75016891357a628d2b8acc09e2b9b2576c18d318 upstream.
+
+This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
+Above fix is not correct and caused memory leak issue.
+
+Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
+Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -1265,7 +1265,10 @@ void tipc_sk_mcast_rcv(struct net *net,
+               spin_lock_bh(&inputq->lock);
+               if (skb_peek(arrvq) == skb) {
+                       skb_queue_splice_tail_init(&tmpq, inputq);
+-                      __skb_dequeue(arrvq);
++                      /* Decrease the skb's refcnt as increasing in the
++                       * function tipc_skb_peek
++                       */
++                      kfree_skb(__skb_dequeue(arrvq));
+               }
+               spin_unlock_bh(&inputq->lock);
+               __skb_queue_purge(&tmpq);
index 04373a98639078e3af5f7f8b321e73954a41d0b3..79cf70227c3873116d5fafeb438faf57d7d0966c 100644 (file)
@@ -127,3 +127,11 @@ net-mlx5e-fix-null-deref-accessing-lag-dev.patch
 net-mlx4-fix-eeprom-dump-support.patch
 net-rdma-mlx5-fix-override-of-log_max_qp-by-other-device.patch
 net-mlx5-set-term-table-as-an-unmanaged-flow-table.patch
+kvm-x86-fix-warning-caused-by-stale-emulation-context.patch
+kvm-x86-use-_bitul-macro-in-uapi-headers.patch
+kvm-selftests-fix-32-bit-truncation-of-vm_get_max_gfn.patch
+sunrpc-in-case-of-backlog-hand-free-slots-directly-to-waiting-task.patch
+revert-net-tipc-fix-a-double-free-in-tipc_sk_mcast_rcv.patch
+tipc-wait-and-exit-until-all-work-queues-are-done.patch
+tipc-skb_linearize-the-head-skb-when-reassembling-msgs.patch
+spi-spi-fsl-dspi-fix-a-resource-leak-in-an-error-handling-path.patch
diff --git a/queue-5.12/spi-spi-fsl-dspi-fix-a-resource-leak-in-an-error-handling-path.patch b/queue-5.12/spi-spi-fsl-dspi-fix-a-resource-leak-in-an-error-handling-path.patch
new file mode 100644 (file)
index 0000000..3a9a9ee
--- /dev/null
@@ -0,0 +1,40 @@
+From 680ec0549a055eb464dce6ffb4bfb736ef87236e Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Sun, 9 May 2021 21:12:27 +0200
+Subject: spi: spi-fsl-dspi: Fix a resource leak in an error handling path
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+commit 680ec0549a055eb464dce6ffb4bfb736ef87236e upstream.
+
+'dspi_request_dma()' should be undone by a 'dspi_release_dma()' call in the
+error handling path of the probe function, as already done in the remove
+function
+
+Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Link: https://lore.kernel.org/r/d51caaac747277a1099ba8dea07acd85435b857e.1620587472.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-fsl-dspi.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/spi/spi-fsl-dspi.c
++++ b/drivers/spi/spi-fsl-dspi.c
+@@ -1375,11 +1375,13 @@ poll_mode:
+       ret = spi_register_controller(ctlr);
+       if (ret != 0) {
+               dev_err(&pdev->dev, "Problem registering DSPI ctlr\n");
+-              goto out_free_irq;
++              goto out_release_dma;
+       }
+       return ret;
++out_release_dma:
++      dspi_release_dma(dspi);
+ out_free_irq:
+       if (dspi->irq)
+               free_irq(dspi->irq, dspi);
diff --git a/queue-5.12/sunrpc-in-case-of-backlog-hand-free-slots-directly-to-waiting-task.patch b/queue-5.12/sunrpc-in-case-of-backlog-hand-free-slots-directly-to-waiting-task.patch
new file mode 100644 (file)
index 0000000..e6069de
--- /dev/null
@@ -0,0 +1,182 @@
+From e877a88d1f069edced4160792f42c2a8e2dba942 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 17 May 2021 09:59:10 +1000
+Subject: SUNRPC in case of backlog, hand free slots directly to waiting task
+
+From: NeilBrown <neilb@suse.de>
+
+commit e877a88d1f069edced4160792f42c2a8e2dba942 upstream.
+
+If sunrpc.tcp_max_slot_table_entries is small and there are tasks
+on the backlog queue, then when a request completes it is freed and the
+first task on the queue is woken.  The expectation is that it will wake
+and claim that request.  However if it was a sync task and the waiting
+process was killed at just that moment, it will wake and NOT claim the
+request.
+
+As long as TASK_CONGESTED remains set, requests can only be claimed by
+tasks woken from the backlog, and they are woken only as requests are
+freed, so when a task doesn't claim a request, no other task can ever
+get that request until TASK_CONGESTED is cleared.  Each time this
+happens the number of available requests is decreased by one.
+
+With a sufficiently high workload and sufficiently low setting of
+max_slot (16 in the case where this was seen), TASK_CONGESTED can remain
+set for an extended period, and the above scenario (of a process being
+killed just as its task was woken) can repeat until no requests can be
+allocated.  Then traffic stops.
+
+This patch addresses the problem by introducing a positive handover of a
+request from a completing task to a backlog task - the request is never
+freed when there is a backlog.
+
+When a task is woken it might now already have a request attached in
+which case it is *not* freed (as with current code) but is initialised
+(if needed) and used.  If it isn't used it will eventually be freed by
+rpc_exit_task().  xprt_release() is enhanced to be able to correctly
+release an uninitialised request.
+
+Fixes: ba60eb25ff6b ("SUNRPC: Fix a livelock problem in the xprt->backlog queue")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sunrpc/clnt.c |    7 -----
+ net/sunrpc/xprt.c |   68 +++++++++++++++++++++++++++++++++++++-----------------
+ 2 files changed, 47 insertions(+), 28 deletions(-)
+
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task
+               return;
+       }
+-      /*
+-       * Even though there was an error, we may have acquired
+-       * a request slot somehow.  Make sure not to leak it.
+-       */
+-      if (task->tk_rqstp)
+-              xprt_release(task);
+-
+       switch (status) {
+       case -ENOMEM:
+               rpc_delay(task, HZ >> 2);
+--- a/net/sunrpc/xprt.c
++++ b/net/sunrpc/xprt.c
+@@ -70,6 +70,7 @@
+ static void    xprt_init(struct rpc_xprt *xprt, struct net *net);
+ static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
+ static void    xprt_destroy(struct rpc_xprt *xprt);
++static void    xprt_request_init(struct rpc_task *task);
+ static DEFINE_SPINLOCK(xprt_list_lock);
+ static LIST_HEAD(xprt_list);
+@@ -1608,10 +1609,26 @@ static void xprt_add_backlog(struct rpc_
+       rpc_sleep_on(&xprt->backlog, task, NULL);
+ }
+-static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
++static bool __xprt_set_rq(struct rpc_task *task, void *data)
+ {
+-      if (rpc_wake_up_next(&xprt->backlog) == NULL)
++      struct rpc_rqst *req = data;
++
++      if (task->tk_rqstp == NULL) {
++              memset(req, 0, sizeof(*req));   /* mark unused */
++              task->tk_status = -EAGAIN;
++              task->tk_rqstp = req;
++              return true;
++      }
++      return false;
++}
++
++static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
++{
++      if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
+               clear_bit(XPRT_CONGESTED, &xprt->state);
++              return false;
++      }
++      return true;
+ }
+ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
+@@ -1699,11 +1716,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot);
+ void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+ {
+       spin_lock(&xprt->reserve_lock);
+-      if (!xprt_dynamic_free_slot(xprt, req)) {
++      if (!xprt_wake_up_backlog(xprt, req) &&
++          !xprt_dynamic_free_slot(xprt, req)) {
+               memset(req, 0, sizeof(*req));   /* mark unused */
+               list_add(&req->rq_list, &xprt->free);
+       }
+-      xprt_wake_up_backlog(xprt);
+       spin_unlock(&xprt->reserve_lock);
+ }
+ EXPORT_SYMBOL_GPL(xprt_free_slot);
+@@ -1791,6 +1808,10 @@ xprt_request_init(struct rpc_task *task)
+       struct rpc_xprt *xprt = task->tk_xprt;
+       struct rpc_rqst *req = task->tk_rqstp;
++      if (req->rq_task)
++              /* Already initialized */
++              return;
++
+       req->rq_task    = task;
+       req->rq_xprt    = xprt;
+       req->rq_buffer  = NULL;
+@@ -1851,8 +1872,10 @@ void xprt_retry_reserve(struct rpc_task
+       struct rpc_xprt *xprt = task->tk_xprt;
+       task->tk_status = 0;
+-      if (task->tk_rqstp != NULL)
++      if (task->tk_rqstp != NULL) {
++              xprt_request_init(task);
+               return;
++      }
+       task->tk_status = -EAGAIN;
+       xprt_do_reserve(xprt, task);
+@@ -1877,23 +1900,26 @@ void xprt_release(struct rpc_task *task)
+       }
+       xprt = req->rq_xprt;
+-      xprt_request_dequeue_xprt(task);
+-      spin_lock(&xprt->transport_lock);
+-      xprt->ops->release_xprt(xprt, task);
+-      if (xprt->ops->release_request)
+-              xprt->ops->release_request(task);
+-      xprt_schedule_autodisconnect(xprt);
+-      spin_unlock(&xprt->transport_lock);
+-      if (req->rq_buffer)
+-              xprt->ops->buf_free(task);
+-      xdr_free_bvec(&req->rq_rcv_buf);
+-      xdr_free_bvec(&req->rq_snd_buf);
+-      if (req->rq_cred != NULL)
+-              put_rpccred(req->rq_cred);
+-      task->tk_rqstp = NULL;
+-      if (req->rq_release_snd_buf)
+-              req->rq_release_snd_buf(req);
++      if (xprt) {
++              xprt_request_dequeue_xprt(task);
++              spin_lock(&xprt->transport_lock);
++              xprt->ops->release_xprt(xprt, task);
++              if (xprt->ops->release_request)
++                      xprt->ops->release_request(task);
++              xprt_schedule_autodisconnect(xprt);
++              spin_unlock(&xprt->transport_lock);
++              if (req->rq_buffer)
++                      xprt->ops->buf_free(task);
++              xdr_free_bvec(&req->rq_rcv_buf);
++              xdr_free_bvec(&req->rq_snd_buf);
++              if (req->rq_cred != NULL)
++                      put_rpccred(req->rq_cred);
++              if (req->rq_release_snd_buf)
++                      req->rq_release_snd_buf(req);
++      } else
++              xprt = task->tk_xprt;
++      task->tk_rqstp = NULL;
+       if (likely(!bc_prealloc(req)))
+               xprt->ops->free_slot(xprt, req);
+       else
diff --git a/queue-5.12/tipc-skb_linearize-the-head-skb-when-reassembling-msgs.patch b/queue-5.12/tipc-skb_linearize-the-head-skb-when-reassembling-msgs.patch
new file mode 100644 (file)
index 0000000..f9d972b
--- /dev/null
@@ -0,0 +1,95 @@
+From b7df21cf1b79ab7026f545e7bf837bd5750ac026 Mon Sep 17 00:00:00 2001
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 8 May 2021 03:57:03 +0800
+Subject: tipc: skb_linearize the head skb when reassembling msgs
+
+From: Xin Long <lucien.xin@gmail.com>
+
+commit b7df21cf1b79ab7026f545e7bf837bd5750ac026 upstream.
+
+It's not a good idea to append the frag skb to a skb's frag_list if
+the frag_list already has skbs from elsewhere, such as this skb was
+created by pskb_copy() where the frag_list was cloned (all the skbs
+in it were skb_get'ed) and shared by multiple skbs.
+
+However, the newly appended frag skb should be seen only by the
+current skb. Otherwise, it will cause use-after-free crashes, as this
+appended frag skb is seen by multiple skbs but only had skb_get
+called once.
+
+The same thing happens with a skb updated by pskb_may_pull() with a
+skb_cloned skb. Li Shuang has reported quite a few crashes caused
+by this when doing testing over macvlan devices:
+
+  [] kernel BUG at net/core/skbuff.c:1970!
+  [] Call Trace:
+  []  skb_clone+0x4d/0xb0
+  []  macvlan_broadcast+0xd8/0x160 [macvlan]
+  []  macvlan_process_broadcast+0x148/0x150 [macvlan]
+  []  process_one_work+0x1a7/0x360
+  []  worker_thread+0x30/0x390
+
+  [] kernel BUG at mm/usercopy.c:102!
+  [] Call Trace:
+  []  __check_heap_object+0xd3/0x100
+  []  __check_object_size+0xff/0x16b
+  []  simple_copy_to_iter+0x1c/0x30
+  []  __skb_datagram_iter+0x7d/0x310
+  []  __skb_datagram_iter+0x2a5/0x310
+  []  skb_copy_datagram_iter+0x3b/0x90
+  []  tipc_recvmsg+0x14a/0x3a0 [tipc]
+  []  ____sys_recvmsg+0x91/0x150
+  []  ___sys_recvmsg+0x7b/0xc0
+
+  [] kernel BUG at mm/slub.c:305!
+  [] Call Trace:
+  []  <IRQ>
+  []  kmem_cache_free+0x3ff/0x400
+  []  __netif_receive_skb_core+0x12c/0xc40
+  []  ? kmem_cache_alloc+0x12e/0x270
+  []  netif_receive_skb_internal+0x3d/0xb0
+  []  ? get_rx_page_info+0x8e/0xa0 [be2net]
+  []  be_poll+0x6ef/0xd00 [be2net]
+  []  ? irq_exit+0x4f/0x100
+  []  net_rx_action+0x149/0x3b0
+
+  ...
+
+This patch is to fix it by linearizing the head skb if it has frag_list
+set in tipc_buf_append(). Note that we choose to do this before calling
+skb_unshare(), as __skb_linearize() will avoid skb_copy(). Also, we cannot
+simply drop the frag_list as was done earlier.
+
+Fixes: 45c8b7b175ce ("tipc: allow non-linear first fragment buffer")
+Reported-by: Li Shuang <shuali@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/msg.c |    9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **hea
+               if (unlikely(head))
+                       goto err;
+               *buf = NULL;
++              if (skb_has_frag_list(frag) && __skb_linearize(frag))
++                      goto err;
+               frag = skb_unshare(frag, GFP_ATOMIC);
+               if (unlikely(!frag))
+                       goto err;
+               head = *headbuf = frag;
+               TIPC_SKB_CB(head)->tail = NULL;
+-              if (skb_is_nonlinear(head)) {
+-                      skb_walk_frags(head, tail) {
+-                              TIPC_SKB_CB(head)->tail = tail;
+-                      }
+-              } else {
+-                      skb_frag_list_init(head);
+-              }
+               return 0;
+       }
diff --git a/queue-5.12/tipc-wait-and-exit-until-all-work-queues-are-done.patch b/queue-5.12/tipc-wait-and-exit-until-all-work-queues-are-done.patch
new file mode 100644 (file)
index 0000000..c020e31
--- /dev/null
@@ -0,0 +1,88 @@
+From 04c26faa51d1e2fe71cf13c45791f5174c37f986 Mon Sep 17 00:00:00 2001
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 17 May 2021 02:28:58 +0800
+Subject: tipc: wait and exit until all work queues are done
+
+From: Xin Long <lucien.xin@gmail.com>
+
+commit 04c26faa51d1e2fe71cf13c45791f5174c37f986 upstream.
+
+On some hosts, a crash could be triggered simply by repeating these
+commands several times:
+
+  # modprobe tipc
+  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
+  # rmmod tipc
+
+  [] BUG: unable to handle kernel paging request at ffffffffc096bb00
+  [] Workqueue: events 0xffffffffc096bb00
+  [] Call Trace:
+  []  ? process_one_work+0x1a7/0x360
+  []  ? worker_thread+0x30/0x390
+  []  ? create_worker+0x1a0/0x1a0
+  []  ? kthread+0x116/0x130
+  []  ? kthread_flush_work_fn+0x10/0x10
+  []  ? ret_from_fork+0x35/0x40
+
+When removing the TIPC module, the release of the UDP tunnel sock is deferred
+to a work queue, as sock_release() can't be done under rtnl_lock().
+If the work queue is scheduled to run after the TIPC module is removed, the
+kernel will crash, as the work queue function cleanup_bearer() code no
+longer exists when trying to invoke it.
+
+To fix it, this patch introduces a member wq_count in tipc_net to track
+the number of scheduled work queues, and waits in tipc_exit_net() until
+all work queues are done before exiting.
+
+Fixes: d0f91938bede ("tipc: add ip/udp media type")
+Reported-by: Shuang Li <shuali@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/core.c      |    2 ++
+ net/tipc/core.h      |    2 ++
+ net/tipc/udp_media.c |    2 ++
+ 3 files changed, 6 insertions(+)
+
+--- a/net/tipc/core.c
++++ b/net/tipc/core.c
+@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(str
+ #ifdef CONFIG_TIPC_CRYPTO
+       tipc_crypto_stop(&tipc_net(net)->crypto_tx);
+ #endif
++      while (atomic_read(&tn->wq_count))
++              cond_resched();
+ }
+ static void __net_exit tipc_pernet_pre_exit(struct net *net)
+--- a/net/tipc/core.h
++++ b/net/tipc/core.h
+@@ -149,6 +149,8 @@ struct tipc_net {
+ #endif
+       /* Work item for net finalize */
+       struct tipc_net_work final_work;
++      /* The numbers of work queues in schedule */
++      atomic_t wq_count;
+ };
+ static inline struct tipc_net *tipc_net(struct net *net)
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -812,6 +812,7 @@ static void cleanup_bearer(struct work_s
+               kfree_rcu(rcast, rcu);
+       }
++      atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
+       dst_cache_destroy(&ub->rcast.dst_cache);
+       udp_tunnel_sock_release(ub->ubsock);
+       synchronize_net();
+@@ -832,6 +833,7 @@ static void tipc_udp_disable(struct tipc
+       RCU_INIT_POINTER(ub->bearer, NULL);
+       /* sock_release need to be done outside of rtnl lock */
++      atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
+       INIT_WORK(&ub->work, cleanup_bearer);
+       schedule_work(&ub->work);
+ }