5.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)
diff --git a/queue-5.4/ib-mlx5-fix-initializing-cq-fragments-buffer.patch b/queue-5.4/ib-mlx5-fix-initializing-cq-fragments-buffer.patch

new file mode 100644 (file)

index 0000000..271046c
--- /dev/null
+++ b/queue-5.4/ib-mlx5-fix-initializing-cq-fragments-buffer.patch
@@ -0,0 +1,79 @@
+From 2ba0aa2feebda680ecfc3c552e867cf4d1b05a3a Mon Sep 17 00:00:00 2001
+From: Alaa Hleihel <alaa@nvidia.com>
+Date: Thu, 10 Jun 2021 10:34:27 +0300
+Subject: IB/mlx5: Fix initializing CQ fragments buffer
+
+From: Alaa Hleihel <alaa@nvidia.com>
+
+commit 2ba0aa2feebda680ecfc3c552e867cf4d1b05a3a upstream.
+
+The function init_cq_frag_buf() can be called to initialize the current CQ
+fragments buffer cq->buf, or the temporary cq->resize_buf that is filled
+during CQ resize operation.
+
+However, the offending commit started to use function get_cqe() for
+getting the CQEs, the issue with this change is that get_cqe() always
+returns CQEs from cq->buf, which leads us to initialize the wrong buffer,
+and in case of enlarging the CQ we try to access elements beyond the size
+of the current cq->buf and eventually hit a kernel panic.
+
+ [exception RIP: init_cq_frag_buf+103]
+  [ffff9f799ddcbcd8] mlx5_ib_resize_cq at ffffffffc0835d60 [mlx5_ib]
+  [ffff9f799ddcbdb0] ib_resize_cq at ffffffffc05270df [ib_core]
+  [ffff9f799ddcbdc0] llt_rdma_setup_qp at ffffffffc0a6a712 [llt]
+  [ffff9f799ddcbe10] llt_rdma_cc_event_action at ffffffffc0a6b411 [llt]
+  [ffff9f799ddcbe98] llt_rdma_client_conn_thread at ffffffffc0a6bb75 [llt]
+  [ffff9f799ddcbec8] kthread at ffffffffa66c5da1
+  [ffff9f799ddcbf50] ret_from_fork_nospec_begin at ffffffffa6d95ddd
+
+Fix it by getting the needed CQE by calling mlx5_frag_buf_get_wqe() that
+takes the correct source buffer as a parameter.
+
+Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)")
+Link: https://lore.kernel.org/r/90a0e8c924093cfa50a482880ad7e7edb73dc19a.1623309971.git.leonro@nvidia.com
+Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/cq.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -829,15 +829,14 @@ static void destroy_cq_user(struct mlx5_
+       ib_umem_release(cq->buf.umem);
+ }
+ 
+-static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+-                           struct mlx5_ib_cq_buf *buf)
++static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
+ {
+       int i;
+       void *cqe;
+       struct mlx5_cqe64 *cqe64;
+ 
+       for (i = 0; i < buf->nent; i++) {
+-              cqe = get_cqe(cq, i);
++              cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
+               cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+               cqe64->op_own = MLX5_CQE_INVALID << 4;
+       }
+@@ -863,7 +862,7 @@ static int create_cq_kernel(struct mlx5_
+       if (err)
+               goto err_db;
+ 
+-      init_cq_frag_buf(cq, &cq->buf);
++      init_cq_frag_buf(&cq->buf);
+ 
+       *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+@@ -1163,7 +1162,7 @@ static int resize_kernel(struct mlx5_ib_
+       if (err)
+               goto ex;
+ 
+-      init_cq_frag_buf(cq, cq->resize_buf);
++      init_cq_frag_buf(cq->resize_buf);
+ 
+       return 0;
+ 
diff --git a/queue-5.4/kvm-x86-ensure-liveliness-of-nested-vm-enter-fail-tracepoint-message.patch b/queue-5.4/kvm-x86-ensure-liveliness-of-nested-vm-enter-fail-tracepoint-message.patch

new file mode 100644 (file)

index 0000000..7b55fc0
--- /dev/null
+++ b/queue-5.4/kvm-x86-ensure-liveliness-of-nested-vm-enter-fail-tracepoint-message.patch
@@ -0,0 +1,83 @@
+From f31500b0d437a2464ca5972d8f5439e156b74960 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 7 Jun 2021 10:57:48 -0700
+Subject: KVM: x86: Ensure liveliness of nested VM-Enter fail tracepoint message
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f31500b0d437a2464ca5972d8f5439e156b74960 upstream.
+
+Use the __string() machinery provided by the tracing subystem to make a
+copy of the string literals consumed by the "nested VM-Enter failed"
+tracepoint.  A complete copy is necessary to ensure that the tracepoint
+can't outlive the data/memory it consumes and deference stale memory.
+
+Because the tracepoint itself is defined by kvm, if kvm-intel and/or
+kvm-amd are built as modules, the memory holding the string literals
+defined by the vendor modules will be freed when the module is unloaded,
+whereas the tracepoint and its data in the ring buffer will live until
+kvm is unloaded (or "indefinitely" if kvm is built-in).
+
+This bug has existed since the tracepoint was added, but was recently
+exposed by a new check in tracing to detect exactly this type of bug.
+
+  fmt: '%s%s
+  ' current_buffer: ' vmx_dirty_log_t-140127  [003] ....  kvm_nested_vmenter_failed: '
+  WARNING: CPU: 3 PID: 140134 at kernel/trace/trace.c:3759 trace_check_vprintf+0x3be/0x3e0
+  CPU: 3 PID: 140134 Comm: less Not tainted 5.13.0-rc1-ce2e73ce600a-req #184
+  Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014
+  RIP: 0010:trace_check_vprintf+0x3be/0x3e0
+  Code: <0f> 0b 44 8b 4c 24 1c e9 a9 fe ff ff c6 44 02 ff 00 49 8b 97 b0 20
+  RSP: 0018:ffffa895cc37bcb0 EFLAGS: 00010282
+  RAX: 0000000000000000 RBX: ffffa895cc37bd08 RCX: 0000000000000027
+  RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: ffff9766cfad74f8
+  RBP: ffffffffc0a041d4 R08: ffff9766cfad74f0 R09: ffffa895cc37bad8
+  R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffc0a041d4
+  R13: ffffffffc0f4dba8 R14: 0000000000000000 R15: ffff976409f2c000
+  FS:  00007f92fa200740(0000) GS:ffff9766cfac0000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000559bd11b0000 CR3: 000000019fbaa002 CR4: 00000000001726e0
+  Call Trace:
+   trace_event_printf+0x5e/0x80
+   trace_raw_output_kvm_nested_vmenter_failed+0x3a/0x60 [kvm]
+   print_trace_line+0x1dd/0x4e0
+   s_show+0x45/0x150
+   seq_read_iter+0x2d5/0x4c0
+   seq_read+0x106/0x150
+   vfs_read+0x98/0x180
+   ksys_read+0x5f/0xe0
+   do_syscall_64+0x40/0xb0
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Fixes: 380e0055bc7e ("KVM: nVMX: trace nested VM-Enter failures detected by H/W")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Message-Id: <20210607175748.674002-1-seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/trace.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/trace.h
++++ b/arch/x86/kvm/trace.h
+@@ -1483,16 +1483,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
+       TP_ARGS(msg, err),
+ 
+       TP_STRUCT__entry(
+-              __field(const char *, msg)
++              __string(msg, msg)
+               __field(u32, err)
+       ),
+ 
+       TP_fast_assign(
+-              __entry->msg = msg;
++              __assign_str(msg, msg);
+               __entry->err = err;
+       ),
+ 
+-      TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
++      TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
+               __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
+ );
+ 
diff --git a/queue-5.4/perf-fix-data-race-between-pin_count-increment-decrement.patch b/queue-5.4/perf-fix-data-race-between-pin_count-increment-decrement.patch

new file mode 100644 (file)

index 0000000..385c955
--- /dev/null
+++ b/queue-5.4/perf-fix-data-race-between-pin_count-increment-decrement.patch
@@ -0,0 +1,48 @@
+From 6c605f8371159432ec61cbb1488dcf7ad24ad19a Mon Sep 17 00:00:00 2001
+From: Marco Elver <elver@google.com>
+Date: Thu, 27 May 2021 12:47:11 +0200
+Subject: perf: Fix data race between pin_count increment/decrement
+
+From: Marco Elver <elver@google.com>
+
+commit 6c605f8371159432ec61cbb1488dcf7ad24ad19a upstream.
+
+KCSAN reports a data race between increment and decrement of pin_count:
+
+  write to 0xffff888237c2d4e0 of 4 bytes by task 15740 on cpu 1:
+   find_get_context            kernel/events/core.c:4617
+   __do_sys_perf_event_open    kernel/events/core.c:12097 [inline]
+   __se_sys_perf_event_open    kernel/events/core.c:11933
+   ...
+  read to 0xffff888237c2d4e0 of 4 bytes by task 15743 on cpu 0:
+   perf_unpin_context          kernel/events/core.c:1525 [inline]
+   __do_sys_perf_event_open    kernel/events/core.c:12328 [inline]
+   __se_sys_perf_event_open    kernel/events/core.c:11933
+   ...
+
+Because neither read-modify-write here is atomic, this can lead to one
+of the operations being lost, resulting in an inconsistent pin_count.
+Fix it by adding the missing locking in the CPU-event case.
+
+Fixes: fe4b04fa31a6 ("perf: Cure task_oncpu_function_call() races")
+Reported-by: syzbot+142c9018f5962db69c7e@syzkaller.appspotmail.com
+Signed-off-by: Marco Elver <elver@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210527104711.2671610-1-elver@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -4249,7 +4249,9 @@ find_get_context(struct pmu *pmu, struct
+               cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+               ctx = &cpuctx->ctx;
+               get_ctx(ctx);
++              raw_spin_lock_irqsave(&ctx->lock, flags);
+               ++ctx->pin_count;
++              raw_spin_unlock_irqrestore(&ctx->lock, flags);
+ 
+               return ctx;
+       }
diff --git a/queue-5.4/rdma-ipoib-fix-warning-caused-by-destroying-non-initial-netns.patch b/queue-5.4/rdma-ipoib-fix-warning-caused-by-destroying-non-initial-netns.patch

new file mode 100644 (file)

index 0000000..f90141d
--- /dev/null
+++ b/queue-5.4/rdma-ipoib-fix-warning-caused-by-destroying-non-initial-netns.patch
@@ -0,0 +1,80 @@
+From a3e74fb9247cd530dca246699d5eb5a691884d32 Mon Sep 17 00:00:00 2001
+From: Kamal Heib <kamalheib1@gmail.com>
+Date: Tue, 25 May 2021 18:01:34 +0300
+Subject: RDMA/ipoib: Fix warning caused by destroying non-initial netns
+
+From: Kamal Heib <kamalheib1@gmail.com>
+
+commit a3e74fb9247cd530dca246699d5eb5a691884d32 upstream.
+
+After the commit 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib
+interfaces"), if the IPoIB device is moved to non-initial netns,
+destroying that netns lets the device vanish instead of moving it back to
+the initial netns, This is happening because default_device_exit() skips
+the interfaces due to having rtnl_link_ops set.
+
+Steps to reporoduce:
+  ip netns add foo
+  ip link set mlx5_ib0 netns foo
+  ip netns delete foo
+
+WARNING: CPU: 1 PID: 704 at net/core/dev.c:11435 netdev_exit+0x3f/0x50
+Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT
+nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack
+nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun d
+ fuse
+CPU: 1 PID: 704 Comm: kworker/u64:3 Tainted: G S      W  5.13.0-rc1+ #1
+Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.5 04/11/2016
+Workqueue: netns cleanup_net
+RIP: 0010:netdev_exit+0x3f/0x50
+Code: 48 8b bb 30 01 00 00 e8 ef 81 b1 ff 48 81 fb c0 3a 54 a1 74 13 48
+8b 83 90 00 00 00 48 81 c3 90 00 00 00 48 39 d8 75 02 5b c3 <0f> 0b 5b
+c3 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00
+RSP: 0018:ffffb297079d7e08 EFLAGS: 00010206
+RAX: ffff8eb542c00040 RBX: ffff8eb541333150 RCX: 000000008010000d
+RDX: 000000008010000e RSI: 000000008010000d RDI: ffff8eb440042c00
+RBP: ffffb297079d7e48 R08: 0000000000000001 R09: ffffffff9fdeac00
+R10: ffff8eb5003be000 R11: 0000000000000001 R12: ffffffffa1545620
+R13: ffffffffa1545628 R14: 0000000000000000 R15: ffffffffa1543b20
+FS:  0000000000000000(0000) GS:ffff8ed37fa00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00005601b5f4c2e8 CR3: 0000001fc8c10002 CR4: 00000000003706e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ ops_exit_list.isra.9+0x36/0x70
+ cleanup_net+0x234/0x390
+ process_one_work+0x1cb/0x360
+ ? process_one_work+0x360/0x360
+ worker_thread+0x30/0x370
+ ? process_one_work+0x360/0x360
+ kthread+0x116/0x130
+ ? kthread_park+0x80/0x80
+ ret_from_fork+0x22/0x30
+
+To avoid the above warning and later on the kernel panic that could happen
+on shutdown due to a NULL pointer dereference, make sure to set the
+netns_refund flag that was introduced by commit 3a5ca857079e ("can: dev:
+Move device back to init netns on owning netns delete") to properly
+restore the IPoIB interfaces to the initial netns.
+
+Fixes: 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces")
+Link: https://lore.kernel.org/r/20210525150134.139342-1-kamalheib1@gmail.com
+Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/ipoib/ipoib_netlink.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+@@ -163,6 +163,7 @@ static size_t ipoib_get_size(const struc
+ 
+ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
+       .kind           = "ipoib",
++      .netns_refund   = true,
+       .maxtype        = IFLA_IPOIB_MAX,
+       .policy         = ipoib_policy,
+       .priv_size      = sizeof(struct ipoib_dev_priv),
diff --git a/queue-5.4/rdma-mlx4-do-not-map-the-core_clock-page-to-user-space-unless-enabled.patch b/queue-5.4/rdma-mlx4-do-not-map-the-core_clock-page-to-user-space-unless-enabled.patch

new file mode 100644 (file)

index 0000000..a215fa5
--- /dev/null
+++ b/queue-5.4/rdma-mlx4-do-not-map-the-core_clock-page-to-user-space-unless-enabled.patch
@@ -0,0 +1,112 @@
+From 404e5a12691fe797486475fe28cc0b80cb8bef2c Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Thu, 3 Jun 2021 16:19:39 +0300
+Subject: RDMA/mlx4: Do not map the core_clock page to user space unless enabled
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 404e5a12691fe797486475fe28cc0b80cb8bef2c upstream.
+
+Currently when mlx4 maps the hca_core_clock page to the user space there
+are read-modifiable registers, one of which is semaphore, on this page as
+well as the clock counter. If user reads the wrong offset, it can modify
+the semaphore and hang the device.
+
+Do not map the hca_core_clock page to the user space unless the device has
+been put in a backwards compatibility mode to support this feature.
+
+After this patch, mlx4 core_clock won't be mapped to user space on the
+majority of existing devices and the uverbs device time feature in
+ibv_query_rt_values_ex() will be disabled.
+
+Fixes: 52033cfb5aab ("IB/mlx4: Add mmap call to map the hardware clock")
+Link: https://lore.kernel.org/r/9632304e0d6790af84b3b706d8c18732bc0d5e27.1622726305.git.leonro@nvidia.com
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx4/main.c         |    5 +----
+ drivers/net/ethernet/mellanox/mlx4/fw.c   |    3 +++
+ drivers/net/ethernet/mellanox/mlx4/fw.h   |    1 +
+ drivers/net/ethernet/mellanox/mlx4/main.c |    6 ++++++
+ include/linux/mlx4/device.h               |    1 +
+ 5 files changed, 12 insertions(+), 4 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx4/main.c
++++ b/drivers/infiniband/hw/mlx4/main.c
+@@ -577,12 +577,9 @@ static int mlx4_ib_query_device(struct i
+       props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
+       props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
+ 
+-      if (!mlx4_is_slave(dev->dev))
+-              err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
+-
+       if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
+               resp.response_length += sizeof(resp.hca_core_clock_offset);
+-              if (!err && !mlx4_is_slave(dev->dev)) {
++              if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
+                       resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
+                       resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
+               }
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
+@@ -823,6 +823,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
+ #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET                0xb0
+ #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET  0xa8
+ #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac
++#define QUERY_DEV_CAP_MAP_CLOCK_TO_USER 0xc1
+ #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET        0xcc
+ #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET        0xd0
+ #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET        0xd2
+@@ -841,6 +842,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
+ 
+       if (mlx4_is_mfunc(dev))
+               disable_unsupported_roce_caps(outbox);
++      MLX4_GET(field, outbox, QUERY_DEV_CAP_MAP_CLOCK_TO_USER);
++      dev_cap->map_clock_to_user = field & 0x80;
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
+       dev_cap->reserved_qps = 1 << (field & 0xf);
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
+@@ -131,6 +131,7 @@ struct mlx4_dev_cap {
+       u32 health_buffer_addrs;
+       struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
+       bool wol_port[MLX4_MAX_PORTS + 1];
++      bool map_clock_to_user;
+ };
+ 
+ struct mlx4_func_cap {
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -498,6 +498,7 @@ static int mlx4_dev_cap(struct mlx4_dev
+               }
+       }
+ 
++      dev->caps.map_clock_to_user  = dev_cap->map_clock_to_user;
+       dev->caps.uar_page_size      = PAGE_SIZE;
+       dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
+       dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
+@@ -1948,6 +1949,11 @@ int mlx4_get_internal_clock_params(struc
+       if (mlx4_is_slave(dev))
+               return -EOPNOTSUPP;
+ 
++      if (!dev->caps.map_clock_to_user) {
++              mlx4_dbg(dev, "Map clock to user is not supported.\n");
++              return -EOPNOTSUPP;
++      }
++
+       if (!params)
+               return -EINVAL;
+ 
+--- a/include/linux/mlx4/device.h
++++ b/include/linux/mlx4/device.h
+@@ -632,6 +632,7 @@ struct mlx4_caps {
+       bool                    wol_port[MLX4_MAX_PORTS + 1];
+       struct mlx4_rate_limit_caps rl_caps;
+       u32                     health_buffer_addrs;
++      bool                    map_clock_to_user;
+ };
+ 
+ struct mlx4_buf_list {
diff --git a/queue-5.4/regulator-core-resolve-supply-for-boot-on-always-on-regulators.patch b/queue-5.4/regulator-core-resolve-supply-for-boot-on-always-on-regulators.patch

new file mode 100644 (file)

index 0000000..2398a7e
--- /dev/null
+++ b/queue-5.4/regulator-core-resolve-supply-for-boot-on-always-on-regulators.patch
@@ -0,0 +1,38 @@
+From 98e48cd9283dbac0e1445ee780889f10b3d1db6a Mon Sep 17 00:00:00 2001
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Date: Thu, 20 May 2021 01:12:23 +0300
+Subject: regulator: core: resolve supply for boot-on/always-on regulators
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+commit 98e48cd9283dbac0e1445ee780889f10b3d1db6a upstream.
+
+For the boot-on/always-on regulators the set_machine_constrainst() is
+called before resolving rdev->supply. Thus the code would try to enable
+rdev before enabling supplying regulator. Enforce resolving supply
+regulator before enabling rdev.
+
+Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Link: https://lore.kernel.org/r/20210519221224.2868496-1-dmitry.baryshkov@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/regulator/core.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -1381,6 +1381,12 @@ static int set_machine_constraints(struc
+        * and we have control then make sure it is enabled.
+        */
+       if (rdev->constraints->always_on || rdev->constraints->boot_on) {
++              /* If we want to enable this regulator, make sure that we know
++               * the supplying regulator.
++               */
++              if (rdev->supply_name && !rdev->supply)
++                      return -EPROBE_DEFER;
++
+               if (rdev->supply) {
+                       ret = regulator_enable(rdev->supply);
+                       if (ret < 0) {
diff --git a/queue-5.4/regulator-max77620-use-device_set_of_node_from_dev.patch b/queue-5.4/regulator-max77620-use-device_set_of_node_from_dev.patch

new file mode 100644 (file)

index 0000000..aa8fe3d
--- /dev/null
+++ b/queue-5.4/regulator-max77620-use-device_set_of_node_from_dev.patch
@@ -0,0 +1,40 @@
+From 6f55c5dd1118b3076d11d9cb17f5c5f4bc3a1162 Mon Sep 17 00:00:00 2001
+From: Dmitry Osipenko <digetx@gmail.com>
+Date: Mon, 24 May 2021 01:42:42 +0300
+Subject: regulator: max77620: Use device_set_of_node_from_dev()
+
+From: Dmitry Osipenko <digetx@gmail.com>
+
+commit 6f55c5dd1118b3076d11d9cb17f5c5f4bc3a1162 upstream.
+
+The MAX77620 driver fails to re-probe on deferred probe because driver
+core tries to claim resources that are already claimed by the PINCTRL
+device. Use device_set_of_node_from_dev() helper which marks OF node as
+reused, skipping erroneous execution of pinctrl_bind_pins() for the PMIC
+device on the re-probe.
+
+Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator")
+Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
+Link: https://lore.kernel.org/r/20210523224243.13219-2-digetx@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/regulator/max77620-regulator.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/regulator/max77620-regulator.c
++++ b/drivers/regulator/max77620-regulator.c
+@@ -814,6 +814,13 @@ static int max77620_regulator_probe(stru
+       config.dev = dev;
+       config.driver_data = pmic;
+ 
++      /*
++       * Set of_node_reuse flag to prevent driver core from attempting to
++       * claim any pinmux resources already claimed by the parent device.
++       * Otherwise PMIC driver will fail to re-probe.
++       */
++      device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
++
+       for (id = 0; id < MAX77620_NUM_REGS; id++) {
+               struct regulator_dev *rdev;
+               struct regulator_desc *rdesc;
diff --git a/queue-5.4/sched-fair-make-sure-to-update-tg-contrib-for-blocked-load.patch b/queue-5.4/sched-fair-make-sure-to-update-tg-contrib-for-blocked-load.patch

new file mode 100644 (file)

index 0000000..867e243
--- /dev/null
+++ b/queue-5.4/sched-fair-make-sure-to-update-tg-contrib-for-blocked-load.patch
@@ -0,0 +1,62 @@
+From 02da26ad5ed6ea8680e5d01f20661439611ed776 Mon Sep 17 00:00:00 2001
+From: Vincent Guittot <vincent.guittot@linaro.org>
+Date: Thu, 27 May 2021 14:29:16 +0200
+Subject: sched/fair: Make sure to update tg contrib for blocked load
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+commit 02da26ad5ed6ea8680e5d01f20661439611ed776 upstream.
+
+During the update of fair blocked load (__update_blocked_fair()), we
+update the contribution of the cfs in tg->load_avg if cfs_rq's pelt
+has decayed.  Nevertheless, the pelt values of a cfs_rq could have
+been recently updated while propagating the change of a child. In this
+case, cfs_rq's pelt will not decayed because it has already been
+updated and we don't update tg->load_avg.
+
+__update_blocked_fair
+  ...
+  for_each_leaf_cfs_rq_safe: child cfs_rq
+    update cfs_rq_load_avg() for child cfs_rq
+    ...
+    update_load_avg(cfs_rq_of(se), se, 0)
+      ...
+      update cfs_rq_load_avg() for parent cfs_rq
+               -propagation of child's load makes parent cfs_rq->load_sum
+                becoming null
+        -UPDATE_TG is not set so it doesn't update parent
+                cfs_rq->tg_load_avg_contrib
+  ..
+  for_each_leaf_cfs_rq_safe: parent cfs_rq
+    update cfs_rq_load_avg() for parent cfs_rq
+      - nothing to do because parent cfs_rq has already been updated
+               recently so cfs_rq->tg_load_avg_contrib is not updated
+    ...
+    parent cfs_rq is decayed
+      list_del_leaf_cfs_rq parent cfs_rq
+         - but it still contibutes to tg->load_avg
+
+we must set UPDATE_TG flags when propagting pending load to the parent
+
+Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
+Reported-by: Odin Ugedal <odin@uged.al>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Odin Ugedal <odin@uged.al>
+Link: https://lkml.kernel.org/r/20210527122916.27683-3-vincent.guittot@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7660,7 +7660,7 @@ static bool __update_blocked_fair(struct
+               /* Propagate pending load changes to the parent, if any: */
+               se = cfs_rq->tg->se[cpu];
+               if (se && !skip_blocked_update(se))
+-                      update_load_avg(cfs_rq_of(se), se, 0);
++                      update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
+ 
+               /*
+                * There can be a lot of idle CPU cgroups.  Don't let fully
diff --git a/queue-5.4/series b/queue-5.4/series

index aea9301496f6746381737709fcfe50c67b9352f4..ffd551649d507d773ebbd82c24b344e3b3b0c9f9 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -59,3 +59,13 @@ usb-serial-cp210x-fix-alternate-function-for-cp2102n-qfn20.patch
  usb-gadget-eem-fix-wrong-eem-header-operation.patch
  usb-fix-various-gadgets-null-ptr-deref-on-10gbps-cabling.patch
  usb-fix-various-gadget-panics-on-10gbps-cabling.patch
+regulator-core-resolve-supply-for-boot-on-always-on-regulators.patch
+regulator-max77620-use-device_set_of_node_from_dev.patch
+usb-typec-mux-fix-copy-paste-mistake-in-typec_mux_match.patch
+rdma-ipoib-fix-warning-caused-by-destroying-non-initial-netns.patch
+rdma-mlx4-do-not-map-the-core_clock-page-to-user-space-unless-enabled.patch
+vmlinux.lds.h-avoid-orphan-section-with-smp.patch
+perf-fix-data-race-between-pin_count-increment-decrement.patch
+sched-fair-make-sure-to-update-tg-contrib-for-blocked-load.patch
+kvm-x86-ensure-liveliness-of-nested-vm-enter-fail-tracepoint-message.patch
+ib-mlx5-fix-initializing-cq-fragments-buffer.patch
diff --git a/queue-5.4/usb-typec-mux-fix-copy-paste-mistake-in-typec_mux_match.patch b/queue-5.4/usb-typec-mux-fix-copy-paste-mistake-in-typec_mux_match.patch

new file mode 100644 (file)

index 0000000..a6b4665
--- /dev/null
+++ b/queue-5.4/usb-typec-mux-fix-copy-paste-mistake-in-typec_mux_match.patch
@@ -0,0 +1,36 @@
+From 142d0b24c1b17139f1aaaacae7542a38aa85640f Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+Date: Wed, 9 Jun 2021 17:21:32 -0700
+Subject: usb: typec: mux: Fix copy-paste mistake in typec_mux_match
+
+From: Bjorn Andersson <bjorn.andersson@linaro.org>
+
+commit 142d0b24c1b17139f1aaaacae7542a38aa85640f upstream.
+
+Fix the copy-paste mistake in the return path of typec_mux_match(),
+where dev is considered a member of struct typec_switch rather than
+struct typec_mux.
+
+The two structs are identical in regards to having the struct device as
+the first entry, so this provides no functional change.
+
+Fixes: 3370db35193b ("usb: typec: Registering real device entries for the muxes")
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Link: https://lore.kernel.org/r/20210610002132.3088083-1-bjorn.andersson@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/typec/mux.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/usb/typec/mux.c
++++ b/drivers/usb/typec/mux.c
+@@ -243,7 +243,7 @@ find_mux:
+       dev = class_find_device(&typec_mux_class, NULL, con->fwnode,
+                               mux_fwnode_match);
+ 
+-      return dev ? to_typec_switch(dev) : ERR_PTR(-EPROBE_DEFER);
++      return dev ? to_typec_mux(dev) : ERR_PTR(-EPROBE_DEFER);
+ }
+ 
+ /**
diff --git a/queue-5.4/vmlinux.lds.h-avoid-orphan-section-with-smp.patch b/queue-5.4/vmlinux.lds.h-avoid-orphan-section-with-smp.patch

new file mode 100644 (file)

index 0000000..e885dd1
--- /dev/null
+++ b/queue-5.4/vmlinux.lds.h-avoid-orphan-section-with-smp.patch
@@ -0,0 +1,54 @@
+From d4c6399900364facd84c9e35ce1540b6046c345f Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Wed, 5 May 2021 17:14:11 -0700
+Subject: vmlinux.lds.h: Avoid orphan section with !SMP
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit d4c6399900364facd84c9e35ce1540b6046c345f upstream.
+
+With x86_64_defconfig and the following configs, there is an orphan
+section warning:
+
+CONFIG_SMP=n
+CONFIG_AMD_MEM_ENCRYPT=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_KVM=y
+CONFIG_PARAVIRT=y
+
+ld: warning: orphan section `.data..decrypted' from `arch/x86/kernel/cpu/vmware.o' being placed in section `.data..decrypted'
+ld: warning: orphan section `.data..decrypted' from `arch/x86/kernel/kvm.o' being placed in section `.data..decrypted'
+
+These sections are created with DEFINE_PER_CPU_DECRYPTED, which
+ultimately turns into __PCPU_ATTRS, which in turn has a section
+attribute with a value of PER_CPU_BASE_SECTION + the section name. When
+CONFIG_SMP is not set, the base section is .data and that is not
+currently handled in any linker script.
+
+Add .data..decrypted to PERCPU_DECRYPTED_SECTION, which is included in
+PERCPU_INPUT -> PERCPU_SECTION, which is include in the x86 linker
+script when either CONFIG_X86_64 or CONFIG_SMP is unset, taking care of
+the warning.
+
+Fixes: ac26963a1175 ("percpu: Introduce DEFINE_PER_CPU_DECRYPTED")
+Link: https://github.com/ClangBuiltLinux/linux/issues/1360
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Tested-by: Nick Desaulniers <ndesaulniers@google.com> # build
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210506001410.1026691-1-nathan@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/vmlinux.lds.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -882,6 +882,7 @@
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
+ #define PERCPU_DECRYPTED_SECTION                                      \
+       . = ALIGN(PAGE_SIZE);                                           \
++      *(.data..decrypted)                                             \
+       *(.data..percpu..decrypted)                                     \
+       . = ALIGN(PAGE_SIZE);
+ #else
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 13 Jun 2021 13:21:38 +0000 (15:21 +0200)
queue-5.4/ib-mlx5-fix-initializing-cq-fragments-buffer.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/kvm-x86-ensure-liveliness-of-nested-vm-enter-fail-tracepoint-message.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/perf-fix-data-race-between-pin_count-increment-decrement.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/rdma-ipoib-fix-warning-caused-by-destroying-non-initial-netns.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/rdma-mlx4-do-not-map-the-core_clock-page-to-user-space-unless-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/regulator-core-resolve-supply-for-boot-on-always-on-regulators.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/regulator-max77620-use-device_set_of_node_from_dev.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sched-fair-make-sure-to-update-tg-contrib-for-blocked-load.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history
queue-5.4/usb-typec-mux-fix-copy-paste-mistake-in-typec_mux_match.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/vmlinux.lds.h-avoid-orphan-section-with-smp.patch	[new file with mode: 0644]	patch \| blob