Fixes for 5.15
author Sasha Levin <sashal@kernel.org>
Mon, 6 Dec 2021 04:03:02 +0000 (23:03 -0500)
committer Sasha Levin <sashal@kernel.org>
Mon, 6 Dec 2021 04:03:02 +0000 (23:03 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
34 files changed:
queue-5.15/asoc-rk817-add-module-alias-for-rk817-codec.patch [new file with mode: 0644]
queue-5.15/drm-msm-devfreq-fix-opp-refcnt-leak.patch [new file with mode: 0644]
queue-5.15/drm-msm-fix-mmap-to-include-vm_io-and-vm_dontdump.patch [new file with mode: 0644]
queue-5.15/drm-msm-fix-wait_fence-submitqueue-leak.patch [new file with mode: 0644]
queue-5.15/drm-msm-restore-error-return-on-invalid-fence.patch [new file with mode: 0644]
queue-5.15/io-wq-don-t-retry-task_work-creation-failure-on-fata.patch [new file with mode: 0644]
queue-5.15/iwlwifi-fix-memory-leaks-in-error-handling-path.patch [new file with mode: 0644]
queue-5.15/kvm-sev-initialize-regions_list-of-a-mirror-vm.patch [new file with mode: 0644]
queue-5.15/kvm-sev-return-appropriate-error-codes-if-sev-es-scr.patch [new file with mode: 0644]
queue-5.15/kvm-vmx-set-failure-code-in-prepare_vmcs02.patch [new file with mode: 0644]
queue-5.15/kvm-x86-fix-when-shadow_root_level-5-guest-root_leve.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-pass-parameter-flush-as-false-in-kvm_tdp.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-remove-spurious-tlb-flushes-in-tdp-mmu-z.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-rename-slot_handle_leaf-to-slot_handle_l.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-skip-tlb-flush-if-it-has-been-done-in-za.patch [new file with mode: 0644]
queue-5.15/kvm-x86-pmu-fix-reserved-bits-for-amd-perfevtseln-re.patch [new file with mode: 0644]
queue-5.15/mctp-don-t-let-rtm_delroute-delete-local-routes.patch [new file with mode: 0644]
queue-5.15/net-mlx5-e-switch-check-group-pointer-before-reading.patch [new file with mode: 0644]
queue-5.15/net-mlx5-e-switch-fix-single-fdb-creation-on-bluefie.patch [new file with mode: 0644]
queue-5.15/net-mlx5-e-switch-respect-bw-share-of-the-new-group.patch [new file with mode: 0644]
queue-5.15/net-mlx5-move-modify_rqt-command-to-ignore-list-in-i.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-fix-missing-ipsec-statistics-on-uplink-rep.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-rename-lro_timeout-to-packet_merge_timeout.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-rename-tir-lro-functions-to-tir-packet-mer.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-sync-tir-params-updates-against-concurrent.patch [new file with mode: 0644]
queue-5.15/preempt-dynamic-fix-setup_preempt_mode-return-value.patch [new file with mode: 0644]
queue-5.15/revert-drm-i915-implement-wa_1508744258.patch [new file with mode: 0644]
queue-5.15/sched-uclamp-fix-rq-uclamp_max-not-set-on-first-enqu.patch [new file with mode: 0644]
queue-5.15/serial-8250_bcm7271-uart-errors-after-resuming-from-.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/x86-entry-add-a-fence-for-kernel-entry-swapgs-in-par.patch [new file with mode: 0644]
queue-5.15/x86-entry-use-the-correct-fence-macro-after-swapgs-i.patch [new file with mode: 0644]
queue-5.15/x86-sev-fix-sev-es-ins-outs-instructions-for-word-dw.patch [new file with mode: 0644]
queue-5.15/x86-xen-add-xenpv_restore_regs_and_return_to_usermod.patch [new file with mode: 0644]

diff --git a/queue-5.15/asoc-rk817-add-module-alias-for-rk817-codec.patch b/queue-5.15/asoc-rk817-add-module-alias-for-rk817-codec.patch
new file mode 100644 (file)
index 0000000..6947a70
--- /dev/null
@@ -0,0 +1,36 @@
+From 2acf13bbb89bc0c201531608a02ba8ce83493938 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Nov 2021 16:05:20 +0100
+Subject: ASoC: rk817: Add module alias for rk817-codec
+
+From: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
+
+[ Upstream commit 428ee30a05cd1362c8aa86a4c909b0d1c6bc48a4 ]
+
+Without a module alias, autoloading the driver does not occur
+when it is built as a module.
+
+By adding a module alias, the driver now probes fine automatically
+and therefore analog audio output works as it should.
+
+Fixes: 0d6a04da9b25 ("ASoC: Add Rockchip rk817 audio CODEC support")
+Signed-off-by: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
+Link: https://lore.kernel.org/r/20211121150521.159543-1-frattaroli.nicolas@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/rk817_codec.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c
+index 943d7d933e81b..03f24edfe4f64 100644
+--- a/sound/soc/codecs/rk817_codec.c
++++ b/sound/soc/codecs/rk817_codec.c
+@@ -539,3 +539,4 @@ module_platform_driver(rk817_codec_driver);
+ MODULE_DESCRIPTION("ASoC RK817 codec driver");
+ MODULE_AUTHOR("binyuan <kevan.lan@rock-chips.com>");
+ MODULE_LICENSE("GPL v2");
++MODULE_ALIAS("platform:rk817-codec");
+-- 
+2.33.0
+
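For context, a hedged sketch of why one MODULE_ALIAS() line restores autoloading (the probe callback name below is hypothetical; only the "rk817-codec" strings come from the patch): when the rk817 MFD core registers its "rk817-codec" child platform device, the driver core emits a uevent carrying MODALIAS=platform:rk817-codec, and modprobe can only resolve that string if some module advertises a matching alias.

    static struct platform_driver rk817_codec_driver = {
    	.driver = {
    		.name = "rk817-codec",  /* matched against the platform device name */
    	},
    	.probe = rk817_codec_probe,     /* hypothetical name, for illustration */
    };
    module_platform_driver(rk817_codec_driver);

    /* "platform:" + device name: lets modprobe resolve the uevent MODALIAS */
    MODULE_ALIAS("platform:rk817-codec");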
diff --git a/queue-5.15/drm-msm-devfreq-fix-opp-refcnt-leak.patch b/queue-5.15/drm-msm-devfreq-fix-opp-refcnt-leak.patch
new file mode 100644 (file)
index 0000000..1752f48
--- /dev/null
@@ -0,0 +1,48 @@
+From e4a3321131768ba6ed1961e8834764cd0a50f958 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Nov 2021 13:20:21 -0700
+Subject: drm/msm/devfreq: Fix OPP refcnt leak
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 59ba1b2b4825342676300f66d785764be3fcb093 ]
+
+Reported-by: Douglas Anderson <dianders@chromium.org>
+Fixes: 9bc95570175a ("drm/msm: Devfreq tuning")
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Tested-By: Steev Klimaszewski <steev@kali.org>
+Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>
+Link: https://lore.kernel.org/r/20211105202021.181092-1-robdclark@gmail.com
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_gpu_devfreq.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+index 20006d060b5b5..4ac2a4eb984d8 100644
+--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
++++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+@@ -20,6 +20,10 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+       struct msm_gpu *gpu = dev_to_gpu(dev);
+       struct dev_pm_opp *opp;
++      /*
++       * Note that devfreq_recommended_opp() can modify the freq
++       * to something that actually is in the opp table:
++       */
+       opp = devfreq_recommended_opp(dev, freq, flags);
+       /*
+@@ -28,6 +32,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+        */
+       if (gpu->devfreq.idle_freq) {
+               gpu->devfreq.idle_freq = *freq;
++              dev_pm_opp_put(opp);
+               return 0;
+       }
+-- 
+2.33.0
+
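The rule the fix restores, as a minimal sketch (assuming the surrounding devfreq plumbing; only the two OPP calls are the point): devfreq_recommended_opp() returns an OPP with an elevated reference count, so every path after a successful lookup, including early returns like the idle_freq case above, must be balanced with dev_pm_opp_put().

    static int freq_target_sketch(struct device *dev, unsigned long *freq, u32 flags)
    {
    	struct dev_pm_opp *opp;

    	/* takes a reference on the returned OPP (and may round *freq) */
    	opp = devfreq_recommended_opp(dev, freq, flags);
    	if (IS_ERR(opp))
    		return PTR_ERR(opp);

    	/* ... act on *freq ... */

    	dev_pm_opp_put(opp);    /* must run on every exit after the lookup */
    	return 0;
    }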
diff --git a/queue-5.15/drm-msm-fix-mmap-to-include-vm_io-and-vm_dontdump.patch b/queue-5.15/drm-msm-fix-mmap-to-include-vm_io-and-vm_dontdump.patch
new file mode 100644 (file)
index 0000000..9384be3
--- /dev/null
@@ -0,0 +1,101 @@
+From 7bc61a260da7942fb7f63e8391ca75d81f308a43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Nov 2021 11:33:42 -0800
+Subject: drm/msm: Fix mmap to include VM_IO and VM_DONTDUMP
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit 3466d9e217b337bf473ee629c608e53f9f3ab786 ]
+
+In commit 510410bfc034 ("drm/msm: Implement mmap as GEM object
+function") we switched to a new/cleaner method of doing things. That's
+good, but we missed a little bit.
+
+Before that commit, we used to _first_ run through the
+drm_gem_mmap_obj() case where `obj->funcs->mmap()` was NULL. That meant
+that we ran:
+
+  vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+  vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+  vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
+...and _then_ we modified those mappings with our own. Now that
+`obj->funcs->mmap()` is no longer NULL we don't run the default
+code. It looks like the fact that the vm_flags got VM_IO / VM_DONTDUMP
+was important because we're now getting crashes on Chromebooks that
+use ARC++ while logging out. Specifically a crash that looks like this
+(this is on a 5.10 kernel w/ relevant backports but also seen on a
+5.15 kernel):
+
+  Unable to handle kernel paging request at virtual address ffffffc008000000
+  Mem abort info:
+    ESR = 0x96000006
+    EC = 0x25: DABT (current EL), IL = 32 bits
+    SET = 0, FnV = 0
+    EA = 0, S1PTW = 0
+  Data abort info:
+    ISV = 0, ISS = 0x00000006
+    CM = 0, WnR = 0
+  swapper pgtable: 4k pages, 39-bit VAs, pgdp=000000008293d000
+  [ffffffc008000000] pgd=00000001002b3003, p4d=00000001002b3003,
+                     pud=00000001002b3003, pmd=0000000000000000
+  Internal error: Oops: 96000006 [#1] PREEMPT SMP
+  [...]
+  CPU: 7 PID: 15734 Comm: crash_dump64 Tainted: G W 5.10.67 #1 [...]
+  Hardware name: Qualcomm Technologies, Inc. sc7280 IDP SKU2 platform (DT)
+  pstate: 80400009 (Nzcv daif +PAN -UAO -TCO BTYPE=--)
+  pc : __arch_copy_to_user+0xc0/0x30c
+  lr : copyout+0xac/0x14c
+  [...]
+  Call trace:
+   __arch_copy_to_user+0xc0/0x30c
+   copy_page_to_iter+0x1a0/0x294
+   process_vm_rw_core+0x240/0x408
+   process_vm_rw+0x110/0x16c
+   __arm64_sys_process_vm_readv+0x30/0x3c
+   el0_svc_common+0xf8/0x250
+   do_el0_svc+0x30/0x80
+   el0_svc+0x10/0x1c
+   el0_sync_handler+0x78/0x108
+   el0_sync+0x184/0x1c0
+  Code: f8408423 f80008c3 910020c6 36100082 (b8404423)
+
+Let's add the two flags back in.
+
+While we're at it, the fact that we aren't running the default means
+that we _don't_ need to clear out VM_PFNMAP, so remove that and save
+an instruction.
+
+NOTE: it was confirmed that VM_IO was the important flag to fix the
+problem I was seeing, but adding back VM_DONTDUMP seems like a sane
+thing to do so I'm doing that too.
+
+Fixes: 510410bfc034 ("drm/msm: Implement mmap as GEM object function")
+Reported-by: Stephen Boyd <swboyd@chromium.org>
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Tested-by: Stephen Boyd <swboyd@chromium.org>
+Link: https://lore.kernel.org/r/20211110113334.1.I1687e716adb2df746da58b508db3f25423c40b27@changeid
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_gem.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
+index bd6ec04f345e1..cb52ac01e5122 100644
+--- a/drivers/gpu/drm/msm/msm_gem.c
++++ b/drivers/gpu/drm/msm/msm_gem.c
+@@ -1055,8 +1055,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct
+ {
+       struct msm_gem_object *msm_obj = to_msm_bo(obj);
+-      vma->vm_flags &= ~VM_PFNMAP;
+-      vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
++      vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags));
+       return 0;
+-- 
+2.33.0
+
diff --git a/queue-5.15/drm-msm-fix-wait_fence-submitqueue-leak.patch b/queue-5.15/drm-msm-fix-wait_fence-submitqueue-leak.patch
new file mode 100644 (file)
index 0000000..03d3111
--- /dev/null
@@ -0,0 +1,104 @@
+From 029b53ab8623e45bb3efd9bbe30f004d3e37ccaf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Nov 2021 11:24:55 -0800
+Subject: drm/msm: Fix wait_fence submitqueue leak
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit ea0006d390a28012f8187717aea61498b2b341e5 ]
+
+We weren't dropping the submitqueue reference in all paths, in
+particular when the fence has already been signalled. Split out
+a helper to simplify handling this in the various return paths.
+
+Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Link: https://lore.kernel.org/r/20211111192457.747899-2-robdclark@gmail.com
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_drv.c | 49 +++++++++++++++++++++--------------
+ 1 file changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
+index d4e09703a87db..4c5661f38dd26 100644
+--- a/drivers/gpu/drm/msm/msm_drv.c
++++ b/drivers/gpu/drm/msm/msm_drv.c
+@@ -938,29 +938,12 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
+       return ret;
+ }
+-static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+-              struct drm_file *file)
++static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
++                    ktime_t timeout)
+ {
+-      struct msm_drm_private *priv = dev->dev_private;
+-      struct drm_msm_wait_fence *args = data;
+-      ktime_t timeout = to_ktime(args->timeout);
+-      struct msm_gpu_submitqueue *queue;
+-      struct msm_gpu *gpu = priv->gpu;
+       struct dma_fence *fence;
+       int ret;
+-      if (args->pad) {
+-              DRM_ERROR("invalid pad: %08x\n", args->pad);
+-              return -EINVAL;
+-      }
+-
+-      if (!gpu)
+-              return 0;
+-
+-      queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+-      if (!queue)
+-              return -ENOENT;
+-
+       /*
+        * Map submitqueue scoped "seqno" (which is actually an idr key)
+        * back to underlying dma-fence
+@@ -972,7 +955,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+       ret = mutex_lock_interruptible(&queue->lock);
+       if (ret)
+               return ret;
+-      fence = idr_find(&queue->fence_idr, args->fence);
++      fence = idr_find(&queue->fence_idr, fence_id);
+       if (fence)
+               fence = dma_fence_get_rcu(fence);
+       mutex_unlock(&queue->lock);
+@@ -988,6 +971,32 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+       }
+       dma_fence_put(fence);
++
++      return ret;
++}
++
++static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
++              struct drm_file *file)
++{
++      struct msm_drm_private *priv = dev->dev_private;
++      struct drm_msm_wait_fence *args = data;
++      struct msm_gpu_submitqueue *queue;
++      int ret;
++
++      if (args->pad) {
++              DRM_ERROR("invalid pad: %08x\n", args->pad);
++              return -EINVAL;
++      }
++
++      if (!priv->gpu)
++              return 0;
++
++      queue = msm_submitqueue_get(file->driver_priv, args->queueid);
++      if (!queue)
++              return -ENOENT;
++
++      ret = wait_fence(queue, args->fence, to_ktime(args->timeout));
++
+       msm_submitqueue_put(queue);
+       return ret;
+-- 
+2.33.0
+
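The shape of the fix is the classic one-get-one-put funnel; condensed from the patch, with the reasoning as comments:

    	queue = msm_submitqueue_get(file->driver_priv, args->queueid);
    	if (!queue)
    		return -ENOENT;

    	/* every early return (invalid fence, already-signalled fence,
    	 * interrupted lock, wait result) now lives inside the helper ...
    	 */
    	ret = wait_fence(queue, args->fence, to_ktime(args->timeout));

    	/* ... so exactly one put covers all of them */
    	msm_submitqueue_put(queue);
    	return ret;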
diff --git a/queue-5.15/drm-msm-restore-error-return-on-invalid-fence.patch b/queue-5.15/drm-msm-restore-error-return-on-invalid-fence.patch
new file mode 100644 (file)
index 0000000..fea3ad8
--- /dev/null
@@ -0,0 +1,79 @@
+From 77ee7908bf51e9003997e81116b543d93d7b2c14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Nov 2021 11:24:56 -0800
+Subject: drm/msm: Restore error return on invalid fence
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 067ecab9eef620d41040715669e5fcdc2f8ff963 ]
+
+When converting to use an idr to map userspace fence seqno values back
+to a dma_fence, we lost the error return when userspace passes seqno
+that is larger than the last submitted fence.  Restore this check.
+
+Reported-by: Akhil P Oommen <akhilpo@codeaurora.org>
+Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr")
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>
+Link: https://lore.kernel.org/r/20211111192457.747899-3-robdclark@gmail.com
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_drv.c        | 6 ++++++
+ drivers/gpu/drm/msm/msm_gem_submit.c | 1 +
+ drivers/gpu/drm/msm/msm_gpu.h        | 3 +++
+ 3 files changed, 10 insertions(+)
+
+diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
+index 4c5661f38dd26..27f737a253c77 100644
+--- a/drivers/gpu/drm/msm/msm_drv.c
++++ b/drivers/gpu/drm/msm/msm_drv.c
+@@ -944,6 +944,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
+       struct dma_fence *fence;
+       int ret;
++      if (fence_id > queue->last_fence) {
++              DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
++                                    fence_id, queue->last_fence);
++              return -EINVAL;
++      }
++
+       /*
+        * Map submitqueue scoped "seqno" (which is actually an idr key)
+        * back to underlying dma-fence
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index 151d19e4453cd..a38f23be497d8 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -911,6 +911,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+       drm_sched_entity_push_job(&submit->base, queue->entity);
+       args->fence = submit->fence_id;
++      queue->last_fence = submit->fence_id;
+       msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
+       msm_process_post_deps(post_deps, args->nr_out_syncobjs,
+diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
+index ee25d556c8a10..2e2424066e701 100644
+--- a/drivers/gpu/drm/msm/msm_gpu.h
++++ b/drivers/gpu/drm/msm/msm_gpu.h
+@@ -352,6 +352,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
+  * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
+  *             by the submitqueue's priority
+  * @faults:    the number of GPU hangs associated with this submitqueue
++ * @last_fence: the sequence number of the last allocated fence (for error
++ *             checking)
+  * @ctx:       the per-drm_file context associated with the submitqueue (ie.
+  *             which set of pgtables do submits jobs associated with the
+  *             submitqueue use)
+@@ -367,6 +369,7 @@ struct msm_gpu_submitqueue {
+       u32 flags;
+       u32 ring_nr;
+       int faults;
++      uint32_t last_fence;
+       struct msm_file_private *ctx;
+       struct list_head node;
+       struct idr fence_idr;
+-- 
+2.33.0
+
diff --git a/queue-5.15/io-wq-don-t-retry-task_work-creation-failure-on-fata.patch b/queue-5.15/io-wq-don-t-retry-task_work-creation-failure-on-fata.patch
new file mode 100644 (file)
index 0000000..7c303c0
--- /dev/null
@@ -0,0 +1,45 @@
+From 8002c2b26e443976119d7bb8fa8bfc3ae0406af8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Dec 2021 19:40:15 -0700
+Subject: io-wq: don't retry task_work creation failure on fatal conditions
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit a226abcd5d427fe9d42efc442818a4a1821e2664 ]
+
+We don't want to be retrying task_work creation failure if there's
+an actual signal pending for the parent task. If we do, then we can
+enter an infinite loop of perpetually retrying and each retry failing
+with -ERESTARTNOINTR because a signal is pending.
+
+Fixes: 3146cba99aa2 ("io-wq: make worker creation resilient against signals")
+Reported-by: Florian Fischer <florian.fl.fischer@fau.de>
+Link: https://lore.kernel.org/io-uring/20211202165606.mqryio4yzubl7ms5@pasture/
+Tested-by: Florian Fischer <florian.fl.fischer@fau.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io-wq.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/io-wq.c b/fs/io-wq.c
+index 8c61315657546..e8f77903d7757 100644
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -711,6 +711,13 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+ static inline bool io_should_retry_thread(long err)
+ {
++      /*
++       * Prevent perpetual task_work retry, if the task (or its group) is
++       * exiting.
++       */
++      if (fatal_signal_pending(current))
++              return false;
++
+       switch (err) {
+       case -EAGAIN:
+       case -ERESTARTSYS:
+-- 
+2.33.0
+
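Why the loop never terminated, in sketch form: while a fatal signal is queued, task_work creation keeps failing with -ERESTARTNOINTR, the retry check classifies that as transient, and the signal never goes away because the task is exiting. With the fix, the function reads roughly as follows (reconstructed from the hunk above, so treat the exact case list as illustrative):

    static inline bool io_should_retry_thread(long err)
    {
    	/*
    	 * A fatal signal means the task (or its group) is exiting; the
    	 * "transient" errors below would then recur forever, so give up.
    	 */
    	if (fatal_signal_pending(current))
    		return false;

    	switch (err) {
    	case -EAGAIN:
    	case -ERESTARTSYS:
    	case -ERESTARTNOINTR:
    	case -ERESTARTNOHAND:
    		return true;
    	default:
    		return false;
    	}
    }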
diff --git a/queue-5.15/iwlwifi-fix-memory-leaks-in-error-handling-path.patch b/queue-5.15/iwlwifi-fix-memory-leaks-in-error-handling-path.patch
new file mode 100644 (file)
index 0000000..036d23d
--- /dev/null
@@ -0,0 +1,56 @@
+From 1d82c7a2dfa682ea5d77ec619018792ea5f9601f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Nov 2021 08:23:11 +0100
+Subject: iwlwifi: Fix memory leaks in error handling path
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit a571bc28326d9f3e13f5f2d9cda2883e0631b0ce ]
+
+Should an error occur (invalid TLV len or memory allocation failure), the
+memory already allocated in 'reduce_power_data' should be freed before
+returning, otherwise it leaks.
+
+Fixes: 9dad325f9d57 ("iwlwifi: support loading the reduced power table from UEFI")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Link: https://lore.kernel.org/r/1504cd7d842d13ddb8244e18004523128d5c9523.1636615284.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+index c875bf35533ce..009dd4be597b0 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+@@ -86,6 +86,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+               if (len < tlv_len) {
+                       IWL_ERR(trans, "invalid TLV len: %zd/%u\n",
+                               len, tlv_len);
++                      kfree(reduce_power_data);
+                       reduce_power_data = ERR_PTR(-EINVAL);
+                       goto out;
+               }
+@@ -105,6 +106,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+                               IWL_DEBUG_FW(trans,
+                                            "Couldn't allocate (more) reduce_power_data\n");
++                              kfree(reduce_power_data);
+                               reduce_power_data = ERR_PTR(-ENOMEM);
+                               goto out;
+                       }
+@@ -134,6 +136,10 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+ done:
+       if (!size) {
+               IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n");
++              /* Better safe than sorry, but 'reduce_power_data' should
++               * always be NULL if !size.
++               */
++              kfree(reduce_power_data);
+               reduce_power_data = ERR_PTR(-ENOENT);
+               goto out;
+       }
+-- 
+2.33.0
+
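The idiom at issue, as a hedged sketch with invented names: ERR_PTR() encodes an errno inside an invalid pointer value that callers unwrap with IS_ERR()/PTR_ERR(), so overwriting a live allocation with such a cookie, as the pre-patch error paths did, orphans the buffer.

    /* illustrative only; none of these names come from the driver */
    static void *parse_section(const u8 *data, size_t len)
    {
    	u8 *buf = kmalloc(len, GFP_KERNEL);

    	if (!buf)
    		return ERR_PTR(-ENOMEM);

    	if (tlv_is_invalid(data, len)) {        /* hypothetical check */
    		kfree(buf);                     /* free before losing the pointer */
    		return ERR_PTR(-EINVAL);        /* caller tests with IS_ERR() */
    	}
    	return buf;
    }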
diff --git a/queue-5.15/kvm-sev-initialize-regions_list-of-a-mirror-vm.patch b/queue-5.15/kvm-sev-initialize-regions_list-of-a-mirror-vm.patch
new file mode 100644 (file)
index 0000000..cd84c2c
--- /dev/null
@@ -0,0 +1,37 @@
+From cf78e42e8964519ded1de7b4a4a73d5cdb488446 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Nov 2021 19:50:30 -0500
+Subject: KVM: SEV: initialize regions_list of a mirror VM
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 2b347a387811cb4aa7bcdb96e9203c5019a6fb41 ]
+
+This was broken before the introduction of KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
+but technically harmless because the region list was unused for a mirror
+VM.  However, it is untidy and it now causes a NULL pointer access when
+attempting to move the encryption context of a mirror VM.
+
+Fixes: 54526d1fd593 ("KVM: x86: Support KVM VMs sharing SEV context")
+Message-Id: <20211123005036.2954379-7-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index ff19ce0780fea..ca0effb79eab9 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1805,6 +1805,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
+       mirror_sev->fd = source_sev.fd;
+       mirror_sev->es_active = source_sev.es_active;
+       mirror_sev->handle = source_sev.handle;
++      INIT_LIST_HEAD(&mirror_sev->regions_list);
+       /*
+        * Do not copy ap_jump_table. Since the mirror does not share the same
+        * KVM contexts as the original, and they may have different
+-- 
+2.33.0
+
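Why the missing INIT_LIST_HEAD() surfaces as a NULL dereference, sketched under the assumption that the mirror's kvm_sev_info comes from zeroed memory:

    	struct list_head regions_list;          /* kzalloc'd: .next == .prev == NULL */

    	INIT_LIST_HEAD(&regions_list);          /* empty: .next == .prev == &regions_list */

    	/*
    	 * list_for_each() over the zeroed form dereferences NULL on its
    	 * first step; over the initialized form it iterates zero times.
    	 */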
diff --git a/queue-5.15/kvm-sev-return-appropriate-error-codes-if-sev-es-scr.patch b/queue-5.15/kvm-sev-return-appropriate-error-codes-if-sev-es-scr.patch
new file mode 100644 (file)
index 0000000..42e3860
--- /dev/null
@@ -0,0 +1,149 @@
+From d7097040e6ee36d7d6c21aea3fbd2114ed550777 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Nov 2021 22:23:49 +0000
+Subject: KVM: SEV: Return appropriate error codes if SEV-ES scratch setup
+ fails
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 75236f5f2299b502e4b9b267c1ce3bc14a222ceb ]
+
+Return appropriate error codes if setting up the GHCB scratch area for an
+SEV-ES guest fails.  In particular, returning -EINVAL instead of -ENOMEM
+when allocation of the kernel buffer fails could be confusing, as
+userspace would likely suspect a guest issue.
+
+Fixes: 8f423a80d299 ("KVM: SVM: Support MMIO for an SEV-ES guest")
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211109222350.2266045-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c | 30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index ca0effb79eab9..134c4ea5e6ad8 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2317,7 +2317,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ }
+ #define GHCB_SCRATCH_AREA_LIMIT               (16ULL * PAGE_SIZE)
+-static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
++static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ {
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct ghcb *ghcb = svm->ghcb;
+@@ -2328,14 +2328,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       if (!scratch_gpa_beg) {
+               pr_err("vmgexit: scratch gpa not provided\n");
+-              return false;
++              return -EINVAL;
+       }
+       scratch_gpa_end = scratch_gpa_beg + len;
+       if (scratch_gpa_end < scratch_gpa_beg) {
+               pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
+                      len, scratch_gpa_beg);
+-              return false;
++              return -EINVAL;
+       }
+       if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+@@ -2353,7 +2353,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+                   scratch_gpa_end > ghcb_scratch_end) {
+                       pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
+                              scratch_gpa_beg, scratch_gpa_end);
+-                      return false;
++                      return -EINVAL;
+               }
+               scratch_va = (void *)svm->ghcb;
+@@ -2366,18 +2366,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+               if (len > GHCB_SCRATCH_AREA_LIMIT) {
+                       pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
+                              len, GHCB_SCRATCH_AREA_LIMIT);
+-                      return false;
++                      return -EINVAL;
+               }
+               scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
+               if (!scratch_va)
+-                      return false;
++                      return -ENOMEM;
+               if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
+                       /* Unable to copy scratch area from guest */
+                       pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
+                       kfree(scratch_va);
+-                      return false;
++                      return -EFAULT;
+               }
+               /*
+@@ -2393,7 +2393,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+       svm->ghcb_sa = scratch_va;
+       svm->ghcb_sa_len = len;
+-      return true;
++      return 0;
+ }
+ static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+@@ -2532,10 +2532,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+       ghcb_set_sw_exit_info_1(ghcb, 0);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+-      ret = -EINVAL;
+       switch (exit_code) {
+       case SVM_VMGEXIT_MMIO_READ:
+-              if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
++              ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
++              if (ret)
+                       break;
+               ret = kvm_sev_es_mmio_read(vcpu,
+@@ -2544,7 +2544,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+                                          svm->ghcb_sa);
+               break;
+       case SVM_VMGEXIT_MMIO_WRITE:
+-              if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
++              ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
++              if (ret)
+                       break;
+               ret = kvm_sev_es_mmio_write(vcpu,
+@@ -2587,6 +2588,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+               vcpu_unimpl(vcpu,
+                           "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+                           control->exit_info_1, control->exit_info_2);
++              ret = -EINVAL;
+               break;
+       default:
+               ret = svm_invoke_exit_handler(vcpu, exit_code);
+@@ -2599,6 +2601,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+ {
+       int count;
+       int bytes;
++      int r;
+       if (svm->vmcb->control.exit_info_2 > INT_MAX)
+               return -EINVAL;
+@@ -2607,8 +2610,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+       if (unlikely(check_mul_overflow(count, size, &bytes)))
+               return -EINVAL;
+-      if (!setup_vmgexit_scratch(svm, in, bytes))
+-              return -EINVAL;
++      r = setup_vmgexit_scratch(svm, in, bytes);
++      if (r)
++              return r;
+       return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+ }
+-- 
+2.33.0
+
diff --git a/queue-5.15/kvm-vmx-set-failure-code-in-prepare_vmcs02.patch b/queue-5.15/kvm-vmx-set-failure-code-in-prepare_vmcs02.patch
new file mode 100644 (file)
index 0000000..4aa9796
--- /dev/null
@@ -0,0 +1,41 @@
+From 8768b094aba2f4277c67b933eb549581f7632a00 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Nov 2021 15:53:37 +0300
+Subject: KVM: VMX: Set failure code in prepare_vmcs02()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit bfbb307c628676929c2d329da0daf9d22afa8ad2 ]
+
+The error paths in the prepare_vmcs02() function are supposed to set
+*entry_failure_code but this path does not.  It leads to using an
+uninitialized variable in the caller.
+
+Fixes: 71f7347025bf ("KVM: nVMX: Load GUEST_IA32_PERF_GLOBAL_CTRL MSR on VM-Entry")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Message-Id: <20211130125337.GB24578@kili>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 302f1752cc4c2..e97a11abc1d85 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2609,8 +2609,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+           WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+-                                   vmcs12->guest_ia32_perf_global_ctrl)))
++                                   vmcs12->guest_ia32_perf_global_ctrl))) {
++              *entry_failure_code = ENTRY_FAIL_DEFAULT;
+               return -EINVAL;
++      }
+       kvm_rsp_write(vcpu, vmcs12->guest_rsp);
+       kvm_rip_write(vcpu, vmcs12->guest_rip);
+-- 
+2.33.0
+
diff --git a/queue-5.15/kvm-x86-fix-when-shadow_root_level-5-guest-root_leve.patch b/queue-5.15/kvm-x86-fix-when-shadow_root_level-5-guest-root_leve.patch
new file mode 100644 (file)
index 0000000..a45cffc
--- /dev/null
@@ -0,0 +1,41 @@
+From 949dca7348cc2017930aedbb002b75ae3afa0512 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Nov 2021 20:20:43 +0800
+Subject: KVM: X86: Fix when shadow_root_level=5 && guest root_level<4
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+[ Upstream commit 12ec33a705749e18d9588b0a0e69e02821371156 ]
+
+If there is an L1 with nNPT in 32-bit mode, the shadow walk starts with
+pae_root.
+
+Fixes: a717a780fc4e ("KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host")
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Message-Id: <20211124122055.64424-2-jiangshanlai@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index c9b1d63d3cfba..287fc1086db78 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2188,10 +2188,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
+       iterator->shadow_addr = root;
+       iterator->level = vcpu->arch.mmu->shadow_root_level;
+-      if (iterator->level == PT64_ROOT_4LEVEL &&
++      if (iterator->level >= PT64_ROOT_4LEVEL &&
+           vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+           !vcpu->arch.mmu->direct_map)
+-              --iterator->level;
++              iterator->level = PT32E_ROOT_LEVEL;
+       if (iterator->level == PT32E_ROOT_LEVEL) {
+               /*
+-- 
+2.33.0
+
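Filling in the terse commit message: with 4-level shadow paging the old --iterator->level happened to land on PT32E_ROOT_LEVEL, but with 5-level shadow paging it left the walk at level 4 even though the root in use is still the PAE root. The fixed logic with the constants spelled out (values as defined in KVM's MMU code):

    	/*
    	 * shadow_root_level is 4 or 5 here; a 32-bit guest has
    	 * root_level < 4 and its shadow walk always starts at the
    	 * PAE root, which sits at level 3 regardless of host depth.
    	 */
    	if (iterator->level >= PT64_ROOT_4LEVEL &&      /* 4 or 5 */
    	    vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
    	    !vcpu->arch.mmu->direct_map)
    		iterator->level = PT32E_ROOT_LEVEL;     /* 3, not just level - 1 */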
diff --git a/queue-5.15/kvm-x86-mmu-pass-parameter-flush-as-false-in-kvm_tdp.patch b/queue-5.15/kvm-x86-mmu-pass-parameter-flush-as-false-in-kvm_tdp.patch
new file mode 100644 (file)
index 0000000..e053ef0
--- /dev/null
@@ -0,0 +1,48 @@
+From fcff29fa0b54bea20a88d735fb3fafa1e6a60fd8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Nov 2021 17:20:40 +0800
+Subject: KVM: x86/mmu: Pass parameter flush as false in
+ kvm_tdp_mmu_zap_collapsible_sptes()
+
+From: Hou Wenlong <houwenlong93@linux.alibaba.com>
+
+[ Upstream commit 8ed716ca7dc91f058be0ba644a3048667a20db13 ]
+
+Since the TLB flush has already been done for the legacy MMU before
+kvm_tdp_mmu_zap_collapsible_sptes() is called, the parameter flush
+should be false for kvm_tdp_mmu_zap_collapsible_sptes().
+
+Fixes: e2209710ccc5d ("KVM: x86/mmu: Skip rmap operations if rmaps not allocated")
+Signed-off-by: Hou Wenlong <houwenlong93@linux.alibaba.com>
+Message-Id: <21453a1d2533afb6e59fb6c729af89e771ff2e76.1637140154.git.houwenlong93@linux.alibaba.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 9d04474b00272..c9b1d63d3cfba 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5855,7 +5855,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                  const struct kvm_memory_slot *slot)
+ {
+-      bool flush = false;
++      bool flush;
+       if (kvm_memslots_have_rmaps(kvm)) {
+               write_lock(&kvm->mmu_lock);
+@@ -5867,7 +5867,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+-              flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
++              flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, false);
+               if (flush)
+                       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               read_unlock(&kvm->mmu_lock);
+-- 
+2.33.0
+
diff --git a/queue-5.15/kvm-x86-mmu-remove-spurious-tlb-flushes-in-tdp-mmu-z.patch b/queue-5.15/kvm-x86-mmu-remove-spurious-tlb-flushes-in-tdp-mmu-z.patch
new file mode 100644 (file)
index 0000000..dbbe61d
--- /dev/null
@@ -0,0 +1,154 @@
+From 2ced6b43343f6d6d3a3bb90b6a20ca4a2a394385 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Nov 2021 04:50:21 +0000
+Subject: KVM: x86/mmu: Remove spurious TLB flushes in TDP MMU zap collapsible
+ path
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 4b85c921cd393764d22c0cdab6d7d5d120aa0980 ]
+
+Drop the "flush" param and return values to/from the TDP MMU's helper for
+zapping collapsible SPTEs.  Because the helper runs with mmu_lock held
+for read, not write, it uses tdp_mmu_zap_spte_atomic(), and the atomic
+zap handles the necessary remote TLB flush.
+
+Similarly, because mmu_lock is dropped and re-acquired between zapping
+legacy MMUs and zapping TDP MMUs, kvm_mmu_zap_collapsible_sptes() must
+handle remote TLB flushes from the legacy MMU before calling into the TDP
+MMU.
+
+Fixes: e2209710ccc5d ("KVM: x86/mmu: Skip rmap operations if rmaps not allocated")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211120045046.3940942-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c     |  9 ++-------
+ arch/x86/kvm/mmu/tdp_mmu.c | 22 +++++++---------------
+ arch/x86/kvm/mmu/tdp_mmu.h |  5 ++---
+ 3 files changed, 11 insertions(+), 25 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index f2e74e8c1651a..0a88cb4f731f4 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5855,8 +5855,6 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                  const struct kvm_memory_slot *slot)
+ {
+-      bool flush;
+-
+       if (kvm_memslots_have_rmaps(kvm)) {
+               write_lock(&kvm->mmu_lock);
+               /*
+@@ -5864,17 +5862,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                * logging at a 4k granularity and never creates collapsible
+                * 2m SPTEs during dirty logging.
+                */
+-              flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
+-              if (flush)
++              if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
+                       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               write_unlock(&kvm->mmu_lock);
+       }
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+-              flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, false);
+-              if (flush)
+-                      kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
++              kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
+               read_unlock(&kvm->mmu_lock);
+       }
+ }
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index aa75689a91b4c..0e4227b59d7bb 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1413,10 +1413,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+  * Clear leaf entries which could be replaced by large mappings, for
+  * GFNs within the slot.
+  */
+-static bool zap_collapsible_spte_range(struct kvm *kvm,
++static void zap_collapsible_spte_range(struct kvm *kvm,
+                                      struct kvm_mmu_page *root,
+-                                     const struct kvm_memory_slot *slot,
+-                                     bool flush)
++                                     const struct kvm_memory_slot *slot)
+ {
+       gfn_t start = slot->base_gfn;
+       gfn_t end = start + slot->npages;
+@@ -1427,10 +1426,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
+       tdp_root_for_each_pte(iter, root, start, end) {
+ retry:
+-              if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
+-                      flush = false;
++              if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+                       continue;
+-              }
+               if (!is_shadow_present_pte(iter.old_spte) ||
+                   !is_last_spte(iter.old_spte, iter.level))
+@@ -1442,6 +1439,7 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
+                                                           pfn, PG_LEVEL_NUM))
+                       continue;
++              /* Note, a successful atomic zap also does a remote TLB flush. */
+               if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
+                       /*
+                        * The iter must explicitly re-read the SPTE because
+@@ -1450,30 +1448,24 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
+                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+                       goto retry;
+               }
+-              flush = true;
+       }
+       rcu_read_unlock();
+-
+-      return flush;
+ }
+ /*
+  * Clear non-leaf entries (and free associated page tables) which could
+  * be replaced by large mappings, for GFNs within the slot.
+  */
+-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+-                                     const struct kvm_memory_slot *slot,
+-                                     bool flush)
++void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
++                                     const struct kvm_memory_slot *slot)
+ {
+       struct kvm_mmu_page *root;
+       lockdep_assert_held_read(&kvm->mmu_lock);
+       for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+-              flush = zap_collapsible_spte_range(kvm, root, slot, flush);
+-
+-      return flush;
++              zap_collapsible_spte_range(kvm, root, slot);
+ }
+ /*
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
+index 358f447d40120..ba3681cd38ab4 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.h
++++ b/arch/x86/kvm/mmu/tdp_mmu.h
+@@ -66,9 +66,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+                                      struct kvm_memory_slot *slot,
+                                      gfn_t gfn, unsigned long mask,
+                                      bool wrprot);
+-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+-                                     const struct kvm_memory_slot *slot,
+-                                     bool flush);
++void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
++                                     const struct kvm_memory_slot *slot);
+ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
+                                  struct kvm_memory_slot *slot, gfn_t gfn,
+-- 
+2.33.0
+
diff --git a/queue-5.15/kvm-x86-mmu-rename-slot_handle_leaf-to-slot_handle_l.patch b/queue-5.15/kvm-x86-mmu-rename-slot_handle_leaf-to-slot_handle_l.patch
new file mode 100644 (file)
index 0000000..b8e9397
--- /dev/null
@@ -0,0 +1,81 @@
+From 4105a964362b1295859954f06f18f77bf97d6f3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Oct 2021 16:22:23 +0000
+Subject: KVM: x86/mmu: Rename slot_handle_leaf to slot_handle_level_4k
+
+From: David Matlack <dmatlack@google.com>
+
+[ Upstream commit 610265ea3da117db435868bd109f1861534a5634 ]
+
+slot_handle_leaf is a misnomer because it only operates on 4K SPTEs
+whereas "leaf" is used to describe any valid terminal SPTE (4K or
+large page). Rename slot_handle_leaf to slot_handle_level_4k to
+avoid confusion.
+
+Making this change makes it more obvious that there is a benign discrepancy
+between the legacy MMU and the TDP MMU when it comes to dirty logging.
+The legacy MMU only iterates through 4K SPTEs when zapping for
+collapsing and when clearing D-bits. The TDP MMU, on the other hand,
+iterates through SPTEs on all levels.
+
+The TDP MMU behavior of zapping SPTEs at all levels is technically
+overkill for its current dirty logging implementation, which always
+demotes to 4k SPTEs, but both the TDP MMU and legacy MMU zap if and only
+if the SPTE can be replaced by a larger page, i.e. will not spuriously
+zap 2m (or larger) SPTEs. Opportunistically add comments to explain this
+discrepency in the code.
+
+Signed-off-by: David Matlack <dmatlack@google.com>
+Message-Id: <20211019162223.3935109-1-dmatlack@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 287fc1086db78..f2e74e8c1651a 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5474,8 +5474,8 @@ slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot,
+ }
+ static __always_inline bool
+-slot_handle_leaf(struct kvm *kvm, const struct kvm_memory_slot *memslot,
+-               slot_level_handler fn, bool flush_on_yield)
++slot_handle_level_4k(struct kvm *kvm, const struct kvm_memory_slot *memslot,
++                   slot_level_handler fn, bool flush_on_yield)
+ {
+       return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
+                                PG_LEVEL_4K, flush_on_yield);
+@@ -5859,7 +5859,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+       if (kvm_memslots_have_rmaps(kvm)) {
+               write_lock(&kvm->mmu_lock);
+-              flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
++              /*
++               * Zap only 4k SPTEs since the legacy MMU only supports dirty
++               * logging at a 4k granularity and never creates collapsible
++               * 2m SPTEs during dirty logging.
++               */
++              flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
+               if (flush)
+                       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               write_unlock(&kvm->mmu_lock);
+@@ -5896,8 +5901,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+       if (kvm_memslots_have_rmaps(kvm)) {
+               write_lock(&kvm->mmu_lock);
+-              flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty,
+-                                       false);
++              /*
++               * Clear dirty bits only on 4k SPTEs since the legacy MMU only
++               * support dirty logging at a 4k granularity.
++               */
++              flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+               write_unlock(&kvm->mmu_lock);
+       }
+-- 
+2.33.0
+
diff --git a/queue-5.15/kvm-x86-mmu-skip-tlb-flush-if-it-has-been-done-in-za.patch b/queue-5.15/kvm-x86-mmu-skip-tlb-flush-if-it-has-been-done-in-za.patch
new file mode 100644 (file)
index 0000000..2342152
--- /dev/null
@@ -0,0 +1,56 @@
+From 1b35ddf169bd4aec1037d0087ea1c504f12b5900 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Nov 2021 17:20:39 +0800
+Subject: KVM: x86/mmu: Skip tlb flush if it has been done in zap_gfn_range()
+
+From: Hou Wenlong <houwenlong93@linux.alibaba.com>
+
+[ Upstream commit c7785d85b6c6cc9f3d0f1a8cab128f4062b30abb ]
+
+If the parameter flush is set, zap_gfn_range() flushes the remote TLB
+when it yields, so no TLB flush is needed afterwards. Use the return
+value of zap_gfn_range() directly instead of ORing it into flush in
+kvm_unmap_gfn_range() and kvm_tdp_mmu_unmap_gfn_range().
+
+Fixes: 3039bcc744980 ("KVM: Move x86's MMU notifier memslot walkers to generic code")
+Signed-off-by: Hou Wenlong <houwenlong93@linux.alibaba.com>
+Message-Id: <5e16546e228877a4d974f8c0e448a93d52c7a5a9.1637140154.git.houwenlong93@linux.alibaba.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c     | 2 +-
+ arch/x86/kvm/mmu/tdp_mmu.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 9fb69546e21b8..9d04474b00272 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1592,7 +1592,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+               flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
+       if (is_tdp_mmu_enabled(kvm))
+-              flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
++              flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+       return flush;
+ }
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 838603a653b9a..aa75689a91b4c 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1081,8 +1081,8 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
+       struct kvm_mmu_page *root;
+       for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
+-              flush |= zap_gfn_range(kvm, root, range->start, range->end,
+-                                     range->may_block, flush, false);
++              flush = zap_gfn_range(kvm, root, range->start, range->end,
++                                    range->may_block, flush, false);
+       return flush;
+ }
+-- 
+2.33.0
+
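The contract being relied on, stated as a reading of the code rather than a spec: zap_gfn_range() consumes the incoming flush hint, performing the pending remote TLB flush itself whenever it yields mid-range, so its return value is the complete new flush state. Plain assignment chains that state correctly, whereas |= could carry a stale true across a flush that already happened and force a second, redundant one:

    	for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
    		/* "=", not "|=": the callee already folded (and possibly
    		 * discharged) the incoming flush state into its result
    		 */
    		flush = zap_gfn_range(kvm, root, range->start, range->end,
    				      range->may_block, flush, false);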
diff --git a/queue-5.15/kvm-x86-pmu-fix-reserved-bits-for-amd-perfevtseln-re.patch b/queue-5.15/kvm-x86-pmu-fix-reserved-bits-for-amd-perfevtseln-re.patch
new file mode 100644 (file)
index 0000000..f96fd3c
--- /dev/null
@@ -0,0 +1,55 @@
+From 33dff19738f5b14832995bcb4285f4b79e083bbc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Nov 2021 21:03:20 +0800
+Subject: KVM: x86/pmu: Fix reserved bits for AMD PerfEvtSeln register
+
+From: Like Xu <likexu@tencent.com>
+
+[ Upstream commit cb1d220da0faa5ca0deb93449aff953f0c2cce6d ]
+
+If we run the following perf command in an AMD Milan guest:
+
+  perf stat \
+  -e cpu/event=0x1d0/ \
+  -e cpu/event=0x1c7/ \
+  -e cpu/umask=0x1f,event=0x18e/ \
+  -e cpu/umask=0x7,event=0x18e/ \
+  -e cpu/umask=0x18,event=0x18e/ \
+  ./workload
+
+dmesg will report a #GP warning from an unchecked MSR access
+error on MSR_F15H_PERF_CTLx.
+
+This is because, according to APM (Revision: 4.03) Figure 13-7,
+bits [35:32] of the AMD PerfEvtSeln register are part of the
+event select encoding, which extends the EVENT_SELECT field
+from 8 bits to 12 bits.
+
+Opportunistically update pmu->reserved_bits for reserved bit 19.
+
+Reported-by: Jim Mattson <jmattson@google.com>
+Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
+Signed-off-by: Like Xu <likexu@tencent.com>
+Message-Id: <20211118130320.95997-1-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/pmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
+index fdf587f19c5fb..e152241d1d709 100644
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -282,7 +282,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
+               pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+       pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+-      pmu->reserved_bits = 0xffffffff00200000ull;
++      pmu->reserved_bits = 0xfffffff000280000ull;
+       pmu->version = 1;
+       /* not applicable to AMD; but clean them to prevent any fall out */
+       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+-- 
+2.33.0
+
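The new mask checks out when decomposed with the kernel's bit helpers (a worked verification of the commit's value, not code from the patch):

    	/* 0xfffffff000280000 decomposes as: */
    	u64 reserved = GENMASK_ULL(63, 36)  /* 0xfffffff000000000: above EventSelect[11:8] */
    		     | BIT_ULL(21)          /* 0x0000000000200000: reserved, as before     */
    		     | BIT_ULL(19);         /* 0x0000000000080000: bit 19, newly reserved  */
    	/* sum: 0xfffffff000280000; bits [35:32] (EventSelect[11:8]) now writable */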
diff --git a/queue-5.15/mctp-don-t-let-rtm_delroute-delete-local-routes.patch b/queue-5.15/mctp-don-t-let-rtm_delroute-delete-local-routes.patch
new file mode 100644 (file)
index 0000000..43f6fa3
--- /dev/null
@@ -0,0 +1,64 @@
+From bea7c2e2eeeb1fa0d63a3282ec8fbe1f57c7c311 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Dec 2021 16:07:42 +0800
+Subject: mctp: Don't let RTM_DELROUTE delete local routes
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit 76d001603c509562181f3787a7065b8e163bc7b9 ]
+
+We need to test against the existing route type, not
+the rtm_type in the netlink request.
+
+Fixes: 83f0a0b7285b ("mctp: Specify route types, require rtm_type in RTM_*ROUTE messages")
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mctp/route.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index 5ca186d53cb0f..fb1bf4ec85296 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -760,7 +760,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ }
+ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+-                           unsigned int daddr_extent)
++                           unsigned int daddr_extent, unsigned char type)
+ {
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_route *rt, *tmp;
+@@ -777,7 +777,8 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+       list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+               if (rt->dev == mdev &&
+-                  rt->min == daddr_start && rt->max == daddr_end) {
++                  rt->min == daddr_start && rt->max == daddr_end &&
++                  rt->type == type) {
+                       list_del_rcu(&rt->list);
+                       /* TODO: immediate RTM_DELROUTE */
+                       mctp_route_release(rt);
+@@ -795,7 +796,7 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+ int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+ {
+-      return mctp_route_remove(mdev, addr, 0);
++      return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
+ }
+ /* removes all entries for a given device */
+@@ -975,7 +976,7 @@ static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+       if (rtm->rtm_type != RTN_UNICAST)
+               return -EINVAL;
+-      rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
++      rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
+       return rc;
+ }
+-- 
+2.33.0
+
diff --git a/queue-5.15/net-mlx5-e-switch-check-group-pointer-before-reading.patch b/queue-5.15/net-mlx5-e-switch-check-group-pointer-before-reading.patch
new file mode 100644 (file)
index 0000000..2034220
--- /dev/null
@@ -0,0 +1,37 @@
+From e54e264c44896db8e1d8c3e772d479d38955b10d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Nov 2021 11:47:21 +0200
+Subject: net/mlx5: E-Switch, Check group pointer before reading bw_share value
+
+From: Dmytro Linkin <dlinkin@nvidia.com>
+
+[ Upstream commit 5c4e8ae7aa4875041102406801ee434e6c581aef ]
+
+If log_esw_max_sched_depth is not supported, the group pointer of the
+vport is NULL. Hence, check the pointer before reading the bw_share value.
+
+Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups")
+Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index 4501e3d737f80..d377ddc70fc70 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+       /* If vports min rate divider is 0 but their group has bw_share configured, then
+        * need to set bw_share for vports to minimal value.
+        */
+-      if (!group_level && !max_guarantee && group->bw_share)
++      if (!group_level && !max_guarantee && group && group->bw_share)
+               return 1;
+       return 0;
+ }
+-- 
+2.33.0
+
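+The one-line change is the classic short-circuit NULL guard: the
+pointer test has to precede the dereference in the && chain. A hedged
+standalone sketch of the pattern (names are illustrative, not the
+driver's):
+
+	#include <stdio.h>
+
+	struct group { unsigned int bw_share; };
+
+	static unsigned int min_rate_divider(int group_level,
+					     unsigned int max_guarantee,
+					     const struct group *group)
+	{
+		/* "group &&" must come before "group->bw_share": without
+		 * log_esw_max_sched_depth support the vport has no group
+		 * and the pointer is NULL. */
+		if (!group_level && !max_guarantee && group && group->bw_share)
+			return 1;
+		return 0;
+	}
+
+	int main(void)
+	{
+		struct group g = { .bw_share = 5 };
+
+		printf("%u\n", min_rate_divider(0, 0, &g));	/* 1 */
+		printf("%u\n", min_rate_divider(0, 0, NULL));	/* 0, no crash */
+		return 0;
+	}
+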
diff --git a/queue-5.15/net-mlx5-e-switch-fix-single-fdb-creation-on-bluefie.patch b/queue-5.15/net-mlx5-e-switch-fix-single-fdb-creation-on-bluefie.patch
new file mode 100644 (file)
index 0000000..74e7ba6
--- /dev/null
@@ -0,0 +1,47 @@
+From 6f729dbadb28412c450dc97c5246f1fe72a71dd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Oct 2021 12:46:17 +0000
+Subject: net/mlx5: E-Switch, fix single FDB creation on BlueField
+
+From: Mark Bloch <mbloch@nvidia.com>
+
+[ Upstream commit 43a0696f11567278b9412f947e43dd7906c831a8 ]
+
+Always use the MLX5_FLOW_TABLE_OTHER_VPORT flag when creating the egress
+ACL table for the single FDB. Not doing so on BlueField makes the
+firmware fail the command: on BlueField the E-Switch manager is the ECPF
+(vport 0xFFFE), which is filled in the flow table creation command, but
+because the other_vport field wasn't set, the firmware complains about a
+bad parameter.
+
+This is different from a regular HCA, where the E-Switch manager vport
+is the PF (vport 0x0). Passing MLX5_FLOW_TABLE_OTHER_VPORT makes the
+firmware happy both on BlueField and on regular HCAs, without a special
+condition for each.
+
+This fixes the below firmware syndrome:
+mlx5_cmd_check:819:(pid 571): CREATE_FLOW_TABLE(0x930) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x754a4)
+
+Fixes: db202995f503 ("net/mlx5: E-Switch, add logic to enable shared FDB")
+Signed-off-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 0c79e11339362..f3f23fdc20229 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2471,6 +2471,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       struct mlx5_flow_table_attr ft_attr = {
+               .max_fte = 1, .prio = 0, .level = 0,
++              .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
+       };
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_flow_table *acl;
+-- 
+2.33.0
+
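+Why the same command succeeds on a regular HCA but not on BlueField: the
+firmware only honors the vport number in the command when the
+other_vport bit accompanies it. A simplified, hypothetical model of the
+firmware-side check (flag value and status codes are illustrative):
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	#define FLOW_TABLE_OTHER_VPORT	(1u << 0)
+
+	struct ft_attr { uint32_t flags; uint16_t vport; };
+
+	static int fw_create_flow_table(const struct ft_attr *attr)
+	{
+		int other_vport = !!(attr->flags & FLOW_TABLE_OTHER_VPORT);
+
+		/* A non-zero vport without other_vport set is rejected,
+		 * which is exactly what the ECPF (0xFFFE) hits. */
+		if (attr->vport != 0 && !other_vport)
+			return 0x3;	/* "bad parameter", as in the syndrome */
+		return 0;
+	}
+
+	int main(void)
+	{
+		struct ft_attr pf   = { .flags = 0, .vport = 0x0000 };
+		struct ft_attr ecpf = { .flags = 0, .vport = 0xFFFE };
+
+		printf("PF, no flag:    %d\n", fw_create_flow_table(&pf));
+		printf("ECPF, no flag:  %d\n", fw_create_flow_table(&ecpf));
+		ecpf.flags |= FLOW_TABLE_OTHER_VPORT;
+		printf("ECPF with flag: %d\n", fw_create_flow_table(&ecpf));
+		return 0;
+	}
+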
diff --git a/queue-5.15/net-mlx5-e-switch-respect-bw-share-of-the-new-group.patch b/queue-5.15/net-mlx5-e-switch-respect-bw-share-of-the-new-group.patch
new file mode 100644 (file)
index 0000000..bdddd4b
--- /dev/null
@@ -0,0 +1,43 @@
+From f25b8cb6f7ac27124bbc923a30510dffd095b010 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Sep 2021 15:47:33 +0300
+Subject: net/mlx5: E-switch, Respect BW share of the new group
+
+From: Dmytro Linkin <dlinkin@nvidia.com>
+
+[ Upstream commit 1e59b32e45e47c8ea5455182286ba010bfa87813 ]
+
+To enable the transmit scheduler on a vport, the FW requires a non-zero
+configuration for the vport's TSAR. If a vport is added to a group that
+has a configured BW share value while the vport's TX rate values are
+zero, the scheduler wouldn't be enabled on this vport.
+Fix that by calling BW normalization if the BW share of the new group is
+configured.
+
+Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups")
+Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index c6cc67cb4f6ad..4501e3d737f80 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -423,7 +423,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+               return err;
+       /* Recalculate bw share weights of old and new groups */
+-      if (vport->qos.bw_share) {
++      if (vport->qos.bw_share || new_group->bw_share) {
+               esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+               esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+       }
+-- 
+2.33.0
+
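+The condition change in one line: normalization must also run when the
+destination group carries a bw_share, even if the moving vport itself
+has none, otherwise the vport's TSAR stays unconfigured and the
+scheduler stays off. A tiny sketch of the widened trigger (illustrative
+names):
+
+	#include <stdio.h>
+
+	struct grp   { unsigned int bw_share; };
+	struct vport { unsigned int bw_share; };
+
+	/* Before the fix only v->bw_share was tested. */
+	static int needs_normalization(const struct vport *v,
+				       const struct grp *new_group)
+	{
+		return v->bw_share || new_group->bw_share;
+	}
+
+	int main(void)
+	{
+		struct vport v = { .bw_share = 0 };
+		struct grp   g = { .bw_share = 3 };
+
+		/* zero-rate vport joining a weighted group: normalize anyway */
+		printf("%d\n", needs_normalization(&v, &g));	/* 1 */
+		return 0;
+	}
+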
diff --git a/queue-5.15/net-mlx5-move-modify_rqt-command-to-ignore-list-in-i.patch b/queue-5.15/net-mlx5-move-modify_rqt-command-to-ignore-list-in-i.patch
new file mode 100644 (file)
index 0000000..ea0e618
--- /dev/null
@@ -0,0 +1,52 @@
+From b733d4cc94eb200e0726c5150f4ac779480eca60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Nov 2021 15:03:20 +0200
+Subject: net/mlx5: Move MODIFY_RQT command to ignore list in internal error
+ state
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit e45c0b34493c24eeeebf89f63a5293aac7728ed7 ]
+
+When the device is in an internal error state, the command interface
+isn't accessible and the driver decides which commands to fail and
+which to ignore.
+
+Move the MODIFY_RQT command to the ignore list in order to avoid the
+following redundant warning messages in the internal error state:
+
+mlx5_core 0000:82:00.1: mlx5e_rss_disable:419:(pid 23754): Failed to redirect RQT 0x0 to drop RQ 0xc00848: err = -5
+mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:598:(pid 23754): Failed to redirect direct RQT 0x1 to drop RQ 0xc00848 (channel 0): err = -5
+mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:607:(pid 23754): Failed to redirect XSK RQT 0x19 to drop RQ 0xc00848 (channel 0): err = -5
+
+Fixes: 43ec0f41fa73 ("net/mlx5e: Hide all implementation details of mlx5e_rx_res")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+index c698e4b5381d7..bea35530c2d0b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -336,6 +336,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+       case MLX5_CMD_OP_DEALLOC_SF:
+       case MLX5_CMD_OP_DESTROY_UCTX:
+       case MLX5_CMD_OP_DESTROY_UMEM:
++      case MLX5_CMD_OP_MODIFY_RQT:
+               return MLX5_CMD_STAT_OK;
+       case MLX5_CMD_OP_QUERY_HCA_CAP:
+@@ -441,7 +442,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+       case MLX5_CMD_OP_MODIFY_TIS:
+       case MLX5_CMD_OP_QUERY_TIS:
+       case MLX5_CMD_OP_CREATE_RQT:
+-      case MLX5_CMD_OP_MODIFY_RQT:
+       case MLX5_CMD_OP_QUERY_RQT:
+       case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+-- 
+2.33.0
+
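+The mechanism behind the move: in the internal error state every command
+is routed through a single classifier that either fakes success
+("ignore", so teardown paths stay quiet) or reports an error ("fail").
+A simplified sketch of that dispatch, in the spirit of
+mlx5_internal_err_ret_value() (opcode and status values are
+illustrative):
+
+	#include <stdio.h>
+
+	enum op { OP_MODIFY_RQT, OP_QUERY_HCA_CAP /* ... */ };
+
+	#define STAT_OK		0	/* ignore: pretend success */
+	#define STAT_INTERNAL	5	/* fail: propagate an error */
+
+	static int internal_err_ret_value(enum op op)
+	{
+		switch (op) {
+		case OP_MODIFY_RQT:	/* now on the ignore side */
+			return STAT_OK;
+		default:
+			return STAT_INTERNAL;
+		}
+	}
+
+	int main(void)
+	{
+		printf("MODIFY_RQT    -> %d\n",
+		       internal_err_ret_value(OP_MODIFY_RQT));
+		printf("QUERY_HCA_CAP -> %d\n",
+		       internal_err_ret_value(OP_QUERY_HCA_CAP));
+		return 0;
+	}
+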
diff --git a/queue-5.15/net-mlx5e-fix-missing-ipsec-statistics-on-uplink-rep.patch b/queue-5.15/net-mlx5e-fix-missing-ipsec-statistics-on-uplink-rep.patch
new file mode 100644 (file)
index 0000000..53952a4
--- /dev/null
@@ -0,0 +1,43 @@
+From e04c04209351f164ca1bf065993d99a8f7d0e018 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Jul 2021 12:48:24 +0300
+Subject: net/mlx5e: Fix missing IPsec statistics on uplink representor
+
+From: Raed Salem <raeds@nvidia.com>
+
+[ Upstream commit 51ebf5db67f5c6aed79c05f1aa5137bdf5ca6614 ]
+
+The cited patch added IPsec support to the uplink representor. However,
+the uplink representor has its own private statistics, of which the
+IPsec stats are not a part, so the IPsec stats are effectively hidden
+whenever the uplink representor's stats are queried.
+
+Resolve this by adding the IPsec stats to the uplink representor's
+private statistics.
+
+Fixes: 5589b8f1a2c7 ("net/mlx5e: Add IPsec support to uplink representor")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 0684ac6699b2d..c100728c381cc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -1070,6 +1070,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
+       &MLX5E_STATS_GRP(pme),
+       &MLX5E_STATS_GRP(channels),
+       &MLX5E_STATS_GRP(per_port_buff_congest),
++#ifdef CONFIG_MLX5_EN_IPSEC
++      &MLX5E_STATS_GRP(ipsec_sw),
++      &MLX5E_STATS_GRP(ipsec_hw),
++#endif
+ };
+ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
+-- 
+2.33.0
+
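+The underlying model: each netdev profile owns a private table of stats
+groups, and a group missing from the table is simply never queried.
+A compile-time-guarded table in the same shape, as a runnable sketch
+(the config macro and group names are illustrative):
+
+	#include <stddef.h>
+	#include <stdio.h>
+
+	#define CONFIG_EN_IPSEC 1	/* toggle to watch the groups vanish */
+
+	struct stats_grp { const char *name; };
+
+	static struct stats_grp grp_channels = { "channels" };
+	#if CONFIG_EN_IPSEC
+	static struct stats_grp grp_ipsec_sw = { "ipsec_sw" };
+	static struct stats_grp grp_ipsec_hw = { "ipsec_hw" };
+	#endif
+
+	static struct stats_grp *rep_stats_grps[] = {
+		&grp_channels,
+	#if CONFIG_EN_IPSEC
+		&grp_ipsec_sw,	/* the two entries this patch adds */
+		&grp_ipsec_hw,
+	#endif
+	};
+
+	int main(void)
+	{
+		size_t i, n = sizeof(rep_stats_grps) / sizeof(rep_stats_grps[0]);
+
+		for (i = 0; i < n; i++)
+			printf("%s\n", rep_stats_grps[i]->name);
+		return 0;
+	}
+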
diff --git a/queue-5.15/net-mlx5e-rename-lro_timeout-to-packet_merge_timeout.patch b/queue-5.15/net-mlx5e-rename-lro_timeout-to-packet_merge_timeout.patch
new file mode 100644 (file)
index 0000000..390b286
--- /dev/null
@@ -0,0 +1,112 @@
+From 9e17ef7a20b8b507ae852dcf5faab91fd994f13f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jul 2020 17:22:45 +0300
+Subject: net/mlx5e: Rename lro_timeout to packet_merge_timeout
+
+From: Ben Ben-Ishay <benishay@nvidia.com>
+
+[ Upstream commit 50f477fe9933193e960785f1192be801d7cd307a ]
+
+TIR stands for Transport Interface Receive; the TIR object is
+responsible for performing all transport-related operations on the
+receive side, such as packet processing and demultiplexing the packets
+to the different RQs.
+lro_timeout is a field in the TIR that is used to set the timeout for an
+LRO session. This series introduces a new packet merge type, therefore
+rename lro_timeout to packet_merge_timeout to cover all packet merge
+types.
+
+Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h        | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en/tir.c    | 6 +++---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c   | 2 +-
+ include/linux/mlx5/mlx5_ifc.h                       | 6 +++---
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index 03a7a4ce5cd5e..d9d19130c1a34 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -264,7 +264,7 @@ struct mlx5e_params {
+       bool scatter_fcs_en;
+       bool rx_dim_enabled;
+       bool tx_dim_enabled;
+-      u32 lro_timeout;
++      u32 packet_merge_timeout;
+       u32 pflags;
+       struct bpf_prog *xdp_prog;
+       struct mlx5e_xsk *xsk;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+index 3cbb596821e89..2b2b3c5cdbd5c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+@@ -173,7 +173,7 @@ struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
+       lro_param = (struct mlx5e_lro_param) {
+               .enabled = params->lro_en,
+-              .timeout = params->lro_timeout,
++              .timeout = params->packet_merge_timeout,
+       };
+       return lro_param;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+index de936dc4bc483..857ea09791597 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+@@ -82,9 +82,9 @@ void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+       if (!lro_param->enabled)
+               return;
+-      MLX5_SET(tirc, tirc, lro_enable_mask,
+-               MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+-               MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
++      MLX5_SET(tirc, tirc, packet_merge_mask,
++               MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
++               MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+       MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+                (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 41ef6eb70a585..a9d80ffb25376 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -4323,7 +4323,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
+               if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+                       params->lro_en = !slow_pci_heuristic(mdev);
+       }
+-      params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
++      params->packet_merge_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+       /* CQ moderation params */
+       rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 993204a6c1a13..944bb9f5006c1 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -3309,8 +3309,8 @@ enum {
+ };
+ enum {
+-      MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO  = 0x1,
+-      MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO  = 0x2,
++      MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO  = BIT(0),
++      MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO  = BIT(1),
+ };
+ enum {
+@@ -3335,7 +3335,7 @@ struct mlx5_ifc_tirc_bits {
+       u8         reserved_at_80[0x4];
+       u8         lro_timeout_period_usecs[0x10];
+-      u8         lro_enable_mask[0x4];
++      u8         packet_merge_mask[0x4];
+       u8         lro_max_ip_payload_size[0x8];
+       u8         reserved_at_a0[0x40];
+-- 
+2.33.0
+
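+A small aside on the mlx5_ifc.h hunk: the mask values are unchanged,
+only rewritten with BIT() so the bit positions are explicit. A runnable
+reprint of the idea (BIT() hand-defined here for the shift-only case):
+
+	#include <stdio.h>
+
+	#define BIT(n)	(1u << (n))
+
+	enum {
+		PACKET_MERGE_MASK_IPV4_LRO = BIT(0),	/* was 0x1 */
+		PACKET_MERGE_MASK_IPV6_LRO = BIT(1),	/* was 0x2 */
+	};
+
+	int main(void)
+	{
+		printf("ipv4 lro mask: 0x%x\n", PACKET_MERGE_MASK_IPV4_LRO);
+		printf("ipv6 lro mask: 0x%x\n", PACKET_MERGE_MASK_IPV6_LRO);
+		return 0;
+	}
+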
diff --git a/queue-5.15/net-mlx5e-rename-tir-lro-functions-to-tir-packet-mer.patch b/queue-5.15/net-mlx5e-rename-tir-lro-functions-to-tir-packet-mer.patch
new file mode 100644 (file)
index 0000000..abe982b
--- /dev/null
@@ -0,0 +1,693 @@
+From 597a1a82690148832316458b644a7385993a7f49 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Jun 2021 12:27:32 +0300
+Subject: net/mlx5e: Rename TIR lro functions to TIR packet merge functions
+
+From: Khalid Manaa <khalidm@nvidia.com>
+
+[ Upstream commit eaee12f046924eeb1210c7e4f3b326603ff1bd85 ]
+
+This series introduces a new packet merge type, therefore rename the
+LRO functions to packet merge functions to support the new merge type:
+- Generalize + rename mlx5e_build_tir_ctx_lro to
+  mlx5e_build_tir_ctx_packet_merge.
+- Rename mlx5e_modify_tirs_lro to mlx5e_modify_tirs_packet_merge.
+- Rename lro bit in mlx5_ifc_modify_tir_bitmask_bits to packet_merge.
+- Rename lro_en in mlx5e_params to packet_merge_type type and combine
+  packet_merge params into one struct mlx5e_packet_merge_param.
+
+Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
+Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h  | 14 +++++-
+ .../ethernet/mellanox/mlx5/core/en/params.c   | 21 +++------
+ .../ethernet/mellanox/mlx5/core/en/params.h   |  6 ---
+ .../net/ethernet/mellanox/mlx5/core/en/rss.c  | 23 +++++-----
+ .../net/ethernet/mellanox/mlx5/core/en/rss.h  |  7 +--
+ .../ethernet/mellanox/mlx5/core/en/rx_res.c   | 25 +++++-----
+ .../ethernet/mellanox/mlx5/core/en/rx_res.h   |  5 +-
+ .../net/ethernet/mellanox/mlx5/core/en/tir.c  | 10 ++--
+ .../net/ethernet/mellanox/mlx5/core/en/tir.h  |  6 +--
+ .../ethernet/mellanox/mlx5/core/en_ethtool.c  |  4 +-
+ .../mellanox/mlx5/core/en_fs_ethtool.c        |  6 +--
+ .../net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++++++++++--------
+ .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  5 +-
+ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c |  7 ++-
+ include/linux/mlx5/mlx5_ifc.h                 |  2 +-
+ 15 files changed, 95 insertions(+), 92 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index d9d19130c1a34..c10a107a3ea53 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -244,6 +244,17 @@ enum mlx5e_priv_flag {
+ #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
++enum packet_merge {
++      MLX5E_PACKET_MERGE_NONE,
++      MLX5E_PACKET_MERGE_LRO,
++      MLX5E_PACKET_MERGE_SHAMPO,
++};
++
++struct mlx5e_packet_merge_param {
++      enum packet_merge type;
++      u32 timeout;
++};
++
+ struct mlx5e_params {
+       u8  log_sq_size;
+       u8  rq_wq_type;
+@@ -258,13 +269,12 @@ struct mlx5e_params {
+       bool tunneled_offload_en;
+       struct dim_cq_moder rx_cq_moderation;
+       struct dim_cq_moder tx_cq_moderation;
+-      bool lro_en;
++      struct mlx5e_packet_merge_param packet_merge;
+       u8  tx_min_inline_mode;
+       bool vlan_strip_disable;
+       bool scatter_fcs_en;
+       bool rx_dim_enabled;
+       bool tx_dim_enabled;
+-      u32 packet_merge_timeout;
+       u32 pflags;
+       struct bpf_prog *xdp_prog;
+       struct mlx5e_xsk *xsk;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+index 2b2b3c5cdbd5c..15f441a1b80c2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+@@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+       u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+                                mlx5e_rx_get_linear_frag_sz(params, NULL));
+-      return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
++      return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
++              linear_frag_sz <= PAGE_SIZE;
+ }
+ bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+@@ -164,19 +165,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+               mlx5e_rx_is_linear_skb(params, xsk) :
+               mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+-      return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+-}
+-
+-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
+-{
+-      struct mlx5e_lro_param lro_param;
+-
+-      lro_param = (struct mlx5e_lro_param) {
+-              .enabled = params->lro_en,
+-              .timeout = params->packet_merge_timeout,
+-      };
+-
+-      return lro_param;
++      return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
++              mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+ }
+ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+@@ -485,10 +475,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
+ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+ {
++      bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
+       bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+               MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+-      return ro && params->lro_en ?
++      return ro && lro_en ?
+               MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+index 879ad46d754e1..e9593f5f06610 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+@@ -11,11 +11,6 @@ struct mlx5e_xsk_param {
+       u16 chunk_size;
+ };
+-struct mlx5e_lro_param {
+-      bool enabled;
+-      u32 timeout;
+-};
+-
+ struct mlx5e_cq_param {
+       u32                        cqc[MLX5_ST_SZ_DW(cqc)];
+       struct mlx5_wq_param       wq;
+@@ -125,7 +120,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+                         struct mlx5e_params *params,
+                         struct mlx5e_xsk_param *xsk);
+-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
+ /* Build queue parameters */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+index 625cd49ef96c5..7b55b14d47ef7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+@@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+                               enum mlx5_traffic_types tt,
+-                              const struct mlx5e_lro_param *init_lro_param,
++                              const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                               bool inner)
+ {
+       struct mlx5e_rss_params_traffic_type rss_tt;
+@@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+       rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+       mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   rqtn, rss->inner_ft_support);
+-      mlx5e_tir_builder_build_lro(builder, init_lro_param);
++      mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+       rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+       mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+@@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types
+ }
+ static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+-                               const struct mlx5e_lro_param *init_lro_param,
++                               const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                                bool inner)
+ {
+       enum mlx5_traffic_types tt, max_tt;
+       int err;
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+-              err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
++              err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+               if (err)
+                       goto err_destroy_tirs;
+       }
+@@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                  bool inner_ft_support, u32 drop_rqn,
+-                 const struct mlx5e_lro_param *init_lro_param)
++                 const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+ {
+       int err;
+@@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+       if (err)
+               goto err_out;
+-      err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
++      err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
+       if (err)
+               goto err_destroy_rqt;
+       if (inner_ft_support) {
+-              err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
++              err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
+               if (err)
+                       goto err_destroy_tirs;
+       }
+@@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+  */
+ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+                         enum mlx5_traffic_types tt,
+-                        const struct mlx5e_lro_param *init_lro_param,
++                        const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                         bool inner, u32 *tirn)
+ {
+       struct mlx5e_tir *tir;
+@@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+       if (!tir) { /* TIR doesn't exist, create one */
+               int err;
+-              err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
++              err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+               if (err)
+                       return err;
+               tir = rss_get_tir(rss, tt, inner);
+@@ -418,7 +418,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
+                              mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+ }
+-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
++int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
++                                   struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+       struct mlx5e_tir_builder *builder;
+       enum mlx5_traffic_types tt;
+@@ -428,7 +429,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p
+       if (!builder)
+               return -ENOMEM;
+-      mlx5e_tir_builder_build_lro(builder, lro_param);
++      mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+       final_err = 0;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+index d522a10dadf33..c6b2164163440 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+@@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void);
+ void mlx5e_rss_free(struct mlx5e_rss *rss);
+ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                  bool inner_ft_support, u32 drop_rqn,
+-                 const struct mlx5e_lro_param *init_lro_param);
++                 const struct mlx5e_packet_merge_param *init_pkt_merge_param);
+ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                          bool inner_ft_support, u32 drop_rqn);
+ int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+@@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                      bool inner);
+ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+                         enum mlx5_traffic_types tt,
+-                        const struct mlx5e_lro_param *init_lro_param,
++                        const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                         bool inner, u32 *tirn);
+ void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+ void mlx5e_rss_disable(struct mlx5e_rss *rss);
+-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
++int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
++                                   struct mlx5e_packet_merge_param *pkt_merge_param);
+ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+                      const u8 *key, const u8 *hfunc,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+index 13056cb9757d4..1429538479960 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+@@ -34,7 +34,7 @@ struct mlx5e_rx_res {
+ /* API for rx_res_rss_* */
+ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+-                                   const struct mlx5e_lro_param *init_lro_param,
++                                   const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                                    unsigned int init_nch)
+ {
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+@@ -49,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+               return -ENOMEM;
+       err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+-                           init_lro_param);
++                           init_pkt_merge_param);
+       if (err)
+               goto err_rss_free;
+@@ -275,7 +275,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+ }
+ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+-                                    const struct mlx5e_lro_param *init_lro_param)
++                                    const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+ {
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+@@ -306,7 +306,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                           inner_ft_support);
+-              mlx5e_tir_builder_build_lro(builder, init_lro_param);
++              mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+               mlx5e_tir_builder_build_direct(builder);
+               err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+@@ -336,7 +336,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                           inner_ft_support);
+-              mlx5e_tir_builder_build_lro(builder, init_lro_param);
++              mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+               mlx5e_tir_builder_build_direct(builder);
+               err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+@@ -437,7 +437,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+-                    u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
++                    u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                     unsigned int init_nch)
+ {
+       int err;
+@@ -447,11 +447,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+       res->max_nch = max_nch;
+       res->drop_rqn = drop_rqn;
+-      err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
++      err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
+       if (err)
+               goto err_out;
+-      err = mlx5e_rx_res_channels_init(res, init_lro_param);
++      err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
+       if (err)
+               goto err_rss_destroy;
+@@ -645,7 +645,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+       return err;
+ }
+-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
++int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
++                                      struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+       struct mlx5e_tir_builder *builder;
+       int err, final_err;
+@@ -655,7 +656,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+       if (!builder)
+               return -ENOMEM;
+-      mlx5e_tir_builder_build_lro(builder, lro_param);
++      mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+       final_err = 0;
+@@ -665,7 +666,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+               if (!rss)
+                       continue;
+-              err = mlx5e_rss_lro_set_param(rss, lro_param);
++              err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
+               if (err)
+                       final_err = final_err ? : err;
+       }
+@@ -673,7 +674,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+               if (err) {
+-                      mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
++                      mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+                       if (!final_err)
+                               final_err = err;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+index 4a15942d79f7d..d09f7d174a518 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+@@ -25,7 +25,7 @@ enum mlx5e_rx_res_features {
+ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+-                    u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
++                    u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+                     unsigned int init_nch);
+ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+ void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+@@ -57,7 +57,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+ int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields);
+-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
++int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
++                                      struct mlx5e_packet_merge_param *pkt_merge_param);
+ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+ int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+index 857ea09791597..a1afb8585e37f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+@@ -70,16 +70,16 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+       MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+ }
+-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+-                               const struct mlx5e_lro_param *lro_param)
++void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
++                                        const struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+       const unsigned int rough_max_l2_l3_hdr_sz = 256;
+       if (builder->modify)
+-              MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
++              MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+-      if (!lro_param->enabled)
++      if (pkt_merge_param->type == MLX5E_PACKET_MERGE_NONE)
+               return;
+       MLX5_SET(tirc, tirc, packet_merge_mask,
+@@ -87,7 +87,7 @@ void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+                MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+       MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+                (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+-      MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
++      MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ }
+ static int mlx5e_hfunc_to_hw(u8 hfunc)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+index e45149a78ed9d..857a84bcd53af 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+@@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type {
+ };
+ struct mlx5e_tir_builder;
+-struct mlx5e_lro_param;
++struct mlx5e_packet_merge_param;
+ struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+ void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+@@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+ void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+                                u32 rqtn, bool inner_ft_support);
+-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+-                               const struct mlx5e_lro_param *lro_param);
++void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
++                                        const struct mlx5e_packet_merge_param *pkt_merge_param);
+ void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+                                const struct mlx5e_rss_params_hash *rss_hash,
+                                const struct mlx5e_rss_params_traffic_type *rss_tt,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index 9d451b8ee467c..dc9b8718c3c10 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1954,8 +1954,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+                       return -EOPNOTSUPP;
+               if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
+                       return -EINVAL;
+-      } else if (priv->channels.params.lro_en) {
+-              netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
++      } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
++              netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
+               return -EINVAL;
+       }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+index 03693fa74a704..d32b70c62c949 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
+                        u32 rss_context, u32 *tirn)
+ {
+       if (fs->flow_type & FLOW_RSS) {
+-              struct mlx5e_lro_param lro_param;
++              struct mlx5e_packet_merge_param pkt_merge_param;
+               struct mlx5e_rss *rss;
+               u32 flow_type;
+               int err;
+@@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
+               if (tt < 0)
+                       return -EINVAL;
+-              lro_param = mlx5e_get_lro_param(&priv->channels.params);
+-              err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
++              pkt_merge_param = priv->channels.params.packet_merge;
++              err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
+               if (err)
+                       return err;
+               eth_rule->rss = rss;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index a9d80ffb25376..8cf5fbebd674b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -2185,17 +2185,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
+       chs->num = 0;
+ }
+-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
++static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
+ {
+       struct mlx5e_rx_res *res = priv->rx_res;
+-      struct mlx5e_lro_param lro_param;
+-      lro_param = mlx5e_get_lro_param(&priv->channels.params);
+-
+-      return mlx5e_rx_res_lro_set_param(res, &lro_param);
++      return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
+ }
+-static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
++static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
+ static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+                        struct mlx5e_params *params, u16 mtu)
+@@ -3270,16 +3267,25 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
+       }
+       new_params = *cur_params;
+-      new_params.lro_en = enable;
+-      if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+-              if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+-                  mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+-                      reset = false;
++      if (enable)
++              new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
++      else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
++              new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
++      else
++              goto out;
++
++      if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
++            new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
++              if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
++                      if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
++                          mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
++                              reset = false;
++              }
+       }
+       err = mlx5e_safe_switch_params(priv, &new_params,
+-                                     mlx5e_modify_tirs_lro_ctx, NULL, reset);
++                                     mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+ out:
+       mutex_unlock(&priv->state_lock);
+       return err;
+@@ -3606,7 +3612,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+               goto out;
+       }
+-      if (params->lro_en)
++      if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+               reset = false;
+       if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+@@ -4063,8 +4069,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+       struct net_device *netdev = priv->netdev;
+       struct mlx5e_params new_params;
+-      if (priv->channels.params.lro_en) {
+-              netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
++      if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
++              netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
+               return -EINVAL;
+       }
+@@ -4321,9 +4327,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
+           params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+               /* No XSK params: checking the availability of striding RQ in general. */
+               if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+-                      params->lro_en = !slow_pci_heuristic(mdev);
++                      params->packet_merge.type = slow_pci_heuristic(mdev) ?
++                              MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
+       }
+-      params->packet_merge_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
++      params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+       /* CQ moderation params */
+       rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+@@ -4608,7 +4615,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+ {
+       struct mlx5_core_dev *mdev = priv->mdev;
+       enum mlx5e_rx_res_features features;
+-      struct mlx5e_lro_param lro_param;
+       int err;
+       priv->rx_res = mlx5e_rx_res_alloc();
+@@ -4626,9 +4632,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+       features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+       if (priv->channels.params.tunneled_offload_en)
+               features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+-      lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+-                              priv->max_nch, priv->drop_rq.rqn, &lro_param,
++                              priv->max_nch, priv->drop_rq.rqn,
++                              &priv->channels.params.packet_merge,
+                               priv->channels.params.num_channels);
+       if (err)
+               goto err_close_drop_rq;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index c100728c381cc..edecd149dcab3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -793,7 +793,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+ {
+       struct mlx5_core_dev *mdev = priv->mdev;
+-      struct mlx5e_lro_param lro_param;
+       int err;
+       priv->rx_res = mlx5e_rx_res_alloc();
+@@ -808,9 +807,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+               return err;
+       }
+-      lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+-                              priv->max_nch, priv->drop_rq.rqn, &lro_param,
++                              priv->max_nch, priv->drop_rq.rqn,
++                              &priv->channels.params.packet_merge,
+                               priv->channels.params.num_channels);
+       if (err)
+               goto err_close_drop_rq;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+index 269ebb53eda67..cfde0a45b8b8a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+@@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+               MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+               MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
+-      params->lro_en = false;
++      params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+       params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+       params->tunneled_offload_en = false;
+ }
+@@ -353,7 +353,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+ {
+       struct mlx5_core_dev *mdev = priv->mdev;
+-      struct mlx5e_lro_param lro_param;
+       int err;
+       priv->rx_res = mlx5e_rx_res_alloc();
+@@ -368,9 +367,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+               goto err_destroy_q_counters;
+       }
+-      lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+-                              priv->max_nch, priv->drop_rq.rqn, &lro_param,
++                              priv->max_nch, priv->drop_rq.rqn,
++                              &priv->channels.params.packet_merge,
+                               priv->channels.params.num_channels);
+       if (err)
+               goto err_close_drop_rq;
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 944bb9f5006c1..25d775764a5ac 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -6369,7 +6369,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
+       u8         reserved_at_3c[0x1];
+       u8         hash[0x1];
+       u8         reserved_at_3e[0x1];
+-      u8         lro[0x1];
++      u8         packet_merge[0x1];
+ };
+ struct mlx5_ifc_modify_tir_out_bits {
+-- 
+2.33.0
+
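+The shape of the consolidation, as a runnable sketch: a bool lro_en plus
+a detached timeout become one tagged parameter struct, so the upcoming
+SHAMPO support is a new enumerator instead of another bool (names mirror
+the patch; the builder stub is illustrative):
+
+	#include <stdio.h>
+
+	enum packet_merge {
+		PACKET_MERGE_NONE,
+		PACKET_MERGE_LRO,
+		PACKET_MERGE_SHAMPO,
+	};
+
+	struct packet_merge_param {
+		enum packet_merge type;
+		unsigned int timeout;
+	};
+
+	static void build_packet_merge(const struct packet_merge_param *p)
+	{
+		if (p->type == PACKET_MERGE_NONE)
+			return;		/* same early-out as the new builder */
+		printf("merge type %d, timeout %u usecs\n", p->type, p->timeout);
+	}
+
+	int main(void)
+	{
+		struct packet_merge_param p = {
+			.type = PACKET_MERGE_LRO, .timeout = 16,
+		};
+
+		build_packet_merge(&p);
+		return 0;
+	}
+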
diff --git a/queue-5.15/net-mlx5e-sync-tir-params-updates-against-concurrent.patch b/queue-5.15/net-mlx5e-sync-tir-params-updates-against-concurrent.patch
new file mode 100644 (file)
index 0000000..457fc6b
--- /dev/null
@@ -0,0 +1,216 @@
+From 172419a2acf5b8acaffefebb129d574017a7e935 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Sep 2021 13:54:30 +0300
+Subject: net/mlx5e: Sync TIR params updates against concurrent create/modify
+
+From: Tariq Toukan <tariqt@nvidia.com>
+
+[ Upstream commit 4cce2ccf08fbc27ae34ce0e72db15166e7b5f6a7 ]
+
+Transport Interface Receive (TIR) objects perform the packet processing
+and reassembly and are also responsible for demultiplexing the packets
+into the different RQs.
+
+There are certain TIR context attributes that propagate to the pointed
+RQs and are applied to them (like the packet_merge offloads (LRO/SHAMPO)
+and tunneled_offload_en). When TIRs do not agree on attribute values, a
+"last one wins" policy is applied. Hence, if not synced properly, a race
+between a TIR params update and a concurrent TIR create/modify operation
+might yield a mismatch between the shadow parameters in SW and the
+actual applied state of the RQs in HW.
+
+tunneled_offload_en is a fixed attribute per profile, while packet merge
+offload state might be toggled and get out-of-sync. When this happens,
+packet_merge offload might be working although not requested, or the
+opposite.
+
+All updates to packet_merge state and all create/modify operations of
+regular redirection/steering TIRs are done under the same priv->state_lock,
+so they do not run in parallel, and no race is possible.
+
+However, there are other kinds of TIRs (acceleration offload TIRs, like
+TLS TIRs) which are created on demand for each new connection without
+holding the coarse priv->state_lock, and hence might race.
+
+Fix this by synchronizing all packet_merge state reads and writes against
+all TIR create/modify operations. Include the modify operations of the
+regular redirection steering TIRs under the new lock, for better code
+layering and division of responsibilities.
+
+Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support")
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/en/rx_res.c   | 41 ++++++++++++++++++-
+ .../ethernet/mellanox/mlx5/core/en/rx_res.h   |  6 +--
+ .../mellanox/mlx5/core/en_accel/ktls_rx.c     | 24 +----------
+ 3 files changed, 44 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+index 1429538479960..0015a81eb9a17 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+@@ -13,6 +13,9 @@ struct mlx5e_rx_res {
+       unsigned int max_nch;
+       u32 drop_rqn;
++      struct mlx5e_packet_merge_param pkt_merge_param;
++      struct rw_semaphore pkt_merge_param_sem;
++
+       struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+       bool rss_active;
+       u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+@@ -392,6 +395,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+       if (err)
+               goto out;
++      /* Separated from the channels RQs, does not share pkt_merge state with them */
+       mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                   inner_ft_support);
+@@ -447,6 +451,9 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+       res->max_nch = max_nch;
+       res->drop_rqn = drop_rqn;
++      res->pkt_merge_param = *init_pkt_merge_param;
++      init_rwsem(&res->pkt_merge_param_sem);
++
+       err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
+       if (err)
+               goto err_out;
+@@ -513,7 +520,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+       return mlx5e_tir_get_tirn(&res->ptp.tir);
+ }
+-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
++static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+ {
+       return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+ }
+@@ -656,6 +663,9 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+       if (!builder)
+               return -ENOMEM;
++      down_write(&res->pkt_merge_param_sem);
++      res->pkt_merge_param = *pkt_merge_param;
++
+       mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+       final_err = 0;
+@@ -681,6 +691,7 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+               }
+       }
++      up_write(&res->pkt_merge_param_sem);
+       mlx5e_tir_builder_free(builder);
+       return final_err;
+ }
+@@ -689,3 +700,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *
+ {
+       return mlx5e_rss_get_hash(res->rss[0]);
+ }
++
++int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
++                              struct mlx5e_tir *tir)
++{
++      bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
++      struct mlx5e_tir_builder *builder;
++      u32 rqtn;
++      int err;
++
++      builder = mlx5e_tir_builder_alloc(false);
++      if (!builder)
++              return -ENOMEM;
++
++      rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
++
++      mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
++                                  inner_ft_support);
++      mlx5e_tir_builder_build_direct(builder);
++      mlx5e_tir_builder_build_tls(builder);
++      down_read(&res->pkt_merge_param_sem);
++      mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
++      err = mlx5e_tir_init(tir, builder, res->mdev, false);
++      up_read(&res->pkt_merge_param_sem);
++
++      mlx5e_tir_builder_free(builder);
++
++      return err;
++}
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+index d09f7d174a518..b39b20a720e0f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+@@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types
+ u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+-/* RQTN getters for modules that create their own TIRs */
+-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+-
+ /* Activate/deactivate API */
+ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+@@ -69,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+ /* Workaround for hairpin */
+ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
++/* Accel TIRs */
++int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
++                              struct mlx5e_tir *tir);
+ #endif /* __MLX5_EN_RX_RES_H__ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+index a2a9f68579dd8..15711814d2d28 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+@@ -100,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
+       return resp_list;
+ }
+-static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn)
+-{
+-      struct mlx5e_tir_builder *builder;
+-      int err;
+-
+-      builder = mlx5e_tir_builder_alloc(false);
+-      if (!builder)
+-              return -ENOMEM;
+-
+-      mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false);
+-      mlx5e_tir_builder_build_direct(builder);
+-      mlx5e_tir_builder_build_tls(builder);
+-      err = mlx5e_tir_init(tir, builder, mdev, false);
+-
+-      mlx5e_tir_builder_free(builder);
+-
+-      return err;
+-}
+-
+ static void accel_rule_handle_work(struct work_struct *work)
+ {
+       struct mlx5e_ktls_offload_context_rx *priv_rx;
+@@ -609,7 +590,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+       struct mlx5_core_dev *mdev;
+       struct mlx5e_priv *priv;
+       int rxq, err;
+-      u32 rqtn;
+       tls_ctx = tls_get_ctx(sk);
+       priv = netdev_priv(netdev);
+@@ -635,9 +615,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+       priv_rx->sw_stats = &priv->tls->sw_stats;
+       mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
+-      rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
+-
+-      err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
++      err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir);
+       if (err)
+               goto err_create_tir;
+-- 
+2.33.0
+
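The hunks above serialize packet-merge parameter updates against late TIR
creation with an rwsem: the setter publishes a snapshot of the parameters
under down_write(), and mlx5e_rx_res_tls_tir_create() builds from that
snapshot under down_read(). A minimal sketch of the pattern, with the
mlx5e structures reduced to illustrative stand-ins:

        #include <linux/rwsem.h>

        struct pkt_merge_param { int mode; int timeout; };

        struct rx_res {
                struct rw_semaphore pkt_merge_param_sem;
                struct pkt_merge_param pkt_merge_param; /* last applied params */
        };

        /* Writer: snapshot the new params, then reconfigure the existing
         * TIRs while still holding the semaphore. */
        static void rx_res_set_params(struct rx_res *res,
                                      const struct pkt_merge_param *p)
        {
                down_write(&res->pkt_merge_param_sem);
                res->pkt_merge_param = *p;
                /* ... issue modify-TIR commands with *p here ... */
                up_write(&res->pkt_merge_param_sem);
        }

        /* Reader: a TIR created later (e.g. for kTLS RX) sees a consistent
         * snapshot instead of racing with a concurrent update. */
        static void rx_res_create_tir(struct rx_res *res)
        {
                down_read(&res->pkt_merge_param_sem);
                /* ... build the TIR from res->pkt_merge_param ... */
                up_read(&res->pkt_merge_param_sem);
        }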
diff --git a/queue-5.15/preempt-dynamic-fix-setup_preempt_mode-return-value.patch b/queue-5.15/preempt-dynamic-fix-setup_preempt_mode-return-value.patch
new file mode 100644
index 0000000..97b3a73
--- /dev/null
+++ b/queue-5.15/preempt-dynamic-fix-setup_preempt_mode-return-value.patch
@@ -0,0 +1,43 @@
+From 1a238618915dcab11f9e8e466378dc6342085907 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Dec 2021 17:32:03 -0600
+Subject: preempt/dynamic: Fix setup_preempt_mode() return value
+
+From: Andrew Halaney <ahalaney@redhat.com>
+
+[ Upstream commit 9ed20bafc85806ca6c97c9128cec46c3ef80ae86 ]
+
+__setup() callbacks expect 1 for success and 0 for failure. Correct the
+usage here to reflect that.
+
+Fixes: 826bfeb37bb4 ("preempt/dynamic: Support dynamic preempt with preempt= boot option")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20211203233203.133581-1-ahalaney@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 6f4625f8276f1..4170ec15926ee 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6660,11 +6660,11 @@ static int __init setup_preempt_mode(char *str)
+       int mode = sched_dynamic_mode(str);
+       if (mode < 0) {
+               pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
+-              return 1;
++              return 0;
+       }
+       sched_dynamic_update(mode);
+-      return 0;
++      return 1;
+ }
+ __setup("preempt=", setup_preempt_mode);
+-- 
+2.33.0
+
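For context, the __setup() contract this fix restores, as a minimal
sketch; "foo=" and its parser are hypothetical stand-ins, not a real
kernel parameter:

        #include <linux/init.h>
        #include <linux/printk.h>

        /* Stand-in for the driver's real option parser. */
        static int __init parse_foo_mode(const char *s)
        {
                return (s && *s) ? 0 : -1;
        }

        static int __init setup_foo(char *str)
        {
                if (parse_foo_mode(str) < 0) {
                        pr_warn("foo: unsupported mode: %s\n", str);
                        return 0;       /* failure: option not consumed */
                }
                return 1;               /* success: option fully handled */
        }
        __setup("foo=", setup_foo);

Returning 0 tells the early parameter code the option was not handled,
which is exactly what the buggy code reported on success.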
diff --git a/queue-5.15/revert-drm-i915-implement-wa_1508744258.patch b/queue-5.15/revert-drm-i915-implement-wa_1508744258.patch
new file mode 100644
index 0000000..555596e
--- /dev/null
+++ b/queue-5.15/revert-drm-i915-implement-wa_1508744258.patch
@@ -0,0 +1,54 @@
+From fcceec5c7581cc5b1d33563cc19295632864704c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Nov 2021 06:09:30 -0800
+Subject: Revert "drm/i915: Implement Wa_1508744258"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: José Roberto de Souza <jose.souza@intel.com>
+
+[ Upstream commit 72641d8d60401a5f1e1a0431ceaf928680d34418 ]
+
+This workaround is causing hangs because I missed the fact that it
+needs to be enabled for all cases and disabled when doing a resolve
+pass.
+
+So KMD only needs to whitelist it and UMD will be the one setting it
+on a per-case basis.
+
+This reverts commit 28ec02c9cbebf3feeaf21a59df9dfbc02bda3362.
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4145
+Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
+Fixes: 28ec02c9cbeb ("drm/i915: Implement Wa_1508744258")
+Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211119140931.32791-1-jose.souza@intel.com
+(cherry picked from commit f3799ff16fcfacd44aee55db162830df461b631f)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+index aae609d7d85dd..6b5ab19a2ada9 100644
+--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+@@ -621,13 +621,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
+              FF_MODE2_GS_TIMER_MASK,
+              FF_MODE2_GS_TIMER_224,
+              0, false);
+-
+-      /*
+-       * Wa_14012131227:dg1
+-       * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
+-       */
+-      wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
+-                   GEN9_RHWO_OPTIMIZATION_DISABLE);
+ }
+ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
+-- 
+2.33.0
+
diff --git a/queue-5.15/sched-uclamp-fix-rq-uclamp_max-not-set-on-first-enqu.patch b/queue-5.15/sched-uclamp-fix-rq-uclamp_max-not-set-on-first-enqu.patch
new file mode 100644
index 0000000..2f20324
--- /dev/null
+++ b/queue-5.15/sched-uclamp-fix-rq-uclamp_max-not-set-on-first-enqu.patch
@@ -0,0 +1,66 @@
+From 7c9ed311b7435c84bfee0465b6936131a9cbc91a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Dec 2021 11:20:33 +0000
+Subject: sched/uclamp: Fix rq->uclamp_max not set on first enqueue
+
+From: Qais Yousef <qais.yousef@arm.com>
+
+[ Upstream commit 315c4f884800c45cb6bd8c90422fad554a8b9588 ]
+
+Commit d81ae8aac85c ("sched/uclamp: Fix initialization of struct
+uclamp_rq") introduced a bug where uclamp_max of the rq is not reset to
+match the woken up task's uclamp_max when the rq is idle.
+
+The code was relying on rq->uclamp_max being initialized to zero, so on
+first enqueue
+
+       static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
+                                           enum uclamp_id clamp_id)
+       {
+               ...
+
+               if (uc_se->value > READ_ONCE(uc_rq->value))
+                       WRITE_ONCE(uc_rq->value, uc_se->value);
+       }
+
+was actually resetting it. But since commit d81ae8aac85c changed the
+default to 1024, this no longer works. And since rq->uclamp_flags is
+also initialized to 0, neither above code path nor uclamp_idle_reset()
+update the rq->uclamp_max on first wake up from idle.
+
+This is only visible from the first wakeup(s) until the first dequeue
+to idle after enabling the static key. And it only matters if the
+uclamp_max of this task is < 1024, since only then will its uclamp_max
+be effectively ignored.
+
+Fix it by properly initializing rq->uclamp_flags = UCLAMP_FLAG_IDLE to
+ensure uclamp_idle_reset() is called which then will update the rq
+uclamp_max value as expected.
+
+Fixes: d81ae8aac85c ("sched/uclamp: Fix initialization of struct uclamp_rq")
+Signed-off-by: Qais Yousef <qais.yousef@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Valentin Schneider <Valentin.Schneider@arm.com>
+Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Link: https://lkml.kernel.org/r/20211202112033.1705279-1-qais.yousef@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 4170ec15926ee..0d12ec7be3017 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1914,7 +1914,7 @@ static void __init init_uclamp_rq(struct rq *rq)
+               };
+       }
+-      rq->uclamp_flags = 0;
++      rq->uclamp_flags = UCLAMP_FLAG_IDLE;
+ }
+ static void __init init_uclamp(void)
+-- 
+2.33.0
+
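A reduced sketch of why the one-line change works: with rq->uclamp_flags
starting as UCLAMP_FLAG_IDLE, the first enqueue takes the idle-reset
path and overwrites the stale 1024 default with the waking task's clamp
instead of max-aggregating against it. The types below are simplified
stand-ins for the scheduler's real structures:

        #define UCLAMP_FLAG_IDLE        0x01

        struct rq_clamp {
                unsigned int value;     /* defaults to 1024 since d81ae8aac85c */
                unsigned int flags;     /* now initialized to UCLAMP_FLAG_IDLE */
        };

        /* Shape of uclamp_idle_reset(): only an idle-flagged rq takes the
         * task's clamp verbatim. */
        static void idle_reset(struct rq_clamp *rc, unsigned int task_clamp)
        {
                if (!(rc->flags & UCLAMP_FLAG_IDLE))
                        return;
                rc->value = task_clamp;
        }

        static void enqueue(struct rq_clamp *rc, unsigned int task_clamp)
        {
                idle_reset(rc, task_clamp);
                /* Max aggregation alone can never lower a stale 1024. */
                if (task_clamp > rc->value)
                        rc->value = task_clamp;
                rc->flags &= ~UCLAMP_FLAG_IDLE;
        }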
diff --git a/queue-5.15/serial-8250_bcm7271-uart-errors-after-resuming-from-.patch b/queue-5.15/serial-8250_bcm7271-uart-errors-after-resuming-from-.patch
new file mode 100644
index 0000000..72ba2e6
--- /dev/null
+++ b/queue-5.15/serial-8250_bcm7271-uart-errors-after-resuming-from-.patch
@@ -0,0 +1,82 @@
+From 819dfae8ff64d600570da82c3a5d90674aecc7fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Dec 2021 15:14:02 -0500
+Subject: serial: 8250_bcm7271: UART errors after resuming from S2
+
+From: Al Cooper <alcooperx@gmail.com>
+
+[ Upstream commit 9cabe26e65a893afd5846908aa393bd283ab6609 ]
+
+There is a small window in time during resume where the hardware
+flow control signal RTS can be asserted (which allows a sender to
+resume sending data to the UART) but the baud rate has not yet
+been restored. This will cause corrupted data and FRAMING, OVERRUN
+and BREAK errors. This is happening because the MCTRL register is
+shadowed in uart_port struct and is later used during resume to set
+the MCTRL register during both serial8250_do_startup() and
+uart_resume_port(). Unfortunately, serial8250_do_startup()
+happens before the UART baud rate is restored. The fix is to clear
+the shadowed mctrl value at the end of suspend and restore it at the
+end of resume.
+
+Fixes: 41a469482de2 ("serial: 8250: Add new 8250-core based Broadcom STB driver")
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Al Cooper <alcooperx@gmail.com>
+Link: https://lore.kernel.org/r/20211201201402.47446-1-alcooperx@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_bcm7271.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
+index 7f656fac503fe..5163d60756b73 100644
+--- a/drivers/tty/serial/8250/8250_bcm7271.c
++++ b/drivers/tty/serial/8250/8250_bcm7271.c
+@@ -237,6 +237,7 @@ struct brcmuart_priv {
+       u32             rx_err;
+       u32             rx_timeout;
+       u32             rx_abort;
++      u32             saved_mctrl;
+ };
+ static struct dentry *brcmuart_debugfs_root;
+@@ -1133,16 +1134,27 @@ static int brcmuart_remove(struct platform_device *pdev)
+ static int __maybe_unused brcmuart_suspend(struct device *dev)
+ {
+       struct brcmuart_priv *priv = dev_get_drvdata(dev);
++      struct uart_8250_port *up = serial8250_get_port(priv->line);
++      struct uart_port *port = &up->port;
+       serial8250_suspend_port(priv->line);
+       clk_disable_unprepare(priv->baud_mux_clk);
++      /*
++       * This will prevent resume from enabling RTS before the
++       * baud rate has been restored.
++       */
++      priv->saved_mctrl = port->mctrl;
++      port->mctrl = 0;
++
+       return 0;
+ }
+ static int __maybe_unused brcmuart_resume(struct device *dev)
+ {
+       struct brcmuart_priv *priv = dev_get_drvdata(dev);
++      struct uart_8250_port *up = serial8250_get_port(priv->line);
++      struct uart_port *port = &up->port;
+       int ret;
+       ret = clk_prepare_enable(priv->baud_mux_clk);
+@@ -1165,6 +1177,7 @@ static int __maybe_unused brcmuart_resume(struct device *dev)
+               start_rx_dma(serial8250_get_port(priv->line));
+       }
+       serial8250_resume_port(priv->line);
++      port->mctrl = priv->saved_mctrl;
+       return 0;
+ }
+-- 
+2.33.0
+
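The shadow-and-restore pattern the patch applies, as a standalone sketch
(the types and function names below are illustrative, not the driver's
real ones):

        struct uart_state { unsigned int mctrl; };      /* stand-in for uart_port */
        struct drv_priv   { unsigned int saved_mctrl; };

        /* Suspend: stash and zero the shadowed modem-control bits so the
         * 8250 core cannot re-assert RTS while the baud rate is still
         * unprogrammed. */
        static int drv_suspend(struct uart_state *port, struct drv_priv *priv)
        {
                priv->saved_mctrl = port->mctrl;
                port->mctrl = 0;
                return 0;
        }

        /* Resume: only after clocks and baud rate are back does the shadow
         * value return, allowing RTS to be asserted again. */
        static int drv_resume(struct uart_state *port, struct drv_priv *priv)
        {
                /* ... clk_prepare_enable() + baud restore happen here ... */
                port->mctrl = priv->saved_mctrl;
                return 0;
        }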
diff --git a/queue-5.15/series b/queue-5.15/series
index 55d4ebd554568823b75fd9682d5433c226d1f799..de221934ed5feabf3f65f672f81002d31b44e624 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -149,3 +149,36 @@ atlantic-add-missing-dids-and-fix-115c.patch
 remove-half-duplex-mode-speed-capabilities.patch
 atlantic-fix-statistics-logic-for-production-hardware.patch
 atlantic-remove-warn-trace-message.patch
+kvm-x86-mmu-skip-tlb-flush-if-it-has-been-done-in-za.patch
+kvm-x86-mmu-pass-parameter-flush-as-false-in-kvm_tdp.patch
+drm-msm-devfreq-fix-opp-refcnt-leak.patch
+drm-msm-fix-mmap-to-include-vm_io-and-vm_dontdump.patch
+drm-msm-fix-wait_fence-submitqueue-leak.patch
+drm-msm-restore-error-return-on-invalid-fence.patch
+asoc-rk817-add-module-alias-for-rk817-codec.patch
+iwlwifi-fix-memory-leaks-in-error-handling-path.patch
+kvm-x86-fix-when-shadow_root_level-5-guest-root_leve.patch
+kvm-sev-initialize-regions_list-of-a-mirror-vm.patch
+net-mlx5e-fix-missing-ipsec-statistics-on-uplink-rep.patch
+net-mlx5-move-modify_rqt-command-to-ignore-list-in-i.patch
+net-mlx5-e-switch-respect-bw-share-of-the-new-group.patch
+net-mlx5-e-switch-fix-single-fdb-creation-on-bluefie.patch
+net-mlx5-e-switch-check-group-pointer-before-reading.patch
+kvm-x86-pmu-fix-reserved-bits-for-amd-perfevtseln-re.patch
+kvm-vmx-set-failure-code-in-prepare_vmcs02.patch
+mctp-don-t-let-rtm_delroute-delete-local-routes.patch
+revert-drm-i915-implement-wa_1508744258.patch
+io-wq-don-t-retry-task_work-creation-failure-on-fata.patch
+x86-sev-fix-sev-es-ins-outs-instructions-for-word-dw.patch
+x86-entry-add-a-fence-for-kernel-entry-swapgs-in-par.patch
+x86-entry-use-the-correct-fence-macro-after-swapgs-i.patch
+x86-xen-add-xenpv_restore_regs_and_return_to_usermod.patch
+preempt-dynamic-fix-setup_preempt_mode-return-value.patch
+sched-uclamp-fix-rq-uclamp_max-not-set-on-first-enqu.patch
+kvm-sev-return-appropriate-error-codes-if-sev-es-scr.patch
+kvm-x86-mmu-rename-slot_handle_leaf-to-slot_handle_l.patch
+kvm-x86-mmu-remove-spurious-tlb-flushes-in-tdp-mmu-z.patch
+net-mlx5e-rename-lro_timeout-to-packet_merge_timeout.patch
+net-mlx5e-rename-tir-lro-functions-to-tir-packet-mer.patch
+net-mlx5e-sync-tir-params-updates-against-concurrent.patch
+serial-8250_bcm7271-uart-errors-after-resuming-from-.patch
diff --git a/queue-5.15/x86-entry-add-a-fence-for-kernel-entry-swapgs-in-par.patch b/queue-5.15/x86-entry-add-a-fence-for-kernel-entry-swapgs-in-par.patch
new file mode 100644
index 0000000..160bb6b
--- /dev/null
+++ b/queue-5.15/x86-entry-add-a-fence-for-kernel-entry-swapgs-in-par.patch
@@ -0,0 +1,80 @@
+From 72a916377088a8612f13e6110fa3952950ddc43e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Nov 2021 18:11:21 +0800
+Subject: x86/entry: Add a fence for kernel entry SWAPGS in paranoid_entry()
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+[ Upstream commit c07e45553da1808aa802e9f0ffa8108cfeaf7a17 ]
+
+Commit
+
+  18ec54fdd6d18 ("x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations")
+
+added FENCE_SWAPGS_{KERNEL|USER}_ENTRY for conditional SWAPGS. In
+paranoid_entry(), it uses only FENCE_SWAPGS_KERNEL_ENTRY for both
+branches. This is because the fence is required for both cases since the
+CR3 write is conditional even when PTI is enabled.
+
+But
+
+  96b2371413e8f ("x86/entry/64: Switch CR3 before SWAPGS in paranoid entry")
+
+changed the order of SWAPGS and the CR3 write. And it missed the needed
+FENCE_SWAPGS_KERNEL_ENTRY for the user gsbase case.
+
+Add it back by changing the branches so that FENCE_SWAPGS_KERNEL_ENTRY
+can cover both branches.
+
+  [ bp: Massage, fix typos, remove obsolete comment while at it. ]
+
+Fixes: 96b2371413e8f ("x86/entry/64: Switch CR3 before SWAPGS in paranoid entry")
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20211126101209.8613-2-jiangshanlai@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/entry/entry_64.S | 16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index e38a4cf795d96..f1a8b5b2af964 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -890,6 +890,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ .Lparanoid_entry_checkgs:
+       /* EBX = 1 -> kernel GSBASE active, no restore required */
+       movl    $1, %ebx
++
+       /*
+        * The kernel-enforced convention is a negative GSBASE indicates
+        * a kernel value. No SWAPGS needed on entry and exit.
+@@ -897,21 +898,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+       movl    $MSR_GS_BASE, %ecx
+       rdmsr
+       testl   %edx, %edx
+-      jns     .Lparanoid_entry_swapgs
+-      ret
++      js      .Lparanoid_kernel_gsbase
+-.Lparanoid_entry_swapgs:
++      /* EBX = 0 -> SWAPGS required on exit */
++      xorl    %ebx, %ebx
+       swapgs
++.Lparanoid_kernel_gsbase:
+-      /*
+-       * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+-       * unconditional CR3 write, even in the PTI case.  So do an lfence
+-       * to prevent GS speculation, regardless of whether PTI is enabled.
+-       */
+       FENCE_SWAPGS_KERNEL_ENTRY
+-
+-      /* EBX = 0 -> SWAPGS required on exit */
+-      xorl    %ebx, %ebx
+       ret
+ SYM_CODE_END(paranoid_entry)
+-- 
+2.33.0
+
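The control-flow change above, transliterated into plain C purely for
illustration; swapgs() and fence_swapgs_kernel_entry() are stubs standing
in for the SWAPGS instruction and the FENCE_SWAPGS_KERNEL_ENTRY macro:

        static void swapgs(void) { /* stands in for SWAPGS */ }
        static void fence_swapgs_kernel_entry(void) { /* stands in for LFENCE */ }

        /* Before the fix, the kernel-gsbase branch returned before reaching
         * any fence; only the swapgs branch was fenced. Inverting the branch
         * lets a single FENCE_SWAPGS_KERNEL_ENTRY cover both paths. */
        static int paranoid_entry_gsbase(int gsbase_is_kernel)
        {
                int ebx = 1;            /* kernel GSBASE active, no restore */

                if (!gsbase_is_kernel) {
                        ebx = 0;        /* SWAPGS required on exit */
                        swapgs();
                }
                fence_swapgs_kernel_entry();    /* covers both branches */
                return ebx;
        }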
diff --git a/queue-5.15/x86-entry-use-the-correct-fence-macro-after-swapgs-i.patch b/queue-5.15/x86-entry-use-the-correct-fence-macro-after-swapgs-i.patch
new file mode 100644
index 0000000..65fe4fa
--- /dev/null
+++ b/queue-5.15/x86-entry-use-the-correct-fence-macro-after-swapgs-i.patch
@@ -0,0 +1,70 @@
+From 85a98bb04d7f80e7827154b9a86ba543221c5d32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Nov 2021 18:11:22 +0800
+Subject: x86/entry: Use the correct fence macro after swapgs in kernel CR3
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+[ Upstream commit 1367afaa2ee90d1c956dfc224e199fcb3ff3f8cc ]
+
+The commit
+
+  c75890700455 ("x86/entry/64: Remove unneeded kernel CR3 switching")
+
+removed a CR3 write in the faulting path of load_gs_index().
+
+But the path's FENCE_SWAPGS_USER_ENTRY has no fence operation if PTI is
+enabled, see spectre_v1_select_mitigation().
+
+Rather, it depended on the serializing CR3 write of SWITCH_TO_KERNEL_CR3
+and since it got removed, add a FENCE_SWAPGS_KERNEL_ENTRY call to make
+sure speculation is blocked.
+
+ [ bp: Massage commit message and comment. ]
+
+Fixes: c75890700455 ("x86/entry/64: Remove unneeded kernel CR3 switching")
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20211126101209.8613-3-jiangshanlai@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/entry/entry_64.S | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f1a8b5b2af964..f9e1c06a1c329 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -987,11 +987,6 @@ SYM_CODE_START_LOCAL(error_entry)
+       pushq   %r12
+       ret
+-.Lerror_entry_done_lfence:
+-      FENCE_SWAPGS_KERNEL_ENTRY
+-.Lerror_entry_done:
+-      ret
+-
+       /*
+        * There are two places in the kernel that can potentially fault with
+        * usergs. Handle them here.  B stepping K8s sometimes report a
+@@ -1014,8 +1009,14 @@ SYM_CODE_START_LOCAL(error_entry)
+        * .Lgs_change's error handler with kernel gsbase.
+        */
+       SWAPGS
+-      FENCE_SWAPGS_USER_ENTRY
+-      jmp .Lerror_entry_done
++
++      /*
++       * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
++       * kernel or user gsbase.
++       */
++.Lerror_entry_done_lfence:
++      FENCE_SWAPGS_KERNEL_ENTRY
++      ret
+ .Lbstep_iret:
+       /* Fix truncated RIP */
+-- 
+2.33.0
+
diff --git a/queue-5.15/x86-sev-fix-sev-es-ins-outs-instructions-for-word-dw.patch b/queue-5.15/x86-sev-fix-sev-es-ins-outs-instructions-for-word-dw.patch
new file mode 100644
index 0000000..d736e77
--- /dev/null
+++ b/queue-5.15/x86-sev-fix-sev-es-ins-outs-instructions-for-word-dw.patch
@@ -0,0 +1,157 @@
+From bae683507ebf34f6565221f233ed921fee0df081 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Nov 2021 15:27:57 -0800
+Subject: x86/sev: Fix SEV-ES INS/OUTS instructions for word, dword, and qword
+
+From: Michael Sterritt <sterritt@google.com>
+
+[ Upstream commit 1d5379d0475419085d3575bd9155f2e558e96390 ]
+
+Properly type the operands being passed to __put_user()/__get_user().
+Otherwise, these routines truncate data for dependent instructions
+(e.g., INSW) and only read/write one byte.
+
+This has been tested by sending a string with REP OUTSW to a port and
+then reading it back in with REP INSW on the same port.
+
+Previous behavior was to only send and receive the first byte of each
+element. For example, word operations for "abcd" would only read/write
+"ac". With this change, the full string is now written and read back.
+
+Fixes: f980f9c31a923 ("x86/sev-es: Compile early handler code into kernel image")
+Signed-off-by: Michael Sterritt <sterritt@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Marc Orr <marcorr@google.com>
+Reviewed-by: Peter Gonda <pgonda@google.com>
+Reviewed-by: Joerg Roedel <jroedel@suse.de>
+Link: https://lkml.kernel.org/r/20211119232757.176201-1-sterritt@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/sev.c | 57 +++++++++++++++++++++++++++++--------------
+ 1 file changed, 39 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
+index 88401675dabb0..a0064cf77e562 100644
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+                                  char *dst, char *buf, size_t size)
+ {
+       unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+-      char __user *target = (char __user *)dst;
+-      u64 d8;
+-      u32 d4;
+-      u16 d2;
+-      u8  d1;
+       /*
+        * This function uses __put_user() independent of whether kernel or user
+@@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+        * instructions here would cause infinite nesting.
+        */
+       switch (size) {
+-      case 1:
++      case 1: {
++              u8 d1;
++              u8 __user *target = (u8 __user *)dst;
++
+               memcpy(&d1, buf, 1);
+               if (__put_user(d1, target))
+                       goto fault;
+               break;
+-      case 2:
++      }
++      case 2: {
++              u16 d2;
++              u16 __user *target = (u16 __user *)dst;
++
+               memcpy(&d2, buf, 2);
+               if (__put_user(d2, target))
+                       goto fault;
+               break;
+-      case 4:
++      }
++      case 4: {
++              u32 d4;
++              u32 __user *target = (u32 __user *)dst;
++
+               memcpy(&d4, buf, 4);
+               if (__put_user(d4, target))
+                       goto fault;
+               break;
+-      case 8:
++      }
++      case 8: {
++              u64 d8;
++              u64 __user *target = (u64 __user *)dst;
++
+               memcpy(&d8, buf, 8);
+               if (__put_user(d8, target))
+                       goto fault;
+               break;
++      }
+       default:
+               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+               return ES_UNSUPPORTED;
+@@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+                                 char *src, char *buf, size_t size)
+ {
+       unsigned long error_code = X86_PF_PROT;
+-      char __user *s = (char __user *)src;
+-      u64 d8;
+-      u32 d4;
+-      u16 d2;
+-      u8  d1;
+       /*
+        * This function uses __get_user() independent of whether kernel or user
+@@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+        * instructions here would cause infinite nesting.
+        */
+       switch (size) {
+-      case 1:
++      case 1: {
++              u8 d1;
++              u8 __user *s = (u8 __user *)src;
++
+               if (__get_user(d1, s))
+                       goto fault;
+               memcpy(buf, &d1, 1);
+               break;
+-      case 2:
++      }
++      case 2: {
++              u16 d2;
++              u16 __user *s = (u16 __user *)src;
++
+               if (__get_user(d2, s))
+                       goto fault;
+               memcpy(buf, &d2, 2);
+               break;
+-      case 4:
++      }
++      case 4: {
++              u32 d4;
++              u32 __user *s = (u32 __user *)src;
++
+               if (__get_user(d4, s))
+                       goto fault;
+               memcpy(buf, &d4, 4);
+               break;
+-      case 8:
++      }
++      case 8: {
++              u64 d8;
++              u64 __user *s = (u64 __user *)src;
++
+               if (__get_user(d8, s))
+                       goto fault;
+               memcpy(buf, &d8, 8);
+               break;
++      }
+       default:
+               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+               return ES_UNSUPPORTED;
+-- 
+2.33.0
+
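The root cause in miniature: __put_user() and __get_user() size the
memory access from the pointee type of their pointer argument, so a
char __user * always produces a one-byte access no matter how wide the
operand is. A small sketch (the demo function and destination are
hypothetical):

        #include <linux/errno.h>
        #include <linux/types.h>
        #include <linux/uaccess.h>

        /* Illustration only: the access width comes from sizeof(*ptr). */
        static int demo_put(void __user *dst)
        {
                u16 d2 = 0x6261;        /* two bytes: 'a' 'b' */

                /* Old pattern: char pointer -> 1-byte store, 'b' is lost. */
                if (__put_user(d2, (char __user *)dst))
                        return -EFAULT;

                /* Fixed pattern: pointer type matches the operand width,
                 * so the full 16-bit value is stored. */
                if (__put_user(d2, (u16 __user *)dst))
                        return -EFAULT;

                return 0;
        }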
diff --git a/queue-5.15/x86-xen-add-xenpv_restore_regs_and_return_to_usermod.patch b/queue-5.15/x86-xen-add-xenpv_restore_regs_and_return_to_usermod.patch
new file mode 100644
index 0000000..31443cf
--- /dev/null
+++ b/queue-5.15/x86-xen-add-xenpv_restore_regs_and_return_to_usermod.patch
@@ -0,0 +1,95 @@
+From 974fc36031f713b1d3048d2d33acd51b611d1645 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Nov 2021 18:11:23 +0800
+Subject: x86/xen: Add xenpv_restore_regs_and_return_to_usermode()
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+[ Upstream commit 5c8f6a2e316efebb3ba93d8c1af258155dcf5632 ]
+
+In the native case, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is the
+trampoline stack. But XEN pv doesn't use a trampoline stack, so
+PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is also the kernel stack.
+
+In that case, source and destination stacks are identical, which means
+that reusing swapgs_restore_regs_and_return_to_usermode() in XEN pv
+would cause %rsp to move up to the top of the kernel stack and leave the
+IRET frame below %rsp.
+
+This is dangerous as it can be corrupted if #NMI / #MC hit as either of
+these events occurring in the middle of the stack pushing would clobber
+data on the (original) stack.
+
+And, with XEN pv, swapgs_restore_regs_and_return_to_usermode() pushing
+the IRET frame onto the original address is useless and error-prone
+when there is any future attempt to modify the code.
+
+ [ bp: Massage commit message. ]
+
+Fixes: 7f2590a110b8 ("x86/entry/64: Use a per-CPU trampoline stack for IDT entries")
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: https://lkml.kernel.org/r/20211126101209.8613-4-jiangshanlai@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/entry/entry_64.S |  4 ++++
+ arch/x86/xen/xen-asm.S    | 20 ++++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f9e1c06a1c329..97b1f84bb53f8 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+       ud2
+ 1:
+ #endif
++#ifdef CONFIG_XEN_PV
++      ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
++#endif
++
+       POP_REGS pop_rdi=0
+       /*
+diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
+index 1e626444712be..3bebf66569b48 100644
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -20,6 +20,7 @@
+ #include <linux/init.h>
+ #include <linux/linkage.h>
++#include <../entry/calling.h>
+ /*
+  * Enable events.  This clears the event mask and tests the pending
+@@ -191,6 +192,25 @@ SYM_CODE_START(xen_iret)
+       jmp hypercall_iret
+ SYM_CODE_END(xen_iret)
++/*
++ * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
++ * also the kernel stack.  Reusing swapgs_restore_regs_and_return_to_usermode()
++ * in XEN pv would cause %rsp to move up to the top of the kernel stack and
++ * leave the IRET frame below %rsp, which can be corrupted if an #NMI
++ * hits. And swapgs_restore_regs_and_return_to_usermode() pushing the
++ * IRET frame at the same address is useless.
++ */
++SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
++      UNWIND_HINT_REGS
++      POP_REGS
++
++      /* stackleak_erase() can work safely on the kernel stack. */
++      STACKLEAK_ERASE_NOCLOBBER
++
++      addq    $8, %rsp        /* skip regs->orig_ax */
++      jmp xen_iret
++SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
++
+ /*
+  * Xen handles syscall callbacks much like ordinary exceptions, which
+  * means we have:
+-- 
+2.33.0
+