git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author     Sasha Levin <sashal@kernel.org>
           Sat, 28 Jan 2023 03:46:11 +0000 (22:46 -0500)
committer  Sasha Levin <sashal@kernel.org>
           Sat, 28 Jan 2023 03:46:11 +0000 (22:46 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch [new file with mode: 0644]
queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch [new file with mode: 0644]
queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch [new file with mode: 0644]
queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch [new file with mode: 0644]
queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch [new file with mode: 0644]

diff --git a/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch b/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch
new file mode 100644
index 0000000..b0d1b71
--- /dev/null
+++ b/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch
@@ -0,0 +1,99 @@
+From fd33207736a2552bd0d5f6c62a4f115b1e09da5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 07:05:28 -0800
+Subject: kvm/vfio: Fix potential deadlock on vfio group_lock
+
+From: Yi Liu <yi.l.liu@intel.com>
+
+[ Upstream commit 51cdc8bc120ef6e42f6fb758341f5d91bc955952 ]
+
+Currently it is possible that the final put of a KVM reference comes from
+vfio during its device close operation.  This occurs while the vfio group
+lock is held; however, if the vfio device is still in the kvm device list,
+then the following call chain could result in a deadlock:
+
+VFIO holds group->group_lock/group_rwsem
+  -> kvm_put_kvm
+   -> kvm_destroy_vm
+    -> kvm_destroy_devices
+     -> kvm_vfio_destroy
+      -> kvm_vfio_file_set_kvm
+       -> vfio_file_set_kvm
+        -> try to hold group->group_lock/group_rwsem
+
+The key function is kvm_destroy_devices(), which triggers the destroy
+callback of kvm_device_ops. That callback calls back into vfio and tries
+to take group_lock, so the deadlock goes away if this path no longer
+calls back into vfio. KVM provides another point at which to free the
+kvm-vfio device: when the device file descriptor is closed. This can be
+achieved by providing the release callback instead of the destroy
+callback (the two are documented below). Also rename kvm_vfio_destroy()
+to kvm_vfio_release().
+
+       /*
+        * Destroy is responsible for freeing dev.
+        *
+        * Destroy may be called before or after destructors are called
+        * on emulated I/O regions, depending on whether a reference is
+        * held by a vcpu or other kvm component that gets destroyed
+        * after the emulated I/O.
+        */
+       void (*destroy)(struct kvm_device *dev);
+
+       /*
+        * Release is an alternative method to free the device. It is
+        * called when the device file descriptor is closed. Once
+        * release is called, the destroy method will not be called
+        * anymore as the device is removed from the device list of
+        * the VM. kvm->lock is held.
+        */
+       void (*release)(struct kvm_device *dev);
+
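+As a minimal userspace sketch of the self-deadlock described above (a
+pthread mutex stands in for group_lock; every function name below is made
+up for illustration and is not kernel API):
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+
+static pthread_mutex_t group_lock;      /* stands in for group->group_lock */
+
+/* stands in for vfio_file_set_kvm(), reached via the destroy callback */
+static void fake_vfio_file_set_kvm(void)
+{
+        int err = pthread_mutex_lock(&group_lock); /* same lock, same thread */
+
+        if (err)
+                printf("deadlock: %s\n", strerror(err));
+        else
+                pthread_mutex_unlock(&group_lock);
+}
+
+/* stands in for kvm_put_kvm() dropping the final reference:
+ * kvm_destroy_vm() -> kvm_destroy_devices() -> destroy cb -> back into vfio
+ */
+static void fake_kvm_put_kvm(void)
+{
+        fake_vfio_file_set_kvm();
+}
+
+int main(void)
+{
+        pthread_mutexattr_t attr;
+
+        /* an error-checking mutex reports the self-deadlock instead of hanging */
+        pthread_mutexattr_init(&attr);
+        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
+        pthread_mutex_init(&group_lock, &attr);
+        pthread_mutexattr_destroy(&attr);
+
+        pthread_mutex_lock(&group_lock);  /* vfio close path holds group_lock */
+        fake_kvm_put_kvm();               /* last KVM reference dropped under it */
+        pthread_mutex_unlock(&group_lock);
+        return 0;
+}
+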
+Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM")
+Reported-by: Alex Williamson <alex.williamson@redhat.com>
+Suggested-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Yi Liu <yi.l.liu@intel.com>
+Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
+Link: https://lore.kernel.org/r/20230114000351.115444-1-mjrosato@linux.ibm.com
+Link: https://lore.kernel.org/r/20230120150528.471752-1-yi.l.liu@intel.com
+[aw: update comment as well, s/destroy/release/]
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/vfio.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
+index 495ceabffe88..9584eb57e0ed 100644
+--- a/virt/kvm/vfio.c
++++ b/virt/kvm/vfio.c
+@@ -336,7 +336,7 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
+       return -ENXIO;
+ }
+-static void kvm_vfio_destroy(struct kvm_device *dev)
++static void kvm_vfio_release(struct kvm_device *dev)
+ {
+       struct kvm_vfio *kv = dev->private;
+       struct kvm_vfio_group *kvg, *tmp;
+@@ -355,7 +355,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
+       kvm_vfio_update_coherency(dev);
+       kfree(kv);
+-      kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
++      kfree(dev); /* alloc by kvm_ioctl_create_device, free by .release */
+ }
+ static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+@@ -363,7 +363,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+ static struct kvm_device_ops kvm_vfio_ops = {
+       .name = "kvm-vfio",
+       .create = kvm_vfio_create,
+-      .destroy = kvm_vfio_destroy,
++      .release = kvm_vfio_release,
+       .set_attr = kvm_vfio_set_attr,
+       .has_attr = kvm_vfio_has_attr,
+ };
+-- 
+2.39.0
+
diff --git a/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch b/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
new file mode 100644
index 0000000..76ea63b
--- /dev/null
+++ b/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
@@ -0,0 +1,121 @@
+From 39ddad2cd9582eac2e3a5cced784702f15ddc6d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 14:52:14 -0500
+Subject: nfsd: don't free files unconditionally in __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 4bdbba54e9b1c769da8ded9abd209d765715e1d6 ]
+
+nfsd_file_cache_purge is called when the server is shutting down, in
+which case, tearing things down is generally fine, but it also gets
+called when the exports cache is flushed.
+
+Instead of walking the cache and freeing everything unconditionally,
+handle it the same as when we have a notification of conflicting access.
+
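+The dispose decision hinges on refcount_sub_and_test(): the decrement
+starts at 1, grows by one more if the file was also removed from the LRU,
+and the nfsd_file is queued for disposal only if subtracting that total
+drops nf_ref to zero. A rough standalone sketch of that pattern (C11
+atomics standing in for refcount_t; the counts and names are illustrative):
+
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+/* true when the count reaches zero, mirroring refcount_sub_and_test() */
+static bool sub_and_test(atomic_int *ref, int decrement)
+{
+        return atomic_fetch_sub(ref, decrement) == decrement;
+}
+
+int main(void)
+{
+        atomic_int nf_ref = 2;          /* illustrative starting count */
+        int decrement = 1;
+        bool removed_from_lru = true;
+
+        if (removed_from_lru)
+                ++decrement;            /* extra decrement for the LRU's reference */
+
+        if (sub_and_test(&nf_ref, decrement))
+                printf("queue for disposal\n");
+        else
+                printf("other references remain\n");
+        return 0;
+}
+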
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Reported-by: Ruben Vestergaard <rubenv@drcmr.dk>
+Reported-by: Torkil Svensgaard <torkil@drcmr.dk>
+Reported-by: Shachar Kagan <skagan@nvidia.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Shachar Kagan <skagan@nvidia.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 61 ++++++++++++++++++++++++++-------------------
+ 1 file changed, 36 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index ea6fb0e6b165..142b3c928f76 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -638,6 +638,39 @@ static struct shrinker    nfsd_file_shrinker = {
+       .seeks = 1,
+ };
++/**
++ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
++ * @nf: nfsd_file to attempt to queue
++ * @dispose: private list to queue successfully-put objects
++ *
++ * Unhash an nfsd_file, try to get a reference to it, and then put that
++ * reference. If it's the last reference, queue it to the dispose list.
++ */
++static void
++nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
++      __must_hold(RCU)
++{
++      int decrement = 1;
++
++      /* If we raced with someone else unhashing, ignore it */
++      if (!nfsd_file_unhash(nf))
++              return;
++
++      /* If we can't get a reference, ignore it */
++      if (!nfsd_file_get(nf))
++              return;
++
++      /* Extra decrement if we remove from the LRU */
++      if (nfsd_file_lru_remove(nf))
++              ++decrement;
++
++      /* If refcount goes to 0, then put on the dispose list */
++      if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
++              list_add(&nf->nf_lru, dispose);
++              trace_nfsd_file_closing(nf);
++      }
++}
++
+ /**
+  * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
+  * @inode:   inode on which to close out nfsd_files
+@@ -665,30 +698,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+       rcu_read_lock();
+       do {
+-              int decrement = 1;
+-
+               nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+                                      nfsd_file_rhash_params);
+               if (!nf)
+                       break;
+-
+-              /* If we raced with someone else unhashing, ignore it */
+-              if (!nfsd_file_unhash(nf))
+-                      continue;
+-
+-              /* If we can't get a reference, ignore it */
+-              if (!nfsd_file_get(nf))
+-                      continue;
+-
+-              /* Extra decrement if we remove from the LRU */
+-              if (nfsd_file_lru_remove(nf))
+-                      ++decrement;
+-
+-              /* If refcount goes to 0, then put on the dispose list */
+-              if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+-                      list_add(&nf->nf_lru, dispose);
+-                      trace_nfsd_file_closing(nf);
+-              }
++              nfsd_file_cond_queue(nf, dispose);
+       } while (1);
+       rcu_read_unlock();
+ }
+@@ -905,11 +919,8 @@ __nfsd_file_cache_purge(struct net *net)
+               nf = rhashtable_walk_next(&iter);
+               while (!IS_ERR_OR_NULL(nf)) {
+-                      if (!net || nf->nf_net == net) {
+-                              nfsd_file_unhash(nf);
+-                              nfsd_file_lru_remove(nf);
+-                              list_add(&nf->nf_lru, &dispose);
+-                      }
++                      if (!net || nf->nf_net == net)
++                              nfsd_file_cond_queue(nf, &dispose);
+                       nf = rhashtable_walk_next(&iter);
+               }
+-- 
+2.39.0
+
diff --git a/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch b/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch
new file mode 100644
index 0000000..07b9817
--- /dev/null
+++ b/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch
@@ -0,0 +1,98 @@
+From d110d8b85513cf38f084fa26afeaa2784c6dbf35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Oct 2022 10:10:52 +0200
+Subject: sched/fair: Check if prev_cpu has highest spare cap in feec()
+
+From: Pierre Gondois <pierre.gondois@arm.com>
+
+[ Upstream commit ad841e569f5c88e3332b32a000f251f33ff32187 ]
+
+When evaluating the CPU candidates in the perf domain (pd) containing
+the previously used CPU (prev_cpu), find_energy_efficient_cpu()
+evaluates the energy of the pd:
+- without the task (base_energy)
+- with the task placed on prev_cpu (if the task fits)
+- with the task placed on the CPU with the highest spare capacity,
+  prev_cpu being excluded from this set
+
+If prev_cpu is already the CPU with the highest spare capacity,
+max_spare_cap_cpu will be the CPU with the second highest spare
+capacity.
+
+On an Arm64 Juno-r2, with a workload of 10 tasks at a 10% duty cycle,
+when prev_cpu and max_spare_cap_cpu are both valid candidates,
+prev_spare_cap > max_spare_cap in roughly 82% of cases.
+Thus, about 82% of the time, the energy of the pd with the task placed
+on max_spare_cap_cpu is computed with no possible positive outcome.
+
+Do not consider max_spare_cap_cpu as a valid candidate if
+prev_spare_cap > max_spare_cap.
+
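+A condensed sketch of the resulting per-domain candidate selection (plain
+C with the energy model and cpumask iteration stripped out; the helper and
+struct names below are illustrative, not the kernel's):
+
+#include <stdio.h>
+
+struct candidate { int cpu; unsigned long spare_cap; };
+
+static void pick_candidates(const unsigned long *spare, int nr_cpus,
+                            int prev_cpu, struct candidate *prev,
+                            struct candidate *max)
+{
+        prev->cpu = -1; prev->spare_cap = 0;
+        max->cpu = -1;  max->spare_cap = 0;
+
+        for (int cpu = 0; cpu < nr_cpus; cpu++) {
+                if (cpu == prev_cpu) {
+                        prev->cpu = cpu;                /* always a candidate */
+                        prev->spare_cap = spare[cpu];
+                } else if (spare[cpu] > max->spare_cap) {
+                        max->cpu = cpu;                 /* best of the rest */
+                        max->spare_cap = spare[cpu];
+                }
+        }
+
+        /* the fix: skip the energy computation for max_spare_cap_cpu when
+         * prev_cpu already offers at least as much spare capacity */
+        if (max->cpu >= 0 && max->spare_cap <= prev->spare_cap)
+                max->cpu = -1;
+}
+
+int main(void)
+{
+        unsigned long spare[] = { 300, 120, 250, 80 };  /* made-up capacities */
+        struct candidate prev, max;
+
+        pick_candidates(spare, 4, 0 /* prev_cpu */, &prev, &max);
+        printf("prev_cpu candidate: %d, max_spare_cap_cpu: %d\n",
+               prev.cpu, max.cpu);
+        return 0;
+}
+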
+Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20221006081052.3862167-2-pierre.gondois@arm.com
+Stable-dep-of: e26fd28db828 ("sched/uclamp: Fix a uninitialized variable warnings")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 0f32acb05055..bb04ca795fc3 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7217,7 +7217,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+               unsigned long cur_delta, max_spare_cap = 0;
+               unsigned long rq_util_min, rq_util_max;
+               unsigned long util_min, util_max;
+-              bool compute_prev_delta = false;
++              unsigned long prev_spare_cap = 0;
+               int max_spare_cap_cpu = -1;
+               unsigned long base_energy;
+@@ -7279,18 +7279,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+                       if (cpu == prev_cpu) {
+                               /* Always use prev_cpu as a candidate. */
+-                              compute_prev_delta = true;
++                              prev_spare_cap = cpu_cap;
+                       } else if (cpu_cap > max_spare_cap) {
+                               /*
+                                * Find the CPU with the maximum spare capacity
+-                               * in the performance domain.
++                               * among the remaining CPUs in the performance
++                               * domain.
+                                */
+                               max_spare_cap = cpu_cap;
+                               max_spare_cap_cpu = cpu;
+                       }
+               }
+-              if (max_spare_cap_cpu < 0 && !compute_prev_delta)
++              if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
+                       continue;
+               eenv_pd_busy_time(&eenv, cpus, p);
+@@ -7298,7 +7299,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+               base_energy = compute_energy(&eenv, pd, cpus, p, -1);
+               /* Evaluate the energy impact of using prev_cpu. */
+-              if (compute_prev_delta) {
++              if (prev_spare_cap > 0) {
+                       prev_delta = compute_energy(&eenv, pd, cpus, p,
+                                                   prev_cpu);
+                       /* CPU utilization has changed */
+@@ -7309,7 +7310,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+               }
+               /* Evaluate the energy impact of using max_spare_cap_cpu. */
+-              if (max_spare_cap_cpu >= 0) {
++              if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
+                       cur_delta = compute_energy(&eenv, pd, cpus, p,
+                                                  max_spare_cap_cpu);
+                       /* CPU utilization has changed */
+-- 
+2.39.0
+
diff --git a/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch b/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch
new file mode 100644
index 0000000..e2477d1
--- /dev/null
+++ b/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch
@@ -0,0 +1,96 @@
+From cd5fc02331ccded414dd2de5190d308d4ca11da4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Jan 2023 12:27:07 +0000
+Subject: sched/uclamp: Fix a uninitialized variable warnings
+
+From: Qais Yousef <qyousef@layalina.io>
+
+[ Upstream commit e26fd28db82899be71b4b949527373d0a6be1e65 ]
+
+Addresses the following warnings:
+
+> config: riscv-randconfig-m031-20221111
+> compiler: riscv64-linux-gcc (GCC) 12.1.0
+>
+> smatch warnings:
+> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'.
+> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'.
+
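+The warnings fire because, before this change, util_min and util_max were
+assigned only inside the uclamp_is_used() branch yet handed to
+util_fits_cpu() on every path; initializing them to the task's own clamps
+at declaration closes the uninitialized path. A trimmed-down standalone
+illustration of the fixed pattern (names and values are made up):
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static bool uclamp_in_use;      /* stand-in for uclamp_is_used() */
+static bool rq_is_idle;         /* stand-in for uclamp_rq_is_idle() */
+
+/* start from the task's own clamp so every path that reaches the
+ * consumer sees an initialized value */
+static unsigned long effective_util_min(unsigned long p_util_min,
+                                        unsigned long rq_util_min)
+{
+        unsigned long util_min = p_util_min;
+
+        if (uclamp_in_use && !rq_is_idle)
+                util_min = MAX(rq_util_min, p_util_min);  /* max aggregation */
+
+        return util_min;
+}
+
+int main(void)
+{
+        /* uclamp disabled: the old code would have read util_min
+         * uninitialized here; now it falls back to the task value */
+        printf("%lu\n", effective_util_min(128, 512));
+        return 0;
+}
+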
+Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()")
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 35 ++++++++++++++++-------------------
+ 1 file changed, 16 insertions(+), 19 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index bb04ca795fc3..2c3d0d49c80e 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7213,10 +7213,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+       eenv_task_busy_time(&eenv, p, prev_cpu);
+       for (; pd; pd = pd->next) {
++              unsigned long util_min = p_util_min, util_max = p_util_max;
+               unsigned long cpu_cap, cpu_thermal_cap, util;
+               unsigned long cur_delta, max_spare_cap = 0;
+               unsigned long rq_util_min, rq_util_max;
+-              unsigned long util_min, util_max;
+               unsigned long prev_spare_cap = 0;
+               int max_spare_cap_cpu = -1;
+               unsigned long base_energy;
+@@ -7235,6 +7235,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+               eenv.pd_cap = 0;
+               for_each_cpu(cpu, cpus) {
++                      struct rq *rq = cpu_rq(cpu);
++
+                       eenv.pd_cap += cpu_thermal_cap;
+                       if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+@@ -7253,24 +7255,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+                        * much capacity we can get out of the CPU; this is
+                        * aligned with sched_cpu_util().
+                        */
+-                      if (uclamp_is_used()) {
+-                              if (uclamp_rq_is_idle(cpu_rq(cpu))) {
+-                                      util_min = p_util_min;
+-                                      util_max = p_util_max;
+-                              } else {
+-                                      /*
+-                                       * Open code uclamp_rq_util_with() except for
+-                                       * the clamp() part. Ie: apply max aggregation
+-                                       * only. util_fits_cpu() logic requires to
+-                                       * operate on non clamped util but must use the
+-                                       * max-aggregated uclamp_{min, max}.
+-                                       */
+-                                      rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
+-                                      rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
+-
+-                                      util_min = max(rq_util_min, p_util_min);
+-                                      util_max = max(rq_util_max, p_util_max);
+-                              }
++                      if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
++                              /*
++                               * Open code uclamp_rq_util_with() except for
++                               * the clamp() part. Ie: apply max aggregation
++                               * only. util_fits_cpu() logic requires to
++                               * operate on non clamped util but must use the
++                               * max-aggregated uclamp_{min, max}.
++                               */
++                              rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
++                              rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
++
++                              util_min = max(rq_util_min, p_util_min);
++                              util_max = max(rq_util_max, p_util_max);
+                       }
+                       if (!util_fits_cpu(util, util_min, util_max, cpu))
+                               continue;
+-- 
+2.39.0
+
diff --git a/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch b/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch
new file mode 100644
index 0000000..32562d4
--- /dev/null
+++ b/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch
@@ -0,0 +1,40 @@
+From fdc9a2376321ace7faf4c7383591f9f52ac61231 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 06:12:55 +0300
+Subject: scsi: hpsa: Fix allocation size for scsi_host_alloc()
+
+From: Alexey V. Vissarionov <gremlin@altlinux.org>
+
+[ Upstream commit bbbd25499100c810ceaf5193c3cfcab9f7402a33 ]
+
+The 'h' is a pointer to struct ctlr_info, so sizeof(h) is just 4 or 8
+bytes, while the structure itself, whose size scsi_host_alloc() should
+be given, is much bigger.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
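+The bug is the classic sizeof(pointer) vs. sizeof(struct) mix-up, which
+here asks scsi_host_alloc() for only pointer-sized private data. A
+standalone illustration (the structure layout below is invented, not the
+real ctlr_info):
+
+#include <stdio.h>
+
+struct ctlr_info {
+        char name[64];
+        int nr_cmds;
+        void *vaddr;
+        /* ... the real structure is far larger ... */
+};
+
+int main(void)
+{
+        struct ctlr_info *h = NULL;
+
+        /* buggy: size of the pointer, 4 or 8 bytes */
+        printf("sizeof(h)                = %zu\n", sizeof(h));
+        /* fixed: size of the structure itself */
+        printf("sizeof(struct ctlr_info) = %zu\n", sizeof(struct ctlr_info));
+        printf("sizeof(*h)               = %zu\n", sizeof(*h));
+        return 0;
+}
+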
+Fixes: edd163687ea5 ("hpsa: add driver for HP Smart Array controllers.")
+Link: https://lore.kernel.org/r/20230118031255.GE15213@altlinux.org
+Signed-off-by: Alexey V. Vissarionov <gremlin@altlinux.org>
+Acked-by: Don Brace <don.brace@microchip.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/hpsa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
+index 4dbf51e2623a..f6da34850af9 100644
+--- a/drivers/scsi/hpsa.c
++++ b/drivers/scsi/hpsa.c
+@@ -5850,7 +5850,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h)
+ {
+       struct Scsi_Host *sh;
+-      sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h));
++      sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info));
+       if (sh == NULL) {
+               dev_err(&h->pdev->dev, "scsi_host_alloc failed\n");
+               return -ENOMEM;
+-- 
+2.39.0
+
diff --git a/queue-6.1/series b/queue-6.1/series
index 293acb51827629743a994b4a95003f2d9d057086..4deec8c46a178d4fbb809b436a7422b216ac6ac9 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -201,3 +201,9 @@ drm-amdgpu-complete-gfxoff-allow-signal-during-suspend-without-delay.patch
 io_uring-msg_ring-fix-remote-queue-to-disabled-ring.patch
 wifi-mac80211-proper-mark-itxqs-for-resumption.patch
 wifi-mac80211-fix-itxq-ampdu-fragmentation-handling.patch
+sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch
+sched-uclamp-fix-a-uninitialized-variable-warnings.patch
+vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch
+scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch
+kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch
+nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
diff --git a/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch b/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch
new file mode 100644
index 0000000..46102b3
--- /dev/null
+++ b/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch
@@ -0,0 +1,103 @@
+From f0fa37772ed7be147154796fd36d4ba1bcf23ff4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Jan 2023 17:44:27 +0100
+Subject: vfio/type1: Respect IOMMU reserved regions in vfio_test_domain_fgsp()
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+[ Upstream commit 895c0747f726bb50c9b7a805613a61d1b6f9fa06 ]
+
+Since commit cbf7827bc5dc ("iommu/s390: Fix potential s390_domain
+aperture shrinking") the s390 IOMMU driver uses reserved regions for the
+system provided DMA ranges of PCI devices. Previously it reduced the
+size of the IOMMU aperture and checked it on each mapping operation.
+On current machines the system denies use of DMA addresses below 2^32 for
+all PCI devices.
+
+Usually, mapping IOVAs in reserved regions is harmless until a DMA
+actually tries to utilize the mapping.
+a virtual PCI device called ISM which is implemented in firmware and
+used for cross LPAR communication. Unlike real PCI devices this device
+does not use the hardware IOMMU but inspects IOMMU translation tables
+directly on IOTLB flush (s390 RPCIT instruction). If it detects IOVA
+mappings outside the allowed ranges it goes into an error state. This
+error state then causes the device to be unavailable to the KVM guest.
+
+Analysing this, we found that vfio_test_domain_fgsp() maps 2 pages at DMA
+address 0 irrespective of the IOMMU's reserved regions. Even though this
+is usually harmless, it seems wrong in the general case, so instead go
+through the freshly updated IOVA list and try to find a range that isn't
+reserved, fits 2 pages, and is PAGE_SIZE * 2 aligned. If one is found,
+use it for testing for fine-grained super pages.
+
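+In other words, the probe address is no longer hard-coded to 0; it becomes
+the first PAGE_SIZE * 2 aligned slot with room for 2 pages taken from the
+IOVA list. A simplified standalone sketch of that search (the list is
+modelled as a plain array of ranges whose end is treated as exclusive;
+all names are illustrative):
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define PAGE_SIZE       4096UL
+#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
+
+struct iova_range { uint64_t start, end; };     /* end is exclusive */
+
+/* first PAGE_SIZE * 2 aligned address with room for two pages,
+ * or UINT64_MAX if no usable range exists */
+static uint64_t find_probe_addr(const struct iova_range *r, int n)
+{
+        for (int i = 0; i < n; i++) {
+                uint64_t start = ALIGN_UP(r[i].start, PAGE_SIZE * 2);
+
+                if (start < r[i].end && r[i].end - start >= PAGE_SIZE * 2)
+                        return start;
+        }
+        return UINT64_MAX;
+}
+
+int main(void)
+{
+        /* example: everything below 4 GiB is reserved, as in the s390
+         * case described above */
+        struct iova_range ranges[] = { { 1ULL << 32, 1ULL << 40 } };
+
+        printf("probe at 0x%llx\n",
+               (unsigned long long)find_probe_addr(ranges, 1));
+        return 0;
+}
+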
+Fixes: af029169b8fd ("vfio/type1: Check reserved region conflict and update iova list")
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/20230110164427.4051938-2-schnelle@linux.ibm.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 31 ++++++++++++++++++++-----------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
+index 23c24fe98c00..2209372f236d 100644
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -1856,24 +1856,33 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
+  * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+  * hugetlbfs is in use.
+  */
+-static void vfio_test_domain_fgsp(struct vfio_domain *domain)
++static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions)
+ {
+-      struct page *pages;
+       int ret, order = get_order(PAGE_SIZE * 2);
++      struct vfio_iova *region;
++      struct page *pages;
++      dma_addr_t start;
+       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!pages)
+               return;
+-      ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+-                      IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+-      if (!ret) {
+-              size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
++      list_for_each_entry(region, regions, list) {
++              start = ALIGN(region->start, PAGE_SIZE * 2);
++              if (start >= region->end || (region->end - start < PAGE_SIZE * 2))
++                      continue;
+-              if (unmapped == PAGE_SIZE)
+-                      iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+-              else
+-                      domain->fgsp = true;
++              ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2,
++                              IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
++              if (!ret) {
++                      size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE);
++
++                      if (unmapped == PAGE_SIZE)
++                              iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE);
++                      else
++                              domain->fgsp = true;
++              }
++              break;
+       }
+       __free_pages(pages, order);
+@@ -2326,7 +2335,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
+               }
+       }
+-      vfio_test_domain_fgsp(domain);
++      vfio_test_domain_fgsp(domain, &iova_copy);
+       /* replay mappings on new domains */
+       ret = vfio_iommu_replay(iommu, domain);
+-- 
+2.39.0
+