From: Sasha Levin
Date: Sat, 28 Jan 2023 03:46:11 +0000 (-0500)
Subject: Fixes for 6.1
X-Git-Tag: v5.10.166~65
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0e311b28d47b08fe1509319c197e36060a845eec;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 6.1

Signed-off-by: Sasha Levin
---

diff --git a/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch b/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch
new file mode 100644
index 00000000000..b0d1b71567b
--- /dev/null
+++ b/queue-6.1/kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch
@@ -0,0 +1,99 @@
+From fd33207736a2552bd0d5f6c62a4f115b1e09da5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 20 Jan 2023 07:05:28 -0800
+Subject: kvm/vfio: Fix potential deadlock on vfio group_lock
+
+From: Yi Liu
+
+[ Upstream commit 51cdc8bc120ef6e42f6fb758341f5d91bc955952 ]
+
+Currently it is possible that the final put of a KVM reference comes from
+vfio during its device close operation. This occurs while the vfio group
+lock is held; however, if the vfio device is still in the kvm device list,
+then the following call chain could result in a deadlock:
+
+VFIO holds group->group_lock/group_rwsem
+ -> kvm_put_kvm
+ -> kvm_destroy_vm
+ -> kvm_destroy_devices
+ -> kvm_vfio_destroy
+ -> kvm_vfio_file_set_kvm
+ -> vfio_file_set_kvm
+ -> try to hold group->group_lock/group_rwsem
+
+The key function is kvm_destroy_devices(), which triggers the destroy cb
+of kvm_device_ops. That callback calls back into vfio and tries to take
+group_lock, so the deadlock is avoided if this path no longer calls back
+into vfio. There is a way to do that: KVM provides another point at which
+to free the kvm-vfio device, namely when the device file descriptor is
+closed. This can be achieved by providing the release cb instead of the
+destroy cb. Also rename kvm_vfio_destroy() to kvm_vfio_release().
+
+ /*
+ * Destroy is responsible for freeing dev.
+ *
+ * Destroy may be called before or after destructors are called
+ * on emulated I/O regions, depending on whether a reference is
+ * held by a vcpu or other kvm component that gets destroyed
+ * after the emulated I/O.
+ */
+ void (*destroy)(struct kvm_device *dev);
+
+ /*
+ * Release is an alternative method to free the device. It is
+ * called when the device file descriptor is closed. Once
+ * release is called, the destroy method will not be called
+ * anymore as the device is removed from the device list of
+ * the VM. kvm->lock is held.
+ */
+ void (*release)(struct kvm_device *dev);
+
+Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM")
+Reported-by: Alex Williamson
+Suggested-by: Kevin Tian
+Reviewed-by: Jason Gunthorpe
+Signed-off-by: Yi Liu
+Reviewed-by: Matthew Rosato
+Link: https://lore.kernel.org/r/20230114000351.115444-1-mjrosato@linux.ibm.com
+Link: https://lore.kernel.org/r/20230120150528.471752-1-yi.l.liu@intel.com
+[aw: update comment as well, s/destroy/release/]
+Signed-off-by: Alex Williamson
+Signed-off-by: Sasha Levin
+---
+ virt/kvm/vfio.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
+index 495ceabffe88..9584eb57e0ed 100644
+--- a/virt/kvm/vfio.c
++++ b/virt/kvm/vfio.c
+@@ -336,7 +336,7 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
+ return -ENXIO;
+ }
+
+-static void kvm_vfio_destroy(struct kvm_device *dev)
++static void kvm_vfio_release(struct kvm_device *dev)
+ {
+ struct kvm_vfio *kv = dev->private;
+ struct kvm_vfio_group *kvg, *tmp;
+@@ -355,7 +355,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
+ kvm_vfio_update_coherency(dev);
+
+ kfree(kv);
+- kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
++ kfree(dev); /* alloc by kvm_ioctl_create_device, free by .release */
+ }
+
+ static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+@@ -363,7 +363,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+ static struct kvm_device_ops kvm_vfio_ops = {
+ .name = "kvm-vfio",
+ .create = kvm_vfio_create,
+- .destroy = kvm_vfio_destroy,
++ .release = kvm_vfio_release,
+ .set_attr = kvm_vfio_set_attr,
+ .has_attr = kvm_vfio_has_attr,
+ };
+--
+2.39.0
+
diff --git a/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch b/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
new file mode 100644
index 00000000000..76ea63b2e45
--- /dev/null
+++ b/queue-6.1/nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
@@ -0,0 +1,121 @@
+From 39ddad2cd9582eac2e3a5cced784702f15ddc6d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 20 Jan 2023 14:52:14 -0500
+Subject: nfsd: don't free files unconditionally in __nfsd_file_cache_purge
+
+From: Jeff Layton
+
+[ Upstream commit 4bdbba54e9b1c769da8ded9abd209d765715e1d6 ]
+
+nfsd_file_cache_purge is called when the server is shutting down, in
+which case, tearing things down is generally fine, but it also gets
+called when the exports cache is flushed.
+
+Instead of walking the cache and freeing everything unconditionally,
+handle it the same as when we have a notification of conflicting access.
+
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Reported-by: Ruben Vestergaard
+Reported-by: Torkil Svensgaard
+Reported-by: Shachar Kagan
+Signed-off-by: Jeff Layton
+Tested-by: Shachar Kagan
+Signed-off-by: Chuck Lever
+Signed-off-by: Sasha Levin
+---
+ fs/nfsd/filecache.c | 61 ++++++++++++++++++++++++++-------------------
+ 1 file changed, 36 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index ea6fb0e6b165..142b3c928f76 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -638,6 +638,39 @@ static struct shrinker nfsd_file_shrinker = {
+ .seeks = 1,
+ };
+
++/**
++ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
++ * @nf: nfsd_file to attempt to queue
++ * @dispose: private list to queue successfully-put objects
++ *
++ * Unhash an nfsd_file, try to get a reference to it, and then put that
++ * reference.
If it's the last reference, queue it to the dispose list. ++ */ ++static void ++nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) ++ __must_hold(RCU) ++{ ++ int decrement = 1; ++ ++ /* If we raced with someone else unhashing, ignore it */ ++ if (!nfsd_file_unhash(nf)) ++ return; ++ ++ /* If we can't get a reference, ignore it */ ++ if (!nfsd_file_get(nf)) ++ return; ++ ++ /* Extra decrement if we remove from the LRU */ ++ if (nfsd_file_lru_remove(nf)) ++ ++decrement; ++ ++ /* If refcount goes to 0, then put on the dispose list */ ++ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { ++ list_add(&nf->nf_lru, dispose); ++ trace_nfsd_file_closing(nf); ++ } ++} ++ + /** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files +@@ -665,30 +698,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) + + rcu_read_lock(); + do { +- int decrement = 1; +- + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (!nf) + break; +- +- /* If we raced with someone else unhashing, ignore it */ +- if (!nfsd_file_unhash(nf)) +- continue; +- +- /* If we can't get a reference, ignore it */ +- if (!nfsd_file_get(nf)) +- continue; +- +- /* Extra decrement if we remove from the LRU */ +- if (nfsd_file_lru_remove(nf)) +- ++decrement; +- +- /* If refcount goes to 0, then put on the dispose list */ +- if (refcount_sub_and_test(decrement, &nf->nf_ref)) { +- list_add(&nf->nf_lru, dispose); +- trace_nfsd_file_closing(nf); +- } ++ nfsd_file_cond_queue(nf, dispose); + } while (1); + rcu_read_unlock(); + } +@@ -905,11 +919,8 @@ __nfsd_file_cache_purge(struct net *net) + + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { +- if (!net || nf->nf_net == net) { +- nfsd_file_unhash(nf); +- nfsd_file_lru_remove(nf); +- list_add(&nf->nf_lru, &dispose); +- } ++ if (!net || nf->nf_net == net) ++ nfsd_file_cond_queue(nf, &dispose); + nf = rhashtable_walk_next(&iter); + } + +-- +2.39.0 + diff --git a/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch b/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch new file mode 100644 index 00000000000..07b9817e473 --- /dev/null +++ b/queue-6.1/sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch @@ -0,0 +1,98 @@ +From d110d8b85513cf38f084fa26afeaa2784c6dbf35 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Oct 2022 10:10:52 +0200 +Subject: sched/fair: Check if prev_cpu has highest spare cap in feec() + +From: Pierre Gondois + +[ Upstream commit ad841e569f5c88e3332b32a000f251f33ff32187 ] + +When evaluating the CPU candidates in the perf domain (pd) containing +the previously used CPU (prev_cpu), find_energy_efficient_cpu() +evaluates the energy of the pd: +- without the task (base_energy) +- with the task placed on prev_cpu (if the task fits) +- with the task placed on the CPU with the highest spare capacity, + prev_cpu being excluded from this set + +If prev_cpu is already the CPU with the highest spare capacity, +max_spare_cap_cpu will be the CPU with the second highest spare +capacity. + +On an Arm64 Juno-r2, with a workload of 10 tasks at a 10% duty cycle, +when prev_cpu and max_spare_cap_cpu are both valid candidates, +prev_spare_cap > max_spare_cap at ~82%. +Thus the energy of the pd when placing the task on max_spare_cap_cpu +is computed with no possible positive outcome 82% most of the time. 
+ +Do not consider max_spare_cap_cpu as a valid candidate if +prev_spare_cap > max_spare_cap. + +Signed-off-by: Pierre Gondois +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20221006081052.3862167-2-pierre.gondois@arm.com +Stable-dep-of: e26fd28db828 ("sched/uclamp: Fix a uninitialized variable warnings") +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 0f32acb05055..bb04ca795fc3 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7217,7 +7217,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + unsigned long cur_delta, max_spare_cap = 0; + unsigned long rq_util_min, rq_util_max; + unsigned long util_min, util_max; +- bool compute_prev_delta = false; ++ unsigned long prev_spare_cap = 0; + int max_spare_cap_cpu = -1; + unsigned long base_energy; + +@@ -7279,18 +7279,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + + if (cpu == prev_cpu) { + /* Always use prev_cpu as a candidate. */ +- compute_prev_delta = true; ++ prev_spare_cap = cpu_cap; + } else if (cpu_cap > max_spare_cap) { + /* + * Find the CPU with the maximum spare capacity +- * in the performance domain. ++ * among the remaining CPUs in the performance ++ * domain. + */ + max_spare_cap = cpu_cap; + max_spare_cap_cpu = cpu; + } + } + +- if (max_spare_cap_cpu < 0 && !compute_prev_delta) ++ if (max_spare_cap_cpu < 0 && prev_spare_cap == 0) + continue; + + eenv_pd_busy_time(&eenv, cpus, p); +@@ -7298,7 +7299,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + base_energy = compute_energy(&eenv, pd, cpus, p, -1); + + /* Evaluate the energy impact of using prev_cpu. */ +- if (compute_prev_delta) { ++ if (prev_spare_cap > 0) { + prev_delta = compute_energy(&eenv, pd, cpus, p, + prev_cpu); + /* CPU utilization has changed */ +@@ -7309,7 +7310,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + } + + /* Evaluate the energy impact of using max_spare_cap_cpu. */ +- if (max_spare_cap_cpu >= 0) { ++ if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) { + cur_delta = compute_energy(&eenv, pd, cpus, p, + max_spare_cap_cpu); + /* CPU utilization has changed */ +-- +2.39.0 + diff --git a/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch b/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch new file mode 100644 index 00000000000..e2477d177d3 --- /dev/null +++ b/queue-6.1/sched-uclamp-fix-a-uninitialized-variable-warnings.patch @@ -0,0 +1,96 @@ +From cd5fc02331ccded414dd2de5190d308d4ca11da4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jan 2023 12:27:07 +0000 +Subject: sched/uclamp: Fix a uninitialized variable warnings + +From: Qais Yousef + +[ Upstream commit e26fd28db82899be71b4b949527373d0a6be1e65 ] + +Addresses the following warnings: + +> config: riscv-randconfig-m031-20221111 +> compiler: riscv64-linux-gcc (GCC) 12.1.0 +> +> smatch warnings: +> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'. +> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'. 
+ +Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()") +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 35 ++++++++++++++++------------------- + 1 file changed, 16 insertions(+), 19 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index bb04ca795fc3..2c3d0d49c80e 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7213,10 +7213,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + eenv_task_busy_time(&eenv, p, prev_cpu); + + for (; pd; pd = pd->next) { ++ unsigned long util_min = p_util_min, util_max = p_util_max; + unsigned long cpu_cap, cpu_thermal_cap, util; + unsigned long cur_delta, max_spare_cap = 0; + unsigned long rq_util_min, rq_util_max; +- unsigned long util_min, util_max; + unsigned long prev_spare_cap = 0; + int max_spare_cap_cpu = -1; + unsigned long base_energy; +@@ -7235,6 +7235,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + eenv.pd_cap = 0; + + for_each_cpu(cpu, cpus) { ++ struct rq *rq = cpu_rq(cpu); ++ + eenv.pd_cap += cpu_thermal_cap; + + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) +@@ -7253,24 +7255,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + * much capacity we can get out of the CPU; this is + * aligned with sched_cpu_util(). + */ +- if (uclamp_is_used()) { +- if (uclamp_rq_is_idle(cpu_rq(cpu))) { +- util_min = p_util_min; +- util_max = p_util_max; +- } else { +- /* +- * Open code uclamp_rq_util_with() except for +- * the clamp() part. Ie: apply max aggregation +- * only. util_fits_cpu() logic requires to +- * operate on non clamped util but must use the +- * max-aggregated uclamp_{min, max}. +- */ +- rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); +- rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); +- +- util_min = max(rq_util_min, p_util_min); +- util_max = max(rq_util_max, p_util_max); +- } ++ if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { ++ /* ++ * Open code uclamp_rq_util_with() except for ++ * the clamp() part. Ie: apply max aggregation ++ * only. util_fits_cpu() logic requires to ++ * operate on non clamped util but must use the ++ * max-aggregated uclamp_{min, max}. ++ */ ++ rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); ++ rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); ++ ++ util_min = max(rq_util_min, p_util_min); ++ util_max = max(rq_util_max, p_util_max); + } + if (!util_fits_cpu(util, util_min, util_max, cpu)) + continue; +-- +2.39.0 + diff --git a/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch b/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch new file mode 100644 index 00000000000..32562d42e97 --- /dev/null +++ b/queue-6.1/scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch @@ -0,0 +1,40 @@ +From fdc9a2376321ace7faf4c7383591f9f52ac61231 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Jan 2023 06:12:55 +0300 +Subject: scsi: hpsa: Fix allocation size for scsi_host_alloc() + +From: Alexey V. Vissarionov + +[ Upstream commit bbbd25499100c810ceaf5193c3cfcab9f7402a33 ] + +The 'h' is a pointer to struct ctlr_info, so it's just 4 or 8 bytes, while +the structure itself is much bigger. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Fixes: edd163687ea5 ("hpsa: add driver for HP Smart Array controllers.") +Link: https://lore.kernel.org/r/20230118031255.GE15213@altlinux.org +Signed-off-by: Alexey V. Vissarionov +Acked-by: Don Brace +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/hpsa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index 4dbf51e2623a..f6da34850af9 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -5850,7 +5850,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) + { + struct Scsi_Host *sh; + +- sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h)); ++ sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info)); + if (sh == NULL) { + dev_err(&h->pdev->dev, "scsi_host_alloc failed\n"); + return -ENOMEM; +-- +2.39.0 + diff --git a/queue-6.1/series b/queue-6.1/series index 293acb51827..4deec8c46a1 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -201,3 +201,9 @@ drm-amdgpu-complete-gfxoff-allow-signal-during-suspend-without-delay.patch io_uring-msg_ring-fix-remote-queue-to-disabled-ring.patch wifi-mac80211-proper-mark-itxqs-for-resumption.patch wifi-mac80211-fix-itxq-ampdu-fragmentation-handling.patch +sched-fair-check-if-prev_cpu-has-highest-spare-cap-i.patch +sched-uclamp-fix-a-uninitialized-variable-warnings.patch +vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch +scsi-hpsa-fix-allocation-size-for-scsi_host_alloc.patch +kvm-vfio-fix-potential-deadlock-on-vfio-group_lock.patch +nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch diff --git a/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch b/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch new file mode 100644 index 00000000000..46102b36499 --- /dev/null +++ b/queue-6.1/vfio-type1-respect-iommu-reserved-regions-in-vfio_te.patch @@ -0,0 +1,103 @@ +From f0fa37772ed7be147154796fd36d4ba1bcf23ff4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Jan 2023 17:44:27 +0100 +Subject: vfio/type1: Respect IOMMU reserved regions in vfio_test_domain_fgsp() + +From: Niklas Schnelle + +[ Upstream commit 895c0747f726bb50c9b7a805613a61d1b6f9fa06 ] + +Since commit cbf7827bc5dc ("iommu/s390: Fix potential s390_domain +aperture shrinking") the s390 IOMMU driver uses reserved regions for the +system provided DMA ranges of PCI devices. Previously it reduced the +size of the IOMMU aperture and checked it on each mapping operation. +On current machines the system denies use of DMA addresses below 2^32 for +all PCI devices. + +Usually mapping IOVAs in a reserved regions is harmless until a DMA +actually tries to utilize the mapping. However on s390 there is +a virtual PCI device called ISM which is implemented in firmware and +used for cross LPAR communication. Unlike real PCI devices this device +does not use the hardware IOMMU but inspects IOMMU translation tables +directly on IOTLB flush (s390 RPCIT instruction). If it detects IOVA +mappings outside the allowed ranges it goes into an error state. This +error state then causes the device to be unavailable to the KVM guest. + +Analysing this we found that vfio_test_domain_fgsp() maps 2 pages at DMA +address 0 irrespective of the IOMMUs reserved regions. Even if usually +harmless this seems wrong in the general case so instead go through the +freshly updated IOVA list and try to find a range that isn't reserved, +and fits 2 pages, is PAGE_SIZE * 2 aligned. If found use that for +testing for fine grained super pages. 
+ +Fixes: af029169b8fd ("vfio/type1: Check reserved region conflict and update iova list") +Signed-off-by: Niklas Schnelle +Reviewed-by: Matthew Rosato +Reviewed-by: Jason Gunthorpe +Link: https://lore.kernel.org/r/20230110164427.4051938-2-schnelle@linux.ibm.com +Signed-off-by: Alex Williamson +Signed-off-by: Sasha Levin +--- + drivers/vfio/vfio_iommu_type1.c | 31 ++++++++++++++++++++----------- + 1 file changed, 20 insertions(+), 11 deletions(-) + +diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c +index 23c24fe98c00..2209372f236d 100644 +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -1856,24 +1856,33 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, + * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when + * hugetlbfs is in use. + */ +-static void vfio_test_domain_fgsp(struct vfio_domain *domain) ++static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions) + { +- struct page *pages; + int ret, order = get_order(PAGE_SIZE * 2); ++ struct vfio_iova *region; ++ struct page *pages; ++ dma_addr_t start; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!pages) + return; + +- ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, +- IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); +- if (!ret) { +- size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); ++ list_for_each_entry(region, regions, list) { ++ start = ALIGN(region->start, PAGE_SIZE * 2); ++ if (start >= region->end || (region->end - start < PAGE_SIZE * 2)) ++ continue; + +- if (unmapped == PAGE_SIZE) +- iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE); +- else +- domain->fgsp = true; ++ ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, ++ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); ++ if (!ret) { ++ size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); ++ ++ if (unmapped == PAGE_SIZE) ++ iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE); ++ else ++ domain->fgsp = true; ++ } ++ break; + } + + __free_pages(pages, order); +@@ -2326,7 +2335,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, + } + } + +- vfio_test_domain_fgsp(domain); ++ vfio_test_domain_fgsp(domain, &iova_copy); + + /* replay mappings on new domains */ + ret = vfio_iommu_replay(iommu, domain); +-- +2.39.0 +