iommupt: Only cache flush memory changed by unmap
author     Jason Gunthorpe <jgg@nvidia.com>
           Sat, 24 Jan 2026 21:00:21 +0000 (17:00 -0400)
committer  Joerg Roedel <joerg.roedel@amd.com>
           Wed, 28 Jan 2026 14:14:17 +0000 (15:14 +0100)
The cache flush was happening at every level across the whole range of
iteration, even if no leaf or table entries were cleared. Instead, flush
only the sub-range that was actually written.

Overflushing isn't a correctness problem, but it does impact the
performance of unmap.
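
As a standalone illustration of the pattern (not the kernel's code:
demo_unmap(), entry_needs_clear() and cache_flush_entries() below are
hypothetical stand-ins for __unmap_range(), the per-entry unmap tests and
flush_writes_range()), a minimal C sketch of the dirty-window tracking:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_ENTRIES 16

struct demo_table {
	unsigned long entries[NUM_ENTRIES];
};

/* Hypothetical: decide whether this entry is part of the unmap. */
static bool entry_needs_clear(const struct demo_table *t, unsigned int i)
{
	return t->entries[i] != 0;
}

/* Hypothetical stand-in for a CPU cache flush of entries [start, end). */
static void cache_flush_entries(struct demo_table *t, unsigned int start,
				unsigned int end)
{
	(void)t;	/* A real flush would target &t->entries[start] */
	printf("flush entries [%u, %u)\n", start, end);
}

static void demo_unmap(struct demo_table *t, unsigned int start,
		       unsigned int end)
{
	/* UINT_MAX sentinels: equal values mean "nothing was written". */
	unsigned int flush_start = UINT_MAX;
	unsigned int flush_end = UINT_MAX;
	unsigned int i;

	for (i = start; i < end; i++) {
		if (!entry_needs_clear(t, i))
			continue;	/* Untouched entries need no flush */

		t->entries[i] = 0;	/* The actual write to the table */

		/* Grow the dirty window to cover the written entry. */
		if (i < flush_start)
			flush_start = i;
		flush_end = i + 1;
	}

	/* Flush only the sub-range that was actually written, if any. */
	if (flush_start != flush_end)
		cache_flush_entries(t, flush_start, flush_end);
}

int main(void)
{
	struct demo_table t = { .entries = { [3] = 1, [4] = 1, [9] = 1 } };

	/* Iterates [0, 16) but only flushes [3, 10). */
	demo_unmap(&t, 0, NUM_ENTRIES);
	return 0;
}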

After this series the performance compared to the original VT-d
implementation with cache flushing turned on is:

map_pages
   pgsz  ,avg new,old ns, min new,old ns  , min % (+ve is better)
     2^12,    253,266   ,     213,227     ,   6.06
     2^21,    246,244   ,     221,219     ,   0.00
     2^30,    231,240   ,     209,217     ,   3.03
 256*2^12,   2604,2668  ,    2415,2540    ,   4.04
 256*2^21,   2495,2824  ,    2390,2734    ,  12.12
 256*2^30,   2542,2845  ,    2380,2718    ,  12.12

unmap_pages
   pgsz  ,avg new,old ns, min new,old ns  , min % (+ve is better)
     2^12,    259,292   ,     222,251     ,  11.11
     2^21,    255,259   ,     227,236     ,   3.03
     2^30,    238,254   ,     217,230     ,   5.05
 256*2^12,   2751,2620  ,    2417,2437    ,   0.00
 256*2^21,   2461,2526  ,    2377,2423    ,   1.01
 256*2^30,   2498,2543  ,    2370,2404    ,   1.01

Fixes: efa03dab7ce4 ("iommupt: Flush the CPU cache after any writes to the page table")
Reported-by: Francois Dugast <francois.dugast@intel.com>
Closes: https://lore.kernel.org/all/20260121130233.257428-1-francois.dugast@intel.com/
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
drivers/iommu/generic_pt/iommu_pt.h

index 52ef028ed2db97b7850ed44e8ede7681a3c1cb35..d575f3ba9d3416b7d0395e530b3a7496897737ca 100644
@@ -931,6 +931,8 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
                                        struct pt_table_p *table)
 {
        struct pt_state pts = pt_init(range, level, table);
+       unsigned int flush_start_index = UINT_MAX;
+       unsigned int flush_end_index = UINT_MAX;
        struct pt_unmap_args *unmap = arg;
        unsigned int num_oas = 0;
        unsigned int start_index;
@@ -986,6 +988,9 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
                                iommu_pages_list_add(&unmap->free_list,
                                                     pts.table_lower);
                                pt_clear_entries(&pts, ilog2(1));
+                               if (pts.index < flush_start_index)
+                                       flush_start_index = pts.index;
+                               flush_end_index = pts.index + 1;
                        }
                        pts.index++;
                } else {
@@ -999,7 +1004,10 @@ start_oa:
                        num_contig_lg2 = pt_entry_num_contig_lg2(&pts);
                        pt_clear_entries(&pts, num_contig_lg2);
                        num_oas += log2_to_int(num_contig_lg2);
+                       if (pts.index < flush_start_index)
+                               flush_start_index = pts.index;
                        pts.index += log2_to_int(num_contig_lg2);
+                       flush_end_index = pts.index;
                }
                if (pts.index >= pts.end_index)
                        break;
@@ -1007,7 +1015,8 @@ start_oa:
        } while (true);
 
        unmap->unmapped += log2_mul(num_oas, pt_table_item_lg2sz(&pts));
-       flush_writes_range(&pts, start_index, pts.index);
+       if (flush_start_index != flush_end_index)
+               flush_writes_range(&pts, flush_start_index, flush_end_index);
 
        return ret;
 }
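
A note on the design, as it reads from the diff: both bounds start at
UINT_MAX, so flush_start_index == flush_end_index exactly when the loop
never cleared an entry, and that one comparison is all the no-write path
costs. flush_end_index is bumped past every cleared entry while
flush_start_index only ever moves down, so the flushed window is the
smallest range covering all writes at this level, and a fully-populated
unmap still issues a single flush_writes_range() call per level.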