]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm/hmm: let users tag specific PFNs with a DMA mapped bit
authorLeon Romanovsky <leonro@nvidia.com>
Mon, 28 Apr 2025 09:22:16 +0000 (12:22 +0300)
committerLeon Romanovsky <leon@kernel.org>
Mon, 12 May 2025 10:06:37 +0000 (06:06 -0400)
Introduce a new sticky flag (HMM_PFN_DMA_MAPPED), which isn't overwritten
by an HMM range fault. This flag allows users to tag specific PFNs to
record whether a given PFN has already been DMA mapped.

Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
include/linux/hmm.h
mm/hmm.c

index 126a365716676a0eecb4b1e6cb97f7f8df1bce9a..a43e56f273a110e1161d14355b5c5e3c9cb045ab 100644 (file)
@@ -23,6 +23,8 @@ struct mmu_interval_notifier;
  * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
  * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
  *                 fail. ie poisoned memory, special pages, no vma, etc
+ * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
+ *                      to mark that page is already DMA mapped
  *
  * On input:
  * 0                 - Return the current state of the page, do not fault it.
@@ -36,13 +38,19 @@ enum hmm_pfn_flags {
        HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
        HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
        HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
-       HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
+       /*
+        * Sticky flags, carried from input to output,
+        * don't forget to update HMM_PFN_INOUT_FLAGS
+        */
+       HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4),
+
+       HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9),
 
        /* Input flags */
        HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
        HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
 
-       HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT,
+       HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1),
 };
 
 /*
@@ -57,6 +65,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
        return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
 }
 
+/*
+ * hmm_pfn_to_phys() - return physical address pointed to by a device entry
+ */
+static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn)
+{
+       return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS);
+}
+
 /*
  * hmm_pfn_to_map_order() - return the CPU mapping size order
  *
index 082f7b7c0b9ebc40fd4cc1867ca55699348d46f8..51fe8b011cc7054d356580258b7f493075e34db1 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -39,13 +39,20 @@ enum {
        HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
 };
 
+enum {
+       /* These flags are carried from input-to-output */
+       HMM_PFN_INOUT_FLAGS = HMM_PFN_DMA_MAPPED,
+};
+
 static int hmm_pfns_fill(unsigned long addr, unsigned long end,
                         struct hmm_range *range, unsigned long cpu_flags)
 {
        unsigned long i = (addr - range->start) >> PAGE_SHIFT;
 
-       for (; addr < end; addr += PAGE_SIZE, i++)
-               range->hmm_pfns[i] = cpu_flags;
+       for (; addr < end; addr += PAGE_SIZE, i++) {
+               range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+               range->hmm_pfns[i] |= cpu_flags;
+       }
        return 0;
 }
 
@@ -202,8 +209,10 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
                return hmm_vma_fault(addr, end, required_fault, walk);
 
        pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-       for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
-               hmm_pfns[i] = pfn | cpu_flags;
+       for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
+               hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+               hmm_pfns[i] |= pfn | cpu_flags;
+       }
        return 0;
 }
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -230,14 +239,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
        unsigned long cpu_flags;
        pte_t pte = ptep_get(ptep);
        uint64_t pfn_req_flags = *hmm_pfn;
+       uint64_t new_pfn_flags = 0;
 
        if (pte_none_mostly(pte)) {
                required_fault =
                        hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
                if (required_fault)
                        goto fault;
-               *hmm_pfn = 0;
-               return 0;
+               goto out;
        }
 
        if (!pte_present(pte)) {
@@ -253,16 +262,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                        cpu_flags = HMM_PFN_VALID;
                        if (is_writable_device_private_entry(entry))
                                cpu_flags |= HMM_PFN_WRITE;
-                       *hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
-                       return 0;
+                       new_pfn_flags = swp_offset_pfn(entry) | cpu_flags;
+                       goto out;
                }
 
                required_fault =
                        hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
-               if (!required_fault) {
-                       *hmm_pfn = 0;
-                       return 0;
-               }
+               if (!required_fault)
+                       goto out;
 
                if (!non_swap_entry(entry))
                        goto fault;
@@ -304,11 +311,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                        pte_unmap(ptep);
                        return -EFAULT;
                }
-               *hmm_pfn = HMM_PFN_ERROR;
-               return 0;
+               new_pfn_flags = HMM_PFN_ERROR;
+               goto out;
        }
 
-       *hmm_pfn = pte_pfn(pte) | cpu_flags;
+       new_pfn_flags = pte_pfn(pte) | cpu_flags;
+out:
+       *hmm_pfn = (*hmm_pfn & HMM_PFN_INOUT_FLAGS) | new_pfn_flags;
        return 0;
 
 fault:
@@ -448,8 +457,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
                }
 
                pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-               for (i = 0; i < npages; ++i, ++pfn)
-                       hmm_pfns[i] = pfn | cpu_flags;
+               for (i = 0; i < npages; ++i, ++pfn) {
+                       hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+                       hmm_pfns[i] |= pfn | cpu_flags;
+               }
                goto out_unlock;
        }
 
@@ -507,8 +518,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
        }
 
        pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
-       for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-               range->hmm_pfns[i] = pfn | cpu_flags;
+       for (; addr < end; addr += PAGE_SIZE, i++, pfn++) {
+               range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+               range->hmm_pfns[i] |= pfn | cpu_flags;
+       }
 
        spin_unlock(ptl);
        return 0;