git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
iommufd: Add IOMMU_IOAS_CHANGE_PROCESS
author: Steve Sistare <steven.sistare@oracle.com>
        Wed, 13 Nov 2024 19:51:36 +0000 (11:51 -0800)
committer: Jason Gunthorpe <jgg@nvidia.com>
        Thu, 14 Nov 2024 16:57:13 +0000 (12:57 -0400)
Add an ioctl that updates all DMA mappings to reflect the current process:
change the mm, and transfer the locked memory accounting from the old mm to
the current mm.
This will be used for live update, allowing an old process to hand the
iommufd device descriptor to a new process.  The new process calls the
ioctl.

IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with
IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does
not depend on the userland VA of the pages (which is different in the new
process).
IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present.

This is a revised version of code originally provided by Jason.

Link: https://patch.msgid.link/r/1731527497-16091-4-git-send-email-steven.sistare@oracle.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/iommu/iommufd/io_pagetable.h
drivers/iommu/iommufd/ioas.c
drivers/iommu/iommufd/iommufd_private.h
drivers/iommu/iommufd/main.c
include/uapi/linux/iommufd.h

index f5f20fa639ef203b2bc8bc013c9ed4b66e1b30b1..10c928a9a4633254c4cabecb7626df0c3383a56c 100644 (file)
@@ -173,6 +173,7 @@ enum {
        IOPT_PAGES_ACCOUNT_NONE = 0,
        IOPT_PAGES_ACCOUNT_USER = 1,
        IOPT_PAGES_ACCOUNT_MM = 2,
+       IOPT_PAGES_ACCOUNT_MODE_NUM = 3,
 };
 
 enum iopt_address_type {
index c82ed5a92e3bc9f770aa30c9c1aeccf8e1c19cb6..1542c5fd10a85cac4e20d37cc0f3fa1f904e0dd6 100644 (file)
@@ -439,6 +439,153 @@ static int iommufd_take_all_iova_rwsem(struct iommufd_ctx *ictx,
        return 0;
 }
 
+static bool need_charge_update(struct iopt_pages *pages)
+{
+       switch (pages->account_mode) {
+       case IOPT_PAGES_ACCOUNT_NONE:
+               return false;
+       case IOPT_PAGES_ACCOUNT_MM:
+               return pages->source_mm != current->mm;
+       case IOPT_PAGES_ACCOUNT_USER:
+               /*
+                * Update when mm changes because it also accounts
+                * in mm->pinned_vm.
+                */
+               return (pages->source_user != current_user()) ||
+                      (pages->source_mm != current->mm);
+       }
+       return true;
+}
+
+static int charge_current(unsigned long *npinned)
+{
+       struct iopt_pages tmp = {
+               .source_mm = current->mm,
+               .source_task = current->group_leader,
+               .source_user = current_user(),
+       };
+       unsigned int account_mode;
+       int rc;
+
+       for (account_mode = 0; account_mode != IOPT_PAGES_ACCOUNT_MODE_NUM;
+            account_mode++) {
+               if (!npinned[account_mode])
+                       continue;
+
+               tmp.account_mode = account_mode;
+               rc = iopt_pages_update_pinned(&tmp, npinned[account_mode], true,
+                                             NULL);
+               if (rc)
+                       goto err_undo;
+       }
+       return 0;
+
+err_undo:
+       while (account_mode != 0) {
+               account_mode--;
+               if (!npinned[account_mode])
+                       continue;
+               tmp.account_mode = account_mode;
+               iopt_pages_update_pinned(&tmp, npinned[account_mode], false,
+                                        NULL);
+       }
+       return rc;
+}
+
+static void change_mm(struct iopt_pages *pages)
+{
+       struct task_struct *old_task = pages->source_task;
+       struct user_struct *old_user = pages->source_user;
+       struct mm_struct *old_mm = pages->source_mm;
+
+       pages->source_mm = current->mm;
+       mmgrab(pages->source_mm);
+       mmdrop(old_mm);
+
+       pages->source_task = current->group_leader;
+       get_task_struct(pages->source_task);
+       put_task_struct(old_task);
+
+       pages->source_user = get_uid(current_user());
+       free_uid(old_user);
+}
+
+#define for_each_ioas_area(_xa, _index, _ioas, _area) \
+       xa_for_each((_xa), (_index), (_ioas)) \
+               for (_area = iopt_area_iter_first(&_ioas->iopt, 0, ULONG_MAX); \
+                    _area; \
+                    _area = iopt_area_iter_next(_area, 0, ULONG_MAX))
+
+int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd)
+{
+       struct iommu_ioas_change_process *cmd = ucmd->cmd;
+       struct iommufd_ctx *ictx = ucmd->ictx;
+       unsigned long all_npinned[IOPT_PAGES_ACCOUNT_MODE_NUM] = {};
+       struct iommufd_ioas *ioas;
+       struct iopt_area *area;
+       struct iopt_pages *pages;
+       struct xarray ioas_list;
+       unsigned long index;
+       int rc;
+
+       if (cmd->__reserved)
+               return -EOPNOTSUPP;
+
+       xa_init(&ioas_list);
+       rc = iommufd_take_all_iova_rwsem(ictx, &ioas_list);
+       if (rc)
+               return rc;
+
+       for_each_ioas_area(&ioas_list, index, ioas, area)  {
+               if (area->pages->type != IOPT_ADDRESS_FILE) {
+                       rc = -EINVAL;
+                       goto out;
+               }
+       }
+
+       /*
+        * Count last_pinned pages, then clear it to avoid double counting
+        * if the same iopt_pages is visited multiple times in this loop.
+        * Since we are under all the locks, npinned == last_npinned, so we
+        * can easily restore last_npinned before we return.
+        */
+       for_each_ioas_area(&ioas_list, index, ioas, area)  {
+               pages = area->pages;
+
+               if (need_charge_update(pages)) {
+                       all_npinned[pages->account_mode] += pages->last_npinned;
+                       pages->last_npinned = 0;
+               }
+       }
+
+       rc = charge_current(all_npinned);
+
+       if (rc) {
+               /* Charge failed.  Fix last_npinned and bail. */
+               for_each_ioas_area(&ioas_list, index, ioas, area)
+                       area->pages->last_npinned = area->pages->npinned;
+               goto out;
+       }
+
+       for_each_ioas_area(&ioas_list, index, ioas, area) {
+               pages = area->pages;
+
+               /* Uncharge the old one (which also restores last_npinned) */
+               if (need_charge_update(pages)) {
+                       int r = iopt_pages_update_pinned(pages, pages->npinned,
+                                                        false, NULL);
+
+                       if (WARN_ON(r))
+                               rc = r;
+               }
+               change_mm(pages);
+       }
+
+out:
+       iommufd_release_all_iova_rwsem(ictx, &ioas_list);
+       return rc;
+}
+
 int iommufd_option_rlimit_mode(struct iommu_option *cmd,
                               struct iommufd_ctx *ictx)
 {
index 57c0c8f0f6a5dfee02fef63ab54e630dcc3dccdc..b6d706cf2c66fb59273c14acd89bf8cffade7be0 100644 (file)
@@ -255,6 +255,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
index 13ac2286035eb74da3ec567ff3657966a1249a8d..0a96cc8f27dacd6a76f0ad7f0565c37c53e64029 100644 (file)
@@ -349,6 +349,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
                 struct iommu_ioas_alloc, out_ioas_id),
        IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
                 struct iommu_ioas_allow_iovas, allowed_iovas),
+       IOCTL_OP(IOMMU_IOAS_CHANGE_PROCESS, iommufd_ioas_change_process,
+                struct iommu_ioas_change_process, __reserved),
        IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
                 src_iova),
        IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
index 747d3d9baa3dda1ca75ce5700866c705822f8757..4ae8b1ee0444182179066b94237a029a4b3ac4b9 100644 (file)
@@ -54,6 +54,7 @@ enum {
        IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
        IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
        IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
+       IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
 };
 
 /**
@@ -972,4 +973,26 @@ struct iommu_vdevice_alloc {
        __aligned_u64 virt_id;
 };
 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
+
+/**
+ * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
+ * @size: sizeof(struct iommu_ioas_change_process)
+ * @__reserved: Must be 0
+ *
+ * This transfers pinned memory counts for every memory map in every IOAS
+ * in the context to the current process.  This only supports maps created
+ * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
+ * If the ioctl returns a failure status, then nothing is changed.
+ *
+ * This API is useful for transferring operation of a device from one process
+ * to another, such as during userland live update.
+ */
+struct iommu_ioas_change_process {
+       __u32 size;
+       __u32 __reserved;
+};
+
+#define IOMMU_IOAS_CHANGE_PROCESS \
+       _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
+
 #endif