From: Greg Kroah-Hartman
Date: Wed, 17 May 2017 08:35:20 +0000 (+0200)
Subject: 4.10-stable patches
X-Git-Tag: v3.18.54~30
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a2de01987552528aa5876485cc71e4413dac2450;p=thirdparty%2Fkernel%2Fstable-queue.git

4.10-stable patches

added patches:
      vfio-type1-remove-locked-page-accounting-workqueue.patch
---

diff --git a/queue-4.10/series b/queue-4.10/series
index 973c840f761..11dcb1b9356 100644
--- a/queue-4.10/series
+++ b/queue-4.10/series
@@ -38,3 +38,4 @@ dm-crypt-rewrite-wipe-key-in-crypto-layer-using-random-data.patch
 dm-era-save-spacemap-metadata-root-after-the-pre-commit.patch
 dm-rq-check-blk_mq_register_dev-return-value-in-dm_mq_init_request_queue.patch
 dm-thin-fix-a-memory-leak-when-passing-discard-bio-down.patch
+vfio-type1-remove-locked-page-accounting-workqueue.patch
diff --git a/queue-4.10/vfio-type1-remove-locked-page-accounting-workqueue.patch b/queue-4.10/vfio-type1-remove-locked-page-accounting-workqueue.patch
new file mode 100644
index 00000000000..ba67099ff7e
--- /dev/null
+++ b/queue-4.10/vfio-type1-remove-locked-page-accounting-workqueue.patch
@@ -0,0 +1,232 @@
+From 0cfef2b7410b64d7a430947e0b533314c4f97153 Mon Sep 17 00:00:00 2001
+From: Alex Williamson
+Date: Thu, 13 Apr 2017 14:10:15 -0600
+Subject: vfio/type1: Remove locked page accounting workqueue
+
+From: Alex Williamson
+
+commit 0cfef2b7410b64d7a430947e0b533314c4f97153 upstream.
+
+If the mmap_sem is contended then the vfio type1 IOMMU backend will
+defer locked page accounting updates to a workqueue task. This has a
+few problems and depending on which side the user tries to play, they
+might be over-penalized for unmaps that haven't yet been accounted or
+race the workqueue to enter more mappings than they're allowed. The
+original intent of this workqueue mechanism seems to be focused on
+reducing latency through the ioctl, but we cannot do so at the cost
+of correctness. Remove this workqueue mechanism and update the
+callers to allow for failure. We can also now recheck the limit under
+write lock to make sure we don't exceed it.
+
+vfio_pin_pages_remote() also now necessarily includes an unwind path
+which we can jump to directly if the consecutive page pinning finds
+that we're exceeding the user's memory limits. This avoids the
+current lazy approach which does accounting and mapping up to the
+fault, only to return an error on the next iteration to unwind the
+entire vfio_dma.
+
+Reviewed-by: Peter Xu
+Reviewed-by: Kirti Wankhede
+Signed-off-by: Alex Williamson
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/vfio/vfio_iommu_type1.c | 110 ++++++++++++++++++----------------------
+ 1 file changed, 51 insertions(+), 59 deletions(-)
+
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -243,69 +243,46 @@ static int vfio_iova_put_vfio_pfn(struct
+ 	return ret;
+ }
+ 
+-struct vwork {
+-	struct mm_struct	*mm;
+-	long			npage;
+-	struct work_struct	work;
+-};
+-
+-/* delayed decrement/increment for locked_vm */
+-static void vfio_lock_acct_bg(struct work_struct *work)
+-{
+-	struct vwork *vwork = container_of(work, struct vwork, work);
+-	struct mm_struct *mm;
+-
+-	mm = vwork->mm;
+-	down_write(&mm->mmap_sem);
+-	mm->locked_vm += vwork->npage;
+-	up_write(&mm->mmap_sem);
+-	mmput(mm);
+-	kfree(vwork);
+-}
+-
+-static void vfio_lock_acct(struct task_struct *task, long npage)
++static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
+ {
+-	struct vwork *vwork;
+ 	struct mm_struct *mm;
+ 	bool is_current;
++	int ret;
+ 
+ 	if (!npage)
+-		return;
++		return 0;
+ 
+ 	is_current = (task->mm == current->mm);
+ 
+ 	mm = is_current ? task->mm : get_task_mm(task);
+ 	if (!mm)
+-		return; /* process exited */
++		return -ESRCH; /* process exited */
+ 
+-	if (down_write_trylock(&mm->mmap_sem)) {
+-		mm->locked_vm += npage;
+-		up_write(&mm->mmap_sem);
+-		if (!is_current)
+-			mmput(mm);
+-		return;
+-	}
++	ret = down_write_killable(&mm->mmap_sem);
++	if (!ret) {
++		if (npage > 0) {
++			if (lock_cap ? !*lock_cap :
++			    !has_capability(task, CAP_IPC_LOCK)) {
++				unsigned long limit;
++
++				limit = task_rlimit(task,
++						    RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ 
+-	if (is_current) {
+-		mm = get_task_mm(task);
+-		if (!mm)
+-			return;
++				if (mm->locked_vm + npage > limit)
++					ret = -ENOMEM;
++			}
++		}
++
++		if (!ret)
++			mm->locked_vm += npage;
++
++		up_write(&mm->mmap_sem);
+ 	}
+ 
+-	/*
+-	 * Couldn't get mmap_sem lock, so must setup to update
+-	 * mm->locked_vm later. If locked_vm were atomic, we
+-	 * wouldn't need this silliness
+-	 */
+-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
+-	if (WARN_ON(!vwork)) {
++	if (!is_current)
+ 		mmput(mm);
+-		return;
+-	}
+-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
+-	vwork->mm = mm;
+-	vwork->npage = npage;
+-	schedule_work(&vwork->work);
++
++	return ret;
+ }
+ 
+ /*
+@@ -402,7 +379,7 @@ static int vaddr_get_pfn(struct mm_struc
+ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
+ 				  long npage, unsigned long *pfn_base)
+ {
+-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
++	unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ 	bool lock_cap = capable(CAP_IPC_LOCK);
+ 	long ret, pinned = 0, lock_acct = 0;
+ 	bool rsvd;
+@@ -439,8 +416,6 @@ static long vfio_pin_pages_remote(struct
+ 	/* Lock all the consecutive pages from pfn_base */
+ 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
+ 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
+-		unsigned long pfn = 0;
+-
+ 		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
+ 		if (ret)
+ 			break;
+@@ -457,14 +432,25 @@ static long vfio_pin_pages_remote(struct
+ 				put_pfn(pfn, dma->prot);
+ 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+ 					__func__, limit << PAGE_SHIFT);
+-				break;
++				ret = -ENOMEM;
++				goto unpin_out;
+ 			}
+ 			lock_acct++;
+ 		}
+ 	}
+ 
+ out:
+-	vfio_lock_acct(current, lock_acct);
++	ret = vfio_lock_acct(current, lock_acct, &lock_cap);
++
++unpin_out:
++	if (ret) {
++		if (!rsvd) {
++			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
++				put_pfn(pfn, dma->prot);
++		}
++
++		return ret;
++	}
+ 
+ 	return pinned;
+ }
+@@ -485,7 +471,7 @@ static long vfio_unpin_pages_remote(stru
+ 	}
+ 
+ 	if (do_accounting)
+-		vfio_lock_acct(dma->task, locked - unlocked);
++		vfio_lock_acct(dma->task, locked - unlocked, NULL);
+ 
+ 	return unlocked;
+ }
+@@ -519,8 +505,14 @@ static int vfio_pin_page_external(struct
+ 		goto pin_page_exit;
+ 	}
+ 
+-	if (!rsvd && do_accounting)
+-		vfio_lock_acct(dma->task, 1);
++	if (!rsvd && do_accounting) {
++		ret = vfio_lock_acct(dma->task, 1, &lock_cap);
++		if (ret) {
++			put_pfn(*pfn_base, dma->prot);
++			goto pin_page_exit;
++		}
++	}
++
+ 	ret = 1;
+ 
+ pin_page_exit:
+@@ -540,7 +532,7 @@ static int vfio_unpin_page_external(stru
+ 	unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
+ 
+ 	if (do_accounting)
+-		vfio_lock_acct(dma->task, -unlocked);
++		vfio_lock_acct(dma->task, -unlocked, NULL);
+ 
+ 	return unlocked;
+ }
+@@ -737,7 +729,7 @@ static long vfio_unmap_unpin(struct vfio
+ 
+ 	dma->iommu_mapped = false;
+ 	if (do_accounting) {
+-		vfio_lock_acct(dma->task, -unlocked);
++		vfio_lock_acct(dma->task, -unlocked, NULL);
+ 		return 0;
+ 	}
+ 	return unlocked;
+@@ -1346,7 +1338,7 @@ static void vfio_iommu_unmap_unpin_reacc
+ 			if (!is_invalid_reserved_pfn(vpfn->pfn))
+ 				locked++;
+ 		}
+-		vfio_lock_acct(dma->task, locked - unlocked);
++		vfio_lock_acct(dma->task, locked - unlocked, NULL);
+ 	}
+ }
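
The sketch below is an editorial illustration only; it is not part of the patch above and is not kernel code. It restates, in plain user-space C, the pattern the commit message describes: take the lock synchronously instead of deferring to a workqueue, re-check the limit under the write lock, return an error to the caller on failure, and let the caller unwind what it already pinned. All names (acct_ctx, lock_acct, pin_pages) are hypothetical, and the pthread mutex is only a stand-in for mmap_sem, as the flag and counter fields are for CAP_IPC_LOCK, RLIMIT_MEMLOCK and mm->locked_vm.

/* Illustrative sketch, not kernel code; build with: cc -pthread sketch.c */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct acct_ctx {
	pthread_mutex_t lock;	/* stands in for mmap_sem */
	long locked;		/* stands in for mm->locked_vm */
	long limit;		/* stands in for RLIMIT_MEMLOCK, in pages */
	bool lock_cap;		/* stands in for CAP_IPC_LOCK */
};

/* Synchronous accounting: re-check the limit under the lock, fail on excess. */
static int lock_acct(struct acct_ctx *ctx, long npage)
{
	int ret = 0;

	if (!npage)
		return 0;

	pthread_mutex_lock(&ctx->lock);
	if (npage > 0 && !ctx->lock_cap && ctx->locked + npage > ctx->limit)
		ret = -ENOMEM;
	if (!ret)
		ctx->locked += npage;
	pthread_mutex_unlock(&ctx->lock);

	return ret;
}

/* Caller mirrors the unwind path: on failure, undo everything pinned so far. */
static long pin_pages(struct acct_ctx *ctx, long npage)
{
	long pinned;
	int ret;

	for (pinned = 0; pinned < npage; pinned++)
		;	/* a real implementation would pin a page here */

	ret = lock_acct(ctx, pinned);
	if (ret) {
		for (; pinned; pinned--)
			;	/* ...and would unpin it again here */
		return ret;
	}

	return pinned;
}

int main(void)
{
	struct acct_ctx ctx = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.limit = 4,
		.lock_cap = false,
	};

	printf("pin 3 -> %ld\n", pin_pages(&ctx, 3));	/* fits under the limit */
	printf("pin 3 -> %ld\n", pin_pages(&ctx, 3));	/* exceeds it, fails */
	return 0;
}

Run as shown, the first call returns 3 and the second returns a negative errno after unwinding, which mirrors how vfio_pin_pages_remote() now fails and unpins rather than letting accounting race or exceed the user's limit.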