--- /dev/null
+From dbe97cff7dd9f0f75c524afdd55ad46be3d15295 Mon Sep 17 00:00:00 2001
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+Date: Tue, 21 Jun 2022 22:27:26 -0400
+Subject: xen/gntdev: Avoid blocking in unmap_grant_pages()
+
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+
+commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream.
+
+unmap_grant_pages() currently waits for the pages to no longer be used.
+In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
+deadlock against i915: i915 was waiting for gntdev's MMU notifier to
+finish, while gntdev was waiting for i915 to free its pages. I also
+believe this is responsible for various deadlocks I have experienced in
+the past.
+
+Avoid these problems by making unmap_grant_pages() asynchronous. This
+requires making it return void, as any errors will not be available by
+the time the function returns. Fortunately, the only use of the return
+value is a WARN_ON(), which can be replaced by a WARN_ON() at the point
+where the error is actually detected. Additionally, a failed call no
+longer prevents further calls from being made, but this is harmless.
+
+Because unmap_grant_pages() is now asynchronous, the grant handle will
+be set to INVALID_GRANT_HANDLE too late to prevent multiple unmaps of
+the same handle. Instead, a separate bool array is allocated for this
+purpose. This wastes memory, but stuffing this information in padding
+bytes is too fragile. Furthermore, it is necessary to grab a reference
+to the map before making the asynchronous call, and to release that
+reference when the asynchronous operation completes.
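+
+As an illustration only (not driver code), the effect of the
+being_removed[] array can be modelled in ordinary userspace C: entries
+that an earlier call already claimed are skipped, and each remaining
+contiguous run is claimed and handed to a (stubbed) asynchronous unmap
+in one batch. The names mirror the driver; everything else is a
+stand-alone sketch:
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  #define NPAGES 8
+  static bool being_removed[NPAGES];
+
+  /* Stand-in for gnttab_unmap_refs_async() on one contiguous range. */
+  static void fake_async_unmap(int offset, int count)
+  {
+      printf("unmap batch: offset=%d count=%d\n", offset, count);
+  }
+
+  static void unmap_range(int offset, int pages)
+  {
+      while (pages) {
+          /* Skip entries some earlier call is already removing. */
+          while (pages && being_removed[offset]) {
+              offset++;
+              pages--;
+          }
+          int range = 0;
+          while (range < pages && !being_removed[offset + range]) {
+              /* Claim the entry so later calls will skip it. */
+              being_removed[offset + range] = true;
+              range++;
+          }
+          if (range)
+              fake_async_unmap(offset, range);
+          offset += range;
+          pages -= range;
+      }
+  }
+
+  int main(void)
+  {
+      being_removed[3] = true;   /* pretend page 3 is already going away */
+      unmap_range(0, NPAGES);    /* prints batches 0..2 and 4..7 */
+      return 0;
+  }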
+
+It is also necessary to guard against reentrancy in gntdev_put_map(),
+and to handle the case where userspace tries to map a mapping whose
+contents have not all been freed yet.
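+
+A minimal, single-threaded model (plain ints instead of refcount_t,
+hypothetical names) of why that reentrancy is bounded: the completion
+handler's recursive put always sees a count above 1 and returns after
+decrementing, so the recursion depth cannot exceed one.
+
+  #include <assert.h>
+  #include <stdio.h>
+
+  static int users;
+
+  static void put_map(void);
+
+  /* Stand-in for unmap_grant_pages() whose completion handler drops
+   * the extra reference taken for the duration of the async work. */
+  static void unmap_all(void)
+  {
+      users++;       /* like refcount_inc() before the async call */
+      /* ... unmapping happens here ... */
+      put_map();     /* the recursive call from the completion path */
+  }
+
+  static void put_map(void)
+  {
+      if (--users > 0)
+          return;    /* the recursive call always exits here */
+
+      /* Re-arm the count so the unmap path cannot free the map. */
+      users = 1;
+      unmap_all();
+      if (--users > 0)
+          return;    /* completion still pending elsewhere */
+      printf("map freed\n");
+  }
+
+  int main(void)
+  {
+      users = 1;
+      put_map();     /* prints "map freed" exactly once */
+      assert(users == 0);
+      return 0;
+  }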
+
+Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
+Cc: stable@vger.kernel.org
+Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/gntdev.c | 145 ++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 103 insertions(+), 42 deletions(-)
+
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -59,6 +59,7 @@ MODULE_PARM_DESC(limit, "Maximum number
+
+ static atomic_t pages_mapped = ATOMIC_INIT(0);
+
++/* True in PV mode, false otherwise */
+ static int use_ptemod;
+ #define populate_freeable_maps use_ptemod
+
+@@ -94,11 +95,16 @@ struct grant_map {
+ struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
+ struct gnttab_unmap_grant_ref *kunmap_ops;
++ bool *being_removed;
+ struct page **pages;
+ unsigned long pages_vm_start;
++ /* Number of live grants */
++ atomic_t live_grants;
++ /* Needed to avoid allocation in unmap_grant_pages */
++ struct gntab_unmap_queue_data unmap_data;
+ };
+
+-static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
++static void unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
+ /* ------------------------------------------------------------------ */
+
+@@ -129,6 +135,7 @@ static void gntdev_free_map(struct grant
+ kfree(map->unmap_ops);
+ kfree(map->kmap_ops);
+ kfree(map->kunmap_ops);
++ kfree(map->being_removed);
+ kfree(map);
+ }
+
+@@ -147,12 +154,15 @@ static struct grant_map *gntdev_alloc_ma
+ add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
+ add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
++ add->being_removed =
++ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
+ if (NULL == add->grants ||
+ NULL == add->map_ops ||
+ NULL == add->unmap_ops ||
+ NULL == add->kmap_ops ||
+ NULL == add->kunmap_ops ||
+- NULL == add->pages)
++ NULL == add->pages ||
++ NULL == add->being_removed)
+ goto err;
+
+ if (gnttab_alloc_pages(count, add->pages))
+@@ -217,6 +227,35 @@ static void gntdev_put_map(struct gntdev
+ return;
+
+ atomic_sub(map->count, &pages_mapped);
++ if (map->pages && !use_ptemod) {
++ /*
++ * Increment the reference count. This ensures that the
++ * subsequent call to unmap_grant_pages() will not wind up
++ * re-entering itself. It *can* wind up calling
++ * gntdev_put_map() recursively, but such calls will be with a
++ * reference count greater than 1, so they will return before
++ * this code is reached. The recursion depth is thus limited to
++ * 1. Do NOT use refcount_inc() here, as it will detect that
++ * the reference count is zero and WARN().
++ */
++ refcount_set(&map->users, 1);
++
++ /*
++ * Unmap the grants. This may or may not be asynchronous, so it
++ * is possible that the reference count is 1 on return, but it
++ * could also be greater than 1.
++ */
++ unmap_grant_pages(map, 0, map->count);
++
++ /* Check if the memory now needs to be freed */
++ if (!refcount_dec_and_test(&map->users))
++ return;
++
++ /*
++ * All pages have been returned to the hypervisor, so free the
++ * map.
++ */
++ }
+
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+@@ -274,6 +313,7 @@ static int set_grant_ptes_as_special(pte
+
+ static int map_grant_pages(struct grant_map *map)
+ {
++ size_t alloced = 0;
+ int i, err = 0;
+
+ if (!use_ptemod) {
+@@ -322,85 +362,107 @@ static int map_grant_pages(struct grant_
+ map->pages, map->count);
+
+ for (i = 0; i < map->count; i++) {
+- if (map->map_ops[i].status == GNTST_okay)
++ if (map->map_ops[i].status == GNTST_okay) {
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+- else if (!err)
++ if (!use_ptemod)
++ alloced++;
++ } else if (!err)
+ err = -EINVAL;
+
+ if (map->flags & GNTMAP_device_map)
+ map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
+
+ if (use_ptemod) {
+- if (map->kmap_ops[i].status == GNTST_okay)
++ if (map->kmap_ops[i].status == GNTST_okay) {
++ if (map->map_ops[i].status == GNTST_okay)
++ alloced++;
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+- else if (!err)
++ } else if (!err)
+ err = -EINVAL;
+ }
+ }
++ atomic_add(alloced, &map->live_grants);
+ return err;
+ }
+
+-static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
++static void __unmap_grant_pages_done(int result,
++ struct gntab_unmap_queue_data *data)
+ {
+- int i, err = 0;
+- struct gntab_unmap_queue_data unmap_data;
++ unsigned int i;
++ struct grant_map *map = data->data;
++ unsigned int offset = data->unmap_ops - map->unmap_ops;
++
++ for (i = 0; i < data->count; i++) {
++ WARN_ON(map->unmap_ops[offset+i].status);
++ pr_debug("unmap handle=%d st=%d\n",
++ map->unmap_ops[offset+i].handle,
++ map->unmap_ops[offset+i].status);
++ map->unmap_ops[offset+i].handle = -1;
++ }
++ /*
++ * Decrease the live-grant counter. This must happen after the loop to
++	 * prevent premature reuse of the grants by gntdev_mmap().
++ */
++ atomic_sub(data->count, &map->live_grants);
++
++ /* Release reference taken by unmap_grant_pages */
++ gntdev_put_map(NULL, map);
++}
+
++static void __unmap_grant_pages(struct grant_map *map, int offset, int pages)
++{
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
++
+ if (pgno >= offset && pgno < offset + pages) {
+ /* No need for kmap, pages are in lowmem */
+ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
++
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+- unmap_data.unmap_ops = map->unmap_ops + offset;
+- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+- unmap_data.pages = map->pages + offset;
+- unmap_data.count = pages;
++ map->unmap_data.unmap_ops = map->unmap_ops + offset;
++ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
++ map->unmap_data.pages = map->pages + offset;
++ map->unmap_data.count = pages;
++ map->unmap_data.done = __unmap_grant_pages_done;
++ map->unmap_data.data = map;
++ refcount_inc(&map->users); /* to keep map alive during async call below */
+
+- err = gnttab_unmap_refs_sync(&unmap_data);
+- if (err)
+- return err;
+-
+- for (i = 0; i < pages; i++) {
+- if (map->unmap_ops[offset+i].status)
+- err = -EINVAL;
+- pr_debug("unmap handle=%d st=%d\n",
+- map->unmap_ops[offset+i].handle,
+- map->unmap_ops[offset+i].status);
+- map->unmap_ops[offset+i].handle = -1;
+- }
+- return err;
++ gnttab_unmap_refs_async(&map->unmap_data);
+ }
+
+-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
++static void unmap_grant_pages(struct grant_map *map, int offset, int pages)
+ {
+- int range, err = 0;
++ int range;
++
++ if (atomic_read(&map->live_grants) == 0)
++ return; /* Nothing to do */
+
+ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+
+ /* It is possible the requested range will have a "hole" where we
+ * already unmapped some of the grants. Only unmap valid ranges.
+ */
+- while (pages && !err) {
+- while (pages && map->unmap_ops[offset].handle == -1) {
++ while (pages) {
++ while (pages && map->being_removed[offset]) {
+ offset++;
+ pages--;
+ }
+ range = 0;
+ while (range < pages) {
+- if (map->unmap_ops[offset+range].handle == -1)
++ if (map->being_removed[offset + range])
+ break;
++ map->being_removed[offset + range] = true;
+ range++;
+ }
+- err = __unmap_grant_pages(map, offset, range);
++ if (range)
++ __unmap_grant_pages(map, offset, range);
+ offset += range;
+ pages -= range;
+ }
+-
+- return err;
+ }
+
+ /* ------------------------------------------------------------------ */
+@@ -456,7 +518,6 @@ static void unmap_if_in_range(struct gra
+ unsigned long start, unsigned long end)
+ {
+ unsigned long mstart, mend;
+- int err;
+
+ if (!map->vma)
+ return;
+@@ -470,10 +531,9 @@ static void unmap_if_in_range(struct gra
+ map->index, map->count,
+ map->vma->vm_start, map->vma->vm_end,
+ start, end, mstart, mend);
+- err = unmap_grant_pages(map,
++ unmap_grant_pages(map,
+ (mstart - map->vma->vm_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
+- WARN_ON(err);
+ }
+
+ static void mn_invl_range_start(struct mmu_notifier *mn,
+@@ -498,7 +558,6 @@ static void mn_release(struct mmu_notifi
+ {
+ struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
+ struct grant_map *map;
+- int err;
+
+ mutex_lock(&priv->lock);
+ list_for_each_entry(map, &priv->maps, next) {
+@@ -507,8 +566,7 @@ static void mn_release(struct mmu_notifi
+ pr_debug("map %d+%d (%lx %lx)\n",
+ map->index, map->count,
+ map->vma->vm_start, map->vma->vm_end);
+- err = unmap_grant_pages(map, /* offset */ 0, map->count);
+- WARN_ON(err);
++ unmap_grant_pages(map, /* offset */ 0, map->count);
+ }
+ list_for_each_entry(map, &priv->freeable_maps, next) {
+ if (!map->vma)
+@@ -516,8 +574,7 @@ static void mn_release(struct mmu_notifi
+ pr_debug("map %d+%d (%lx %lx)\n",
+ map->index, map->count,
+ map->vma->vm_start, map->vma->vm_end);
+- err = unmap_grant_pages(map, /* offset */ 0, map->count);
+- WARN_ON(err);
++ unmap_grant_pages(map, /* offset */ 0, map->count);
+ }
+ mutex_unlock(&priv->lock);
+ }
+@@ -1006,6 +1063,10 @@ static int gntdev_mmap(struct file *flip
+ goto unlock_out;
+ }
+
++ if (atomic_read(&map->live_grants)) {
++ err = -EAGAIN;
++ goto unlock_out;
++ }
+ refcount_inc(&map->users);
+
+ vma->vm_ops = &gntdev_vmops;