5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 3 Dec 2022 13:55:29 +0000 (14:55 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 3 Dec 2022 13:55:29 +0000 (14:55 +0100)
added patches:
io_uring-cmpxchg-for-poll-arm-refs-release.patch
io_uring-fix-tw-losing-poll-events.patch
io_uring-make-poll-refs-more-robust.patch
io_uring-poll-fix-poll_refs-race-with-cancelation.patch
io_uring-update-res-mask-in-io_poll_check_events.patch
kvm-x86-mmu-fix-race-condition-in-direct_page_fault.patch

queue-5.15/io_uring-cmpxchg-for-poll-arm-refs-release.patch [new file with mode: 0644]
queue-5.15/io_uring-fix-tw-losing-poll-events.patch [new file with mode: 0644]
queue-5.15/io_uring-make-poll-refs-more-robust.patch [new file with mode: 0644]
queue-5.15/io_uring-poll-fix-poll_refs-race-with-cancelation.patch [new file with mode: 0644]
queue-5.15/io_uring-update-res-mask-in-io_poll_check_events.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-fix-race-condition-in-direct_page_fault.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/io_uring-cmpxchg-for-poll-arm-refs-release.patch b/queue-5.15/io_uring-cmpxchg-for-poll-arm-refs-release.patch
new file mode 100644 (file)
index 0000000..ec378ed
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Sat Dec  3 02:18:58 PM CET 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri,  2 Dec 2022 14:27:13 +0000
+Subject: io_uring: cmpxchg for poll arm refs release
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <ee156ffab31e37433493e3200e9ecb95c556e67d.1669990799.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 2f3893437a4ebf2e892ca172e9e122841319d675 ]
+
+Replace atomically subtracting the ownership reference at the end of
+arming a poll with a cmpxchg. We try to release ownership by setting
+poll_refs to 0, assuming it didn't change while we were arming. If it
+did change, we keep the ownership and use it to queue a tw, which is
+fully capable of processing all events and even tolerates spurious
+wake ups.
+
+It's a bit more elegant as we reduce races b/w setting the cancellation
+flag and getting refs with this release, and with that we don't have to
+worry about any kind of underflow. This is not the fastest path for
+polling anyway, and the performance difference b/w a cmpxchg and an
+atomic dec is usually negligible.
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/0c95251624397ea6def568ff040cad2d7926fd51.1668963050.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5650,7 +5650,6 @@ static int __io_arm_poll_handler(struct
+                                struct io_poll_table *ipt, __poll_t mask)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+-      int v;
+       INIT_HLIST_NODE(&req->hash_node);
+       io_init_poll_iocb(poll, mask, io_poll_wake);
+@@ -5696,11 +5695,10 @@ static int __io_arm_poll_handler(struct
+       }
+       /*
+-       * Release ownership. If someone tried to queue a tw while it was
+-       * locked, kick it off for them.
++       * Try to release ownership. If we see a change of state, e.g.
++       * poll was waken up, queue up a tw, it'll deal with it.
+        */
+-      v = atomic_dec_return(&req->poll_refs);
+-      if (unlikely(v & IO_POLL_REF_MASK))
++      if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
+               __io_poll_execute(req, 0);
+       return 0;
+ }
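
To see the release-by-cmpxchg idea in isolation, here is a minimal userspace sketch using C11 atomics rather than the kernel's atomic_t helpers; the names and the simplified reference layout are illustrative, not taken from fs/io_uring.c.

/*
 * Minimal userspace model of releasing poll arming ownership with a
 * cmpxchg instead of an atomic decrement.  Illustrative names only.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int poll_refs = 1;        /* the arming path owns one reference */

static void queue_tw(void)
{
        puts("poll_refs changed while arming: punt to task_work");
}

static void release_ownership_after_arm(void)
{
        int expected = 1;

        /*
         * Drop our reference only if nobody touched poll_refs while we
         * were arming.  If the cmpxchg fails, a wakeup or cancellation
         * raced with us, so keep the reference and let task_work handle
         * whatever happened instead of decrementing blindly.
         */
        if (!atomic_compare_exchange_strong(&poll_refs, &expected, 0))
                queue_tw();
}

int main(void)
{
        release_ownership_after_arm();          /* uncontended: refs go 1 -> 0 */

        atomic_store(&poll_refs, 2);            /* simulate a racing wakeup */
        release_ownership_after_arm();          /* contended: keep the ref, queue tw */

        printf("final poll_refs = %d\n", atomic_load(&poll_refs));
        return 0;
}
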
diff --git a/queue-5.15/io_uring-fix-tw-losing-poll-events.patch b/queue-5.15/io_uring-fix-tw-losing-poll-events.patch
new file mode 100644 (file)
index 0000000..2139b24
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Sat Dec  3 02:18:58 PM CET 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri,  2 Dec 2022 14:27:12 +0000
+Subject: io_uring: fix tw losing poll events
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <f802e6ca1e75aca0d42a4febe8088527d9b3bd93.1669990799.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 539bcb57da2f58886d7d5c17134236b0ec9cd15d ]
+
+We may never try to process a poll wake and its mask if there were
+multiple wake ups racing to queue up a tw. Force
+io_poll_check_events() to update the mask via vfs_poll().
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/00344d60f8b18907171178d7cf598de71d127b0b.1668710222.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5440,6 +5440,13 @@ static int io_poll_check_events(struct i
+                       return 0;
+               if (v & IO_POLL_CANCEL_FLAG)
+                       return -ECANCELED;
++              /*
++               * cqe.res contains only events of the first wake up
++               * and all others are be lost. Redo vfs_poll() to get
++               * up to date state.
++               */
++              if ((v & IO_POLL_REF_MASK) != 1)
++                      req->result = 0;
+               if (!req->result) {
+                       struct poll_table_struct pt = { ._key = poll->events };
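
A rough userspace model of the fix, assuming a made-up poll_file() in place of vfs_poll() and illustrative names throughout: when more than one reference is held, at least one extra wakeup raced in after the mask was recorded, so the stale mask is dropped and the file is polled again.

#include <stdatomic.h>
#include <stdio.h>

#define REF_MASK 0x3fffffff

static atomic_int poll_refs;
static int cached_mask;                 /* models req->result / cqe.res */
static int current_file_mask;           /* what the file would report right now */

static int poll_file(void)
{
        return current_file_mask;       /* stand-in for vfs_poll() */
}

static int check_events(void)
{
        int v = atomic_load(&poll_refs);

        /*
         * More than one reference means extra wakeups raced in after the
         * mask was recorded; their events were lost, so drop the stale
         * mask and ask the file for its current state.
         */
        if ((v & REF_MASK) != 1)
                cached_mask = 0;
        if (!cached_mask)
                cached_mask = poll_file();
        return cached_mask;
}

int main(void)
{
        cached_mask = 0x001;                    /* first wake recorded EPOLLIN only */
        current_file_mask = 0x001 | 0x004;      /* by now the file is also writable */
        atomic_store(&poll_refs, 2);            /* a second wakeup raced in */

        printf("events reported: 0x%x\n", check_events());
        return 0;
}
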
diff --git a/queue-5.15/io_uring-make-poll-refs-more-robust.patch b/queue-5.15/io_uring-make-poll-refs-more-robust.patch
new file mode 100644 (file)
index 0000000..0ea483d
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Sat Dec  3 02:18:58 PM CET 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri,  2 Dec 2022 14:27:14 +0000
+Subject: io_uring: make poll refs more robust
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com, Lin Ma <linma@zju.edu.cn>
+Message-ID: <f305c479060d33d5653a253898ac42f4bb11d329.1669990799.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit a26a35e9019fd70bf3cf647dcfdae87abc7bacea ]
+
+poll_refs carries two functions: the first is ownership over the
+request. The second is notifying io_poll_check_events() that there was
+an event but the wake up couldn't grab the ownership, so
+io_poll_check_events() should retry.
+
+We want to make poll_refs more robust against overflows. Instead of
+always incrementing it, which covers two purposes with one atomic,
+check whether poll_refs is elevated enough and, if so, set a retry flag
+without attempting to grab ownership. The gap between the bias check
+and the following atomics may seem racy, but we don't need it to be
+strict. Moreover, there can be at most 4 parallel updates: by the first
+and the second poll entries, __io_arm_poll_handler() and cancellation.
+Of those four, only poll wake ups may be executed multiple times, but
+they're protected by a spinlock.
+
+Cc: stable@vger.kernel.org
+Reported-by: Lin Ma <linma@zju.edu.cn>
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/c762bc31f8683b3270f3587691348a7119ef9c9d.1668963050.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   36 +++++++++++++++++++++++++++++++++++-
+ 1 file changed, 35 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5322,7 +5322,29 @@ struct io_poll_table {
+ };
+ #define IO_POLL_CANCEL_FLAG   BIT(31)
+-#define IO_POLL_REF_MASK      GENMASK(30, 0)
++#define IO_POLL_RETRY_FLAG    BIT(30)
++#define IO_POLL_REF_MASK      GENMASK(29, 0)
++
++/*
++ * We usually have 1-2 refs taken, 128 is more than enough and we want to
++ * maximise the margin between this amount and the moment when it overflows.
++ */
++#define IO_POLL_REF_BIAS       128
++
++static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
++{
++      int v;
++
++      /*
++       * poll_refs are already elevated and we don't have much hope for
++       * grabbing the ownership. Instead of incrementing set a retry flag
++       * to notify the loop that there might have been some change.
++       */
++      v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
++      if (v & IO_POLL_REF_MASK)
++              return false;
++      return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
++}
+ /*
+  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
+@@ -5332,6 +5354,8 @@ struct io_poll_table {
+  */
+ static inline bool io_poll_get_ownership(struct io_kiocb *req)
+ {
++      if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
++              return io_poll_get_ownership_slowpath(req);
+       return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
+ }
+@@ -5447,6 +5471,16 @@ static int io_poll_check_events(struct i
+                */
+               if ((v & IO_POLL_REF_MASK) != 1)
+                       req->result = 0;
++              if (v & IO_POLL_RETRY_FLAG) {
++                      req->result = 0;
++                      /*
++                       * We won't find new events that came in between
++                       * vfs_poll and the ref put unless we clear the
++                       * flag in advance.
++                       */
++                      atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
++                      v &= ~IO_POLL_RETRY_FLAG;
++              }
+               if (!req->result) {
+                       struct poll_table_struct pt = { ._key = poll->events };
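
A userspace sketch of the bias check and retry flag added above, using C11 atomics; the constants mirror the patch, while the helper names and the single-threaded demo are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define POLL_RETRY_FLAG         (1u << 30)
#define POLL_REF_MASK           ((1u << 30) - 1)
#define POLL_REF_BIAS           128

static atomic_uint poll_refs;

static bool get_ownership_slowpath(void)
{
        unsigned int v;

        /*
         * Refs are already elevated, so grabbing ownership is unlikely.
         * Record that something happened with the retry flag instead of
         * incrementing, and only take a reference if nobody held one.
         */
        v = atomic_fetch_or(&poll_refs, POLL_RETRY_FLAG);
        if (v & POLL_REF_MASK)
                return false;
        return !(atomic_fetch_add(&poll_refs, 1) & POLL_REF_MASK);
}

static bool get_ownership(void)
{
        if (atomic_load(&poll_refs) >= POLL_REF_BIAS)
                return get_ownership_slowpath();
        return !(atomic_fetch_add(&poll_refs, 1) & POLL_REF_MASK);
}

int main(void)
{
        printf("first caller owns:  %d\n", get_ownership());    /* 1 */
        printf("second caller owns: %d\n", get_ownership());    /* 0 */

        atomic_store(&poll_refs, POLL_REF_BIAS);                 /* heavy contention */
        bool owned = get_ownership();                            /* takes the slowpath */
        printf("slowpath owns: %d, poll_refs now 0x%x\n",
               owned, atomic_load(&poll_refs));
        return 0;
}
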
diff --git a/queue-5.15/io_uring-poll-fix-poll_refs-race-with-cancelation.patch b/queue-5.15/io_uring-poll-fix-poll_refs-race-with-cancelation.patch
new file mode 100644 (file)
index 0000000..30e70e3
--- /dev/null
@@ -0,0 +1,153 @@
+From foo@baz Sat Dec  3 02:18:58 PM CET 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri,  2 Dec 2022 14:27:15 +0000
+Subject: io_uring/poll: fix poll_refs race with cancelation
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com, Lin Ma <linma@zju.edu.cn>
+Message-ID: <886a17368c8fd21e58b9d5d47ead7d1f45fe5ebf.1669990799.git.asml.silence@gmail.com>
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ upstream commit 12ad3d2d6c5b0131a6052de91360849e3e154846 ]
+
+There is an interesting race condition involving poll_refs which can
+result in a NULL pointer dereference. The crash trace looks like:
+
+KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
+CPU: 0 PID: 30781 Comm: syz-executor.2 Not tainted 6.0.0-g493ffd6605b2 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+1.13.0-1ubuntu1.1 04/01/2014
+RIP: 0010:io_poll_remove_entry io_uring/poll.c:154 [inline]
+RIP: 0010:io_poll_remove_entries+0x171/0x5b4 io_uring/poll.c:190
+Code: ...
+RSP: 0018:ffff88810dfefba0 EFLAGS: 00010202
+RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000040000
+RDX: ffffc900030c4000 RSI: 000000000003ffff RDI: 0000000000040000
+RBP: 0000000000000008 R08: ffffffff9764d3dd R09: fffffbfff3836781
+R10: fffffbfff3836781 R11: 0000000000000000 R12: 1ffff11003422d60
+R13: ffff88801a116b04 R14: ffff88801a116ac0 R15: dffffc0000000000
+FS:  00007f9c07497700(0000) GS:ffff88811a600000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007ffb5c00ea98 CR3: 0000000105680005 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ io_apoll_task_func+0x3f/0xa0 io_uring/poll.c:299
+ handle_tw_list io_uring/io_uring.c:1037 [inline]
+ tctx_task_work+0x37e/0x4f0 io_uring/io_uring.c:1090
+ task_work_run+0x13a/0x1b0 kernel/task_work.c:177
+ get_signal+0x2402/0x25a0 kernel/signal.c:2635
+ arch_do_signal_or_restart+0x3b/0x660 arch/x86/kernel/signal.c:869
+ exit_to_user_mode_loop kernel/entry/common.c:166 [inline]
+ exit_to_user_mode_prepare+0xc2/0x160 kernel/entry/common.c:201
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:283 [inline]
+ syscall_exit_to_user_mode+0x58/0x160 kernel/entry/common.c:294
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The root cause is a small oversight in io_poll_check_events() when it
+runs concurrently with the poll cancel routine io_poll_cancel_req().
+
+The interleaving that triggers the use-after-free:
+
+CPU0                                       |  CPU1
+                                           |
+io_apoll_task_func()                       |  io_poll_cancel_req()
+ io_poll_check_events()                    |
+  // do while first loop                   |
+  v = atomic_read(...)                     |
+  // v = poll_refs = 1                     |
+  ...                                      |  io_poll_mark_cancelled()
+                                           |   atomic_or()
+                                           |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                           |
+  atomic_sub_return(...)                   |
+  // poll_refs = IO_POLL_CANCEL_FLAG       |
+  // loop continue                         |
+                                           |
+                                           |  io_poll_execute()
+                                           |   io_poll_get_ownership()
+                                           |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                           |   // gets the ownership
+  v = atomic_read(...)                     |
+  // poll_refs not change                  |
+                                           |
+  if (v & IO_POLL_CANCEL_FLAG)             |
+   return -ECANCELED;                      |
+  // io_poll_check_events return           |
+  // will go into                          |
+  // io_req_complete_failed() free req     |
+                                           |
+                                           |  io_apoll_task_func()
+                                           |  // also go into io_req_complete_failed()
+
+And the interleaving that triggers the kernel WARNING:
+
+CPU0                                       |  CPU1
+                                           |
+io_apoll_task_func()                       |  io_poll_cancel_req()
+ io_poll_check_events()                    |
+  // do while first loop                   |
+  v = atomic_read(...)                     |
+  // v = poll_refs = 1                     |
+  ...                                      |  io_poll_mark_cancelled()
+                                           |   atomic_or()
+                                           |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                           |
+  atomic_sub_return(...)                   |
+  // poll_refs = IO_POLL_CANCEL_FLAG       |
+  // loop continue                         |
+                                           |
+  v = atomic_read(...)                     |
+  // v = IO_POLL_CANCEL_FLAG               |
+                                           |  io_poll_execute()
+                                           |   io_poll_get_ownership()
+                                           |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                           |   // gets the ownership
+                                           |
+  WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))   |
+  // v & IO_POLL_REF_MASK = 0 WARN         |
+                                           |
+                                           |  io_apoll_task_func()
+                                           |  // also go into io_req_complete_failed()
+
+After reading the source code and discussing it with Pavel, it turns
+out the atomic poll refs are only meant to keep the loop in
+io_poll_check_events() going so that nobody else can grab the ownership
+in the meantime. Therefore, this patch simply adds another AND
+operation to make sure the loop stops once poll_refs is exactly equal
+to IO_POLL_CANCEL_FLAG. Since io_poll_cancel_req() grabs ownership and
+will finally make its way to io_req_complete_failed(), the req will
+be reclaimed as expected.
+
+Fixes: aa43477b0402 ("io_uring: poll rework")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+[axboe: tweak description and code style]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5512,7 +5512,8 @@ static int io_poll_check_events(struct i
+                * Release all references, retry if someone tried to restart
+                * task_work while we were executing it.
+                */
+-      } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
++      } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
++                                      IO_POLL_REF_MASK);
+       return 1;
+ }
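
The bit arithmetic behind this one-line change can be checked in isolation. A small C11 sketch (constants mirror the patch; nothing here is kernel code): with poll_refs at IO_POLL_CANCEL_FLAG | 1, dropping the single reference leaves only the cancel flag set, which is non-zero, so the unmasked condition would keep looping even though ownership is gone.

#include <stdatomic.h>
#include <stdio.h>

#define CANCEL_FLAG     (1u << 31)
#define REF_MASK        ((1u << 30) - 1)

int main(void)
{
        atomic_uint poll_refs = CANCEL_FLAG | 1;        /* canceler raced in */
        unsigned int v = atomic_load(&poll_refs);

        /* Drop the references we consumed, as the end of the loop does. */
        unsigned int after = atomic_fetch_sub(&poll_refs, v & REF_MASK)
                             - (v & REF_MASK);

        printf("unmasked result 0x%x -> non-zero, old code loops on without ownership\n",
               after);
        printf("masked result   0x%x -> zero, fixed code stops; the canceler owns the req\n",
               after & REF_MASK);
        return 0;
}
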
diff --git a/queue-5.15/io_uring-update-res-mask-in-io_poll_check_events.patch b/queue-5.15/io_uring-update-res-mask-in-io_poll_check_events.patch
new file mode 100644 (file)
index 0000000..1deae72
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Sat Dec  3 02:18:58 PM CET 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri,  2 Dec 2022 14:27:11 +0000
+Subject: io_uring: update res mask in io_poll_check_events
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <df5d7849a63502012196a9a5f78f7d46626b846d.1669990799.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit b98186aee22fa593bc8c6b2c5d839c2ee518bc8c ]
+
+When io_poll_check_events() collides with someone attempting to queue a
+task work, it'll spin one more time. However, it'll continue to use
+the mask from the first iteration instead of updating it. For example,
+if the first wake up was an EPOLLIN and the second an EPOLLOUT,
+userspace will not get EPOLLOUT in time.
+
+Clear the mask for all subsequent iterations to force vfs_poll().
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/2dac97e8f691231049cb259c4ae57e79e40b537c.1668710222.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5464,6 +5464,9 @@ static int io_poll_check_events(struct i
+                       return 0;
+               }
++              /* force the next iteration to vfs_poll() */
++              req->result = 0;
++
+               /*
+                * Release all references, retry if someone tried to restart
+                * task_work while we were executing it.
diff --git a/queue-5.15/kvm-x86-mmu-fix-race-condition-in-direct_page_fault.patch b/queue-5.15/kvm-x86-mmu-fix-race-condition-in-direct_page_fault.patch
new file mode 100644 (file)
index 0000000..3778421
--- /dev/null
@@ -0,0 +1,103 @@
+From 47b0c2e4c220f2251fd8dcfbb44479819c715e15 Mon Sep 17 00:00:00 2001
+From: Kazuki Takiguchi <takiguchi.kazuki171@gmail.com>
+Date: Wed, 23 Nov 2022 14:36:00 -0500
+Subject: KVM: x86/mmu: Fix race condition in direct_page_fault
+
+From: Kazuki Takiguchi <takiguchi.kazuki171@gmail.com>
+
+commit 47b0c2e4c220f2251fd8dcfbb44479819c715e15 upstream.
+
+make_mmu_pages_available() must be called with mmu_lock held for write.
+However, if the TDP MMU is used, it will be called with mmu_lock held
+for read. The function does nothing unless shadow pages are in use, so
+there is no race unless nested TDP is used. Since nested TDP uses
+shadow pages, old shadow pages may be zapped by this function even when
+the TDP MMU is enabled. Since shadow pages are never allocated by
+kvm_tdp_mmu_map(), the race can be avoided by not calling
+make_mmu_pages_available() when the TDP MMU is currently in use.
+
+I encountered this when repeatedly starting and stopping a nested VM.
+It can be triggered artificially by allocating a large number of nested
+TDP SPTEs.
+
+For example, the following BUG and general protection fault are caused in
+the host kernel.
+
+pte_list_remove: 00000000cd54fc10 many->many
+------------[ cut here ]------------
+kernel BUG at arch/x86/kvm/mmu/mmu.c:963!
+invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+RIP: 0010:pte_list_remove.cold+0x16/0x48 [kvm]
+Call Trace:
+ <TASK>
+ drop_spte+0xe0/0x180 [kvm]
+ mmu_page_zap_pte+0x4f/0x140 [kvm]
+ __kvm_mmu_prepare_zap_page+0x62/0x3e0 [kvm]
+ kvm_mmu_zap_oldest_mmu_pages+0x7d/0xf0 [kvm]
+ direct_page_fault+0x3cb/0x9b0 [kvm]
+ kvm_tdp_page_fault+0x2c/0xa0 [kvm]
+ kvm_mmu_page_fault+0x207/0x930 [kvm]
+ npf_interception+0x47/0xb0 [kvm_amd]
+ svm_invoke_exit_handler+0x13c/0x1a0 [kvm_amd]
+ svm_handle_exit+0xfc/0x2c0 [kvm_amd]
+ kvm_arch_vcpu_ioctl_run+0xa79/0x1780 [kvm]
+ kvm_vcpu_ioctl+0x29b/0x6f0 [kvm]
+ __x64_sys_ioctl+0x95/0xd0
+ do_syscall_64+0x5c/0x90
+
+general protection fault, probably for non-canonical address
+0xdead000000000122: 0000 [#1] PREEMPT SMP NOPTI
+RIP: 0010:kvm_mmu_commit_zap_page.part.0+0x4b/0xe0 [kvm]
+Call Trace:
+ <TASK>
+ kvm_mmu_zap_oldest_mmu_pages+0xae/0xf0 [kvm]
+ direct_page_fault+0x3cb/0x9b0 [kvm]
+ kvm_tdp_page_fault+0x2c/0xa0 [kvm]
+ kvm_mmu_page_fault+0x207/0x930 [kvm]
+ npf_interception+0x47/0xb0 [kvm_amd]
+
+CVE: CVE-2022-45869
+Fixes: a2855afc7ee8 ("KVM: x86/mmu: Allow parallel page faults for the TDP MMU")
+Signed-off-by: Kazuki Takiguchi <takiguchi.kazuki171@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2357,6 +2357,7 @@ static bool __kvm_mmu_prepare_zap_page(s
+ {
+       bool list_unstable;
++      lockdep_assert_held_write(&kvm->mmu_lock);
+       trace_kvm_mmu_prepare_zap_page(sp);
+       ++kvm->stat.mmu_shadow_zapped;
+       *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
+@@ -4007,16 +4008,17 @@ static int direct_page_fault(struct kvm_
+       if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
+               goto out_unlock;
+-      r = make_mmu_pages_available(vcpu);
+-      if (r)
+-              goto out_unlock;
+-      if (is_tdp_mmu_fault)
++      if (is_tdp_mmu_fault) {
+               r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable, max_level,
+                                   pfn, prefault);
+-      else
++      } else {
++              r = make_mmu_pages_available(vcpu);
++              if (r)
++                      goto out_unlock;
+               r = __direct_map(vcpu, gpa, error_code, map_writable, max_level, pfn,
+                                prefault, is_tdp);
++      }
+ out_unlock:
+       if (is_tdp_mmu_fault)
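
A toy userspace model of the locking rule this patch enforces: the shadow-page zapper must only run with mmu_lock held for write, while the TDP MMU fault path takes it for read. A pthread rwlock plus an explicit flag stand in for the kernel's rwlock and lockdep; every name below is illustrative.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t mmu_lock = PTHREAD_RWLOCK_INITIALIZER;
static bool mmu_lock_held_for_write;    /* poor man's lockdep state */

static void make_mmu_pages_available(void)
{
        /* Mirrors the lockdep_assert_held_write() added by the patch. */
        assert(mmu_lock_held_for_write);
        puts("zapping old shadow pages under the write lock");
}

static void direct_page_fault(bool is_tdp_mmu_fault)
{
        if (is_tdp_mmu_fault) {
                pthread_rwlock_rdlock(&mmu_lock);
                /* Fixed flow: no shadow-page zapping on the read-locked path. */
                puts("TDP MMU map under the read lock");
                pthread_rwlock_unlock(&mmu_lock);
        } else {
                pthread_rwlock_wrlock(&mmu_lock);
                mmu_lock_held_for_write = true;
                make_mmu_pages_available();
                puts("shadow MMU map under the write lock");
                mmu_lock_held_for_write = false;
                pthread_rwlock_unlock(&mmu_lock);
        }
}

int main(void)
{
        direct_page_fault(true);        /* TDP MMU fault: read lock only */
        direct_page_fault(false);       /* shadow MMU fault: write lock + zap */
        return 0;
}
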
diff --git a/queue-5.15/series b/queue-5.15/series
index e6253d6f3c7c9ea60f5b6da05ee6be3e5e8a38b0..29a3d25ca67aaaa3af24f7b192125f8f9b44440a 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -85,3 +85,9 @@ drm-i915-never-return-0-if-not-all-requests-retired.patch
 tracing-osnoise-fix-duration-type.patch
 tracing-fix-race-where-histograms-can-be-called-before-the-event.patch
 tracing-free-buffers-when-a-used-dynamic-event-is-removed.patch
+io_uring-update-res-mask-in-io_poll_check_events.patch
+io_uring-fix-tw-losing-poll-events.patch
+io_uring-cmpxchg-for-poll-arm-refs-release.patch
+io_uring-make-poll-refs-more-robust.patch
+io_uring-poll-fix-poll_refs-race-with-cancelation.patch
+kvm-x86-mmu-fix-race-condition-in-direct_page_fault.patch