]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
nvme-tcp: fix possible UAF in nvme_tcp_poll
authorSagi Grimberg <sagi@grimberg.me>
Thu, 20 Feb 2025 11:18:30 +0000 (13:18 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 10 Apr 2025 12:32:03 +0000 (14:32 +0200)
[ Upstream commit 8c1624b63a7d24142a2bbc3a5ee7e95f004ea36e ]

nvme_tcp_poll() may race with the send path error handler because
it may complete the request while it is actively being polled for
completion, resulting in a UAF panic [1]:

We should make sure to stop polling when we see an error when
trying to read from the socket. Hence make sure to propagate the
error so that the block layer breaks the polling cycle.

[1]:
--
[35665.692310] nvme nvme2: failed to send request -13
[35665.702265] nvme nvme2: unsupported pdu type (3)
[35665.702272] BUG: kernel NULL pointer dereference, address: 0000000000000000
[35665.702542] nvme nvme2: queue 1 receive failed:  -22
[35665.703209] #PF: supervisor write access in kernel mode
[35665.703213] #PF: error_code(0x0002) - not-present page
[35665.703214] PGD 8000003801cce067 P4D 8000003801cce067 PUD 37e6f79067 PMD 0
[35665.703220] Oops: 0002 [#1] SMP PTI
[35665.703658] nvme nvme2: starting error recovery
[35665.705809] Hardware name: Inspur aaabbb/YZMB-00882-104, BIOS 4.1.26 09/22/2022
[35665.705812] Workqueue: kblockd blk_mq_requeue_work
[35665.709172] RIP: 0010:_raw_spin_lock+0xc/0x30
[35665.715788] Call Trace:
[35665.716201]  <TASK>
[35665.716613]  ? show_trace_log_lvl+0x1c1/0x2d9
[35665.717049]  ? show_trace_log_lvl+0x1c1/0x2d9
[35665.717457]  ? blk_mq_request_bypass_insert+0x2c/0xb0
[35665.717950]  ? __die_body.cold+0x8/0xd
[35665.718361]  ? page_fault_oops+0xac/0x140
[35665.718749]  ? blk_mq_start_request+0x30/0xf0
[35665.719144]  ? nvme_tcp_queue_rq+0xc7/0x170 [nvme_tcp]
[35665.719547]  ? exc_page_fault+0x62/0x130
[35665.719938]  ? asm_exc_page_fault+0x22/0x30
[35665.720333]  ? _raw_spin_lock+0xc/0x30
[35665.720723]  blk_mq_request_bypass_insert+0x2c/0xb0
[35665.721101]  blk_mq_requeue_work+0xa5/0x180
[35665.721451]  process_one_work+0x1e8/0x390
[35665.721809]  worker_thread+0x53/0x3d0
[35665.722159]  ? process_one_work+0x390/0x390
[35665.722501]  kthread+0x124/0x150
[35665.722849]  ? set_kthread_struct+0x50/0x50
[35665.723182]  ret_from_fork+0x1f/0x30

Reported-by: Zhang Guanghui <zhang.guanghui@cestc.cn>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/nvme/host/tcp.c

index 4ca7ef9416002c09592c2a49a0fe5dcf2474bd77..0fc5aba88bc15b4fa01d6a8191f5da87f5f070f4 100644 (file)
@@ -2491,6 +2491,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
 {
        struct nvme_tcp_queue *queue = hctx->driver_data;
        struct sock *sk = queue->sock->sk;
+       int ret;
 
        if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
                return 0;
@@ -2498,9 +2499,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
        set_bit(NVME_TCP_Q_POLLING, &queue->flags);
        if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
                sk_busy_loop(sk, true);
-       nvme_tcp_try_recv(queue);
+       ret = nvme_tcp_try_recv(queue);
        clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
-       return queue->nr_cqe;
+       return ret < 0 ? ret : queue->nr_cqe;
 }
 
 static const struct blk_mq_ops nvme_tcp_mq_ops = {