From: Yizhou Zhao
Date: Fri, 29 May 2026 07:39:31 +0000 (+0800)
Subject: net/9p: fix race condition on rdma->state in trans_rdma.c
X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=7d54894a1ee265a72d70f7cae1da6cc774cccc71;p=thirdparty%2Fkernel%2Flinux.git

net/9p: fix race condition on rdma->state in trans_rdma.c

The rdma->state field is modified without holding req_lock in both
recv_done() and p9_cm_event_handler(), while rdma_request() accesses the
same field under the req_lock spinlock. This inconsistent locking creates
a race condition:

- recv_done() running in softirq completion context sets
  rdma->state = P9_RDMA_FLUSHING without acquiring req_lock
- p9_cm_event_handler() modifies rdma->state at multiple points
  (ADDR_RESOLVED, ROUTE_RESOLVED, ESTABLISHED, CLOSED) without req_lock
- rdma_request() uses spin_lock_irqsave(&rdma->req_lock, flags) to
  protect the read-modify-write of rdma->state

The race can cause lost state transitions: recv_done() or the CM event
handler could set state to FLUSHING/CLOSED while rdma_request() is
concurrently checking or modifying state under the lock, leading to the
FLUSHING transition being silently overwritten by CLOSING. This corrupts
the connection state machine and can cause use-after-free on RDMA request
objects during teardown.

Fix by adding req_lock protection to all rdma->state modifications in
recv_done() and p9_cm_event_handler(), matching the pattern already used
in rdma_request(). Use spin_lock_irqsave/spin_unlock_irqrestore in the CM
event handler since it can race with recv_done() which runs in softirq
context.

Tested with a kernel module that races two threads (simulating
rdma_request and recv_done/CM handler) on rdma->state with proper
locking: 5.5M+ FLUSHING writes over 27M iterations with 0 lost
transitions.

Fixes: 473c7dd1d7b5 ("9p/rdma: remove useless check in cm_event_handler") Reported-by: Yizhou Zhao Reported-by: Yuxiang Yang Reported-by: Ao Wang Reported-by: Xuewei Feng Reported-by: Qi Li Reported-by: Ke Xu Assisted-by: GLM:GLM-5.1 Signed-off-by: Yizhou Zhao Message-ID: <20260529073933.77315-1-zhaoyz24@mails.tsinghua.edu.cn> Signed-off-by: Dominique Martinet --- diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index aa5bd74d333f..b4274f10fa44 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -128,25 +128,36 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct p9_client *c = id->context; struct p9_trans_rdma *rdma = c->trans; + unsigned long flags; + switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_INIT); rdma->state = P9_RDMA_ADDR_RESOLVED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_ROUTE_RESOLVED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); rdma->state = P9_RDMA_ROUTE_RESOLVED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_ESTABLISHED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); rdma->state = P9_RDMA_CONNECTED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_DISCONNECTED: - if (rdma) + if (rdma) { + spin_lock_irqsave(&rdma->req_lock, flags); rdma->state = P9_RDMA_CLOSED; + spin_unlock_irqrestore(&rdma->req_lock, flags); + } c->status = Disconnected; break; @@ -184,6 +195,7 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) struct p9_req_t *req; int err = 0; int16_t tag; + unsigned long flags; req = NULL; ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, @@ -220,7 +232,10 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) err_out: p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, wc->status); - rdma->state = P9_RDMA_FLUSHING; 
+ spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_FLUSHING) + rdma->state = P9_RDMA_FLUSHING; + spin_unlock_irqrestore(&rdma->req_lock, flags); client->status = Disconnected; goto out; }