From foo@baz Mon Oct 8 18:01:43 CEST 2018
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 3 Sep 2018 03:47:07 -0700
Subject: nvmet-rdma: fix possible bogus dereference under heavy load

From: Sagi Grimberg <sagi@grimberg.me>

[ Upstream commit 8407879c4e0d7731f6e7e905893cecf61a7762c7 ]

Currently we always repost the recv buffer before we send a response
capsule back to the host. Since ordering is not guaranteed for send
and recv completions, it is possible that we will receive a new request
from the host before we get a send completion for the response capsule.

Today, we pre-allocate 2x the queue length in rsps, but in reality,
under heavy load nothing really prevents the gap from growing until
we exhaust all our rsps.
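
Concretely: an rsp is taken when a command arrives and is only returned
to the free list after the send completion for its response fires, so
once send completions lag more than twice the queue length behind recv
completions the free list runs empty, and the old list_first_entry()
call then hands back a pointer derived from the list head itself, which
is the bogus dereference the subject refers to.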

To fix this, if we don't have any pre-allocated rsps left, we dynamically
allocate an rsp and make sure to free it when we are done. If under memory
pressure we fail to allocate an rsp, we silently drop the command and
wait for the host to retry.
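
Viewed on its own, what the patch implements is a fixed pool with an
allocator fallback: take from the free list if possible, otherwise
heap-allocate and mark the object so the put path frees it instead of
pooling it. Below is a minimal userspace C sketch of that pattern; the
names (struct pool, pool_init, get_rsp, put_rsp) are illustrative, not
the driver's, with a pthread mutex standing in for spin_lock_irqsave()
and malloc() for kmalloc():

#include <stdbool.h>
#include <stdlib.h>
#include <pthread.h>

struct rsp {
	struct rsp *next;
	bool allocated;		/* true: came from malloc(), free() on put */
};

struct pool {
	pthread_mutex_t lock;
	struct rsp *free_list;
};

/* Pre-populate the pool; calloc() zeroes, so allocated starts false. */
int pool_init(struct pool *p, unsigned int n)
{
	struct rsp *rsps = calloc(n, sizeof(*rsps));
	unsigned int i;

	if (!rsps)
		return -1;
	pthread_mutex_init(&p->lock, NULL);
	p->free_list = NULL;
	for (i = 0; i < n; i++) {
		rsps[i].next = p->free_list;
		p->free_list = &rsps[i];
	}
	return 0;
}

struct rsp *get_rsp(struct pool *p)
{
	struct rsp *rsp;

	pthread_mutex_lock(&p->lock);
	rsp = p->free_list;
	if (rsp)
		p->free_list = rsp->next;
	pthread_mutex_unlock(&p->lock);

	if (!rsp) {
		/* pool exhausted: fall back to the allocator */
		rsp = malloc(sizeof(*rsp));
		if (!rsp)
			return NULL;	/* caller must cope, e.g. drop */
		rsp->allocated = true;
	}
	return rsp;
}

void put_rsp(struct pool *p, struct rsp *rsp)
{
	if (rsp->allocated) {	/* fallback object never enters the pool */
		free(rsp);
		return;
	}
	pthread_mutex_lock(&p->lock);
	rsp->next = p->free_list;
	p->free_list = rsp;
	pthread_mutex_unlock(&p->lock);
}

Note that the put path tests allocated before touching the pool, exactly
as nvmet_rdma_put_rsp() does below, and that the pattern is only correct
if pooled objects start out with allocated == false; the sketch
guarantees that with calloc(), and the driver must make the equivalent
guarantee for its pre-allocated rsp array.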

Reported-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
[hch: dropped a superfluous assignment]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvme/target/rdma.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -65,6 +65,7 @@ struct nvmet_rdma_rsp {

 	struct nvmet_req	req;

+	bool			allocated;
 	u8			n_rdma;
 	u32			flags;
 	u32			invalidate_rkey;
@@ -167,11 +168,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_que
 	unsigned long flags;

 	spin_lock_irqsave(&queue->rsps_lock, flags);
-	rsp = list_first_entry(&queue->free_rsps,
+	rsp = list_first_entry_or_null(&queue->free_rsps,
 				struct nvmet_rdma_rsp, free_list);
-	list_del(&rsp->free_list);
+	if (likely(rsp))
+		list_del(&rsp->free_list);
 	spin_unlock_irqrestore(&queue->rsps_lock, flags);

+	if (unlikely(!rsp)) {
+		rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+		if (unlikely(!rsp))
+			return NULL;
+		rsp->allocated = true;
+	}
+
 	return rsp;
 }

@@ -180,6 +189,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp
 {
 	unsigned long flags;

+	if (rsp->allocated) {
+		kfree(rsp);
+		return;
+	}
+
 	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
 	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
 	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
@@ -755,6 +769,15 @@ static void nvmet_rdma_recv_done(struct

 	cmd->queue = queue;
 	rsp = nvmet_rdma_get_rsp(queue);
+	if (unlikely(!rsp)) {
+		/*
+		 * we get here only under memory pressure,
+		 * silently drop and have the host retry
+		 * as we can't even fail it.
+		 */
+		nvmet_rdma_post_recv(queue->dev, cmd);
+		return;
+	}
 	rsp->queue = queue;
 	rsp->cmd = cmd;
 	rsp->flags = 0;
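
Two remarks on the final hunk. Silently dropping the command is viable
here because a host that never receives a response capsule will time the
command out and retry it, and with no rsp available the target cannot
even send an error completion back. And kmalloc(..., GFP_KERNEL) in
nvmet_rdma_get_rsp() assumes the completion handler may sleep; if I read
the driver right, nvmet-rdma creates its CQs with IB_POLL_WORKQUEUE, so
the handler runs in process context and a sleeping allocation is fine,
but a tree that polls its CQs in softirq context would need GFP_ATOMIC
here instead.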