From: Alexander Schmidt <ALEXSCHM@de.ibm.com>
Subject: Add software flush CQE generation to the ehca driver
References: bnc#430344

When a QP goes into the error state, flush CQEs must be delivered to the
application for any outstanding work requests. eHCA does not do this in
hardware, so this patch adds software flush CQE generation to the ehca
driver.

Whenever a QP enters the error state, it is added to the QP error list of
its respective CQ. If the error QP list of a CQ is not empty, poll_cq()
generates flush CQEs for those QPs before polling the actual CQ.

Signed-off-by: Alexander Schmidt <alexs@linux.vnet.ibm.com>
Acked-by: John Jolly <jjolly@suse.de>
---
Applies on top of 2.6.27-rc3; please consider this for 2.6.28.

 drivers/infiniband/hw/ehca/ehca_classes.h |   14 +
 drivers/infiniband/hw/ehca/ehca_cq.c      |    3
 drivers/infiniband/hw/ehca/ehca_iverbs.h  |    2
 drivers/infiniband/hw/ehca/ehca_qp.c      |  225 ++++++++++++++++++++++++++++--
 drivers/infiniband/hw/ehca/ehca_reqs.c    |  211 ++++++++++++++++++++++++----
 5 files changed, 412 insertions(+), 43 deletions(-)

--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_classes.h
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -164,6 +164,13 @@ struct ehca_qmap_entry {
 	u16 reported;
 };
 
+struct ehca_queue_map {
+	struct ehca_qmap_entry *map;
+	unsigned int entries;
+	unsigned int tail;
+	unsigned int left_to_poll;
+};
+
 struct ehca_qp {
 	union {
 		struct ib_qp ib_qp;
@@ -173,8 +180,9 @@ struct ehca_qp {
 	enum ehca_ext_qp_type ext_type;
 	enum ib_qp_state state;
 	struct ipz_queue ipz_squeue;
-	struct ehca_qmap_entry *sq_map;
+	struct ehca_queue_map sq_map;
 	struct ipz_queue ipz_rqueue;
+	struct ehca_queue_map rq_map;
 	struct h_galpas galpas;
 	u32 qkey;
 	u32 real_qp_num;
@@ -204,6 +212,8 @@ struct ehca_qp {
 	atomic_t nr_events; /* events seen */
 	wait_queue_head_t wait_completion;
 	int mig_armed;
+	struct list_head sq_err_node;
+	struct list_head rq_err_node;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -233,6 +243,8 @@ struct ehca_cq {
 	/* mmap counter for resources mapped into user space */
 	u32 mm_count_queue;
 	u32 mm_count_galpa;
+	struct list_head sqp_err_list;
+	struct list_head rqp_err_list;
 };
 
 enum ehca_mr_flag {
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -53,9 +53,25 @@
 /* in RC traffic, insert an empty RDMA READ every this many packets */
 #define ACK_CIRC_THRESHOLD 2000000
 
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+	u64 ret;
+
+	ret = wr_id & ~QMAP_IDX_MASK;
+	ret |= idx & QMAP_IDX_MASK;
+
+	return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+	return wr_id & QMAP_IDX_MASK;
+}
+
 static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
 				  struct ehca_wqe *wqe_p,
-				  struct ib_recv_wr *recv_wr)
+				  struct ib_recv_wr *recv_wr,
+				  u32 rq_map_idx)
 {
 	u8 cnt_ds;
 	if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct
 	/* clear wqe header until sglist */
 	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
 	wqe_p->nr_of_data_seg = recv_wr->num_sge;
 
 	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct
 	u64 dma_length;
 	struct ehca_av *my_av;
 	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
 
 	if (unlikely((send_wr->num_sge < 0) ||
 		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct
 	/* clear wqe header until sglist */
 	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-	wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
-	wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
+	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
 
-	qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
-	qp->sq_map[sq_map_idx].reported = 0;
+	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+	qmap_entry->reported = 0;
 
 	switch (send_wr->opcode) {
 	case IB_WR_SEND:
@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehc
 	struct ehca_wqe *wqe_p;
 	int wqe_cnt = 0;
 	int ret = 0;
+	u32 rq_map_idx;
 	unsigned long flags;
+	struct ehca_qmap_entry *qmap_entry;
 
 	if (unlikely(!HAS_RQ(my_qp))) {
 		ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehc
 			}
 			goto post_recv_exit0;
 		}
+		/*
+		 * Get the index of the WQE in the recv queue. The same index
+		 * is used for writing into the rq_map.
+		 */
+		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
 		/* write a RECV WQE into the QUEUE */
-		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+				rq_map_idx);
 		/*
 		 * if something failed,
 		 * reset the free entry pointer to the start value
@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehc
 			}
 			goto post_recv_exit0;
 		}
+
+		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+		qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+		qmap_entry->reported = 0;
+
 		wqe_cnt++;
 	} /* eof for cur_recv_wr */
 
@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
 /* internal function to poll one entry of cq */
 static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
 {
-	int ret = 0;
+	int ret = 0, qmap_tail_idx;
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	struct ehca_cqe *cqe;
 	struct ehca_qp *my_qp;
+	struct ehca_qmap_entry *qmap_entry;
+	struct ehca_queue_map *qmap;
 	int cqe_count = 0, is_error;
 
 repoll:
@@ -674,27 +706,52 @@ repoll:
 		goto repoll;
 	wc->qp = &my_qp->ib_qp;
 
-	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
-		struct ehca_qmap_entry *qmap_entry;
+	if (is_error) {
 		/*
-		 * We got a send completion and need to restore the original
-		 * wr_id.
+		 * set left_to_poll to 0 because in error state, we will not
+		 * get any additional CQEs
 		 */
-		qmap_entry = &my_qp->sq_map[cqe->work_request_id &
-				QMAP_IDX_MASK];
+		ehca_add_to_err_list(my_qp, 1);
+		my_qp->sq_map.left_to_poll = 0;
 
-		if (qmap_entry->reported) {
-			ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-					my_qp->real_qp_num);
-			/* found a double cqe, discard it and read next one */
-			goto repoll;
-		}
-		wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
-		wc->wr_id |= qmap_entry->app_wr_id;
-		qmap_entry->reported = 1;
-	} else
+		if (HAS_RQ(my_qp))
+			ehca_add_to_err_list(my_qp, 0);
+		my_qp->rq_map.left_to_poll = 0;
+	}
+
+	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+		/* We got a send completion. */
+		qmap = &my_qp->sq_map;
+	else
 		/* We got a receive completion. */
-		wc->wr_id = cqe->work_request_id;
+		qmap = &my_qp->rq_map;
+
+	qmap_entry = &qmap->map[qmap_tail_idx];
+	if (qmap_entry->reported) {
+		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+				my_qp->real_qp_num);
+		/* found a double cqe, discard it and read next one */
+		goto repoll;
+	}
+
+	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+	qmap_entry->reported = 1;
+
+	/* this is a proper completion, we need to advance the tail pointer */
+	if (++qmap->tail == qmap->entries)
+		qmap->tail = 0;
+
+	/* if left_to_poll is decremented to 0, add the QP to the error list */
+	if (qmap->left_to_poll > 0) {
+		qmap->left_to_poll--;
+		if ((my_qp->sq_map.left_to_poll == 0) &&
+				(my_qp->rq_map.left_to_poll == 0)) {
+			ehca_add_to_err_list(my_qp, 1);
+			if (HAS_RQ(my_qp))
+				ehca_add_to_err_list(my_qp, 0);
+		}
+	}
 
 	/* eval ib_wc_opcode */
 	wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -733,13 +790,88 @@ poll_cq_one_exit0:
 	return ret;
 }
 
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+			       struct ib_wc *wc, int num_entries,
+			       struct ipz_queue *ipz_queue, int on_sq)
+{
+	int nr = 0;
+	struct ehca_wqe *wqe;
+	u64 offset;
+	struct ehca_queue_map *qmap;
+	struct ehca_qmap_entry *qmap_entry;
+
+	if (on_sq)
+		qmap = &my_qp->sq_map;
+	else
+		qmap = &my_qp->rq_map;
+
+	qmap_entry = &qmap->map[qmap->tail];
+
+	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+		/* generate flush CQE */
+		memset(wc, 0, sizeof(*wc));
+
+		offset = qmap->tail * ipz_queue->qe_size;
+		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+		if (!wqe) {
+			ehca_err(cq->device, "Invalid wqe offset=%#lx on "
+				 "qp_num=%#x", offset, my_qp->real_qp_num);
+			return nr;
+		}
+
+		wc->wr_id = replace_wr_id(wqe->work_request_id,
+					  qmap_entry->app_wr_id);
+
+		if (on_sq) {
+			switch (wqe->optype) {
+			case WQE_OPTYPE_SEND:
+				wc->opcode = IB_WC_SEND;
+				break;
+			case WQE_OPTYPE_RDMAWRITE:
+				wc->opcode = IB_WC_RDMA_WRITE;
+				break;
+			case WQE_OPTYPE_RDMAREAD:
+				wc->opcode = IB_WC_RDMA_READ;
+				break;
+			default:
+				ehca_err(cq->device, "Invalid optype=%x",
+					 wqe->optype);
+				return nr;
+			}
+		} else
+			wc->opcode = IB_WC_RECV;
+
+		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+			wc->ex.imm_data = wqe->immediate_data;
+			wc->wc_flags |= IB_WC_WITH_IMM;
+		}
+
+		wc->status = IB_WC_WR_FLUSH_ERR;
+
+		wc->qp = &my_qp->ib_qp;
+
+		/* mark as reported and advance tail pointer */
+		qmap_entry->reported = 1;
+		if (++qmap->tail == qmap->entries)
+			qmap->tail = 0;
+		qmap_entry = &qmap->map[qmap->tail];
+
+		wc++; nr++;
+	}
+
+	return nr;
+
+}
+
 int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 {
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	int nr;
+	struct ehca_qp *err_qp;
 	struct ib_wc *current_wc = wc;
 	int ret = 0;
 	unsigned long flags;
+	int entries_left = num_entries;
 
 	if (num_entries < 1) {
 		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int n
 	}
 
 	spin_lock_irqsave(&my_cq->spinlock, flags);
-	for (nr = 0; nr < num_entries; nr++) {
+
+	/* generate flush cqes for send queues */
+	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+				&err_qp->ipz_squeue, 1);
+		entries_left -= nr;
+		current_wc += nr;
+
+		if (entries_left == 0)
+			break;
+	}
+
+	/* generate flush cqes for receive queues */
+	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+				&err_qp->ipz_rqueue, 0);
+		entries_left -= nr;
+		current_wc += nr;
+
+		if (entries_left == 0)
+			break;
+	}
+
+	for (nr = 0; nr < entries_left; nr++) {
 		ret = ehca_poll_cq_one(cq, current_wc);
 		if (ret)
 			break;
 		current_wc++;
 	} /* eof for nr */
+	entries_left -= nr;
+
 	spin_unlock_irqrestore(&my_cq->spinlock, flags);
 	if (ret == -EAGAIN || !ret)
-		ret = nr;
+		ret = num_entries - entries_left;
 
 poll_cq_exit0:
 	return ret;
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_cq.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_d
 	for (i = 0; i < QP_HASHTAB_LEN; i++)
 		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
 
+	INIT_LIST_HEAD(&my_cq->sqp_err_list);
+	INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
 	if (context) {
 		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
 		struct ehca_create_cq_resp resp;
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_qp.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(s
 	queue->is_small = (queue->page_size != 0);
 }
 
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+	struct list_head *list, *node;
+
+	/* TODO: support low latency QPs */
+	if (qp->ext_type == EQPT_LLQP)
+		return;
+
+	if (on_sq) {
+		list = &qp->send_cq->sqp_err_list;
+		node = &qp->sq_err_node;
+	} else {
+		list = &qp->recv_cq->rqp_err_list;
+		node = &qp->rq_err_node;
+	}
+
+	if (list_empty(node))
+		list_add_tail(node, list);
+
+	return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->spinlock, flags);
+
+	if (!list_empty(node))
+		list_del_init(node);
+
+	spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+	int i;
+
+	qmap->tail = 0;
+	for (i = 0; i < qmap->entries; i++)
+		qmap->map[i].reported = 1;
+}
+
 /*
  * Create an ib_qp struct that is either a QP or an SRQ, depending on
  * the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_q
 	struct ib_srq_init_attr *srq_init_attr,
 	struct ib_udata *udata, int is_srq)
 {
-	struct ehca_qp *my_qp;
+	struct ehca_qp *my_qp, *my_srq = NULL;
 	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
 					      ib_device);
 	struct ib_ucontext *context = NULL;
-	u32 nr_qes;
 	u64 h_ret;
 	int is_llqp = 0, has_srq = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
@@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_q
 
 	/* handle SRQ base QPs */
 	if (init_attr->srq) {
-		struct ehca_qp *my_srq =
-			container_of(init_attr->srq, struct ehca_qp, ib_srq);
+		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
 
 		has_srq = 1;
 		parms.ext_type = EQPT_SRQBASE;
@@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit2;
 		}
-		nr_qes = my_qp->ipz_squeue.queue_length /
+
+		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
			 my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map = vmalloc(nr_qes *
+		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
					sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map) {
+		if (!my_qp->sq_map.map) {
 			ehca_err(pd->device, "Couldn't allocate squeue "
				 "map ret=%i", ret);
 			goto create_qp_exit3;
 		}
+		INIT_LIST_HEAD(&my_qp->sq_err_node);
+		/* to avoid the generation of bogus flush CQEs */
+		reset_queue_map(&my_qp->sq_map);
 	}
 
 	if (HAS_RQ(my_qp)) {
@@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
 		}
+
+		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+			my_qp->ipz_rqueue.qe_size;
+		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+				sizeof(struct ehca_qmap_entry));
+		if (!my_qp->rq_map.map) {
+			ehca_err(pd->device, "Couldn't allocate rqueue "
+					"map ret=%i", ret);
+			goto create_qp_exit5;
+		}
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+		/* to avoid the generation of bogus flush CQEs */
+		reset_queue_map(&my_qp->rq_map);
+	} else if (init_attr->srq) {
+		/* this is a base QP, use the queue map of the SRQ */
+		my_qp->rq_map = my_srq->rq_map;
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
 	}
 
 	if (is_srq) {
@@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_q
 		if (ret) {
 			ehca_err(pd->device,
				 "Couldn't assign qp to send_cq ret=%i", ret);
-			goto create_qp_exit6;
+			goto create_qp_exit7;
 		}
 	}
 
@@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_q
 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
 			ehca_err(pd->device, "Copy to udata failed");
 			ret = -EINVAL;
-			goto create_qp_exit7;
+			goto create_qp_exit8;
 		}
 	}
 
 	return my_qp;
 
-create_qp_exit7:
+create_qp_exit8:
 	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
 
-create_qp_exit6:
+create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
 
+create_qp_exit6:
+	if (HAS_RQ(my_qp))
+		vfree(my_qp->rq_map.map);
+
 create_qp_exit5:
 	if (HAS_RQ(my_qp))
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
 	if (HAS_SQ(my_qp))
-		vfree(my_qp->sq_map);
+		vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
 	if (HAS_SQ(my_qp))
@@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_q
 	return 0;
 }
 
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+			  struct ehca_queue_map *qmap)
+{
+	void *wqe_v;
+	u64 q_ofs;
+	u32 wqe_idx;
+
+	/* convert real to abs address */
+	wqe_p = wqe_p & (~(1UL << 63));
+
+	wqe_v = abs_to_virt(wqe_p);
+
+	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+		ehca_gen_err("Invalid offset for calculating left cqes "
+				"wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
+		return -EFAULT;
+	}
+
+	wqe_idx = q_ofs / ipz_queue->qe_size;
+	if (wqe_idx < qmap->tail)
+		qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
+	else
+		qmap->left_to_poll = wqe_idx - qmap->tail;
+
+	return 0;
+}
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+	u64 h_ret;
+	void *send_wqe_p, *recv_wqe_p;
+	int ret;
+	unsigned long flags;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* this hcall is not supported on base QPs */
+	if (my_qp->ext_type != EQPT_SRQBASE) {
+		/* get send and receive wqe pointer */
+		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle, &my_qp->pf,
+				&send_wqe_p, &recv_wqe_p, 4);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+				 "failed ehca_qp=%p qp_num=%x h_ret=%li",
+				 my_qp, qp_num, h_ret);
+			return ehca2ib_return_code(h_ret);
+		}
+
+		/*
+		 * acquire lock to ensure that nobody is polling the cq which
+		 * could mean that the qmap->tail pointer is in an
+		 * inconsistent state.
+		 */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+				&my_qp->sq_map);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+		if (ret)
+			return ret;
+
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+				&my_qp->rq_map);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+		if (ret)
+			return ret;
+	} else {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		my_qp->sq_map.left_to_poll = 0;
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		my_qp->rq_map.left_to_poll = 0;
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	}
+
+	/* this assures flush cqes being generated only for pending wqes */
+	if ((my_qp->sq_map.left_to_poll == 0) &&
+	    (my_qp->rq_map.left_to_poll == 0)) {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ehca_add_to_err_list(my_qp, 1);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		if (HAS_RQ(my_qp)) {
+			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+			ehca_add_to_err_list(my_qp, 0);
+			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+					flags);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * internal_modify_qp with circumvention to handle aqp0 properly
  * smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_
 			goto modify_qp_exit2;
 		}
 	}
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+		ret = check_for_left_cqes(my_qp, shca);
+		if (ret)
+			goto modify_qp_exit2;
+	}
 
 	if (statetrans == IB_QPST_ANY2RESET) {
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
+
+		if (qp_cur_state == IB_QPS_ERR) {
+			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+			if (HAS_RQ(my_qp))
+				del_from_err_list(my_qp->recv_cq,
+						&my_qp->rq_err_node);
+		}
+		reset_queue_map(&my_qp->sq_map);
+
+		if (HAS_RQ(my_qp))
+			reset_queue_map(&my_qp->rq_map);
 	}
 
 	if (attr_mask & IB_QP_QKEY)
@@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib
 	idr_remove(&ehca_qp_idr, my_qp->token);
 	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
+	/*
+	 * SRQs will never get into an error list and do not have a recv_cq,
+	 * so we need to skip them here.
+	 */
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+	if (HAS_SQ(my_qp))
+		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
 	/* now wait until all pending events have completed */
 	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
 
@@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib
 	if (qp_type == IB_QPT_GSI) {
 		struct ib_event event;
 		ehca_info(dev, "device %s: port %x is inactive.",
-			  shca->ib_device.name, port_num);
+				shca->ib_device.name, port_num);
 		event.device = &shca->ib_device;
 		event.event = IB_EVENT_PORT_ERR;
 		event.element.port_num = port_num;
@@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib
 		ib_dispatch_event(&event);
 	}
 
-	if (HAS_RQ(my_qp))
+	if (HAS_RQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+		vfree(my_qp->rq_map.map);
+	}
 	if (HAS_SQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-		vfree(my_qp->sq_map);
+
+		vfree(my_qp->sq_map.map);
 	}
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
 int ehca_calc_ipd(struct ehca_shca *shca, int port,
		  enum ib_rate path_rate, u32 *ipd);
 
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
 #ifdef CONFIG_PPC_64K_PAGES
 void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
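
For reference, the wrap-around arithmetic calc_left_cqes() uses to work out
how many successful CQEs are still expected between the queue map tail and
the WQE pointer returned by the hypervisor, as a standalone example with
made-up numbers:

#include <stdio.h>

static unsigned int left_to_poll(unsigned int wqe_idx, unsigned int tail,
				 unsigned int entries)
{
	if (wqe_idx < tail)	/* hardware pointer wrapped past the ring end */
		return (entries - tail) + wqe_idx;
	return wqe_idx - tail;
}

int main(void)
{
	/* no wrap: hw at slot 7, tail at slot 3, 16-entry ring -> 4 left */
	printf("%u\n", left_to_poll(7, 3, 16));
	/* wrap: hw at slot 1, tail at slot 14, 16-entry ring -> 3 left */
	printf("%u\n", left_to_poll(1, 14, 16));
	return 0;
}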