1 From: Alexander Schmidt <ALEXSCHM@de.ibm.com>
2 Subject: Adds software flush CQE generation to the ehca driver.
3 References: bnc#430344
4
5 When a QP enters the error state, flush CQEs must be delivered to the
6 application for any outstanding work requests. eHCA does not do this in
7 hardware, so this patch adds software flush CQE generation to the ehca
8 driver.
9
10 Whenever a QP enters the error state, it is added to the QP error list of
11 its respective CQ. If the error QP list of a CQ is not empty, poll_cq()
12 generates flush CQEs before polling the actual CQ.
13
14 Signed-off-by: Alexander Schmidt <alexs@linux.vnet.ibm.com>
15 Acked-by: John Jolly <jjolly@suse.de>
16 ---
17 Applies on top of 2.6.27-rc3, please consider this for 2.6.28.
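
For context, here is a minimal sketch (not part of this patch) of how a kernel
consumer observes the new behaviour: after the QP is moved to the error state,
polling the CQ now also returns IB_WC_WR_FLUSH_ERR completions for work
requests that were still outstanding. Only the generic kernel verbs API is
used; the helper name drain_qp_example() is made up for illustration.

    /* Illustration only -- generic kernel verbs, not ehca driver code. */
    #include <linux/kernel.h>
    #include <rdma/ib_verbs.h>

    static void drain_qp_example(struct ib_qp *qp, struct ib_cq *cq)
    {
            struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
            struct ib_wc wc;

            /* Force the QP into the error state; pending WRs must now flush. */
            if (ib_modify_qp(qp, &attr, IB_QP_STATE))
                    return;

            /* With this patch, ehca generates the flush CQEs in software. */
            while (ib_poll_cq(cq, 1, &wc) > 0) {
                    if (wc.status == IB_WC_WR_FLUSH_ERR)
                            pr_debug("flushed wr_id=%llx\n",
                                     (unsigned long long)wc.wr_id);
            }
    }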
18
19 drivers/infiniband/hw/ehca/ehca_classes.h | 14 +
20 drivers/infiniband/hw/ehca/ehca_cq.c | 3
21 drivers/infiniband/hw/ehca/ehca_iverbs.h | 2
22 drivers/infiniband/hw/ehca/ehca_qp.c | 225 ++++++++++++++++++++++++++++--
23 drivers/infiniband/hw/ehca/ehca_reqs.c | 211 ++++++++++++++++++++++++----
24 5 files changed, 412 insertions(+), 43 deletions(-)
25
26 --- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_classes.h
27 +++ infiniband.git/drivers/infiniband/hw/ehca/ehca_classes.h
28 @@ -164,6 +164,13 @@ struct ehca_qmap_entry {
29 u16 reported;
30 };
31
32 +struct ehca_queue_map {
33 + struct ehca_qmap_entry *map;
34 + unsigned int entries;
35 + unsigned int tail;
36 + unsigned int left_to_poll;
37 +};
38 +
39 struct ehca_qp {
40 union {
41 struct ib_qp ib_qp;
42 @@ -173,8 +180,9 @@ struct ehca_qp {
43 enum ehca_ext_qp_type ext_type;
44 enum ib_qp_state state;
45 struct ipz_queue ipz_squeue;
46 - struct ehca_qmap_entry *sq_map;
47 + struct ehca_queue_map sq_map;
48 struct ipz_queue ipz_rqueue;
49 + struct ehca_queue_map rq_map;
50 struct h_galpas galpas;
51 u32 qkey;
52 u32 real_qp_num;
53 @@ -204,6 +212,8 @@ struct ehca_qp {
54 atomic_t nr_events; /* events seen */
55 wait_queue_head_t wait_completion;
56 int mig_armed;
57 + struct list_head sq_err_node;
58 + struct list_head rq_err_node;
59 };
60
61 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
62 @@ -233,6 +243,8 @@ struct ehca_cq {
63 /* mmap counter for resources mapped into user space */
64 u32 mm_count_queue;
65 u32 mm_count_galpa;
66 + struct list_head sqp_err_list;
67 + struct list_head rqp_err_list;
68 };
69
70 enum ehca_mr_flag {
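
The queue maps above keep one ehca_qmap_entry per WQE slot. On the post path
the driver stores the slot index in the low bits of the WQE's work_request_id
and remembers the application's original low bits in app_wr_id, so the wr_id
reported in the completion can be reconstructed later. A standalone sketch of
that round trip follows; QMAP_IDX_MASK is defined elsewhere in the driver, and
the 16-bit width assumed below is for illustration only.

    /* Standalone illustration of the wr_id <-> map-index round trip. */
    #include <assert.h>
    #include <stdint.h>

    #define QMAP_IDX_MASK 0xffffULL         /* assumed width for this sketch */

    int main(void)
    {
            uint64_t app_wr_id = 0xdeadbeefcafe1234ULL; /* wr_id chosen by app */
            uint16_t idx = 42;                          /* queue map slot */

            /* post path: stash the slot index in the low bits of the wr_id */
            uint64_t wire = (app_wr_id & ~QMAP_IDX_MASK) | (idx & QMAP_IDX_MASK);
            uint16_t saved = app_wr_id & QMAP_IDX_MASK; /* kept in app_wr_id */

            /* completion path: recover the index, then the original wr_id */
            assert((wire & QMAP_IDX_MASK) == idx);
            assert(((wire & ~QMAP_IDX_MASK) | saved) == app_wr_id);
            return 0;
    }
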
71 --- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_reqs.c
72 +++ infiniband.git/drivers/infiniband/hw/ehca/ehca_reqs.c
73 @@ -53,9 +53,25 @@
74 /* in RC traffic, insert an empty RDMA READ every this many packets */
75 #define ACK_CIRC_THRESHOLD 2000000
76
77 +static u64 replace_wr_id(u64 wr_id, u16 idx)
78 +{
79 + u64 ret;
80 +
81 + ret = wr_id & ~QMAP_IDX_MASK;
82 + ret |= idx & QMAP_IDX_MASK;
83 +
84 + return ret;
85 +}
86 +
87 +static u16 get_app_wr_id(u64 wr_id)
88 +{
89 + return wr_id & QMAP_IDX_MASK;
90 +}
91 +
92 static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
93 struct ehca_wqe *wqe_p,
94 - struct ib_recv_wr *recv_wr)
95 + struct ib_recv_wr *recv_wr,
96 + u32 rq_map_idx)
97 {
98 u8 cnt_ds;
99 if (unlikely((recv_wr->num_sge < 0) ||
100 @@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct
101 /* clear wqe header until sglist */
102 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
103
104 - wqe_p->work_request_id = recv_wr->wr_id;
105 + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
106 wqe_p->nr_of_data_seg = recv_wr->num_sge;
107
108 for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
109 @@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct
110 u64 dma_length;
111 struct ehca_av *my_av;
112 u32 remote_qkey = send_wr->wr.ud.remote_qkey;
113 + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
114
115 if (unlikely((send_wr->num_sge < 0) ||
116 (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
117 @@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct
118 /* clear wqe header until sglist */
119 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
120
121 - wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
122 - wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
123 + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
124
125 - qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
126 - qp->sq_map[sq_map_idx].reported = 0;
127 + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
128 + qmap_entry->reported = 0;
129
130 switch (send_wr->opcode) {
131 case IB_WR_SEND:
132 @@ -496,7 +512,9 @@ static int internal_post_recv(struct ehc
133 struct ehca_wqe *wqe_p;
134 int wqe_cnt = 0;
135 int ret = 0;
136 + u32 rq_map_idx;
137 unsigned long flags;
138 + struct ehca_qmap_entry *qmap_entry;
139
140 if (unlikely(!HAS_RQ(my_qp))) {
141 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
142 @@ -524,8 +542,15 @@ static int internal_post_recv(struct ehc
143 }
144 goto post_recv_exit0;
145 }
146 + /*
147 + * Get the index of the WQE in the recv queue. The same index
148 + * is used for writing into the rq_map.
149 + */
150 + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
151 +
152 /* write a RECV WQE into the QUEUE */
153 - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
154 + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
155 + rq_map_idx);
156 /*
157 * if something failed,
158 * reset the free entry pointer to the start value
159 @@ -540,6 +565,11 @@ static int internal_post_recv(struct ehc
160 }
161 goto post_recv_exit0;
162 }
163 +
164 + qmap_entry = &my_qp->rq_map.map[rq_map_idx];
165 + qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
166 + qmap_entry->reported = 0;
167 +
168 wqe_cnt++;
169 } /* eof for cur_recv_wr */
170
171 @@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
172 /* internal function to poll one entry of cq */
173 static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
174 {
175 - int ret = 0;
176 + int ret = 0, qmap_tail_idx;
177 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
178 struct ehca_cqe *cqe;
179 struct ehca_qp *my_qp;
180 + struct ehca_qmap_entry *qmap_entry;
181 + struct ehca_queue_map *qmap;
182 int cqe_count = 0, is_error;
183
184 repoll:
185 @@ -674,27 +706,52 @@ repoll:
186 goto repoll;
187 wc->qp = &my_qp->ib_qp;
188
189 - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
190 - struct ehca_qmap_entry *qmap_entry;
191 + if (is_error) {
192 /*
193 - * We got a send completion and need to restore the original
194 - * wr_id.
195 + * set left_to_poll to 0 because in error state, we will not
196 + * get any additional CQEs
197 */
198 - qmap_entry = &my_qp->sq_map[cqe->work_request_id &
199 - QMAP_IDX_MASK];
200 + ehca_add_to_err_list(my_qp, 1);
201 + my_qp->sq_map.left_to_poll = 0;
202
203 - if (qmap_entry->reported) {
204 - ehca_warn(cq->device, "Double cqe on qp_num=%#x",
205 - my_qp->real_qp_num);
206 - /* found a double cqe, discard it and read next one */
207 - goto repoll;
208 - }
209 - wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
210 - wc->wr_id |= qmap_entry->app_wr_id;
211 - qmap_entry->reported = 1;
212 - } else
213 + if (HAS_RQ(my_qp))
214 + ehca_add_to_err_list(my_qp, 0);
215 + my_qp->rq_map.left_to_poll = 0;
216 + }
217 +
218 + qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
219 + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
220 + /* We got a send completion. */
221 + qmap = &my_qp->sq_map;
222 + else
223 /* We got a receive completion. */
224 - wc->wr_id = cqe->work_request_id;
225 + qmap = &my_qp->rq_map;
226 +
227 + qmap_entry = &qmap->map[qmap_tail_idx];
228 + if (qmap_entry->reported) {
229 + ehca_warn(cq->device, "Double cqe on qp_num=%#x",
230 + my_qp->real_qp_num);
231 + /* found a double cqe, discard it and read next one */
232 + goto repoll;
233 + }
234 +
235 + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
236 + qmap_entry->reported = 1;
237 +
238 + /* this is a proper completion, we need to advance the tail pointer */
239 + if (++qmap->tail == qmap->entries)
240 + qmap->tail = 0;
241 +
242 + /* if left_to_poll is decremented to 0, add the QP to the error list */
243 + if (qmap->left_to_poll > 0) {
244 + qmap->left_to_poll--;
245 + if ((my_qp->sq_map.left_to_poll == 0) &&
246 + (my_qp->rq_map.left_to_poll == 0)) {
247 + ehca_add_to_err_list(my_qp, 1);
248 + if (HAS_RQ(my_qp))
249 + ehca_add_to_err_list(my_qp, 0);
250 + }
251 + }
252
253 /* eval ib_wc_opcode */
254 wc->opcode = ib_wc_opcode[cqe->optype]-1;
255 @@ -733,13 +790,88 @@ poll_cq_one_exit0:
256 return ret;
257 }
258
259 +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
260 + struct ib_wc *wc, int num_entries,
261 + struct ipz_queue *ipz_queue, int on_sq)
262 +{
263 + int nr = 0;
264 + struct ehca_wqe *wqe;
265 + u64 offset;
266 + struct ehca_queue_map *qmap;
267 + struct ehca_qmap_entry *qmap_entry;
268 +
269 + if (on_sq)
270 + qmap = &my_qp->sq_map;
271 + else
272 + qmap = &my_qp->rq_map;
273 +
274 + qmap_entry = &qmap->map[qmap->tail];
275 +
276 + while ((nr < num_entries) && (qmap_entry->reported == 0)) {
277 + /* generate flush CQE */
278 + memset(wc, 0, sizeof(*wc));
279 +
280 + offset = qmap->tail * ipz_queue->qe_size;
281 + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
282 + if (!wqe) {
283 + ehca_err(cq->device, "Invalid wqe offset=%#lx on "
284 + "qp_num=%#x", offset, my_qp->real_qp_num);
285 + return nr;
286 + }
287 +
288 + wc->wr_id = replace_wr_id(wqe->work_request_id,
289 + qmap_entry->app_wr_id);
290 +
291 + if (on_sq) {
292 + switch (wqe->optype) {
293 + case WQE_OPTYPE_SEND:
294 + wc->opcode = IB_WC_SEND;
295 + break;
296 + case WQE_OPTYPE_RDMAWRITE:
297 + wc->opcode = IB_WC_RDMA_WRITE;
298 + break;
299 + case WQE_OPTYPE_RDMAREAD:
300 + wc->opcode = IB_WC_RDMA_READ;
301 + break;
302 + default:
303 + ehca_err(cq->device, "Invalid optype=%x",
304 + wqe->optype);
305 + return nr;
306 + }
307 + } else
308 + wc->opcode = IB_WC_RECV;
309 +
310 + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
311 + wc->ex.imm_data = wqe->immediate_data;
312 + wc->wc_flags |= IB_WC_WITH_IMM;
313 + }
314 +
315 + wc->status = IB_WC_WR_FLUSH_ERR;
316 +
317 + wc->qp = &my_qp->ib_qp;
318 +
319 + /* mark as reported and advance tail pointer */
320 + qmap_entry->reported = 1;
321 + if (++qmap->tail == qmap->entries)
322 + qmap->tail = 0;
323 + qmap_entry = &qmap->map[qmap->tail];
324 +
325 + wc++; nr++;
326 + }
327 +
328 + return nr;
329 +
330 +}
331 +
332 int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
333 {
334 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
335 int nr;
336 + struct ehca_qp *err_qp;
337 struct ib_wc *current_wc = wc;
338 int ret = 0;
339 unsigned long flags;
340 + int entries_left = num_entries;
341
342 if (num_entries < 1) {
343 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
344 @@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int n
345 }
346
347 spin_lock_irqsave(&my_cq->spinlock, flags);
348 - for (nr = 0; nr < num_entries; nr++) {
349 +
350 + /* generate flush cqes for send queues */
351 + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
352 + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
353 + &err_qp->ipz_squeue, 1);
354 + entries_left -= nr;
355 + current_wc += nr;
356 +
357 + if (entries_left == 0)
358 + break;
359 + }
360 +
361 + /* generate flush cqes for receive queues */
362 + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
363 + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
364 + &err_qp->ipz_rqueue, 0);
365 + entries_left -= nr;
366 + current_wc += nr;
367 +
368 + if (entries_left == 0)
369 + break;
370 + }
371 +
372 + for (nr = 0; nr < entries_left; nr++) {
373 ret = ehca_poll_cq_one(cq, current_wc);
374 if (ret)
375 break;
376 current_wc++;
377 } /* eof for nr */
378 + entries_left -= nr;
379 +
380 spin_unlock_irqrestore(&my_cq->spinlock, flags);
381 if (ret == -EAGAIN || !ret)
382 - ret = nr;
383 + ret = num_entries - entries_left;
384
385 poll_cq_exit0:
386 return ret;
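
The flush generator and the regular completion path above share one piece of
ring arithmetic: the queue-map tail advances by one slot per reported
completion and wraps at qmap->entries, and generate_flush_cqes() simply walks
from the tail until it finds an entry that was already reported or the
caller's budget is used up. A condensed sketch of that walk, using a
simplified stand-in for struct ehca_queue_map:

    /* Simplified stand-in for struct ehca_queue_map, illustration only. */
    struct queue_map {
            unsigned int entries;    /* number of slots in the ring */
            unsigned int tail;       /* next slot expected to complete */
            unsigned char *reported; /* set once a CQE was delivered */
    };

    /* Count how many flush CQEs would be generated, at most "budget". */
    static int walk_unreported(struct queue_map *qmap, int budget)
    {
            int nr = 0;

            while (nr < budget && !qmap->reported[qmap->tail]) {
                    qmap->reported[qmap->tail] = 1; /* mark slot as flushed */
                    if (++qmap->tail == qmap->entries)
                            qmap->tail = 0;         /* wrap around the ring */
                    nr++;
            }
            return nr;
    }
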
387 --- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_cq.c
388 +++ infiniband.git/drivers/infiniband/hw/ehca/ehca_cq.c
389 @@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_d
390 for (i = 0; i < QP_HASHTAB_LEN; i++)
391 INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
392
393 + INIT_LIST_HEAD(&my_cq->sqp_err_list);
394 + INIT_LIST_HEAD(&my_cq->rqp_err_list);
395 +
396 if (context) {
397 struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
398 struct ehca_create_cq_resp resp;
399 --- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_qp.c
400 +++ infiniband.git/drivers/infiniband/hw/ehca/ehca_qp.c
401 @@ -396,6 +396,50 @@ static void ehca_determine_small_queue(s
402 queue->is_small = (queue->page_size != 0);
403 }
404
405 +/* needs to be called with cq->spinlock held */
406 +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
407 +{
408 + struct list_head *list, *node;
409 +
410 + /* TODO: support low latency QPs */
411 + if (qp->ext_type == EQPT_LLQP)
412 + return;
413 +
414 + if (on_sq) {
415 + list = &qp->send_cq->sqp_err_list;
416 + node = &qp->sq_err_node;
417 + } else {
418 + list = &qp->recv_cq->rqp_err_list;
419 + node = &qp->rq_err_node;
420 + }
421 +
422 + if (list_empty(node))
423 + list_add_tail(node, list);
424 +
425 + return;
426 +}
427 +
428 +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
429 +{
430 + unsigned long flags;
431 +
432 + spin_lock_irqsave(&cq->spinlock, flags);
433 +
434 + if (!list_empty(node))
435 + list_del_init(node);
436 +
437 + spin_unlock_irqrestore(&cq->spinlock, flags);
438 +}
439 +
440 +static void reset_queue_map(struct ehca_queue_map *qmap)
441 +{
442 + int i;
443 +
444 + qmap->tail = 0;
445 + for (i = 0; i < qmap->entries; i++)
446 + qmap->map[i].reported = 1;
447 +}
448 +
449 /*
450 * Create an ib_qp struct that is either a QP or an SRQ, depending on
451 * the value of the is_srq parameter. If init_attr and srq_init_attr share
452 @@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_q
453 struct ib_srq_init_attr *srq_init_attr,
454 struct ib_udata *udata, int is_srq)
455 {
456 - struct ehca_qp *my_qp;
457 + struct ehca_qp *my_qp, *my_srq = NULL;
458 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
459 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
460 ib_device);
461 struct ib_ucontext *context = NULL;
462 - u32 nr_qes;
463 u64 h_ret;
464 int is_llqp = 0, has_srq = 0;
465 int qp_type, max_send_sge, max_recv_sge, ret;
466 @@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_q
467
468 /* handle SRQ base QPs */
469 if (init_attr->srq) {
470 - struct ehca_qp *my_srq =
471 - container_of(init_attr->srq, struct ehca_qp, ib_srq);
472 + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
473
474 has_srq = 1;
475 parms.ext_type = EQPT_SRQBASE;
476 @@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_q
477 "and pages ret=%i", ret);
478 goto create_qp_exit2;
479 }
480 - nr_qes = my_qp->ipz_squeue.queue_length /
481 +
482 + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
483 my_qp->ipz_squeue.qe_size;
484 - my_qp->sq_map = vmalloc(nr_qes *
485 + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
486 sizeof(struct ehca_qmap_entry));
487 - if (!my_qp->sq_map) {
488 + if (!my_qp->sq_map.map) {
489 ehca_err(pd->device, "Couldn't allocate squeue "
490 "map ret=%i", ret);
491 goto create_qp_exit3;
492 }
493 + INIT_LIST_HEAD(&my_qp->sq_err_node);
494 + /* to avoid the generation of bogus flush CQEs */
495 + reset_queue_map(&my_qp->sq_map);
496 }
497
498 if (HAS_RQ(my_qp)) {
499 @@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_q
500 "and pages ret=%i", ret);
501 goto create_qp_exit4;
502 }
503 +
504 + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
505 + my_qp->ipz_rqueue.qe_size;
506 + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
507 + sizeof(struct ehca_qmap_entry));
508 + if (!my_qp->rq_map.map) {
509 + ehca_err(pd->device, "Couldn't allocate rqueue "
510 + "map ret=%i", ret);
511 + goto create_qp_exit5;
512 + }
513 + INIT_LIST_HEAD(&my_qp->rq_err_node);
514 + /* to avoid the generation of bogus flush CQEs */
515 + reset_queue_map(&my_qp->rq_map);
516 + } else if (init_attr->srq) {
517 + /* this is a base QP, use the queue map of the SRQ */
518 + my_qp->rq_map = my_srq->rq_map;
519 + INIT_LIST_HEAD(&my_qp->rq_err_node);
520 +
521 + my_qp->ipz_rqueue = my_srq->ipz_rqueue;
522 }
523
524 if (is_srq) {
525 @@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_q
526 if (ret) {
527 ehca_err(pd->device,
528 "Couldn't assign qp to send_cq ret=%i", ret);
529 - goto create_qp_exit6;
530 + goto create_qp_exit7;
531 }
532 }
533
534 @@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_q
535 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
536 ehca_err(pd->device, "Copy to udata failed");
537 ret = -EINVAL;
538 - goto create_qp_exit7;
539 + goto create_qp_exit8;
540 }
541 }
542
543 return my_qp;
544
545 -create_qp_exit7:
546 +create_qp_exit8:
547 ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
548
549 -create_qp_exit6:
550 +create_qp_exit7:
551 kfree(my_qp->mod_qp_parm);
552
553 +create_qp_exit6:
554 + if (HAS_RQ(my_qp))
555 + vfree(my_qp->rq_map.map);
556 +
557 create_qp_exit5:
558 if (HAS_RQ(my_qp))
559 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
560
561 create_qp_exit4:
562 if (HAS_SQ(my_qp))
563 - vfree(my_qp->sq_map);
564 + vfree(my_qp->sq_map.map);
565
566 create_qp_exit3:
567 if (HAS_SQ(my_qp))
568 @@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_q
569 return 0;
570 }
571
572 +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
573 + struct ehca_queue_map *qmap)
574 +{
575 + void *wqe_v;
576 + u64 q_ofs;
577 + u32 wqe_idx;
578 +
579 + /* convert real to abs address */
580 + wqe_p = wqe_p & (~(1UL << 63));
581 +
582 + wqe_v = abs_to_virt(wqe_p);
583 +
584 + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
585 + ehca_gen_err("Invalid offset for calculating left cqes "
586 + "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
587 + return -EFAULT;
588 + }
589 +
590 + wqe_idx = q_ofs / ipz_queue->qe_size;
591 + if (wqe_idx < qmap->tail)
592 + qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
593 + else
594 + qmap->left_to_poll = wqe_idx - qmap->tail;
595 +
596 + return 0;
597 +}
598 +
599 +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
600 +{
601 + u64 h_ret;
602 + void *send_wqe_p, *recv_wqe_p;
603 + int ret;
604 + unsigned long flags;
605 + int qp_num = my_qp->ib_qp.qp_num;
606 +
607 + /* this hcall is not supported on base QPs */
608 + if (my_qp->ext_type != EQPT_SRQBASE) {
609 + /* get send and receive wqe pointer */
610 + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
611 + my_qp->ipz_qp_handle, &my_qp->pf,
612 + &send_wqe_p, &recv_wqe_p, 4);
613 + if (h_ret != H_SUCCESS) {
614 + ehca_err(&shca->ib_device, "disable_and_get_wqe() "
615 + "failed ehca_qp=%p qp_num=%x h_ret=%li",
616 + my_qp, qp_num, h_ret);
617 + return ehca2ib_return_code(h_ret);
618 + }
619 +
620 + /*
621 + * acquire lock to ensure that nobody is polling the cq which
622 + * could mean that the qmap->tail pointer is in an
623 + * inconsistent state.
624 + */
625 + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
626 + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
627 + &my_qp->sq_map);
628 + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
629 + if (ret)
630 + return ret;
631 +
632 +
633 + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
634 + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
635 + &my_qp->rq_map);
636 + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
637 + if (ret)
638 + return ret;
639 + } else {
640 + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
641 + my_qp->sq_map.left_to_poll = 0;
642 + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
643 +
644 + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
645 + my_qp->rq_map.left_to_poll = 0;
646 + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
647 + }
648 +
649 + /* this assures flush cqes being generated only for pending wqes */
650 + if ((my_qp->sq_map.left_to_poll == 0) &&
651 + (my_qp->rq_map.left_to_poll == 0)) {
652 + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
653 + ehca_add_to_err_list(my_qp, 1);
654 + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
655 +
656 + if (HAS_RQ(my_qp)) {
657 + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
658 + ehca_add_to_err_list(my_qp, 0);
659 + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
660 + flags);
661 + }
662 + }
663 +
664 + return 0;
665 +}
666 +
667 /*
668 * internal_modify_qp with circumvention to handle aqp0 properly
669 * smi_reset2init indicates if this is an internal reset-to-init-call for
670 @@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_
671 goto modify_qp_exit2;
672 }
673 }
674 + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
675 + ret = check_for_left_cqes(my_qp, shca);
676 + if (ret)
677 + goto modify_qp_exit2;
678 + }
679
680 if (statetrans == IB_QPST_ANY2RESET) {
681 ipz_qeit_reset(&my_qp->ipz_rqueue);
682 ipz_qeit_reset(&my_qp->ipz_squeue);
683 +
684 + if (qp_cur_state == IB_QPS_ERR) {
685 + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
686 +
687 + if (HAS_RQ(my_qp))
688 + del_from_err_list(my_qp->recv_cq,
689 + &my_qp->rq_err_node);
690 + }
691 + reset_queue_map(&my_qp->sq_map);
692 +
693 + if (HAS_RQ(my_qp))
694 + reset_queue_map(&my_qp->rq_map);
695 }
696
697 if (attr_mask & IB_QP_QKEY)
698 @@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib
699 idr_remove(&ehca_qp_idr, my_qp->token);
700 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
701
702 + /*
703 + * SRQs will never get into an error list and do not have a recv_cq,
704 + * so we need to skip them here.
705 + */
706 + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
707 + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
708 +
709 + if (HAS_SQ(my_qp))
710 + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
711 +
712 /* now wait until all pending events have completed */
713 wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
714
715 @@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib
716 if (qp_type == IB_QPT_GSI) {
717 struct ib_event event;
718 ehca_info(dev, "device %s: port %x is inactive.",
719 - shca->ib_device.name, port_num);
720 + shca->ib_device.name, port_num);
721 event.device = &shca->ib_device;
722 event.event = IB_EVENT_PORT_ERR;
723 event.element.port_num = port_num;
724 @@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib
725 ib_dispatch_event(&event);
726 }
727
728 - if (HAS_RQ(my_qp))
729 + if (HAS_RQ(my_qp)) {
730 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
731 +
732 + vfree(my_qp->rq_map.map);
733 + }
734 if (HAS_SQ(my_qp)) {
735 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
736 - vfree(my_qp->sq_map);
737 +
738 + vfree(my_qp->sq_map.map);
739 }
740 kmem_cache_free(qp_cache, my_qp);
741 atomic_dec(&shca->num_qps);
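
calc_left_cqes() above converts the WQE pointer returned by the hypervisor
into a slot index and then computes the ring distance from the current tail,
taking wrap-around into account; that distance becomes left_to_poll. A small
worked sketch of the same arithmetic (the numbers are made up):

    /* Ring distance from "tail" to "wqe_idx" in a map with "entries" slots. */
    static unsigned int left_to_poll(unsigned int wqe_idx, unsigned int tail,
                                     unsigned int entries)
    {
            if (wqe_idx < tail)
                    return (entries - tail) + wqe_idx; /* wrapped around */
            return wqe_idx - tail;
    }

    /*
     * Example: entries = 8, tail = 6, hardware stopped at wqe_idx = 2:
     * slots 6, 7, 0 and 1 still complete in hardware, so left_to_poll = 4;
     * everything from slot 2 onwards is flushed in software instead.
     */
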
742 --- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_iverbs.h
743 +++ infiniband.git/drivers/infiniband/hw/ehca/ehca_iverbs.h
744 @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
745 int ehca_calc_ipd(struct ehca_shca *shca, int port,
746 enum ib_rate path_rate, u32 *ipd);
747
748 +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
749 +
750 #ifdef CONFIG_PPC_64K_PAGES
751 void *ehca_alloc_fw_ctrlblock(gfp_t flags);
752 void ehca_free_fw_ctrlblock(void *ptr);
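
One detail worth noting about ehca_add_to_err_list() and its counterpart
del_from_err_list() in ehca_qp.c: whether a QP is currently on a CQ error list
is tracked through the list node itself. Because the nodes are set up with
INIT_LIST_HEAD() and removed with list_del_init(), list_empty() on the node is
true exactly when the QP is not enqueued, which keeps repeated add and delete
calls idempotent. A minimal standalone sketch of the idiom (simplified types,
not driver code):

    #include <linux/list.h>

    struct item {
            struct list_head node;  /* doubles as the "am I enqueued?" flag */
    };

    static void add_once(struct item *it, struct list_head *list)
    {
            /* an initialised, unlinked node points at itself => list_empty() */
            if (list_empty(&it->node))
                    list_add_tail(&it->node, list);
    }

    static void remove_if_queued(struct item *it)
    {
            /* list_del_init() re-initialises the node, keeping this safe */
            if (!list_empty(&it->node))
                    list_del_init(&it->node);
    }
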
753 --