From: Alexander Schmidt <ALEXSCHM@de.ibm.com>
Subject: Adds software flush CQE generation to the ehca driver.
References: bnc#430344

When a QP goes into error state, it is required that flush CQEs are
delivered to the application for any outstanding work requests. eHCA does not
do this in hardware, so this patch adds software flush CQE generation to the
ehca driver.

Whenever a QP gets into error state, it is added to the QP error list of its
respective CQ. If the error QP list of a CQ is not empty, poll_cq()
generates flush CQEs before polling the actual CQ.

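The sketch below is an illustrative, self-contained userspace model of that
scheme, not the driver code itself; the type and function names are simplified
stand-ins for the ehca ones (struct ehca_queue_map, generate_flush_cqes(),
ehca_poll_cq()). Each CQ keeps a list of QPs in error state, and polling first
drains software-generated flush completions from the queue maps of those QPs
before the hardware CQ would be consulted.

#include <stdio.h>
#include <string.h>

#define QMAP_ENTRIES 4

struct qmap_entry {
	unsigned long app_wr_id;	/* wr_id as posted by the application */
	int reported;			/* completion already delivered? */
};

struct queue_map {
	struct qmap_entry map[QMAP_ENTRIES];
	unsigned int entries;
	unsigned int tail;
};

struct wc {
	unsigned long wr_id;
	int flushed;
};

struct qp {
	struct queue_map sq_map;
	struct qp *next_err;		/* next QP on the CQ's error list */
};

struct cq {
	struct qp *sqp_err_list;	/* QPs whose send queues need flushing */
};

/* drain up to num flush completions from one QP's send queue map */
static int generate_flush_cqes(struct qp *qp, struct wc *wc, int num)
{
	struct queue_map *qmap = &qp->sq_map;
	int nr = 0;

	while (nr < num && !qmap->map[qmap->tail].reported) {
		memset(&wc[nr], 0, sizeof(wc[nr]));
		wc[nr].wr_id = qmap->map[qmap->tail].app_wr_id;
		wc[nr].flushed = 1;	/* stands in for IB_WC_WR_FLUSH_ERR */
		qmap->map[qmap->tail].reported = 1;
		if (++qmap->tail == qmap->entries)
			qmap->tail = 0;
		nr++;
	}
	return nr;
}

/* poll: software flush CQEs from error-listed QPs first, then the real CQ */
static int poll_cq(struct cq *cq, struct wc *wc, int num)
{
	struct qp *qp;
	int nr = 0;

	for (qp = cq->sqp_err_list; qp && nr < num; qp = qp->next_err)
		nr += generate_flush_cqes(qp, wc + nr, num - nr);

	/* the hardware CQ would be polled here for the remaining slots */
	return nr;
}

int main(void)
{
	struct qp qp = { .sq_map = { .entries = QMAP_ENTRIES } };
	struct cq cq = { .sqp_err_list = &qp };
	struct wc wc[QMAP_ENTRIES];
	int i, n;

	/* two outstanding, not-yet-reported work requests on the send queue */
	qp.sq_map.map[0].app_wr_id = 100;
	qp.sq_map.map[1].app_wr_id = 101;
	qp.sq_map.map[2].reported = 1;
	qp.sq_map.map[3].reported = 1;

	n = poll_cq(&cq, wc, QMAP_ENTRIES);
	for (i = 0; i < n; i++)
		printf("flush wc: wr_id=%lu status=FLUSH_ERR\n", wc[i].wr_id);
	return 0;
}
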
Signed-off-by: Alexander Schmidt <alexs@linux.vnet.ibm.com>
Acked-by: John Jolly <jjolly@suse.de>
---
Applies on top of 2.6.27-rc3, please consider this for 2.6.28.

 drivers/infiniband/hw/ehca/ehca_classes.h |   14 +
 drivers/infiniband/hw/ehca/ehca_cq.c      |    3
 drivers/infiniband/hw/ehca/ehca_iverbs.h  |    2
 drivers/infiniband/hw/ehca/ehca_qp.c      |  225 ++++++++++++++++++++++++++++--
 drivers/infiniband/hw/ehca/ehca_reqs.c    |  211 ++++++++++++++++++++++++----
 5 files changed, 412 insertions(+), 43 deletions(-)

--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_classes.h
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -164,6 +164,13 @@ struct ehca_qmap_entry {
 	u16 reported;
 };
 
+struct ehca_queue_map {
+	struct ehca_qmap_entry *map;
+	unsigned int entries;
+	unsigned int tail;
+	unsigned int left_to_poll;
+};
+
 struct ehca_qp {
 	union {
 		struct ib_qp ib_qp;
@@ -173,8 +180,9 @@ struct ehca_qp {
 	enum ehca_ext_qp_type ext_type;
 	enum ib_qp_state state;
 	struct ipz_queue ipz_squeue;
-	struct ehca_qmap_entry *sq_map;
+	struct ehca_queue_map sq_map;
 	struct ipz_queue ipz_rqueue;
+	struct ehca_queue_map rq_map;
 	struct h_galpas galpas;
 	u32 qkey;
 	u32 real_qp_num;
@@ -204,6 +212,8 @@ struct ehca_qp {
 	atomic_t nr_events; /* events seen */
 	wait_queue_head_t wait_completion;
 	int mig_armed;
+	struct list_head sq_err_node;
+	struct list_head rq_err_node;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -233,6 +243,8 @@ struct ehca_cq {
 	/* mmap counter for resources mapped into user space */
 	u32 mm_count_queue;
 	u32 mm_count_galpa;
+	struct list_head sqp_err_list;
+	struct list_head rqp_err_list;
 };
 
 enum ehca_mr_flag {
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -53,9 +53,25 @@
 /* in RC traffic, insert an empty RDMA READ every this many packets */
 #define ACK_CIRC_THRESHOLD 2000000
 
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+	u64 ret;
+
+	ret = wr_id & ~QMAP_IDX_MASK;
+	ret |= idx & QMAP_IDX_MASK;
+
+	return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+	return wr_id & QMAP_IDX_MASK;
+}
+
 static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
 				  struct ehca_wqe *wqe_p,
-				  struct ib_recv_wr *recv_wr)
+				  struct ib_recv_wr *recv_wr,
+				  u32 rq_map_idx)
 {
 	u8 cnt_ds;
 	if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct
 	/* clear wqe header until sglist */
 	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
 	wqe_p->nr_of_data_seg = recv_wr->num_sge;
 
 	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct
 	u64 dma_length;
 	struct ehca_av *my_av;
 	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
 
 	if (unlikely((send_wr->num_sge < 0) ||
 		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct
 	/* clear wqe header until sglist */
 	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-	wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
-	wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
+	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
 
-	qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
-	qp->sq_map[sq_map_idx].reported = 0;
+	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+	qmap_entry->reported = 0;
 
 	switch (send_wr->opcode) {
 	case IB_WR_SEND:
@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehc
 	struct ehca_wqe *wqe_p;
 	int wqe_cnt = 0;
 	int ret = 0;
+	u32 rq_map_idx;
 	unsigned long flags;
+	struct ehca_qmap_entry *qmap_entry;
 
 	if (unlikely(!HAS_RQ(my_qp))) {
 		ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehc
 			}
 			goto post_recv_exit0;
 		}
+		/*
+		 * Get the index of the WQE in the recv queue. The same index
+		 * is used for writing into the rq_map.
+		 */
+		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
 		/* write a RECV WQE into the QUEUE */
-		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+				      rq_map_idx);
 		/*
 		 * if something failed,
 		 * reset the free entry pointer to the start value
@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehc
 			}
 			goto post_recv_exit0;
 		}
+
+		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+		qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+		qmap_entry->reported = 0;
+
 		wqe_cnt++;
 	} /* eof for cur_recv_wr */
 
@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
 /* internal function to poll one entry of cq */
 static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
 {
-	int ret = 0;
+	int ret = 0, qmap_tail_idx;
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	struct ehca_cqe *cqe;
 	struct ehca_qp *my_qp;
+	struct ehca_qmap_entry *qmap_entry;
+	struct ehca_queue_map *qmap;
 	int cqe_count = 0, is_error;
 
 repoll:
@@ -674,27 +706,52 @@ repoll:
 		goto repoll;
 	wc->qp = &my_qp->ib_qp;
 
-	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
-		struct ehca_qmap_entry *qmap_entry;
+	if (is_error) {
 		/*
-		 * We got a send completion and need to restore the original
-		 * wr_id.
+		 * set left_to_poll to 0 because in error state, we will not
+		 * get any additional CQEs
 		 */
-		qmap_entry = &my_qp->sq_map[cqe->work_request_id &
-					    QMAP_IDX_MASK];
+		ehca_add_to_err_list(my_qp, 1);
+		my_qp->sq_map.left_to_poll = 0;
 
-		if (qmap_entry->reported) {
-			ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-				  my_qp->real_qp_num);
-			/* found a double cqe, discard it and read next one */
-			goto repoll;
-		}
-		wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
-		wc->wr_id |= qmap_entry->app_wr_id;
-		qmap_entry->reported = 1;
-	} else
+		if (HAS_RQ(my_qp))
+			ehca_add_to_err_list(my_qp, 0);
+		my_qp->rq_map.left_to_poll = 0;
+	}
+
+	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+		/* We got a send completion. */
+		qmap = &my_qp->sq_map;
+	else
 		/* We got a receive completion. */
-		wc->wr_id = cqe->work_request_id;
+		qmap = &my_qp->rq_map;
+
+	qmap_entry = &qmap->map[qmap_tail_idx];
+	if (qmap_entry->reported) {
+		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+			  my_qp->real_qp_num);
+		/* found a double cqe, discard it and read next one */
+		goto repoll;
+	}
+
+	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+	qmap_entry->reported = 1;
+
+	/* this is a proper completion, we need to advance the tail pointer */
+	if (++qmap->tail == qmap->entries)
+		qmap->tail = 0;
+
+	/* if left_to_poll is decremented to 0, add the QP to the error list */
+	if (qmap->left_to_poll > 0) {
+		qmap->left_to_poll--;
+		if ((my_qp->sq_map.left_to_poll == 0) &&
+		    (my_qp->rq_map.left_to_poll == 0)) {
+			ehca_add_to_err_list(my_qp, 1);
+			if (HAS_RQ(my_qp))
+				ehca_add_to_err_list(my_qp, 0);
+		}
+	}
 
 	/* eval ib_wc_opcode */
 	wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -733,13 +790,88 @@ poll_cq_one_exit0:
 	return ret;
 }
 
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+			       struct ib_wc *wc, int num_entries,
+			       struct ipz_queue *ipz_queue, int on_sq)
+{
+	int nr = 0;
+	struct ehca_wqe *wqe;
+	u64 offset;
+	struct ehca_queue_map *qmap;
+	struct ehca_qmap_entry *qmap_entry;
+
+	if (on_sq)
+		qmap = &my_qp->sq_map;
+	else
+		qmap = &my_qp->rq_map;
+
+	qmap_entry = &qmap->map[qmap->tail];
+
+	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+		/* generate flush CQE */
+		memset(wc, 0, sizeof(*wc));
+
+		offset = qmap->tail * ipz_queue->qe_size;
+		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+		if (!wqe) {
+			ehca_err(cq->device, "Invalid wqe offset=%#lx on "
+				 "qp_num=%#x", offset, my_qp->real_qp_num);
+			return nr;
+		}
+
+		wc->wr_id = replace_wr_id(wqe->work_request_id,
+					  qmap_entry->app_wr_id);
+
+		if (on_sq) {
+			switch (wqe->optype) {
+			case WQE_OPTYPE_SEND:
+				wc->opcode = IB_WC_SEND;
+				break;
+			case WQE_OPTYPE_RDMAWRITE:
+				wc->opcode = IB_WC_RDMA_WRITE;
+				break;
+			case WQE_OPTYPE_RDMAREAD:
+				wc->opcode = IB_WC_RDMA_READ;
+				break;
+			default:
+				ehca_err(cq->device, "Invalid optype=%x",
+					 wqe->optype);
+				return nr;
+			}
+		} else
+			wc->opcode = IB_WC_RECV;
+
+		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+			wc->ex.imm_data = wqe->immediate_data;
+			wc->wc_flags |= IB_WC_WITH_IMM;
+		}
+
+		wc->status = IB_WC_WR_FLUSH_ERR;
+
+		wc->qp = &my_qp->ib_qp;
+
+		/* mark as reported and advance tail pointer */
+		qmap_entry->reported = 1;
+		if (++qmap->tail == qmap->entries)
+			qmap->tail = 0;
+		qmap_entry = &qmap->map[qmap->tail];
+
+		wc++; nr++;
+	}
+
+	return nr;
+
+}
+
 int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 {
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	int nr;
+	struct ehca_qp *err_qp;
 	struct ib_wc *current_wc = wc;
 	int ret = 0;
 	unsigned long flags;
+	int entries_left = num_entries;
 
 	if (num_entries < 1) {
 		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int n
 	}
 
 	spin_lock_irqsave(&my_cq->spinlock, flags);
-	for (nr = 0; nr < num_entries; nr++) {
+
+	/* generate flush cqes for send queues */
+	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+				&err_qp->ipz_squeue, 1);
+		entries_left -= nr;
+		current_wc += nr;
+
+		if (entries_left == 0)
+			break;
+	}
+
+	/* generate flush cqes for receive queues */
+	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+				&err_qp->ipz_rqueue, 0);
+		entries_left -= nr;
+		current_wc += nr;
+
+		if (entries_left == 0)
+			break;
+	}
+
+	for (nr = 0; nr < entries_left; nr++) {
 		ret = ehca_poll_cq_one(cq, current_wc);
 		if (ret)
 			break;
 		current_wc++;
 	} /* eof for nr */
+	entries_left -= nr;
+
 	spin_unlock_irqrestore(&my_cq->spinlock, flags);
 	if (ret == -EAGAIN || !ret)
-		ret = nr;
+		ret = num_entries - entries_left;
 
 poll_cq_exit0:
 	return ret;
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_cq.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_d
 	for (i = 0; i < QP_HASHTAB_LEN; i++)
 		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
 
+	INIT_LIST_HEAD(&my_cq->sqp_err_list);
+	INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
 	if (context) {
 		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
 		struct ehca_create_cq_resp resp;
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_qp.c
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(s
 	queue->is_small = (queue->page_size != 0);
 }
 
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+	struct list_head *list, *node;
+
+	/* TODO: support low latency QPs */
+	if (qp->ext_type == EQPT_LLQP)
+		return;
+
+	if (on_sq) {
+		list = &qp->send_cq->sqp_err_list;
+		node = &qp->sq_err_node;
+	} else {
+		list = &qp->recv_cq->rqp_err_list;
+		node = &qp->rq_err_node;
+	}
+
+	if (list_empty(node))
+		list_add_tail(node, list);
+
+	return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->spinlock, flags);
+
+	if (!list_empty(node))
+		list_del_init(node);
+
+	spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+	int i;
+
+	qmap->tail = 0;
+	for (i = 0; i < qmap->entries; i++)
+		qmap->map[i].reported = 1;
+}
+
 /*
  * Create an ib_qp struct that is either a QP or an SRQ, depending on
  * the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_q
 	struct ib_srq_init_attr *srq_init_attr,
 	struct ib_udata *udata, int is_srq)
 {
-	struct ehca_qp *my_qp;
+	struct ehca_qp *my_qp, *my_srq = NULL;
 	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
 					      ib_device);
 	struct ib_ucontext *context = NULL;
-	u32 nr_qes;
 	u64 h_ret;
 	int is_llqp = 0, has_srq = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
@@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_q
 
 	/* handle SRQ base QPs */
 	if (init_attr->srq) {
-		struct ehca_qp *my_srq =
-			container_of(init_attr->srq, struct ehca_qp, ib_srq);
+		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
 
 		has_srq = 1;
 		parms.ext_type = EQPT_SRQBASE;
@@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit2;
 		}
-		nr_qes = my_qp->ipz_squeue.queue_length /
+
+		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
 			my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map = vmalloc(nr_qes *
+		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
 			sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map) {
+		if (!my_qp->sq_map.map) {
 			ehca_err(pd->device, "Couldn't allocate squeue "
 				 "map ret=%i", ret);
 			goto create_qp_exit3;
 		}
+		INIT_LIST_HEAD(&my_qp->sq_err_node);
+		/* to avoid the generation of bogus flush CQEs */
+		reset_queue_map(&my_qp->sq_map);
 	}
 
 	if (HAS_RQ(my_qp)) {
@@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
 		}
+
+		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+			my_qp->ipz_rqueue.qe_size;
+		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+			sizeof(struct ehca_qmap_entry));
+		if (!my_qp->rq_map.map) {
+			ehca_err(pd->device, "Couldn't allocate squeue "
+				 "map ret=%i", ret);
+			goto create_qp_exit5;
+		}
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+		/* to avoid the generation of bogus flush CQEs */
+		reset_queue_map(&my_qp->rq_map);
+	} else if (init_attr->srq) {
+		/* this is a base QP, use the queue map of the SRQ */
+		my_qp->rq_map = my_srq->rq_map;
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
 	}
 
 	if (is_srq) {
@@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_q
 		if (ret) {
 			ehca_err(pd->device,
 				 "Couldn't assign qp to send_cq ret=%i", ret);
-			goto create_qp_exit6;
+			goto create_qp_exit7;
 		}
 	}
 
@@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_q
 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
 			ehca_err(pd->device, "Copy to udata failed");
 			ret = -EINVAL;
-			goto create_qp_exit7;
+			goto create_qp_exit8;
 		}
 	}
 
 	return my_qp;
 
-create_qp_exit7:
+create_qp_exit8:
 	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
 
-create_qp_exit6:
+create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
 
+create_qp_exit6:
+	if (HAS_RQ(my_qp))
+		vfree(my_qp->rq_map.map);
+
 create_qp_exit5:
 	if (HAS_RQ(my_qp))
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
 	if (HAS_SQ(my_qp))
-		vfree(my_qp->sq_map);
+		vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
 	if (HAS_SQ(my_qp))
@@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_q
 	return 0;
 }
 
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+			  struct ehca_queue_map *qmap)
+{
+	void *wqe_v;
+	u64 q_ofs;
+	u32 wqe_idx;
+
+	/* convert real to abs address */
+	wqe_p = wqe_p & (~(1UL << 63));
+
+	wqe_v = abs_to_virt(wqe_p);
+
+	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+		ehca_gen_err("Invalid offset for calculating left cqes "
+			     "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
+		return -EFAULT;
+	}
+
+	wqe_idx = q_ofs / ipz_queue->qe_size;
+	if (wqe_idx < qmap->tail)
+		qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
+	else
+		qmap->left_to_poll = wqe_idx - qmap->tail;
+
+	return 0;
+}
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+	u64 h_ret;
+	void *send_wqe_p, *recv_wqe_p;
+	int ret;
+	unsigned long flags;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* this hcall is not supported on base QPs */
+	if (my_qp->ext_type != EQPT_SRQBASE) {
+		/* get send and receive wqe pointer */
+		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle, &my_qp->pf,
+				&send_wqe_p, &recv_wqe_p, 4);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+				 "failed ehca_qp=%p qp_num=%x h_ret=%li",
+				 my_qp, qp_num, h_ret);
+			return ehca2ib_return_code(h_ret);
+		}
+
+		/*
+		 * acquire lock to ensure that nobody is polling the cq which
+		 * could mean that the qmap->tail pointer is in an
+		 * inconsistent state.
+		 */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+				&my_qp->sq_map);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+		if (ret)
+			return ret;
+
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+				&my_qp->rq_map);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+		if (ret)
+			return ret;
+	} else {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		my_qp->sq_map.left_to_poll = 0;
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		my_qp->rq_map.left_to_poll = 0;
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	}
+
+	/* this assures flush cqes being generated only for pending wqes */
+	if ((my_qp->sq_map.left_to_poll == 0) &&
+			(my_qp->rq_map.left_to_poll == 0)) {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ehca_add_to_err_list(my_qp, 1);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		if (HAS_RQ(my_qp)) {
+			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+			ehca_add_to_err_list(my_qp, 0);
+			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+					flags);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * internal_modify_qp with circumvention to handle aqp0 properly
  * smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_
 			goto modify_qp_exit2;
 		}
 	}
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+		ret = check_for_left_cqes(my_qp, shca);
+		if (ret)
+			goto modify_qp_exit2;
+	}
 
 	if (statetrans == IB_QPST_ANY2RESET) {
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
+
+		if (qp_cur_state == IB_QPS_ERR) {
+			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+			if (HAS_RQ(my_qp))
+				del_from_err_list(my_qp->recv_cq,
+						  &my_qp->rq_err_node);
+		}
+		reset_queue_map(&my_qp->sq_map);
+
+		if (HAS_RQ(my_qp))
+			reset_queue_map(&my_qp->rq_map);
 	}
 
 	if (attr_mask & IB_QP_QKEY)
@@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib
 	idr_remove(&ehca_qp_idr, my_qp->token);
 	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
+	/*
+	 * SRQs will never get into an error list and do not have a recv_cq,
+	 * so we need to skip them here.
+	 */
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+	if (HAS_SQ(my_qp))
+		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
 	/* now wait until all pending events have completed */
 	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
 
@@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib
 	if (qp_type == IB_QPT_GSI) {
 		struct ib_event event;
 		ehca_info(dev, "device %s: port %x is inactive.",
-			shca->ib_device.name, port_num);
+			  shca->ib_device.name, port_num);
 		event.device = &shca->ib_device;
 		event.event = IB_EVENT_PORT_ERR;
 		event.element.port_num = port_num;
@@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib
 		ib_dispatch_event(&event);
 	}
 
-	if (HAS_RQ(my_qp))
+	if (HAS_RQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+		vfree(my_qp->rq_map.map);
+	}
 	if (HAS_SQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-		vfree(my_qp->sq_map);
+
+		vfree(my_qp->sq_map.map);
 	}
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
--- infiniband.git.orig/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ infiniband.git/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
 int ehca_calc_ipd(struct ehca_shca *shca, int port,
 		  enum ib_rate path_rate, u32 *ipd);
 
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
 #ifdef CONFIG_PPC_64K_PAGES
 void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);