From: Stefan Roscher <stefan.roscher@de.ibm.com>
Subject: Re: [PATCH 1/3] IB/ehca: Replace vmalloc with kmalloc
Date: Wed, 22 Apr 2009 16:02:28 +0200
References: bnc#491430
Patch-mainline: 2.6.31

For large queue pairs there is the possibility of allocation failures
due to memory fragmentation when using kmalloc(). To ensure the memory
is allocated even if kmalloc() cannot find chunks which are big enough,
we fall back to allocating the memory with vmalloc().

Because kmalloc() is faster than vmalloc(), this yields a huge
performance win when someone allocates a large number of queue pairs.
We fall back to vmalloc() only if kmalloc() cannot deliver the memory
chunk.

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Acked-by: <mfrueh@suse.de>
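
The allocation pattern the patch introduces, as a minimal self-contained
sketch (hypothetical helper names; the patch itself open-codes this logic
in ipz_queue_ctor() and ipz_queue_dtor()):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>		/* is_vmalloc_addr() */

/* Try the fast, physically contiguous allocator first; fall back to
 * vmalloc() when memory is too fragmented for kmalloc() to succeed. */
static void *alloc_queue_page_list(size_t nr_of_pages)
{
	void *p = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);

	if (!p)
		p = vmalloc(nr_of_pages * sizeof(void *));
	return p;
}

/* is_vmalloc_addr() identifies which allocator produced the buffer,
 * so the matching deallocator is always chosen on free. */
static void free_queue_page_list(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
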
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -220,10 +220,13 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
 	queue->small_page = NULL;
 
 	/* allocate queue page pointers */
-	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
-		ehca_gen_err("Couldn't allocate queue page list");
-		return 0;
+		queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+		if (!queue->queue_pages) {
+			ehca_gen_err("Couldn't allocate queue page list");
+			return 0;
+		}
 	}
 	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
 
@@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
 ipz_queue_ctor_exit0:
 	ehca_gen_err("Couldn't alloc pages queue=%p "
 		     "nr_of_pages=%x", queue, nr_of_pages);
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 0;
 }
@@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, s
 		free_page((unsigned long)queue->queue_pages[i]);
 	}
 
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 1;
 }
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_qp.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -457,7 +457,7 @@ static struct ehca_qp *internal_create_q
 					      ib_device);
 	struct ib_ucontext *context = NULL;
 	u64 h_ret;
-	int is_llqp = 0, has_srq = 0;
+	int is_llqp = 0, has_srq = 0, is_user = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
 
 	/* h_call's out parameters */
@@ -599,9 +599,6 @@ static struct ehca_qp *internal_create_q
 		}
 	}
 
-	if (pd->uobject && udata)
-		context = pd->uobject->context;
-
 	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
@@ -609,6 +606,11 @@ static struct ehca_qp *internal_create_q
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (pd->uobject && udata) {
+		is_user = 1;
+		context = pd->uobject->context;
+	}
+
 	atomic_set(&my_qp->nr_events, 0);
 	init_waitqueue_head(&my_qp->wait_completion);
 	spin_lock_init(&my_qp->spinlock_s);
@@ -697,7 +699,7 @@ static struct ehca_qp *internal_create_q
 			(parms.squeue.is_small || parms.rqueue.is_small);
 	}
 
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
 			 h_ret);
@@ -759,18 +761,20 @@ static struct ehca_qp *internal_create_q
 			goto create_qp_exit2;
 		}
 
-		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-			my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-					    sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit3;
-		}
-		INIT_LIST_HEAD(&my_qp->sq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->sq_map);
+		if (!is_user) {
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->sq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			}
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
+		}
 	}
 
 	if (HAS_RQ(my_qp)) {
@@ -782,20 +786,21 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
 		}
-
-		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-			my_qp->ipz_rqueue.qe_size;
-		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-					    sizeof(struct ehca_qmap_entry));
-		if (!my_qp->rq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit5;
+		if (!is_user) {
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->rq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit5;
+			}
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
 		}
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->rq_map);
-	} else if (init_attr->srq) {
+	} else if (init_attr->srq && !is_user) {
 		/* this is a base QP, use the queue map of the SRQ */
 		my_qp->rq_map = my_srq->rq_map;
 		INIT_LIST_HEAD(&my_qp->rq_err_node);
@@ -908,7 +913,7 @@ create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
 
 create_qp_exit6:
-	if (HAS_RQ(my_qp))
+	if (HAS_RQ(my_qp) && !is_user)
 		vfree(my_qp->rq_map.map);
 
 create_qp_exit5:
@@ -916,7 +921,7 @@ create_qp_exit5:
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
@@ -1224,6 +1229,7 @@ static int internal_modify_qp(struct ib_
 	u64 update_mask;
 	u64 h_ret;
 	int bad_wqe_cnt = 0;
+	int is_user = 0;
 	int squeue_locked = 0;
 	unsigned long flags = 0;
 
@@ -1246,6 +1252,8 @@ static int internal_modify_qp(struct ib_
 		ret = ehca2ib_return_code(h_ret);
 		goto modify_qp_exit1;
 	}
+	if (ibqp->uobject)
+		is_user = 1;
 
 	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
 
@@ -1708,7 +1716,8 @@ static int internal_modify_qp(struct ib_
 			goto modify_qp_exit2;
 		}
 	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+	    && !is_user) {
 		ret = check_for_left_cqes(my_qp, shca);
 		if (ret)
 			goto modify_qp_exit2;
@@ -1718,16 +1727,17 @@ static int internal_modify_qp(struct ib_
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
 
-		if (qp_cur_state == IB_QPS_ERR) {
+		if (qp_cur_state == IB_QPS_ERR && !is_user) {
 			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 			if (HAS_RQ(my_qp))
 				del_from_err_list(my_qp->recv_cq,
 						  &my_qp->rq_err_node);
 		}
-		reset_queue_map(&my_qp->sq_map);
+		if (!is_user)
+			reset_queue_map(&my_qp->sq_map);
 
-		if (HAS_RQ(my_qp))
+		if (HAS_RQ(my_qp) && !is_user)
 			reset_queue_map(&my_qp->rq_map);
 	}
 
@@ -2118,10 +2128,12 @@ static int internal_destroy_qp(struct ib
 	int ret;
 	u64 h_ret;
 	u8 port_num;
+	int is_user = 0;
 	enum ib_qp_type qp_type;
 	unsigned long flags;
 
 	if (uobject) {
+		is_user = 1;
 		if (my_qp->mm_count_galpa ||
 		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
 			ehca_err(dev, "Resources still referenced in "
@@ -2148,10 +2160,10 @@ static int internal_destroy_qp(struct ib
 	 * SRQs will never get into an error list and do not have a recv_cq,
 	 * so we need to skip them here.
 	 */
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
 
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 	/* now wait until all pending events have completed */
@@ -2189,13 +2201,13 @@ static int internal_destroy_qp(struct ib
 
 	if (HAS_RQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-		vfree(my_qp->rq_map.map);
+		if (!is_user)
+			vfree(my_qp->rq_map.map);
 	}
 	if (HAS_SQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-		vfree(my_qp->sq_map.map);
+		if (!is_user)
+			vfree(my_qp->sq_map.map);
 	}
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.c
@@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struc
 	param->act_pages = (u32)outs[4];
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+		hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
@@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struc
 }
 
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms)
+			     struct ehca_alloc_qp_parms *parms, int is_user)
 {
 	u64 ret;
 	u64 allocate_controls, max_r10_reg, r11, r12;
@@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struc
 		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+		hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.h
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struc
  * initialize resources, create empty QPPTs (2 rings).
  */
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms);
+			     struct ehca_alloc_qp_parms *parms, int is_user);
 
 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
 		      const u8 port_id,
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr)
 	return 0;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user)
 {
-	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
-	if (ret)
-		return ret;
+	if (!is_user) {
+		int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+		if (ret)
+			return ret;
+	} else
+		galpas->kernel.fw_handle = NULL;
 
 	galpas->user.fw_handle = paddr_user;
 
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,7 +78,7 @@ static inline void hipz_galpa_store(stru
 	*(volatile u64 __force *)addr = value;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user);
 
 int hcp_galpas_dtor(struct h_galpas *galpas);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_main.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
-#define HCAD_VERSION "0026"
+#define HCAD_VERSION "0027"
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");