From: Stefan Roscher <stefan.roscher@de.ibm.com>
Subject: Re: [PATCH 1/3] IB/ehca: Replace vmalloc with kmalloc
Date: Wed, 22 Apr 2009 16:02:28 +0200
In case of large queue pairs there is the possibility of allocation failures
due to memory fragmentation when using kmalloc(). To ensure the memory is
allocated even if kmalloc() cannot find chunks that are big enough, we fall
back to allocating the memory with vmalloc().
Because kmalloc() is faster than vmalloc(), this gives a huge performance win
when someone allocates a large number of queue pairs. We fall back to
vmalloc() only if kmalloc() cannot deliver the memory chunk.
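For reference, the allocation path in ipz_queue_ctor() after this change
looks roughly like the sketch below; the error return and closing braces are
filled in from the surrounding (unquoted) code and may differ slightly in the
actual file:

        /* allocate queue page pointers: try kmalloc() first and fall back
         * to vmalloc() if no large enough contiguous chunk is available */
        queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
        if (!queue->queue_pages) {
                queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
                if (!queue->queue_pages) {
                        ehca_gen_err("Couldn't allocate queue page list");
                        return 0;
                }
        }
        memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));

The speedup comes from kmalloc() handing out an already-mapped physically
contiguous chunk, while vmalloc() has to set up new page-table mappings for
every allocation.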
Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Acked-by: <mfrueh@suse.de>
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -220,10 +220,13 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
 	queue->small_page = NULL;
 	/* allocate queue page pointers */
-	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
-		ehca_gen_err("Couldn't allocate queue page list");
+		queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+		if (!queue->queue_pages) {
+			ehca_gen_err("Couldn't allocate queue page list");
 	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
@@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
 	ehca_gen_err("Couldn't alloc pages queue=%p "
 		 "nr_of_pages=%x", queue, nr_of_pages);
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
@@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, s
 		free_page((unsigned long)queue->queue_pages[i]);
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
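The same pattern is needed wherever the page-pointer array is released, i.e.
in the ipz_queue_ctor() error path and in ipz_queue_dtor() above. A
hypothetical helper (not part of this patch) that captures the idea:

        /* hypothetical helper, not in this patch: free a buffer that may
         * have come from either kmalloc() or vmalloc() */
        static void ipz_free_queue_pages(void *queue_pages)
        {
                if (is_vmalloc_addr(queue_pages))
                        vfree(queue_pages);
                else
                        kfree(queue_pages);
        }

Both call sites could then simply call ipz_free_queue_pages(queue->queue_pages).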
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_qp.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -457,7 +457,7 @@ static struct ehca_qp *internal_create_q
 	struct ib_ucontext *context = NULL;
-	int is_llqp = 0, has_srq = 0;
+	int is_llqp = 0, has_srq = 0, is_user = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
 	/* h_call's out parameters */
@@ -599,9 +599,6 @@ static struct ehca_qp *internal_create_q
-	if (pd->uobject && udata)
-		context = pd->uobject->context;
 	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
@@ -609,6 +606,11 @@ static struct ehca_qp *internal_create_q
 		return ERR_PTR(-ENOMEM);
+	if (pd->uobject && udata) {
+		context = pd->uobject->context;
 	atomic_set(&my_qp->nr_events, 0);
 	init_waitqueue_head(&my_qp->wait_completion);
 	spin_lock_init(&my_qp->spinlock_s);
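The line that actually sets the new flag is elided from the hunk above;
presumably the moved block in internal_create_qp() reads roughly:

        /* a userspace QP is recognized by the presence of a user context
         * and a udata buffer from the uverbs path */
        if (pd->uobject && udata) {
                is_user = 1;
                context = pd->uobject->context;
        }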
@@ -697,7 +699,7 @@ static struct ehca_qp *internal_create_q
 		(parms.squeue.is_small || parms.rqueue.is_small);
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
@@ -759,18 +761,20 @@ static struct ehca_qp *internal_create_q
 			goto create_qp_exit2;
-		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-			my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-					sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit3;
-		INIT_LIST_HEAD(&my_qp->sq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->sq_map);
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+						sizeof(struct ehca_qmap_entry));
+			if (!my_qp->sq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
@@ -782,20 +786,21 @@ static struct ehca_qp *internal_create_q
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
-		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-			my_qp->ipz_rqueue.qe_size;
-		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-				sizeof(struct ehca_qmap_entry));
-		if (!my_qp->rq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit5;
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+					sizeof(struct ehca_qmap_entry));
+			if (!my_qp->rq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit5;
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->rq_map);
-	} else if (init_attr->srq) {
+	} else if (init_attr->srq && !is_user) {
 		/* this is a base QP, use the queue map of the SRQ */
 		my_qp->rq_map = my_srq->rq_map;
 		INIT_LIST_HEAD(&my_qp->rq_err_node);
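The enclosing braces are elided from the two hunks above; the intent is that
the queue-map setup, which only feeds software flush-CQE generation for
kernel QPs, is skipped for userspace QPs. Roughly, for the send queue (and
analogously for the receive queue):

        if (!is_user) {
                my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
                        my_qp->ipz_squeue.qe_size;
                my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
                                        sizeof(struct ehca_qmap_entry));
                if (!my_qp->sq_map.map) {
                        ehca_err(pd->device, "Couldn't allocate squeue "
                                 "map ret=%i", ret);
                        goto create_qp_exit3;
                }
                INIT_LIST_HEAD(&my_qp->sq_err_node);
                /* to avoid the generation of bogus flush CQEs */
                reset_queue_map(&my_qp->sq_map);
        }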
@@ -908,7 +913,7 @@ create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
+	if (HAS_RQ(my_qp) && !is_user)
 		vfree(my_qp->rq_map.map);
@@ -916,7 +921,7 @@ create_qp_exit5:
 	ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+	if (HAS_SQ(my_qp) && !is_user)
 		vfree(my_qp->sq_map.map);
@@ -1224,6 +1229,7 @@ static int internal_modify_qp(struct ib_
 	int squeue_locked = 0;
 	unsigned long flags = 0;
@@ -1246,6 +1252,8 @@ static int internal_modify_qp(struct ib_
 		ret = ehca2ib_return_code(h_ret);
 		goto modify_qp_exit1;
 	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
@@ -1708,7 +1716,8 @@ static int internal_modify_qp(struct ib_
 			goto modify_qp_exit2;
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+	    && !is_user) {
 		ret = check_for_left_cqes(my_qp, shca);
 			goto modify_qp_exit2;
@@ -1718,16 +1727,17 @@ static int internal_modify_qp(struct ib_
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
-		if (qp_cur_state == IB_QPS_ERR) {
+		if (qp_cur_state == IB_QPS_ERR && !is_user) {
 			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 				del_from_err_list(my_qp->recv_cq,
 						  &my_qp->rq_err_node);
-		reset_queue_map(&my_qp->sq_map);
+			reset_queue_map(&my_qp->sq_map);
+		if (HAS_RQ(my_qp) && !is_user)
 			reset_queue_map(&my_qp->rq_map);
@@ -2118,10 +2128,12 @@ static int internal_destroy_qp(struct ib
 	enum ib_qp_type qp_type;
 	if (my_qp->mm_count_galpa ||
 	    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
 		ehca_err(dev, "Resources still referenced in "
@@ -2148,10 +2160,10 @@ static int internal_destroy_qp(struct ib
 	 * SRQs will never get into an error list and do not have a recv_cq,
 	 * so we need to skip them here.
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+	if (HAS_SQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 	/* now wait until all pending events have completed */
@@ -2189,13 +2201,13 @@ static int internal_destroy_qp(struct ib
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-		vfree(my_qp->rq_map.map);
+			vfree(my_qp->rq_map.map);
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-		vfree(my_qp->sq_map.map);
+			vfree(my_qp->sq_map.map);
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.c
@@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struc
 	param->act_pages = (u32)outs[4];
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+		hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
@@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struc
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms)
+			     struct ehca_alloc_qp_parms *parms, int is_user)
 	u64 allocate_controls, max_r10_reg, r11, r12;
@@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struc
 		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+		hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.h
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struc
  * initialize resources, create empty QPPTs (2 rings).
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms);
+			     struct ehca_alloc_qp_parms *parms, int is_user);
 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr)
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user)
-	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+		int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+		galpas->kernel.fw_handle = NULL;
 	galpas->user.fw_handle = paddr_user;
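The elided lines wrap the hcall_map_page() call in an is_user check, so the
kernel galpa register page is only mapped for kernel QPs; userspace QPs get
it mmap'ed into their own address space instead. The resulting constructor
presumably looks roughly like this (brace placement reconstructed):

        int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
                            u64 paddr_kernel, u64 paddr_user)
        {
                if (!is_user) {
                        /* only kernel QPs need the galpa page mapped here */
                        int ret = hcall_map_page(paddr_kernel,
                                                 &galpas->kernel.fw_handle);
                        if (ret)
                                return ret;
                } else
                        galpas->kernel.fw_handle = NULL;

                galpas->user.fw_handle = paddr_user;

                return 0;
        }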
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,7 +78,7 @@ static inline void hipz_galpa_store(stru
 	*(volatile u64 __force *)addr = value;
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user);
 int hcp_galpas_dtor(struct h_galpas *galpas);
--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_main.c
+++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_main.c
 #include "ehca_tools.h"
-#define HCAD_VERSION "0026"
+#define HCAD_VERSION "0027"
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");