From: Stefan Roscher Subject: Re: [PATCH 1/3] IB/ehca: Replace vmalloc with kmalloc Date Wed, 22 Apr 2009 16:02:28 +0200 References: bnc#491430 Patch-mainline: 2.6.31 In case of large queue pairs there is the possibillity of allocation failures due to memory fragmentation with kmalloc(). To ensure the memory is allocated even if kmalloc() can not find chunks which are big enough, we try to allocate the memory with vmalloc(). Because kmalloc() is faster than vmalloc() causing a huge performance win when someone allocates a large number of queue pairs. We fall back to vmalloc() only if kmalloc() can't deliver the memory chunk. Signed-off-by: Stefan Roscher Acked-by: --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -220,10 +220,13 @@ int ipz_queue_ctor(struct ehca_pd *pd, s queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); @@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, s ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, s free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_qp.c +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_qp.c @@ -457,7 +457,7 @@ static struct ehca_qp *internal_create_q ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -599,9 +599,6 @@ static struct ehca_qp *internal_create_q } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); @@ -609,6 +606,11 @@ static struct ehca_qp *internal_create_q return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + atomic_set(&my_qp->nr_events, 0); init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); @@ -697,7 +699,7 @@ static struct ehca_qp *internal_create_q (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li", h_ret); @@ -759,18 +761,20 @@ static struct ehca_qp *internal_create_q goto create_qp_exit2; } - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; - } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } } if (HAS_RQ(my_qp)) { @@ -782,20 +786,21 @@ static struct ehca_qp *internal_create_q "and pages ret=%i", ret); goto create_qp_exit4; } - - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } else if (init_attr->srq) { + } else if (init_attr->srq && !is_user) { /* this is a base QP, use the queue map of the SRQ */ my_qp->rq_map = my_srq->rq_map; INIT_LIST_HEAD(&my_qp->rq_err_node); @@ -908,7 +913,7 @@ create_qp_exit7: kfree(my_qp->mod_qp_parm); create_qp_exit6: - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) vfree(my_qp->rq_map.map); create_qp_exit5: @@ -916,7 +921,7 @@ create_qp_exit5: ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) vfree(my_qp->sq_map.map); create_qp_exit3: @@ -1224,6 +1229,7 @@ static int internal_modify_qp(struct ib_ u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; @@ -1246,6 +1252,8 @@ static int internal_modify_qp(struct ib_ ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1708,7 +1716,8 @@ static int internal_modify_qp(struct ib_ goto modify_qp_exit2; } } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { ret = check_for_left_cqes(my_qp, shca); if (ret) goto modify_qp_exit2; @@ -1718,16 +1727,17 @@ static int internal_modify_qp(struct ib_ ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); - if (qp_cur_state == IB_QPS_ERR) { + if (qp_cur_state == IB_QPS_ERR && !is_user) { del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); if (HAS_RQ(my_qp)) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); } - reset_queue_map(&my_qp->sq_map); + if (!is_user) + reset_queue_map(&my_qp->sq_map); - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) reset_queue_map(&my_qp->rq_map); } @@ -2118,10 +2128,12 @@ static int internal_destroy_qp(struct ib int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if (uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " @@ -2148,10 +2160,10 @@ static int internal_destroy_qp(struct ib * SRQs will never get into an error list and do not have a recv_cq, * so we need to skip them here. */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); /* now wait until all pending events have completed */ @@ -2189,13 +2201,13 @@ static int internal_destroy_qp(struct ib if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - - vfree(my_qp->rq_map.map); + if (!is_user) + vfree(my_qp->rq_map.map); } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - - vfree(my_qp->sq_map.map); + if (!is_user) + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.c +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.c @@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struc param->act_pages = (u32)outs[4]; if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%li", ret); @@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struc } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; @@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struc (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%li", ret); --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.h +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struc * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.c +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + } else + galpas->kernel.fw_handle = NULL; galpas->user.fw_handle = paddr_user; --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.h +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,7 +78,7 @@ static inline void hipz_galpa_store(stru *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); --- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_main.c +++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0026" +#define HCAD_VERSION "0027" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch ");