--- /dev/null
+From: Stefan Roscher <stefan.roscher@de.ibm.com>
+Subject: Re: [PATCH 1/3] IB/ehca: Replace vmalloc with kmalloc
+Date: Wed, 22 Apr 2009 16:02:28 +0200
+References: bnc#491430
+Patch-mainline: 2.6.31
+
+In case of large queue pairs there is the possibility of allocation failures
+due to memory fragmentation with kmalloc(). To ensure the memory is allocated
+even if kmalloc() can not find chunks which are big enough, we try to allocate
+the memory with vmalloc().
+
+Because kmalloc() is faster than vmalloc(), this change causes a huge
+performance win when someone allocates a large number of queue pairs. We
+fall back to vmalloc() only if kmalloc() can't deliver the memory chunk.
+
+Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
+Acked-by: <mfrueh@suse.de>
+
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ipz_pt_fn.c
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+@@ -220,10 +220,13 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
+ queue->small_page = NULL;
+
+ /* allocate queue page pointers */
+- queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
++ queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
+ if (!queue->queue_pages) {
+- ehca_gen_err("Couldn't allocate queue page list");
+- return 0;
++ queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
++ if (!queue->queue_pages) {
++ ehca_gen_err("Couldn't allocate queue page list");
++ return 0;
++ }
+ }
+ memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+
+@@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, s
+ ipz_queue_ctor_exit0:
+ ehca_gen_err("Couldn't alloc pages queue=%p "
+ "nr_of_pages=%x", queue, nr_of_pages);
+- vfree(queue->queue_pages);
++ if (is_vmalloc_addr(queue->queue_pages))
++ vfree(queue->queue_pages);
++ else
++ kfree(queue->queue_pages);
+
+ return 0;
+ }
+@@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, s
+ free_page((unsigned long)queue->queue_pages[i]);
+ }
+
+- vfree(queue->queue_pages);
++ if (is_vmalloc_addr(queue->queue_pages))
++ vfree(queue->queue_pages);
++ else
++ kfree(queue->queue_pages);
+
+ return 1;
+ }
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_qp.c
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -457,7 +457,7 @@ static struct ehca_qp *internal_create_q
+ ib_device);
+ struct ib_ucontext *context = NULL;
+ u64 h_ret;
+- int is_llqp = 0, has_srq = 0;
++ int is_llqp = 0, has_srq = 0, is_user = 0;
+ int qp_type, max_send_sge, max_recv_sge, ret;
+
+ /* h_call's out parameters */
+@@ -599,9 +599,6 @@ static struct ehca_qp *internal_create_q
+ }
+ }
+
+- if (pd->uobject && udata)
+- context = pd->uobject->context;
+-
+ my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
+ if (!my_qp) {
+ ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+@@ -609,6 +606,11 @@ static struct ehca_qp *internal_create_q
+ return ERR_PTR(-ENOMEM);
+ }
+
++ if (pd->uobject && udata) {
++ is_user = 1;
++ context = pd->uobject->context;
++ }
++
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
+ spin_lock_init(&my_qp->spinlock_s);
+@@ -697,7 +699,7 @@ static struct ehca_qp *internal_create_q
+ (parms.squeue.is_small || parms.rqueue.is_small);
+ }
+
+- h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
++ h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
+ h_ret);
+@@ -759,18 +761,20 @@ static struct ehca_qp *internal_create_q
+ goto create_qp_exit2;
+ }
+
+- my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+- my_qp->ipz_squeue.qe_size;
+- my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+- sizeof(struct ehca_qmap_entry));
+- if (!my_qp->sq_map.map) {
+- ehca_err(pd->device, "Couldn't allocate squeue "
+- "map ret=%i", ret);
+- goto create_qp_exit3;
+- }
+- INIT_LIST_HEAD(&my_qp->sq_err_node);
+- /* to avoid the generation of bogus flush CQEs */
+- reset_queue_map(&my_qp->sq_map);
++ if (!is_user) {
++ my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
++ my_qp->ipz_squeue.qe_size;
++ my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
++ sizeof(struct ehca_qmap_entry));
++ if (!my_qp->sq_map.map) {
++ ehca_err(pd->device, "Couldn't allocate squeue "
++ "map ret=%i", ret);
++ goto create_qp_exit3;
++ }
++ INIT_LIST_HEAD(&my_qp->sq_err_node);
++ /* to avoid the generation of bogus flush CQEs */
++ reset_queue_map(&my_qp->sq_map);
++ }
+ }
+
+ if (HAS_RQ(my_qp)) {
+@@ -782,20 +786,21 @@ static struct ehca_qp *internal_create_q
+ "and pages ret=%i", ret);
+ goto create_qp_exit4;
+ }
+-
+- my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+- my_qp->ipz_rqueue.qe_size;
+- my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+- sizeof(struct ehca_qmap_entry));
+- if (!my_qp->rq_map.map) {
+- ehca_err(pd->device, "Couldn't allocate squeue "
+- "map ret=%i", ret);
+- goto create_qp_exit5;
++ if (!is_user) {
++ my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
++ my_qp->ipz_rqueue.qe_size;
++ my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
++ sizeof(struct ehca_qmap_entry));
++ if (!my_qp->rq_map.map) {
++ ehca_err(pd->device, "Couldn't allocate squeue "
++ "map ret=%i", ret);
++ goto create_qp_exit5;
++ }
++ INIT_LIST_HEAD(&my_qp->rq_err_node);
++ /* to avoid the generation of bogus flush CQEs */
++ reset_queue_map(&my_qp->rq_map);
+ }
+- INIT_LIST_HEAD(&my_qp->rq_err_node);
+- /* to avoid the generation of bogus flush CQEs */
+- reset_queue_map(&my_qp->rq_map);
+- } else if (init_attr->srq) {
++ } else if (init_attr->srq && !is_user) {
+ /* this is a base QP, use the queue map of the SRQ */
+ my_qp->rq_map = my_srq->rq_map;
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+@@ -908,7 +913,7 @@ create_qp_exit7:
+ kfree(my_qp->mod_qp_parm);
+
+ create_qp_exit6:
+- if (HAS_RQ(my_qp))
++ if (HAS_RQ(my_qp) && !is_user)
+ vfree(my_qp->rq_map.map);
+
+ create_qp_exit5:
+@@ -916,7 +921,7 @@ create_qp_exit5:
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+ create_qp_exit4:
+- if (HAS_SQ(my_qp))
++ if (HAS_SQ(my_qp) && !is_user)
+ vfree(my_qp->sq_map.map);
+
+ create_qp_exit3:
+@@ -1224,6 +1229,7 @@ static int internal_modify_qp(struct ib_
+ u64 update_mask;
+ u64 h_ret;
+ int bad_wqe_cnt = 0;
++ int is_user = 0;
+ int squeue_locked = 0;
+ unsigned long flags = 0;
+
+@@ -1246,6 +1252,8 @@ static int internal_modify_qp(struct ib_
+ ret = ehca2ib_return_code(h_ret);
+ goto modify_qp_exit1;
+ }
++ if (ibqp->uobject)
++ is_user = 1;
+
+ qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+@@ -1708,7 +1716,8 @@ static int internal_modify_qp(struct ib_
+ goto modify_qp_exit2;
+ }
+ }
+- if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
++ if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
++ && !is_user) {
+ ret = check_for_left_cqes(my_qp, shca);
+ if (ret)
+ goto modify_qp_exit2;
+@@ -1718,16 +1727,17 @@ static int internal_modify_qp(struct ib_
+ ipz_qeit_reset(&my_qp->ipz_rqueue);
+ ipz_qeit_reset(&my_qp->ipz_squeue);
+
+- if (qp_cur_state == IB_QPS_ERR) {
++ if (qp_cur_state == IB_QPS_ERR && !is_user) {
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ if (HAS_RQ(my_qp))
+ del_from_err_list(my_qp->recv_cq,
+ &my_qp->rq_err_node);
+ }
+- reset_queue_map(&my_qp->sq_map);
++ if (!is_user)
++ reset_queue_map(&my_qp->sq_map);
+
+- if (HAS_RQ(my_qp))
++ if (HAS_RQ(my_qp) && !is_user)
+ reset_queue_map(&my_qp->rq_map);
+ }
+
+@@ -2118,10 +2128,12 @@ static int internal_destroy_qp(struct ib
+ int ret;
+ u64 h_ret;
+ u8 port_num;
++ int is_user = 0;
+ enum ib_qp_type qp_type;
+ unsigned long flags;
+
+ if (uobject) {
++ is_user = 1;
+ if (my_qp->mm_count_galpa ||
+ my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
+ ehca_err(dev, "Resources still referenced in "
+@@ -2148,10 +2160,10 @@ static int internal_destroy_qp(struct ib
+ * SRQs will never get into an error list and do not have a recv_cq,
+ * so we need to skip them here.
+ */
+- if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
++ if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+- if (HAS_SQ(my_qp))
++ if (HAS_SQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ /* now wait until all pending events have completed */
+@@ -2189,13 +2201,13 @@ static int internal_destroy_qp(struct ib
+
+ if (HAS_RQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+-
+- vfree(my_qp->rq_map.map);
++ if (!is_user)
++ vfree(my_qp->rq_map.map);
+ }
+ if (HAS_SQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+-
+- vfree(my_qp->sq_map.map);
++ if (!is_user)
++ vfree(my_qp->sq_map.map);
+ }
+ kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.c
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.c
+@@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struc
+ param->act_pages = (u32)outs[4];
+
+ if (ret == H_SUCCESS)
+- hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
++ hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resources. ret=%li", ret);
+@@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struc
+ }
+
+ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+- struct ehca_alloc_qp_parms *parms)
++ struct ehca_alloc_qp_parms *parms, int is_user)
+ {
+ u64 ret;
+ u64 allocate_controls, max_r10_reg, r11, r12;
+@@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struc
+ (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+ if (ret == H_SUCCESS)
+- hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
++ hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resources. ret=%li", ret);
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_if.h
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_if.h
+@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struc
+ * initialize resources, create empty QPPTs (2 rings).
+ */
+ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+- struct ehca_alloc_qp_parms *parms);
++ struct ehca_alloc_qp_parms *parms, int is_user);
+
+ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id,
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.c
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.c
+@@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr)
+ return 0;
+ }
+
+-int hcp_galpas_ctor(struct h_galpas *galpas,
++int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
+ u64 paddr_kernel, u64 paddr_user)
+ {
+- int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+- if (ret)
+- return ret;
++ if (!is_user) {
++ int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
++ if (ret)
++ return ret;
++ } else
++ galpas->kernel.fw_handle = NULL;
+
+ galpas->user.fw_handle = paddr_user;
+
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/hcp_phyp.h
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/hcp_phyp.h
+@@ -78,7 +78,7 @@ static inline void hipz_galpa_store(stru
+ *(volatile u64 __force *)addr = value;
+ }
+
+-int hcp_galpas_ctor(struct h_galpas *galpas,
++int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
+ u64 paddr_kernel, u64 paddr_user);
+
+ int hcp_galpas_dtor(struct h_galpas *galpas);
+--- linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa.orig/drivers/infiniband/hw/ehca/ehca_main.c
++++ linux-2.6.27.21-SLE11_BRANCH_20090427084335_da0f63fa/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -52,7 +52,7 @@
+ #include "ehca_tools.h"
+ #include "hcp_if.h"
+
+-#define HCAD_VERSION "0026"
++#define HCAD_VERSION "0027"
+
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");