/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

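/*
 * An MPT key is the 32-bit hardware index rotated left by 8 bits, and
 * key_to_hw_index() performs the inverse rotation: for example, index
 * 0x00000005 maps to key 0x00000500 and back.
 */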
static u32 hw_index_to_key(unsigned long ind)
{
	return (u32)(ind >> 24) | (ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}
EXPORT_SYMBOL_GPL(key_to_hw_index);

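/*
 * SW2HW_MPT hands an MPT entry that software has prepared over to the
 * hardware via a mailbox command; HW2SW_MPT retires it.  For HW2SW the
 * mailbox is optional: with a NULL mailbox the op modifier is set,
 * presumably telling the firmware not to copy the entry back.
 */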
static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
			      struct hns_roce_cmd_mailbox *mailbox,
			      unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
				 HNS_ROCE_CMD_SW2HW_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
		       struct hns_roce_cmd_mailbox *mailbox,
		       unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}
EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);

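/*
 * MTT segments are managed with a classic buddy allocator: bits[o] is a
 * bitmap of free blocks of 2^o segments and num_free[o] counts them.
 * Allocation finds the smallest free block of at least the requested
 * order and splits it down, marking each split-off buddy as free.
 */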
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
				unsigned long *seg)
{
	int o;
	u32 m;

	spin_lock(&buddy->lock);

	for (o = order; o <= buddy->max_order; ++o) {
		if (buddy->num_free[o]) {
			m = 1 << (buddy->max_order - o);
			*seg = find_first_bit(buddy->bits[o], m);
			if (*seg < m)
				goto found;
		}
	}
	spin_unlock(&buddy->lock);
	return -1;

found:
	clear_bit(*seg, buddy->bits[o]);
	--buddy->num_free[o];

	/* Split the block down to the requested order */
	while (o > order) {
		--o;
		*seg <<= 1;
		set_bit(*seg ^ 1, buddy->bits[o]);
		++buddy->num_free[o];
	}

	spin_unlock(&buddy->lock);

	*seg <<= order;
	return 0;
}

static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
				int order)
{
	seg >>= order;

	spin_lock(&buddy->lock);

	/* Coalesce with the buddy block while it is also free */
	while (test_bit(seg ^ 1, buddy->bits[order])) {
		clear_bit(seg ^ 1, buddy->bits[order]);
		--buddy->num_free[order];
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);
	++buddy->num_free[order];

	spin_unlock(&buddy->lock);
}

static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);
	buddy->bits = kcalloc(buddy->max_order + 1,
			      sizeof(*buddy->bits),
			      GFP_KERNEL);
	buddy->num_free = kcalloc(buddy->max_order + 1,
				  sizeof(*buddy->num_free),
				  GFP_KERNEL);
	if (!buddy->bits || !buddy->num_free)
		goto err_out;

	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
					 __GFP_NOWARN);
		if (!buddy->bits[i]) {
			buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
			if (!buddy->bits[i])
				goto err_out_free;
		}
	}

	set_bit(0, buddy->bits[buddy->max_order]);
	buddy->num_free[buddy->max_order] = 1;

	return 0;

err_out_free:
	for (i = 0; i <= buddy->max_order; ++i)
		kvfree(buddy->bits[i]);

err_out:
	kfree(buddy->bits);
	kfree(buddy->num_free);
	return -ENOMEM;
}

static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
{
	int i;

	for (i = 0; i <= buddy->max_order; ++i)
		kvfree(buddy->bits[i]);

	kfree(buddy->bits);
	kfree(buddy->num_free);
}

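/*
 * Each MTT type (WQE, CQE, SRQ WQE, index) has its own buddy allocator
 * and HEM table.  Allocating a range means taking 2^order contiguous
 * segments from the matching buddy and pinning the backing HEM pages;
 * if the HEM pages cannot be obtained, the segments are returned.
 */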
static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
				    unsigned long *seg, u32 mtt_type)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_hem_table *table;
	struct hns_roce_buddy *buddy;
	int ret;

	switch (mtt_type) {
	case MTT_TYPE_WQE:
		buddy = &mr_table->mtt_buddy;
		table = &mr_table->mtt_table;
		break;
	case MTT_TYPE_CQE:
		buddy = &mr_table->mtt_cqe_buddy;
		table = &mr_table->mtt_cqe_table;
		break;
	case MTT_TYPE_SRQWQE:
		buddy = &mr_table->mtt_srqwqe_buddy;
		table = &mr_table->mtt_srqwqe_table;
		break;
	case MTT_TYPE_IDX:
		buddy = &mr_table->mtt_idx_buddy;
		table = &mr_table->mtt_idx_table;
		break;
	default:
		dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
			mtt_type);
		return -EINVAL;
	}

	ret = hns_roce_buddy_alloc(buddy, order, seg);
	if (ret == -1)
		return -EINVAL;

	if (hns_roce_table_get_range(hr_dev, table, *seg,
				     *seg + (1 << order) - 1)) {
		hns_roce_buddy_free(buddy, *seg, order);
		return -EINVAL;
	}

	return 0;
}

int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
		      struct hns_roce_mtt *mtt)
{
	int ret;
	int i;

	/* A page count of zero corresponds to a DMA memory registration */
	if (!npages) {
		mtt->order = -1;
		mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
		return 0;
	}

	/* Note: a page_shift of zero means a fast memory registration */
	mtt->page_shift = page_shift;

	/* Compute the number of MTT segments necessary */
	for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
	     i <<= 1)
		++mtt->order;

	/* Allocate MTT entries */
	ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
				       mtt->mtt_type);
	if (ret == -1)
		return -ENOMEM;

	return 0;
}

void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

	if (mtt->order < 0)
		return;

	switch (mtt->mtt_type) {
	case MTT_TYPE_WQE:
		hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
				    mtt->order);
		hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
					 mtt->first_seg,
					 mtt->first_seg + (1 << mtt->order) - 1);
		break;
	case MTT_TYPE_CQE:
		hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
				    mtt->order);
		hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
					 mtt->first_seg,
					 mtt->first_seg + (1 << mtt->order) - 1);
		break;
	case MTT_TYPE_SRQWQE:
		hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
				    mtt->order);
		hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
					 mtt->first_seg,
					 mtt->first_seg + (1 << mtt->order) - 1);
		break;
	case MTT_TYPE_IDX:
		hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
				    mtt->order);
		hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
					 mtt->first_seg,
					 mtt->first_seg + (1 << mtt->order) - 1);
		break;
	default:
		dev_err(hr_dev->dev,
			"Unsupported mtt type %d, cleanup mtt failed\n",
			mtt->mtt_type);
		break;
	}
}
EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);

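/*
 * Error-path helper for hns_roce_mhop_alloc(): frees the L1/L2
 * base-address tables that were successfully allocated before a failure
 * at position (loop_i, loop_j), then releases the L0 table itself.
 */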
static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mr *mr, int err_loop_index,
			       int loop_i, int loop_j)
{
	struct device *dev = hr_dev->dev;
	u32 mhop_num;
	u32 pbl_bt_sz;
	u64 bt_idx;
	int i, j;

	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
	mhop_num = hr_dev->caps.pbl_hop_num;

	i = loop_i;
	if (mhop_num == 3 && err_loop_index == 2) {
		for (; i >= 0; i--) {
			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
					  mr->pbl_l1_dma_addr[i]);

			for (j = 0; j < pbl_bt_sz / 8; j++) {
				if (i == loop_i && j >= loop_j)
					break;

				bt_idx = i * pbl_bt_sz / 8 + j;
				dma_free_coherent(dev, pbl_bt_sz,
						  mr->pbl_bt_l2[bt_idx],
						  mr->pbl_l2_dma_addr[bt_idx]);
			}
		}
	} else if (mhop_num == 3 && err_loop_index == 1) {
		for (i -= 1; i >= 0; i--) {
			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
					  mr->pbl_l1_dma_addr[i]);

			for (j = 0; j < pbl_bt_sz / 8; j++) {
				bt_idx = i * pbl_bt_sz / 8 + j;
				dma_free_coherent(dev, pbl_bt_sz,
						  mr->pbl_bt_l2[bt_idx],
						  mr->pbl_l2_dma_addr[bt_idx]);
			}
		}
	} else if (mhop_num == 2 && err_loop_index == 1) {
		for (i -= 1; i >= 0; i--)
			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
					  mr->pbl_l1_dma_addr[i]);
	} else {
		dev_warn(dev, "not supported: mhop_num=%d, err_loop_index=%d.",
			 mhop_num, err_loop_index);
		return;
	}

	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
	mr->pbl_bt_l0 = NULL;
	mr->pbl_l0_dma_addr = 0;
}

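/*
 * The PBL (page buffer list) can be linear (hop_num 0 or 1) or a two- to
 * three-level radix of base-address tables: L0 entries point at L1
 * tables, and with three hops L1 entries point at L2 tables whose
 * entries hold the page DMA addresses.  Each table is one pbl_bt_sz
 * chunk of pbl_bt_sz / 8 64-bit entries; only the last table is trimmed
 * to the number of remaining pages.
 */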
/* PBL multi-hop addressing */
static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
			       struct hns_roce_mr *mr)
{
	struct device *dev = hr_dev->dev;
	int mr_alloc_done = 0;
	int npages_allocated;
	int i = 0, j = 0;
	u32 pbl_bt_sz;
	u32 mhop_num;
	u64 pbl_last_bt_num;
	u64 pbl_bt_cnt = 0;
	u64 bt_idx;
	u64 size;

	mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);

	if (mhop_num == HNS_ROCE_HOP_NUM_0)
		return 0;

	/* hop_num = 1 */
	if (mhop_num == 1) {
		if (npages > pbl_bt_sz / 8) {
			dev_err(dev, "npages %d is larger than buf_pg_sz!",
				npages);
			return -EINVAL;
		}
		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
						 &(mr->pbl_dma_addr),
						 GFP_KERNEL);
		if (!mr->pbl_buf)
			return -ENOMEM;

		mr->pbl_size = npages;
		mr->pbl_ba = mr->pbl_dma_addr;
		mr->pbl_hop_num = mhop_num;
		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
		return 0;
	}

	mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
				      sizeof(*mr->pbl_l1_dma_addr),
				      GFP_KERNEL);
	if (!mr->pbl_l1_dma_addr)
		return -ENOMEM;

	mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
				GFP_KERNEL);
	if (!mr->pbl_bt_l1)
		goto err_kcalloc_bt_l1;

	if (mhop_num == 3) {
		mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
					      sizeof(*mr->pbl_l2_dma_addr),
					      GFP_KERNEL);
		if (!mr->pbl_l2_dma_addr)
			goto err_kcalloc_l2_dma;

		mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
					sizeof(*mr->pbl_bt_l2),
					GFP_KERNEL);
		if (!mr->pbl_bt_l2)
			goto err_kcalloc_bt_l2;
	}

	/* alloc L0 BT */
	mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
					   &(mr->pbl_l0_dma_addr),
					   GFP_KERNEL);
	if (!mr->pbl_bt_l0)
		goto err_dma_alloc_l0;

	if (mhop_num == 2) {
		/* alloc L1 BT */
		for (i = 0; i < pbl_bt_sz / 8; i++) {
			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
				size = pbl_bt_sz;
			} else {
				npages_allocated = i * (pbl_bt_sz / 8);
				size = (npages - npages_allocated) * 8;
			}
			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
					      &(mr->pbl_l1_dma_addr[i]),
					      GFP_KERNEL);
			if (!mr->pbl_bt_l1[i]) {
				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
				goto err_dma_alloc_l0;
			}

			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];

			pbl_bt_cnt++;
			if (pbl_bt_cnt >= pbl_last_bt_num)
				break;
		}
	} else if (mhop_num == 3) {
		/* alloc L1, L2 BT */
		for (i = 0; i < pbl_bt_sz / 8; i++) {
			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
					      &(mr->pbl_l1_dma_addr[i]),
					      GFP_KERNEL);
			if (!mr->pbl_bt_l1[i]) {
				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
				goto err_dma_alloc_l0;
			}

			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];

			for (j = 0; j < pbl_bt_sz / 8; j++) {
				bt_idx = i * pbl_bt_sz / 8 + j;

				if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
					size = pbl_bt_sz;
				} else {
					npages_allocated = bt_idx *
							   (pbl_bt_sz / 8);
					size = (npages - npages_allocated) * 8;
				}
				mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
					      dev, size,
					      &(mr->pbl_l2_dma_addr[bt_idx]),
					      GFP_KERNEL);
				if (!mr->pbl_bt_l2[bt_idx]) {
					hns_roce_loop_free(hr_dev, mr, 2, i, j);
					goto err_dma_alloc_l0;
				}

				*(mr->pbl_bt_l1[i] + j) =
						mr->pbl_l2_dma_addr[bt_idx];

				pbl_bt_cnt++;
				if (pbl_bt_cnt >= pbl_last_bt_num) {
					mr_alloc_done = 1;
					break;
				}
			}

			if (mr_alloc_done)
				break;
		}
	}

	mr->l0_chunk_last_num = i + 1;
	if (mhop_num == 3)
		mr->l1_chunk_last_num = j + 1;

	mr->pbl_size = npages;
	mr->pbl_ba = mr->pbl_l0_dma_addr;
	mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	return 0;

err_dma_alloc_l0:
	kfree(mr->pbl_bt_l2);
	mr->pbl_bt_l2 = NULL;

err_kcalloc_bt_l2:
	kfree(mr->pbl_l2_dma_addr);
	mr->pbl_l2_dma_addr = NULL;

err_kcalloc_l2_dma:
	kfree(mr->pbl_bt_l1);
	mr->pbl_bt_l1 = NULL;

err_kcalloc_bt_l1:
	kfree(mr->pbl_l1_dma_addr);
	mr->pbl_l1_dma_addr = NULL;

	return -ENOMEM;
}

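/*
 * Reserve an MPT index (and thus a key) for a new memory region and set
 * up its PBL.  A size of ~0ULL denotes a DMA MR covering the whole
 * address space, which needs no PBL at all.
 */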
static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
			     u64 size, u32 access, int npages,
			     struct hns_roce_mr *mr)
{
	struct device *dev = hr_dev->dev;
	unsigned long index = 0;
	int ret = 0;

	/* Allocate a key for mr from mr_table */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
	if (ret == -1)
		return -ENOMEM;

	mr->iova = iova;			/* MR VA starting addr */
	mr->size = size;			/* MR addr range */
	mr->pd = pd;				/* PD num the MR belongs to */
	mr->access = access;			/* MR access permissions */
	mr->enabled = 0;			/* MR active status */
	mr->key = hw_index_to_key(index);	/* MR key */

	if (size == ~0ULL) {
		mr->pbl_buf = NULL;
		mr->pbl_dma_addr = 0;
		/* PBL multi-hop addressing parameters */
		mr->pbl_bt_l2 = NULL;
		mr->pbl_bt_l1 = NULL;
		mr->pbl_bt_l0 = NULL;
		mr->pbl_l2_dma_addr = NULL;
		mr->pbl_l1_dma_addr = NULL;
		mr->pbl_l0_dma_addr = 0;
	} else {
		if (!hr_dev->caps.pbl_hop_num) {
			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
							 &(mr->pbl_dma_addr),
							 GFP_KERNEL);
			if (!mr->pbl_buf)
				return -ENOMEM;
		} else {
			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
		}
	}

	return ret;
}

static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mr *mr)
{
	struct device *dev = hr_dev->dev;
	int npages_allocated;
	int npages;
	int i, j;
	u32 pbl_bt_sz;
	u32 mhop_num;
	u64 bt_idx;

	npages = mr->pbl_size;
	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
	mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;

	if (mhop_num == HNS_ROCE_HOP_NUM_0)
		return;

	/* hop_num = 1 */
	if (mhop_num == 1) {
		dma_free_coherent(dev, (unsigned int)(npages * 8),
				  mr->pbl_buf, mr->pbl_dma_addr);
		return;
	}

	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
			  mr->pbl_l0_dma_addr);

	if (mhop_num == 2) {
		for (i = 0; i < mr->l0_chunk_last_num; i++) {
			if (i == mr->l0_chunk_last_num - 1) {
				npages_allocated = i * (pbl_bt_sz / 8);

				dma_free_coherent(dev,
					      (npages - npages_allocated) * 8,
					      mr->pbl_bt_l1[i],
					      mr->pbl_l1_dma_addr[i]);

				break;
			}

			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
					  mr->pbl_l1_dma_addr[i]);
		}
	} else if (mhop_num == 3) {
		for (i = 0; i < mr->l0_chunk_last_num; i++) {
			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
					  mr->pbl_l1_dma_addr[i]);

			for (j = 0; j < pbl_bt_sz / 8; j++) {
				bt_idx = i * (pbl_bt_sz / 8) + j;

				if ((i == mr->l0_chunk_last_num - 1)
				    && j == mr->l1_chunk_last_num - 1) {
					npages_allocated = bt_idx *
							   (pbl_bt_sz / 8);

					dma_free_coherent(dev,
					      (npages - npages_allocated) * 8,
					      mr->pbl_bt_l2[bt_idx],
					      mr->pbl_l2_dma_addr[bt_idx]);

					break;
				}

				dma_free_coherent(dev, pbl_bt_sz,
						  mr->pbl_bt_l2[bt_idx],
						  mr->pbl_l2_dma_addr[bt_idx]);
			}
		}
	}

	kfree(mr->pbl_bt_l1);
	kfree(mr->pbl_l1_dma_addr);
	mr->pbl_bt_l1 = NULL;
	mr->pbl_l1_dma_addr = NULL;
	if (mhop_num == 3) {
		kfree(mr->pbl_bt_l2);
		kfree(mr->pbl_l2_dma_addr);
		mr->pbl_bt_l2 = NULL;
		mr->pbl_l2_dma_addr = NULL;
	}
}

static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mr *mr)
{
	struct device *dev = hr_dev->dev;
	int npages = 0;
	int ret;

	if (mr->enabled) {
		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
					 & (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
	}

	if (mr->size != ~0ULL) {
		if (mr->type == MR_TYPE_MR)
			npages = ib_umem_page_count(mr->umem);

		if (!hr_dev->caps.pbl_hop_num)
			dma_free_coherent(dev, (unsigned int)(npages * 8),
					  mr->pbl_buf, mr->pbl_dma_addr);
		else
			hns_roce_mhop_free(hr_dev, mr);
	}

	if (mr->enabled)
		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mr->key));

	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
			     key_to_hw_index(mr->key), BITMAP_NO_RR);
}

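/*
 * Make an MR visible to the hardware: pin the HEM page backing its MPT
 * entry, build the entry in a mailbox buffer via the hw-specific
 * write_mtpt()/frmr_write_mtpt() hook, then issue SW2HW_MPT to install it.
 */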
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mr *mr)
{
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
	struct device *dev = hr_dev->dev;
	struct hns_roce_cmd_mailbox *mailbox;
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	int ret;

	/* Prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	/* Allocate mailbox memory */
	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	if (mr->type != MR_TYPE_FRMR)
		ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
	else
		ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
	if (ret) {
		dev_err(dev, "Write mtpt fail!\n");
		goto err_page;
	}

	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mr->enabled = 1;
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	return ret;
}

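/*
 * Write one chunk of page addresses into the MTT.  A chunk must not
 * cross a base-address-table page and must start on a segment boundary,
 * which hns_roce_write_mtt() below guarantees when it splits a request.
 */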
static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
				    struct hns_roce_mtt *mtt, u32 start_index,
				    u32 npages, u64 *page_list)
{
	struct hns_roce_hem_table *table;
	dma_addr_t dma_handle;
	__le64 *mtts;
	u32 s = start_index * sizeof(u64);
	u32 bt_page_size;
	u32 i;

	switch (mtt->mtt_type) {
	case MTT_TYPE_WQE:
		table = &hr_dev->mr_table.mtt_table;
		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_CQE:
		table = &hr_dev->mr_table.mtt_cqe_table;
		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_SRQWQE:
		table = &hr_dev->mr_table.mtt_srqwqe_table;
		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_IDX:
		table = &hr_dev->mr_table.mtt_idx_table;
		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
		break;
	default:
		return -EINVAL;
	}

	/* All MTTs must fit in the same page */
	if (start_index / (bt_page_size / sizeof(u64)) !=
	    (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
		return -EINVAL;

	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
		return -EINVAL;

	mtts = hns_roce_table_find(hr_dev, table,
				mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
				&dma_handle);
	if (!mtts)
		return -ENOMEM;

	/* Save page addresses; their low 12 bits are zero */
	for (i = 0; i < npages; ++i) {
		if (!hr_dev->caps.mtt_hop_num)
			mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
		else
			mtts[i] = cpu_to_le64(page_list[i]);
	}

	return 0;
}

static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mtt *mtt, u32 start_index,
			      u32 npages, u64 *page_list)
{
	int chunk;
	int ret;
	u32 bt_page_size;

	if (mtt->order < 0)
		return -EINVAL;

	switch (mtt->mtt_type) {
	case MTT_TYPE_WQE:
		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_CQE:
		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_SRQWQE:
		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
		break;
	case MTT_TYPE_IDX:
		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
		break;
	default:
		dev_err(hr_dev->dev,
			"Unsupported mtt type %d, write mtt failed\n",
			mtt->mtt_type);
		return -EINVAL;
	}

	while (npages > 0) {
		chunk = min_t(int, bt_page_size / sizeof(u64), npages);

		ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
					       page_list);
		if (ret)
			return ret;

		npages -= chunk;
		start_index += chunk;
		page_list += chunk;
	}

	return 0;
}

int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
			   struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
{
	u64 *page_list;
	int ret;
	u32 i;

	page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	for (i = 0; i < buf->npages; ++i) {
		if (buf->nbufs == 1)
			page_list[i] = buf->direct.map + (i << buf->page_shift);
		else
			page_list[i] = buf->page_list[i].map;
	}
	ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);

	kfree(page_list);

	return ret;
}

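/*
 * MR-table setup: one MPT bitmap plus a buddy allocator per MTT flavour,
 * each sized by the corresponding capability; flavours a device lacks
 * are simply skipped, in both init and cleanup.
 */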
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	int ret;

	ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
				   hr_dev->caps.num_mtpts,
				   hr_dev->caps.num_mtpts - 1,
				   hr_dev->caps.reserved_mrws, 0);
	if (ret)
		return ret;

	ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
				  ilog2(hr_dev->caps.num_mtt_segs));
	if (ret)
		goto err_buddy;

	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
		ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
					  ilog2(hr_dev->caps.num_cqe_segs));
		if (ret)
			goto err_buddy_cqe;
	}

	if (hr_dev->caps.num_srqwqe_segs) {
		ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
					  ilog2(hr_dev->caps.num_srqwqe_segs));
		if (ret)
			goto err_buddy_srqwqe;
	}

	if (hr_dev->caps.num_idx_segs) {
		ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
					  ilog2(hr_dev->caps.num_idx_segs));
		if (ret)
			goto err_buddy_idx;
	}

	return 0;

err_buddy_idx:
	if (hr_dev->caps.num_srqwqe_segs)
		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);

err_buddy_srqwqe:
	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);

err_buddy_cqe:
	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);

err_buddy:
	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
	return ret;
}

void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

	if (hr_dev->caps.num_idx_segs)
		hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
	if (hr_dev->caps.num_srqwqe_segs)
		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}

struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct hns_roce_mr *mr;
	int ret;

	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_DMA;

	/* Allocate memory region key */
	ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
				~0ULL, acc, 0, mr);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	hns_roce_mr_free(to_hr_dev(pd->device), mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

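/*
 * Walk a user memory region and write its DMA page addresses into the
 * MTT, batching up to one base-address-table page of entries per write.
 * Pages are sampled at mtt->page_shift granularity, so each recorded
 * address must be aligned to that size.
 */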
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mtt *mtt, struct ib_umem *umem)
{
	struct device *dev = hr_dev->dev;
	struct sg_dma_page_iter sg_iter;
	unsigned int order;
	int npage = 0;
	int ret = 0;
	int i = 0;
	u64 page_addr;
	u64 *pages;
	u32 bt_page_size;
	u32 n = 0;

	switch (mtt->mtt_type) {
	case MTT_TYPE_WQE:
		order = hr_dev->caps.mtt_ba_pg_sz;
		break;
	case MTT_TYPE_CQE:
		order = hr_dev->caps.cqe_ba_pg_sz;
		break;
	case MTT_TYPE_SRQWQE:
		order = hr_dev->caps.srqwqe_ba_pg_sz;
		break;
	case MTT_TYPE_IDX:
		order = hr_dev->caps.idx_ba_pg_sz;
		break;
	default:
		dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
			mtt->mtt_type);
		return -EINVAL;
	}

	bt_page_size = 1 << (order + PAGE_SHIFT);

	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
	if (!pages)
		return -ENOMEM;

	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
		page_addr = sg_page_iter_dma_address(&sg_iter);
		if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
			if (page_addr & ((1 << mtt->page_shift) - 1)) {
				dev_err(dev,
					"page_addr 0x%llx is not aligned to page_shift %d!\n",
					page_addr, mtt->page_shift);
				ret = -EINVAL;
				goto out;
			}
			pages[i++] = page_addr;
		}
		npage++;

		if (i == bt_page_size / sizeof(u64)) {
			ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
			if (ret)
				goto out;
			n += i;
			i = 0;
		}
	}

	if (i)
		ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);

out:
	free_pages((unsigned long) pages, order);
	return ret;
}

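/*
 * Fill an MR's PBL from a user memory region.  With hop_num 0 the
 * addresses are stored shifted right by 12 (4K-aligned); with one hop
 * they land directly in pbl_buf; with two or three hops they go into the
 * lowest-level base-address tables built by hns_roce_mhop_alloc().
 */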
static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
				     struct hns_roce_mr *mr,
				     struct ib_umem *umem)
{
	struct sg_dma_page_iter sg_iter;
	int i = 0, j = 0;
	u64 page_addr;
	u32 pbl_bt_sz;

	if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
		return 0;

	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
		page_addr = sg_page_iter_dma_address(&sg_iter);
		if (!hr_dev->caps.pbl_hop_num) {
			mr->pbl_buf[i++] = page_addr >> 12;
		} else if (hr_dev->caps.pbl_hop_num == 1) {
			mr->pbl_buf[i++] = page_addr;
		} else {
			if (hr_dev->caps.pbl_hop_num == 2)
				mr->pbl_bt_l1[i][j] = page_addr;
			else if (hr_dev->caps.pbl_hop_num == 3)
				mr->pbl_bt_l2[i][j] = page_addr;

			j++;
			if (j >= (pbl_bt_sz / 8)) {
				i++;
				j = 0;
			}
		}
	}

	/* Memory barrier */
	mb();

	return 0;
}

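/*
 * Register a user MR: pin the umem, bound-check its page count against
 * what the configured PBL depth can address, then allocate, fill and
 * enable the MPT entry.
 */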
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   u64 virt_addr, int access_flags,
				   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	int bt_size;
	int ret;
	int n;
	int i;

	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
	if (IS_ERR(mr->umem)) {
		ret = PTR_ERR(mr->umem);
		goto err_free;
	}

	n = ib_umem_page_count(mr->umem);

	if (!hr_dev->caps.pbl_hop_num) {
		if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
			dev_err(dev,
				"MR len %lld err. MR is limited to 4G at most!\n",
				length);
			ret = -EINVAL;
			goto err_umem;
		}
	} else {
		u64 pbl_size = 1;

		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
			pbl_size *= bt_size;
		if (n > pbl_size) {
			dev_err(dev,
				"MR len %lld err. MR page num is limited to %lld!\n",
				length, pbl_size);
			ret = -EINVAL;
			goto err_umem;
		}
	}

	mr->type = MR_TYPE_MR;

	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
				access_flags, n, mr);
	if (ret)
		goto err_umem;

	ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
	if (ret)
		goto err_mr;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;

err_mr:
	hns_roce_mr_free(hr_dev, mr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

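/*
 * Re-register an MR in place: query the current MPT entry into a
 * mailbox, retire it with HW2SW_MPT, swap the PD, translation and/or
 * access flags as requested, then re-install the updated entry with
 * SW2HW_MPT.
 */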
int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
			   u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
			   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx;
	u32 pdn = 0;
	int npages;
	int ret;

	if (!mr->enabled)
		return -EINVAL;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
				HNS_ROCE_CMD_QUERY_MPT,
				HNS_ROCE_CMD_TIMEOUT_MSECS);
	if (ret)
		goto free_cmd_mbox;

	ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
	if (ret)
		dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);

	mr->enabled = 0;

	if (flags & IB_MR_REREG_PD)
		pdn = to_hr_pd(pd)->pdn;

	if (flags & IB_MR_REREG_TRANS) {
		if (mr->size != ~0ULL) {
			npages = ib_umem_page_count(mr->umem);

			if (hr_dev->caps.pbl_hop_num)
				hns_roce_mhop_free(hr_dev, mr);
			else
				dma_free_coherent(dev, npages * 8, mr->pbl_buf,
						  mr->pbl_dma_addr);
		}
		ib_umem_release(mr->umem);

		mr->umem =
			ib_umem_get(udata, start, length, mr_access_flags, 0);
		if (IS_ERR(mr->umem)) {
			ret = PTR_ERR(mr->umem);
			mr->umem = NULL;
			goto free_cmd_mbox;
		}
		npages = ib_umem_page_count(mr->umem);

		if (hr_dev->caps.pbl_hop_num) {
			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
			if (ret)
				goto release_umem;
		} else {
			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
							 &(mr->pbl_dma_addr),
							 GFP_KERNEL);
			if (!mr->pbl_buf) {
				ret = -ENOMEM;
				goto release_umem;
			}
		}
	}

	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
					   mr_access_flags, virt_addr,
					   length, mailbox->buf);
	if (ret) {
		if (flags & IB_MR_REREG_TRANS)
			goto release_umem;
		else
			goto free_cmd_mbox;
	}

	if (flags & IB_MR_REREG_TRANS) {
		ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
		if (ret) {
			if (mr->size != ~0ULL) {
				npages = ib_umem_page_count(mr->umem);

				if (hr_dev->caps.pbl_hop_num)
					hns_roce_mhop_free(hr_dev, mr);
				else
					dma_free_coherent(dev, npages * 8,
							  mr->pbl_buf,
							  mr->pbl_dma_addr);
			}
			goto release_umem;
		}
	}

	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
	if (ret) {
		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
		goto release_umem;
	}

	mr->enabled = 1;
	if (flags & IB_MR_REREG_ACCESS)
		mr->access = mr_access_flags;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

release_umem:
	ib_umem_release(mr->umem);

free_cmd_mbox:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}

int hns_roce_dereg_mr(struct ib_mr *ibmr)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	int ret = 0;

	if (hr_dev->hw->dereg_mr) {
		ret = hr_dev->hw->dereg_mr(hr_dev, mr);
	} else {
		hns_roce_mr_free(hr_dev, mr);

		if (mr->umem)
			ib_umem_release(mr->umem);

		kfree(mr);
	}

	return ret;
}

struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	u64 length;
	u32 page_size;
	int ret;

	page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
	length = max_num_sg * page_size;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
		dev_err(dev, "max_num_sg larger than %d\n",
			HNS_ROCE_FRMR_MAX_PA);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_FRMR;

	/* Allocate memory region key */
	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
				0, max_num_sg, mr);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	hns_roce_mr_free(to_hr_dev(pd->device), mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

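/*
 * ib_sg_to_pages() invokes hns_roce_set_page() once per page, so mapping
 * a scatterlist onto an FRMR simply resets the page counter and lets the
 * core fill pbl_buf.
 */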
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);

	return 0;
}

int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		       unsigned int *sg_offset)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	mr->npages = 0;

	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
}

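/*
 * Memory windows share the MPT bitmap and table with memory regions: a
 * window is just another MPT entry, written with the mw_write_mtpt()
 * hook and installed/retired with the same SW2HW/HW2SW mailbox commands.
 */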
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mw *mw)
{
	struct device *dev = hr_dev->dev;
	int ret;

	if (mw->enabled) {
		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
					 & (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);

		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mw->rkey));
	}

	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mw *mw)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
	int ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "MW write mtpt fail!\n");
		goto err_page;
	}

	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}

struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
				struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
	struct hns_roce_mw *mw;
	unsigned long index = 0;
	int ret;

	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
	if (!mw)
		return ERR_PTR(-ENOMEM);

	/* Allocate a key for mw from bitmap */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
	if (ret)
		goto err_bitmap;

	mw->rkey = hw_index_to_key(index);

	mw->ibmw.rkey = mw->rkey;
	mw->ibmw.type = type;
	mw->pdn = to_hr_pd(ib_pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return &mw->ibmw;

err_mw:
	hns_roce_mw_free(hr_dev, mw);

err_bitmap:
	kfree(mw);

	return ERR_PTR(ret);
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);
	kfree(mw);

	return 0;
}