drivers/infiniband/sw/rxe/rxe_mr.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
        u8 key;

        do {
                get_random_bytes(&key, 1);
        } while (key == last_key);

        return key;
}

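/* Check that [iova, iova + length) lies within the region described by
 * the MR. DMA MRs cover all of memory, so any range is accepted.
 */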
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
        switch (mr->type) {
        case RXE_MR_TYPE_DMA:
                return 0;

        case RXE_MR_TYPE_MR:
                if (iova < mr->iova || length > mr->length ||
                    iova > mr->iova + mr->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
                          | IB_ACCESS_REMOTE_WRITE \
                          | IB_ACCESS_REMOTE_ATOMIC)

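/* Build the lkey from the MR's pool index (upper 24 bits) and a fresh
 * random 8-bit key, and mirror it into the rkey only when remote access
 * was requested.
 */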
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
        u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        mr->ibmr.lkey = lkey;
        mr->ibmr.rkey = rkey;
        mr->state = RXE_MR_STATE_INVALID;
        mr->type = RXE_MR_TYPE_NONE;
        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

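/* Allocate the two-level buffer table: an array of num_map pointers,
 * each to a struct rxe_map holding RXE_BUF_PER_MAP physical buffer
 * descriptors, enough to cover num_buf buffers in total.
 */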
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mr->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mr->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mr->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
        mr->map_mask = RXE_BUF_PER_MAP - 1;

        mr->num_buf = num_buf;
        mr->num_map = num_map;
        mr->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mr->map[i]);

        kfree(mr->map);
err1:
        return -ENOMEM;
}

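/* A DMA MR has no buffer table; iovas are used directly as kernel
 * virtual addresses (see iova_to_vaddr() and rxe_mr_copy()), so only
 * the keys, access flags, and state need to be set up.
 */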
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
        rxe_mr_init(access, mr);

        mr->ibmr.pd = &pd->ibpd;
        mr->access = access;
        mr->state = RXE_MR_STATE_VALID;
        mr->type = RXE_MR_TYPE_DMA;
}

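/* Register a user memory region: pin the pages with ib_umem_get() and
 * record the kernel virtual address and size of each page in the MR's
 * buffer table so that later iova lookups can be served from the table.
 */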
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
                     int access, struct rxe_mr *mr)
{
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct sg_page_iter sg_iter;
        int num_buf;
        void *vaddr;
        int err;
        int i;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("%s: Unable to pin memory region err = %d\n",
                        __func__, (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
                goto err_out;
        }

        num_buf = ib_umem_num_pages(umem);

        rxe_mr_init(access, mr);

        err = rxe_mr_alloc(mr, num_buf);
        if (err) {
                pr_warn("%s: Unable to allocate memory for map\n",
                        __func__);
                goto err_release_umem;
        }

        mr->page_shift = PAGE_SHIFT;
        mr->page_mask = PAGE_SIZE - 1;

        num_buf = 0;
        map = mr->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("%s: Unable to get virtual address\n",
                                        __func__);
                                err = -ENOMEM;
                                goto err_cleanup_map;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mr->ibmr.pd = &pd->ibpd;
        mr->umem = umem;
        mr->access = access;
        mr->length = length;
        mr->iova = iova;
        mr->va = start;
        mr->offset = ib_umem_offset(umem);
        mr->state = RXE_MR_STATE_VALID;
        mr->type = RXE_MR_TYPE_MR;

        return 0;

err_cleanup_map:
        for (i = 0; i < mr->num_map; i++)
                kfree(mr->map[i]);
        kfree(mr->map);
err_release_umem:
        ib_umem_release(umem);
err_out:
        return err;
}

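/* Allocate an MR for fast registration with room for max_pages buffers.
 * The buffer table is allocated but not populated here, and the MR
 * starts in the FREE state.
 */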
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
        int err;

        rxe_mr_init(0, mr);

        /* In fastreg, we also set the rkey */
        mr->ibmr.rkey = mr->ibmr.lkey;

        err = rxe_mr_alloc(mr, max_pages);
        if (err)
                goto err1;

        mr->ibmr.pd = &pd->ibpd;
        mr->max_buf = max_pages;
        mr->state = RXE_MR_STATE_FREE;
        mr->type = RXE_MR_TYPE_MR;

        return 0;

err1:
        return err;
}

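/* Translate an iova into a (map index, buffer index, offset) triple.
 * When page_shift is set the buffers are uniformly sized and the result
 * comes from shifts and masks; otherwise walk the buffer sizes linearly.
 */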
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
                        size_t *offset_out)
{
        size_t offset = iova - mr->iova + mr->offset;
        int map_index;
        int buf_index;
        u64 length;

        if (likely(mr->page_shift)) {
                *offset_out = offset & mr->page_mask;
                offset >>= mr->page_shift;
                *n_out = offset & mr->map_mask;
                *m_out = offset >> mr->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mr->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mr->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

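/* Return the kernel virtual address backing iova, or NULL if the MR is
 * not in the valid state, the range check fails, or the requested
 * length crosses a buffer boundary. MRs without a buffer table (DMA
 * MRs) return the iova unchanged.
 */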
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mr->state != RXE_MR_STATE_VALID) {
                pr_warn("mr not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mr->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mr_check_range(mr, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mr, iova, &m, &n, &offset);

        if (offset + length > mr->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* Copy data between the buffer at (addr, addr + length - 1) and an MR
 * object starting at iova; dir gives the direction of the copy.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
                enum rxe_mr_copy_dir dir)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;

        if (length == 0)
                return 0;

        if (mr->type == RXE_MR_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

                dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                return 0;
        }

        WARN_ON_ONCE(!mr->map);

        err = mr_check_range(mr, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mr, iova, &m, &i, &offset);

        map = mr->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == RXE_TO_MR_OBJ) ? addr : va;
                dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        return 0;

err1:
        return err;
}

/* Copy data in or out of a wqe, i.e. an sg list,
 * under the control of a dma descriptor.
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
              void *addr, int length, enum rxe_mr_copy_dir dir)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mr *mr = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
                if (!mr) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mr) {
                                rxe_drop_ref(mr);
                                mr = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mr = lookup_mr(pd, access, sge->lkey,
                                               RXE_LOOKUP_LOCAL);
                                if (!mr) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mr_copy(mr, iova, addr, bytes, dir);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mr)
                rxe_drop_ref(mr);

        return 0;

err2:
        if (mr)
                rxe_drop_ref(mr);
err1:
        return err;
}

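/* Advance the dma descriptor (cur_sge, sge_offset, resid) past length
 * bytes of the sg list without copying any data.
 */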
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
                         enum rxe_mr_lookup_type type)
{
        struct rxe_mr *mr;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mr = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mr)
                return NULL;

        if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
                     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
                     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
                     mr->state != RXE_MR_STATE_VALID)) {
                rxe_drop_ref(mr);
                mr = NULL;
        }

        return mr;
}

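/* Invalidate the MR named by rkey: verify that the key matches and that
 * no memory windows are still bound to the MR, then return it to the
 * FREE state.
 */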
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_mr *mr;
        int ret;

        mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
        if (!mr) {
                pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
                ret = -EINVAL;
                goto err;
        }

        if (rkey != mr->ibmr.rkey) {
                pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
                       __func__, rkey, mr->ibmr.rkey);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        if (atomic_read(&mr->num_mw) > 0) {
                pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
                        __func__);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        mr->state = RXE_MR_STATE_FREE;
        ret = 0;

err_drop_ref:
        rxe_drop_ref(mr);
err:
        return ret;
}

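/* Deregister an MR. Deregistration is refused while memory windows are
 * still bound; otherwise the MR is marked as a zombie and its pd
 * reference, pool index, and reference count are dropped.
 */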
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
        struct rxe_mr *mr = to_rmr(ibmr);

        if (atomic_read(&mr->num_mw) > 0) {
                pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
                        __func__);
                return -EINVAL;
        }

        mr->state = RXE_MR_STATE_ZOMBIE;
        rxe_drop_ref(mr_pd(mr));
        rxe_drop_index(mr);
        rxe_drop_ref(mr);

        return 0;
}

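/* Pool element cleanup callback: release the pinned user memory, if
 * any, and free the two-level buffer table.
 */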
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
        int i;

        ib_umem_release(mr->umem);

        if (mr->map) {
                for (i = 0; i < mr->num_map; i++)
                        kfree(mr->map[i]);

                kfree(mr->map);
        }
}