block/blk-map.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	int is_our_pages;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}

/**
 * bio_copy_user_iov - copy user data to bio
 * @q: destination block queue
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Prepares and returns a bio for indirect user io, bouncing data
 * to/from kernel pages as necessary. Must be paired with a call to
 * bio_uncopy_user() on io completion.
 */
static struct bio *bio_copy_user_iov(struct request_queue *q,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = map_data ? 0 : 1;

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;
	if (map_data && map_data->null_mapped)
		bio_set_flag(bio, BIO_NULL_MAPPED);
	return bio;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ERR_PTR(ret);
}

/**
 * bio_map_user_iov - map user iovec into bio
 * @q: the struct request_queue for the bio
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_user_iov(struct request_queue *q,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	unsigned int max_sectors = queue_max_hw_sectors(q);
	int j;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(q))) {
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	bio_set_flag(bio, BIO_USER_MAPPED);

	/*
	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);
	return bio;

 out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
	bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_put(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
		bio_crypt_free_ctx(*bio);
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
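
/*
 * Illustrative sketch, not part of the original file: how an external
 * caller might use the exported blk_rq_append_bio(). The helper name
 * example_append_page() and its parameters are hypothetical; the bio is
 * built with bio_kmalloc()/bio_add_pc_page() as the copy/map helpers above
 * do. Note that blk_rq_append_bio() may swap in a bounce bio, and on
 * failure it restores *bio to the original, which the caller then frees.
 */
static int example_append_page(struct request *rq, struct page *page,
			       unsigned int len, unsigned int offset)
{
	struct bio *bio;
	int ret;

	bio = bio_kmalloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;
	bio->bi_opf = req_op(rq);

	if (bio_add_pc_page(rq->q, bio, page, len, offset) < len) {
		bio_put(bio);
		return -EINVAL;
	}

	ret = blk_rq_append_bio(rq, &bio);
	if (ret)
		bio_put(bio);	/* could not merge into the request */
	return ret;
}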

static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

static int __blk_rq_map_user_iov(struct request *rq,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask, bool copy)
{
	struct request_queue *q = rq->q;
	struct bio *bio, *orig_bio;
	int ret;

	if (copy)
		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
	else
		bio = bio_map_user_iov(q, iter, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret) {
		__blk_rq_unmap_user(orig_bio);
		return ret;
	}
	bio_get(bio);

	return 0;
}

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
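
/*
 * Illustrative sketch, not part of the original file: feeding a user iovec
 * array to blk_rq_map_user_iov(). The helper name example_map_user_iovec()
 * is hypothetical; import_iovec() builds the iov_iter and may allocate a
 * segment array, which is freed afterwards (a no-op when the on-stack
 * UIO_FASTIOV array was used).
 */
static int example_map_user_iovec(struct request *rq,
				  const struct iovec __user *uvec,
				  unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(rq_data_dir(rq), uvec, nr_segs, UIO_FASTIOV,
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = blk_rq_map_user_iov(rq->q, rq, NULL, &iter, GFP_KERNEL);
	kfree(iov);
	return ret;
}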

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);
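
/*
 * Illustrative sketch, not part of the original file: mapping a flat user
 * buffer into a freshly allocated passthrough request, roughly as an
 * SG_IO-style ioctl would. The helper name example_map_flat_buffer() and
 * the choice of REQ_OP_DRV_OUT are hypothetical; blk_get_request() and
 * blk_put_request() are the era-appropriate request helpers.
 */
static struct request *example_map_flat_buffer(struct request_queue *q,
					       void __user *ubuf,
					       unsigned long len)
{
	struct request *rq;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_OUT, 0);
	if (IS_ERR(rq))
		return rq;

	/* NULL map_data: pages are pinned or bounced by the core. */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret) {
		blk_put_request(rq);
		return ERR_PTR(ret);
	}
	return rq;
}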

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		ret2 = __blk_rq_unmap_user(mapped_bio);
		if (ret2 && !ret)
			ret = ret2;

		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);
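
/*
 * Illustrative sketch, not part of the original file: the unmap side of the
 * pattern described above. rq->bio is saved before blk_execute_rq() because
 * completion may change it; the saved head is what blk_rq_unmap_user()
 * expects. The helper name example_execute_and_unmap() is hypothetical and
 * blk_execute_rq() is used with its era-appropriate signature.
 */
static int example_execute_and_unmap(struct request_queue *q,
				     struct gendisk *disk, struct request *rq)
{
	struct bio *bio = rq->bio;	/* original bio list head */
	int ret;

	blk_execute_rq(q, disk, rq, 0);
	ret = blk_rq_unmap_user(bio);
	blk_put_request(rq);
	return ret;
}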

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of user data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	struct bio *bio, *orig_bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;
	ret = blk_rq_append_bio(rq, &bio);
	if (unlikely(ret)) {
		/* request is too big */
		bio_put(orig_bio);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
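
/*
 * Illustrative sketch, not part of the original file: issuing a driver
 * command with a kernel buffer via blk_rq_map_kern(). The helper name
 * example_issue_kern_buffer() and the choice of REQ_OP_DRV_IN are
 * hypothetical. A misaligned or on-stack buffer is transparently bounced
 * through bio_copy_kern() above.
 */
static int example_issue_kern_buffer(struct request_queue *q,
				     struct gendisk *disk, void *buf,
				     unsigned int len)
{
	struct request *rq;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!ret)
		blk_execute_rq(q, disk, rq, 0);

	blk_put_request(rq);
	return ret;
}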