// SPDX-License-Identifier: GPL-2.0
/* bounce buffer handling for block devices
 *
 * - Split from highmem.c
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/printk.h>
#include <asm/tlbflush.h>

#include <trace/events/block.h>
#include "blk.h"

#define POOL_SIZE	64
#define ISA_POOL_SIZE	16

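/*
 * Two separate resources back the bouncing: a page pool for the common
 * highmem case, and a smaller pool allocated with GFP_DMA for legacy
 * ISA devices that cannot address high memory.
 */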
static struct bio_set bounce_bio_set, bounce_bio_split;
static mempool_t page_pool, isa_page_pool;

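/*
 * Lazily set up the bio_sets used for bounce clones and splits.  Both
 * pool initializers call this; the static flag makes repeated calls a
 * no-op.  Failure is treated as fatal, since bouncing cannot work
 * without these bio_sets.
 */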
static void init_bounce_bioset(void)
{
	static bool bounce_bs_setup;
	int ret;

	if (bounce_bs_setup)
		return;

	ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	BUG_ON(ret);
	if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
		BUG_ON(1);

	ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
	BUG_ON(ret);
	bounce_bs_setup = true;
}

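/*
 * Set up the emergency page pool used for bouncing highmem pages.
 * Without memory hotplug the pool can be skipped entirely when the
 * system has no highmem pages (max_pfn <= max_low_pfn).
 */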
#if defined(CONFIG_HIGHMEM)
static __init int init_emergency_pool(void)
{
	int ret;
#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
	if (max_pfn <= max_low_pfn)
		return 0;
#endif

	ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
	BUG_ON(ret);
	pr_info("pool size: %d pages\n", POOL_SIZE);

	init_bounce_bioset();
	return 0;
}

__initcall(init_emergency_pool);
#endif

#ifdef CONFIG_HIGHMEM
/*
 * highmem version: kmap the destination page and copy into the vec
 */
static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
{
	unsigned char *vto;

	vto = kmap_atomic(to->bv_page);
	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
	kunmap_atomic(vto);
}

#else /* CONFIG_HIGHMEM */

#define bounce_copy_vec(to, vfrom)	\
	memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)

#endif /* CONFIG_HIGHMEM */

/*
 * allocate pages in the DMA region for the ISA pool
 */
static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
{
	return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
}

static DEFINE_MUTEX(isa_mutex);

/*
 * Called every time a queue is initialized with BLK_BOUNCE_ISA as the
 * max address, so check whether the pool has already been created.
 */
int init_emergency_isa_pool(void)
{
	int ret;

	mutex_lock(&isa_mutex);

	if (mempool_initialized(&isa_page_pool)) {
		mutex_unlock(&isa_mutex);
		return 0;
	}

	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
			   mempool_free_pages, (void *) 0);
	BUG_ON(ret);

	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
	init_bounce_bioset();
	mutex_unlock(&isa_mutex);
	return 0;
}

/*
 * Simple bounce buffer support for highmem pages.  Depending on the
 * queue gfp mask set, *to may or may not be a highmem page.  kmap it
 * always, as it will do the Right Thing either way.
 */
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
	unsigned char *vfrom;
	struct bio_vec tovec, fromvec;
	struct bvec_iter iter;
	/*
	 * The bio of @from is created by bounce, so we can iterate
	 * its bvec from start to end, but the @from->bi_iter can't be
	 * trusted because it might be changed by splitting.
	 */
	struct bvec_iter from_iter = BVEC_ITER_ALL_INIT;

	bio_for_each_segment(tovec, to, iter) {
		fromvec = bio_iter_iovec(from, from_iter);
		if (tovec.bv_page != fromvec.bv_page) {
			/*
			 * fromvec->bv_offset and fromvec->bv_len might have
			 * been modified by the block layer, so use the
			 * original copy; bounce_copy_vec already uses
			 * tovec->bv_len.
			 */
			vfrom = page_address(fromvec.bv_page) +
				tovec.bv_offset;

			bounce_copy_vec(&tovec, vfrom);
			flush_dcache_page(tovec.bv_page);
		}
		bio_advance_iter(from, &from_iter, tovec.bv_len);
	}
}

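/*
 * Common completion path for bounced bios: walk the bounce clone and
 * the original bio in lockstep, release every page that came from the
 * bounce pool (i.e. differs from the original page), then propagate
 * the completion status to the original bio.
 */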
static void bounce_end_io(struct bio *bio, mempool_t *pool)
{
	struct bio *bio_orig = bio->bi_private;
	struct bio_vec *bvec, orig_vec;
	struct bvec_iter orig_iter = bio_orig->bi_iter;
	struct bvec_iter_all iter_all;

	/*
	 * free up bounce indirect pages used
	 */
	bio_for_each_segment_all(bvec, bio, iter_all) {
		orig_vec = bio_iter_iovec(bio_orig, orig_iter);
		if (bvec->bv_page != orig_vec.bv_page) {
			dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
			mempool_free(bvec->bv_page, pool);
		}
		bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len);
	}

	bio_orig->bi_status = bio->bi_status;
	bio_endio(bio_orig);
	bio_put(bio);
}

static void bounce_end_io_write(struct bio *bio)
{
	bounce_end_io(bio, &page_pool);
}

static void bounce_end_io_write_isa(struct bio *bio)
{
	bounce_end_io(bio, &isa_page_pool);
}

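/*
 * Read completion: if the I/O succeeded, copy the data that landed in
 * the bounce pages back into the original (possibly highmem) pages
 * before releasing them.
 */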
static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
{
	struct bio *bio_orig = bio->bi_private;

	if (!bio->bi_status)
		copy_to_high_bio_irq(bio_orig, bio);

	bounce_end_io(bio, pool);
}

static void bounce_end_io_read(struct bio *bio)
{
	__bounce_end_io_read(bio, &page_pool);
}

static void bounce_end_io_read_isa(struct bio *bio)
{
	__bounce_end_io_read(bio, &isa_page_pool);
}

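/*
 * Clone @bio_src for bouncing.  Only the bvecs covered by the current
 * iterator are copied, so the clone's pages can later be swapped for
 * bounce pages without touching the original bio.
 */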
static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
		struct bio_set *bs)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	struct bio *bio;

	/*
	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
	 * bio_src->bi_io_vec to bio->bi_io_vec.
	 *
	 * We can't do that anymore, because:
	 *
	 *  - The point of cloning the biovec is to produce a bio with a biovec
	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
	 *
	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
	 *    But the clone should succeed as long as the number of biovecs we
	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
	 *
	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
	 *    that does not own the bio - reason being drivers don't use it for
	 *    iterating over the biovec anymore, so expecting it to be kept up
	 *    to date (i.e. for clones that share the parent biovec) is just
	 *    asking for trouble and would force extra work on
	 *    __bio_clone_fast() anyways.
	 */
	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
	if (!bio)
		return NULL;
	bio->bi_disk		= bio_src->bi_disk;
	bio->bi_opf		= bio_src->bi_opf;
	bio->bi_ioprio		= bio_src->bi_ioprio;
	bio->bi_write_hint	= bio_src->bi_write_hint;
	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_ZEROES:
		break;
	case REQ_OP_WRITE_SAME:
		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
		break;
	default:
		bio_for_each_segment(bv, bio_src, iter)
			bio->bi_io_vec[bio->bi_vcnt++] = bv;
		break;
	}

	if (bio_integrity(bio_src)) {
		int ret;

		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
		if (ret < 0) {
			bio_put(bio);
			return NULL;
		}
	}

	bio_clone_blkg_association(bio, bio_src);
	blkcg_bio_issue_init(bio);

	return bio;
}

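/*
 * Do the actual bouncing: split the bio if it has more segments than
 * can be cloned in one go, clone it, replace every page above
 * bounce_pfn with a page from the bounce pool (copying the payload for
 * writes), and install the matching completion handler.
 */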
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
		mempool_t *pool)
{
	struct bio *bio;
	int rw = bio_data_dir(*bio_orig);
	struct bio_vec *to, from;
	struct bvec_iter iter;
	unsigned i = 0;
	bool bounce = false;
	int sectors = 0;
	bool passthrough = bio_is_passthrough(*bio_orig);

	bio_for_each_segment(from, *bio_orig, iter) {
		if (i++ < BIO_MAX_PAGES)
			sectors += from.bv_len >> 9;
		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
			bounce = true;
	}
	if (!bounce)
		return;

	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
		bio_chain(bio, *bio_orig);
		generic_make_request(*bio_orig);
		*bio_orig = bio;
	}
	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
			&bounce_bio_set);

	/*
	 * The bvec table can't be updated by bio_for_each_segment_all(),
	 * so retrieve the bvecs from the table directly.  This is safe
	 * because 'bio' is made up of single-page bvecs.
	 */
	for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
		struct page *page = to->bv_page;

		if (page_to_pfn(page) <= q->limits.bounce_pfn)
			continue;

		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
		inc_zone_page_state(to->bv_page, NR_BOUNCE);

		if (rw == WRITE) {
			char *vto, *vfrom;

			flush_dcache_page(page);

			vto = page_address(to->bv_page) + to->bv_offset;
			vfrom = kmap_atomic(page) + to->bv_offset;
			memcpy(vto, vfrom, to->bv_len);
			kunmap_atomic(vfrom);
		}
	}

	trace_block_bio_bounce(q, *bio_orig);

	bio->bi_flags |= (1 << BIO_BOUNCED);

	if (pool == &page_pool) {
		bio->bi_end_io = bounce_end_io_write;
		if (rw == READ)
			bio->bi_end_io = bounce_end_io_read;
	} else {
		bio->bi_end_io = bounce_end_io_write_isa;
		if (rw == READ)
			bio->bi_end_io = bounce_end_io_read_isa;
	}

	bio->bi_private = *bio_orig;
	*bio_orig = bio;
}

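/*
 * blk_queue_bounce - bounce highmem pages in @*bio_orig if the queue
 * requires it.  When bouncing happens, @*bio_orig is replaced with the
 * bounce clone, and the original bio completes through the clone's
 * end_io handlers.
 *
 * A rough sketch of a call site (hypothetical, for illustration only;
 * actual callers live in the bio submission path):
 *
 *	struct bio *bio = ...;
 *	blk_queue_bounce(q, &bio);
 *	...  'bio' may now point at the bounce clone; submit that instead
 */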
void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
{
	mempool_t *pool;

	/*
	 * Data-less bio, nothing to bounce
	 */
	if (!bio_has_data(*bio_orig))
		return;

	/*
	 * For the non-ISA bounce case, just check if the bounce pfn is
	 * equal to or bigger than the highest pfn in the system -- in
	 * that case, don't waste time iterating over bio segments.
	 */
	if (!(q->bounce_gfp & GFP_DMA)) {
		if (q->limits.bounce_pfn >= blk_max_pfn)
			return;
		pool = &page_pool;
	} else {
		BUG_ON(!mempool_initialized(&isa_page_pool));
		pool = &isa_page_pool;
	}

	/*
	 * slow path
	 */
	__blk_queue_bounce(q, bio_orig, pool);
}