]>
Commit | Line | Data |
---|---|---|
9f645532 TH |
1 | /* |
2 | * mm/percpu-vm.c - vmalloc area based chunk allocation | |
3 | * | |
4 | * Copyright (C) 2010 SUSE Linux Products GmbH | |
5 | * Copyright (C) 2010 Tejun Heo <tj@kernel.org> | |
6 | * | |
7 | * This file is released under the GPLv2. | |
8 | * | |
9 | * Chunks are mapped into vmalloc areas and populated page by page. | |
10 | * This is the default chunk allocator. | |
11 | */ | |
12 | ||
13 | static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, | |
14 | unsigned int cpu, int page_idx) | |
15 | { | |
16 | /* must not be used on pre-mapped chunk */ | |
17 | WARN_ON(chunk->immutable); | |
18 | ||
19 | return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); | |
20 | } | |
21 | ||
22 | /** | |
23 | * pcpu_get_pages_and_bitmap - get temp pages array and bitmap | |
24 | * @chunk: chunk of interest | |
25 | * @bitmapp: output parameter for bitmap | |
26 | * @may_alloc: may allocate the array | |
27 | * | |
28 | * Returns pointer to array of pointers to struct page and bitmap, | |
29 | * both of which can be indexed with pcpu_page_idx(). The returned | |
30 | * array is cleared to zero and *@bitmapp is copied from | |
31 | * @chunk->populated. Note that there is only one array and bitmap | |
32 | * and access exclusion is the caller's responsibility. | |
33 | * | |
34 | * CONTEXT: | |
35 | * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. | |
36 | * Otherwise, don't care. | |
37 | * | |
38 | * RETURNS: | |
39 | * Pointer to temp pages array on success, NULL on failure. | |
40 | */ | |
41 | static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | |
42 | unsigned long **bitmapp, | |
43 | bool may_alloc) | |
44 | { | |
45 | static struct page **pages; | |
46 | static unsigned long *bitmap; | |
47 | size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); | |
48 | size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * | |
49 | sizeof(unsigned long); | |
50 | ||
51 | if (!pages || !bitmap) { | |
52 | if (may_alloc && !pages) | |
90459ce0 | 53 | pages = pcpu_mem_zalloc(pages_size); |
9f645532 | 54 | if (may_alloc && !bitmap) |
90459ce0 | 55 | bitmap = pcpu_mem_zalloc(bitmap_size); |
9f645532 TH |
56 | if (!pages || !bitmap) |
57 | return NULL; | |
58 | } | |
59 | ||
9f645532 TH |
60 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); |
61 | ||
62 | *bitmapp = bitmap; | |
63 | return pages; | |
64 | } | |
65 | ||
66 | /** | |
67 | * pcpu_free_pages - free pages which were allocated for @chunk | |
68 | * @chunk: chunk pages were allocated for | |
69 | * @pages: array of pages to be freed, indexed by pcpu_page_idx() | |
70 | * @populated: populated bitmap | |
71 | * @page_start: page index of the first page to be freed | |
72 | * @page_end: page index of the last page to be freed + 1 | |
73 | * | |
74 | * Free pages [@page_start and @page_end) in @pages for all units. | |
75 | * The pages were allocated for @chunk. | |
76 | */ | |
77 | static void pcpu_free_pages(struct pcpu_chunk *chunk, | |
78 | struct page **pages, unsigned long *populated, | |
79 | int page_start, int page_end) | |
80 | { | |
81 | unsigned int cpu; | |
82 | int i; | |
83 | ||
84 | for_each_possible_cpu(cpu) { | |
85 | for (i = page_start; i < page_end; i++) { | |
86 | struct page *page = pages[pcpu_page_idx(cpu, i)]; | |
87 | ||
88 | if (page) | |
89 | __free_page(page); | |
90 | } | |
91 | } | |
92 | } | |
93 | ||
94 | /** | |
95 | * pcpu_alloc_pages - allocates pages for @chunk | |
96 | * @chunk: target chunk | |
97 | * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() | |
98 | * @populated: populated bitmap | |
99 | * @page_start: page index of the first page to be allocated | |
100 | * @page_end: page index of the last page to be allocated + 1 | |
101 | * | |
102 | * Allocate pages [@page_start,@page_end) into @pages for all units. | |
103 | * The allocation is for @chunk. Percpu core doesn't care about the | |
104 | * content of @pages and will pass it verbatim to pcpu_map_pages(). | |
105 | */ | |
106 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, | |
107 | struct page **pages, unsigned long *populated, | |
108 | int page_start, int page_end) | |
109 | { | |
110 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | |
f0d27965 | 111 | unsigned int cpu, tcpu; |
9f645532 TH |
112 | int i; |
113 | ||
114 | for_each_possible_cpu(cpu) { | |
115 | for (i = page_start; i < page_end; i++) { | |
116 | struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; | |
117 | ||
118 | *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); | |
f0d27965 TH |
119 | if (!*pagep) |
120 | goto err; | |
9f645532 TH |
121 | } |
122 | } | |
123 | return 0; | |
f0d27965 TH |
124 | |
125 | err: | |
126 | while (--i >= page_start) | |
127 | __free_page(pages[pcpu_page_idx(cpu, i)]); | |
128 | ||
129 | for_each_possible_cpu(tcpu) { | |
130 | if (tcpu == cpu) | |
131 | break; | |
132 | for (i = page_start; i < page_end; i++) | |
133 | __free_page(pages[pcpu_page_idx(tcpu, i)]); | |
134 | } | |
135 | return -ENOMEM; | |
9f645532 TH |
136 | } |
137 | ||
138 | /** | |
139 | * pcpu_pre_unmap_flush - flush cache prior to unmapping | |
140 | * @chunk: chunk the regions to be flushed belongs to | |
141 | * @page_start: page index of the first page to be flushed | |
142 | * @page_end: page index of the last page to be flushed + 1 | |
143 | * | |
144 | * Pages in [@page_start,@page_end) of @chunk are about to be | |
145 | * unmapped. Flush cache. As each flushing trial can be very | |
146 | * expensive, issue flush on the whole region at once rather than | |
147 | * doing it for each cpu. This could be an overkill but is more | |
148 | * scalable. | |
149 | */ | |
150 | static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | |
151 | int page_start, int page_end) | |
152 | { | |
153 | flush_cache_vunmap( | |
a855b84c TH |
154 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
155 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); | |
9f645532 TH |
156 | } |
157 | ||
158 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | |
159 | { | |
160 | unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); | |
161 | } | |
162 | ||
163 | /** | |
164 | * pcpu_unmap_pages - unmap pages out of a pcpu_chunk | |
165 | * @chunk: chunk of interest | |
166 | * @pages: pages array which can be used to pass information to free | |
167 | * @populated: populated bitmap | |
168 | * @page_start: page index of the first page to unmap | |
169 | * @page_end: page index of the last page to unmap + 1 | |
170 | * | |
171 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. | |
172 | * Corresponding elements in @pages were cleared by the caller and can | |
173 | * be used to carry information to pcpu_free_pages() which will be | |
174 | * called after all unmaps are finished. The caller should call | |
175 | * proper pre/post flush functions. | |
176 | */ | |
177 | static void pcpu_unmap_pages(struct pcpu_chunk *chunk, | |
178 | struct page **pages, unsigned long *populated, | |
179 | int page_start, int page_end) | |
180 | { | |
181 | unsigned int cpu; | |
182 | int i; | |
183 | ||
184 | for_each_possible_cpu(cpu) { | |
185 | for (i = page_start; i < page_end; i++) { | |
186 | struct page *page; | |
187 | ||
188 | page = pcpu_chunk_page(chunk, cpu, i); | |
189 | WARN_ON(!page); | |
190 | pages[pcpu_page_idx(cpu, i)] = page; | |
191 | } | |
192 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), | |
193 | page_end - page_start); | |
194 | } | |
195 | ||
26dd8e02 | 196 | bitmap_clear(populated, page_start, page_end - page_start); |
9f645532 TH |
197 | } |
198 | ||
199 | /** | |
200 | * pcpu_post_unmap_tlb_flush - flush TLB after unmapping | |
201 | * @chunk: pcpu_chunk the regions to be flushed belong to | |
202 | * @page_start: page index of the first page to be flushed | |
203 | * @page_end: page index of the last page to be flushed + 1 | |
204 | * | |
205 | * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush | |
206 | * TLB for the regions. This can be skipped if the area is to be | |
207 | * returned to vmalloc as vmalloc will handle TLB flushing lazily. | |
208 | * | |
209 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | |
210 | * for the whole region. | |
211 | */ | |
212 | static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | |
213 | int page_start, int page_end) | |
214 | { | |
215 | flush_tlb_kernel_range( | |
a855b84c TH |
216 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
217 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); | |
9f645532 TH |
218 | } |
219 | ||
220 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | |
221 | int nr_pages) | |
222 | { | |
223 | return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, | |
224 | PAGE_KERNEL, pages); | |
225 | } | |
226 | ||
227 | /** | |
228 | * pcpu_map_pages - map pages into a pcpu_chunk | |
229 | * @chunk: chunk of interest | |
230 | * @pages: pages array containing pages to be mapped | |
231 | * @populated: populated bitmap | |
232 | * @page_start: page index of the first page to map | |
233 | * @page_end: page index of the last page to map + 1 | |
234 | * | |
235 | * For each cpu, map pages [@page_start,@page_end) into @chunk. The | |
236 | * caller is responsible for calling pcpu_post_map_flush() after all | |
237 | * mappings are complete. | |
238 | * | |
239 | * This function is responsible for setting corresponding bits in | |
240 | * @chunk->populated bitmap and whatever is necessary for reverse | |
241 | * lookup (addr -> chunk). | |
242 | */ | |
243 | static int pcpu_map_pages(struct pcpu_chunk *chunk, | |
244 | struct page **pages, unsigned long *populated, | |
245 | int page_start, int page_end) | |
246 | { | |
247 | unsigned int cpu, tcpu; | |
248 | int i, err; | |
249 | ||
250 | for_each_possible_cpu(cpu) { | |
251 | err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), | |
252 | &pages[pcpu_page_idx(cpu, page_start)], | |
253 | page_end - page_start); | |
254 | if (err < 0) | |
255 | goto err; | |
256 | } | |
257 | ||
258 | /* mapping successful, link chunk and mark populated */ | |
259 | for (i = page_start; i < page_end; i++) { | |
260 | for_each_possible_cpu(cpu) | |
261 | pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], | |
262 | chunk); | |
263 | __set_bit(i, populated); | |
264 | } | |
265 | ||
266 | return 0; | |
267 | ||
268 | err: | |
269 | for_each_possible_cpu(tcpu) { | |
270 | if (tcpu == cpu) | |
271 | break; | |
272 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), | |
273 | page_end - page_start); | |
274 | } | |
849f5169 | 275 | pcpu_post_unmap_tlb_flush(chunk, page_start, page_end); |
9f645532 TH |
276 | return err; |
277 | } | |
278 | ||
279 | /** | |
280 | * pcpu_post_map_flush - flush cache after mapping | |
281 | * @chunk: pcpu_chunk the regions to be flushed belong to | |
282 | * @page_start: page index of the first page to be flushed | |
283 | * @page_end: page index of the last page to be flushed + 1 | |
284 | * | |
285 | * Pages [@page_start,@page_end) of @chunk have been mapped. Flush | |
286 | * cache. | |
287 | * | |
288 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | |
289 | * for the whole region. | |
290 | */ | |
291 | static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | |
292 | int page_start, int page_end) | |
293 | { | |
294 | flush_cache_vmap( | |
a855b84c TH |
295 | pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), |
296 | pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); | |
9f645532 TH |
297 | } |
298 | ||
299 | /** | |
300 | * pcpu_populate_chunk - populate and map an area of a pcpu_chunk | |
301 | * @chunk: chunk of interest | |
302 | * @off: offset to the area to populate | |
303 | * @size: size of the area to populate in bytes | |
304 | * | |
305 | * For each cpu, populate and map pages [@page_start,@page_end) into | |
306 | * @chunk. The area is cleared on return. | |
307 | * | |
308 | * CONTEXT: | |
309 | * pcpu_alloc_mutex, does GFP_KERNEL allocation. | |
310 | */ | |
311 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | |
312 | { | |
313 | int page_start = PFN_DOWN(off); | |
314 | int page_end = PFN_UP(off + size); | |
315 | int free_end = page_start, unmap_end = page_start; | |
316 | struct page **pages; | |
317 | unsigned long *populated; | |
318 | unsigned int cpu; | |
319 | int rs, re, rc; | |
320 | ||
321 | /* quick path, check whether all pages are already there */ | |
322 | rs = page_start; | |
323 | pcpu_next_pop(chunk, &rs, &re, page_end); | |
324 | if (rs == page_start && re == page_end) | |
325 | goto clear; | |
326 | ||
327 | /* need to allocate and map pages, this chunk can't be immutable */ | |
328 | WARN_ON(chunk->immutable); | |
329 | ||
330 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); | |
331 | if (!pages) | |
332 | return -ENOMEM; | |
333 | ||
334 | /* alloc and map */ | |
335 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | |
336 | rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); | |
337 | if (rc) | |
338 | goto err_free; | |
339 | free_end = re; | |
340 | } | |
341 | ||
342 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | |
343 | rc = pcpu_map_pages(chunk, pages, populated, rs, re); | |
344 | if (rc) | |
345 | goto err_unmap; | |
346 | unmap_end = re; | |
347 | } | |
348 | pcpu_post_map_flush(chunk, page_start, page_end); | |
349 | ||
350 | /* commit new bitmap */ | |
351 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | |
352 | clear: | |
353 | for_each_possible_cpu(cpu) | |
354 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | |
355 | return 0; | |
356 | ||
357 | err_unmap: | |
358 | pcpu_pre_unmap_flush(chunk, page_start, unmap_end); | |
359 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) | |
360 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | |
361 | pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); | |
362 | err_free: | |
363 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) | |
364 | pcpu_free_pages(chunk, pages, populated, rs, re); | |
365 | return rc; | |
366 | } | |
367 | ||
368 | /** | |
369 | * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk | |
370 | * @chunk: chunk to depopulate | |
371 | * @off: offset to the area to depopulate | |
372 | * @size: size of the area to depopulate in bytes | |
9f645532 TH |
373 | * |
374 | * For each cpu, depopulate and unmap pages [@page_start,@page_end) | |
375 | * from @chunk. If @flush is true, vcache is flushed before unmapping | |
376 | * and tlb after. | |
377 | * | |
378 | * CONTEXT: | |
379 | * pcpu_alloc_mutex. | |
380 | */ | |
381 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) | |
382 | { | |
383 | int page_start = PFN_DOWN(off); | |
384 | int page_end = PFN_UP(off + size); | |
385 | struct page **pages; | |
386 | unsigned long *populated; | |
387 | int rs, re; | |
388 | ||
389 | /* quick path, check whether it's empty already */ | |
390 | rs = page_start; | |
391 | pcpu_next_unpop(chunk, &rs, &re, page_end); | |
392 | if (rs == page_start && re == page_end) | |
393 | return; | |
394 | ||
395 | /* immutable chunks can't be depopulated */ | |
396 | WARN_ON(chunk->immutable); | |
397 | ||
398 | /* | |
399 | * If control reaches here, there must have been at least one | |
400 | * successful population attempt so the temp pages array must | |
401 | * be available now. | |
402 | */ | |
403 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); | |
404 | BUG_ON(!pages); | |
405 | ||
406 | /* unmap and free */ | |
407 | pcpu_pre_unmap_flush(chunk, page_start, page_end); | |
408 | ||
409 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | |
410 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | |
411 | ||
412 | /* no need to flush tlb, vmalloc will handle it lazily */ | |
413 | ||
414 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | |
415 | pcpu_free_pages(chunk, pages, populated, rs, re); | |
416 | ||
417 | /* commit new bitmap */ | |
418 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | |
419 | } | |
420 | ||
421 | static struct pcpu_chunk *pcpu_create_chunk(void) | |
422 | { | |
423 | struct pcpu_chunk *chunk; | |
424 | struct vm_struct **vms; | |
425 | ||
426 | chunk = pcpu_alloc_chunk(); | |
427 | if (!chunk) | |
428 | return NULL; | |
429 | ||
430 | vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, | |
ec3f64fc | 431 | pcpu_nr_groups, pcpu_atom_size); |
9f645532 TH |
432 | if (!vms) { |
433 | pcpu_free_chunk(chunk); | |
434 | return NULL; | |
435 | } | |
436 | ||
437 | chunk->data = vms; | |
438 | chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; | |
439 | return chunk; | |
440 | } | |
441 | ||
442 | static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) | |
443 | { | |
444 | if (chunk && chunk->data) | |
445 | pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); | |
446 | pcpu_free_chunk(chunk); | |
447 | } | |
448 | ||
/*
 * Translate percpu address @addr to its backing struct page; with this
 * allocator that is a plain vmalloc_to_page() lookup.
 */
static struct page *pcpu_addr_to_page(void *addr)
{
	struct page *pg = vmalloc_to_page(addr);

	return pg;
}
453 | ||
454 | static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) | |
455 | { | |
456 | /* no extra restriction */ | |
457 | return 0; | |
458 | } |