#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables. Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_numa(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page)) {
		if ((flags & FOLL_DUMP) ||
		    !is_zero_pfn(pte_pfn(pte)))
			goto bad_page;
		page = pte_page(pte);
	}

	if (flags & FOLL_GET)
		get_page_foll(page);
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
	pte_unmap_unlock(ptep, ptl);
	return page;
bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);

no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		return no_page_table(vma, flags);
	if (pmd_trans_huge(*pmd)) {
		if (flags & FOLL_SPLIT) {
			split_huge_page_pmd(vma, address, pmd);
			return follow_page_pte(vma, address, pmd, flags);
		}
		ptl = pmd_lock(mm, pmd);
		if (likely(pmd_trans_huge(*pmd))) {
			if (unlikely(pmd_trans_splitting(*pmd))) {
				spin_unlock(ptl);
				wait_split_huge_page(vma->anon_vma, pmd);
			} else {
				page = follow_trans_huge_pmd(vma, address,
							     pmd, flags);
				spin_unlock(ptl);
				*page_mask = HPAGE_PMD_NR - 1;
				return page;
			}
		} else
			spin_unlock(ptl);
	}
	return follow_page_pte(vma, address, pmd, flags);
}
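
/*
 * Illustrative sketch (editor's addition, not part of this file): most
 * callers that do not care about huge page geometry reach the lookup above
 * through a thin wrapper along the lines of follow_page() in <linux/mm.h>,
 * which simply discards the page mask. Assuming mmap_sem is held and the
 * vma covers "address", such a wrapper looks roughly like this:
 *
 *	static inline struct page *follow_page(struct vm_area_struct *vma,
 *			unsigned long address, unsigned int foll_flags)
 *	{
 *		unsigned int unused_page_mask;
 *
 *		return follow_page_mask(vma, address, foll_flags,
 *					&unused_page_mask);
 *	}
 *
 * A caller passing FOLL_GET must drop the reference with put_page() once it
 * is done with the returned page.
 */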

static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, address);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);
	}
	get_page(*page);
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}

static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;

	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
			return -EFAULT;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking)
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags |= FOLL_COW;
	return 0;
}
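
/*
 * Editor's note (illustrative, not part of the original file): the FOLL_COW
 * bit set above is what lets a FOLL_FORCE|FOLL_WRITE walk of a read-only
 * private mapping converge. The sequence in __get_user_pages() is roughly:
 *
 *	page = follow_page_mask(vma, start, foll_flags, &page_mask);
 *	if (!page)			// can_follow_write_pte() said no
 *		faultin_page(...);	// breaks COW, may set FOLL_COW
 *	// retry: with FOLL_FORCE|FOLL_COW and a now-dirty pte,
 *	// can_follow_write_pte() accepts the unwritable pte.
 *
 * The pte_dirty() test in can_follow_write_pte() is what ties the retry to
 * a COW cycle that has actually happened.
 */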

static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (gup_flags & FOLL_WRITE) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
			 * set a breakpoint in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
			if (!is_cow_mapping(vm_flags)) {
				WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
				return -EFAULT;
			}
		}
	} else if (!(vm_flags & VM_READ)) {
		if (!(gup_flags & FOLL_FORCE))
			return -EFAULT;
		/*
		 * Is there actually any vma we can reach here which does not
		 * have VM_MAYREAD set?
		 */
		if (!(vm_flags & VM_MAYREAD))
			return -EFAULT;
	}
	return 0;
}

/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (unlikely(fatal_signal_pending(current)))
			return i ? i : -ERESTARTSYS;
		cond_resched();
		page = follow_page_mask(vma, start, foll_flags, &page_mask);
		if (!page) {
			int ret;
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				return i ? i : ret;
			case -EBUSY:
				return i;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		}
		if (IS_ERR(page))
			return i ? i : PTR_ERR(page);
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
	return i;
}
EXPORT_SYMBOL(__get_user_pages);
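
/*
 * Illustrative sketch (editor's addition, not part of this file): callers
 * needing special @gup_flags use __get_user_pages() directly. For example,
 * faulting in and mlocking a page-aligned range without taking page
 * references (so @pages stays NULL and FOLL_GET stays clear) could look
 * roughly like this, with mm->mmap_sem already held for read and "start"
 * and "nr_pages" being hypothetical, pre-validated values:
 *
 *	int nonblocking = 1;
 *	long ret;
 *
 *	ret = __get_user_pages(current, mm, start, nr_pages,
 *			       FOLL_TOUCH | FOLL_MLOCK, NULL, NULL,
 *			       &nonblocking);
 *
 * A negative ret reports an error before any page was handled, a short
 * count means the walk stopped early, and nonblocking == 0 on return means
 * the walk would have had to wait.
 */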

/*
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * This is meant for the specific scenario where, for locking reasons, we
 * try to access user memory in atomic context (within a pagefault_disable()
 * section), that access fails with -EFAULT, and we want to resolve the user
 * fault before trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software. On
 * such architectures, gup() will not be enough to make a subsequent access
 * succeed.
 *
 * This should be called with the mmap_sem held for read.
 */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags)
{
	struct vm_area_struct *vma;
	vm_flags_t vm_flags;
	int ret;

	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
	if (!(vm_flags & vma->vm_flags))
		return -EFAULT;

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return -EHWPOISON;
		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
			return -EFAULT;
		BUG();
	}
	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
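
/*
 * Illustrative sketch (editor's addition, not part of this file): the
 * typical caller pattern, as in the futex code, retries a faulting atomic
 * user access after resolving the fault under mmap_sem. Roughly:
 *
 *	static int fault_in_user_writeable(u32 __user *uaddr)
 *	{
 *		struct mm_struct *mm = current->mm;
 *		int ret;
 *
 *		down_read(&mm->mmap_sem);
 *		ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
 *				       FAULT_FLAG_WRITE);
 *		up_read(&mm->mmap_sem);
 *
 *		return ret < 0 ? ret : 0;
 *	}
 *
 * The pagefault_disable()d access is then attempted again once this
 * returns 0.
 */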

/*
 * get_user_pages() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to by the caller
 * @force:	whether to force access even when user mapping is currently
 *		protected (but never forces write access to shared mapping).
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If write=0, the page must not be written to. If the page is written to,
 * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
 * after the page is finished with, and before put_page is called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user virtual
 * addresses. The pages may be submitted for DMA to devices or accessed via
 * their kernel linear mapping (via the kmap APIs). Care should be taken to
 * use the correct cache flushing APIs.
 *
 * See also get_user_pages_fast, for performance critical applications.
 */
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages, int write,
		int force, struct page **pages, struct vm_area_struct **vmas)
{
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
				NULL);
}
EXPORT_SYMBOL(get_user_pages);
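
/*
 * Illustrative sketch (editor's addition, not part of this file): a typical
 * driver-style caller pinning a user buffer for write and releasing it
 * afterwards, with "addr" and "nr" being hypothetical, pre-validated values:
 *
 *	struct page **pages;
 *	long got, i;
 *
 *	pages = kmalloc(nr * sizeof(*pages), GFP_KERNEL);
 *	if (!pages)
 *		return -ENOMEM;
 *
 *	down_read(&current->mm->mmap_sem);
 *	got = get_user_pages(current, current->mm, addr, nr,
 *			     1, 0, pages, NULL);	// write=1, force=0
 *	up_read(&current->mm->mmap_sem);
 *
 *	for (i = 0; i < got; i++) {
 *		// ... access pages[i], e.g. via kmap() ...
 *		set_page_dirty_lock(pages[i]);	// we wrote to the page
 *		put_page(pages[i]);		// drop the gup reference
 *	}
 *	kfree(pages);
 *	return got < 0 ? got : (got == nr ? 0 : -EFAULT);
 */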

/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by page_cache_release() or put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, to preserve alignment with its headers; and also returns
 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
 * allowing a hole to be left in the corefile to save diskspace.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */
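
/*
 * Illustrative sketch (editor's addition, not part of this file): the ELF
 * coredumper consumes get_dump_page() one address at a time, writing the
 * page when one is returned and punching a hole otherwise. In
 * fs/binfmt_elf.c the per-page step looks roughly like:
 *
 *	struct page *page = get_dump_page(addr);
 *	if (page) {
 *		void *kaddr = kmap(page);
 *		stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
 *		kunmap(page);
 *		page_cache_release(page);
 *	} else
 *		stop = !dump_skip(cprm, PAGE_SIZE);
 */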