From a289eb38cfb481de48e36b481c72fb2492c6d242 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 13 Oct 2016 13:07:36 -0700
Subject: [PATCH] mm: remove gup_flags FOLL_WRITE games from __get_user_pages()

commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream.

This is an ancient bug that was actually attempted to be fixed once
(badly) by me eleven years ago in commit 4ceb5db9757a ("Fix
get_user_pages() race for write access") but that was then undone due to
problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug").

In the meantime, the s390 situation has long been fixed, and we can now
fix it by checking the pte_dirty() bit properly (and do it better). The
s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement
software dirty bits") which made it into v3.9. Earlier kernels will
have to look at the page state itself.

Also, the VM has become more scalable, and what used to be a purely
theoretical race back then has become easier to trigger.

To fix it, we introduce a new internal FOLL_COW flag to mark the "yes,
we already did a COW" rather than play racy games with FOLL_WRITE that
is very fundamental, and then use the pte dirty flag to validate that
the FOLL_COW flag is still valid.

Reported-and-tested-by: Phil "not Paul" Oester <kernel@linuxace.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Nick Piggin <npiggin@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5aef73c..6f56355 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1964,6 +1964,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
+#define FOLL_COW	0x4000	/* internal GUP flag */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/memory.c b/mm/memory.c
index e9f4d40..5c1df12 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1449,6 +1449,16 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+	return pte_write(pte) ||
+		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
 /**
  * follow_page_mask - look up a page descriptor from a user-virtual address
  * @vma: vm_area_struct mapping @address
@@ -1569,7 +1579,7 @@ split_fallthrough:
 	}
 	if ((flags & FOLL_NUMA) && pte_numa(pte))
 		goto no_page;
-	if ((flags & FOLL_WRITE) && !pte_write(pte))
+	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags))
 		goto unlock;
 
 	page = vm_normal_page(vma, address, pte);
@@ -1866,7 +1876,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 */
 				if ((ret & VM_FAULT_WRITE) &&
 				    !(vma->vm_flags & VM_WRITE))
-					foll_flags &= ~FOLL_WRITE;
+					foll_flags |= FOLL_COW;
 
 				cond_resched();
 			}
-- 
2.7.4
