Commit 2532c0bb — read_zero_pagealigned() locking fix (stable submission, linux-2.6.19.x)
1 | From stable-bounces@linux.kernel.org Sun Dec 10 02:24:42 2006 |
2 | Message-Id: <200612101018.kBAAIiFj021055@shell0.pdx.osdl.net> | |
3 | From: akpm@osdl.org | |
4 | To: torvalds@osdl.org | |
5 | Date: Sun, 10 Dec 2006 02:18:43 -0800 | |
6 | Cc: akpm@osdl.org, hugh@veritas.com, Ramiro.Voicu@cern.ch, stable@kernel.org | |
7 | Subject: read_zero_pagealigned() locking fix | |
8 | ||
9 | From: Hugh Dickins <hugh@veritas.com> | |
10 | ||
11 | Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel | |
12 | bugzilla 7645. Right: read_zero_pagealigned uses down_read of mmap_sem, | |
13 | but another thread's racing read of /dev/zero, or a normal fault, can | |
14 | easily set that pte again, in between zap_page_range and zeromap_page_range | |
15 | getting there. It's been wrong ever since 2.4.3. | |
16 | ||
17 | The simple fix is to use down_write instead, but that would serialize reads | |
18 | of /dev/zero more than at present: perhaps some app would be badly | |
19 | affected. So instead let zeromap_page_range return the error instead of | |
20 | BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in | |
21 | that case - there's no need to optimize for it. | |
22 | ||
23 | Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of | |
24 | zeromap_page_range), though it really isn't interesting there. And since | |
25 | mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that | |
26 | than -ENOMEM. | |
27 | ||
28 | Signed-off-by: Hugh Dickins <hugh@veritas.com> | |
29 | Cc: Ramiro Voicu: <Ramiro.Voicu@cern.ch> | |
30 | Cc: <stable@kernel.org> | |
31 | Signed-off-by: Andrew Morton <akpm@osdl.org> | |
32 | Signed-off-by: Chris Wright <chrisw@sous-sol.org> | |
33 | --- | |
34 | ||
35 | drivers/char/mem.c | 12 ++++++++---- | |
36 | mm/memory.c | 32 +++++++++++++++++++++----------- | |
37 | 2 files changed, 29 insertions(+), 15 deletions(-) | |
38 | ||
39 | --- linux-2.6.19.1.orig/drivers/char/mem.c | |
40 | +++ linux-2.6.19.1/drivers/char/mem.c | |
41 | @@ -646,7 +646,8 @@ static inline size_t read_zero_pagealign | |
42 | count = size; | |
43 | ||
44 | zap_page_range(vma, addr, count, NULL); | |
45 | - zeromap_page_range(vma, addr, count, PAGE_COPY); | |
46 | + if (zeromap_page_range(vma, addr, count, PAGE_COPY)) | |
47 | + break; | |
48 | ||
49 | size -= count; | |
50 | buf += count; | |
51 | @@ -713,11 +714,14 @@ out: | |
52 | ||
53 | static int mmap_zero(struct file * file, struct vm_area_struct * vma) | |
54 | { | |
55 | + int err; | |
56 | + | |
57 | if (vma->vm_flags & VM_SHARED) | |
58 | return shmem_zero_setup(vma); | |
59 | - if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) | |
60 | - return -EAGAIN; | |
61 | - return 0; | |
62 | + err = zeromap_page_range(vma, vma->vm_start, | |
63 | + vma->vm_end - vma->vm_start, vma->vm_page_prot); | |
64 | + BUG_ON(err == -EEXIST); | |
65 | + return err; | |
66 | } | |
67 | #else /* CONFIG_MMU */ | |
68 | static ssize_t read_zero(struct file * file, char * buf, | |
69 | --- linux-2.6.19.1.orig/mm/memory.c | |
70 | +++ linux-2.6.19.1/mm/memory.c | |
71 | @@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_s | |
72 | { | |
73 | pte_t *pte; | |
74 | spinlock_t *ptl; | |
75 | + int err = 0; | |
76 | ||
77 | pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); | |
78 | if (!pte) | |
79 | - return -ENOMEM; | |
80 | + return -EAGAIN; | |
81 | arch_enter_lazy_mmu_mode(); | |
82 | do { | |
83 | struct page *page = ZERO_PAGE(addr); | |
84 | pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); | |
85 | + | |
86 | + if (unlikely(!pte_none(*pte))) { | |
87 | + err = -EEXIST; | |
88 | + pte++; | |
89 | + break; | |
90 | + } | |
91 | page_cache_get(page); | |
92 | page_add_file_rmap(page); | |
93 | inc_mm_counter(mm, file_rss); | |
94 | - BUG_ON(!pte_none(*pte)); | |
95 | set_pte_at(mm, addr, pte, zero_pte); | |
96 | } while (pte++, addr += PAGE_SIZE, addr != end); | |
97 | arch_leave_lazy_mmu_mode(); | |
98 | pte_unmap_unlock(pte - 1, ptl); | |
99 | - return 0; | |
100 | + return err; | |
101 | } | |
102 | ||
103 | static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, | |
104 | @@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(stru | |
105 | { | |
106 | pmd_t *pmd; | |
107 | unsigned long next; | |
108 | + int err; | |
109 | ||
110 | pmd = pmd_alloc(mm, pud, addr); | |
111 | if (!pmd) | |
112 | - return -ENOMEM; | |
113 | + return -EAGAIN; | |
114 | do { | |
115 | next = pmd_addr_end(addr, end); | |
116 | - if (zeromap_pte_range(mm, pmd, addr, next, prot)) | |
117 | - return -ENOMEM; | |
118 | + err = zeromap_pte_range(mm, pmd, addr, next, prot); | |
119 | + if (err) | |
120 | + break; | |
121 | } while (pmd++, addr = next, addr != end); | |
122 | - return 0; | |
123 | + return err; | |
124 | } | |
125 | ||
126 | static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, | |
127 | @@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(stru | |
128 | { | |
129 | pud_t *pud; | |
130 | unsigned long next; | |
131 | + int err; | |
132 | ||
133 | pud = pud_alloc(mm, pgd, addr); | |
134 | if (!pud) | |
135 | - return -ENOMEM; | |
136 | + return -EAGAIN; | |
137 | do { | |
138 | next = pud_addr_end(addr, end); | |
139 | - if (zeromap_pmd_range(mm, pud, addr, next, prot)) | |
140 | - return -ENOMEM; | |
141 | + err = zeromap_pmd_range(mm, pud, addr, next, prot); | |
142 | + if (err) | |
143 | + break; | |
144 | } while (pud++, addr = next, addr != end); | |
145 | - return 0; | |
146 | + return err; | |
147 | } | |
148 | ||
149 | int zeromap_page_range(struct vm_area_struct *vma, |