1 From: Gerald Schaefer <geraldsc@de.ibm.com>
2 Subject: kernel: fix dynamic TASK_SIZE handling.
3 References: bnc#484767,LTC#52259
5 Symptom: System crash (memory overwrite) on access to /proc/<pid>/pagemap.
6 Problem: pagemap_read() is using TASK_SIZE_OF to determine the address
7 range for the generic page table walker. With dynamic page table
8 upgrades on s390, this does not reflect the true task size, but
9 the maximum task size, with the maximum level of page tables. If
10 a process has not yet mmapped enough memory, the page tables will
11 not be completely upgraded and the generic page table walker will
12 access (and write) beyond the page tables.
13 Solution: Change TASK_SIZE/TASK_SIZE_OF to reflect the current size of the
   process address space (mm->context.asce_limit) instead of the maximum
   possible task size, so the generic page table walker never walks beyond
   the page tables that actually exist.
16 Acked-by: John Jolly <jjolly@suse.de>
18 arch/s390/include/asm/mman.h | 5 +++
19 arch/s390/include/asm/processor.h | 5 +--
20 arch/s390/mm/mmap.c | 48 +++++++++++++++++++++++++-------------
21 arch/s390/mm/pgtable.c | 2 +
22 4 files changed, 41 insertions(+), 19 deletions(-)
24 Index: linux-2.6.27/arch/s390/include/asm/mman.h
25 ===================================================================
26 --- linux-2.6.27.orig/arch/s390/include/asm/mman.h
27 +++ linux-2.6.27/arch/s390/include/asm/mman.h
29 #define MCL_CURRENT 1 /* lock all current mappings */
30 #define MCL_FUTURE 2 /* lock all future mappings */
32 +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
33 +int s390_mmap_check(unsigned long addr, unsigned long len);
34 +#define arch_mmap_check(addr,len,flags) s390_mmap_check(addr,len)
37 #endif /* __S390_MMAN_H__ */
38 Index: linux-2.6.27/arch/s390/include/asm/processor.h
39 ===================================================================
40 --- linux-2.6.27.orig/arch/s390/include/asm/processor.h
41 +++ linux-2.6.27/arch/s390/include/asm/processor.h
42 @@ -60,7 +60,7 @@ extern void print_cpu_info(struct cpuinf
43 extern int get_cpu_capability(unsigned int *);
46 - * User space process size: 2GB for 31 bit, 4TB for 64 bit.
47 + * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
51 @@ -69,8 +69,7 @@ extern int get_cpu_capability(unsigned i
55 -#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk,TIF_31BIT) ? \
56 - (1UL << 31) : (1UL << 53))
57 +#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit)
58 #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
59 (1UL << 30) : (1UL << 41))
60 #define TASK_SIZE TASK_SIZE_OF(current)
61 Index: linux-2.6.27/arch/s390/mm/mmap.c
62 ===================================================================
63 --- linux-2.6.27.orig/arch/s390/mm/mmap.c
64 +++ linux-2.6.27/arch/s390/mm/mmap.c
66 * Leave an at least ~128 MB hole.
68 #define MIN_GAP (128*1024*1024)
69 -#define MAX_GAP (TASK_SIZE/6*5)
70 +#define MAX_GAP (STACK_TOP/6*5)
72 static inline unsigned long mmap_base(void)
74 @@ -46,7 +46,7 @@ static inline unsigned long mmap_base(vo
75 else if (gap > MAX_GAP)
78 - return TASK_SIZE - (gap & PAGE_MASK);
79 + return STACK_TOP - (gap & PAGE_MASK);
82 static inline int mmap_is_legacy(void)
83 @@ -89,42 +89,58 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout)
87 +int s390_mmap_check(unsigned long addr, unsigned long len)
89 + if (!test_thread_flag(TIF_31BIT) &&
90 + len >= TASK_SIZE && TASK_SIZE < (1UL << 53))
91 + return crst_table_upgrade(current->mm, 1UL << 53);
96 s390_get_unmapped_area(struct file *filp, unsigned long addr,
97 unsigned long len, unsigned long pgoff, unsigned long flags)
99 struct mm_struct *mm = current->mm;
100 + unsigned long area;
103 - addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
104 - if (addr & ~PAGE_MASK)
106 - if (unlikely(mm->context.asce_limit < addr + len)) {
107 - rc = crst_table_upgrade(mm, addr + len);
108 + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
109 + if (!(area & ~PAGE_MASK))
111 + if (area == -ENOMEM &&
112 + !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
113 + /* Upgrade the page table to 4 levels and retry. */
114 + rc = crst_table_upgrade(mm, 1UL << 53);
116 return (unsigned long) rc;
117 + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
124 -s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
125 +s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
126 const unsigned long len, const unsigned long pgoff,
127 const unsigned long flags)
129 struct mm_struct *mm = current->mm;
130 - unsigned long addr = addr0;
131 + unsigned long area;
134 - addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
135 - if (addr & ~PAGE_MASK)
137 - if (unlikely(mm->context.asce_limit < addr + len)) {
138 - rc = crst_table_upgrade(mm, addr + len);
139 + area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
140 + if (!(area & ~PAGE_MASK))
142 + if (area == -ENOMEM &&
143 + !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
144 + /* Upgrade the page table to 4 levels and retry. */
145 + rc = crst_table_upgrade(mm, 1UL << 53);
147 return (unsigned long) rc;
148 + area = arch_get_unmapped_area_topdown(filp, addr, len,
155 * This function, called very early during the creation of a new
156 Index: linux-2.6.27/arch/s390/mm/pgtable.c
157 ===================================================================
158 --- linux-2.6.27.orig/arch/s390/mm/pgtable.c
159 +++ linux-2.6.27/arch/s390/mm/pgtable.c
160 @@ -117,6 +117,7 @@ repeat:
161 crst_table_init(table, entry);
162 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
163 mm->pgd = (pgd_t *) table;
164 + mm->task_size = mm->context.asce_limit;
167 spin_unlock(&mm->page_table_lock);
168 @@ -154,6 +155,7 @@ void crst_table_downgrade(struct mm_stru
171 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
172 + mm->task_size = mm->context.asce_limit;
173 crst_table_free(mm, (unsigned long *) pgd);
175 update_mm(mm, current);