From: Greg Kroah-Hartman Date: Tue, 7 Dec 2010 00:34:35 +0000 (-0800) Subject: .33 X-Git-Tag: v2.6.27.57~24 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6df3ce2c34f8acae1c82135ddaaaa3c047cafdfb;p=thirdparty%2Fkernel%2Fstable-queue.git .33 --- diff --git a/queue-2.6.33/btrfs-kfree-correct-pointer-during-mount-option-parsing.patch b/queue-2.6.33/btrfs-kfree-correct-pointer-during-mount-option-parsing.patch new file mode 100644 index 00000000000..e3d7d48e810 --- /dev/null +++ b/queue-2.6.33/btrfs-kfree-correct-pointer-during-mount-option-parsing.patch @@ -0,0 +1,48 @@ +From da495ecc0fb096b383754952a1c152147bc95b52 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 25 Feb 2010 20:38:35 +0000 +Subject: Btrfs: kfree correct pointer during mount option parsing + +From: Josef Bacik + +commit da495ecc0fb096b383754952a1c152147bc95b52 upstream. + +We kstrdup the options string, but then strsep screws with the pointer, +so when we kfree() it, we're not giving it the right pointer. 
+ +Tested-by: Andy Lutomirski +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/super.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -128,7 +128,7 @@ int btrfs_parse_options(struct btrfs_roo + { + struct btrfs_fs_info *info = root->fs_info; + substring_t args[MAX_OPT_ARGS]; +- char *p, *num; ++ char *p, *num, *orig; + int intarg; + int ret = 0; + +@@ -143,6 +143,7 @@ int btrfs_parse_options(struct btrfs_roo + if (!options) + return -ENOMEM; + ++ orig = options; + + while ((p = strsep(&options, ",")) != NULL) { + int token; +@@ -280,7 +281,7 @@ int btrfs_parse_options(struct btrfs_roo + } + } + out: +- kfree(options); ++ kfree(orig); + return ret; + } + diff --git a/queue-2.6.33/compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch b/queue-2.6.33/compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch new file mode 100644 index 00000000000..d80c40f387b --- /dev/null +++ b/queue-2.6.33/compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch @@ -0,0 +1,172 @@ +From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001 +From: H. Peter Anvin +Date: Tue, 7 Sep 2010 16:16:18 -0700 +Subject: compat: Make compat_alloc_user_space() incorporate the access_ok() + +From: H. Peter Anvin + +commit c41d68a513c71e35a14f66d71782d27a79a81ea6 upstream. + +compat_alloc_user_space() expects the caller to independently call +access_ok() to verify the returned area. A missing call could +introduce problems on some architectures. + +This patch incorporates the access_ok() check into +compat_alloc_user_space() and also adds a sanity check on the length. +The existing compat_alloc_user_space() implementations are renamed +arch_compat_alloc_user_space() and are used as part of the +implementation of the new global function. + +This patch assumes NULL will cause __get_user()/__put_user() to either +fail or access userspace on all architectures. 
This should be +followed by checking the return value of compat_access_user_space() +for NULL in the callers, at which time the access_ok() in the callers +can also be removed. + +Reported-by: Ben Hawkes +Signed-off-by: H. Peter Anvin +Acked-by: Benjamin Herrenschmidt +Acked-by: Chris Metcalf +Acked-by: David S. Miller +Acked-by: Ingo Molnar +Acked-by: Thomas Gleixner +Acked-by: Tony Luck +Cc: Andrew Morton +Cc: Arnd Bergmann +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Heiko Carstens +Cc: Helge Deller +Cc: James Bottomley +Cc: Kyle McMartin +Cc: Martin Schwidefsky +Cc: Paul Mackerras +Cc: Ralf Baechle +Signed-off-by: Greg Kroah-Hartman + +--- + arch/ia64/include/asm/compat.h | 2 +- + arch/mips/include/asm/compat.h | 2 +- + arch/parisc/include/asm/compat.h | 2 +- + arch/powerpc/include/asm/compat.h | 2 +- + arch/s390/include/asm/compat.h | 2 +- + arch/sparc/include/asm/compat.h | 2 +- + arch/x86/include/asm/compat.h | 2 +- + include/linux/compat.h | 2 ++ + kernel/compat.c | 21 +++++++++++++++++++++ + 9 files changed, 30 insertions(+), 7 deletions(-) + +--- a/arch/ia64/include/asm/compat.h ++++ b/arch/ia64/include/asm/compat.h +@@ -198,7 +198,7 @@ ptr_to_compat(void __user *uptr) + } + + static __inline__ void __user * +-compat_alloc_user_space (long len) ++arch_compat_alloc_user_space (long len) + { + struct pt_regs *regs = task_pt_regs(current); + return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len); +--- a/arch/mips/include/asm/compat.h ++++ b/arch/mips/include/asm/compat.h +@@ -144,7 +144,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = (struct pt_regs *) + ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; +--- a/arch/parisc/include/asm/compat.h ++++ b/arch/parisc/include/asm/compat.h +@@ -146,7 +146,7 @@ static inline compat_uptr_t ptr_to_compa + 
return (u32)(unsigned long)uptr; + } + +-static __inline__ void __user *compat_alloc_user_space(long len) ++static __inline__ void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = &current->thread.regs; + return (void __user *)regs->gr[30]; +--- a/arch/powerpc/include/asm/compat.h ++++ b/arch/powerpc/include/asm/compat.h +@@ -133,7 +133,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = current->thread.regs; + unsigned long usp = regs->gpr[1]; +--- a/arch/s390/include/asm/compat.h ++++ b/arch/s390/include/asm/compat.h +@@ -180,7 +180,7 @@ static inline int is_compat_task(void) + + #endif + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + unsigned long stack; + +--- a/arch/sparc/include/asm/compat.h ++++ b/arch/sparc/include/asm/compat.h +@@ -166,7 +166,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = current_thread_info()->kregs; + unsigned long usp = regs->u_regs[UREG_I6]; +--- a/arch/x86/include/asm/compat.h ++++ b/arch/x86/include/asm/compat.h +@@ -204,7 +204,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->sp - len; +--- a/include/linux/compat.h ++++ b/include/linux/compat.h +@@ -353,5 +353,7 @@ asmlinkage long compat_sys_newfstatat(un + asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, +
int flags, int mode); + ++extern void __user *compat_alloc_user_space(unsigned long len); ++ + #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ +--- a/kernel/compat.c ++++ b/kernel/compat.c +@@ -1136,3 +1136,24 @@ compat_sys_sysinfo(struct compat_sysinfo + + return 0; + } ++ ++/* ++ * Allocate user-space memory for the duration of a single system call, ++ * in order to marshall parameters inside a compat thunk. ++ */ ++void __user *compat_alloc_user_space(unsigned long len) ++{ ++ void __user *ptr; ++ ++ /* If len would occupy more than half of the entire compat space... */ ++ if (unlikely(len > (((compat_uptr_t)~0) >> 1))) ++ return NULL; ++ ++ ptr = arch_compat_alloc_user_space(len); ++ ++ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len))) ++ return NULL; ++ ++ return ptr; ++} ++EXPORT_SYMBOL_GPL(compat_alloc_user_space); diff --git a/queue-2.6.33/ext4-consolidate-in_range-definitions.patch b/queue-2.6.33/ext4-consolidate-in_range-definitions.patch new file mode 100644 index 00000000000..b377e593d34 --- /dev/null +++ b/queue-2.6.33/ext4-consolidate-in_range-definitions.patch @@ -0,0 +1,79 @@ +From 731eb1a03a8445cde2cb23ecfb3580c6fa7bb690 Mon Sep 17 00:00:00 2001 +From: Akinobu Mita +Date: Wed, 3 Mar 2010 23:55:01 -0500 +Subject: ext4: consolidate in_range() definitions + +From: Akinobu Mita + +commit 731eb1a03a8445cde2cb23ecfb3580c6fa7bb690 upstream. + +There are duplicate macro definitions of in_range() in mballoc.h and +balloc.c. This consolidates these two definitions into ext4.h, and +changes extents.c to use in_range() as well. 
+ +Signed-off-by: Akinobu Mita +Signed-off-by: "Theodore Ts'o" +Cc: Andreas Dilger +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/balloc.c | 3 --- + fs/ext4/ext4.h | 2 ++ + fs/ext4/extents.c | 4 ++-- + fs/ext4/mballoc.h | 2 -- + 4 files changed, 4 insertions(+), 7 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -189,9 +189,6 @@ unsigned ext4_init_block_bitmap(struct s + * when a file system is mounted (see ext4_fill_super). + */ + +- +-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +- + /** + * ext4_get_group_desc() -- load group descriptor from disk + * @sb: super block +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1773,6 +1773,8 @@ static inline void set_bitmap_uptodate(s + set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); + } + ++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) ++ + #endif /* __KERNEL__ */ + + #endif /* _EXT4_H */ +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1952,7 +1952,7 @@ ext4_ext_in_cache(struct inode *inode, e + + BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && + cex->ec_type != EXT4_EXT_CACHE_EXTENT); +- if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ if (in_range(block, cex->ec_block, cex->ec_len)) { + ex->ee_block = cpu_to_le32(cex->ec_block); + ext4_ext_store_pblock(ex, cex->ec_start); + ex->ee_len = cpu_to_le16(cex->ec_len); +@@ -3258,7 +3258,7 @@ int ext4_ext_get_blocks(handle_t *handle + */ + ee_len = ext4_ext_get_actual_len(ex); + /* if found extent covers block, simply return it */ +- if (iblock >= ee_block && iblock < ee_block + ee_len) { ++ if (in_range(iblock, ee_block, ee_len)) { + newblock = iblock - ee_block + ee_start; + /* number of remaining blocks in the extent */ + allocated = ee_len - (iblock - ee_block); +--- a/fs/ext4/mballoc.h ++++ b/fs/ext4/mballoc.h +@@ -220,8 +220,6 @@ struct ext4_buddy { + #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) + #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) + +-#define 
in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +- + static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, + struct ext4_free_extent *fex) + { diff --git a/queue-2.6.33/ext4-prevent-creation-of-files-larger-than-rlimit_fsize-using-fallocate.patch b/queue-2.6.33/ext4-prevent-creation-of-files-larger-than-rlimit_fsize-using-fallocate.patch new file mode 100644 index 00000000000..2ea2ee2e5eb --- /dev/null +++ b/queue-2.6.33/ext4-prevent-creation-of-files-larger-than-rlimit_fsize-using-fallocate.patch @@ -0,0 +1,35 @@ +From 6d19c42b7cf81c39632b6d4dbc514e8449bcd346 Mon Sep 17 00:00:00 2001 +From: Nikanth Karthikesan +Date: Sun, 16 May 2010 14:00:00 -0400 +Subject: ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate + +From: Nikanth Karthikesan + +commit 6d19c42b7cf81c39632b6d4dbc514e8449bcd346 upstream. + +Currently using posix_fallocate one can bypass an RLIMIT_FSIZE limit +and create a file larger than the limit. Add a check for that. 
+ +Signed-off-by: Nikanth Karthikesan +Signed-off-by: Amit Arora +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/extents.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -3545,6 +3545,11 @@ long ext4_fallocate(struct inode *inode, + */ + credits = ext4_chunk_trans_blocks(inode, max_blocks); + mutex_lock(&inode->i_mutex); ++ ret = inode_newsize_ok(inode, (len + offset)); ++ if (ret) { ++ mutex_unlock(&inode->i_mutex); ++ return ret; ++ } + retry: + while (ret >= 0 && ret < max_blocks) { + block = block + ret; diff --git a/queue-2.6.33/mm-fix-corruption-of-hibernation-caused-by-reusing-swap-during-image-saving.patch b/queue-2.6.33/mm-fix-corruption-of-hibernation-caused-by-reusing-swap-during-image-saving.patch new file mode 100644 index 00000000000..18b9ac98d82 --- /dev/null +++ b/queue-2.6.33/mm-fix-corruption-of-hibernation-caused-by-reusing-swap-during-image-saving.patch @@ -0,0 +1,65 @@ +From 966cca029f739716fbcc8068b8c6dfe381f86fc3 Mon Sep 17 00:00:00 2001 +From: KAMEZAWA Hiroyuki +Date: Mon, 9 Aug 2010 17:20:09 -0700 +Subject: mm: fix corruption of hibernation caused by reusing swap during image saving + +From: KAMEZAWA Hiroyuki + +commit 966cca029f739716fbcc8068b8c6dfe381f86fc3 upstream. + +Since 2.6.31, swap_map[]'s refcounting was changed to show that a used +swap entry is just for swap-cache, can be reused. Then, while scanning +free entry in swap_map[], a swap entry may be able to be reclaimed and +reused. It was caused by commit c9e444103b5e7a5 ("mm: reuse unused swap +entry if necessary"). + +But this caused data corruption at resume. The scenario is + +- Assume a clean-swap cache, but mapped. + +- at hibernation_snapshot[], clean-swap-cache is saved as + clean-swap-cache and swap_map[] is marked as SWAP_HAS_CACHE. + +- then, save_image() is called. And reuse SWAP_HAS_CACHE entry to save + image, and break the contents.
+ +After resume: + +- the memory reclaim runs and finds clean-not-referenced-swap-cache and + discards it because it's marked as clean. But here, the contents on + disk and swap-cache are inconsistent. + +Hence memory is corrupted. + +This patch avoids the bug by not reclaiming swap-entry during hibernation. +This is a quick fix for backporting. + +Signed-off-by: KAMEZAWA Hiroyuki +Cc: Rafael J. Wysocki +Reported-by: Ondreg Zary +Tested-by: Ondreg Zary +Tested-by: Andrea Gelmini +Acked-by: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/swapfile.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -315,8 +315,10 @@ checks: + if (offset > si->highest_bit) + scan_base = offset = si->lowest_bit; + +- /* reuse swap entry of cache-only swap if not busy. */ +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ /* reuse swap entry of cache-only swap if not hibernation. */ ++ if (vm_swap_full() ++ && usage == SWAP_HAS_CACHE ++ && si->swap_map[offset] == SWAP_HAS_CACHE) { + int swap_was_freed; + spin_unlock(&swap_lock); + swap_was_freed = __try_to_reclaim_swap(si, offset); diff --git a/queue-2.6.33/mm-fix-missing-page-table-unmap-for-stack-guard-page-failure-case.patch b/queue-2.6.33/mm-fix-missing-page-table-unmap-for-stack-guard-page-failure-case.patch new file mode 100644 index 00000000000..f2f9674273a --- /dev/null +++ b/queue-2.6.33/mm-fix-missing-page-table-unmap-for-stack-guard-page-failure-case.patch @@ -0,0 +1,34 @@ +From 5528f9132cf65d4d892bcbc5684c61e7822b21e9 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Fri, 13 Aug 2010 09:24:04 -0700 +Subject: mm: fix missing page table unmap for stack guard page failure case + +From: Linus Torvalds + +commit 5528f9132cf65d4d892bcbc5684c61e7822b21e9 upstream. + +.. which didn't show up in my tests because it's a no-op on x86-64 and +most other architectures.
But we enter the function with the last-level +page table mapped, and should unmap it at exit. + +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2677,8 +2677,10 @@ static int do_anonymous_page(struct mm_s + spinlock_t *ptl; + pte_t entry; + +- if (check_stack_guard_page(vma, address) < 0) ++ if (check_stack_guard_page(vma, address) < 0) { ++ pte_unmap(page_table); + return VM_FAULT_SIGBUS; ++ } + + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), diff --git a/queue-2.6.33/mm-fix-page-table-unmap-for-stack-guard-page-properly.patch b/queue-2.6.33/mm-fix-page-table-unmap-for-stack-guard-page-properly.patch new file mode 100644 index 00000000000..9da3225df93 --- /dev/null +++ b/queue-2.6.33/mm-fix-page-table-unmap-for-stack-guard-page-properly.patch @@ -0,0 +1,76 @@ +From 11ac552477e32835cb6970bf0a70c210807f5673 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 14 Aug 2010 11:44:56 -0700 +Subject: mm: fix page table unmap for stack guard page properly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Linus Torvalds + +commit 11ac552477e32835cb6970bf0a70c210807f5673 upstream. + +We do in fact need to unmap the page table _before_ doing the whole +stack guard page logic, because if it is needed (mainly 32-bit x86 with +PAE and CONFIG_HIGHPTE, but other architectures may use it too) then it +will do a kmap_atomic/kunmap_atomic. + +And those kmaps will create an atomic region that we cannot do +allocations in. However, the whole stack expand code will need to do +anon_vma_prepare() and vma_lock_anon_vma() and they cannot do that in an +atomic region. + +Now, a better model might actually be to do the anon_vma_prepare() when +_creating_ a VM_GROWSDOWN segment, and not have to worry about any of +this at page fault time. 
But in the meantime, this is the +straightforward fix for the issue. + +See https://bugzilla.kernel.org/show_bug.cgi?id=16588 for details. + +Reported-by: Wylda +Reported-by: Sedat Dilek +Reported-by: Mike Pagano +Reported-by: François Valenduc +Tested-by: Ed Tomlinson +Cc: Pekka Enberg +Cc: Greg KH +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2677,24 +2677,23 @@ static int do_anonymous_page(struct mm_s + spinlock_t *ptl; + pte_t entry; + +- if (check_stack_guard_page(vma, address) < 0) { +- pte_unmap(page_table); ++ pte_unmap(page_table); ++ ++ /* Check if we need to add a guard page to the stack */ ++ if (check_stack_guard_page(vma, address) < 0) + return VM_FAULT_SIGBUS; +- } + ++ /* Use the zero-page for reads */ + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), + vma->vm_page_prot)); +- ptl = pte_lockptr(mm, pmd); +- spin_lock(ptl); ++ page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + if (!pte_none(*page_table)) + goto unlock; + goto setpte; + } + + /* Allocate our own private page. 
*/ +- pte_unmap(page_table); +- + if (unlikely(anon_vma_prepare(vma))) + goto oom; + page = alloc_zeroed_user_highpage_movable(vma, address); diff --git a/queue-2.6.33/mm-fix-up-some-user-visible-effects-of-the-stack-guard-page.patch b/queue-2.6.33/mm-fix-up-some-user-visible-effects-of-the-stack-guard-page.patch new file mode 100644 index 00000000000..88631186069 --- /dev/null +++ b/queue-2.6.33/mm-fix-up-some-user-visible-effects-of-the-stack-guard-page.patch @@ -0,0 +1,89 @@ +From d7824370e26325c881b665350ce64fb0a4fde24a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 15 Aug 2010 11:35:52 -0700 +Subject: mm: fix up some user-visible effects of the stack guard page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Linus Torvalds + +commit d7824370e26325c881b665350ce64fb0a4fde24a upstream. + +This commit makes the stack guard page somewhat less visible to user +space. It does this by: + + - not showing the guard page in /proc/<pid>/maps + + It looks like lvm-tools will actually read /proc/self/maps to figure + out where all its mappings are, and effectively do a specialized + "mlockall()" in user space. By not showing the guard page as part of + the mapping (by just adding PAGE_SIZE to the start for grows-down + pages), lvm-tools ends up not being aware of it. + + - by also teaching the _real_ mlock() functionality not to try to lock + the guard page. + + That would just expand the mapping down to create a new guard page, + so there really is no point in trying to lock it in place. + +It would perhaps be nice to show the guard page specially in +/proc/<pid>/maps (or at least mark grow-down segments some way), but +let's not open ourselves up to more breakage by user space from programs +that depend on the exact details of the 'maps' file. + +Special thanks to Henrique de Moraes Holschuh for diving into lvm-tools +source code to see what was going on with the whole new warning.
+ +Reported-and-tested-by: François Valenduc +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/task_mmu.c | 8 +++++++- + mm/mlock.c | 8 ++++++++ + 2 files changed, 15 insertions(+), 1 deletion(-) + +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -206,6 +206,7 @@ static void show_map_vma(struct seq_file + int flags = vma->vm_flags; + unsigned long ino = 0; + unsigned long long pgoff = 0; ++ unsigned long start; + dev_t dev = 0; + int len; + +@@ -216,8 +217,13 @@ static void show_map_vma(struct seq_file + pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; + } + ++ /* We don't show the stack guard page in /proc/maps */ ++ start = vma->vm_start; ++ if (vma->vm_flags & VM_GROWSDOWN) ++ start += PAGE_SIZE; ++ + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", +- vma->vm_start, ++ start, + vma->vm_end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -167,6 +167,14 @@ static long __mlock_vma_pages_range(stru + if (vma->vm_flags & VM_WRITE) + gup_flags |= FOLL_WRITE; + ++ /* We don't try to access the guard page of a stack vma */ ++ if (vma->vm_flags & VM_GROWSDOWN) { ++ if (start == vma->vm_start) { ++ start += PAGE_SIZE; ++ nr_pages--; ++ } ++ } ++ + while (nr_pages > 0) { + int i; + diff --git a/queue-2.6.33/mm-keep-a-guard-page-below-a-grow-down-stack-segment.patch b/queue-2.6.33/mm-keep-a-guard-page-below-a-grow-down-stack-segment.patch new file mode 100644 index 00000000000..fbd33ce04ba --- /dev/null +++ b/queue-2.6.33/mm-keep-a-guard-page-below-a-grow-down-stack-segment.patch @@ -0,0 +1,72 @@ +From 320b2b8de12698082609ebbc1a17165727f4c893 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Thu, 12 Aug 2010 17:54:33 -0700 +Subject: mm: keep a guard page below a grow-down stack segment + +From: Linus Torvalds + +commit 320b2b8de12698082609ebbc1a17165727f4c893 upstream. 
+ +This is a rather minimally invasive patch to solve the problem of the +user stack growing into a memory mapped area below it. Whenever we fill +the first page of the stack segment, expand the segment down by one +page. + +Now, admittedly some odd application might _want_ the stack to grow down +into the preceding memory mapping, and so we may at some point need to +make this a process tunable (some people might also want to have more +than a single page of guarding), but let's try the minimal approach +first. + +Tested with trivial application that maps a single page just below the +stack, and then starts recursing. Without this, we will get a SIGSEGV +_after_ the stack has smashed the mapping. With this patch, we'll get a +nice SIGBUS just as the stack touches the page just above the mapping. + +Requested-by: Keith Packard +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2645,6 +2645,26 @@ out_release: + } + + /* ++ * This is like a special single-page "expand_downwards()", ++ * except we must first make sure that 'address-PAGE_SIZE' ++ * doesn't hit another vma. ++ * ++ * The "find_vma()" will do the right thing even if we wrap ++ */ ++static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) ++{ ++ address &= PAGE_MASK; ++ if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { ++ address -= PAGE_SIZE; ++ if (find_vma(vma->vm_mm, address) != vma) ++ return -ENOMEM; ++ ++ expand_stack(vma, address); ++ } ++ return 0; ++} ++ ++/* + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults), and pte mapped but not yet locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. 
+@@ -2657,6 +2677,9 @@ static int do_anonymous_page(struct mm_s + spinlock_t *ptl; + pte_t entry; + ++ if (check_stack_guard_page(vma, address) < 0) ++ return VM_FAULT_SIGBUS; ++ + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), + vma->vm_page_prot)); diff --git a/queue-2.6.33/prioritize-synchronous-signals-over-normal-signals.patch b/queue-2.6.33/prioritize-synchronous-signals-over-normal-signals.patch new file mode 100644 index 00000000000..2cb93283ccd --- /dev/null +++ b/queue-2.6.33/prioritize-synchronous-signals-over-normal-signals.patch @@ -0,0 +1,117 @@ +From a27341cd5fcb7cf2d2d4726e9f324009f7162c00 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 2 Mar 2010 08:36:46 -0800 +Subject: Prioritize synchronous signals over 'normal' signals + +From: Linus Torvalds + +commit a27341cd5fcb7cf2d2d4726e9f324009f7162c00 upstream. + +This makes sure that we pick the synchronous signals caused by a +processor fault over any pending regular asynchronous signals sent to +us by [t]kill(). + +This is not strictly required semantics, but it makes it _much_ easier +for programs like Wine that expect to find the fault information in the +signal stack. + +Without this, if a non-synchronous signal gets picked first, the delayed +asynchronous signal will have its signal context pointing to the new +signal invocation, rather than the instruction that caused the SIGSEGV +or SIGBUS in the first place. + +This is not all that pretty, and we're discussing making the synchronous +signals more explicit rather than have these kinds of implicit +preferences of SIGSEGV and friends. See for example + + http://bugzilla.kernel.org/show_bug.cgi?id=15395 + +for some of the discussion.
But in the meantime this is a simple and +fairly straightforward work-around, and the whole + + if (x & Y) + x &= Y; + +thing can be compiled into (and gcc does do it) just three instructions: + + movq %rdx, %rax + andl $Y, %eax + cmovne %rax, %rdx + +so it is at least a simple solution to a subtle issue. + +Reported-and-tested-by: Pavel Vilim +Acked-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/signal.c | 43 ++++++++++++++++++++++++++++++------------- + 1 file changed, 30 insertions(+), 13 deletions(-) + +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -159,6 +159,10 @@ void recalc_sigpending(void) + + /* Given the mask, find the first available signal that should be serviced. */ + ++#define SYNCHRONOUS_MASK \ ++ (sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \ ++ sigmask(SIGTRAP) | sigmask(SIGFPE)) ++ + int next_signal(struct sigpending *pending, sigset_t *mask) + { + unsigned long i, *s, *m, x; +@@ -166,26 +170,39 @@ int next_signal(struct sigpending *pendi + + s = pending->signal.sig; + m = mask->sig; ++ ++ /* ++ * Handle the first word specially: it contains the ++ * synchronous signals that need to be dequeued first. 
++ */ ++ x = *s &~ *m; ++ if (x) { ++ if (x & SYNCHRONOUS_MASK) ++ x &= SYNCHRONOUS_MASK; ++ sig = ffz(~x) + 1; ++ return sig; ++ } ++ + switch (_NSIG_WORDS) { + default: +- for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m) +- if ((x = *s &~ *m) != 0) { +- sig = ffz(~x) + i*_NSIG_BPW + 1; +- break; +- } ++ for (i = 1; i < _NSIG_WORDS; ++i) { ++ x = *++s &~ *++m; ++ if (!x) ++ continue; ++ sig = ffz(~x) + i*_NSIG_BPW + 1; ++ break; ++ } + break; + +- case 2: if ((x = s[0] &~ m[0]) != 0) +- sig = 1; +- else if ((x = s[1] &~ m[1]) != 0) +- sig = _NSIG_BPW + 1; +- else ++ case 2: ++ x = s[1] &~ m[1]; ++ if (!x) + break; +- sig += ffz(~x); ++ sig = ffz(~x) + _NSIG_BPW + 1; + break; + +- case 1: if ((x = *s &~ *m) != 0) +- sig = ffz(~x) + 1; ++ case 1: ++ /* Nothing to do */ + break; + } + diff --git a/queue-2.6.33/series b/queue-2.6.33/series index 1f018d1389d..820922fa257 100644 --- a/queue-2.6.33/series +++ b/queue-2.6.33/series @@ -179,3 +179,15 @@ staging-samsung-laptop-fix-up-some-sysfs-attribute-permissions.patch staging-samsung-laptop-fix-up-my-fixup-for-some-sysfs-attribute-permissions.patch staging-frontier-fix-up-some-sysfs-attribute-permissions.patch staging-rtl8187se-change-panic-to-warn-when-rf-switch-turned-off.patch +mm-keep-a-guard-page-below-a-grow-down-stack-segment.patch +mm-fix-missing-page-table-unmap-for-stack-guard-page-failure-case.patch +mm-fix-page-table-unmap-for-stack-guard-page-properly.patch +mm-fix-up-some-user-visible-effects-of-the-stack-guard-page.patch +ext4-consolidate-in_range-definitions.patch +x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch +x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch +compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch +ext4-prevent-creation-of-files-larger-than-rlimit_fsize-using-fallocate.patch +mm-fix-corruption-of-hibernation-caused-by-reusing-swap-during-image-saving.patch +btrfs-kfree-correct-pointer-during-mount-option-parsing.patch 
+prioritize-synchronous-signals-over-normal-signals.patch diff --git a/queue-2.6.33/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch b/queue-2.6.33/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch new file mode 100644 index 00000000000..f00ba7e9e02 --- /dev/null +++ b/queue-2.6.33/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch @@ -0,0 +1,50 @@ +From eefdca043e8391dcd719711716492063030b55ac Mon Sep 17 00:00:00 2001 +From: Roland McGrath +Date: Tue, 14 Sep 2010 12:22:58 -0700 +Subject: x86-64, compat: Retruncate rax after ia32 syscall entry tracing + +From: Roland McGrath + +commit eefdca043e8391dcd719711716492063030b55ac upstream. + +In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a +32-bit tracee in system call entry. A %rax value set via ptrace at the +entry tracing stop gets used whole as a 32-bit syscall number, while we +only check the low 32 bits for validity. + +Fix it by truncating %rax back to 32 bits after syscall_trace_enter, +in addition to testing the full 64 bits as has already been added. + +Reported-by: Ben Hawkes +Signed-off-by: Roland McGrath +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/ia32/ia32entry.S | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/ia32/ia32entry.S ++++ b/arch/x86/ia32/ia32entry.S +@@ -50,7 +50,12 @@ + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned +- * the value it wants us to use in the table lookup. ++ * the %rax value we should see. Instead, we just truncate that ++ * value to 32 bits again as we did on entry from user mode. ++ * If it's a new value set by user_regset during entry tracing, ++ * this matches the normal truncation of the user-mode value. ++ * If it's -1 to make us punt the syscall, then (u32)-1 is still ++ * an appropriately invalid value. 
+ */ + .macro LOAD_ARGS32 offset, _r9=0 + .if \_r9 +@@ -60,6 +65,7 @@ + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi + movl \offset+64(%rsp),%edi ++ movl %eax,%eax /* zero extension */ + .endm + + .macro CFI_STARTPROC32 simple diff --git a/queue-2.6.33/x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch b/queue-2.6.33/x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch new file mode 100644 index 00000000000..8bf27c2078c --- /dev/null +++ b/queue-2.6.33/x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch @@ -0,0 +1,97 @@ +From 36d001c70d8a0144ac1d038f6876c484849a74de Mon Sep 17 00:00:00 2001 +From: H. Peter Anvin +Date: Tue, 14 Sep 2010 12:42:41 -0700 +Subject: x86-64, compat: Test %rax for the syscall number, not %eax + +From: H. Peter Anvin + +commit 36d001c70d8a0144ac1d038f6876c484849a74de upstream. + +On 64 bits, we always, by necessity, jump through the system call +table via %rax. For 32-bit system calls, in theory the system call +number is stored in %eax, and the code was testing %eax for a valid +system call number. At one point we loaded the stored value back from +the stack to enforce zero-extension, but that was removed in checkin +d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process +will not be able to introduce a non-zero-extended number, but it can +happen via ptrace. + +Instead of re-introducing the zero-extension, test what we are +actually going to use, i.e. %rax. This only adds a handful of REX +prefixes to the code. + +Reported-by: Ben Hawkes +Signed-off-by: H. 
Peter Anvin +Cc: Roland McGrath +Cc: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/ia32/ia32entry.S | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/arch/x86/ia32/ia32entry.S ++++ b/arch/x86/ia32/ia32entry.S +@@ -153,7 +153,7 @@ ENTRY(ia32_sysenter_target) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + CFI_REMEMBER_STATE + jnz sysenter_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + sysenter_do_call: + IA32_ARG_FIXUP +@@ -195,7 +195,7 @@ sysexit_from_sys_call: + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ + call audit_syscall_entry + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + movl %ebx,%edi /* reload 1st syscall arg */ + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ +@@ -248,7 +248,7 @@ sysenter_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp sysenter_do_call + CFI_ENDPROC +@@ -314,7 +314,7 @@ ENTRY(ia32_cstar_target) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + CFI_REMEMBER_STATE + jnz cstar_tracesys +- cmpl $IA32_NR_syscalls-1,%eax ++ cmpq $IA32_NR_syscalls-1,%rax + ja ia32_badsys + cstar_do_call: + IA32_ARG_FIXUP 1 +@@ -367,7 +367,7 @@ cstar_tracesys: + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call + END(ia32_cstar_target) +@@ -425,7 +425,7 @@ ENTRY(ia32_syscall) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz ia32_tracesys +- cmpl 
$(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + ia32_do_call: + IA32_ARG_FIXUP +@@ -444,7 +444,7 @@ ia32_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + END(ia32_syscall)