git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
some more patches in 2.6.18 queue (mid-review cycle) pointed out by Hugh
author    Chris Wright <chrisw@sous-sol.org>
          Fri, 23 Feb 2007 22:34:36 +0000 (14:34 -0800)
committer Chris Wright <chrisw@sous-sol.org>
          Fri, 23 Feb 2007 22:34:36 +0000 (14:34 -0800)
review-2.6.18/fix-for-shmem_truncate_range-bug_on.patch [new file with mode: 0644]
review-2.6.18/fix-incorrect-user-space-access-locking-in-mincore.patch [new file with mode: 0644]
review-2.6.18/fix-msync-error-on-unmapped-area.patch [new file with mode: 0644]
review-2.6.18/fix-umask-when-noacl-kernel-meets-extn-tuned-for-acls.patch [new file with mode: 0644]
review-2.6.18/make-ppc64-current-preempt-safe.patch [new file with mode: 0644]
review-2.6.18/read_zero_pagealigned-locking-fix.patch [new file with mode: 0644]
review-2.6.18/series

diff --git a/review-2.6.18/fix-for-shmem_truncate_range-bug_on.patch b/review-2.6.18/fix-for-shmem_truncate_range-bug_on.patch
new file mode 100644 (file)
index 0000000..0bb78f9
--- /dev/null
@@ -0,0 +1,42 @@
+From stable-bounces@linux.kernel.org  Fri Dec 22 01:13:06 2006
+Message-Id: <200612220906.kBM96PM4018647@shell0.pdx.osdl.net>
+To: torvalds@osdl.org
+From: akpm@osdl.org
+Date: Fri, 22 Dec 2006 01:06:23 -0800
+Cc: akpm@osdl.org, hugh@veritas.com, pbadari@us.ibm.com, stable@kernel.org
+Subject: Fix for shmem_truncate_range() BUG_ON()
+
+From: Badari Pulavarty <pbadari@us.ibm.com>
+
+Ran into BUG() while doing madvise(REMOVE) testing.  If we are punching a
+hole into a shared memory segment using madvise(REMOVE) and the entire hole
+is below the indirect blocks, we hit the following assert.
+
+               BUG_ON(limit <= SHMEM_NR_DIRECT);
+
+Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
+Cc: Hugh Dickins <hugh@veritas.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ mm/shmem.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- linux-2.6.18.7.orig/mm/shmem.c
++++ linux-2.6.18.7/mm/shmem.c
+@@ -510,7 +510,12 @@ static void shmem_truncate_range(struct 
+                       size = SHMEM_NR_DIRECT;
+               nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+       }
+-      if (!topdir)
++
++      /*
++       * If there are no indirect blocks or we are punching a hole
++       * below indirect blocks, nothing to be done.
++       */
++      if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+               goto done2;
+       BUG_ON(limit <= SHMEM_NR_DIRECT);
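A minimal userspace sketch of the trigger, assuming the first SHMEM_NR_DIRECT pages (16 in this kernel) are mapped by direct blocks; sizes are illustrative, not taken from the patch. The segment must be large enough to have indirect blocks while the punched hole lies entirely below them:

#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 64 * page;		/* big enough to need indirect blocks */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);	/* shmem-backed */

	if (p == MAP_FAILED)
		return 1;
	memset(p, 1, len);		/* instantiate every page */
	/* Hole entirely within the direct blocks: an unpatched 2.6.18
	 * can reach BUG_ON(limit <= SHMEM_NR_DIRECT) from here. */
	if (madvise(p, 4 * page, MADV_REMOVE) != 0)
		return 1;
	return munmap(p, len);
}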
diff --git a/review-2.6.18/fix-incorrect-user-space-access-locking-in-mincore.patch b/review-2.6.18/fix-incorrect-user-space-access-locking-in-mincore.patch
new file mode 100644 (file)
index 0000000..885ae39
--- /dev/null
@@ -0,0 +1,246 @@
+From 2f77d107050abc14bc393b34bdb7b91cf670c250 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@woody.osdl.org>
+Date: Sat, 16 Dec 2006 09:44:32 -0800
+Subject: Fix incorrect user space access locking in mincore() (CVE-2006-4814)
+
+Doug Chapman noticed that mincore() will do a "copy_to_user()" of the
+result while holding the mmap semaphore for reading, which is a big
+no-no.  While a recursive read-lock on a semaphore in the case of a page
+fault happens to work, we don't actually allow them due to deadlock
+scenarios with writers arising from fairness issues.
+
+Doug and Marcel sent in a patch to fix it, but I decided to just rewrite
+the mess instead - not just fixing the locking problem, but making the
+code smaller and (imho) much easier to understand.
+
+Cc: Doug Chapman <dchapman@redhat.com>
+Cc: Marcel Holtmann <holtmann@redhat.com>
+Cc: Hugh Dickins <hugh@veritas.com>
+Cc: Andrew Morton <akpm@osdl.org>
+[chrisw: fold in subsequent fix: 4fb23e439ce0]
+Acked-by: Hugh Dickins <hugh@veritas.com>
+[chrisw: fold in subsequent fix: 825020c3866e]
+Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ mm/mincore.c |  181 +++++++++++++++++++++++++----------------------------------
+ 1 file changed, 77 insertions(+), 104 deletions(-)
+
+--- linux-2.6.18.7.orig/mm/mincore.c
++++ linux-2.6.18.7/mm/mincore.c
+@@ -1,7 +1,7 @@
+ /*
+  *    linux/mm/mincore.c
+  *
+- * Copyright (C) 1994-1999  Linus Torvalds
++ * Copyright (C) 1994-2006  Linus Torvalds
+  */
+ /*
+@@ -38,46 +38,51 @@ static unsigned char mincore_page(struct
+       return present;
+ }
+-static long mincore_vma(struct vm_area_struct * vma,
+-      unsigned long start, unsigned long end, unsigned char __user * vec)
++/*
++ * Do a chunk of "sys_mincore()". We've already checked
++ * all the arguments, we hold the mmap semaphore: we should
++ * just return the amount of info we're asked for.
++ */
++static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
+ {
+-      long error, i, remaining;
+-      unsigned char * tmp;
+-
+-      error = -ENOMEM;
+-      if (!vma->vm_file)
+-              return error;
+-
+-      start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+-      if (end > vma->vm_end)
+-              end = vma->vm_end;
+-      end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+-
+-      error = -EAGAIN;
+-      tmp = (unsigned char *) __get_free_page(GFP_KERNEL);
+-      if (!tmp)
+-              return error;
+-
+-      /* (end - start) is # of pages, and also # of bytes in "vec */
+-      remaining = (end - start),
++      unsigned long i, nr, pgoff;
++      struct vm_area_struct *vma = find_vma(current->mm, addr);
+-      error = 0;
+-      for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) {
+-              int j = 0;
+-              long thispiece = (remaining < PAGE_SIZE) ?
+-                                              remaining : PAGE_SIZE;
++      /*
++       * find_vma() didn't find anything above us, or we're
++       * in an unmapped hole in the address space: ENOMEM.
++       */
++      if (!vma || addr < vma->vm_start)
++              return -ENOMEM;
+-              while (j < thispiece)
+-                      tmp[j++] = mincore_page(vma, start++);
++      /*
++       * Ok, got it. But check whether it's a segment we support
++       * mincore() on. Right now, we don't do any anonymous mappings.
++       *
++       * FIXME: This is just stupid. And returning ENOMEM is 
++       * stupid too. We should just look at the page tables. But
++       * this is what we've traditionally done, so we'll just
++       * continue doing it.
++       */
++      if (!vma->vm_file)
++              return -ENOMEM;
+-              if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) {
+-                      error = -EFAULT;
+-                      break;
+-              }
+-      }
++      /*
++       * Calculate how many pages there are left in the vma, and
++       * what the pgoff is for our address.
++       */
++      nr = (vma->vm_end - addr) >> PAGE_SHIFT;
++      if (nr > pages)
++              nr = pages;
++
++      pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
++      pgoff += vma->vm_pgoff;
++
++      /* And then we just fill the sucker in.. */
++      for (i = 0 ; i < nr; i++, pgoff++)
++              vec[i] = mincore_page(vma, pgoff);
+-      free_page((unsigned long) tmp);
+-      return error;
++      return nr;
+ }
+ /*
+@@ -107,82 +112,50 @@ static long mincore_vma(struct vm_area_s
+ asmlinkage long sys_mincore(unsigned long start, size_t len,
+       unsigned char __user * vec)
+ {
+-      int index = 0;
+-      unsigned long end, limit;
+-      struct vm_area_struct * vma;
+-      size_t max;
+-      int unmapped_error = 0;
+-      long error;
++      long retval;
++      unsigned long pages;
++      unsigned char *tmp;
+-      /* check the arguments */
++      /* Check the start address: needs to be page-aligned.. */
+       if (start & ~PAGE_CACHE_MASK)
+-              goto einval;
+-
+-      limit = TASK_SIZE;
+-      if (start >= limit)
+-              goto enomem;
++              return -EINVAL;
+-      if (!len)
+-              return 0;
++      /* ..and we need to be passed a valid user-space range */
++      if (!access_ok(VERIFY_READ, (void __user *) start, len))
++              return -ENOMEM;
+-      max = limit - start;
+-      len = PAGE_CACHE_ALIGN(len);
+-      if (len > max || !len)
+-              goto enomem;
++      /* This also avoids any overflows on PAGE_CACHE_ALIGN */
++      pages = len >> PAGE_SHIFT;
++      pages += (len & ~PAGE_MASK) != 0;
+-      end = start + len;
++      if (!access_ok(VERIFY_WRITE, vec, pages))
++              return -EFAULT;
+-      /* check the output buffer whilst holding the lock */
+-      error = -EFAULT;
+-      down_read(&current->mm->mmap_sem);
+-
+-      if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT))
+-              goto out;
+-
+-      /*
+-       * If the interval [start,end) covers some unmapped address
+-       * ranges, just ignore them, but return -ENOMEM at the end.
+-       */
+-      error = 0;
++      tmp = (void *) __get_free_page(GFP_USER);
++      if (!tmp)
++              return -EAGAIN;
+-      vma = find_vma(current->mm, start);
+-      while (vma) {
+-              /* Here start < vma->vm_end. */
+-              if (start < vma->vm_start) {
+-                      unmapped_error = -ENOMEM;
+-                      start = vma->vm_start;
+-              }
++      retval = 0;
++      while (pages) {
++              /*
++               * Do at most PAGE_SIZE entries per iteration, due to
++               * the temporary buffer size.
++               */
++              down_read(&current->mm->mmap_sem);
++              retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
++              up_read(&current->mm->mmap_sem);
+-              /* Here vma->vm_start <= start < vma->vm_end. */
+-              if (end <= vma->vm_end) {
+-                      if (start < end) {
+-                              error = mincore_vma(vma, start, end,
+-                                                      &vec[index]);
+-                              if (error)
+-                                      goto out;
+-                      }
+-                      error = unmapped_error;
+-                      goto out;
++              if (retval <= 0)
++                      break;
++              if (copy_to_user(vec, tmp, retval)) {
++                      retval = -EFAULT;
++                      break;
+               }
+-
+-              /* Here vma->vm_start <= start < vma->vm_end < end. */
+-              error = mincore_vma(vma, start, vma->vm_end, &vec[index]);
+-              if (error)
+-                      goto out;
+-              index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
+-              start = vma->vm_end;
+-              vma = vma->vm_next;
++              pages -= retval;
++              vec += retval;
++              start += retval << PAGE_SHIFT;
++              retval = 0;
+       }
+-
+-      /* we found a hole in the area queried if we arrive here */
+-      error = -ENOMEM;
+-
+-out:
+-      up_read(&current->mm->mmap_sem);
+-      return error;
+-
+-einval:
+-      return -EINVAL;
+-enomem:
+-      return -ENOMEM;
++      free_page((unsigned long) tmp);
++      return retval;
+ }
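For reference, a small usage sketch of the interface being rewritten: mincore() reports one byte per page of the queried range. A file-backed mapping is used because, as the FIXME above notes, this kernel still returns ENOMEM for anonymous mappings; the temporary file name is illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 8 * page;
	unsigned char vec[8];		/* one byte per page queried */
	int fd = open("mincore.tmp", O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0 || ftruncate(fd, len) != 0)
		return 1;
	char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	(void)*(volatile char *)p;	/* fault in the first page */
	if (mincore(p, len, vec) != 0)
		return 1;
	for (int i = 0; i < 8; i++)
		printf("page %d: %sresident\n", i, (vec[i] & 1) ? "" : "not ");
	unlink("mincore.tmp");
	return 0;
}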
diff --git a/review-2.6.18/fix-msync-error-on-unmapped-area.patch b/review-2.6.18/fix-msync-error-on-unmapped-area.patch
new file mode 100644 (file)
index 0000000..f5c71bb
--- /dev/null
@@ -0,0 +1,133 @@
+From hugh_dickins@symantec.com  Thu Jan  4 12:29:44 2007
+Date: Thu, 4 Jan 2007 20:22:14 +0000 (GMT)
+From: Hugh Dickins <hugh@veritas.com>
+To: Chris Wright <chrisw@sous-sol.org>
+cc: Martin Michlmayr <tbm@cyrius.com>, Jeff Licquia <jeff@licquia.org>, stable@kernel.org
+Subject: fix msync error on unmapped area
+Message-ID: <Pine.LNX.4.64.0701042012470.32026@blonde.wat.veritas.com>
+
+Fix the 2.6.18 sys_msync to report -ENOMEM correctly when an unmapped area
+falls within its range, and not to overshoot: to satisfy LSB 3.1 tests and
+to fix Debian Bug#394392.  Took the 2.6.19 sys_msync as a starting point
+(including its cleanup of repeated "current->mm"s), reintroducing the
+msync_interval and balance_dirty_pages_ratelimited_nr needed in 2.6.18.
+
+The misbehaviour fixed here may not seem very serious; but it was enough
+to mislead Debian into backporting 2.6.19's dirty page tracking patches,
+with attendant mayhem when those resulted in unsuspected file corruption.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ mm/msync.c |   66 +++++++++++++++++++++++++++----------------------------------
+ 1 file changed, 30 insertions(+), 36 deletions(-)
+
+--- linux-2.6.18.7.orig/mm/msync.c
++++ linux-2.6.18.7/mm/msync.c
+@@ -146,10 +146,10 @@ static int msync_interval(struct vm_area
+ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
+ {
+       unsigned long end;
++      struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       int unmapped_error = 0;
+       int error = -EINVAL;
+-      int done = 0;
+       if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
+               goto out;
+@@ -169,64 +169,58 @@ asmlinkage long sys_msync(unsigned long 
+        * If the interval [start,end) covers some unmapped address ranges,
+        * just ignore them, but return -ENOMEM at the end.
+        */
+-      down_read(&current->mm->mmap_sem);
+-      vma = find_vma(current->mm, start);
+-      if (!vma) {
+-              error = -ENOMEM;
+-              goto out_unlock;
+-      }
+-      do {
++      down_read(&mm->mmap_sem);
++      vma = find_vma(mm, start);
++      for (;;) {
+               unsigned long nr_pages_dirtied = 0;
+               struct file *file;
++              /* Still start < end. */
++              error = -ENOMEM;
++              if (!vma)
++                      goto out_unlock;
+               /* Here start < vma->vm_end. */
+               if (start < vma->vm_start) {
+-                      unmapped_error = -ENOMEM;
+                       start = vma->vm_start;
+-              }
+-              /* Here vma->vm_start <= start < vma->vm_end. */
+-              if (end <= vma->vm_end) {
+-                      if (start < end) {
+-                              error = msync_interval(vma, start, end, flags,
+-                                                      &nr_pages_dirtied);
+-                              if (error)
+-                                      goto out_unlock;
+-                      }
+-                      error = unmapped_error;
+-                      done = 1;
+-              } else {
+-                      /* Here vma->vm_start <= start < vma->vm_end < end. */
+-                      error = msync_interval(vma, start, vma->vm_end, flags,
+-                                              &nr_pages_dirtied);
+-                      if (error)
++                      if (start >= end)
+                               goto out_unlock;
++                      unmapped_error = -ENOMEM;
+               }
++              /* Here vma->vm_start <= start < vma->vm_end. */
++              error = msync_interval(vma, start, min(end, vma->vm_end),
++                                              flags, &nr_pages_dirtied);
++              if (error)
++                      goto out_unlock;
+               file = vma->vm_file;
+               start = vma->vm_end;
+               if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
+                       get_file(file);
+-                      up_read(&current->mm->mmap_sem);
++                      up_read(&mm->mmap_sem);
+                       balance_dirty_pages_ratelimited_nr(file->f_mapping,
+                                                       nr_pages_dirtied);
+                       fput(file);
+-                      down_read(&current->mm->mmap_sem);
+-                      vma = find_vma(current->mm, start);
++                      if (start >= end)
++                              goto out;
++                      down_read(&mm->mmap_sem);
++                      vma = find_vma(mm, start);
+               } else if ((flags & MS_SYNC) && file &&
+                               (vma->vm_flags & VM_SHARED)) {
+                       get_file(file);
+-                      up_read(&current->mm->mmap_sem);
++                      up_read(&mm->mmap_sem);
+                       error = do_fsync(file, 0);
+                       fput(file);
+-                      down_read(&current->mm->mmap_sem);
+-                      if (error)
+-                              goto out_unlock;
+-                      vma = find_vma(current->mm, start);
++                      if (error || start >= end)
++                              goto out;
++                      down_read(&mm->mmap_sem);
++                      vma = find_vma(mm, start);
+               } else {
++                      if (start >= end)
++                              goto out_unlock;
+                       vma = vma->vm_next;
+               }
+-      } while (vma && !done);
++      }
+ out_unlock:
+-      up_read(&current->mm->mmap_sem);
++      up_read(&mm->mmap_sem);
+ out:
+-      return error;
++      return error ? : unmapped_error;
+ }
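A sketch of the behaviour this restores (mapping type and sizes illustrative): msync() across a range containing an unmapped hole should sync what it can and still fail with ENOMEM, which is what the LSB 3.1 tests expect.

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	munmap(p + page, page);		/* unmapped hole in the middle */
	if (msync(p, 3 * page, MS_SYNC) == -1 && errno == ENOMEM)
		printf("ENOMEM reported for the hole, as expected\n");
	return 0;
}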
diff --git a/review-2.6.18/fix-umask-when-noacl-kernel-meets-extn-tuned-for-acls.patch b/review-2.6.18/fix-umask-when-noacl-kernel-meets-extn-tuned-for-acls.patch
new file mode 100644 (file)
index 0000000..418de7b
--- /dev/null
@@ -0,0 +1,70 @@
+From stable-bounces@linux.kernel.org  Fri Feb 23 13:53:26 2007
+Date: Fri, 23 Feb 2007 21:51:20 +0000 (GMT)
+From: Hugh Dickins <hugh@veritas.com>
+To: Greg KH <greg@kroah.com>
+Message-ID: <Pine.LNX.4.64.0702232146460.11377@blonde.wat.veritas.com>
+Cc: Chris Wright <chrisw@sous-sol.org>, Tigran Aivazian <tigran@aivazian.fsnet.co.uk>, stable@kernel.org, Andreas Gruenbacher <agruen@suse.de>
+Subject: fix umask when noACL kernel meets extN tuned for ACLs
+
+Fix insecure default behaviour reported by Tigran Aivazian: if an ext2
+or ext3 filesystem is tuned to mount with "acl", but mounted by
+a kernel built without ACL support, then umask is ignored when creating
+inodes - though root or user has umask 022, touch creates files as 0666,
+and mkdir creates directories as 0777.
+
+This appears to have worked right until 2.6.11, when a fix to the default
+mode on symlinks (always 0777) assumed VFS applies umask: which it does,
+unless the mount is marked for ACLs; but ext[23] set MS_POSIXACL in
+s_flags according to s_mount_opt set according to def_mount_opts.
+
+We could revert to the 2.6.10 ext[23]_init_acl (adding an S_ISLNK test);
+but other filesystems only set MS_POSIXACL when ACLs are configured.  We
+could fix this at another level; but it seems most robust to avoid setting
+the s_mount_opt flag in the first place (at the expense of more ifdefs).
+
+Likewise don't set the XATTR_USER flag when built without XATTR support.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Acked-by: Andreas Gruenbacher <agruen@suse.de>
+Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ fs/ext2/super.c |    4 ++++
+ fs/ext3/super.c |    4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- linux-2.6.18.7.orig/fs/ext2/super.c
++++ linux-2.6.18.7/fs/ext2/super.c
+@@ -701,10 +701,14 @@ static int ext2_fill_super(struct super_
+               set_opt(sbi->s_mount_opt, GRPID);
+       if (def_mount_opts & EXT2_DEFM_UID16)
+               set_opt(sbi->s_mount_opt, NO_UID32);
++#ifdef CONFIG_EXT2_FS_XATTR
+       if (def_mount_opts & EXT2_DEFM_XATTR_USER)
+               set_opt(sbi->s_mount_opt, XATTR_USER);
++#endif
++#ifdef CONFIG_EXT2_FS_POSIX_ACL
+       if (def_mount_opts & EXT2_DEFM_ACL)
+               set_opt(sbi->s_mount_opt, POSIX_ACL);
++#endif
+       
+       if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
+               set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+--- linux-2.6.18.7.orig/fs/ext3/super.c
++++ linux-2.6.18.7/fs/ext3/super.c
+@@ -1451,10 +1451,14 @@ static int ext3_fill_super (struct super
+               set_opt(sbi->s_mount_opt, GRPID);
+       if (def_mount_opts & EXT3_DEFM_UID16)
+               set_opt(sbi->s_mount_opt, NO_UID32);
++#ifdef CONFIG_EXT3_FS_XATTR
+       if (def_mount_opts & EXT3_DEFM_XATTR_USER)
+               set_opt(sbi->s_mount_opt, XATTR_USER);
++#endif
++#ifdef CONFIG_EXT3_FS_POSIX_ACL
+       if (def_mount_opts & EXT3_DEFM_ACL)
+               set_opt(sbi->s_mount_opt, POSIX_ACL);
++#endif
+       if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
+               sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
+       else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
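The user-visible symptom, sketched on the assumption of a umask of 022 and a file created 0666 on an affected mount (the file name is illustrative): the creation mode stops being masked.

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	int fd;

	umask(022);
	fd = open("umask-test", O_CREAT | O_WRONLY, 0666);
	if (fd < 0 || fstat(fd, &st) != 0)
		return 1;
	/* Expect 0644; a pre-patch noACL kernel on an "acl"-tuned
	 * extN filesystem reports 0666 here. */
	printf("mode: %04o\n", (unsigned)(st.st_mode & 07777));
	close(fd);
	unlink("umask-test");
	return 0;
}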
diff --git a/review-2.6.18/make-ppc64-current-preempt-safe.patch b/review-2.6.18/make-ppc64-current-preempt-safe.patch
new file mode 100644 (file)
index 0000000..b15113e
--- /dev/null
@@ -0,0 +1,54 @@
+From hugh_dickins@symantec.com  Thu Jan  4 12:32:47 2007
+Date: Thu, 4 Jan 2007 20:26:22 +0000 (GMT)
+From: Hugh Dickins <hugh@veritas.com>
+To: Chris Wright <chrisw@sous-sol.org>
+Cc: Paul Mackerras <paulus@samba.org>, Benjamin Herrenschmidt <benh@kernel.crashing.org>, stable@kernel.org
+Subject: [PATCH 2.6.18-stable] make ppc64 current preempt-safe
+Message-ID: <Pine.LNX.4.64.0701042022230.32026@blonde.wat.veritas.com>
+
+Repeated -j20 kernel builds on a G5 Quad running an SMP PREEMPT kernel
+would often collapse within a day, some exec failing with "Bad address".
+In each case examined, load_elf_binary was doing a kernel_read, but
+generic_file_aio_read's access_ok saw current->thread.fs.seg as USER_DS
+instead of KERNEL_DS.
+
+objdump of filemap.o shows gcc 4.1.0 emitting "mr r5,r13 ... ld r9,416(r5)"
+here for get_paca()->__current, instead of the expected and much more usual
+"ld r9,416(r13)"; I've seen other gcc4s do the same, but perhaps not gcc3s.
+
+So, if the task is preempted and rescheduled on a different cpu in between
+the mr and the ld, r5 will be looking at a different paca_struct from the
+one it's now on, pick up the wrong __current, and perhaps the wrong seg.
+Presumably much worse could happen elsewhere, though that split is rare.
+
+Other architectures appear to be safe (x86_64's read_pda is more limiting
+than get_paca), but ppc64 needs to force "current" into one instruction.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ include/asm-powerpc/current.h |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- linux-2.6.18.7.orig/include/asm-powerpc/current.h
++++ linux-2.6.18.7/include/asm-powerpc/current.h
+@@ -14,7 +14,17 @@ struct task_struct;
+ #ifdef __powerpc64__
+ #include <asm/paca.h>
+-#define current               (get_paca()->__current)
++static inline struct task_struct *get_current(void)
++{
++      struct task_struct *task;
++
++      __asm__ __volatile__("ld %0,%1(13)"
++      : "=r" (task)
++      : "i" (offsetof(struct paca_struct, __current)));
++
++      return task;
++}
++#define current       get_current()
+ #else
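A userspace analogy of the split-access hazard, not kernel code; the per-CPU table and its size are hypothetical, but the two-step read mirrors gcc's mr/ld split, with thread migration standing in for preemption.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

static int per_cpu_value[128];		/* hypothetical per-CPU table */

int main(void)
{
	int cpu = sched_getcpu();	/* step 1: like "mr r5,r13" */
	/* A migration here makes the next load index the wrong slot,
	 * just as preemption between the mr and the ld reads the
	 * wrong paca_struct on ppc64. */
	int v = per_cpu_value[cpu];	/* step 2: like "ld r9,416(r5)" */

	printf("cpu %d value %d\n", cpu, v);
	return 0;
}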
diff --git a/review-2.6.18/read_zero_pagealigned-locking-fix.patch b/review-2.6.18/read_zero_pagealigned-locking-fix.patch
new file mode 100644 (file)
index 0000000..0348e6f
--- /dev/null
@@ -0,0 +1,147 @@
+From stable-bounces@linux.kernel.org  Sun Dec 10 02:24:42 2006
+Message-Id: <200612101018.kBAAIiFj021055@shell0.pdx.osdl.net>
+From: akpm@osdl.org
+To: torvalds@osdl.org
+Date: Sun, 10 Dec 2006 02:18:43 -0800
+Cc: akpm@osdl.org, hugh@veritas.com, Ramiro.Voicu@cern.ch, stable@kernel.org
+Subject: read_zero_pagealigned() locking fix
+
+From: Hugh Dickins <hugh@veritas.com>
+
+Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel
+bugzilla 7645.  Right: read_zero_pagealigned uses down_read of mmap_sem,
+but another thread's racing read of /dev/zero, or a normal fault, can
+easily set that pte again, in between zap_page_range and zeromap_page_range
+getting there.  It's been wrong ever since 2.4.3.
+
+The simple fix is to use down_write instead, but that would serialize reads
+of /dev/zero more than at present: perhaps some app would be badly
+affected.  So instead let zeromap_page_range return the error instead of
+BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in
+that case - there's no need to optimize for it.
+
+Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of
+zeromap_page_range), though it really isn't interesting there.  And since
+mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that
+than -ENOMEM.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: Ramiro Voicu: <Ramiro.Voicu@cern.ch>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ drivers/char/mem.c |   12 ++++++++----
+ mm/memory.c        |   32 +++++++++++++++++++++-----------
+ 2 files changed, 29 insertions(+), 15 deletions(-)
+
+--- linux-2.6.18.7.orig/drivers/char/mem.c
++++ linux-2.6.18.7/drivers/char/mem.c
+@@ -616,7 +616,8 @@ static inline size_t read_zero_pagealign
+                       count = size;
+               zap_page_range(vma, addr, count, NULL);
+-              zeromap_page_range(vma, addr, count, PAGE_COPY);
++              if (zeromap_page_range(vma, addr, count, PAGE_COPY))
++                      break;
+               size -= count;
+               buf += count;
+@@ -683,11 +684,14 @@ out:
+ static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+ {
++      int err;
++
+       if (vma->vm_flags & VM_SHARED)
+               return shmem_zero_setup(vma);
+-      if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+-              return -EAGAIN;
+-      return 0;
++      err = zeromap_page_range(vma, vma->vm_start,
++                      vma->vm_end - vma->vm_start, vma->vm_page_prot);
++      BUG_ON(err == -EEXIST);
++      return err;
+ }
+ #else /* CONFIG_MMU */
+ static ssize_t read_zero(struct file * file, char * buf, 
+--- linux-2.6.18.7.orig/mm/memory.c
++++ linux-2.6.18.7/mm/memory.c
+@@ -1104,21 +1104,27 @@ static int zeromap_pte_range(struct mm_s
+ {
+       pte_t *pte;
+       spinlock_t *ptl;
++      int err = 0;
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+       if (!pte)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               struct page *page = ZERO_PAGE(addr);
+               pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
++
++              if (unlikely(!pte_none(*pte))) {
++                      err = -EEXIST;
++                      pte++;
++                      break;
++              }
+               page_cache_get(page);
+               page_add_file_rmap(page);
+               inc_mm_counter(mm, file_rss);
+-              BUG_ON(!pte_none(*pte));
+               set_pte_at(mm, addr, pte, zero_pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap_unlock(pte - 1, ptl);
+-      return 0;
++      return err;
+ }
+ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+@@ -1126,16 +1132,18 @@ static inline int zeromap_pmd_range(stru
+ {
+       pmd_t *pmd;
+       unsigned long next;
++      int err;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               next = pmd_addr_end(addr, end);
+-              if (zeromap_pte_range(mm, pmd, addr, next, prot))
+-                      return -ENOMEM;
++              err = zeromap_pte_range(mm, pmd, addr, next, prot);
++              if (err)
++                      break;
+       } while (pmd++, addr = next, addr != end);
+-      return 0;
++      return err;
+ }
+ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+@@ -1143,16 +1151,18 @@ static inline int zeromap_pud_range(stru
+ {
+       pud_t *pud;
+       unsigned long next;
++      int err;
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               next = pud_addr_end(addr, end);
+-              if (zeromap_pmd_range(mm, pud, addr, next, prot))
+-                      return -ENOMEM;
++              err = zeromap_pmd_range(mm, pud, addr, next, prot);
++              if (err)
++                      break;
+       } while (pud++, addr = next, addr != end);
+-      return 0;
++      return err;
+ }
+ int zeromap_page_range(struct vm_area_struct *vma,
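A sketch of the racy pattern being fixed, with illustrative sizes and iteration counts: two threads of one process read page-aligned chunks of /dev/zero into the same private anonymous mapping, so one can re-instantiate a pte between the other's zap_page_range() and zeromap_page_range(); whether the BUG_ON actually fires on an unpatched kernel depends on timing. Link with -lpthread.

#include <fcntl.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (256 * 4096)		/* page-aligned, multi-page reads */

static char *buf;

static void *reader(void *arg)
{
	int fd = open("/dev/zero", O_RDONLY);
	int i;

	(void)arg;
	if (fd < 0)
		return NULL;
	for (i = 0; i < 1000; i++)
		if (read(fd, buf, LEN) < 0)	/* takes the pagealigned path */
			break;
	close(fd);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	pthread_create(&t1, NULL, reader, NULL);
	pthread_create(&t2, NULL, reader, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}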
index c86ca36436912b45ce216a5a2761cf45f4aeceeb..8bc310182ff85222819360fc11d5534de40d3393 100644 (file)
@@ -16,3 +16,9 @@ V4L-fix-ks0127-status-flags.patch
 V4L-tveeprom-autodetect-LG-TAPC-G701D-as-tuner-type-37.patch
 V4L-buf_qbuf-fix-videobuf_queue-stream-corruption-and-lockup.patch
 x86_64-fix-2.6.18-regression-ptrace_oldsetoptions-should-be-accepted.patch
+fix-incorrect-user-space-access-locking-in-mincore.patch
+read_zero_pagealigned-locking-fix.patch
+fix-msync-error-on-unmapped-area.patch
+make-ppc64-current-preempt-safe.patch
+fix-for-shmem_truncate_range-bug_on.patch
+fix-umask-when-noacl-kernel-meets-extn-tuned-for-acls.patch