]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
more .27 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Tue, 12 May 2009 22:08:23 +0000 (15:08 -0700)
committerGreg Kroah-Hartman <gregkh@suse.de>
Tue, 12 May 2009 22:08:23 +0000 (15:08 -0700)
queue-2.6.27/fs-fix-page_mkwrite-error-cases-in-core-code-and-btrfs.patch [new file with mode: 0644]
queue-2.6.27/gfs2-fix-page_mkwrite-return-code.patch [new file with mode: 0644]
queue-2.6.27/mm-close-page_mkwrite-races.patch [new file with mode: 0644]
queue-2.6.27/mm-page_mkwrite-change-prototype-to-match-fault.patch [new file with mode: 0644]
queue-2.6.27/nfs-close-page_mkwrite-races.patch [new file with mode: 0644]
queue-2.6.27/nfs-fix-the-return-value-in-nfs_page_mkwrite.patch [new file with mode: 0644]
queue-2.6.27/series

diff --git a/queue-2.6.27/fs-fix-page_mkwrite-error-cases-in-core-code-and-btrfs.patch b/queue-2.6.27/fs-fix-page_mkwrite-error-cases-in-core-code-and-btrfs.patch
new file mode 100644 (file)
index 0000000..7daa067
--- /dev/null
@@ -0,0 +1,73 @@
+From npiggin@suse.de  Tue May 12 15:02:33 2009
+From: Nick Piggin <npiggin@suse.de>
+Date: Tue, 12 May 2009 16:23:48 +1000
+Subject: fs: fix page_mkwrite error cases in core code and btrfs
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070749.802465454@suse.de>
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit 56a76f8275c379ed73c8a43cfa1dfa2f5e9cfa19 upstream
+
+
+fs: fix page_mkwrite error cases in core code and btrfs
+
+page_mkwrite is called with neither the page lock nor the ptl held.  This
+means a page can be concurrently truncated or invalidated out from
+underneath it.  Callers are supposed to prevent truncate races themselves,
+however previously the only thing they can do in case they hit one is to
+raise a SIGBUS.  A sigbus is wrong for the case that the page has been
+invalidated or truncated within i_size (eg.  hole punched).  Callers may
+also have to perform memory allocations in this path, where again, SIGBUS
+would be wrong.
+
+The previous patch ("mm: page_mkwrite change prototype to match fault")
+made it possible to properly specify errors.  Convert the generic buffer.c
+code and btrfs to return sane error values (in the case of page removed
+from pagecache, VM_FAULT_NOPAGE will cause the fault handler to exit
+without doing anything, and the fault will be retried properly).
+
+This fixes core code, and converts btrfs as a template/example.  All other
+filesystems defining their own page_mkwrite should be fixed in a similar
+manner.
+
+Acked-by: Chris Mason <chris.mason@oracle.com>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/buffer.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2409,7 +2409,7 @@ block_page_mkwrite(struct vm_area_struct
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       unsigned long end;
+       loff_t size;
+-      int ret = -EINVAL;
++      int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
+       lock_page(page);
+       size = i_size_read(inode);
+@@ -2429,10 +2429,14 @@ block_page_mkwrite(struct vm_area_struct
+       if (!ret)
+               ret = block_commit_write(page, 0, end);
+-out_unlock:
+-      if (ret)
+-              ret = VM_FAULT_SIGBUS;
++      if (unlikely(ret)) {
++              if (ret == -ENOMEM)
++                      ret = VM_FAULT_OOM;
++              else /* -ENOSPC, -EIO, etc */
++                      ret = VM_FAULT_SIGBUS;
++      }
++out_unlock:
+       unlock_page(page);
+       return ret;
+ }
diff --git a/queue-2.6.27/gfs2-fix-page_mkwrite-return-code.patch b/queue-2.6.27/gfs2-fix-page_mkwrite-return-code.patch
new file mode 100644 (file)
index 0000000..d4df586
--- /dev/null
@@ -0,0 +1,39 @@
+From npiggin@suse.de  Tue May 12 15:04:20 2009
+From: Steven Whitehouse <swhiteho@redhat.com>
+Date: Tue, 12 May 2009 16:23:50 +1000
+Subject: GFS2: Fix page_mkwrite() return code
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070750.090713470@suse.de>
+
+
+From: Steven Whitehouse <swhiteho@redhat.com>
+
+commit e56985da455b9dc0591b8cb2006cc94b6f4fb0f4 upstream
+
+This allows for the possibility of returning VM_FAULT_OOM as
+well as VM_FAULT_SIGBUS. This ensures that the correct action
+is taken.
+
+Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
+Cc: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ fs/gfs2/ops_file.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/gfs2/ops_file.c
++++ b/fs/gfs2/ops_file.c
+@@ -412,7 +412,9 @@ out_unlock:
+       gfs2_glock_dq(&gh);
+ out:
+       gfs2_holder_uninit(&gh);
+-      if (ret)
++      if (ret == -ENOMEM)
++              ret = VM_FAULT_OOM;
++      else if (ret)
+               ret = VM_FAULT_SIGBUS;
+       return ret;
+ }
diff --git a/queue-2.6.27/mm-close-page_mkwrite-races.patch b/queue-2.6.27/mm-close-page_mkwrite-races.patch
new file mode 100644 (file)
index 0000000..1737407
--- /dev/null
@@ -0,0 +1,318 @@
+From npiggin@suse.de  Tue May 12 15:03:26 2009
+From: Nick Piggin <npiggin@suse.de>
+Date: Tue, 12 May 2009 16:23:49 +1000
+Subject: mm: close page_mkwrite races
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070749.947439955@suse.de>
+
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit b827e496c893de0c0f142abfaeb8730a2fd6b37f upstream
+
+mm: close page_mkwrite races
+
+Change page_mkwrite to allow implementations to return with the page
+locked, and also change it's callers (in page fault paths) to hold the
+lock until the page is marked dirty.  This allows the filesystem to have
+full control of page dirtying events coming from the VM.
+
+Rather than simply hold the page locked over the page_mkwrite call, we
+call page_mkwrite with the page unlocked and allow callers to return with
+it locked, so filesystems can avoid LOR conditions with page lock.
+
+The problem with the current scheme is this: a filesystem that wants to
+associate some metadata with a page as long as the page is dirty, will
+perform this manipulation in its ->page_mkwrite.  It currently then must
+return with the page unlocked and may not hold any other locks (according
+to existing page_mkwrite convention).
+
+In this window, the VM could write out the page, clearing page-dirty.  The
+filesystem has no good way to detect that a dirty pte is about to be
+attached, so it will happily write out the page, at which point, the
+filesystem may manipulate the metadata to reflect that the page is no
+longer dirty.
+
+It is not always possible to perform the required metadata manipulation in
+->set_page_dirty, because that function cannot block or fail.  The
+filesystem may need to allocate some data structure, for example.
+
+And the VM cannot mark the pte dirty before page_mkwrite, because
+page_mkwrite is allowed to fail, so we must not allow any window where the
+page could be written to if page_mkwrite does fail.
+
+This solution of holding the page locked over the 3 critical operations
+(page_mkwrite, setting the pte dirty, and finally setting the page dirty)
+closes out races nicely, preventing page cleaning for writeout being
+initiated in that window.  This provides the filesystem with a strong
+synchronisation against the VM here.
+
+- Sage needs this race closed for ceph filesystem.
+- Trond for NFS (http://bugzilla.kernel.org/show_bug.cgi?id=12913).
+- I need it for fsblock.
+- I suspect other filesystems may need it too (eg. btrfs).
+- I have converted buffer.c to the new locking. Even simple block allocation
+  under dirty pages might be susceptible to i_size changing under partial page
+  at the end of file (we also have a buffer.c-side problem here, but it cannot
+  be fixed properly without this patch).
+- Other filesystems (eg. NFS, maybe btrfs) will need to change their
+  page_mkwrite functions themselves.
+
+[ This also moves page_mkwrite another step closer to fault, which should
+  eventually allow page_mkwrite to be moved into ->fault, and thus avoiding a
+  filesystem calldown and page lock/unlock cycle in __do_fault. ]
+
+[akpm@linux-foundation.org: fix derefs of NULL ->mapping]
+Cc: Sage Weil <sage@newdream.net>
+Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ Documentation/filesystems/Locking |   24 +++++---
+ fs/buffer.c                       |   10 ++-
+ mm/memory.c                       |  108 ++++++++++++++++++++++++++------------
+ 3 files changed, 98 insertions(+), 44 deletions(-)
+
+--- a/Documentation/filesystems/Locking
++++ b/Documentation/filesystems/Locking
+@@ -509,16 +509,24 @@ locking rules:
+               BKL     mmap_sem        PageLocked(page)
+ open:         no      yes
+ close:                no      yes
+-fault:                no      yes
+-page_mkwrite: no      yes             no
++fault:                no      yes             can return with page locked
++page_mkwrite: no      yes             can return with page locked
+ access:               no      yes
+-      ->page_mkwrite() is called when a previously read-only page is
+-about to become writeable. The file system is responsible for
+-protecting against truncate races. Once appropriate action has been
+-taking to lock out truncate, the page range should be verified to be
+-within i_size. The page mapping should also be checked that it is not
+-NULL.
++      ->fault() is called when a previously not present pte is about
++to be faulted in. The filesystem must find and return the page associated
++with the passed in "pgoff" in the vm_fault structure. If it is possible that
++the page may be truncated and/or invalidated, then the filesystem must lock
++the page, then ensure it is not already truncated (the page lock will block
++subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
++locked. The VM will unlock the page.
++
++      ->page_mkwrite() is called when a previously read-only pte is
++about to become writeable. The filesystem again must ensure that there are
++no truncate/invalidate races, and then return with the page locked. If
++the page has been truncated, the filesystem should not look up a new page
++like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
++will cause the VM to retry the fault.
+       ->access() is called when get_user_pages() fails in
+ acces_process_vm(), typically used to debug a process through
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2416,7 +2416,8 @@ block_page_mkwrite(struct vm_area_struct
+       if ((page->mapping != inode->i_mapping) ||
+           (page_offset(page) > size)) {
+               /* page got truncated out from underneath us */
+-              goto out_unlock;
++              unlock_page(page);
++              goto out;
+       }
+       /* page is wholly or partially inside EOF */
+@@ -2430,14 +2431,15 @@ block_page_mkwrite(struct vm_area_struct
+               ret = block_commit_write(page, 0, end);
+       if (unlikely(ret)) {
++              unlock_page(page);
+               if (ret == -ENOMEM)
+                       ret = VM_FAULT_OOM;
+               else /* -ENOSPC, -EIO, etc */
+                       ret = VM_FAULT_SIGBUS;
+-      }
++      } else
++              ret = VM_FAULT_LOCKED;
+-out_unlock:
+-      unlock_page(page);
++out:
+       return ret;
+ }
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1827,6 +1827,15 @@ static int do_wp_page(struct mm_struct *
+                               ret = tmp;
+                               goto unwritable_page;
+                       }
++                      if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
++                              lock_page(old_page);
++                              if (!old_page->mapping) {
++                                      ret = 0; /* retry the fault */
++                                      unlock_page(old_page);
++                                      goto unwritable_page;
++                              }
++                      } else
++                              VM_BUG_ON(!PageLocked(old_page));
+                       /*
+                        * Since we dropped the lock we need to revalidate
+@@ -1836,9 +1845,11 @@ static int do_wp_page(struct mm_struct *
+                        */
+                       page_table = pte_offset_map_lock(mm, pmd, address,
+                                                        &ptl);
+-                      page_cache_release(old_page);
+-                      if (!pte_same(*page_table, orig_pte))
++                      if (!pte_same(*page_table, orig_pte)) {
++                              unlock_page(old_page);
++                              page_cache_release(old_page);
+                               goto unlock;
++                      }
+                       page_mkwrite = 1;
+               }
+@@ -1943,9 +1954,6 @@ gotten:
+ unlock:
+       pte_unmap_unlock(page_table, ptl);
+       if (dirty_page) {
+-              if (vma->vm_file)
+-                      file_update_time(vma->vm_file);
+-
+               /*
+                * Yes, Virginia, this is actually required to prevent a race
+                * with clear_page_dirty_for_io() from clearing the page dirty
+@@ -1954,16 +1962,41 @@ unlock:
+                *
+                * do_no_page is protected similarly.
+                */
+-              wait_on_page_locked(dirty_page);
+-              set_page_dirty_balance(dirty_page, page_mkwrite);
++              if (!page_mkwrite) {
++                      wait_on_page_locked(dirty_page);
++                      set_page_dirty_balance(dirty_page, page_mkwrite);
++              }
+               put_page(dirty_page);
++              if (page_mkwrite) {
++                      struct address_space *mapping = dirty_page->mapping;
++
++                      set_page_dirty(dirty_page);
++                      unlock_page(dirty_page);
++                      page_cache_release(dirty_page);
++                      if (mapping)    {
++                              /*
++                               * Some device drivers do not set page.mapping
++                               * but still dirty their pages
++                               */
++                              balance_dirty_pages_ratelimited(mapping);
++                      }
++              }
++
++              /* file_update_time outside page_lock */
++              if (vma->vm_file)
++                      file_update_time(vma->vm_file);
+       }
+       return ret;
+ oom_free_new:
+       page_cache_release(new_page);
+ oom:
+-      if (old_page)
++      if (old_page) {
++              if (page_mkwrite) {
++                      unlock_page(old_page);
++                      page_cache_release(old_page);
++              }
+               page_cache_release(old_page);
++      }
+       return VM_FAULT_OOM;
+ unwritable_page:
+@@ -2488,27 +2521,22 @@ static int __do_fault(struct mm_struct *
+                               int tmp;
+                               unlock_page(page);
+-                              vmf.flags |= FAULT_FLAG_MKWRITE;
++                              vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+                               tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
+                               if (unlikely(tmp &
+                                         (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
+                                       ret = tmp;
+-                                      anon = 1; /* no anon but release vmf.page */
+-                                      goto out_unlocked;
+-                              }
+-                              lock_page(page);
+-                              /*
+-                               * XXX: this is not quite right (racy vs
+-                               * invalidate) to unlock and relock the page
+-                               * like this, however a better fix requires
+-                               * reworking page_mkwrite locking API, which
+-                               * is better done later.
+-                               */
+-                              if (!page->mapping) {
+-                                      ret = 0;
+-                                      anon = 1; /* no anon but release vmf.page */
+-                                      goto out;
++                                      goto unwritable_page;
+                               }
++                              if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
++                                      lock_page(page);
++                                      if (!page->mapping) {
++                                              ret = 0; /* retry the fault */
++                                              unlock_page(page);
++                                              goto unwritable_page;
++                                      }
++                              } else
++                                      VM_BUG_ON(!PageLocked(page));
+                               page_mkwrite = 1;
+                       }
+               }
+@@ -2565,19 +2593,35 @@ static int __do_fault(struct mm_struct *
+       pte_unmap_unlock(page_table, ptl);
+ out:
+-      unlock_page(vmf.page);
+-out_unlocked:
+-      if (anon)
+-              page_cache_release(vmf.page);
+-      else if (dirty_page) {
+-              if (vma->vm_file)
+-                      file_update_time(vma->vm_file);
++      if (dirty_page) {
++              struct address_space *mapping = page->mapping;
+-              set_page_dirty_balance(dirty_page, page_mkwrite);
++              if (set_page_dirty(dirty_page))
++                      page_mkwrite = 1;
++              unlock_page(dirty_page);
+               put_page(dirty_page);
++              if (page_mkwrite && mapping) {
++                      /*
++                       * Some device drivers do not set page.mapping but still
++                       * dirty their pages
++                       */
++                      balance_dirty_pages_ratelimited(mapping);
++              }
++
++              /* file_update_time outside page_lock */
++              if (vma->vm_file)
++                      file_update_time(vma->vm_file);
++      } else {
++              unlock_page(vmf.page);
++              if (anon)
++                      page_cache_release(vmf.page);
+       }
+       return ret;
++
++unwritable_page:
++      page_cache_release(page);
++      return ret;
+ }
+ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/queue-2.6.27/mm-page_mkwrite-change-prototype-to-match-fault.patch b/queue-2.6.27/mm-page_mkwrite-change-prototype-to-match-fault.patch
new file mode 100644 (file)
index 0000000..d8343aa
--- /dev/null
@@ -0,0 +1,358 @@
+From npiggin@suse.de  Tue May 12 15:01:25 2009
+From: npiggin@suse.de
+Date: Tue, 12 May 2009 16:23:47 +1000
+Subject: mm: page_mkwrite change prototype to match fault
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070749.653185658@suse.de>
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit c2ec175c39f62949438354f603f4aa170846aabb upstream
+
+
+mm: page_mkwrite change prototype to match fault
+
+Change the page_mkwrite prototype to take a struct vm_fault, and return
+VM_FAULT_xxx flags.  There should be no functional change.
+
+This makes it possible to return much more detailed error information to
+the VM (and also can provide more information eg.  virtual_address to the
+driver, which might be important in some special cases).
+
+This is required for a subsequent fix.  And will also make it easier to
+merge page_mkwrite() with fault() in future.
+
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Cc: Chris Mason <chris.mason@oracle.com>
+Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Steven Whitehouse <swhiteho@redhat.com>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Cc: Joel Becker <joel.becker@oracle.com>
+Cc: Artem Bityutskiy <dedekind@infradead.org>
+Cc: Felix Blyakher <felixb@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ Documentation/filesystems/Locking |    2 +-
+ drivers/video/fb_defio.c          |    3 ++-
+ fs/buffer.c                       |    6 +++++-
+ fs/ext4/ext4.h                    |    2 +-
+ fs/ext4/inode.c                   |    5 ++++-
+ fs/fuse/file.c                    |    3 ++-
+ fs/gfs2/ops_file.c                |    5 ++++-
+ fs/nfs/file.c                     |    5 ++++-
+ fs/ocfs2/mmap.c                   |    6 ++++--
+ fs/ubifs/file.c                   |    9 ++++++---
+ fs/xfs/linux-2.6/xfs_file.c       |    4 ++--
+ include/linux/buffer_head.h       |    2 +-
+ include/linux/mm.h                |    3 ++-
+ mm/memory.c                       |   26 ++++++++++++++++++++++----
+ 14 files changed, 60 insertions(+), 21 deletions(-)
+
+--- a/Documentation/filesystems/Locking
++++ b/Documentation/filesystems/Locking
+@@ -502,7 +502,7 @@ prototypes:
+       void (*open)(struct vm_area_struct*);
+       void (*close)(struct vm_area_struct*);
+       int (*fault)(struct vm_area_struct*, struct vm_fault *);
+-      int (*page_mkwrite)(struct vm_area_struct *, struct page *);
++      int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+       int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
+ locking rules:
+--- a/drivers/video/fb_defio.c
++++ b/drivers/video/fb_defio.c
+@@ -70,8 +70,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
+ /* vm_ops->page_mkwrite handler */
+ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
+-                                struct page *page)
++                                struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       struct fb_info *info = vma->vm_private_data;
+       struct fb_deferred_io *fbdefio = info->fbdefio;
+       struct page *cur;
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2402,9 +2402,10 @@ int block_commit_write(struct page *page
+  * unlock the page.
+  */
+ int
+-block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
++block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+                  get_block_t get_block)
+ {
++      struct page *page = vmf->page;
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       unsigned long end;
+       loff_t size;
+@@ -2429,6 +2430,9 @@ block_page_mkwrite(struct vm_area_struct
+               ret = block_commit_write(page, 0, end);
+ out_unlock:
++      if (ret)
++              ret = VM_FAULT_SIGBUS;
++
+       unlock_page(page);
+       return ret;
+ }
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1084,7 +1084,7 @@ extern int ext4_meta_trans_blocks(struct
+ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
+ extern int ext4_block_truncate_page(handle_t *handle,
+               struct address_space *mapping, loff_t from);
+-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
++extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+ /* ioctl.c */
+ extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4861,8 +4861,9 @@ static int ext4_bh_unmapped(handle_t *ha
+       return !buffer_mapped(bh);
+ }
+-int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       loff_t size;
+       unsigned long len;
+       int ret = -EINVAL;
+@@ -4913,6 +4914,8 @@ int ext4_page_mkwrite(struct vm_area_str
+               goto out_unlock;
+       ret = 0;
+ out_unlock:
++      if (ret)
++              ret = VM_FAULT_SIGBUS;
+       up_read(&inode->i_alloc_sem);
+       return ret;
+ }
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1219,8 +1219,9 @@ static void fuse_vma_close(struct vm_are
+  * - sync(2)
+  * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
+  */
+-static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       /*
+        * Don't use page->mapping as it may become NULL from a
+        * concurrent truncate.
+--- a/fs/gfs2/ops_file.c
++++ b/fs/gfs2/ops_file.c
+@@ -338,8 +338,9 @@ static int gfs2_allocate_page_backing(st
+  * blocks allocated on disk to back that page.
+  */
+-static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+@@ -411,6 +412,8 @@ out_unlock:
+       gfs2_glock_dq(&gh);
+ out:
+       gfs2_holder_uninit(&gh);
++      if (ret)
++              ret = VM_FAULT_SIGBUS;
+       return ret;
+ }
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -448,8 +448,9 @@ const struct address_space_operations nf
+       .launder_page = nfs_launder_page,
+ };
+-static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       struct file *filp = vma->vm_file;
+       struct dentry *dentry = filp->f_path.dentry;
+       unsigned pagelen;
+@@ -480,6 +481,8 @@ static int nfs_vm_page_mkwrite(struct vm
+               ret = pagelen;
+ out_unlock:
+       unlock_page(page);
++      if (ret)
++              ret = VM_FAULT_SIGBUS;
+       return ret;
+ }
+--- a/fs/ocfs2/mmap.c
++++ b/fs/ocfs2/mmap.c
+@@ -150,8 +150,9 @@ out:
+       return ret;
+ }
+-static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct buffer_head *di_bh = NULL;
+       sigset_t blocked, oldset;
+@@ -192,7 +193,8 @@ out:
+       ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
+       if (ret2 < 0)
+               mlog_errno(ret2);
+-
++      if (ret)
++              ret = VM_FAULT_SIGBUS;
+       return ret;
+ }
+--- a/fs/ubifs/file.c
++++ b/fs/ubifs/file.c
+@@ -1140,8 +1140,9 @@ static int ubifs_releasepage(struct page
+  * mmap()d file has taken write protection fault and is being made
+  * writable. UBIFS must ensure page is budgeted for.
+  */
+-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
++      struct page *page = vmf->page;
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct timespec now = ubifs_current_time(inode);
+@@ -1153,7 +1154,7 @@ static int ubifs_vm_page_mkwrite(struct 
+       ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
+       if (unlikely(c->ro_media))
+-              return -EROFS;
++              return VM_FAULT_SIGBUS; /* -EROFS */
+       /*
+        * We have not locked @page so far so we may budget for changing the
+@@ -1186,7 +1187,7 @@ static int ubifs_vm_page_mkwrite(struct 
+               if (err == -ENOSPC)
+                       ubifs_warn("out of space for mmapped file "
+                                  "(inode number %lu)", inode->i_ino);
+-              return err;
++              return VM_FAULT_SIGBUS;
+       }
+       lock_page(page);
+@@ -1226,6 +1227,8 @@ static int ubifs_vm_page_mkwrite(struct 
+ out_unlock:
+       unlock_page(page);
+       ubifs_release_budget(c, &req);
++      if (err)
++              err = VM_FAULT_SIGBUS;
+       return err;
+ }
+--- a/fs/xfs/linux-2.6/xfs_file.c
++++ b/fs/xfs/linux-2.6/xfs_file.c
+@@ -427,9 +427,9 @@ xfs_file_ioctl_invis(
+ STATIC int
+ xfs_vm_page_mkwrite(
+       struct vm_area_struct   *vma,
+-      struct page             *page)
++      struct vm_fault         *vmf)
+ {
+-      return block_page_mkwrite(vma, page, xfs_get_blocks);
++      return block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ }
+ const struct file_operations xfs_file_operations = {
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -222,7 +222,7 @@ int cont_write_begin(struct file *, stru
+                       get_block_t *, loff_t *);
+ int generic_cont_expand_simple(struct inode *inode, loff_t size);
+ int block_commit_write(struct page *page, unsigned from, unsigned to);
+-int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
++int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+                               get_block_t get_block);
+ void block_sync_page(struct page *);
+ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -138,6 +138,7 @@ extern pgprot_t protection_map[16];
+ #define FAULT_FLAG_WRITE      0x01    /* Fault was a write access */
+ #define FAULT_FLAG_NONLINEAR  0x02    /* Fault was via a nonlinear mapping */
++#define FAULT_FLAG_MKWRITE    0x04    /* Fault was mkwrite of existing pte */
+ /*
+@@ -173,7 +174,7 @@ struct vm_operations_struct {
+       /* notification that a previously read-only page is about to become
+        * writable, if an error is returned it will cause a SIGBUS */
+-      int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
++      int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+       /* called by access_process_vm when get_user_pages() fails, typically
+        * for use by special VMAs that can switch between memory and hardware
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1801,6 +1801,15 @@ static int do_wp_page(struct mm_struct *
+                * get_user_pages(.write=1, .force=1).
+                */
+               if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
++                      struct vm_fault vmf;
++                      int tmp;
++
++                      vmf.virtual_address = (void __user *)(address &
++                                                              PAGE_MASK);
++                      vmf.pgoff = old_page->index;
++                      vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
++                      vmf.page = old_page;
++
+                       /*
+                        * Notify the address space that the page is about to
+                        * become writable so that it can prohibit this or wait
+@@ -1812,8 +1821,12 @@ static int do_wp_page(struct mm_struct *
+                       page_cache_get(old_page);
+                       pte_unmap_unlock(page_table, ptl);
+-                      if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
++                      tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
++                      if (unlikely(tmp &
++                                      (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
++                              ret = tmp;
+                               goto unwritable_page;
++                      }
+                       /*
+                        * Since we dropped the lock we need to revalidate
+@@ -1955,7 +1968,7 @@ oom:
+ unwritable_page:
+       page_cache_release(old_page);
+-      return VM_FAULT_SIGBUS;
++      return ret;
+ }
+ /*
+@@ -2472,9 +2485,14 @@ static int __do_fault(struct mm_struct *
+                        * to become writable
+                        */
+                       if (vma->vm_ops->page_mkwrite) {
++                              int tmp;
++
+                               unlock_page(page);
+-                              if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
+-                                      ret = VM_FAULT_SIGBUS;
++                              vmf.flags |= FAULT_FLAG_MKWRITE;
++                              tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
++                              if (unlikely(tmp &
++                                        (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
++                                      ret = tmp;
+                                       anon = 1; /* no anon but release vmf.page */
+                                       goto out_unlocked;
+                               }
diff --git a/queue-2.6.27/nfs-close-page_mkwrite-races.patch b/queue-2.6.27/nfs-close-page_mkwrite-races.patch
new file mode 100644 (file)
index 0000000..4b2e0f0
--- /dev/null
@@ -0,0 +1,44 @@
+From npiggin@suse.de  Tue May 12 15:06:05 2009
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 12 May 2009 16:23:52 +1000
+Subject: NFS: Close page_mkwrite() races
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070750.379544453@suse.de>
+
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 7fdf523067666b0eaff330f362401ee50ce187c4 upstream
+
+Follow up to Nick Piggin's patches to ensure that nfs_vm_page_mkwrite
+returns with the page lock held, and sets the VM_FAULT_LOCKED flag.
+
+See http://bugzilla.kernel.org/show_bug.cgi?id=12913
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ fs/nfs/file.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -478,10 +478,10 @@ static int nfs_vm_page_mkwrite(struct vm
+       ret = nfs_updatepage(filp, page, 0, pagelen);
+ out_unlock:
++      if (!ret)
++              return VM_FAULT_LOCKED;
+       unlock_page(page);
+-      if (ret)
+-              ret = VM_FAULT_SIGBUS;
+-      return ret;
++      return VM_FAULT_SIGBUS;
+ }
+ static struct vm_operations_struct nfs_file_vm_ops = {
diff --git a/queue-2.6.27/nfs-fix-the-return-value-in-nfs_page_mkwrite.patch b/queue-2.6.27/nfs-fix-the-return-value-in-nfs_page_mkwrite.patch
new file mode 100644 (file)
index 0000000..6323f30
--- /dev/null
@@ -0,0 +1,36 @@
+From npiggin@suse.de  Tue May 12 15:05:16 2009
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 12 May 2009 16:23:51 +1000
+Subject: NFS: Fix the return value in nfs_page_mkwrite()
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org
+Message-ID: <20090512070750.234686168@suse.de>
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 2b2ec7554cf7ec5e4412f89a5af6abe8ce950700 upstream
+
+Commit c2ec175c39f62949438354f603f4aa170846aabb ("mm: page_mkwrite
+change prototype to match fault") exposed a bug in the NFS
+implementation of page_mkwrite.  We should be returning 0 on success...
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/file.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -477,8 +477,6 @@ static int nfs_vm_page_mkwrite(struct vm
+               goto out_unlock;
+       ret = nfs_updatepage(filp, page, 0, pagelen);
+-      if (ret == 0)
+-              ret = pagelen;
+ out_unlock:
+       unlock_page(page);
+       if (ret)
index d7fb6fb768622b5ee0723a72b2c9f49922bf5c8e..ad07d363b6b08792dee9e483757acc906d9f8314 100644 (file)
@@ -6,3 +6,9 @@ usb-gadget-fix-utf-conversion-in-the-usbstring-library.patch
 dup2-fix-return-value-with-oldfd-newfd-and-invalid-fd.patch
 i2c-algo-bit-fix-timeout-test.patch
 i2c-algo-pca-let-pca9564-recover-from-unacked-data-byte.patch
+mm-page_mkwrite-change-prototype-to-match-fault.patch
+fs-fix-page_mkwrite-error-cases-in-core-code-and-btrfs.patch
+mm-close-page_mkwrite-races.patch
+gfs2-fix-page_mkwrite-return-code.patch
+nfs-fix-the-return-value-in-nfs_page_mkwrite.patch
+nfs-close-page_mkwrite-races.patch