]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
start 2.6.32.1 review cycle
authorGreg Kroah-Hartman <gregkh@suse.de>
Mon, 14 Dec 2009 17:32:45 +0000 (09:32 -0800)
committerGreg Kroah-Hartman <gregkh@suse.de>
Mon, 14 Dec 2009 17:32:45 +0000 (09:32 -0800)
37 files changed:
queue-2.6.32/mbox [new file with mode: 0644]
review-2.6.32/0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch [new file with mode: 0644]
review-2.6.32/0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch [new file with mode: 0644]
review-2.6.32/0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch [new file with mode: 0644]
review-2.6.32/0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch [new file with mode: 0644]
review-2.6.32/0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch [new file with mode: 0644]
review-2.6.32/0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch [new file with mode: 0644]
review-2.6.32/0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch [new file with mode: 0644]
review-2.6.32/0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch [new file with mode: 0644]
review-2.6.32/0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch [new file with mode: 0644]
review-2.6.32/0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch [new file with mode: 0644]
review-2.6.32/0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch [new file with mode: 0644]
review-2.6.32/0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch [new file with mode: 0644]
review-2.6.32/0013-ext4-avoid-issuing-unnecessary-barriers.patch [new file with mode: 0644]
review-2.6.32/0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch [new file with mode: 0644]
review-2.6.32/0015-ext4-make-trim-discard-optional-and-off-by-default.patch [new file with mode: 0644]
review-2.6.32/0016-ext4-make-norecovery-an-alias-for-noload.patch [new file with mode: 0644]
review-2.6.32/0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch [new file with mode: 0644]
review-2.6.32/0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch [new file with mode: 0644]
review-2.6.32/0019-ext4-move_extent_per_page-cleanup.patch [new file with mode: 0644]
review-2.6.32/0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch [new file with mode: 0644]
review-2.6.32/0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch [new file with mode: 0644]
review-2.6.32/0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch [new file with mode: 0644]
review-2.6.32/0023-ext4-wait-for-log-to-commit-when-umounting.patch [new file with mode: 0644]
review-2.6.32/0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch [new file with mode: 0644]
review-2.6.32/0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch [new file with mode: 0644]
review-2.6.32/0026-ext4-quota-macros-cleanup.patch [new file with mode: 0644]
review-2.6.32/0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch [new file with mode: 0644]
review-2.6.32/0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch [new file with mode: 0644]
review-2.6.32/0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch [new file with mode: 0644]
review-2.6.32/0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch [new file with mode: 0644]
review-2.6.32/mbox [new file with mode: 0644]
review-2.6.32/scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch [new file with mode: 0644]
review-2.6.32/scsi-osd_protocol.h-add-missing-include.patch [new file with mode: 0644]
review-2.6.32/scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch [new file with mode: 0644]
review-2.6.32/series [new file with mode: 0644]
review-2.6.32/signal-fix-alternate-signal-stack-check.patch [new file with mode: 0644]

diff --git a/queue-2.6.32/mbox b/queue-2.6.32/mbox
new file mode 100644 (file)
index 0000000..27686e4
--- /dev/null
@@ -0,0 +1,3730 @@
+From linux@linux.site Thu Dec 10 21:25:40 2009
+Message-Id: <20091211052540.442199443@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:13 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Sebastian Andrzej Siewior <sebastian@breakpoint.cc>,
+ Oleg Nesterov <oleg@redhat.com>,
+ Roland McGrath <roland@redhat.com>,
+ Kyle McMartin <kyle@mcmartin.ca>,
+ Thomas Gleixner <tglx@linutronix.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [01/34] signal: Fix alternate signal stack check
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=signal-fix-alternate-signal-stack-check.patch
+Content-Length: 2919
+Lines: 83
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+
+commit 2a855dd01bc1539111adb7233f587c5c468732ac upstream.
+
+All architectures in the kernel increment/decrement the stack pointer
+before storing values on the stack.
+
+On architectures which have the stack grow down sas_ss_sp == sp is not
+on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is
+on the alternate signal stack.
+
+On architectures which have the stack grow up sas_ss_sp == sp is on
+the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not
+on the alternate signal stack.
+
+The current implementation fails for architectures which have the
+stack grow down on the corner case where sas_ss_sp == sp. This was
+reported as Debian bug #544905 on AMD64.
+Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c
+
+The test case creates the following stack scenario:
+   0xn0300     stack top
+   0xn0200     alt stack pointer top (when switching to alt stack)
+   0xn01ff     alt stack end
+   0xn0100     alt stack start == stack pointer
+
+If the signal is sent the stack pointer is pointing to the base
+address of the alt stack and the kernel erroneously decides that it
+has already switched to the alternate stack because of the current
+check for "sp - sas_ss_sp < sas_ss_size"
+
+On parisc (stack grows up) the scenario would be:
+   0xn0200     stack pointer
+   0xn01ff     alt stack end
+   0xn0100     alt stack start = alt stack pointer base
+                                 (when switching to alt stack)
+   0xn0000     stack base
+
+This is handled correctly by the current implementation.
+
+[ tglx: Modified for archs which have the stack grow up (parisc) which
+       would fail with the correct implementation for stack grows
+       down. Added a check for sp >= current->sas_ss_sp which is
+       strictly not necessary but makes the code symmetric for both
+       variants ]
+
+Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Kyle McMartin <kyle@mcmartin.ca>
+LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/sched.h |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2086,11 +2086,18 @@ static inline int is_si_special(const st
+       return info <= SEND_SIG_FORCED;
+ }
+-/* True if we are on the alternate signal stack.  */
+-
++/*
++ * True if we are on the alternate signal stack.
++ */
+ static inline int on_sig_stack(unsigned long sp)
+ {
+-      return (sp - current->sas_ss_sp < current->sas_ss_size);
++#ifdef CONFIG_STACK_GROWSUP
++      return sp >= current->sas_ss_sp &&
++              sp - current->sas_ss_sp < current->sas_ss_size;
++#else
++      return sp > current->sas_ss_sp &&
++              sp - current->sas_ss_sp <= current->sas_ss_size;
++#endif
+ }
+ static inline int sas_ss_flags(unsigned long sp)
+
+
+From linux@linux.site Thu Dec 10 21:25:41 2009
+Message-Id: <20091211052540.941627509@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:14 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ James Smart <james.smart@emulex.com>,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [02/34] SCSI: scsi_lib_dma: fix bug with dma maps on nested scsi objects
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch
+Content-Length: 5210
+Lines: 149
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: James Bottomley <James.Bottomley@suse.de>
+
+commit d139b9bd0e52dda14fd13412e7096e68b56d0076 upstream.
+
+Some of our virtual SCSI hosts don't have a proper bus parent at the
+top, which can be a problem for doing DMA on them.
+
+This patch makes the host device cache a pointer to the physical bus
+device and provides an extra API for setting it (the normal API picks
+it up from the parent).  This patch also modifies the qla2xxx and lpfc
+vport logic to use the new DMA host setting API.
+
+Acked-By: James Smart  <james.smart@emulex.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/hosts.c            |   13 ++++++++++---
+ drivers/scsi/lpfc/lpfc_init.c   |    2 +-
+ drivers/scsi/qla2xxx/qla_attr.c |    3 ++-
+ drivers/scsi/scsi_lib_dma.c     |    4 ++--
+ include/scsi/scsi_host.h        |   16 +++++++++++++++-
+ 5 files changed, 30 insertions(+), 8 deletions(-)
+
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *
+ EXPORT_SYMBOL(scsi_remove_host);
+ /**
+- * scsi_add_host - add a scsi host
++ * scsi_add_host_with_dma - add a scsi host with dma device
+  * @shost:    scsi host pointer to add
+  * @dev:      a struct device of type scsi class
++ * @dma_dev:  dma device for the host
++ *
++ * Note: You rarely need to worry about this unless you're in a
++ * virtualised host environments, so use the simpler scsi_add_host()
++ * function instead.
+  *
+  * Return value: 
+  *    0 on success / != 0 for error
+  **/
+-int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
++int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
++                         struct device *dma_dev)
+ {
+       struct scsi_host_template *sht = shost->hostt;
+       int error = -EINVAL;
+@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shos
+       if (!shost->shost_gendev.parent)
+               shost->shost_gendev.parent = dev ? dev : &platform_bus;
++      shost->dma_dev = dma_dev;
+       error = device_add(&shost->shost_gendev);
+       if (error)
+@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shos
+  fail:
+       return error;
+ }
+-EXPORT_SYMBOL(scsi_add_host);
++EXPORT_SYMBOL(scsi_add_host_with_dma);
+ static void scsi_host_dev_release(struct device *dev)
+ {
+--- a/drivers/scsi/lpfc/lpfc_init.c
++++ b/drivers/scsi/lpfc/lpfc_init.c
+@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba,
+       vport->els_tmofunc.function = lpfc_els_timeout;
+       vport->els_tmofunc.data = (unsigned long)vport;
+-      error = scsi_add_host(shost, dev);
++      error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
+       if (error)
+               goto out_put_shost;
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc
+                       fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+       }
+-      if (scsi_add_host(vha->host, &fc_vport->dev)) {
++      if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
++                                 &ha->pdev->dev)) {
+               DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
+                       vha->host_no, vha->vp_idx));
+               goto vport_create_failed_2;
+--- a/drivers/scsi/scsi_lib_dma.c
++++ b/drivers/scsi/scsi_lib_dma.c
+@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
+       int nseg = 0;
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                                 cmd->sc_data_direction);
+@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
+ void scsi_dma_unmap(struct scsi_cmnd *cmd)
+ {
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                            cmd->sc_data_direction);
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -677,6 +677,12 @@ struct Scsi_Host {
+       void *shost_data;
+       /*
++       * Points to the physical bus device we'd use to do DMA
++       * Needed just in case we have virtual hosts.
++       */
++      struct device *dma_dev;
++
++      /*
+        * We should ensure that this is aligned, both for better performance
+        * and also because some compilers (m68k) don't automatically force
+        * alignment to a long boundary.
+@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_H
+ extern void scsi_flush_work(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
++extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
++                                             struct device *,
++                                             struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+ extern void scsi_rescan_device(struct device *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(
+ extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
++static inline int __must_check scsi_add_host(struct Scsi_Host *host,
++                                           struct device *dev)
++{
++      return scsi_add_host_with_dma(host, dev, dev);
++}
++
+ static inline struct device *scsi_get_device(struct Scsi_Host *shost)
+ {
+         return shost->shost_gendev.parent;
+
+
+From linux@linux.site Thu Dec 10 21:25:42 2009
+Message-Id: <20091211052541.550415868@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:15 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Martin Michlmayr <tbm@cyrius.com>,
+ Boaz Harrosh <bharrosh@panasas.com>,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [03/34] SCSI: osd_protocol.h: Add missing #include
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-osd_protocol.h-add-missing-include.patch
+Content-Length: 708
+Lines: 24
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Martin Michlmayr <tbm@cyrius.com>
+
+commit 0899638688f223fd9e9fee60d662665e11693d12 upstream.
+
+include/scsi/osd_protocol.h uses ALIGN() without an #include
+<linux/kernel.h>, leading to:
+| include/scsi/osd_protocol.h:362: error: implicit declaration of function 'ALIGN'
+
+Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/include/scsi/osd_protocol.h
++++ b/include/scsi/osd_protocol.h
+@@ -17,6 +17,7 @@
+ #define __OSD_PROTOCOL_H__
+ #include <linux/types.h>
++#include <linux/kernel.h>
+ #include <asm/unaligned.h>
+ #include <scsi/scsi.h>
+
+
+From linux@linux.site Thu Dec 10 21:25:42 2009
+Message-Id: <20091211052542.045664905@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:16 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [04/34] SCSI: megaraid_sas: fix 64 bit sense pointer truncation
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch
+Content-Length: 1456
+Lines: 47
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Yang, Bo <Bo.Yang@lsi.com>
+
+commit 7b2519afa1abd1b9f63aa1e90879307842422dae upstream.
+
+The current sense pointer is cast to a u32 pointer, which can truncate
+on 64 bits.  Fix by using unsigned long instead.
+
+Signed-off-by Bo Yang<bo.yang@lsi.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/megaraid/megaraid_sas.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/megaraid/megaraid_sas.c
++++ b/drivers/scsi/megaraid/megaraid_sas.c
+@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+       int error = 0, i;
+       void *sense = NULL;
+       dma_addr_t sense_handle;
+-      u32 *sense_ptr;
++      unsigned long *sense_ptr;
+       memset(kbuff_arr, 0, sizeof(kbuff_arr));
+@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+               }
+               sense_ptr =
+-                  (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
++              (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
+               *sense_ptr = sense_handle;
+       }
+@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+                * sense_ptr points to the location that has the user
+                * sense buffer address
+                */
+-              sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
+-                                   ioc->sense_off);
++              sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
++                              ioc->sense_off);
+               if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
+                                sense, ioc->sense_len)) {
+
+
+From linux@linux.site Thu Dec 10 21:25:43 2009
+Message-Id: <20091211052542.664737460@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:17 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Curt Wohlgemuth <curtw@google.com>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [05/34] ext4: fix potential buffer head leak when add_dirent_to_buf() returns ENOSPC
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch
+Content-Length: 3833
+Lines: 118
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 2de770a406b06dfc619faabbf5d85c835ed3f2e1)
+
+Previously add_dirent_to_buf() did not free its passed-in buffer head
+in the case of ENOSPC, since in some cases the caller still needed it.
+However, this led to potential buffer head leaks since not all callers
+dealt with this correctly.  Fix this by simplifying the freeing
+convention; now add_dirent_to_buf() *never* frees the passed-in buffer
+head, and leaves that to the responsibility of its caller.  This makes
+things cleaner and easier to prove that the code is neither leaking
+buffer heads nor calling brelse() one time too many.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Curt Wohlgemuth <curtw@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c |   30 ++++++++++++------------------
+ 1 file changed, 12 insertions(+), 18 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1292,9 +1292,6 @@ errout:
+  * add_dirent_to_buf will attempt search the directory block for
+  * space.  It will return -ENOSPC if no space is available, and -EIO
+  * and -EEXIST if directory entry already exists.
+- *
+- * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+- * all other cases bh is released.
+  */
+ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode, struct ext4_dir_entry_2 *de,
+@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *h
+               top = bh->b_data + blocksize - reclen;
+               while ((char *) de <= top) {
+                       if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
+-                                                bh, offset)) {
+-                              brelse(bh);
++                                                bh, offset))
+                               return -EIO;
+-                      }
+-                      if (ext4_match(namelen, name, de)) {
+-                              brelse(bh);
++                      if (ext4_match(namelen, name, de))
+                               return -EEXIST;
+-                      }
+                       nlen = EXT4_DIR_REC_LEN(de->name_len);
+                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+                       if ((de->inode? rlen - nlen: rlen) >= reclen)
+@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_journal_get_write_access(handle, bh);
+       if (err) {
+               ext4_std_error(dir->i_sb, err);
+-              brelse(bh);
+               return err;
+       }
+@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
+       if (err)
+               ext4_std_error(dir->i_sb, err);
+-      brelse(bh);
+       return 0;
+ }
+@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *ha
+       if (!(de))
+               return retval;
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *hand
+               if(!bh)
+                       return retval;
+               retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-              if (retval != -ENOSPC)
++              if (retval != -ENOSPC) {
++                      brelse(bh);
+                       return retval;
++              }
+               if (blocks == 1 && !dx_fallback &&
+                   EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
+@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *hand
+       de = (struct ext4_dir_entry_2 *) bh->b_data;
+       de->inode = 0;
+       de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *h
+               goto journal_error;
+       err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-      if (err != -ENOSPC) {
+-              bh = NULL;
++      if (err != -ENOSPC)
+               goto cleanup;
+-      }
+       /* Block full, should compress but for now just split */
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *h
+       if (!de)
+               goto cleanup;
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+-      bh = NULL;
+       goto cleanup;
+ journal_error:
+
+
+From linux@linux.site Thu Dec 10 21:25:43 2009
+Message-Id: <20091211052543.277851362@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:18 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [06/34] ext4: avoid divide by zero when trying to mount a corrupted file system
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch
+Content-Length: 1267
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 503358ae01b70ce6909d19dd01287093f6b6271c)
+
+If s_log_groups_per_flex is greater than 31, then groups_per_flex will
+overflow and cause a divide by zero error.  This can cause a kernel
+BUG if such a file system is mounted.
+
+Thanks to Nageswara R Sastry for analyzing the failure and providing
+an initial patch.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=14287
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1673,14 +1673,14 @@ static int ext4_fill_flex_info(struct su
+       size_t size;
+       int i;
+-      if (!sbi->s_es->s_log_groups_per_flex) {
++      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
++      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
++
++      if (groups_per_flex < 2) {
+               sbi->s_log_groups_per_flex = 0;
+               return 1;
+       }
+-      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+-      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+-
+       /* We allocate both existing and potentially added groups */
+       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+                       ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
+
+
+From linux@linux.site Thu Dec 10 21:25:44 2009
+Message-Id: <20091211052543.772152436@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:19 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [07/34] ext4: fix the returned block count if EXT4_IOC_MOVE_EXT fails
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch
+Content-Length: 10970
+Lines: 349
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit f868a48d06f8886cb0367568a12367fa4f21ea0d)
+
+If the EXT4_IOC_MOVE_EXT ioctl fails, the number of blocks that were
+exchanged before the failure should be returned to the userspace
+caller.  Unfortunately, currently if the block size is not the same as
+the page size, the block count that is returned is the
+page-aligned block count instead of the actual block count.  This
+commit addresses this bug.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  139 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 73 insertions(+), 66 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -661,6 +661,7 @@ mext_calc_swap_extents(struct ext4_exten
+  * @donor_inode:      donor inode
+  * @from:             block offset of orig_inode
+  * @count:            block count to be replaced
++ * @err:              pointer to save return value
+  *
+  * Replace original inode extents and donor inode extents page by page.
+  * We implement this replacement in the following three steps:
+@@ -671,19 +672,18 @@ mext_calc_swap_extents(struct ext4_exten
+  * 3. Change the block information of donor inode to point at the saved
+  *    original inode blocks in the dummy extents.
+  *
+- * Return 0 on success, or a negative error value on failure.
++ * Return replaced block count.
+  */
+ static int
+ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+                          struct inode *donor_inode, ext4_lblk_t from,
+-                         ext4_lblk_t count)
++                         ext4_lblk_t count, int *err)
+ {
+       struct ext4_ext_path *orig_path = NULL;
+       struct ext4_ext_path *donor_path = NULL;
+       struct ext4_extent *oext, *dext;
+       struct ext4_extent tmp_dext, tmp_oext;
+       ext4_lblk_t orig_off = from, donor_off = from;
+-      int err = 0;
+       int depth;
+       int replaced_count = 0;
+       int dext_alen;
+@@ -691,13 +691,13 @@ mext_replace_branches(handle_t *handle,
+       mext_double_down_write(orig_inode, donor_inode);
+       /* Get the original extent for the block "orig_off" */
+-      err = get_ext_path(orig_inode, orig_off, &orig_path);
+-      if (err)
++      *err = get_ext_path(orig_inode, orig_off, &orig_path);
++      if (*err)
+               goto out;
+       /* Get the donor extent for the head */
+-      err = get_ext_path(donor_inode, donor_off, &donor_path);
+-      if (err)
++      *err = get_ext_path(donor_inode, donor_off, &donor_path);
++      if (*err)
+               goto out;
+       depth = ext_depth(orig_inode);
+       oext = orig_path[depth].p_ext;
+@@ -707,9 +707,9 @@ mext_replace_branches(handle_t *handle,
+       dext = donor_path[depth].p_ext;
+       tmp_dext = *dext;
+-      err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++      *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                     donor_off, count);
+-      if (err)
++      if (*err)
+               goto out;
+       /* Loop for the donor extents */
+@@ -718,7 +718,7 @@ mext_replace_branches(handle_t *handle,
+               if (!dext) {
+                       ext4_error(donor_inode->i_sb, __func__,
+                                  "The extent for donor must be found");
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
+                       ext4_error(donor_inode->i_sb, __func__,
+@@ -726,20 +726,20 @@ mext_replace_branches(handle_t *handle,
+                               "extent(%u) should be equal",
+                               donor_off,
+                               le32_to_cpu(tmp_dext.ee_block));
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               }
+               /* Set donor extent to orig extent */
+-              err = mext_leaf_block(handle, orig_inode,
++              *err = mext_leaf_block(handle, orig_inode,
+                                          orig_path, &tmp_dext, &orig_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               /* Set orig extent to donor extent */
+-              err = mext_leaf_block(handle, donor_inode,
++              *err = mext_leaf_block(handle, donor_inode,
+                                          donor_path, &tmp_oext, &donor_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               dext_alen = ext4_ext_get_actual_len(&tmp_dext);
+@@ -753,35 +753,25 @@ mext_replace_branches(handle_t *handle,
+               if (orig_path)
+                       ext4_ext_drop_refs(orig_path);
+-              err = get_ext_path(orig_inode, orig_off, &orig_path);
+-              if (err)
++              *err = get_ext_path(orig_inode, orig_off, &orig_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(orig_inode);
+               oext = orig_path[depth].p_ext;
+-              if (le32_to_cpu(oext->ee_block) +
+-                              ext4_ext_get_actual_len(oext) <= orig_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_oext = *oext;
+               if (donor_path)
+                       ext4_ext_drop_refs(donor_path);
+-              err = get_ext_path(donor_inode, donor_off, &donor_path);
+-              if (err)
++              *err = get_ext_path(donor_inode, donor_off, &donor_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(donor_inode);
+               dext = donor_path[depth].p_ext;
+-              if (le32_to_cpu(dext->ee_block) +
+-                              ext4_ext_get_actual_len(dext) <= donor_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_dext = *dext;
+-              err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++              *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                          donor_off, count - replaced_count);
+-              if (err)
++              if (*err)
+                       goto out;
+       }
+@@ -796,7 +786,7 @@ out:
+       }
+       mext_double_up_write(orig_inode, donor_inode);
+-      return err;
++      return replaced_count;
+ }
+ /**
+@@ -808,16 +798,17 @@ out:
+  * @data_offset_in_page:      block index where data swapping starts
+  * @block_len_in_page:                the number of blocks to be swapped
+  * @uninit:                   orig extent is uninitialized or not
++ * @err:                      pointer to save return value
+  *
+  * Save the data in original inode blocks and replace original inode extents
+  * with donor inode extents by calling mext_replace_branches().
+- * Finally, write out the saved data in new original inode blocks. Return 0
+- * on success, or a negative error value on failure.
++ * Finally, write out the saved data in new original inode blocks. Return
++ * replaced block count.
+  */
+ static int
+ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                 pgoff_t orig_page_offset, int data_offset_in_page,
+-                int block_len_in_page, int uninit)
++                int block_len_in_page, int uninit, int *err)
+ {
+       struct inode *orig_inode = o_filp->f_dentry->d_inode;
+       struct address_space *mapping = orig_inode->i_mapping;
+@@ -829,9 +820,11 @@ move_extent_per_page(struct file *o_filp
+       long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+       unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+       unsigned int w_flags = 0;
+-      unsigned int tmp_data_len, data_len;
++      unsigned int tmp_data_size, data_size, replaced_size;
+       void *fsdata;
+-      int ret, i, jblocks;
++      int i, jblocks;
++      int err2 = 0;
++      int replaced_count = 0;
+       int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+       /*
+@@ -841,8 +834,8 @@ move_extent_per_page(struct file *o_filp
+       jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+       handle = ext4_journal_start(orig_inode, jblocks);
+       if (IS_ERR(handle)) {
+-              ret = PTR_ERR(handle);
+-              return ret;
++              *err = PTR_ERR(handle);
++              return 0;
+       }
+       if (segment_eq(get_fs(), KERNEL_DS))
+@@ -858,9 +851,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              ret = mext_replace_branches(handle, orig_inode,
+-                                               donor_inode, orig_blk_offset,
+-                                               block_len_in_page);
++              replaced_count = mext_replace_branches(handle, orig_inode,
++                                              donor_inode, orig_blk_offset,
++                                              block_len_in_page, err);
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+@@ -870,27 +863,28 @@ move_extent_per_page(struct file *o_filp
+       offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+-      /* Calculate data_len */
++      /* Calculate data_size */
+       if ((orig_blk_offset + block_len_in_page - 1) ==
+           ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
+               /* Replace the last block */
+-              tmp_data_len = orig_inode->i_size & (blocksize - 1);
++              tmp_data_size = orig_inode->i_size & (blocksize - 1);
+               /*
+-               * If data_len equal zero, it shows data_len is multiples of
++               * If data_size equal zero, it shows data_size is multiples of
+                * blocksize. So we set appropriate value.
+                */
+-              if (tmp_data_len == 0)
+-                      tmp_data_len = blocksize;
++              if (tmp_data_size == 0)
++                      tmp_data_size = blocksize;
+-              data_len = tmp_data_len +
++              data_size = tmp_data_size +
+                       ((block_len_in_page - 1) << orig_inode->i_blkbits);
+-      } else {
+-              data_len = block_len_in_page << orig_inode->i_blkbits;
+-      }
++      } else
++              data_size = block_len_in_page << orig_inode->i_blkbits;
++
++      replaced_size = data_size;
+-      ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
++      *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
+                                &page, &fsdata);
+-      if (unlikely(ret < 0))
++      if (unlikely(*err < 0))
+               goto out;
+       if (!PageUptodate(page)) {
+@@ -911,10 +905,17 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      ret = mext_replace_branches(handle, orig_inode, donor_inode,
+-                                       orig_blk_offset, block_len_in_page);
+-      if (ret < 0)
+-              goto out;
++      replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
++                                      orig_blk_offset, block_len_in_page,
++                                      &err2);
++      if (err2) {
++              if (replaced_count) {
++                      block_len_in_page = replaced_count;
++                      replaced_size =
++                              block_len_in_page << orig_inode->i_blkbits;
++              } else
++                      goto out;
++      }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+@@ -928,16 +929,16 @@ move_extent_per_page(struct file *o_filp
+               bh = bh->b_this_page;
+       for (i = 0; i < block_len_in_page; i++) {
+-              ret = ext4_get_block(orig_inode,
++              *err = ext4_get_block(orig_inode,
+                               (sector_t)(orig_blk_offset + i), bh, 0);
+-              if (ret < 0)
++              if (*err < 0)
+                       goto out;
+               if (bh->b_this_page != NULL)
+                       bh = bh->b_this_page;
+       }
+-      ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
++      *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
+                              page, fsdata);
+       page = NULL;
+@@ -951,7 +952,10 @@ out:
+ out2:
+       ext4_journal_stop(handle);
+-      return ret < 0 ? ret : 0;
++      if (err2)
++              *err = err2;
++
++      return replaced_count;
+ }
+ /**
+@@ -1367,15 +1371,17 @@ ext4_move_extents(struct file *o_filp, s
+               while (orig_page_offset <= seq_end_page) {
+                       /* Swap original branches with new branches */
+-                      ret1 = move_extent_per_page(o_filp, donor_inode,
++                      block_len_in_page = move_extent_per_page(
++                                              o_filp, donor_inode,
+                                               orig_page_offset,
+                                               data_offset_in_page,
+-                                              block_len_in_page, uninit);
+-                      if (ret1 < 0)
+-                              goto out;
+-                      orig_page_offset++;
++                                              block_len_in_page, uninit,
++                                              &ret1);
++
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
++                      if (ret1 < 0)
++                              goto out;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+@@ -1385,6 +1391,7 @@ ext4_move_extents(struct file *o_filp, s
+                               goto out;
+                       }
++                      orig_page_offset++;
+                       data_offset_in_page = 0;
+                       rest_blocks -= block_len_in_page;
+                       if (rest_blocks > blocks_per_page)
+
+
+From linux@linux.site Thu Dec 10 21:25:44 2009
+Message-Id: <20091211052544.287395070@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:20 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [08/34] ext4: fix lock order problem in ext4_move_extents()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch
+Content-Length: 10372
+Lines: 310
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit fc04cb49a898c372a22b21fffc47f299d8710801)
+
+ext4_move_extents() checks the logical block contiguity of the
+original file with ext4_ext_find_extent() and mext_next_extent().
+Therefore the extent which the ext4_ext_path structure indicates
+must not be changed between the calls to these functions.
+
+But in the current implementation, there is no i_data_sem protection
+between ext4_ext_find_extent() and mext_next_extent().  So the extent
+which the ext4_ext_path structure indicates may be overwritten by
+delalloc.  As a result, ext4_move_extents() will exchange the wrong
+blocks between the original and donor files.  I changed the places
+where i_data_sem is acquired and released to solve this problem.
+
+Moreover, I changed move_extent_per_page() to start the transaction
+first, and then acquire i_data_sem.  Without this change, there is a
+possibility of a deadlock between mmap() and ext4_move_extents():
+
+* NOTE: "A", "B" and "C" mean different processes
+
+A-1: ext4_move_extents() acquires i_data_sem of two inodes.
+
+B:   do_page_fault() starts the transaction (T),
+     and then tries to acquire i_data_sem.
+     But process "A" is already holding it, so it is kept waiting.
+
+C:   While "A" and "B" are running, kjournald2 tries to commit transaction
+     (T), but it is still being updated, so kjournald2 waits for it.
+
+A-2: Calls ext4_journal_start() while holding i_data_sem,
+     but transaction (T) is locked.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  117 ++++++++++++++++++++++----------------------------
+ 1 file changed, 53 insertions(+), 64 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -77,12 +77,14 @@ static int
+ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+                     struct ext4_extent **extent)
+ {
++      struct ext4_extent_header *eh;
+       int ppos, leaf_ppos = path->p_depth;
+       ppos = leaf_ppos;
+       if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+               /* leaf block */
+               *extent = ++path[ppos].p_ext;
++              path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+               return 0;
+       }
+@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, st
+                                       ext_block_hdr(path[cur_ppos+1].p_bh);
+                       }
++                      path[leaf_ppos].p_ext = *extent = NULL;
++
++                      eh = path[leaf_ppos].p_hdr;
++                      if (le16_to_cpu(eh->eh_entries) == 0)
++                              /* empty leaf is found */
++                              return -ENODATA;
++
+                       /* leaf block */
+                       path[leaf_ppos].p_ext = *extent =
+                               EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
++                      path[leaf_ppos].p_block =
++                                      ext_pblock(path[leaf_ppos].p_ext);
+                       return 0;
+               }
+       }
+@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inod
+ }
+ /**
+- * mext_double_down_read - Acquire two inodes' read semaphore
+- *
+- * @orig_inode:               original inode structure
+- * @donor_inode:      donor inode structure
+- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+- */
+-static void
+-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      struct inode *first = orig_inode, *second = donor_inode;
+-
+-      /*
+-       * Use the inode number to provide the stable locking order instead
+-       * of its address, because the C language doesn't guarantee you can
+-       * compare pointers that don't come from the same array.
+-       */
+-      if (donor_inode->i_ino < orig_inode->i_ino) {
+-              first = donor_inode;
+-              second = orig_inode;
+-      }
+-
+-      down_read(&EXT4_I(first)->i_data_sem);
+-      down_read(&EXT4_I(second)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_down_write - Acquire two inodes' write semaphore
++ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure
+  * @donor_inode:      donor inode structure
+- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
++ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
++ * i_ino order.
+  */
+ static void
+-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
++double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       struct inode *first = orig_inode, *second = donor_inode;
+@@ -207,28 +193,14 @@ mext_double_down_write(struct inode *ori
+ }
+ /**
+- * mext_double_up_read - Release two inodes' read semaphore
++ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure to be released its lock first
+  * @donor_inode:      donor inode structure to be released its lock second
+- * Release read semaphore of two inodes (orig and donor).
++ * Release write lock of i_data_sem of two inodes (orig and donor).
+  */
+ static void
+-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      up_read(&EXT4_I(orig_inode)->i_data_sem);
+-      up_read(&EXT4_I(donor_inode)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_up_write - Release two inodes' write semaphore
+- *
+- * @orig_inode:               original inode structure to be released its lock first
+- * @donor_inode:      donor inode structure to be released its lock second
+- * Release write semaphore of two inodes (orig and donor).
+- */
+-static void
+-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
++double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       up_write(&EXT4_I(orig_inode)->i_data_sem);
+       up_write(&EXT4_I(donor_inode)->i_data_sem);
+@@ -688,8 +660,6 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
+-      mext_double_down_write(orig_inode, donor_inode);
+-
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -785,7 +755,6 @@ out:
+               kfree(donor_path);
+       }
+-      mext_double_up_write(orig_inode, donor_inode);
+       return replaced_count;
+ }
+@@ -851,6 +820,11 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
++              /*
++               * Protect extent trees against block allocations
++               * via delalloc
++               */
++              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+@@ -858,6 +832,7 @@ move_extent_per_page(struct file *o_filp
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+               ext4_ext_invalidate_cache(donor_inode);
++              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -905,6 +880,8 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -913,14 +890,18 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else
++              } else {
++                      double_up_write_data_sem(orig_inode, donor_inode);
+                       goto out;
++              }
+       }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+       ext4_ext_invalidate_cache(donor_inode);
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+@@ -1236,16 +1217,16 @@ ext4_move_extents(struct file *o_filp, s
+               return -EINVAL;
+       }
+-      /* protect orig and donor against a truncate */
++      /* Protect orig and donor inodes against a truncate */
+       ret1 = mext_inode_double_lock(orig_inode, donor_inode);
+       if (ret1 < 0)
+               return ret1;
+-      mext_double_down_read(orig_inode, donor_inode);
++      /* Protect extent tree against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+                                       donor_start, &len, *moved_len);
+-      mext_double_up_read(orig_inode, donor_inode);
+       if (ret1)
+               goto out;
+@@ -1308,6 +1289,10 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
++      /* Discard preallocations of two inodes */
++      ext4_discard_preallocations(orig_inode);
++      ext4_discard_preallocations(donor_inode);
++
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1359,14 +1344,14 @@ ext4_move_extents(struct file *o_filp, s
+               seq_start = le32_to_cpu(ext_cur->ee_block);
+               rest_blocks = seq_blocks;
+-              /* Discard preallocations of two inodes */
+-              down_write(&EXT4_I(orig_inode)->i_data_sem);
+-              ext4_discard_preallocations(orig_inode);
+-              up_write(&EXT4_I(orig_inode)->i_data_sem);
+-
+-              down_write(&EXT4_I(donor_inode)->i_data_sem);
+-              ext4_discard_preallocations(donor_inode);
+-              up_write(&EXT4_I(donor_inode)->i_data_sem);
++              /*
++               * Up semaphore to avoid following problems:
++               * a. transaction deadlock among ext4_journal_start,
++               *    ->write_begin via pagefault, and jbd2_journal_commit
++               * b. racing with ->readpage, ->write_begin, and ext4_get_block
++               *    in move_extent_per_page
++               */
++              double_up_write_data_sem(orig_inode, donor_inode);
+               while (orig_page_offset <= seq_end_page) {
+@@ -1381,14 +1366,14 @@ ext4_move_extents(struct file *o_filp, s
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
+                       if (ret1 < 0)
+-                              goto out;
++                              break;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+                                       "sum of replaced: %llu requested: %llu",
+                                       *moved_len, len);
+                               ret1 = -EIO;
+-                              goto out;
++                              break;
+                       }
+                       orig_page_offset++;
+@@ -1400,6 +1385,10 @@ ext4_move_extents(struct file *o_filp, s
+                               block_len_in_page = rest_blocks;
+               }
++              double_down_write_data_sem(orig_inode, donor_inode);
++              if (ret1 < 0)
++                      break;
++
+               /* Decrease buffer counter */
+               if (holecheck_path)
+                       ext4_ext_drop_refs(holecheck_path);
+@@ -1429,7 +1418,7 @@ out:
+               ext4_ext_drop_refs(holecheck_path);
+               kfree(holecheck_path);
+       }
+-
++      double_up_write_data_sem(orig_inode, donor_inode);
+       ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
+       if (ret1)
+
+
+From linux@linux.site Thu Dec 10 21:25:45 2009
+Message-Id: <20091211052544.890897126@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:21 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [09/34] ext4: fix possible recursive locking warning in EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch
+Content-Length: 1075
+Lines: 32
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 49bd22bc4d603a2a4fc2a6a60e156cbea52eb494)
+
+If CONFIG_PROVE_LOCKING is enabled, the double_down_write_data_sem()
+will trigger a false-positive warning of a recursive lock.  Since we
+take i_data_sem for the two inodes ordered by their inode numbers,
+this isn't a problem.  Use of down_write_nested() will notify the lock
+dependency checker machinery that there is no problem here.
+
+This problem was reported by Brian Rogers:
+
+       http://marc.info/?l=linux-ext4&m=125115356928011&w=1
+
+Reported-by: Brian Rogers <brian@xyzw.org>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -189,7 +189,7 @@ double_down_write_data_sem(struct inode
+       }
+       down_write(&EXT4_I(first)->i_data_sem);
+-      down_write(&EXT4_I(second)->i_data_sem);
++      down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:45 2009
+Message-Id: <20091211052545.443549269@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:22 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [10/34] ext4: plug a buffer_head leak in an error path of ext4_iget()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch
+Content-Length: 2427
+Lines: 82
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 567f3e9a70d71e5c9be03701b8578be77857293b)
+
+One of the invalid error paths in ext4_iget() forgot to brelse() the
+inode buffer head.  Fix it by adding a brelse() in the common error
+return path, which also simplifies the function.
+
+Thanks to Andi Kleen <ak@linux.intel.com> for reporting the problem.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4781,7 +4781,6 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+-      struct buffer_head *bh;
+       struct inode *inode;
+       long ret;
+       int block;
+@@ -4793,11 +4792,11 @@ struct inode *ext4_iget(struct super_blo
+               return inode;
+       ei = EXT4_I(inode);
++      iloc.bh = 0;
+       ret = __ext4_get_inode_loc(inode, &iloc, 0);
+       if (ret < 0)
+               goto bad_inode;
+-      bh = iloc.bh;
+       raw_inode = ext4_raw_inode(&iloc);
+       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+@@ -4820,7 +4819,6 @@ struct inode *ext4_iget(struct super_blo
+               if (inode->i_mode == 0 ||
+                   !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+                       /* this inode is deleted */
+-                      brelse(bh);
+                       ret = -ESTALE;
+                       goto bad_inode;
+               }
+@@ -4852,7 +4850,6 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+-                      brelse(bh);
+                       ret = -EIO;
+                       goto bad_inode;
+               }
+@@ -4905,10 +4902,8 @@ struct inode *ext4_iget(struct super_blo
+               /* Validate block references which are part of inode */
+               ret = ext4_check_inode_blockref(inode);
+       }
+-      if (ret) {
+-              brelse(bh);
++      if (ret)
+               goto bad_inode;
+-      }
+       if (S_ISREG(inode->i_mode)) {
+               inode->i_op = &ext4_file_inode_operations;
+@@ -4936,7 +4931,6 @@ struct inode *ext4_iget(struct super_blo
+                       init_special_inode(inode, inode->i_mode,
+                          new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+       } else {
+-              brelse(bh);
+               ret = -EIO;
+               ext4_error(inode->i_sb, __func__,
+                          "bogus i_mode (%o) for inode=%lu",
+@@ -4949,6 +4943,7 @@ struct inode *ext4_iget(struct super_blo
+       return inode;
+ bad_inode:
++      brelse(iloc.bh);
+       iget_failed(inode);
+       return ERR_PTR(ret);
+ }
+
+
+From linux@linux.site Thu Dec 10 21:25:46 2009
+Message-Id: <20091211052545.995802406@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:23 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [11/34] ext4: make sure directory and symlink blocks are revoked
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch
+Content-Length: 2052
+Lines: 58
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 50689696867d95b38d9c7be640a311494a04fb86)
+
+When an inode gets unlinked, the functions ext4_clear_blocks() and
+ext4_remove_blocks() call ext4_forget() for all the buffer heads
+corresponding to the deleted inode's data blocks.  If the inode is a
+directory or a symlink, the is_metadata parameter must be non-zero so
+ext4_forget() will revoke them via jbd2_journal_revoke().  Otherwise,
+if these blocks are reused for a data file, and the system crashes
+before a journal checkpoint, the journal replay could end up
+corrupting these data blocks.
+
+Thanks to Curt Wohlgemuth for pointing out potential problems in this
+area.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    2 +-
+ fs/ext4/inode.c   |    6 ++++--
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2074,7 +2074,7 @@ static int ext4_remove_blocks(handle_t *
+               ext_debug("free last %u blocks starting %llu\n", num, start);
+               for (i = 0; i < num; i++) {
+                       bh = sb_find_get_block(inode->i_sb, start + i);
+-                      ext4_forget(handle, 0, inode, bh, start + i);
++                      ext4_forget(handle, metadata, inode, bh, start + i);
+               }
+               ext4_free_blocks(handle, inode, start, num, metadata);
+       } else if (from == le32_to_cpu(ex->ee_block)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4120,6 +4120,8 @@ static void ext4_clear_blocks(handle_t *
+                             __le32 *last)
+ {
+       __le32 *p;
++      int     is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
++
+       if (try_to_extend_transaction(handle, inode)) {
+               if (bh) {
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+@@ -4150,11 +4152,11 @@ static void ext4_clear_blocks(handle_t *
+                       *p = 0;
+                       tbh = sb_find_get_block(inode->i_sb, nr);
+-                      ext4_forget(handle, 0, inode, tbh, nr);
++                      ext4_forget(handle, is_metadata, inode, tbh, nr);
+               }
+       }
+-      ext4_free_blocks(handle, inode, block_to_free, count, 0);
++      ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:47 2009
+Message-Id: <20091211052546.544464652@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:24 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Julia Lawall <julia@diku.dk>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [12/34] ext4: fix i_flags access in ext4_da_writepages_trans_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch
+Content-Length: 846
+Lines: 25
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 30c6e07a92ea4cb87160d32ffa9bce172576ae4c)
+
+We need to be testing the i_flags field in the ext4 specific portion
+of the inode, instead of the (confusingly aliased) i_flags field in
+the generic struct inode.
+
+Signed-off-by: Julia Lawall <julia@diku.dk>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2788,7 +2788,7 @@ static int ext4_da_writepages_trans_bloc
+        * number of contiguous block. So we will limit
+        * number of contiguous block to a sane value
+        */
+-      if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
++      if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+           (max_blocks > EXT4_MAX_TRANS_DATA))
+               max_blocks = EXT4_MAX_TRANS_DATA;
+
+
+From linux@linux.site Thu Dec 10 21:25:47 2009
+Message-Id: <20091211052547.065677730@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:25 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [13/34] ext4: journal all modifications in ext4_xattr_set_handle
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch
+Content-Length: 1254
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 86ebfd08a1930ccedb8eac0aeb1ed4b8b6a41dbc)
+
+ext4_xattr_set_handle() was zeroing out an inode outside
+of journaling constraints; this is one of the accesses that
+was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Reviewed-by: Andreas Dilger <adilger@sun.com>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/xattr.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle,
+       if (error)
+               goto cleanup;
++      error = ext4_journal_get_write_access(handle, is.iloc.bh);
++      if (error)
++              goto cleanup;
++
+       if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+               struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
+               memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle,
+               if (flags & XATTR_CREATE)
+                       goto cleanup;
+       }
+-      error = ext4_journal_get_write_access(handle, is.iloc.bh);
+-      if (error)
+-              goto cleanup;
+       if (!value) {
+               if (!is.s.not_found)
+                       error = ext4_xattr_ibody_set(handle, inode, &i, &is);
+
+
+From linux@linux.site Thu Dec 10 21:25:48 2009
+Message-Id: <20091211052547.644399594@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:26 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [14/34] ext4: don't update the superblock in ext4_statfs()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch
+Content-Length: 1341
+Lines: 31
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 3f8fb9490efbd300887470a2a880a64e04dcc3f5)
+
+commit a71ce8c6c9bf269b192f352ea555217815cf027e updated ext4_statfs()
+to update the on-disk superblock counters, but modified this buffer
+directly without any journaling of the change.  This is one of the
+accesses that was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3668,13 +3668,11 @@ static int ext4_statfs(struct dentry *de
+       buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
+       buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+                      percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+-      ext4_free_blocks_count_set(es, buf->f_bfree);
+       buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+       if (buf->f_bfree < ext4_r_blocks_count(es))
+               buf->f_bavail = 0;
+       buf->f_files = le32_to_cpu(es->s_inodes_count);
+       buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
+-      es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
+       buf->f_namelen = EXT4_NAME_LEN;
+       fsid = le64_to_cpup((void *)es->s_uuid) ^
+              le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+
+
+From linux@linux.site Thu Dec 10 21:25:48 2009
+Message-Id: <20091211052548.201782286@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:27 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [15/34] ext4: fix uninit block bitmap initialization when s_meta_first_bg is non-zero
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch
+Content-Length: 875
+Lines: 29
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 8dadb198cb70ef811916668fe67eeec82e8858dd)
+
+The number of old-style block group descriptor blocks is
+s_first_meta_bg when the meta_bg feature flag is set.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/balloc.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_met
+ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+-      return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
++      if (!ext4_bg_has_super(sb, group))
++              return 0;
++
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
++              return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
++      else
++              return EXT4_SB(sb)->s_gdb_count;
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:49 2009
+Message-Id: <20091211052548.726431621@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:28 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [16/34] ext4: fix block validity checks so they work correctly with meta_bg
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch
+Content-Length: 1411
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 1032988c71f3f85483b2b4319684d1205a704c02)
+
+The block validity checks used by ext4_data_block_valid() weren't
+correctly written to check file systems with the meta_bg feature.  Fix
+this.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/block_validity.c |    2 +-
+ fs/ext4/inode.c          |    5 +----
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_
+               if (ext4_bg_has_super(sb, i) &&
+                   ((i < 5) || ((i % flex_size) == 0)))
+                       add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+-                                      sbi->s_gdb_count + 1);
++                                      ext4_bg_num_gdb(sb, i) + 1);
+               gdp = ext4_get_group_desc(sb, i, NULL);
+               ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+               if (ret)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4883,10 +4883,7 @@ struct inode *ext4_iget(struct super_blo
+       ret = 0;
+       if (ei->i_file_acl &&
+-          ((ei->i_file_acl <
+-            (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+-             EXT4_SB(sb)->s_gdb_count)) ||
+-           (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++          !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+               ext4_error(sb, __func__,
+                          "bad extended attribute block %llu in inode #%lu",
+                          ei->i_file_acl, inode->i_ino);
+
+
+From linux@linux.site Thu Dec 10 21:25:49 2009
+Message-Id: <20091211052549.341684525@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:29 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Jan Kara <jack@suse.cz>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [17/34] ext4: avoid issuing unnecessary barriers
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0013-ext4-avoid-issuing-unnecessary-barriers.patch
+Content-Length: 1115
+Lines: 37
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 6b17d902fdd241adfa4ce780df20547b28bf5801)
+
+We need not issue an I/O barrier on an error or if we force commit
+because we are doing data journaling.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/fsync.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -60,7 +60,7 @@ int ext4_sync_file(struct file *file, st
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+-              goto out;
++              return ret;
+       /*
+        * data=writeback:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+@@ -79,10 +79,8 @@ int ext4_sync_file(struct file *file, st
+        *  (they were dirtied by commit).  But that's OK - the blocks are
+        *  safe in-journal, which is all fsync() needs to ensure.
+        */
+-      if (ext4_should_journal_data(inode)) {
+-              ret = ext4_force_commit(inode->i_sb);
+-              goto out;
+-      }
++      if (ext4_should_journal_data(inode))
++              return ext4_force_commit(inode->i_sb);
+       if (!journal)
+               ret = sync_mapping_buffers(inode->i_mapping);
+
+
+From linux@linux.site Thu Dec 10 21:25:50 2009
+Message-Id: <20091211052549.883933582@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:30 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [18/34] ext4: fix error handling in ext4_ind_get_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch
+Content-Length: 733
+Lines: 25
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 2bba702d4f88d7b010ec37e2527b552588404ae7)
+
+When an error happened in ext4_splice_branch we failed to notice that
+in ext4_ind_get_blocks and mapped the buffer anyway. Fix the problem
+by checking for error properly.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1021,7 +1021,7 @@ static int ext4_ind_get_blocks(handle_t
+       if (!err)
+               err = ext4_splice_branch(handle, inode, iblock,
+                                        partial, indirect_blks, count);
+-      else
++      if (err)
+               goto cleanup;
+       set_buffer_new(bh_result);
+
+
+From linux@linux.site Thu Dec 10 21:25:50 2009
+Message-Id: <20091211052550.441771142@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:31 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [19/34] ext4: make trim/discard optional (and off by default)
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0015-ext4-make-trim-discard-optional-and-off-by-default.patch
+Content-Length: 4275
+Lines: 124
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 5328e635315734d42080de9a5a1ee87bf4cae0a4)
+
+It is anticipated that when sb_issue_discard starts doing
+real work on trim-capable devices, we may see issues.  Make
+this mount-time optional, and default it to off until we know
+that things are working out OK.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    6 ++++++
+ fs/ext4/ext4.h                     |    1 +
+ fs/ext4/mballoc.c                  |   21 +++++++++++++--------
+ fs/ext4/super.c                    |   14 +++++++++++++-
+ 4 files changed, 33 insertions(+), 9 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -353,6 +353,12 @@ noauto_da_alloc           replacing existing file
+                       system crashes before the delayed allocation
+                       blocks are forced to disk.
++discard               Controls whether ext4 should issue discard/TRIM
++nodiscard(*)          commands to the underlying block device when
++                      blocks are freed.  This is useful for SSD devices
++                      and sparse/thinly-provisioned LUNs, but it is off
++                      by default until sufficient testing has been done.
++
+ Data Mode
+ =========
+ There are 3 different data modes:
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -750,6 +750,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT_DELALLOC           0x8000000 /* Delalloc support */
+ #define EXT4_MOUNT_DATA_ERR_ABORT     0x10000000 /* Abort on file data write */
+ #define EXT4_MOUNT_BLOCK_VALIDITY     0x20000000 /* Block validity checking */
++#define EXT4_MOUNT_DISCARD            0x40000000 /* Issue DISCARD requests */
+ #define clear_opt(o, opt)             o &= ~EXT4_MOUNT_##opt
+ #define set_opt(o, opt)                       o |= EXT4_MOUNT_##opt
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(jou
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+       struct ext4_free_data *entry;
+-      ext4_fsblk_t discard_block;
+       struct list_head *l, *ltmp;
+       list_for_each_safe(l, ltmp, &txn->t_private_list) {
+@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(jou
+                       page_cache_release(e4b.bd_bitmap_page);
+               }
+               ext4_unlock_group(sb, entry->group);
+-              discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+-                      + entry->start_blk
+-                      + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+-              trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+-                                        entry->count);
+-              sb_issue_discard(sb, discard_block, entry->count);
+-
++              if (test_opt(sb, DISCARD)) {
++                      ext4_fsblk_t discard_block;
++                      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++
++                      discard_block = (ext4_fsblk_t)entry->group *
++                                              EXT4_BLOCKS_PER_GROUP(sb)
++                                      + entry->start_blk
++                                      + le32_to_cpu(es->s_first_data_block);
++                      trace_ext4_discard_blocks(sb,
++                                      (unsigned long long)discard_block,
++                                      entry->count);
++                      sb_issue_discard(sb, discard_block, entry->count);
++              }
+               kmem_cache_free(ext4_free_ext_cachep, entry);
+               ext4_mb_release_desc(&e4b);
+       }
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -899,6 +899,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, NO_AUTO_DA_ALLOC))
+               seq_puts(seq, ",noauto_da_alloc");
++      if (test_opt(sb, DISCARD))
++              seq_puts(seq, ",discard");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1079,7 +1082,8 @@ enum {
+       Opt_usrquota, Opt_grpquota, Opt_i_version,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_block_validity, Opt_noblock_validity,
+-      Opt_inode_readahead_blks, Opt_journal_ioprio
++      Opt_inode_readahead_blks, Opt_journal_ioprio,
++      Opt_discard, Opt_nodiscard,
+ };
+ static const match_table_t tokens = {
+@@ -1144,6 +1148,8 @@ static const match_table_t tokens = {
+       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
++      {Opt_discard, "discard"},
++      {Opt_nodiscard, "nodiscard"},
+       {Opt_err, NULL},
+ };
+@@ -1565,6 +1571,12 @@ set_qf_format:
+                       else
+                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+                       break;
++              case Opt_discard:
++                      set_opt(sbi->s_mount_opt, DISCARD);
++                      break;
++              case Opt_nodiscard:
++                      clear_opt(sbi->s_mount_opt, DISCARD);
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+
+
+From linux@linux.site Thu Dec 10 21:25:51 2009
+Message-Id: <20091211052551.004437667@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:32 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [20/34] ext4: make "norecovery" an alias for "noload"
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0016-ext4-make-norecovery-an-alias-for-noload.patch
+Content-Length: 1856
+Lines: 53
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit e3bb52ae2bb9573e84c17b8e3560378d13a5c798)
+
+Users on the linux-ext4 list recently complained about differences
+across filesystems w.r.t. how to mount without a journal replay.
+
+In the discussion it was noted that xfs's "norecovery" option is
+perhaps more descriptively accurate than "noload," so let's make
+that an alias for ext4.
+
+Also show this status in /proc/mounts
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    4 ++--
+ fs/ext4/super.c                    |    4 ++++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -153,8 +153,8 @@ journal_dev=devnum When the external jou
+                       identified through its new major/minor numbers encoded
+                       in devnum.
+-noload                        Don't load the journal on mounting.  Note that
+-                      if the filesystem was not unmounted cleanly,
++norecovery            Don't load the journal on mounting.  Note that
++noload                        if the filesystem was not unmounted cleanly,
+                       skipping the journal replay will lead to the
+                       filesystem containing inconsistencies that can
+                       lead to any number of problems.
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -902,6 +902,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, DISCARD))
+               seq_puts(seq, ",discard");
++      if (test_opt(sb, NOLOAD))
++              seq_puts(seq, ",norecovery");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1108,6 +1111,7 @@ static const match_table_t tokens = {
+       {Opt_acl, "acl"},
+       {Opt_noacl, "noacl"},
+       {Opt_noload, "noload"},
++      {Opt_noload, "norecovery"},
+       {Opt_nobh, "nobh"},
+       {Opt_bh, "bh"},
+       {Opt_commit, "commit=%u"},
+
+
+From linux@linux.site Thu Dec 10 21:25:52 2009
+Message-Id: <20091211052551.564396025@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:33 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [21/34] ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch
+Content-Length: 2565
+Lines: 75
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 94d7c16cbbbd0e03841fcf272bcaf0620ad39618)
+
+At the beginning of ext4_move_extents(), we call
+ext4_discard_preallocations() to discard inode PAs of orig and donor
+inodes.  But in the following case, blocks can be double freed, so
+move ext4_discard_preallocations() to the end of ext4_move_extents().
+
+1. Discard inode PAs of orig and donor inodes with
+   ext4_discard_preallocations() in ext4_move_extents().
+
+   orig : [ DATA1 ]
+   donor: [ DATA2 ]
+
+2. While data blocks are being exchanged between orig and donor inodes,
+   new inode PAs are created for orig by another process's block
+   allocation (since there are semaphore gaps in ext4_move_extents()),
+   and the new inode PAs are partially used (2-1).
+
+   2-1 Create new inode PAs to orig inode
+   orig : [ DATA1 | used PA1 | free PA1 ]
+   donor: [ DATA2 ]
+
+3. The donor inode, which now holds the old orig inode's blocks, is
+   deleted after EXT4_IOC_MOVE_EXT finishes (3-1, 3-2), so the block
+   bitmap bits corresponding to the old orig inode's blocks are freed.
+
+   3-1 After EXT4_IOC_MOVE_EXT finished
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ DATA1 |  used PA1 ]
+
+   3-2 Delete donor inode
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ FREE SPACE(DATA1) | FREE SPACE(used PA1) ]
+
+4. The double-free of blocks occurs when close() is called on the
+   orig inode, because ext4_discard_preallocations() for the orig inode
+   frees used PA1 and free PA1, though used PA1 was already freed in 3.
+
+   4-1 Double-free of blocks occurs
+   orig : [ DATA2 |  FREE SPACE(free PA1) ]
+   donor: [ FREE SPACE(DATA1) | DOUBLE FREE(used PA1) ]
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -1289,10 +1289,6 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
+-      /* Discard preallocations of two inodes */
+-      ext4_discard_preallocations(orig_inode);
+-      ext4_discard_preallocations(donor_inode);
+-
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1410,6 +1406,11 @@ ext4_move_extents(struct file *o_filp, s
+       }
+ out:
++      if (*moved_len) {
++              ext4_discard_preallocations(orig_inode);
++              ext4_discard_preallocations(donor_inode);
++      }
++
+       if (orig_path) {
+               ext4_ext_drop_refs(orig_path);
+               kfree(orig_path);
+
+
+From linux@linux.site Thu Dec 10 21:25:52 2009
+Message-Id: <20091211052552.134440580@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:34 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Kazuya Mio <k-mio@sx.jp.nec.com>,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [22/34] ext4: initialize moved_len before calling ext4_move_extents()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch
+Content-Length: 2439
+Lines: 72
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 446aaa6e7e993b38a6f21c6acfa68f3f1af3dbe3)
+
+The move_extent.moved_len is used to pass back the number of exchanged
+blocks to user space.  Currently the caller must clear this
+field; but we spend more code space checking for this requirement than
+simply zeroing the field ourselves, so let's just make life easier for
+everyone all around.
+
+Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |    1 +
+ fs/ext4/move_extent.c |   14 +++-----------
+ 2 files changed, 4 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -239,6 +239,7 @@ setversion_out:
+                       }
+               }
++              me.moved_len = 0;
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+               fput(donor_filp);
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -947,7 +947,6 @@ out2:
+  * @orig_start:               logical start offset in block for orig
+  * @donor_start:      logical start offset in block for donor
+  * @len:              the number of blocks to be moved
+- * @moved_len:                moved block length
+  *
+  * Check the arguments of ext4_move_extents() whether the files can be
+  * exchanged with each other.
+@@ -955,8 +954,8 @@ out2:
+  */
+ static int
+ mext_check_arguments(struct inode *orig_inode,
+-                        struct inode *donor_inode, __u64 orig_start,
+-                        __u64 donor_start, __u64 *len, __u64 moved_len)
++                   struct inode *donor_inode, __u64 orig_start,
++                   __u64 donor_start, __u64 *len)
+ {
+       ext4_lblk_t orig_blocks, donor_blocks;
+       unsigned int blkbits = orig_inode->i_blkbits;
+@@ -1010,13 +1009,6 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
+-      if (moved_len) {
+-              ext4_debug("ext4 move extent: moved_len should be 0 "
+-                      "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+-                      donor_inode->i_ino);
+-              return -EINVAL;
+-      }
+-
+       if ((orig_start > EXT_MAX_BLOCK) ||
+           (donor_start > EXT_MAX_BLOCK) ||
+           (*len > EXT_MAX_BLOCK) ||
+@@ -1226,7 +1218,7 @@ ext4_move_extents(struct file *o_filp, s
+       double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+-                                      donor_start, &len, *moved_len);
++                                  donor_start, &len);
+       if (ret1)
+               goto out;
+
+
+From linux@linux.site Thu Dec 10 21:25:53 2009
+Message-Id: <20091211052552.682377360@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:35 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [23/34] ext4: move_extent_per_page() cleanup
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0019-ext4-move_extent_per_page-cleanup.patch
+Content-Length: 2733
+Lines: 87
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit ac48b0a1d068887141581bea8285de5fcab182b0)
+
+Integrate duplicate lines (acquire/release semaphore and invalidate
+extent cache in move_extent_per_page()) into mext_replace_branches(),
+to reduce source and object code size.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |   30 +++++++++---------------------
+ 1 file changed, 9 insertions(+), 21 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -660,6 +660,9 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
++
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -755,6 +758,11 @@ out:
+               kfree(donor_path);
+       }
++      ext4_ext_invalidate_cache(orig_inode);
++      ext4_ext_invalidate_cache(donor_inode);
++
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       return replaced_count;
+ }
+@@ -820,19 +828,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              /*
+-               * Protect extent trees against block allocations
+-               * via delalloc
+-               */
+-              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+-
+-              /* Clear the inode cache not to refer to the old data */
+-              ext4_ext_invalidate_cache(orig_inode);
+-              ext4_ext_invalidate_cache(donor_inode);
+-              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -880,8 +878,6 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      /* Protect extent trees against block allocations via delalloc */
+-      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -890,18 +886,10 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else {
+-                      double_up_write_data_sem(orig_inode, donor_inode);
++              } else
+                       goto out;
+-              }
+       }
+-      /* Clear the inode cache not to refer to the old data */
+-      ext4_ext_invalidate_cache(orig_inode);
+-      ext4_ext_invalidate_cache(donor_inode);
+-
+-      double_up_write_data_sem(orig_inode, donor_inode);
+-
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+
+
+From linux@linux.site Thu Dec 10 21:25:53 2009
+Message-Id: <20091211052553.196951652@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:36 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [24/34] jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch
+Content-Length: 1035
+Lines: 38
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit e6ec116b67f46e0e7808276476554727b2e6240b)
+
+OOM happens.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/jbd2/commit.c  |    4 ++++
+ fs/jbd2/journal.c |    4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(jou
+               JBUFFER_TRACE(jh, "ph3: write metadata");
+               flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+                                                     jh, &new_jh, blocknr);
++              if (flags < 0) {
++                      jbd2_journal_abort(journal, flags);
++                      continue;
++              }
+               set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+               wbuf[bufs++] = jh2bh(new_jh);
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -358,6 +358,10 @@ repeat:
+               jbd_unlock_bh_state(bh_in);
+               tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
++              if (!tmp) {
++                      jbd2_journal_put_journal_head(new_jh);
++                      return -ENOMEM;
++              }
+               jbd_lock_bh_state(bh_in);
+               if (jh_in->b_frozen_data) {
+                       jbd2_free(tmp, bh_in->b_size);
+
+
+From linux@linux.site Thu Dec 10 21:25:54 2009
+Message-Id: <20091211052553.749907435@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:37 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Roel Kluin <roel.kluin@gmail.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [25/34] ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch
+Content-Length: 595
+Lines: 21
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit c09eef305dd43846360944ad072f051f964fa383)
+
+Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/resize.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct
+                       goto exit_bh;
+               if (IS_ERR(gdb = bclean(handle, sb, block))) {
+-                      err = PTR_ERR(bh);
++                      err = PTR_ERR(gdb);
+                       goto exit_bh;
+               }
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
+
+
+From linux@linux.site Thu Dec 10 21:25:54 2009
+Message-Id: <20091211052554.355331485@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:38 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [26/34] ext4: Avoid data / filesystem corruption when write fails to copy data
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch
+Content-Length: 2923
+Lines: 84
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b9a4207d5e911b938f73079a83cc2ae10524ec7f)
+
+When ext4_write_begin fails after allocating some blocks or
+generic_perform_write fails to copy data to write, we truncate blocks
+already instantiated beyond i_size.  Although these blocks were never
+inside i_size, we have to truncate the pagecache of these blocks so
+that corresponding buffers get unmapped.  Otherwise subsequent
+__block_prepare_write (called because we are retrying the write) will
+find the buffers mapped, not call ->get_block, and thus the page will
+be backed by already freed blocks leading to filesystem and data
+corruption.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1534,6 +1534,16 @@ static int do_journal_get_write_access(h
+       return ext4_journal_get_write_access(handle, bh);
+ }
++/*
++ * Truncate blocks that were not used by write. We have to truncate the
++ * pagecache as well so that corresponding buffers get properly unmapped.
++ */
++static void ext4_truncate_failed_write(struct inode *inode)
++{
++      truncate_inode_pages(inode->i_mapping, inode->i_size);
++      ext4_truncate(inode);
++}
++
+ static int ext4_write_begin(struct file *file, struct address_space *mapping,
+                           loff_t pos, unsigned len, unsigned flags,
+                           struct page **pagep, void **fsdata)
+@@ -1599,7 +1609,7 @@ retry:
+               ext4_journal_stop(handle);
+               if (pos + len > inode->i_size) {
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+                       /*
+                        * If truncate failed early the inode might
+                        * still be on the orphan list; we need to
+@@ -1709,7 +1719,7 @@ static int ext4_ordered_write_end(struct
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1751,7 +1761,7 @@ static int ext4_writeback_write_end(stru
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1814,7 +1824,7 @@ static int ext4_journalled_write_end(str
+       if (!ret)
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -3091,7 +3101,7 @@ retry:
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+       }
+       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+
+
+From linux@linux.site Thu Dec 10 21:25:55 2009
+Message-Id: <20091211052554.925382177@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:39 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Josef Bacik <josef@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [27/34] ext4: wait for log to commit when umounting
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0023-ext4-wait-for-log-to-commit-when-umounting.patch
+Content-Length: 1540
+Lines: 46
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit d4edac314e9ad0b21ba20ba8bc61b61f186f79e1)
+
+There is a potential race when a transaction is committing right when
+the file system is being unmounted.  This could result in a race
+because EXT4_SB(sb)->s_group_info could be freed in ext4_put_super
+before the commit code calls a callback so the mballoc code can
+release freed blocks in the transaction, resulting in a panic trying
+to access the freed s_group_info.
+
+The fix is to wait for the transaction to finish committing before we
+shutdown the multiblock allocator.
+
+Signed-off-by: Josef Bacik <josef@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_
+       if (sb->s_dirt)
+               ext4_commit_super(sb, 1);
+-      ext4_release_system_zone(sb);
+-      ext4_mb_release(sb);
+-      ext4_ext_release(sb);
+-      ext4_xattr_put_super(sb);
+       if (sbi->s_journal) {
+               err = jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_
+                       ext4_abort(sb, __func__,
+                                  "Couldn't clean up the journal");
+       }
++
++      ext4_release_system_zone(sb);
++      ext4_mb_release(sb);
++      ext4_ext_release(sb);
++      ext4_xattr_put_super(sb);
++
+       if (!(sb->s_flags & MS_RDONLY)) {
+               EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+               es->s_state = cpu_to_le16(sbi->s_mount_state);
+
+
+From linux@linux.site Thu Dec 10 21:25:55 2009
+Message-Id: <20091211052555.487338959@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:40 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Curt Wohlgemuth <curtw@google.com>,
+ "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [28/34] ext4: remove blocks from inode prealloc list on failure
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch
+Content-Length: 1476
+Lines: 49
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b844167edc7fcafda9623955c05e4c1b3c32ebc7)
+
+This fixes a leak of blocks in an inode prealloc list if device failures
+cause ext4_mb_mark_diskspace_used() to fail.
+
+Signed-off-by: Curt Wohlgemuth <curtw@google.com>
+Acked-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3011,6 +3011,24 @@ static void ext4_mb_collect_stats(struct
+ }
+ /*
++ * Called on failure; free up any blocks from the inode PA for this
++ * context.  We don't need this for MB_GROUP_PA because we only change
++ * pa_free in ext4_mb_release_context(), but on failure, we've already
++ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
++ */
++static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
++{
++      struct ext4_prealloc_space *pa = ac->ac_pa;
++      int len;
++
++      if (pa && pa->pa_type == MB_INODE_PA) {
++              len = ac->ac_b_ex.fe_len;
++              pa->pa_free += len;
++      }
++
++}
++
++/*
+  * use blocks preallocated to inode
+  */
+ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+@@ -4295,6 +4313,7 @@ repeat:
+                       ac->ac_status = AC_STATUS_CONTINUE;
+                       goto repeat;
+               } else if (*errp) {
++                      ext4_discard_allocated_blocks(ac);
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
+
+
+From linux@linux.site Thu Dec 10 21:25:56 2009
+Message-Id: <20091211052556.043172197@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:41 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ Mingming Cao <cmm@us.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [29/34] ext4: ext4_get_reserved_space() must return bytes instead of blocks
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch
+Content-Length: 718
+Lines: 23
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 8aa6790f876e81f5a2211fe1711a5fe3fe2d7b20)
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1052,7 +1052,7 @@ qsize_t ext4_get_reserved_space(struct i
+               EXT4_I(inode)->i_reserved_meta_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+-      return total;
++      return (total << inode->i_blkbits);
+ }
+ /*
+  * Calculate the number of metadata blocks need to reserve
+
+
+From linux@linux.site Thu Dec 10 21:25:57 2009
+Message-Id: <20091211052556.560487193@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:42 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ Mingming Cao <cmm@us.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [30/34] ext4: quota macros cleanup
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0026-ext4-quota-macros-cleanup.patch
+Content-Length: 5167
+Lines: 138
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 5aca07eb7d8f14d90c740834d15ca15277f4820c)
+
+Currently all quota block reservation macros contain hard-coded "2"
+aka MAXQUOTAS value. This is not good because in some places it is not
+obvious to understand what this digit represents. Let's introduce
+a new macro with a self-descriptive name.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4_jbd2.h |    8 ++++++--
+ fs/ext4/extents.c   |    2 +-
+ fs/ext4/inode.c     |    2 +-
+ fs/ext4/migrate.c   |    4 ++--
+ fs/ext4/namei.c     |    8 ++++----
+ 5 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -49,7 +49,7 @@
+ #define EXT4_DATA_TRANS_BLOCKS(sb)    (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
+                                        EXT4_XATTR_TRANS_BLOCKS - 2 + \
+-                                       2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                       EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /*
+  * Define the number of metadata blocks we need to account to modify data.
+@@ -57,7 +57,7 @@
+  * This include super block, inode block, quota blocks and xattr blocks
+  */
+ #define EXT4_META_TRANS_BLOCKS(sb)    (EXT4_XATTR_TRANS_BLOCKS + \
+-                                      2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                      EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /* Delete operations potentially hit one directory's namespace plus an
+  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
+@@ -92,6 +92,7 @@
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+ #else
+@@ -99,6 +100,9 @@
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+ #endif
++#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2167,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
+                       correct_index = 1;
+                       credits += (ext_depth(inode)) + 1;
+               }
+-              credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++              credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+               err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+               if (err)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5231,7 +5231,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+-              handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
++              handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(hand
+        * So allocate a credit of 3. We may update
+        * quota (user and group).
+        */
+-      needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++      needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+       if (ext4_journal_extend(handle, needed) != 0)
+               retval = ext4_journal_restart(handle, needed);
+@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode
+       handle = ext4_journal_start(inode,
+                                       EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+                                       + 1);
+       if (IS_ERR(handle)) {
+               retval = PTR_ERR(handle);
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1769,7 +1769,7 @@ static int ext4_create(struct inode *dir
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1803,7 +1803,7 @@ static int ext4_mknod(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1840,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -2253,7 +2253,7 @@ static int ext4_symlink(struct inode *di
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+
+
+From linux@linux.site Thu Dec 10 21:25:57 2009
+Message-Id: <20091211052557.153813326@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:43 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [31/34] ext4: fix incorrect block reservation on quota transfer.
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch
+Content-Length: 1036
+Lines: 27
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 194074acacebc169ded90a4657193f5180015051)
+
+Inside ->setattr() call both ATTR_UID and ATTR_GID may be valid
+This means that we may end up with transferring all quotas. And
+we have to reserve QUOTA_DEL_BLOCKS for all quotas, as we do in
+case of QUOTA_INIT_BLOCKS.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5232,7 +5232,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+-                                      EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
++                                      EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+                       goto err_out;
+
+
+From linux@linux.site Thu Dec 10 21:25:58 2009
+Message-Id: <20091211052557.723287400@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:44 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [32/34] ext4: Wait for proper transaction commit on fsync
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
+Content-Length: 7849
+Lines: 252
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b436b9bef84de6893e86346d8fbf7104bc520645)
+
+We cannot rely on buffer dirty bits during fsync because pdflush can come
+before fsync is called and clear dirty bits without forcing a transaction
+commit. What we do is that we track which transaction has last changed
+the inode and which transaction last changed allocation and force it to
+disk on fsync.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h      |    7 +++++++
+ fs/ext4/ext4_jbd2.h |   13 +++++++++++++
+ fs/ext4/extents.c   |   14 ++++++++++++--
+ fs/ext4/fsync.c     |   46 +++++++++++++++++-----------------------------
+ fs/ext4/inode.c     |   29 +++++++++++++++++++++++++++++
+ fs/ext4/super.c     |    2 ++
+ fs/jbd2/journal.c   |    1 +
+ 7 files changed, 81 insertions(+), 31 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -703,6 +703,13 @@ struct ext4_inode_info {
+       struct list_head i_aio_dio_complete_list;
+       /* current io_end structure for async DIO write*/
+       ext4_io_end_t *cur_aio_dio;
++
++      /*
++       * Transactions that contain inode's metadata needed to complete
++       * fsync and fdatasync, respectively.
++       */
++      tid_t i_sync_tid;
++      tid_t i_datasync_tid;
+ };
+ /*
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -258,6 +258,19 @@ static inline int ext4_jbd2_file_inode(h
+       return 0;
+ }
++static inline void ext4_update_inode_fsync_trans(handle_t *handle,
++                                               struct inode *inode,
++                                               int datasync)
++{
++      struct ext4_inode_info *ei = EXT4_I(inode);
++
++      if (ext4_handle_valid(handle)) {
++              ei->i_sync_tid = handle->h_transaction->t_tid;
++              if (datasync)
++                      ei->i_datasync_tid = handle->h_transaction->t_tid;
++      }
++}
++
+ /* super.c */
+ int ext4_force_commit(struct super_block *sb);
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -3064,6 +3064,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
+               ret = ext4_convert_unwritten_extents_dio(handle, inode,
+                                                       path);
++              if (ret >= 0)
++                      ext4_update_inode_fsync_trans(handle, inode, 1);
+               goto out2;
+       }
+       /* buffered IO case */
+@@ -3091,6 +3093,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       ret = ext4_ext_convert_to_initialized(handle, inode,
+                                               path, iblock,
+                                               max_blocks);
++      if (ret >= 0)
++              ext4_update_inode_fsync_trans(handle, inode, 1);
+ out:
+       if (ret <= 0) {
+               err = ret;
+@@ -3329,10 +3333,16 @@ int ext4_ext_get_blocks(handle_t *handle
+       allocated = ext4_ext_get_actual_len(&newex);
+       set_buffer_new(bh_result);
+-      /* Cache only when it is _not_ an uninitialized extent */
+-      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
++      /*
++       * Cache the extent and update transaction to commit on fdatasync only
++       * when it is _not_ an uninitialized extent.
++       */
++      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
+               ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+                                               EXT4_EXT_CACHE_EXTENT);
++              ext4_update_inode_fsync_trans(handle, inode, 1);
++      } else
++              ext4_update_inode_fsync_trans(handle, inode, 0);
+ out:
+       if (allocated > max_blocks)
+               allocated = max_blocks;
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -51,25 +51,30 @@
+ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ {
+       struct inode *inode = dentry->d_inode;
++      struct ext4_inode_info *ei = EXT4_I(inode);
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+-      int err, ret = 0;
++      int ret;
++      tid_t commit_tid;
+       J_ASSERT(ext4_journal_current_handle() == NULL);
+       trace_ext4_sync_file(file, dentry, datasync);
++      if (inode->i_sb->s_flags & MS_RDONLY)
++              return 0;
++
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+               return ret;
++
++      if (!journal)
++              return simple_fsync(file, dentry, datasync);
++
+       /*
+-       * data=writeback:
++       * data=writeback,ordered:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+-       *  sync_inode() will sync the metadata
+-       *
+-       * data=ordered:
+-       *  The caller's filemap_fdatawrite() will write the data and
+-       *  sync_inode() will write the inode if it is dirty.  Then the caller's
+-       *  filemap_fdatawait() will wait on the pages.
++       *  Metadata is in the journal, we wait for proper transaction to
++       *  commit here.
+        *
+        * data=journal:
+        *  filemap_fdatawrite won't do anything (the buffers are clean).
+@@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, st
+       if (ext4_should_journal_data(inode))
+               return ext4_force_commit(inode->i_sb);
+-      if (!journal)
+-              ret = sync_mapping_buffers(inode->i_mapping);
+-
+-      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+-              goto out;
+-
+-      /*
+-       * The VFS has written the file data.  If the inode is unaltered
+-       * then we need not start a commit.
+-       */
+-      if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
+-              struct writeback_control wbc = {
+-                      .sync_mode = WB_SYNC_ALL,
+-                      .nr_to_write = 0, /* sys_fsync did this */
+-              };
+-              err = sync_inode(inode, &wbc);
+-              if (ret == 0)
+-                      ret = err;
+-      }
+-out:
+-      if (journal && (journal->j_flags & JBD2_BARRIER))
++      commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
++      if (jbd2_log_start_commit(journal, commit_tid))
++              jbd2_log_wait_commit(journal, commit_tid);
++      else if (journal->j_flags & JBD2_BARRIER)
+               blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+       return ret;
+ }
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1025,6 +1025,8 @@ static int ext4_ind_get_blocks(handle_t
+               goto cleanup;
+       set_buffer_new(bh_result);
++
++      ext4_update_inode_fsync_trans(handle, inode, 1);
+ got_it:
+       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       if (count > blocks_to_boundary)
+@@ -4794,6 +4796,7 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+       struct inode *inode;
++      journal_t *journal = EXT4_SB(sb)->s_journal;
+       long ret;
+       int block;
+@@ -4858,6 +4861,31 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_data[block] = raw_inode->i_block[block];
+       INIT_LIST_HEAD(&ei->i_orphan);
++      /*
++       * Set transaction id's of transactions that have to be committed
++       * to finish f[data]sync. We set them to currently running transaction
++       * as we cannot be sure that the inode or some of its metadata isn't
++       * part of the transaction - the inode could have been reclaimed and
++       * now it is reread from disk.
++       */
++      if (journal) {
++              transaction_t *transaction;
++              tid_t tid;
++
++              spin_lock(&journal->j_state_lock);
++              if (journal->j_running_transaction)
++                      transaction = journal->j_running_transaction;
++              else
++                      transaction = journal->j_committing_transaction;
++              if (transaction)
++                      tid = transaction->t_tid;
++              else
++                      tid = journal->j_commit_sequence;
++              spin_unlock(&journal->j_state_lock);
++              ei->i_sync_tid = tid;
++              ei->i_datasync_tid = tid;
++      }
++
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+@@ -5112,6 +5140,7 @@ static int ext4_do_update_inode(handle_t
+               err = rc;
+       ei->i_state &= ~EXT4_STATE_NEW;
++      ext4_update_inode_fsync_trans(handle, inode, 0);
+ out_brelse:
+       brelse(bh);
+       ext4_std_error(inode->i_sb, err);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(st
+       spin_lock_init(&(ei->i_block_reservation_lock));
+       INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+       ei->cur_aio_dio = NULL;
++      ei->i_sync_tid = 0;
++      ei->i_datasync_tid = 0;
+       return &ei->vfs_inode;
+ }
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
+ EXPORT_SYMBOL(jbd2_journal_ack_err);
+ EXPORT_SYMBOL(jbd2_journal_clear_err);
+ EXPORT_SYMBOL(jbd2_log_wait_commit);
++EXPORT_SYMBOL(jbd2_log_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
+ EXPORT_SYMBOL(jbd2_journal_wipe);
+
+
+From linux@linux.site Thu Dec 10 21:25:58 2009
+Message-Id: <20091211052558.272572522@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:45 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [33/34] ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch
+Content-Length: 2732
+Lines: 94
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 4a58579b9e4e2a35d57e6c9c8483e52f6f1b7fd6)
+
+This patch fixes three problems in the handling of the
+EXT4_IOC_MOVE_EXT ioctl:
+
+1. In current EXT4_IOC_MOVE_EXT, there are read access mode checks for
+original and donor files, but they allow the illegal write access to
+donor file, since donor file is overwritten by original file data.  To
+fix this problem, change access mode checks of original (r->r/w) and
+donor (r->w) files.
+
+2.  Disallow the use of donor files that have a setuid or setgid bits.
+
+3.  Call mnt_want_write() and mnt_drop_write() before and after
+ext4_move_extents() calling to get write access to a mount.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |   30 ++++++++++++++++++------------
+ fs/ext4/move_extent.c |    7 +++++++
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -221,32 +221,38 @@ setversion_out:
+               struct file *donor_filp;
+               int err;
++              if (!(filp->f_mode & FMODE_READ) ||
++                  !(filp->f_mode & FMODE_WRITE))
++                      return -EBADF;
++
+               if (copy_from_user(&me,
+                       (struct move_extent __user *)arg, sizeof(me)))
+                       return -EFAULT;
++              me.moved_len = 0;
+               donor_filp = fget(me.donor_fd);
+               if (!donor_filp)
+                       return -EBADF;
+-              if (!capable(CAP_DAC_OVERRIDE)) {
+-                      if ((current->real_cred->fsuid != inode->i_uid) ||
+-                              !(inode->i_mode & S_IRUSR) ||
+-                              !(donor_filp->f_dentry->d_inode->i_mode &
+-                              S_IRUSR)) {
+-                              fput(donor_filp);
+-                              return -EACCES;
+-                      }
++              if (!(donor_filp->f_mode & FMODE_WRITE)) {
++                      err = -EBADF;
++                      goto mext_out;
+               }
+-              me.moved_len = 0;
++              err = mnt_want_write(filp->f_path.mnt);
++              if (err)
++                      goto mext_out;
++
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+-              fput(donor_filp);
++              mnt_drop_write(filp->f_path.mnt);
++              if (me.moved_len > 0)
++                      file_remove_suid(donor_filp);
+               if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
+-                      return -EFAULT;
+-
++                      err = -EFAULT;
++mext_out:
++              fput(donor_filp);
+               return err;
+       }
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -957,6 +957,13 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
++      if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
++              ext4_debug("ext4 move extent: suid or sgid is set"
++                         " to donor file [ino:orig %lu, donor %lu]\n",
++                         orig_inode->i_ino, donor_inode->i_ino);
++              return -EINVAL;
++      }
++
+       /* Ext4 move extent does not support swapfile */
+       if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+               ext4_debug("ext4 move extent: The argument files should "
+
+
+From linux@linux.site Thu Dec 10 21:25:59 2009
+Message-Id: <20091211052558.863762484@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:46 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [34/34] ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch
+Content-Length: 5029
+Lines: 115
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit fab3a549e204172236779f502eccb4f9bf0dc87d)
+
+Fix the following potential circular locking dependency between
+mm->mmap_sem and ei->i_data_sem:
+
+    =======================================================
+    [ INFO: possible circular locking dependency detected ]
+    2.6.32-04115-gec044c5 #37
+    -------------------------------------------------------
+    ureadahead/1855 is trying to acquire lock:
+     (&mm->mmap_sem){++++++}, at: [<ffffffff81107224>] might_fault+0x5c/0xac
+
+    but task is already holding lock:
+     (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    which lock already depends on the new lock.
+
+    the existing dependency chain (in reverse order) is:
+
+    -> #1 (&ei->i_data_sem){++++..}:
+           [<ffffffff81099bfa>] __lock_acquire+0xb67/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81516633>] down_read+0x51/0x84
+           [<ffffffff811a2414>] ext4_get_blocks+0x50/0x2a5
+           [<ffffffff811a3453>] ext4_get_block+0xab/0xef
+           [<ffffffff81154f39>] do_mpage_readpage+0x198/0x48d
+           [<ffffffff81155360>] mpage_readpages+0xd0/0x114
+           [<ffffffff811a104b>] ext4_readpages+0x1d/0x1f
+           [<ffffffff810f8644>] __do_page_cache_readahead+0x12f/0x1bc
+           [<ffffffff810f86f2>] ra_submit+0x21/0x25
+           [<ffffffff810f0cfd>] filemap_fault+0x19f/0x32c
+           [<ffffffff81107b97>] __do_fault+0x55/0x3a2
+           [<ffffffff81109db0>] handle_mm_fault+0x327/0x734
+           [<ffffffff8151aaa9>] do_page_fault+0x292/0x2aa
+           [<ffffffff81518205>] page_fault+0x25/0x30
+           [<ffffffff812a34d8>] clear_user+0x38/0x3c
+           [<ffffffff81167e16>] padzero+0x20/0x31
+           [<ffffffff81168b47>] load_elf_binary+0x8bc/0x17ed
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff81166d64>] load_script+0x1b8/0x1cc
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff8113255f>] do_execve+0x1ce/0x2cf
+           [<ffffffff81027494>] sys_execve+0x43/0x5a
+           [<ffffffff8102918a>] stub_execve+0x6a/0xc0
+
+    -> #0 (&mm->mmap_sem){++++++}:
+           [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81107251>] might_fault+0x89/0xac
+           [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+           [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+           [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+           [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+           [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+           [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+           [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+    other info that might help us debug this:
+
+    1 lock held by ureadahead/1855:
+     #0:  (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    stack backtrace:
+    Pid: 1855, comm: ureadahead Not tainted 2.6.32-04115-gec044c5 #37
+    Call Trace:
+     [<ffffffff81098c70>] print_circular_bug+0xa8/0xb7
+     [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+     [<ffffffff8102f229>] ? sched_clock+0x9/0xd
+     [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81107251>] might_fault+0x89/0xac
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81124b44>] ? __kmalloc+0x13b/0x18c
+     [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+     [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+     [<ffffffff811bca0b>] ? ext4_ext_fiemap_cb+0x0/0x157
+     [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+     [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+     [<ffffffff8129f6d0>] ? __up_read+0x8d/0x95
+     [<ffffffff81517fb5>] ? retint_swapgs+0x13/0x1b
+     [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+     [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *in
+       while (block < last && block != EXT_MAX_BLOCK) {
+               num = last - block;
+               /* find extent for this block */
++              down_read(&EXT4_I(inode)->i_data_sem);
+               path = ext4_ext_find_extent(inode, block, path);
++              up_read(&EXT4_I(inode)->i_data_sem);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;
+@@ -3730,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, str
+                * Walk the extent tree gathering extent information.
+                * ext4_ext_fiemap_cb will push extents back to user.
+                */
+-              down_read(&EXT4_I(inode)->i_data_sem);
+               error = ext4_ext_walk_space(inode, start_blk, len_blks,
+                                         ext4_ext_fiemap_cb, fieinfo);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+       }
+       return error;
+
+
+From linux@linux.site Thu Dec 10 21:25:40 2009
+Message-Id: <20091211052312.805428372@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:12 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk
+Subject: [00/34] 2.6.32.1-stable review
+Content-Length: 2372
+Lines: 51
+
+This is the start of the stable review cycle for the 2.6.32.1 release.
+There are 34 patches in this series, all will be posted as a response to
+this one.  If anyone has any issues with these being applied, please let
+us know.  If anyone is a maintainer of the proper subsystem, and wants
+to add a Signed-off-by: line to the patch, please respond with it.
+
+As was done with the 2.6.31.8-rc1 release, this is not all of the
+patches in the -stable queue, just a huge chunk of ext4 patches here,
+and a few scsi ones, which should all get out sooner rather than later.
+So note that there will be more 2.6.32-stable releases coming, this is
+just the first in the series.
+
+Responses should be made by Sunday, Dec 13 04:00:00 UTC 2009
+Anything received after that time might be too late.
+
+The whole patch series can be found in one patch at:
+       kernel.org/pub/linux/kernel/v2.6/stable-review/patch-2.6.32.1-rc1.gz
+and the diffstat can be found below.
+
+thanks,
+
+greg k-h
+
+ Documentation/filesystems/ext4.txt   |   10 +-
+ Makefile                             |    2 +-
+ drivers/scsi/hosts.c                 |   13 ++-
+ drivers/scsi/lpfc/lpfc_init.c        |    2 +-
+ drivers/scsi/megaraid/megaraid_sas.c |    8 +-
+ drivers/scsi/qla2xxx/qla_attr.c      |    3 +-
+ drivers/scsi/scsi_lib_dma.c          |    4 +-
+ fs/ext4/balloc.c                     |    8 +-
+ fs/ext4/block_validity.c             |    2 +-
+ fs/ext4/ext4.h                       |    8 +
+ fs/ext4/ext4_jbd2.h                  |   21 +++-
+ fs/ext4/extents.c                    |   22 ++-
+ fs/ext4/fsync.c                      |   54 +++----
+ fs/ext4/inode.c                      |   81 +++++++---
+ fs/ext4/ioctl.c                      |   29 +++--
+ fs/ext4/mballoc.c                    |   40 ++++-
+ fs/ext4/migrate.c                    |    4 +-
+ fs/ext4/move_extent.c                |  278 ++++++++++++++++------------------
+ fs/ext4/namei.c                      |   38 ++---
+ fs/ext4/resize.c                     |    2 +-
+ fs/ext4/super.c                      |   40 ++++--
+ fs/ext4/xattr.c                      |    7 +-
+ fs/jbd2/commit.c                     |    4 +
+ fs/jbd2/journal.c                    |    5 +
+ include/linux/sched.h                |   13 ++-
+ include/scsi/osd_protocol.h          |    1 +
+ include/scsi/scsi_host.h             |   16 ++-
+ 27 files changed, 424 insertions(+), 291 deletions(-)
+
diff --git a/review-2.6.32/0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch b/review-2.6.32/0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch
new file mode 100644 (file)
index 0000000..291e49c
--- /dev/null
@@ -0,0 +1,122 @@
+From 4df5b14f4bea734ff3d9c721eb331540ad142de3 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:25:49 -0500
+Subject: [PATCH 01/30] ext4: fix potential buffer head leak when add_dirent_to_buf() returns ENOSPC
+
+(cherry picked from commit 2de770a406b06dfc619faabbf5d85c835ed3f2e1)
+
+Previously add_dirent_to_buf() did not free its passed-in buffer head
+in the case of ENOSPC, since in some cases the caller still needed it.
+However, this led to potential buffer head leaks since not all callers
+dealt with this correctly.  Fix this by simplifying the freeing
+convention; now add_dirent_to_buf() *never* frees the passed-in buffer
+head, and leaves that to the responsibility of its caller.  This makes
+things cleaner and easier to prove that the code is neither leaking
+buffer heads nor calling brelse() one time too many.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Curt Wohlgemuth <curtw@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c |   30 ++++++++++++------------------
+ 1 file changed, 12 insertions(+), 18 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1292,9 +1292,6 @@ errout:
+  * add_dirent_to_buf will attempt search the directory block for
+  * space.  It will return -ENOSPC if no space is available, and -EIO
+  * and -EEXIST if directory entry already exists.
+- *
+- * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+- * all other cases bh is released.
+  */
+ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode, struct ext4_dir_entry_2 *de,
+@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *h
+               top = bh->b_data + blocksize - reclen;
+               while ((char *) de <= top) {
+                       if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
+-                                                bh, offset)) {
+-                              brelse(bh);
++                                                bh, offset))
+                               return -EIO;
+-                      }
+-                      if (ext4_match(namelen, name, de)) {
+-                              brelse(bh);
++                      if (ext4_match(namelen, name, de))
+                               return -EEXIST;
+-                      }
+                       nlen = EXT4_DIR_REC_LEN(de->name_len);
+                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+                       if ((de->inode? rlen - nlen: rlen) >= reclen)
+@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_journal_get_write_access(handle, bh);
+       if (err) {
+               ext4_std_error(dir->i_sb, err);
+-              brelse(bh);
+               return err;
+       }
+@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
+       if (err)
+               ext4_std_error(dir->i_sb, err);
+-      brelse(bh);
+       return 0;
+ }
+@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *ha
+       if (!(de))
+               return retval;
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *hand
+               if(!bh)
+                       return retval;
+               retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-              if (retval != -ENOSPC)
++              if (retval != -ENOSPC) {
++                      brelse(bh);
+                       return retval;
++              }
+               if (blocks == 1 && !dx_fallback &&
+                   EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
+@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *hand
+       de = (struct ext4_dir_entry_2 *) bh->b_data;
+       de->inode = 0;
+       de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *h
+               goto journal_error;
+       err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-      if (err != -ENOSPC) {
+-              bh = NULL;
++      if (err != -ENOSPC)
+               goto cleanup;
+-      }
+       /* Block full, should compress but for now just split */
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *h
+       if (!de)
+               goto cleanup;
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+-      bh = NULL;
+       goto cleanup;
+ journal_error:
diff --git a/review-2.6.32/0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch b/review-2.6.32/0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch
new file mode 100644 (file)
index 0000000..9a611a6
--- /dev/null
@@ -0,0 +1,43 @@
+From 833f693f0cf8059bcda7ddff3434473b2a5d4d69 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:24:46 -0500
+Subject: [PATCH 02/30] ext4: avoid divide by zero when trying to mount a corrupted file system
+
+(cherry picked from commit 503358ae01b70ce6909d19dd01287093f6b6271c)
+
+If s_log_groups_per_flex is greater than 31, then groups_per_flex will
+overflow and cause a divide by zero error.  This can cause a kernel
+BUG if such a file system is mounted.
+
+Thanks to Nageswara R Sastry for analyzing the failure and providing
+an initial patch.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=14287
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1673,14 +1673,14 @@ static int ext4_fill_flex_info(struct su
+       size_t size;
+       int i;
+-      if (!sbi->s_es->s_log_groups_per_flex) {
++      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
++      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
++
++      if (groups_per_flex < 2) {
+               sbi->s_log_groups_per_flex = 0;
+               return 1;
+       }
+-      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+-      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+-
+       /* We allocate both existing and potentially added groups */
+       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+                       ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
diff --git a/review-2.6.32/0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch b/review-2.6.32/0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch
new file mode 100644 (file)
index 0000000..610a2a3
--- /dev/null
@@ -0,0 +1,353 @@
+From 30a07c3105210d4d88e1ddb80fdd11a98ad2b281 Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Mon, 23 Nov 2009 07:25:48 -0500
+Subject: [PATCH 03/30] ext4: fix the returned block count if EXT4_IOC_MOVE_EXT fails
+
+(cherry picked from commit f868a48d06f8886cb0367568a12367fa4f21ea0d)
+
+If the EXT4_IOC_MOVE_EXT ioctl fails, the number of blocks that were
+exchanged before the failure should be returned to the userspace
+caller.  Unfortunately, currently if the block size is not the same as
+the page size, the block count that is returned is the
+page-aligned block count instead of the actual block count.  This
+commit addresses this bug.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  139 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 73 insertions(+), 66 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -661,6 +661,7 @@ mext_calc_swap_extents(struct ext4_exten
+  * @donor_inode:      donor inode
+  * @from:             block offset of orig_inode
+  * @count:            block count to be replaced
++ * @err:              pointer to save return value
+  *
+  * Replace original inode extents and donor inode extents page by page.
+  * We implement this replacement in the following three steps:
+@@ -671,19 +672,18 @@ mext_calc_swap_extents(struct ext4_exten
+  * 3. Change the block information of donor inode to point at the saved
+  *    original inode blocks in the dummy extents.
+  *
+- * Return 0 on success, or a negative error value on failure.
++ * Return replaced block count.
+  */
+ static int
+ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+                          struct inode *donor_inode, ext4_lblk_t from,
+-                         ext4_lblk_t count)
++                         ext4_lblk_t count, int *err)
+ {
+       struct ext4_ext_path *orig_path = NULL;
+       struct ext4_ext_path *donor_path = NULL;
+       struct ext4_extent *oext, *dext;
+       struct ext4_extent tmp_dext, tmp_oext;
+       ext4_lblk_t orig_off = from, donor_off = from;
+-      int err = 0;
+       int depth;
+       int replaced_count = 0;
+       int dext_alen;
+@@ -691,13 +691,13 @@ mext_replace_branches(handle_t *handle,
+       mext_double_down_write(orig_inode, donor_inode);
+       /* Get the original extent for the block "orig_off" */
+-      err = get_ext_path(orig_inode, orig_off, &orig_path);
+-      if (err)
++      *err = get_ext_path(orig_inode, orig_off, &orig_path);
++      if (*err)
+               goto out;
+       /* Get the donor extent for the head */
+-      err = get_ext_path(donor_inode, donor_off, &donor_path);
+-      if (err)
++      *err = get_ext_path(donor_inode, donor_off, &donor_path);
++      if (*err)
+               goto out;
+       depth = ext_depth(orig_inode);
+       oext = orig_path[depth].p_ext;
+@@ -707,9 +707,9 @@ mext_replace_branches(handle_t *handle,
+       dext = donor_path[depth].p_ext;
+       tmp_dext = *dext;
+-      err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++      *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                     donor_off, count);
+-      if (err)
++      if (*err)
+               goto out;
+       /* Loop for the donor extents */
+@@ -718,7 +718,7 @@ mext_replace_branches(handle_t *handle,
+               if (!dext) {
+                       ext4_error(donor_inode->i_sb, __func__,
+                                  "The extent for donor must be found");
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
+                       ext4_error(donor_inode->i_sb, __func__,
+@@ -726,20 +726,20 @@ mext_replace_branches(handle_t *handle,
+                               "extent(%u) should be equal",
+                               donor_off,
+                               le32_to_cpu(tmp_dext.ee_block));
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               }
+               /* Set donor extent to orig extent */
+-              err = mext_leaf_block(handle, orig_inode,
++              *err = mext_leaf_block(handle, orig_inode,
+                                          orig_path, &tmp_dext, &orig_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               /* Set orig extent to donor extent */
+-              err = mext_leaf_block(handle, donor_inode,
++              *err = mext_leaf_block(handle, donor_inode,
+                                          donor_path, &tmp_oext, &donor_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               dext_alen = ext4_ext_get_actual_len(&tmp_dext);
+@@ -753,35 +753,25 @@ mext_replace_branches(handle_t *handle,
+               if (orig_path)
+                       ext4_ext_drop_refs(orig_path);
+-              err = get_ext_path(orig_inode, orig_off, &orig_path);
+-              if (err)
++              *err = get_ext_path(orig_inode, orig_off, &orig_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(orig_inode);
+               oext = orig_path[depth].p_ext;
+-              if (le32_to_cpu(oext->ee_block) +
+-                              ext4_ext_get_actual_len(oext) <= orig_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_oext = *oext;
+               if (donor_path)
+                       ext4_ext_drop_refs(donor_path);
+-              err = get_ext_path(donor_inode, donor_off, &donor_path);
+-              if (err)
++              *err = get_ext_path(donor_inode, donor_off, &donor_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(donor_inode);
+               dext = donor_path[depth].p_ext;
+-              if (le32_to_cpu(dext->ee_block) +
+-                              ext4_ext_get_actual_len(dext) <= donor_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_dext = *dext;
+-              err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++              *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                          donor_off, count - replaced_count);
+-              if (err)
++              if (*err)
+                       goto out;
+       }
+@@ -796,7 +786,7 @@ out:
+       }
+       mext_double_up_write(orig_inode, donor_inode);
+-      return err;
++      return replaced_count;
+ }
+ /**
+@@ -808,16 +798,17 @@ out:
+  * @data_offset_in_page:      block index where data swapping starts
+  * @block_len_in_page:                the number of blocks to be swapped
+  * @uninit:                   orig extent is uninitialized or not
++ * @err:                      pointer to save return value
+  *
+  * Save the data in original inode blocks and replace original inode extents
+  * with donor inode extents by calling mext_replace_branches().
+- * Finally, write out the saved data in new original inode blocks. Return 0
+- * on success, or a negative error value on failure.
++ * Finally, write out the saved data in new original inode blocks. Return
++ * replaced block count.
+  */
+ static int
+ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                 pgoff_t orig_page_offset, int data_offset_in_page,
+-                int block_len_in_page, int uninit)
++                int block_len_in_page, int uninit, int *err)
+ {
+       struct inode *orig_inode = o_filp->f_dentry->d_inode;
+       struct address_space *mapping = orig_inode->i_mapping;
+@@ -829,9 +820,11 @@ move_extent_per_page(struct file *o_filp
+       long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+       unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+       unsigned int w_flags = 0;
+-      unsigned int tmp_data_len, data_len;
++      unsigned int tmp_data_size, data_size, replaced_size;
+       void *fsdata;
+-      int ret, i, jblocks;
++      int i, jblocks;
++      int err2 = 0;
++      int replaced_count = 0;
+       int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+       /*
+@@ -841,8 +834,8 @@ move_extent_per_page(struct file *o_filp
+       jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+       handle = ext4_journal_start(orig_inode, jblocks);
+       if (IS_ERR(handle)) {
+-              ret = PTR_ERR(handle);
+-              return ret;
++              *err = PTR_ERR(handle);
++              return 0;
+       }
+       if (segment_eq(get_fs(), KERNEL_DS))
+@@ -858,9 +851,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              ret = mext_replace_branches(handle, orig_inode,
+-                                               donor_inode, orig_blk_offset,
+-                                               block_len_in_page);
++              replaced_count = mext_replace_branches(handle, orig_inode,
++                                              donor_inode, orig_blk_offset,
++                                              block_len_in_page, err);
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+@@ -870,27 +863,28 @@ move_extent_per_page(struct file *o_filp
+       offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+-      /* Calculate data_len */
++      /* Calculate data_size */
+       if ((orig_blk_offset + block_len_in_page - 1) ==
+           ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
+               /* Replace the last block */
+-              tmp_data_len = orig_inode->i_size & (blocksize - 1);
++              tmp_data_size = orig_inode->i_size & (blocksize - 1);
+               /*
+-               * If data_len equal zero, it shows data_len is multiples of
++               * If data_size equal zero, it shows data_size is multiples of
+                * blocksize. So we set appropriate value.
+                */
+-              if (tmp_data_len == 0)
+-                      tmp_data_len = blocksize;
++              if (tmp_data_size == 0)
++                      tmp_data_size = blocksize;
+-              data_len = tmp_data_len +
++              data_size = tmp_data_size +
+                       ((block_len_in_page - 1) << orig_inode->i_blkbits);
+-      } else {
+-              data_len = block_len_in_page << orig_inode->i_blkbits;
+-      }
++      } else
++              data_size = block_len_in_page << orig_inode->i_blkbits;
++
++      replaced_size = data_size;
+-      ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
++      *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
+                                &page, &fsdata);
+-      if (unlikely(ret < 0))
++      if (unlikely(*err < 0))
+               goto out;
+       if (!PageUptodate(page)) {
+@@ -911,10 +905,17 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      ret = mext_replace_branches(handle, orig_inode, donor_inode,
+-                                       orig_blk_offset, block_len_in_page);
+-      if (ret < 0)
+-              goto out;
++      replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
++                                      orig_blk_offset, block_len_in_page,
++                                      &err2);
++      if (err2) {
++              if (replaced_count) {
++                      block_len_in_page = replaced_count;
++                      replaced_size =
++                              block_len_in_page << orig_inode->i_blkbits;
++              } else
++                      goto out;
++      }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+@@ -928,16 +929,16 @@ move_extent_per_page(struct file *o_filp
+               bh = bh->b_this_page;
+       for (i = 0; i < block_len_in_page; i++) {
+-              ret = ext4_get_block(orig_inode,
++              *err = ext4_get_block(orig_inode,
+                               (sector_t)(orig_blk_offset + i), bh, 0);
+-              if (ret < 0)
++              if (*err < 0)
+                       goto out;
+               if (bh->b_this_page != NULL)
+                       bh = bh->b_this_page;
+       }
+-      ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
++      *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
+                              page, fsdata);
+       page = NULL;
+@@ -951,7 +952,10 @@ out:
+ out2:
+       ext4_journal_stop(handle);
+-      return ret < 0 ? ret : 0;
++      if (err2)
++              *err = err2;
++
++      return replaced_count;
+ }
+ /**
+@@ -1367,15 +1371,17 @@ ext4_move_extents(struct file *o_filp, s
+               while (orig_page_offset <= seq_end_page) {
+                       /* Swap original branches with new branches */
+-                      ret1 = move_extent_per_page(o_filp, donor_inode,
++                      block_len_in_page = move_extent_per_page(
++                                              o_filp, donor_inode,
+                                               orig_page_offset,
+                                               data_offset_in_page,
+-                                              block_len_in_page, uninit);
+-                      if (ret1 < 0)
+-                              goto out;
+-                      orig_page_offset++;
++                                              block_len_in_page, uninit,
++                                              &ret1);
++
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
++                      if (ret1 < 0)
++                              goto out;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+@@ -1385,6 +1391,7 @@ ext4_move_extents(struct file *o_filp, s
+                               goto out;
+                       }
++                      orig_page_offset++;
+                       data_offset_in_page = 0;
+                       rest_blocks -= block_len_in_page;
+                       if (rest_blocks > blocks_per_page)
diff --git a/review-2.6.32/0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch b/review-2.6.32/0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch
new file mode 100644 (file)
index 0000000..6573b74
--- /dev/null
@@ -0,0 +1,314 @@
+From 28e1746b954e72a90c00fec39fa3e131e2a3d640 Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Mon, 23 Nov 2009 07:24:43 -0500
+Subject: [PATCH 04/30] ext4: fix lock order problem in ext4_move_extents()
+
+(cherry picked from commit fc04cb49a898c372a22b21fffc47f299d8710801)
+
+ext4_move_extents() checks the logical block contiguousness
+of the original file with ext4_ext_find_extent() and mext_next_extent().
+Therefore the extent which the ext4_ext_path structure indicates
+must not be changed between the above functions.
+
+But in the current implementation, there is no i_data_sem protection
+between ext4_ext_find_extent() and mext_next_extent().  So the extent
+which the ext4_ext_path structure indicates may be overwritten by
+delalloc.  As a result, ext4_move_extents() will exchange wrong blocks
+between original and donor files.  I changed the places where i_data_sem
+is acquired and released to solve this problem.
+
+Moreover, I changed move_extent_per_page() to start transaction first,
+and then acquire i_data_sem.  Without this change, there is a
+possibility of the deadlock between mmap() and ext4_move_extents():
+
+* NOTE: "A", "B" and "C" mean different processes
+
+A-1: ext4_move_extents() acquires i_data_sem of two inodes.
+
+B:   do_page_fault() starts the transaction (T),
+     and then tries to acquire i_data_sem.
+     But process "A" is already holding it, so it is kept waiting.
+
+C:   While "A" and "B" are running, kjournald2 tries to commit transaction (T)
+     but it is still being updated, so kjournald2 waits for it.
+
+A-2: Calls ext4_journal_start() while holding i_data_sem,
+     but transaction (T) is locked.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  117 ++++++++++++++++++++++----------------------------
+ 1 file changed, 53 insertions(+), 64 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -77,12 +77,14 @@ static int
+ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+                     struct ext4_extent **extent)
+ {
++      struct ext4_extent_header *eh;
+       int ppos, leaf_ppos = path->p_depth;
+       ppos = leaf_ppos;
+       if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+               /* leaf block */
+               *extent = ++path[ppos].p_ext;
++              path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+               return 0;
+       }
+@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, st
+                                       ext_block_hdr(path[cur_ppos+1].p_bh);
+                       }
++                      path[leaf_ppos].p_ext = *extent = NULL;
++
++                      eh = path[leaf_ppos].p_hdr;
++                      if (le16_to_cpu(eh->eh_entries) == 0)
++                              /* empty leaf is found */
++                              return -ENODATA;
++
+                       /* leaf block */
+                       path[leaf_ppos].p_ext = *extent =
+                               EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
++                      path[leaf_ppos].p_block =
++                                      ext_pblock(path[leaf_ppos].p_ext);
+                       return 0;
+               }
+       }
+@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inod
+ }
+ /**
+- * mext_double_down_read - Acquire two inodes' read semaphore
+- *
+- * @orig_inode:               original inode structure
+- * @donor_inode:      donor inode structure
+- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+- */
+-static void
+-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      struct inode *first = orig_inode, *second = donor_inode;
+-
+-      /*
+-       * Use the inode number to provide the stable locking order instead
+-       * of its address, because the C language doesn't guarantee you can
+-       * compare pointers that don't come from the same array.
+-       */
+-      if (donor_inode->i_ino < orig_inode->i_ino) {
+-              first = donor_inode;
+-              second = orig_inode;
+-      }
+-
+-      down_read(&EXT4_I(first)->i_data_sem);
+-      down_read(&EXT4_I(second)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_down_write - Acquire two inodes' write semaphore
++ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure
+  * @donor_inode:      donor inode structure
+- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
++ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
++ * i_ino order.
+  */
+ static void
+-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
++double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       struct inode *first = orig_inode, *second = donor_inode;
+@@ -207,28 +193,14 @@ mext_double_down_write(struct inode *ori
+ }
+ /**
+- * mext_double_up_read - Release two inodes' read semaphore
++ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure to be released its lock first
+  * @donor_inode:      donor inode structure to be released its lock second
+- * Release read semaphore of two inodes (orig and donor).
++ * Release write lock of i_data_sem of two inodes (orig and donor).
+  */
+ static void
+-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      up_read(&EXT4_I(orig_inode)->i_data_sem);
+-      up_read(&EXT4_I(donor_inode)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_up_write - Release two inodes' write semaphore
+- *
+- * @orig_inode:               original inode structure to be released its lock first
+- * @donor_inode:      donor inode structure to be released its lock second
+- * Release write semaphore of two inodes (orig and donor).
+- */
+-static void
+-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
++double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       up_write(&EXT4_I(orig_inode)->i_data_sem);
+       up_write(&EXT4_I(donor_inode)->i_data_sem);
+@@ -688,8 +660,6 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
+-      mext_double_down_write(orig_inode, donor_inode);
+-
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -785,7 +755,6 @@ out:
+               kfree(donor_path);
+       }
+-      mext_double_up_write(orig_inode, donor_inode);
+       return replaced_count;
+ }
+@@ -851,6 +820,11 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
++              /*
++               * Protect extent trees against block allocations
++               * via delalloc
++               */
++              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+@@ -858,6 +832,7 @@ move_extent_per_page(struct file *o_filp
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+               ext4_ext_invalidate_cache(donor_inode);
++              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -905,6 +880,8 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -913,14 +890,18 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else
++              } else {
++                      double_up_write_data_sem(orig_inode, donor_inode);
+                       goto out;
++              }
+       }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+       ext4_ext_invalidate_cache(donor_inode);
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+@@ -1236,16 +1217,16 @@ ext4_move_extents(struct file *o_filp, s
+               return -EINVAL;
+       }
+-      /* protect orig and donor against a truncate */
++      /* Protect orig and donor inodes against a truncate */
+       ret1 = mext_inode_double_lock(orig_inode, donor_inode);
+       if (ret1 < 0)
+               return ret1;
+-      mext_double_down_read(orig_inode, donor_inode);
++      /* Protect extent tree against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+                                       donor_start, &len, *moved_len);
+-      mext_double_up_read(orig_inode, donor_inode);
+       if (ret1)
+               goto out;
+@@ -1308,6 +1289,10 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
++      /* Discard preallocations of two inodes */
++      ext4_discard_preallocations(orig_inode);
++      ext4_discard_preallocations(donor_inode);
++
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1359,14 +1344,14 @@ ext4_move_extents(struct file *o_filp, s
+               seq_start = le32_to_cpu(ext_cur->ee_block);
+               rest_blocks = seq_blocks;
+-              /* Discard preallocations of two inodes */
+-              down_write(&EXT4_I(orig_inode)->i_data_sem);
+-              ext4_discard_preallocations(orig_inode);
+-              up_write(&EXT4_I(orig_inode)->i_data_sem);
+-
+-              down_write(&EXT4_I(donor_inode)->i_data_sem);
+-              ext4_discard_preallocations(donor_inode);
+-              up_write(&EXT4_I(donor_inode)->i_data_sem);
++              /*
++               * Up semaphore to avoid following problems:
++               * a. transaction deadlock among ext4_journal_start,
++               *    ->write_begin via pagefault, and jbd2_journal_commit
++               * b. racing with ->readpage, ->write_begin, and ext4_get_block
++               *    in move_extent_per_page
++               */
++              double_up_write_data_sem(orig_inode, donor_inode);
+               while (orig_page_offset <= seq_end_page) {
+@@ -1381,14 +1366,14 @@ ext4_move_extents(struct file *o_filp, s
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
+                       if (ret1 < 0)
+-                              goto out;
++                              break;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+                                       "sum of replaced: %llu requested: %llu",
+                                       *moved_len, len);
+                               ret1 = -EIO;
+-                              goto out;
++                              break;
+                       }
+                       orig_page_offset++;
+@@ -1400,6 +1385,10 @@ ext4_move_extents(struct file *o_filp, s
+                               block_len_in_page = rest_blocks;
+               }
++              double_down_write_data_sem(orig_inode, donor_inode);
++              if (ret1 < 0)
++                      break;
++
+               /* Decrease buffer counter */
+               if (holecheck_path)
+                       ext4_ext_drop_refs(holecheck_path);
+@@ -1429,7 +1418,7 @@ out:
+               ext4_ext_drop_refs(holecheck_path);
+               kfree(holecheck_path);
+       }
+-
++      double_up_write_data_sem(orig_inode, donor_inode);
+       ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
+       if (ret1)
diff --git a/review-2.6.32/0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch b/review-2.6.32/0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch
new file mode 100644 (file)
index 0000000..86da634
--- /dev/null
@@ -0,0 +1,36 @@
+From 2af090b9487c1259525876fe4f3a2605594ac413 Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Mon, 23 Nov 2009 07:24:41 -0500
+Subject: [PATCH 05/30] ext4: fix possible recursive locking warning in EXT4_IOC_MOVE_EXT
+
+(cherry picked from commit 49bd22bc4d603a2a4fc2a6a60e156cbea52eb494)
+
+If CONFIG_PROVE_LOCKING is enabled, the double_down_write_data_sem()
+will trigger a false-positive warning of a recursive lock.  Since we
+take i_data_sem for the two inodes ordered by their inode numbers,
+this isn't a problem.  Use of down_write_nested() will notify the lock
+dependency checker machinery that there is no problem here.
+
+This problem was reported by Brian Rogers:
+
+       http://marc.info/?l=linux-ext4&m=125115356928011&w=1
+
+Reported-by: Brian Rogers <brian@xyzw.org>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -189,7 +189,7 @@ double_down_write_data_sem(struct inode
+       }
+       down_write(&EXT4_I(first)->i_data_sem);
+-      down_write(&EXT4_I(second)->i_data_sem);
++      down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ }
+ /**
diff --git a/review-2.6.32/0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch b/review-2.6.32/0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch
new file mode 100644 (file)
index 0000000..ade9f8c
--- /dev/null
@@ -0,0 +1,86 @@
+From a712d4ea6545e04d90dd0ca788cd2380a93ce7dc Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 14 Nov 2009 08:19:05 -0500
+Subject: [PATCH 06/30] ext4: plug a buffer_head leak in an error path of ext4_iget()
+
+(cherry picked from commit 567f3e9a70d71e5c9be03701b8578be77857293b)
+
+One of the invalid error paths in ext4_iget() forgot to brelse() the
+inode buffer head.  Fix it by adding a brelse() in the common error
+return path, which also simplifies the function.
+
+Thanks to Andi Kleen <ak@linux.intel.com> for reporting the problem.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4781,7 +4781,6 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+-      struct buffer_head *bh;
+       struct inode *inode;
+       long ret;
+       int block;
+@@ -4793,11 +4792,11 @@ struct inode *ext4_iget(struct super_blo
+               return inode;
+       ei = EXT4_I(inode);
++      iloc.bh = 0;
+       ret = __ext4_get_inode_loc(inode, &iloc, 0);
+       if (ret < 0)
+               goto bad_inode;
+-      bh = iloc.bh;
+       raw_inode = ext4_raw_inode(&iloc);
+       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+@@ -4820,7 +4819,6 @@ struct inode *ext4_iget(struct super_blo
+               if (inode->i_mode == 0 ||
+                   !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+                       /* this inode is deleted */
+-                      brelse(bh);
+                       ret = -ESTALE;
+                       goto bad_inode;
+               }
+@@ -4852,7 +4850,6 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+-                      brelse(bh);
+                       ret = -EIO;
+                       goto bad_inode;
+               }
+@@ -4905,10 +4902,8 @@ struct inode *ext4_iget(struct super_blo
+               /* Validate block references which are part of inode */
+               ret = ext4_check_inode_blockref(inode);
+       }
+-      if (ret) {
+-              brelse(bh);
++      if (ret)
+               goto bad_inode;
+-      }
+       if (S_ISREG(inode->i_mode)) {
+               inode->i_op = &ext4_file_inode_operations;
+@@ -4936,7 +4931,6 @@ struct inode *ext4_iget(struct super_blo
+                       init_special_inode(inode, inode->i_mode,
+                          new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+       } else {
+-              brelse(bh);
+               ret = -EIO;
+               ext4_error(inode->i_sb, __func__,
+                          "bogus i_mode (%o) for inode=%lu",
+@@ -4949,6 +4943,7 @@ struct inode *ext4_iget(struct super_blo
+       return inode;
+ bad_inode:
++      brelse(iloc.bh);
+       iget_failed(inode);
+       return ERR_PTR(ret);
+ }
diff --git a/review-2.6.32/0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch b/review-2.6.32/0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch
new file mode 100644 (file)
index 0000000..1cd419f
--- /dev/null
@@ -0,0 +1,62 @@
+From 766191a641ba219a4152ce3cfbb24cc15426e025 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:17:34 -0500
+Subject: [PATCH 07/30] ext4: make sure directory and symlink blocks are revoked
+
+(cherry picked from commit 50689696867d95b38d9c7be640a311494a04fb86)
+
+When an inode gets unlinked, the functions ext4_clear_blocks() and
+ext4_remove_blocks() call ext4_forget() for all the buffer heads
+corresponding to the deleted inode's data blocks.  If the inode is a
+directory or a symlink, the is_metadata parameter must be non-zero so
+ext4_forget() will revoke them via jbd2_journal_revoke().  Otherwise,
+if these blocks are reused for a data file, and the system crashes
+before a journal checkpoint, the journal replay could end up
+corrupting these data blocks.
+
+Thanks to Curt Wohlgemuth for pointing out potential problems in this
+area.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    2 +-
+ fs/ext4/inode.c   |    6 ++++--
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2074,7 +2074,7 @@ static int ext4_remove_blocks(handle_t *
+               ext_debug("free last %u blocks starting %llu\n", num, start);
+               for (i = 0; i < num; i++) {
+                       bh = sb_find_get_block(inode->i_sb, start + i);
+-                      ext4_forget(handle, 0, inode, bh, start + i);
++                      ext4_forget(handle, metadata, inode, bh, start + i);
+               }
+               ext4_free_blocks(handle, inode, start, num, metadata);
+       } else if (from == le32_to_cpu(ex->ee_block)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4120,6 +4120,8 @@ static void ext4_clear_blocks(handle_t *
+                             __le32 *last)
+ {
+       __le32 *p;
++      int     is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
++
+       if (try_to_extend_transaction(handle, inode)) {
+               if (bh) {
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+@@ -4150,11 +4152,11 @@ static void ext4_clear_blocks(handle_t *
+                       *p = 0;
+                       tbh = sb_find_get_block(inode->i_sb, nr);
+-                      ext4_forget(handle, 0, inode, tbh, nr);
++                      ext4_forget(handle, is_metadata, inode, tbh, nr);
+               }
+       }
+-      ext4_free_blocks(handle, inode, block_to_free, count, 0);
++      ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+ }
+ /**
diff --git a/review-2.6.32/0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch b/review-2.6.32/0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch
new file mode 100644 (file)
index 0000000..6826dab
--- /dev/null
@@ -0,0 +1,29 @@
+From 7606c163cb096dfda150e6bd7323d60bbda9ea0d Mon Sep 17 00:00:00 2001
+From: Julia Lawall <julia@diku.dk>
+Date: Sun, 15 Nov 2009 15:30:58 -0500
+Subject: [PATCH 08/30] ext4: fix i_flags access in ext4_da_writepages_trans_blocks()
+
+(cherry picked from commit 30c6e07a92ea4cb87160d32ffa9bce172576ae4c)
+
+We need to be testing the i_flags field in the ext4 specific portion
+of the inode, instead of the (confusingly aliased) i_flags field in
+the generic struct inode.
+
+Signed-off-by: Julia Lawall <julia@diku.dk>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2788,7 +2788,7 @@ static int ext4_da_writepages_trans_bloc
+        * number of contiguous block. So we will limit
+        * number of contiguous block to a sane value
+        */
+-      if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
++      if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+           (max_blocks > EXT4_MAX_TRANS_DATA))
+               max_blocks = EXT4_MAX_TRANS_DATA;
diff --git a/review-2.6.32/0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch b/review-2.6.32/0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch
new file mode 100644 (file)
index 0000000..15b8e07
--- /dev/null
@@ -0,0 +1,43 @@
+From 22d31ce67bb060aba86bf20ba6f69cb9e39a1fa2 Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Sun, 15 Nov 2009 15:30:52 -0500
+Subject: [PATCH 09/30] ext4: journal all modifications in ext4_xattr_set_handle
+
+(cherry picked from commit 86ebfd08a1930ccedb8eac0aeb1ed4b8b6a41dbc)
+
+ext4_xattr_set_handle() was zeroing out an inode outside
+of journaling constraints; this is one of the accesses that
+was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Reviewed-by: Andreas Dilger <adilger@sun.com>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/xattr.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle,
+       if (error)
+               goto cleanup;
++      error = ext4_journal_get_write_access(handle, is.iloc.bh);
++      if (error)
++              goto cleanup;
++
+       if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+               struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
+               memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle,
+               if (flags & XATTR_CREATE)
+                       goto cleanup;
+       }
+-      error = ext4_journal_get_write_access(handle, is.iloc.bh);
+-      if (error)
+-              goto cleanup;
+       if (!value) {
+               if (!is.s.not_found)
+                       error = ext4_xattr_ibody_set(handle, inode, &i, &is);
diff --git a/review-2.6.32/0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch b/review-2.6.32/0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch
new file mode 100644 (file)
index 0000000..61a0e75
--- /dev/null
@@ -0,0 +1,35 @@
+From cd0201f1e2b91358cf15c234482645381ad183b7 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:24:52 -0500
+Subject: [PATCH 10/30] ext4: don't update the superblock in ext4_statfs()
+
+(cherry picked from commit 3f8fb9490efbd300887470a2a880a64e04dcc3f5)
+
+commit a71ce8c6c9bf269b192f352ea555217815cf027e updated ext4_statfs()
+to update the on-disk superblock counters, but modified this buffer
+directly without any journaling of the change.  This is one of the
+accesses that was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3668,13 +3668,11 @@ static int ext4_statfs(struct dentry *de
+       buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
+       buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+                      percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+-      ext4_free_blocks_count_set(es, buf->f_bfree);
+       buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+       if (buf->f_bfree < ext4_r_blocks_count(es))
+               buf->f_bavail = 0;
+       buf->f_files = le32_to_cpu(es->s_inodes_count);
+       buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
+-      es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
+       buf->f_namelen = EXT4_NAME_LEN;
+       fsid = le64_to_cpup((void *)es->s_uuid) ^
+              le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/review-2.6.32/0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch b/review-2.6.32/0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch
new file mode 100644 (file)
index 0000000..ffb2aeb
--- /dev/null
@@ -0,0 +1,33 @@
+From de7685f1b6efddd2fcf54f9fa94fee73b1a98f67 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:24:38 -0500
+Subject: [PATCH 11/30] ext4: fix uninit block bitmap initialization when s_meta_first_bg is non-zero
+
+(cherry picked from commit 8dadb198cb70ef811916668fe67eeec82e8858dd)
+
+The number of old-style block group descriptor blocks is
+s_first_meta_bg when the meta_bg feature flag is set.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/balloc.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_met
+ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+-      return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
++      if (!ext4_bg_has_super(sb, group))
++              return 0;
++
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
++              return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
++      else
++              return EXT4_SB(sb)->s_gdb_count;
+ }
+ /**
diff --git a/review-2.6.32/0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch b/review-2.6.32/0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch
new file mode 100644 (file)
index 0000000..302343a
--- /dev/null
@@ -0,0 +1,43 @@
+From c2388358cfbf01a0f2b57fe0549791004fb1c72c Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sun, 15 Nov 2009 15:29:56 -0500
+Subject: [PATCH 12/30] ext4: fix block validity checks so they work correctly with meta_bg
+
+(cherry picked from commit 1032988c71f3f85483b2b4319684d1205a704c02)
+
+The block validity checks used by ext4_data_block_valid() weren't
+correctly written to check file systems with the meta_bg feature.  Fix
+this.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/block_validity.c |    2 +-
+ fs/ext4/inode.c          |    5 +----
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_
+               if (ext4_bg_has_super(sb, i) &&
+                   ((i < 5) || ((i % flex_size) == 0)))
+                       add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+-                                      sbi->s_gdb_count + 1);
++                                      ext4_bg_num_gdb(sb, i) + 1);
+               gdp = ext4_get_group_desc(sb, i, NULL);
+               ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+               if (ret)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4883,10 +4883,7 @@ struct inode *ext4_iget(struct super_blo
+       ret = 0;
+       if (ei->i_file_acl &&
+-          ((ei->i_file_acl <
+-            (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+-             EXT4_SB(sb)->s_gdb_count)) ||
+-           (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++          !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+               ext4_error(sb, __func__,
+                          "bad extended attribute block %llu in inode #%lu",
+                          ei->i_file_acl, inode->i_ino);
diff --git a/review-2.6.32/0013-ext4-avoid-issuing-unnecessary-barriers.patch b/review-2.6.32/0013-ext4-avoid-issuing-unnecessary-barriers.patch
new file mode 100644 (file)
index 0000000..646d50e
--- /dev/null
@@ -0,0 +1,41 @@
+From c5e9fda54109d93439f7ee3224e0421edbed6c4b Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Mon, 23 Nov 2009 07:24:57 -0500
+Subject: [PATCH 13/30] ext4: avoid issuing unnecessary barriers
+
+(cherry picked from commit 6b17d902fdd241adfa4ce780df20547b28bf5801)
+
+We don't need to issue an I/O barrier on an error or if we force a
+commit because we are doing data journaling.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/fsync.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -60,7 +60,7 @@ int ext4_sync_file(struct file *file, st
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+-              goto out;
++              return ret;
+       /*
+        * data=writeback:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+@@ -79,10 +79,8 @@ int ext4_sync_file(struct file *file, st
+        *  (they were dirtied by commit).  But that's OK - the blocks are
+        *  safe in-journal, which is all fsync() needs to ensure.
+        */
+-      if (ext4_should_journal_data(inode)) {
+-              ret = ext4_force_commit(inode->i_sb);
+-              goto out;
+-      }
++      if (ext4_should_journal_data(inode))
++              return ext4_force_commit(inode->i_sb);
+       if (!journal)
+               ret = sync_mapping_buffers(inode->i_mapping);
diff --git a/review-2.6.32/0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch b/review-2.6.32/0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch
new file mode 100644 (file)
index 0000000..666ea01
--- /dev/null
@@ -0,0 +1,29 @@
+From 42c6dded3faff003f12dde41715865327590cd6c Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 23 Nov 2009 07:24:48 -0500
+Subject: [PATCH 14/30] ext4: fix error handling in ext4_ind_get_blocks()
+
+(cherry picked from commit 2bba702d4f88d7b010ec37e2527b552588404ae7)
+
+When an error happened in ext4_splice_branch we failed to notice that
+in ext4_ind_get_blocks and mapped the buffer anyway. Fix the problem
+by checking for error properly.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1021,7 +1021,7 @@ static int ext4_ind_get_blocks(handle_t
+       if (!err)
+               err = ext4_splice_branch(handle, inode, iblock,
+                                        partial, indirect_blks, count);
+-      else
++      if (err)
+               goto cleanup;
+       set_buffer_new(bh_result);
diff --git a/review-2.6.32/0015-ext4-make-trim-discard-optional-and-off-by-default.patch b/review-2.6.32/0015-ext4-make-trim-discard-optional-and-off-by-default.patch
new file mode 100644 (file)
index 0000000..7549d4b
--- /dev/null
@@ -0,0 +1,128 @@
+From a48fc8df1871ad50bef2476e76ad5038f5a927ce Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Thu, 19 Nov 2009 14:25:42 -0500
+Subject: [PATCH 15/30] ext4: make trim/discard optional (and off by default)
+
+(cherry picked from commit 5328e635315734d42080de9a5a1ee87bf4cae0a4)
+
+It is anticipated that when sb_issue_discard starts doing
+real work on trim-capable devices, we may see issues.  Make
+this mount-time optional, and default it to off until we know
+that things are working out OK.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    6 ++++++
+ fs/ext4/ext4.h                     |    1 +
+ fs/ext4/mballoc.c                  |   21 +++++++++++++--------
+ fs/ext4/super.c                    |   14 +++++++++++++-
+ 4 files changed, 33 insertions(+), 9 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -353,6 +353,12 @@ noauto_da_alloc           replacing existing file
+                       system crashes before the delayed allocation
+                       blocks are forced to disk.
++discard               Controls whether ext4 should issue discard/TRIM
++nodiscard(*)          commands to the underlying block device when
++                      blocks are freed.  This is useful for SSD devices
++                      and sparse/thinly-provisioned LUNs, but it is off
++                      by default until sufficient testing has been done.
++
+ Data Mode
+ =========
+ There are 3 different data modes:
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -750,6 +750,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT_DELALLOC           0x8000000 /* Delalloc support */
+ #define EXT4_MOUNT_DATA_ERR_ABORT     0x10000000 /* Abort on file data write */
+ #define EXT4_MOUNT_BLOCK_VALIDITY     0x20000000 /* Block validity checking */
++#define EXT4_MOUNT_DISCARD            0x40000000 /* Issue DISCARD requests */
+ #define clear_opt(o, opt)             o &= ~EXT4_MOUNT_##opt
+ #define set_opt(o, opt)                       o |= EXT4_MOUNT_##opt
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(jou
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+       struct ext4_free_data *entry;
+-      ext4_fsblk_t discard_block;
+       struct list_head *l, *ltmp;
+       list_for_each_safe(l, ltmp, &txn->t_private_list) {
+@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(jou
+                       page_cache_release(e4b.bd_bitmap_page);
+               }
+               ext4_unlock_group(sb, entry->group);
+-              discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+-                      + entry->start_blk
+-                      + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+-              trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+-                                        entry->count);
+-              sb_issue_discard(sb, discard_block, entry->count);
+-
++              if (test_opt(sb, DISCARD)) {
++                      ext4_fsblk_t discard_block;
++                      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++
++                      discard_block = (ext4_fsblk_t)entry->group *
++                                              EXT4_BLOCKS_PER_GROUP(sb)
++                                      + entry->start_blk
++                                      + le32_to_cpu(es->s_first_data_block);
++                      trace_ext4_discard_blocks(sb,
++                                      (unsigned long long)discard_block,
++                                      entry->count);
++                      sb_issue_discard(sb, discard_block, entry->count);
++              }
+               kmem_cache_free(ext4_free_ext_cachep, entry);
+               ext4_mb_release_desc(&e4b);
+       }
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -899,6 +899,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, NO_AUTO_DA_ALLOC))
+               seq_puts(seq, ",noauto_da_alloc");
++      if (test_opt(sb, DISCARD))
++              seq_puts(seq, ",discard");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1079,7 +1082,8 @@ enum {
+       Opt_usrquota, Opt_grpquota, Opt_i_version,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_block_validity, Opt_noblock_validity,
+-      Opt_inode_readahead_blks, Opt_journal_ioprio
++      Opt_inode_readahead_blks, Opt_journal_ioprio,
++      Opt_discard, Opt_nodiscard,
+ };
+ static const match_table_t tokens = {
+@@ -1144,6 +1148,8 @@ static const match_table_t tokens = {
+       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
++      {Opt_discard, "discard"},
++      {Opt_nodiscard, "nodiscard"},
+       {Opt_err, NULL},
+ };
+@@ -1565,6 +1571,12 @@ set_qf_format:
+                       else
+                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+                       break;
++              case Opt_discard:
++                      set_opt(sbi->s_mount_opt, DISCARD);
++                      break;
++              case Opt_nodiscard:
++                      clear_opt(sbi->s_mount_opt, DISCARD);
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
diff --git a/review-2.6.32/0016-ext4-make-norecovery-an-alias-for-noload.patch b/review-2.6.32/0016-ext4-make-norecovery-an-alias-for-noload.patch
new file mode 100644 (file)
index 0000000..2fa498b
--- /dev/null
@@ -0,0 +1,57 @@
+From 68ab480b59f296d164526cc51e2876687340c803 Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Thu, 19 Nov 2009 14:28:50 -0500
+Subject: [PATCH 16/30] ext4: make "norecovery" an alias for "noload"
+
+(cherry picked from commit e3bb52ae2bb9573e84c17b8e3560378d13a5c798)
+
+Users on the linux-ext4 list recently complained about differences
+across filesystems w.r.t. how to mount without a journal replay.
+
+In the discussion it was noted that xfs's "norecovery" option is
+perhaps more descriptively accurate than "noload," so let's make
+that an alias for ext4.
+
+Also show this status in /proc/mounts
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    4 ++--
+ fs/ext4/super.c                    |    4 ++++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -153,8 +153,8 @@ journal_dev=devnum When the external jou
+                       identified through its new major/minor numbers encoded
+                       in devnum.
+-noload                        Don't load the journal on mounting.  Note that
+-                      if the filesystem was not unmounted cleanly,
++norecovery            Don't load the journal on mounting.  Note that
++noload                        if the filesystem was not unmounted cleanly,
+                       skipping the journal replay will lead to the
+                       filesystem containing inconsistencies that can
+                       lead to any number of problems.
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -902,6 +902,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, DISCARD))
+               seq_puts(seq, ",discard");
++      if (test_opt(sb, NOLOAD))
++              seq_puts(seq, ",norecovery");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1108,6 +1111,7 @@ static const match_table_t tokens = {
+       {Opt_acl, "acl"},
+       {Opt_noacl, "noacl"},
+       {Opt_noload, "noload"},
++      {Opt_noload, "norecovery"},
+       {Opt_nobh, "nobh"},
+       {Opt_bh, "bh"},
+       {Opt_commit, "commit=%u"},
diff --git a/review-2.6.32/0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch b/review-2.6.32/0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch
new file mode 100644 (file)
index 0000000..fb0b692
--- /dev/null
@@ -0,0 +1,79 @@
+From 164659d7861e1243dc300c27c63e5918762912eb Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Tue, 24 Nov 2009 10:19:57 -0500
+Subject: [PATCH 17/30] ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
+
+(cherry picked from commit 94d7c16cbbbd0e03841fcf272bcaf0620ad39618)
+
+At the beginning of ext4_move_extents(), we call
+ext4_discard_preallocations() to discard inode PAs of orig and donor
+inodes.  But in the following case, blocks can be double freed, so
+move ext4_discard_preallocations() to the end of ext4_move_extents().
+
+1. Discard inode PAs of orig and donor inodes with
+   ext4_discard_preallocations() in ext4_move_extents().
+
+   orig : [ DATA1 ]
+   donor: [ DATA2 ]
+
+2. While data blocks are being exchanged between orig and donor inodes,
+   a new inode PA is created for orig by another process's block
+   allocation (since there are semaphore gaps in ext4_move_extents()),
+   and the new inode PA is partially used (2-1).
+
+   2-1 Create new inode PAs to orig inode
+   orig : [ DATA1 | used PA1 | free PA1 ]
+   donor: [ DATA2 ]
+
+3. The donor inode, which now holds the old orig inode's blocks, is
+   deleted after EXT4_IOC_MOVE_EXT finishes (3-1, 3-2).  So the block
+   bitmap bits corresponding to the old orig inode's blocks are freed.
+
+   3-1 After EXT4_IOC_MOVE_EXT finished
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ DATA1 |  used PA1 ]
+
+   3-2 Delete donor inode
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ FREE SPACE(DATA1) | FREE SPACE(used PA1) ]
+
+4. The double-free of blocks occurs when close() is called on the orig
+   inode, because ext4_discard_preallocations() for the orig inode frees
+   both used PA1 and free PA1, though used PA1 was already freed in 3.
+
+   4-1 Double-free of blocks occurs
+   orig : [ DATA2 |  FREE SPACE(free PA1) ]
+   donor: [ FREE SPACE(DATA1) | DOUBLE FREE(used PA1) ]
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -1289,10 +1289,6 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
+-      /* Discard preallocations of two inodes */
+-      ext4_discard_preallocations(orig_inode);
+-      ext4_discard_preallocations(donor_inode);
+-
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1410,6 +1406,11 @@ ext4_move_extents(struct file *o_filp, s
+       }
+ out:
++      if (*moved_len) {
++              ext4_discard_preallocations(orig_inode);
++              ext4_discard_preallocations(donor_inode);
++      }
++
+       if (orig_path) {
+               ext4_ext_drop_refs(orig_path);
+               kfree(orig_path);
diff --git a/review-2.6.32/0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch b/review-2.6.32/0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch
new file mode 100644 (file)
index 0000000..3044e4c
--- /dev/null
@@ -0,0 +1,76 @@
+From 4ba9bccde799430e634b6cda8614b13ccb24ef1a Mon Sep 17 00:00:00 2001
+From: Kazuya Mio <k-mio@sx.jp.nec.com>
+Date: Tue, 24 Nov 2009 10:28:48 -0500
+Subject: [PATCH 18/30] ext4: initialize moved_len before calling ext4_move_extents()
+
+(cherry picked from commit 446aaa6e7e993b38a6f21c6acfa68f3f1af3dbe3)
+
+The move_extent.moved_len field is used to pass back the number of
+exchanged blocks to user space.  Currently the caller must clear this
+field, but we spend more code space checking for this requirement than
+simply zeroing the field ourselves, so let's just make life easier for
+everyone all around.
+
+Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |    1 +
+ fs/ext4/move_extent.c |   14 +++-----------
+ 2 files changed, 4 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -239,6 +239,7 @@ setversion_out:
+                       }
+               }
++              me.moved_len = 0;
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+               fput(donor_filp);
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -947,7 +947,6 @@ out2:
+  * @orig_start:               logical start offset in block for orig
+  * @donor_start:      logical start offset in block for donor
+  * @len:              the number of blocks to be moved
+- * @moved_len:                moved block length
+  *
+  * Check the arguments of ext4_move_extents() whether the files can be
+  * exchanged with each other.
+@@ -955,8 +954,8 @@ out2:
+  */
+ static int
+ mext_check_arguments(struct inode *orig_inode,
+-                        struct inode *donor_inode, __u64 orig_start,
+-                        __u64 donor_start, __u64 *len, __u64 moved_len)
++                   struct inode *donor_inode, __u64 orig_start,
++                   __u64 donor_start, __u64 *len)
+ {
+       ext4_lblk_t orig_blocks, donor_blocks;
+       unsigned int blkbits = orig_inode->i_blkbits;
+@@ -1010,13 +1009,6 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
+-      if (moved_len) {
+-              ext4_debug("ext4 move extent: moved_len should be 0 "
+-                      "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+-                      donor_inode->i_ino);
+-              return -EINVAL;
+-      }
+-
+       if ((orig_start > EXT_MAX_BLOCK) ||
+           (donor_start > EXT_MAX_BLOCK) ||
+           (*len > EXT_MAX_BLOCK) ||
+@@ -1226,7 +1218,7 @@ ext4_move_extents(struct file *o_filp, s
+       double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+-                                      donor_start, &len, *moved_len);
++                                  donor_start, &len);
+       if (ret1)
+               goto out;
diff --git a/review-2.6.32/0019-ext4-move_extent_per_page-cleanup.patch b/review-2.6.32/0019-ext4-move_extent_per_page-cleanup.patch
new file mode 100644 (file)
index 0000000..89b9e5c
--- /dev/null
@@ -0,0 +1,91 @@
+From 3aa949f17b315027f4ffb033bfe332cb35af5c3a Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Tue, 24 Nov 2009 10:31:56 -0500
+Subject: [PATCH 19/30] ext4: move_extent_per_page() cleanup
+
+(cherry picked from commit ac48b0a1d068887141581bea8285de5fcab182b0)
+
+Integrate duplicate lines (acquire/release semaphore and invalidate
+extent cache in move_extent_per_page()) into mext_replace_branches(),
+to reduce source and object code size.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |   30 +++++++++---------------------
+ 1 file changed, 9 insertions(+), 21 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -660,6 +660,9 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
++
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -755,6 +758,11 @@ out:
+               kfree(donor_path);
+       }
++      ext4_ext_invalidate_cache(orig_inode);
++      ext4_ext_invalidate_cache(donor_inode);
++
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       return replaced_count;
+ }
+@@ -820,19 +828,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              /*
+-               * Protect extent trees against block allocations
+-               * via delalloc
+-               */
+-              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+-
+-              /* Clear the inode cache not to refer to the old data */
+-              ext4_ext_invalidate_cache(orig_inode);
+-              ext4_ext_invalidate_cache(donor_inode);
+-              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -880,8 +878,6 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      /* Protect extent trees against block allocations via delalloc */
+-      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -890,18 +886,10 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else {
+-                      double_up_write_data_sem(orig_inode, donor_inode);
++              } else
+                       goto out;
+-              }
+       }
+-      /* Clear the inode cache not to refer to the old data */
+-      ext4_ext_invalidate_cache(orig_inode);
+-      ext4_ext_invalidate_cache(donor_inode);
+-
+-      double_up_write_data_sem(orig_inode, donor_inode);
+-
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
diff --git a/review-2.6.32/0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch b/review-2.6.32/0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch
new file mode 100644 (file)
index 0000000..f3b5f4f
--- /dev/null
@@ -0,0 +1,42 @@
+From 9dd2722f839b69ede127875f490716cb137daa35 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Tue, 1 Dec 2009 09:04:42 -0500
+Subject: [PATCH 20/30] jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
+
+(cherry picked from commit e6ec116b67f46e0e7808276476554727b2e6240b)
+
+OOM happens.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/jbd2/commit.c  |    4 ++++
+ fs/jbd2/journal.c |    4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(jou
+               JBUFFER_TRACE(jh, "ph3: write metadata");
+               flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+                                                     jh, &new_jh, blocknr);
++              if (flags < 0) {
++                      jbd2_journal_abort(journal, flags);
++                      continue;
++              }
+               set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+               wbuf[bufs++] = jh2bh(new_jh);
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -358,6 +358,10 @@ repeat:
+               jbd_unlock_bh_state(bh_in);
+               tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
++              if (!tmp) {
++                      jbd2_journal_put_journal_head(new_jh);
++                      return -ENOMEM;
++              }
+               jbd_lock_bh_state(bh_in);
+               if (jh_in->b_frozen_data) {
+                       jbd2_free(tmp, bh_in->b_size);
diff --git a/review-2.6.32/0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch b/review-2.6.32/0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch
new file mode 100644 (file)
index 0000000..f96e6cf
--- /dev/null
@@ -0,0 +1,25 @@
+From 67b269b65fa8620f2a5ba402f4bd49b286e00eb4 Mon Sep 17 00:00:00 2001
+From: Roel Kluin <roel.kluin@gmail.com>
+Date: Mon, 7 Dec 2009 10:38:16 -0500
+Subject: [PATCH 21/30] ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
+
+(cherry picked from commit c09eef305dd43846360944ad072f051f964fa383)
+
+Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/resize.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct
+                       goto exit_bh;
+               if (IS_ERR(gdb = bclean(handle, sb, block))) {
+-                      err = PTR_ERR(bh);
++                      err = PTR_ERR(gdb);
+                       goto exit_bh;
+               }
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
diff --git a/review-2.6.32/0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch b/review-2.6.32/0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch
new file mode 100644 (file)
index 0000000..740ad9c
--- /dev/null
@@ -0,0 +1,88 @@
+From 968c53cec156cb9a3239969a3686c33a0e5cc9ab Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 8 Dec 2009 21:24:33 -0500
+Subject: [PATCH 22/30] ext4: Avoid data / filesystem corruption when write fails to copy data
+
+(cherry picked from commit b9a4207d5e911b938f73079a83cc2ae10524ec7f)
+
+When ext4_write_begin fails after allocating some blocks or
+generic_perform_write fails to copy data to write, we truncate blocks
+already instantiated beyond i_size.  Although these blocks were never
+inside i_size, we have to truncate the pagecache of these blocks so
+that corresponding buffers get unmapped.  Otherwise subsequent
+__block_prepare_write (called because we are retrying the write) will
+find the buffers mapped, not call ->get_block, and thus the page will
+be backed by already freed blocks leading to filesystem and data
+corruption.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1534,6 +1534,16 @@ static int do_journal_get_write_access(h
+       return ext4_journal_get_write_access(handle, bh);
+ }
++/*
++ * Truncate blocks that were not used by write. We have to truncate the
++ * pagecache as well so that corresponding buffers get properly unmapped.
++ */
++static void ext4_truncate_failed_write(struct inode *inode)
++{
++      truncate_inode_pages(inode->i_mapping, inode->i_size);
++      ext4_truncate(inode);
++}
++
+ static int ext4_write_begin(struct file *file, struct address_space *mapping,
+                           loff_t pos, unsigned len, unsigned flags,
+                           struct page **pagep, void **fsdata)
+@@ -1599,7 +1609,7 @@ retry:
+               ext4_journal_stop(handle);
+               if (pos + len > inode->i_size) {
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+                       /*
+                        * If truncate failed early the inode might
+                        * still be on the orphan list; we need to
+@@ -1709,7 +1719,7 @@ static int ext4_ordered_write_end(struct
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1751,7 +1761,7 @@ static int ext4_writeback_write_end(stru
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1814,7 +1824,7 @@ static int ext4_journalled_write_end(str
+       if (!ret)
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -3091,7 +3101,7 @@ retry:
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+       }
+       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
diff --git a/review-2.6.32/0023-ext4-wait-for-log-to-commit-when-umounting.patch b/review-2.6.32/0023-ext4-wait-for-log-to-commit-when-umounting.patch
new file mode 100644 (file)
index 0000000..d1a9f6a
--- /dev/null
@@ -0,0 +1,50 @@
+From 2a51ce267e55720f32e089a0d5349d558263ea9a Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@redhat.com>
+Date: Tue, 8 Dec 2009 21:48:58 -0500
+Subject: [PATCH 23/30] ext4: wait for log to commit when umounting
+
+(cherry picked from commit d4edac314e9ad0b21ba20ba8bc61b61f186f79e1)
+
+There is a potential race when a transaction is committing right when
+the file system is being unmounted.  This could result in a race
+because EXT4_SB(sb)->s_group_info could be freed in ext4_put_super
+before the commit code calls a callback so the mballoc code can
+release freed blocks in the transaction, resulting in a panic trying
+to access the freed s_group_info.
+
+The fix is to wait for the transaction to finish committing before we
+shutdown the multiblock allocator.
+
+Signed-off-by: Josef Bacik <josef@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_
+       if (sb->s_dirt)
+               ext4_commit_super(sb, 1);
+-      ext4_release_system_zone(sb);
+-      ext4_mb_release(sb);
+-      ext4_ext_release(sb);
+-      ext4_xattr_put_super(sb);
+       if (sbi->s_journal) {
+               err = jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_
+                       ext4_abort(sb, __func__,
+                                  "Couldn't clean up the journal");
+       }
++
++      ext4_release_system_zone(sb);
++      ext4_mb_release(sb);
++      ext4_ext_release(sb);
++      ext4_xattr_put_super(sb);
++
+       if (!(sb->s_flags & MS_RDONLY)) {
+               EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+               es->s_state = cpu_to_le16(sbi->s_mount_state);
diff --git a/review-2.6.32/0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch b/review-2.6.32/0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch
new file mode 100644 (file)
index 0000000..8804b02
--- /dev/null
@@ -0,0 +1,53 @@
+From f76c3ee1ee414a64d953f59a3f23a23355c6a18c Mon Sep 17 00:00:00 2001
+From: Curt Wohlgemuth <curtw@google.com>
+Date: Tue, 8 Dec 2009 22:18:25 -0500
+Subject: [PATCH 24/30] ext4: remove blocks from inode prealloc list on failure
+
+(cherry picked from commit b844167edc7fcafda9623955c05e4c1b3c32ebc7)
+
+This fixes a leak of blocks in an inode prealloc list if device failures
+cause ext4_mb_mark_diskspace_used() to fail.
+
+Signed-off-by: Curt Wohlgemuth <curtw@google.com>
+Acked-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3011,6 +3011,24 @@ static void ext4_mb_collect_stats(struct
+ }
+ /*
++ * Called on failure; free up any blocks from the inode PA for this
++ * context.  We don't need this for MB_GROUP_PA because we only change
++ * pa_free in ext4_mb_release_context(), but on failure, we've already
++ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
++ */
++static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
++{
++      struct ext4_prealloc_space *pa = ac->ac_pa;
++      int len;
++
++      if (pa && pa->pa_type == MB_INODE_PA) {
++              len = ac->ac_b_ex.fe_len;
++              pa->pa_free += len;
++      }
++
++}
++
++/*
+  * use blocks preallocated to inode
+  */
+ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+@@ -4295,6 +4313,7 @@ repeat:
+                       ac->ac_status = AC_STATUS_CONTINUE;
+                       goto repeat;
+               } else if (*errp) {
++                      ext4_discard_allocated_blocks(ac);
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
diff --git a/review-2.6.32/0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch b/review-2.6.32/0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch
new file mode 100644 (file)
index 0000000..4bab596
--- /dev/null
@@ -0,0 +1,27 @@
+From 89f0a58e7cc53a31325b7409935c8b499ee9a5c9 Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Tue, 8 Dec 2009 22:41:52 -0500
+Subject: [PATCH 25/30] ext4: ext4_get_reserved_space() must return bytes instead of blocks
+
+(cherry picked from commit 8aa6790f876e81f5a2211fe1711a5fe3fe2d7b20)
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1052,7 +1052,7 @@ qsize_t ext4_get_reserved_space(struct i
+               EXT4_I(inode)->i_reserved_meta_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+-      return total;
++      return (total << inode->i_blkbits);
+ }
+ /*
+  * Calculate the number of metadata blocks need to reserve
diff --git a/review-2.6.32/0026-ext4-quota-macros-cleanup.patch b/review-2.6.32/0026-ext4-quota-macros-cleanup.patch
new file mode 100644 (file)
index 0000000..198b781
--- /dev/null
@@ -0,0 +1,142 @@
+From 02fa03eb498b59853433cfa40cd152f0f5419729 Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Tue, 8 Dec 2009 22:42:15 -0500
+Subject: [PATCH 26/30] ext4: quota macros cleanup
+
+(cherry picked from commit 5aca07eb7d8f14d90c740834d15ca15277f4820c)
+
+Currently all quota block reservation macros contain a hard-coded "2"
+(the MAXQUOTAS value). This is no good because in some places it is not
+obvious what this digit represents. Let's introduce new macros with
+self-descriptive names.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4_jbd2.h |    8 ++++++--
+ fs/ext4/extents.c   |    2 +-
+ fs/ext4/inode.c     |    2 +-
+ fs/ext4/migrate.c   |    4 ++--
+ fs/ext4/namei.c     |    8 ++++----
+ 5 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -49,7 +49,7 @@
+ #define EXT4_DATA_TRANS_BLOCKS(sb)    (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
+                                        EXT4_XATTR_TRANS_BLOCKS - 2 + \
+-                                       2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                       EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /*
+  * Define the number of metadata blocks we need to account to modify data.
+@@ -57,7 +57,7 @@
+  * This include super block, inode block, quota blocks and xattr blocks
+  */
+ #define EXT4_META_TRANS_BLOCKS(sb)    (EXT4_XATTR_TRANS_BLOCKS + \
+-                                      2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                      EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /* Delete operations potentially hit one directory's namespace plus an
+  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
+@@ -92,6 +92,7 @@
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+ #else
+@@ -99,6 +100,9 @@
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+ #endif
++#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2167,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
+                       correct_index = 1;
+                       credits += (ext_depth(inode)) + 1;
+               }
+-              credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++              credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+               err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+               if (err)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5231,7 +5231,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+-              handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
++              handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(hand
+        * So allocate a credit of 3. We may update
+        * quota (user and group).
+        */
+-      needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++      needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+       if (ext4_journal_extend(handle, needed) != 0)
+               retval = ext4_journal_restart(handle, needed);
+@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode
+       handle = ext4_journal_start(inode,
+                                       EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+                                       + 1);
+       if (IS_ERR(handle)) {
+               retval = PTR_ERR(handle);
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1769,7 +1769,7 @@ static int ext4_create(struct inode *dir
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1803,7 +1803,7 @@ static int ext4_mknod(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1840,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -2253,7 +2253,7 @@ static int ext4_symlink(struct inode *di
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
diff --git a/review-2.6.32/0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch b/review-2.6.32/0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch
new file mode 100644 (file)
index 0000000..07e6a61
--- /dev/null
@@ -0,0 +1,31 @@
+From 57a4922aba632b8e4aff05f43ffb0b0d615b79ef Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Tue, 8 Dec 2009 22:42:28 -0500
+Subject: [PATCH 27/30] ext4: fix incorrect block reservation on quota transfer.
+
+(cherry picked from commit 194074acacebc169ded90a4657193f5180015051)
+
+Inside a ->setattr() call both ATTR_UID and ATTR_GID may be valid.
+This means that we may end up transferring all quotas. And
+we have to reserve QUOTA_DEL_BLOCKS for all quotas, as we do in
+the case of QUOTA_INIT_BLOCKS.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5232,7 +5232,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+-                                      EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
++                                      EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+                       goto err_out;
diff --git a/review-2.6.32/0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch b/review-2.6.32/0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
new file mode 100644 (file)
index 0000000..2569bb7
--- /dev/null
@@ -0,0 +1,256 @@
+From ebe71d4fcb5cad29134efb77a36b11a546616104 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 8 Dec 2009 23:51:10 -0500
+Subject: [PATCH 28/30] ext4: Wait for proper transaction commit on fsync
+
+(cherry picked from commit b436b9bef84de6893e86346d8fbf7104bc520645)
+
+We cannot rely on buffer dirty bits during fsync because pdflush can come
+before fsync is called and clear dirty bits without forcing a transaction
+commit. What we do is that we track which transaction has last changed
+the inode and which transaction last changed allocation and force it to
+disk on fsync.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h      |    7 +++++++
+ fs/ext4/ext4_jbd2.h |   13 +++++++++++++
+ fs/ext4/extents.c   |   14 ++++++++++++--
+ fs/ext4/fsync.c     |   46 +++++++++++++++++-----------------------------
+ fs/ext4/inode.c     |   29 +++++++++++++++++++++++++++++
+ fs/ext4/super.c     |    2 ++
+ fs/jbd2/journal.c   |    1 +
+ 7 files changed, 81 insertions(+), 31 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -703,6 +703,13 @@ struct ext4_inode_info {
+       struct list_head i_aio_dio_complete_list;
+       /* current io_end structure for async DIO write*/
+       ext4_io_end_t *cur_aio_dio;
++
++      /*
++       * Transactions that contain inode's metadata needed to complete
++       * fsync and fdatasync, respectively.
++       */
++      tid_t i_sync_tid;
++      tid_t i_datasync_tid;
+ };
+ /*
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -258,6 +258,19 @@ static inline int ext4_jbd2_file_inode(h
+       return 0;
+ }
++static inline void ext4_update_inode_fsync_trans(handle_t *handle,
++                                               struct inode *inode,
++                                               int datasync)
++{
++      struct ext4_inode_info *ei = EXT4_I(inode);
++
++      if (ext4_handle_valid(handle)) {
++              ei->i_sync_tid = handle->h_transaction->t_tid;
++              if (datasync)
++                      ei->i_datasync_tid = handle->h_transaction->t_tid;
++      }
++}
++
+ /* super.c */
+ int ext4_force_commit(struct super_block *sb);
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -3064,6 +3064,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
+               ret = ext4_convert_unwritten_extents_dio(handle, inode,
+                                                       path);
++              if (ret >= 0)
++                      ext4_update_inode_fsync_trans(handle, inode, 1);
+               goto out2;
+       }
+       /* buffered IO case */
+@@ -3091,6 +3093,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       ret = ext4_ext_convert_to_initialized(handle, inode,
+                                               path, iblock,
+                                               max_blocks);
++      if (ret >= 0)
++              ext4_update_inode_fsync_trans(handle, inode, 1);
+ out:
+       if (ret <= 0) {
+               err = ret;
+@@ -3329,10 +3333,16 @@ int ext4_ext_get_blocks(handle_t *handle
+       allocated = ext4_ext_get_actual_len(&newex);
+       set_buffer_new(bh_result);
+-      /* Cache only when it is _not_ an uninitialized extent */
+-      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
++      /*
++       * Cache the extent and update transaction to commit on fdatasync only
++       * when it is _not_ an uninitialized extent.
++       */
++      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
+               ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+                                               EXT4_EXT_CACHE_EXTENT);
++              ext4_update_inode_fsync_trans(handle, inode, 1);
++      } else
++              ext4_update_inode_fsync_trans(handle, inode, 0);
+ out:
+       if (allocated > max_blocks)
+               allocated = max_blocks;
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -51,25 +51,30 @@
+ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ {
+       struct inode *inode = dentry->d_inode;
++      struct ext4_inode_info *ei = EXT4_I(inode);
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+-      int err, ret = 0;
++      int ret;
++      tid_t commit_tid;
+       J_ASSERT(ext4_journal_current_handle() == NULL);
+       trace_ext4_sync_file(file, dentry, datasync);
++      if (inode->i_sb->s_flags & MS_RDONLY)
++              return 0;
++
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+               return ret;
++
++      if (!journal)
++              return simple_fsync(file, dentry, datasync);
++
+       /*
+-       * data=writeback:
++       * data=writeback,ordered:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+-       *  sync_inode() will sync the metadata
+-       *
+-       * data=ordered:
+-       *  The caller's filemap_fdatawrite() will write the data and
+-       *  sync_inode() will write the inode if it is dirty.  Then the caller's
+-       *  filemap_fdatawait() will wait on the pages.
++       *  Metadata is in the journal, we wait for proper transaction to
++       *  commit here.
+        *
+        * data=journal:
+        *  filemap_fdatawrite won't do anything (the buffers are clean).
+@@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, st
+       if (ext4_should_journal_data(inode))
+               return ext4_force_commit(inode->i_sb);
+-      if (!journal)
+-              ret = sync_mapping_buffers(inode->i_mapping);
+-
+-      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+-              goto out;
+-
+-      /*
+-       * The VFS has written the file data.  If the inode is unaltered
+-       * then we need not start a commit.
+-       */
+-      if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
+-              struct writeback_control wbc = {
+-                      .sync_mode = WB_SYNC_ALL,
+-                      .nr_to_write = 0, /* sys_fsync did this */
+-              };
+-              err = sync_inode(inode, &wbc);
+-              if (ret == 0)
+-                      ret = err;
+-      }
+-out:
+-      if (journal && (journal->j_flags & JBD2_BARRIER))
++      commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
++      if (jbd2_log_start_commit(journal, commit_tid))
++              jbd2_log_wait_commit(journal, commit_tid);
++      else if (journal->j_flags & JBD2_BARRIER)
+               blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+       return ret;
+ }
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1025,6 +1025,8 @@ static int ext4_ind_get_blocks(handle_t
+               goto cleanup;
+       set_buffer_new(bh_result);
++
++      ext4_update_inode_fsync_trans(handle, inode, 1);
+ got_it:
+       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       if (count > blocks_to_boundary)
+@@ -4794,6 +4796,7 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+       struct inode *inode;
++      journal_t *journal = EXT4_SB(sb)->s_journal;
+       long ret;
+       int block;
+@@ -4858,6 +4861,31 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_data[block] = raw_inode->i_block[block];
+       INIT_LIST_HEAD(&ei->i_orphan);
++      /*
++       * Set transaction id's of transactions that have to be committed
++       * to finish f[data]sync. We set them to currently running transaction
++       * as we cannot be sure that the inode or some of its metadata isn't
++       * part of the transaction - the inode could have been reclaimed and
++       * now it is reread from disk.
++       */
++      if (journal) {
++              transaction_t *transaction;
++              tid_t tid;
++
++              spin_lock(&journal->j_state_lock);
++              if (journal->j_running_transaction)
++                      transaction = journal->j_running_transaction;
++              else
++                      transaction = journal->j_committing_transaction;
++              if (transaction)
++                      tid = transaction->t_tid;
++              else
++                      tid = journal->j_commit_sequence;
++              spin_unlock(&journal->j_state_lock);
++              ei->i_sync_tid = tid;
++              ei->i_datasync_tid = tid;
++      }
++
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+@@ -5112,6 +5140,7 @@ static int ext4_do_update_inode(handle_t
+               err = rc;
+       ei->i_state &= ~EXT4_STATE_NEW;
++      ext4_update_inode_fsync_trans(handle, inode, 0);
+ out_brelse:
+       brelse(bh);
+       ext4_std_error(inode->i_sb, err);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(st
+       spin_lock_init(&(ei->i_block_reservation_lock));
+       INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+       ei->cur_aio_dio = NULL;
++      ei->i_sync_tid = 0;
++      ei->i_datasync_tid = 0;
+       return &ei->vfs_inode;
+ }
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
+ EXPORT_SYMBOL(jbd2_journal_ack_err);
+ EXPORT_SYMBOL(jbd2_journal_clear_err);
+ EXPORT_SYMBOL(jbd2_log_wait_commit);
++EXPORT_SYMBOL(jbd2_log_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
+ EXPORT_SYMBOL(jbd2_journal_wipe);
diff --git a/review-2.6.32/0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch b/review-2.6.32/0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch
new file mode 100644 (file)
index 0000000..c0468bf
--- /dev/null
@@ -0,0 +1,98 @@
+From 10b526a22877a621d1c94d5efde29e3fea8214c2 Mon Sep 17 00:00:00 2001
+From: Akira Fujita <a-fujita@rs.jp.nec.com>
+Date: Sun, 6 Dec 2009 23:38:31 -0500
+Subject: [PATCH 29/30] ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
+
+(cherry picked from commit 4a58579b9e4e2a35d57e6c9c8483e52f6f1b7fd6)
+
+This patch fixes three problems in the handling of the
+EXT4_IOC_MOVE_EXT ioctl:
+
+1. In current EXT4_IOC_MOVE_EXT, there are read access mode checks for
+original and donor files, but they allow the illegal write access to
+donor file, since donor file is overwritten by original file data.  To
+fix this problem, change access mode checks of original (r->r/w) and
+donor (r->w) files.
+
+2.  Disallow the use of donor files that have the setuid or setgid bit set.
+
+3.  Call mnt_want_write() and mnt_drop_write() before and after the
+call to ext4_move_extents() to get write access to the mount.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |   30 ++++++++++++++++++------------
+ fs/ext4/move_extent.c |    7 +++++++
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -221,32 +221,38 @@ setversion_out:
+               struct file *donor_filp;
+               int err;
++              if (!(filp->f_mode & FMODE_READ) ||
++                  !(filp->f_mode & FMODE_WRITE))
++                      return -EBADF;
++
+               if (copy_from_user(&me,
+                       (struct move_extent __user *)arg, sizeof(me)))
+                       return -EFAULT;
++              me.moved_len = 0;
+               donor_filp = fget(me.donor_fd);
+               if (!donor_filp)
+                       return -EBADF;
+-              if (!capable(CAP_DAC_OVERRIDE)) {
+-                      if ((current->real_cred->fsuid != inode->i_uid) ||
+-                              !(inode->i_mode & S_IRUSR) ||
+-                              !(donor_filp->f_dentry->d_inode->i_mode &
+-                              S_IRUSR)) {
+-                              fput(donor_filp);
+-                              return -EACCES;
+-                      }
++              if (!(donor_filp->f_mode & FMODE_WRITE)) {
++                      err = -EBADF;
++                      goto mext_out;
+               }
+-              me.moved_len = 0;
++              err = mnt_want_write(filp->f_path.mnt);
++              if (err)
++                      goto mext_out;
++
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+-              fput(donor_filp);
++              mnt_drop_write(filp->f_path.mnt);
++              if (me.moved_len > 0)
++                      file_remove_suid(donor_filp);
+               if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
+-                      return -EFAULT;
+-
++                      err = -EFAULT;
++mext_out:
++              fput(donor_filp);
+               return err;
+       }
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -957,6 +957,13 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
++      if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
++              ext4_debug("ext4 move extent: suid or sgid is set"
++                         " to donor file [ino:orig %lu, donor %lu]\n",
++                         orig_inode->i_ino, donor_inode->i_ino);
++              return -EINVAL;
++      }
++
+       /* Ext4 move extent does not support swapfile */
+       if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+               ext4_debug("ext4 move extent: The argument files should "
diff --git a/review-2.6.32/0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch b/review-2.6.32/0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch
new file mode 100644 (file)
index 0000000..ebc999a
--- /dev/null
@@ -0,0 +1,119 @@
+From 2b26b6f696ad26ae401c395491bd0f7a4611c093 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 9 Dec 2009 21:30:02 -0500
+Subject: [PATCH 30/30] ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
+
+(cherry picked from commit fab3a549e204172236779f502eccb4f9bf0dc87d)
+
+Fix the following potential circular locking dependency between
+mm->mmap_sem and ei->i_data_sem:
+
+    =======================================================
+    [ INFO: possible circular locking dependency detected ]
+    2.6.32-04115-gec044c5 #37
+    -------------------------------------------------------
+    ureadahead/1855 is trying to acquire lock:
+     (&mm->mmap_sem){++++++}, at: [<ffffffff81107224>] might_fault+0x5c/0xac
+
+    but task is already holding lock:
+     (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    which lock already depends on the new lock.
+
+    the existing dependency chain (in reverse order) is:
+
+    -> #1 (&ei->i_data_sem){++++..}:
+           [<ffffffff81099bfa>] __lock_acquire+0xb67/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81516633>] down_read+0x51/0x84
+           [<ffffffff811a2414>] ext4_get_blocks+0x50/0x2a5
+           [<ffffffff811a3453>] ext4_get_block+0xab/0xef
+           [<ffffffff81154f39>] do_mpage_readpage+0x198/0x48d
+           [<ffffffff81155360>] mpage_readpages+0xd0/0x114
+           [<ffffffff811a104b>] ext4_readpages+0x1d/0x1f
+           [<ffffffff810f8644>] __do_page_cache_readahead+0x12f/0x1bc
+           [<ffffffff810f86f2>] ra_submit+0x21/0x25
+           [<ffffffff810f0cfd>] filemap_fault+0x19f/0x32c
+           [<ffffffff81107b97>] __do_fault+0x55/0x3a2
+           [<ffffffff81109db0>] handle_mm_fault+0x327/0x734
+           [<ffffffff8151aaa9>] do_page_fault+0x292/0x2aa
+           [<ffffffff81518205>] page_fault+0x25/0x30
+           [<ffffffff812a34d8>] clear_user+0x38/0x3c
+           [<ffffffff81167e16>] padzero+0x20/0x31
+           [<ffffffff81168b47>] load_elf_binary+0x8bc/0x17ed
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff81166d64>] load_script+0x1b8/0x1cc
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff8113255f>] do_execve+0x1ce/0x2cf
+           [<ffffffff81027494>] sys_execve+0x43/0x5a
+           [<ffffffff8102918a>] stub_execve+0x6a/0xc0
+
+    -> #0 (&mm->mmap_sem){++++++}:
+           [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81107251>] might_fault+0x89/0xac
+           [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+           [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+           [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+           [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+           [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+           [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+           [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+    other info that might help us debug this:
+
+    1 lock held by ureadahead/1855:
+     #0:  (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    stack backtrace:
+    Pid: 1855, comm: ureadahead Not tainted 2.6.32-04115-gec044c5 #37
+    Call Trace:
+     [<ffffffff81098c70>] print_circular_bug+0xa8/0xb7
+     [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+     [<ffffffff8102f229>] ? sched_clock+0x9/0xd
+     [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81107251>] might_fault+0x89/0xac
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81124b44>] ? __kmalloc+0x13b/0x18c
+     [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+     [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+     [<ffffffff811bca0b>] ? ext4_ext_fiemap_cb+0x0/0x157
+     [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+     [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+     [<ffffffff8129f6d0>] ? __up_read+0x8d/0x95
+     [<ffffffff81517fb5>] ? retint_swapgs+0x13/0x1b
+     [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+     [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *in
+       while (block < last && block != EXT_MAX_BLOCK) {
+               num = last - block;
+               /* find extent for this block */
++              down_read(&EXT4_I(inode)->i_data_sem);
+               path = ext4_ext_find_extent(inode, block, path);
++              up_read(&EXT4_I(inode)->i_data_sem);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;
+@@ -3730,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, str
+                * Walk the extent tree gathering extent information.
+                * ext4_ext_fiemap_cb will push extents back to user.
+                */
+-              down_read(&EXT4_I(inode)->i_data_sem);
+               error = ext4_ext_walk_space(inode, start_blk, len_blks,
+                                         ext4_ext_fiemap_cb, fieinfo);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+       }
+       return error;
diff --git a/review-2.6.32/mbox b/review-2.6.32/mbox
new file mode 100644 (file)
index 0000000..27686e4
--- /dev/null
@@ -0,0 +1,3730 @@
+From linux@linux.site Thu Dec 10 21:25:40 2009
+Message-Id: <20091211052540.442199443@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:13 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Sebastian Andrzej Siewior <sebastian@breakpoint.cc>,
+ Oleg Nesterov <oleg@redhat.com>,
+ Roland McGrath <roland@redhat.com>,
+ Kyle McMartin <kyle@mcmartin.ca>,
+ Thomas Gleixner <tglx@linutronix.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [01/34] signal: Fix alternate signal stack check
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=signal-fix-alternate-signal-stack-check.patch
+Content-Length: 2919
+Lines: 83
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+
+commit 2a855dd01bc1539111adb7233f587c5c468732ac upstream.
+
+All architectures in the kernel increment/decrement the stack pointer
+before storing values on the stack.
+
+On architectures which have the stack grow down sas_ss_sp == sp is not
+on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is
+on the alternate signal stack.
+
+On architectures which have the stack grow up sas_ss_sp == sp is on
+the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not
+on the alternate signal stack.
+
+The current implementation fails for architectures which have the
+stack grow down on the corner case where sas_ss_sp == sp.  This was
+reported as Debian bug #544905 on AMD64.
+Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c
+
+The test case creates the following stack scenario:
+   0xn0300     stack top
+   0xn0200     alt stack pointer top (when switching to alt stack)
+   0xn01ff     alt stack end
+   0xn0100     alt stack start == stack pointer
+
+If the signal is sent the stack pointer is pointing to the base
+address of the alt stack and the kernel erroneously decides that it
+has already switched to the alternate stack because of the current
+check for "sp - sas_ss_sp < sas_ss_size"
+
+On parisc (stack grows up) the scenario would be:
+   0xn0200     stack pointer
+   0xn01ff     alt stack end
+   0xn0100     alt stack start = alt stack pointer base
+                                 (when switching to alt stack)
+   0xn0000     stack base
+
+This is handled correctly by the current implementation.
+
+[ tglx: Modified for archs which have the stack grow up (parisc) which
+       would fail with the correct implementation for stack grows
+       down. Added a check for sp >= current->sas_ss_sp which is
+       strictly not necessary but makes the code symmetric for both
+       variants ]
+
+Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Kyle McMartin <kyle@mcmartin.ca>
+LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/sched.h |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2086,11 +2086,18 @@ static inline int is_si_special(const st
+       return info <= SEND_SIG_FORCED;
+ }
+-/* True if we are on the alternate signal stack.  */
+-
++/*
++ * True if we are on the alternate signal stack.
++ */
+ static inline int on_sig_stack(unsigned long sp)
+ {
+-      return (sp - current->sas_ss_sp < current->sas_ss_size);
++#ifdef CONFIG_STACK_GROWSUP
++      return sp >= current->sas_ss_sp &&
++              sp - current->sas_ss_sp < current->sas_ss_size;
++#else
++      return sp > current->sas_ss_sp &&
++              sp - current->sas_ss_sp <= current->sas_ss_size;
++#endif
+ }
+ static inline int sas_ss_flags(unsigned long sp)
+
+
+From linux@linux.site Thu Dec 10 21:25:41 2009
+Message-Id: <20091211052540.941627509@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:14 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ James Smart <james.smart@emulex.com>,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [02/34] SCSI: scsi_lib_dma: fix bug with dma maps on nested scsi objects
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch
+Content-Length: 5210
+Lines: 149
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: James Bottomley <James.Bottomley@suse.de>
+
+commit d139b9bd0e52dda14fd13412e7096e68b56d0076 upstream.
+
+Some of our virtual SCSI hosts don't have a proper bus parent at the
+top, which can be a problem for doing DMA on them.
+
+This patch makes the host device cache a pointer to the physical bus
+device and provides an extra API for setting it (the normal API picks
+it up from the parent).  This patch also modifies the qla2xxx and lpfc
+vport logic to use the new DMA host setting API.
+
+Acked-By: James Smart  <james.smart@emulex.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/hosts.c            |   13 ++++++++++---
+ drivers/scsi/lpfc/lpfc_init.c   |    2 +-
+ drivers/scsi/qla2xxx/qla_attr.c |    3 ++-
+ drivers/scsi/scsi_lib_dma.c     |    4 ++--
+ include/scsi/scsi_host.h        |   16 +++++++++++++++-
+ 5 files changed, 30 insertions(+), 8 deletions(-)
+
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *
+ EXPORT_SYMBOL(scsi_remove_host);
+ /**
+- * scsi_add_host - add a scsi host
++ * scsi_add_host_with_dma - add a scsi host with dma device
+  * @shost:    scsi host pointer to add
+  * @dev:      a struct device of type scsi class
++ * @dma_dev:  dma device for the host
++ *
++ * Note: You rarely need to worry about this unless you're in a
++ * virtualised host environments, so use the simpler scsi_add_host()
++ * function instead.
+  *
+  * Return value: 
+  *    0 on success / != 0 for error
+  **/
+-int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
++int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
++                         struct device *dma_dev)
+ {
+       struct scsi_host_template *sht = shost->hostt;
+       int error = -EINVAL;
+@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shos
+       if (!shost->shost_gendev.parent)
+               shost->shost_gendev.parent = dev ? dev : &platform_bus;
++      shost->dma_dev = dma_dev;
+       error = device_add(&shost->shost_gendev);
+       if (error)
+@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shos
+  fail:
+       return error;
+ }
+-EXPORT_SYMBOL(scsi_add_host);
++EXPORT_SYMBOL(scsi_add_host_with_dma);
+ static void scsi_host_dev_release(struct device *dev)
+ {
+--- a/drivers/scsi/lpfc/lpfc_init.c
++++ b/drivers/scsi/lpfc/lpfc_init.c
+@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba,
+       vport->els_tmofunc.function = lpfc_els_timeout;
+       vport->els_tmofunc.data = (unsigned long)vport;
+-      error = scsi_add_host(shost, dev);
++      error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
+       if (error)
+               goto out_put_shost;
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc
+                       fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+       }
+-      if (scsi_add_host(vha->host, &fc_vport->dev)) {
++      if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
++                                 &ha->pdev->dev)) {
+               DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
+                       vha->host_no, vha->vp_idx));
+               goto vport_create_failed_2;
+--- a/drivers/scsi/scsi_lib_dma.c
++++ b/drivers/scsi/scsi_lib_dma.c
+@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
+       int nseg = 0;
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                                 cmd->sc_data_direction);
+@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
+ void scsi_dma_unmap(struct scsi_cmnd *cmd)
+ {
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                            cmd->sc_data_direction);
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -677,6 +677,12 @@ struct Scsi_Host {
+       void *shost_data;
+       /*
++       * Points to the physical bus device we'd use to do DMA
++       * Needed just in case we have virtual hosts.
++       */
++      struct device *dma_dev;
++
++      /*
+        * We should ensure that this is aligned, both for better performance
+        * and also because some compilers (m68k) don't automatically force
+        * alignment to a long boundary.
+@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_H
+ extern void scsi_flush_work(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
++extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
++                                             struct device *,
++                                             struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+ extern void scsi_rescan_device(struct device *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(
+ extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
++static inline int __must_check scsi_add_host(struct Scsi_Host *host,
++                                           struct device *dev)
++{
++      return scsi_add_host_with_dma(host, dev, dev);
++}
++
+ static inline struct device *scsi_get_device(struct Scsi_Host *shost)
+ {
+         return shost->shost_gendev.parent;
+
+
+From linux@linux.site Thu Dec 10 21:25:42 2009
+Message-Id: <20091211052541.550415868@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:15 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Martin Michlmayr <tbm@cyrius.com>,
+ Boaz Harrosh <bharrosh@panasas.com>,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [03/34] SCSI: osd_protocol.h: Add missing #include
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-osd_protocol.h-add-missing-include.patch
+Content-Length: 708
+Lines: 24
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Martin Michlmayr <tbm@cyrius.com>
+
+commit 0899638688f223fd9e9fee60d662665e11693d12 upstream.
+
+include/scsi/osd_protocol.h uses ALIGN() without an #include
+<linux/kernel.h>, leading to:
+| include/scsi/osd_protocol.h:362: error: implicit declaration of function 'ALIGN'
+
+Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/include/scsi/osd_protocol.h
++++ b/include/scsi/osd_protocol.h
+@@ -17,6 +17,7 @@
+ #define __OSD_PROTOCOL_H__
+ #include <linux/types.h>
++#include <linux/kernel.h>
+ #include <asm/unaligned.h>
+ #include <scsi/scsi.h>
+
+
+From linux@linux.site Thu Dec 10 21:25:42 2009
+Message-Id: <20091211052542.045664905@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:16 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ James Bottomley <James.Bottomley@suse.de>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [04/34] SCSI: megaraid_sas: fix 64 bit sense pointer truncation
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch
+Content-Length: 1456
+Lines: 47
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+From: Yang, Bo <Bo.Yang@lsi.com>
+
+commit 7b2519afa1abd1b9f63aa1e90879307842422dae upstream.
+
+The current sense pointer is cast to a u32 pointer, which can truncate
+on 64 bits.  Fix by using unsigned long instead.
+
+Signed-off-by Bo Yang<bo.yang@lsi.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/megaraid/megaraid_sas.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/megaraid/megaraid_sas.c
++++ b/drivers/scsi/megaraid/megaraid_sas.c
+@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+       int error = 0, i;
+       void *sense = NULL;
+       dma_addr_t sense_handle;
+-      u32 *sense_ptr;
++      unsigned long *sense_ptr;
+       memset(kbuff_arr, 0, sizeof(kbuff_arr));
+@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+               }
+               sense_ptr =
+-                  (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
++              (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
+               *sense_ptr = sense_handle;
+       }
+@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+                * sense_ptr points to the location that has the user
+                * sense buffer address
+                */
+-              sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
+-                                   ioc->sense_off);
++              sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
++                              ioc->sense_off);
+               if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
+                                sense, ioc->sense_len)) {
+
+
+From linux@linux.site Thu Dec 10 21:25:43 2009
+Message-Id: <20091211052542.664737460@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:17 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Curt Wohlgemuth <curtw@google.com>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [05/34] ext4: fix potential buffer head leak when add_dirent_to_buf() returns ENOSPC
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch
+Content-Length: 3833
+Lines: 118
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 2de770a406b06dfc619faabbf5d85c835ed3f2e1)
+
+Previously add_dirent_to_buf() did not free its passed-in buffer head
+in the case of ENOSPC, since in some cases the caller still needed it.
+However, this led to potential buffer head leaks since not all callers
+dealt with this correctly.  Fix this by simplifying the freeing
+convention; now add_dirent_to_buf() *never* frees the passed-in buffer
+head, and leaves that to the responsibility of its caller.  This makes
+things cleaner and easier to prove that the code is neither leaking
+buffer heads nor calling brelse() one time too many.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Curt Wohlgemuth <curtw@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c |   30 ++++++++++++------------------
+ 1 file changed, 12 insertions(+), 18 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1292,9 +1292,6 @@ errout:
+  * add_dirent_to_buf will attempt search the directory block for
+  * space.  It will return -ENOSPC if no space is available, and -EIO
+  * and -EEXIST if directory entry already exists.
+- *
+- * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+- * all other cases bh is released.
+  */
+ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode, struct ext4_dir_entry_2 *de,
+@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *h
+               top = bh->b_data + blocksize - reclen;
+               while ((char *) de <= top) {
+                       if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
+-                                                bh, offset)) {
+-                              brelse(bh);
++                                                bh, offset))
+                               return -EIO;
+-                      }
+-                      if (ext4_match(namelen, name, de)) {
+-                              brelse(bh);
++                      if (ext4_match(namelen, name, de))
+                               return -EEXIST;
+-                      }
+                       nlen = EXT4_DIR_REC_LEN(de->name_len);
+                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+                       if ((de->inode? rlen - nlen: rlen) >= reclen)
+@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_journal_get_write_access(handle, bh);
+       if (err) {
+               ext4_std_error(dir->i_sb, err);
+-              brelse(bh);
+               return err;
+       }
+@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *h
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
+       if (err)
+               ext4_std_error(dir->i_sb, err);
+-      brelse(bh);
+       return 0;
+ }
+@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *ha
+       if (!(de))
+               return retval;
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *hand
+               if(!bh)
+                       return retval;
+               retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-              if (retval != -ENOSPC)
++              if (retval != -ENOSPC) {
++                      brelse(bh);
+                       return retval;
++              }
+               if (blocks == 1 && !dx_fallback &&
+                   EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
+@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *hand
+       de = (struct ext4_dir_entry_2 *) bh->b_data;
+       de->inode = 0;
+       de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      brelse(bh);
++      return retval;
+ }
+ /*
+@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *h
+               goto journal_error;
+       err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-      if (err != -ENOSPC) {
+-              bh = NULL;
++      if (err != -ENOSPC)
+               goto cleanup;
+-      }
+       /* Block full, should compress but for now just split */
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *h
+       if (!de)
+               goto cleanup;
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+-      bh = NULL;
+       goto cleanup;
+ journal_error:
+
+
+From linux@linux.site Thu Dec 10 21:25:43 2009
+Message-Id: <20091211052543.277851362@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:18 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [06/34] ext4: avoid divide by zero when trying to mount a corrupted file system
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch
+Content-Length: 1267
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 503358ae01b70ce6909d19dd01287093f6b6271c)
+
+If s_log_groups_per_flex is greater than 31, then groups_per_flex will
+overflow and cause a divide by zero error.  This can cause a kernel
+BUG if such a file system is mounted.
+
+Thanks to Nageswara R Sastry for analyzing the failure and providing
+an initial patch.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=14287
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1673,14 +1673,14 @@ static int ext4_fill_flex_info(struct su
+       size_t size;
+       int i;
+-      if (!sbi->s_es->s_log_groups_per_flex) {
++      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
++      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
++
++      if (groups_per_flex < 2) {
+               sbi->s_log_groups_per_flex = 0;
+               return 1;
+       }
+-      sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+-      groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+-
+       /* We allocate both existing and potentially added groups */
+       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+                       ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
+
+
+From linux@linux.site Thu Dec 10 21:25:44 2009
+Message-Id: <20091211052543.772152436@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:19 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [07/34] ext4: fix the returned block count if EXT4_IOC_MOVE_EXT fails
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch
+Content-Length: 10970
+Lines: 349
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit f868a48d06f8886cb0367568a12367fa4f21ea0d)
+
+If the EXT4_IOC_MOVE_EXT ioctl fails, the number of blocks that were
+exchanged before the failure should be returned to the userspace
+caller.  Unfortunately, currently if the block size is not the same as
+the page size, the block count that is returned is the
+page-aligned block count instead of the actual block count.  This
+commit addresses this bug.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  139 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 73 insertions(+), 66 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -661,6 +661,7 @@ mext_calc_swap_extents(struct ext4_exten
+  * @donor_inode:      donor inode
+  * @from:             block offset of orig_inode
+  * @count:            block count to be replaced
++ * @err:              pointer to save return value
+  *
+  * Replace original inode extents and donor inode extents page by page.
+  * We implement this replacement in the following three steps:
+@@ -671,19 +672,18 @@ mext_calc_swap_extents(struct ext4_exten
+  * 3. Change the block information of donor inode to point at the saved
+  *    original inode blocks in the dummy extents.
+  *
+- * Return 0 on success, or a negative error value on failure.
++ * Return replaced block count.
+  */
+ static int
+ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+                          struct inode *donor_inode, ext4_lblk_t from,
+-                         ext4_lblk_t count)
++                         ext4_lblk_t count, int *err)
+ {
+       struct ext4_ext_path *orig_path = NULL;
+       struct ext4_ext_path *donor_path = NULL;
+       struct ext4_extent *oext, *dext;
+       struct ext4_extent tmp_dext, tmp_oext;
+       ext4_lblk_t orig_off = from, donor_off = from;
+-      int err = 0;
+       int depth;
+       int replaced_count = 0;
+       int dext_alen;
+@@ -691,13 +691,13 @@ mext_replace_branches(handle_t *handle,
+       mext_double_down_write(orig_inode, donor_inode);
+       /* Get the original extent for the block "orig_off" */
+-      err = get_ext_path(orig_inode, orig_off, &orig_path);
+-      if (err)
++      *err = get_ext_path(orig_inode, orig_off, &orig_path);
++      if (*err)
+               goto out;
+       /* Get the donor extent for the head */
+-      err = get_ext_path(donor_inode, donor_off, &donor_path);
+-      if (err)
++      *err = get_ext_path(donor_inode, donor_off, &donor_path);
++      if (*err)
+               goto out;
+       depth = ext_depth(orig_inode);
+       oext = orig_path[depth].p_ext;
+@@ -707,9 +707,9 @@ mext_replace_branches(handle_t *handle,
+       dext = donor_path[depth].p_ext;
+       tmp_dext = *dext;
+-      err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++      *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                     donor_off, count);
+-      if (err)
++      if (*err)
+               goto out;
+       /* Loop for the donor extents */
+@@ -718,7 +718,7 @@ mext_replace_branches(handle_t *handle,
+               if (!dext) {
+                       ext4_error(donor_inode->i_sb, __func__,
+                                  "The extent for donor must be found");
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
+                       ext4_error(donor_inode->i_sb, __func__,
+@@ -726,20 +726,20 @@ mext_replace_branches(handle_t *handle,
+                               "extent(%u) should be equal",
+                               donor_off,
+                               le32_to_cpu(tmp_dext.ee_block));
+-                      err = -EIO;
++                      *err = -EIO;
+                       goto out;
+               }
+               /* Set donor extent to orig extent */
+-              err = mext_leaf_block(handle, orig_inode,
++              *err = mext_leaf_block(handle, orig_inode,
+                                          orig_path, &tmp_dext, &orig_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               /* Set orig extent to donor extent */
+-              err = mext_leaf_block(handle, donor_inode,
++              *err = mext_leaf_block(handle, donor_inode,
+                                          donor_path, &tmp_oext, &donor_off);
+-              if (err < 0)
++              if (*err)
+                       goto out;
+               dext_alen = ext4_ext_get_actual_len(&tmp_dext);
+@@ -753,35 +753,25 @@ mext_replace_branches(handle_t *handle,
+               if (orig_path)
+                       ext4_ext_drop_refs(orig_path);
+-              err = get_ext_path(orig_inode, orig_off, &orig_path);
+-              if (err)
++              *err = get_ext_path(orig_inode, orig_off, &orig_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(orig_inode);
+               oext = orig_path[depth].p_ext;
+-              if (le32_to_cpu(oext->ee_block) +
+-                              ext4_ext_get_actual_len(oext) <= orig_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_oext = *oext;
+               if (donor_path)
+                       ext4_ext_drop_refs(donor_path);
+-              err = get_ext_path(donor_inode, donor_off, &donor_path);
+-              if (err)
++              *err = get_ext_path(donor_inode, donor_off, &donor_path);
++              if (*err)
+                       goto out;
+               depth = ext_depth(donor_inode);
+               dext = donor_path[depth].p_ext;
+-              if (le32_to_cpu(dext->ee_block) +
+-                              ext4_ext_get_actual_len(dext) <= donor_off) {
+-                      err = 0;
+-                      goto out;
+-              }
+               tmp_dext = *dext;
+-              err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++              *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                          donor_off, count - replaced_count);
+-              if (err)
++              if (*err)
+                       goto out;
+       }
+@@ -796,7 +786,7 @@ out:
+       }
+       mext_double_up_write(orig_inode, donor_inode);
+-      return err;
++      return replaced_count;
+ }
+ /**
+@@ -808,16 +798,17 @@ out:
+  * @data_offset_in_page:      block index where data swapping starts
+  * @block_len_in_page:                the number of blocks to be swapped
+  * @uninit:                   orig extent is uninitialized or not
++ * @err:                      pointer to save return value
+  *
+  * Save the data in original inode blocks and replace original inode extents
+  * with donor inode extents by calling mext_replace_branches().
+- * Finally, write out the saved data in new original inode blocks. Return 0
+- * on success, or a negative error value on failure.
++ * Finally, write out the saved data in new original inode blocks. Return
++ * replaced block count.
+  */
+ static int
+ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                 pgoff_t orig_page_offset, int data_offset_in_page,
+-                int block_len_in_page, int uninit)
++                int block_len_in_page, int uninit, int *err)
+ {
+       struct inode *orig_inode = o_filp->f_dentry->d_inode;
+       struct address_space *mapping = orig_inode->i_mapping;
+@@ -829,9 +820,11 @@ move_extent_per_page(struct file *o_filp
+       long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+       unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+       unsigned int w_flags = 0;
+-      unsigned int tmp_data_len, data_len;
++      unsigned int tmp_data_size, data_size, replaced_size;
+       void *fsdata;
+-      int ret, i, jblocks;
++      int i, jblocks;
++      int err2 = 0;
++      int replaced_count = 0;
+       int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+       /*
+@@ -841,8 +834,8 @@ move_extent_per_page(struct file *o_filp
+       jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+       handle = ext4_journal_start(orig_inode, jblocks);
+       if (IS_ERR(handle)) {
+-              ret = PTR_ERR(handle);
+-              return ret;
++              *err = PTR_ERR(handle);
++              return 0;
+       }
+       if (segment_eq(get_fs(), KERNEL_DS))
+@@ -858,9 +851,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              ret = mext_replace_branches(handle, orig_inode,
+-                                               donor_inode, orig_blk_offset,
+-                                               block_len_in_page);
++              replaced_count = mext_replace_branches(handle, orig_inode,
++                                              donor_inode, orig_blk_offset,
++                                              block_len_in_page, err);
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+@@ -870,27 +863,28 @@ move_extent_per_page(struct file *o_filp
+       offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+-      /* Calculate data_len */
++      /* Calculate data_size */
+       if ((orig_blk_offset + block_len_in_page - 1) ==
+           ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
+               /* Replace the last block */
+-              tmp_data_len = orig_inode->i_size & (blocksize - 1);
++              tmp_data_size = orig_inode->i_size & (blocksize - 1);
+               /*
+-               * If data_len equal zero, it shows data_len is multiples of
++               * If data_size equal zero, it shows data_size is multiples of
+                * blocksize. So we set appropriate value.
+                */
+-              if (tmp_data_len == 0)
+-                      tmp_data_len = blocksize;
++              if (tmp_data_size == 0)
++                      tmp_data_size = blocksize;
+-              data_len = tmp_data_len +
++              data_size = tmp_data_size +
+                       ((block_len_in_page - 1) << orig_inode->i_blkbits);
+-      } else {
+-              data_len = block_len_in_page << orig_inode->i_blkbits;
+-      }
++      } else
++              data_size = block_len_in_page << orig_inode->i_blkbits;
++
++      replaced_size = data_size;
+-      ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
++      *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
+                                &page, &fsdata);
+-      if (unlikely(ret < 0))
++      if (unlikely(*err < 0))
+               goto out;
+       if (!PageUptodate(page)) {
+@@ -911,10 +905,17 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      ret = mext_replace_branches(handle, orig_inode, donor_inode,
+-                                       orig_blk_offset, block_len_in_page);
+-      if (ret < 0)
+-              goto out;
++      replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
++                                      orig_blk_offset, block_len_in_page,
++                                      &err2);
++      if (err2) {
++              if (replaced_count) {
++                      block_len_in_page = replaced_count;
++                      replaced_size =
++                              block_len_in_page << orig_inode->i_blkbits;
++              } else
++                      goto out;
++      }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+@@ -928,16 +929,16 @@ move_extent_per_page(struct file *o_filp
+               bh = bh->b_this_page;
+       for (i = 0; i < block_len_in_page; i++) {
+-              ret = ext4_get_block(orig_inode,
++              *err = ext4_get_block(orig_inode,
+                               (sector_t)(orig_blk_offset + i), bh, 0);
+-              if (ret < 0)
++              if (*err < 0)
+                       goto out;
+               if (bh->b_this_page != NULL)
+                       bh = bh->b_this_page;
+       }
+-      ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
++      *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
+                              page, fsdata);
+       page = NULL;
+@@ -951,7 +952,10 @@ out:
+ out2:
+       ext4_journal_stop(handle);
+-      return ret < 0 ? ret : 0;
++      if (err2)
++              *err = err2;
++
++      return replaced_count;
+ }
+ /**
+@@ -1367,15 +1371,17 @@ ext4_move_extents(struct file *o_filp, s
+               while (orig_page_offset <= seq_end_page) {
+                       /* Swap original branches with new branches */
+-                      ret1 = move_extent_per_page(o_filp, donor_inode,
++                      block_len_in_page = move_extent_per_page(
++                                              o_filp, donor_inode,
+                                               orig_page_offset,
+                                               data_offset_in_page,
+-                                              block_len_in_page, uninit);
+-                      if (ret1 < 0)
+-                              goto out;
+-                      orig_page_offset++;
++                                              block_len_in_page, uninit,
++                                              &ret1);
++
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
++                      if (ret1 < 0)
++                              goto out;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+@@ -1385,6 +1391,7 @@ ext4_move_extents(struct file *o_filp, s
+                               goto out;
+                       }
++                      orig_page_offset++;
+                       data_offset_in_page = 0;
+                       rest_blocks -= block_len_in_page;
+                       if (rest_blocks > blocks_per_page)
+
+
+From linux@linux.site Thu Dec 10 21:25:44 2009
+Message-Id: <20091211052544.287395070@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:20 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [08/34] ext4: fix lock order problem in ext4_move_extents()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch
+Content-Length: 10372
+Lines: 310
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit fc04cb49a898c372a22b21fffc47f299d8710801)
+
+ext4_move_extents() checks the logical block contiguousness of the
+original file with ext4_ext_find_extent() and mext_next_extent().
+Therefore the extent which the ext4_ext_path structure indicates
+must not be changed between the above functions.
+
+But in the current implementation, there is no i_data_sem protection
+between ext4_ext_find_extent() and mext_next_extent().  So the extent
+which the ext4_ext_path structure indicates may be overwritten by
+delalloc.  As a result, ext4_move_extents() will exchange wrong blocks
+between original and donor files.  I changed the places where
+i_data_sem is acquired/released to solve this problem.
+
+Moreover, I changed move_extent_per_page() to start transaction first,
+and then acquire i_data_sem.  Without this change, there is a
+possibility of the deadlock between mmap() and ext4_move_extents():
+
+* NOTE: "A", "B" and "C" mean different processes
+
+A-1: ext4_move_extents() acquires i_data_sem of two inodes.
+
+B:   do_page_fault() starts the transaction (T),
+     and then tries to acquire i_data_sem.
+     But process "A" is already holding it, so it is kept waiting.
+
+C:   While "A" and "B" are running, kjournald2 tries to commit transaction
+     (T), but it is still being updated, so kjournald2 waits for it.
+
+A-2: Calls ext4_journal_start() while holding i_data_sem,
+     but transaction (T) is locked.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |  117 ++++++++++++++++++++++----------------------------
+ 1 file changed, 53 insertions(+), 64 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -77,12 +77,14 @@ static int
+ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+                     struct ext4_extent **extent)
+ {
++      struct ext4_extent_header *eh;
+       int ppos, leaf_ppos = path->p_depth;
+       ppos = leaf_ppos;
+       if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+               /* leaf block */
+               *extent = ++path[ppos].p_ext;
++              path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+               return 0;
+       }
+@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, st
+                                       ext_block_hdr(path[cur_ppos+1].p_bh);
+                       }
++                      path[leaf_ppos].p_ext = *extent = NULL;
++
++                      eh = path[leaf_ppos].p_hdr;
++                      if (le16_to_cpu(eh->eh_entries) == 0)
++                              /* empty leaf is found */
++                              return -ENODATA;
++
+                       /* leaf block */
+                       path[leaf_ppos].p_ext = *extent =
+                               EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
++                      path[leaf_ppos].p_block =
++                                      ext_pblock(path[leaf_ppos].p_ext);
+                       return 0;
+               }
+       }
+@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inod
+ }
+ /**
+- * mext_double_down_read - Acquire two inodes' read semaphore
+- *
+- * @orig_inode:               original inode structure
+- * @donor_inode:      donor inode structure
+- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+- */
+-static void
+-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      struct inode *first = orig_inode, *second = donor_inode;
+-
+-      /*
+-       * Use the inode number to provide the stable locking order instead
+-       * of its address, because the C language doesn't guarantee you can
+-       * compare pointers that don't come from the same array.
+-       */
+-      if (donor_inode->i_ino < orig_inode->i_ino) {
+-              first = donor_inode;
+-              second = orig_inode;
+-      }
+-
+-      down_read(&EXT4_I(first)->i_data_sem);
+-      down_read(&EXT4_I(second)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_down_write - Acquire two inodes' write semaphore
++ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure
+  * @donor_inode:      donor inode structure
+- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
++ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
++ * i_ino order.
+  */
+ static void
+-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
++double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       struct inode *first = orig_inode, *second = donor_inode;
+@@ -207,28 +193,14 @@ mext_double_down_write(struct inode *ori
+ }
+ /**
+- * mext_double_up_read - Release two inodes' read semaphore
++ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:               original inode structure to be released its lock first
+  * @donor_inode:      donor inode structure to be released its lock second
+- * Release read semaphore of two inodes (orig and donor).
++ * Release write lock of i_data_sem of two inodes (orig and donor).
+  */
+ static void
+-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-      up_read(&EXT4_I(orig_inode)->i_data_sem);
+-      up_read(&EXT4_I(donor_inode)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_up_write - Release two inodes' write semaphore
+- *
+- * @orig_inode:               original inode structure to be released its lock first
+- * @donor_inode:      donor inode structure to be released its lock second
+- * Release write semaphore of two inodes (orig and donor).
+- */
+-static void
+-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
++double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+       up_write(&EXT4_I(orig_inode)->i_data_sem);
+       up_write(&EXT4_I(donor_inode)->i_data_sem);
+@@ -688,8 +660,6 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
+-      mext_double_down_write(orig_inode, donor_inode);
+-
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -785,7 +755,6 @@ out:
+               kfree(donor_path);
+       }
+-      mext_double_up_write(orig_inode, donor_inode);
+       return replaced_count;
+ }
+@@ -851,6 +820,11 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
++              /*
++               * Protect extent trees against block allocations
++               * via delalloc
++               */
++              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+@@ -858,6 +832,7 @@ move_extent_per_page(struct file *o_filp
+               /* Clear the inode cache not to refer to the old data */
+               ext4_ext_invalidate_cache(orig_inode);
+               ext4_ext_invalidate_cache(donor_inode);
++              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -905,6 +880,8 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -913,14 +890,18 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else
++              } else {
++                      double_up_write_data_sem(orig_inode, donor_inode);
+                       goto out;
++              }
+       }
+       /* Clear the inode cache not to refer to the old data */
+       ext4_ext_invalidate_cache(orig_inode);
+       ext4_ext_invalidate_cache(donor_inode);
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+@@ -1236,16 +1217,16 @@ ext4_move_extents(struct file *o_filp, s
+               return -EINVAL;
+       }
+-      /* protect orig and donor against a truncate */
++      /* Protect orig and donor inodes against a truncate */
+       ret1 = mext_inode_double_lock(orig_inode, donor_inode);
+       if (ret1 < 0)
+               return ret1;
+-      mext_double_down_read(orig_inode, donor_inode);
++      /* Protect extent tree against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+                                       donor_start, &len, *moved_len);
+-      mext_double_up_read(orig_inode, donor_inode);
+       if (ret1)
+               goto out;
+@@ -1308,6 +1289,10 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
++      /* Discard preallocations of two inodes */
++      ext4_discard_preallocations(orig_inode);
++      ext4_discard_preallocations(donor_inode);
++
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1359,14 +1344,14 @@ ext4_move_extents(struct file *o_filp, s
+               seq_start = le32_to_cpu(ext_cur->ee_block);
+               rest_blocks = seq_blocks;
+-              /* Discard preallocations of two inodes */
+-              down_write(&EXT4_I(orig_inode)->i_data_sem);
+-              ext4_discard_preallocations(orig_inode);
+-              up_write(&EXT4_I(orig_inode)->i_data_sem);
+-
+-              down_write(&EXT4_I(donor_inode)->i_data_sem);
+-              ext4_discard_preallocations(donor_inode);
+-              up_write(&EXT4_I(donor_inode)->i_data_sem);
++              /*
++               * Up semaphore to avoid following problems:
++               * a. transaction deadlock among ext4_journal_start,
++               *    ->write_begin via pagefault, and jbd2_journal_commit
++               * b. racing with ->readpage, ->write_begin, and ext4_get_block
++               *    in move_extent_per_page
++               */
++              double_up_write_data_sem(orig_inode, donor_inode);
+               while (orig_page_offset <= seq_end_page) {
+@@ -1381,14 +1366,14 @@ ext4_move_extents(struct file *o_filp, s
+                       /* Count how many blocks we have exchanged */
+                       *moved_len += block_len_in_page;
+                       if (ret1 < 0)
+-                              goto out;
++                              break;
+                       if (*moved_len > len) {
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+                                       "sum of replaced: %llu requested: %llu",
+                                       *moved_len, len);
+                               ret1 = -EIO;
+-                              goto out;
++                              break;
+                       }
+                       orig_page_offset++;
+@@ -1400,6 +1385,10 @@ ext4_move_extents(struct file *o_filp, s
+                               block_len_in_page = rest_blocks;
+               }
++              double_down_write_data_sem(orig_inode, donor_inode);
++              if (ret1 < 0)
++                      break;
++
+               /* Decrease buffer counter */
+               if (holecheck_path)
+                       ext4_ext_drop_refs(holecheck_path);
+@@ -1429,7 +1418,7 @@ out:
+               ext4_ext_drop_refs(holecheck_path);
+               kfree(holecheck_path);
+       }
+-
++      double_up_write_data_sem(orig_inode, donor_inode);
+       ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
+       if (ret1)
+
+
+From linux@linux.site Thu Dec 10 21:25:45 2009
+Message-Id: <20091211052544.890897126@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:21 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [09/34] ext4: fix possible recursive locking warning in EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch
+Content-Length: 1075
+Lines: 32
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 49bd22bc4d603a2a4fc2a6a60e156cbea52eb494)
+
+If CONFIG_PROVE_LOCKING is enabled, the double_down_write_data_sem()
+will trigger a false-positive warning of a recursive lock.  Since we
+take i_data_sem for the two inodes ordered by their inode numbers,
+this isn't a problem.  Use of down_write_nested() will notify the lock
+dependency checker machinery that there is no problem here.
+
+This problem was reported by Brian Rogers:
+
+       http://marc.info/?l=linux-ext4&m=125115356928011&w=1
+
+Reported-by: Brian Rogers <brian@xyzw.org>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -189,7 +189,7 @@ double_down_write_data_sem(struct inode
+       }
+       down_write(&EXT4_I(first)->i_data_sem);
+-      down_write(&EXT4_I(second)->i_data_sem);
++      down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:45 2009
+Message-Id: <20091211052545.443549269@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:22 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [10/34] ext4: plug a buffer_head leak in an error path of ext4_iget()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch
+Content-Length: 2427
+Lines: 82
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 567f3e9a70d71e5c9be03701b8578be77857293b)
+
+One of the invalid error paths in ext4_iget() forgot to brelse() the
+inode buffer head.  Fix it by adding a brelse() in the common error
+return path, which also simplifies the function.
+
+Thanks to Andi Kleen <ak@linux.intel.com> for reporting the problem.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4781,7 +4781,6 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+-      struct buffer_head *bh;
+       struct inode *inode;
+       long ret;
+       int block;
+@@ -4793,11 +4792,11 @@ struct inode *ext4_iget(struct super_blo
+               return inode;
+       ei = EXT4_I(inode);
++      iloc.bh = 0;
+       ret = __ext4_get_inode_loc(inode, &iloc, 0);
+       if (ret < 0)
+               goto bad_inode;
+-      bh = iloc.bh;
+       raw_inode = ext4_raw_inode(&iloc);
+       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+@@ -4820,7 +4819,6 @@ struct inode *ext4_iget(struct super_blo
+               if (inode->i_mode == 0 ||
+                   !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+                       /* this inode is deleted */
+-                      brelse(bh);
+                       ret = -ESTALE;
+                       goto bad_inode;
+               }
+@@ -4852,7 +4850,6 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+-                      brelse(bh);
+                       ret = -EIO;
+                       goto bad_inode;
+               }
+@@ -4905,10 +4902,8 @@ struct inode *ext4_iget(struct super_blo
+               /* Validate block references which are part of inode */
+               ret = ext4_check_inode_blockref(inode);
+       }
+-      if (ret) {
+-              brelse(bh);
++      if (ret)
+               goto bad_inode;
+-      }
+       if (S_ISREG(inode->i_mode)) {
+               inode->i_op = &ext4_file_inode_operations;
+@@ -4936,7 +4931,6 @@ struct inode *ext4_iget(struct super_blo
+                       init_special_inode(inode, inode->i_mode,
+                          new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+       } else {
+-              brelse(bh);
+               ret = -EIO;
+               ext4_error(inode->i_sb, __func__,
+                          "bogus i_mode (%o) for inode=%lu",
+@@ -4949,6 +4943,7 @@ struct inode *ext4_iget(struct super_blo
+       return inode;
+ bad_inode:
++      brelse(iloc.bh);
+       iget_failed(inode);
+       return ERR_PTR(ret);
+ }
+
+
+From linux@linux.site Thu Dec 10 21:25:46 2009
+Message-Id: <20091211052545.995802406@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:23 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [11/34] ext4: make sure directory and symlink blocks are revoked
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch
+Content-Length: 2052
+Lines: 58
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 50689696867d95b38d9c7be640a311494a04fb86)
+
+When an inode gets unlinked, the functions ext4_clear_blocks() and
+ext4_remove_blocks() call ext4_forget() for all the buffer heads
+corresponding to the deleted inode's data blocks.  If the inode is a
+directory or a symlink, the is_metadata parameter must be non-zero so
+ext4_forget() will revoke them via jbd2_journal_revoke().  Otherwise,
+if these blocks are reused for a data file, and the system crashes
+before a journal checkpoint, the journal replay could end up
+corrupting these data blocks.
+
+Thanks to Curt Wohlgemuth for pointing out potential problems in this
+area.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    2 +-
+ fs/ext4/inode.c   |    6 ++++--
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2074,7 +2074,7 @@ static int ext4_remove_blocks(handle_t *
+               ext_debug("free last %u blocks starting %llu\n", num, start);
+               for (i = 0; i < num; i++) {
+                       bh = sb_find_get_block(inode->i_sb, start + i);
+-                      ext4_forget(handle, 0, inode, bh, start + i);
++                      ext4_forget(handle, metadata, inode, bh, start + i);
+               }
+               ext4_free_blocks(handle, inode, start, num, metadata);
+       } else if (from == le32_to_cpu(ex->ee_block)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4120,6 +4120,8 @@ static void ext4_clear_blocks(handle_t *
+                             __le32 *last)
+ {
+       __le32 *p;
++      int     is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
++
+       if (try_to_extend_transaction(handle, inode)) {
+               if (bh) {
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+@@ -4150,11 +4152,11 @@ static void ext4_clear_blocks(handle_t *
+                       *p = 0;
+                       tbh = sb_find_get_block(inode->i_sb, nr);
+-                      ext4_forget(handle, 0, inode, tbh, nr);
++                      ext4_forget(handle, is_metadata, inode, tbh, nr);
+               }
+       }
+-      ext4_free_blocks(handle, inode, block_to_free, count, 0);
++      ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:47 2009
+Message-Id: <20091211052546.544464652@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:24 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Julia Lawall <julia@diku.dk>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [12/34] ext4: fix i_flags access in ext4_da_writepages_trans_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch
+Content-Length: 846
+Lines: 25
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 30c6e07a92ea4cb87160d32ffa9bce172576ae4c)
+
+We need to be testing the i_flags field in the ext4 specific portion
+of the inode, instead of the (confusingly aliased) i_flags field in
+the generic struct inode.
+
+Signed-off-by: Julia Lawall <julia@diku.dk>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2788,7 +2788,7 @@ static int ext4_da_writepages_trans_bloc
+        * number of contiguous block. So we will limit
+        * number of contiguous block to a sane value
+        */
+-      if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
++      if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+           (max_blocks > EXT4_MAX_TRANS_DATA))
+               max_blocks = EXT4_MAX_TRANS_DATA;
+
+
+From linux@linux.site Thu Dec 10 21:25:47 2009
+Message-Id: <20091211052547.065677730@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:25 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [13/34] ext4: journal all modifications in ext4_xattr_set_handle
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch
+Content-Length: 1254
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 86ebfd08a1930ccedb8eac0aeb1ed4b8b6a41dbc)
+
+ext4_xattr_set_handle() was zeroing out an inode outside
+of journaling constraints; this is one of the accesses that
+was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Reviewed-by: Andreas Dilger <adilger@sun.com>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/xattr.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle,
+       if (error)
+               goto cleanup;
++      error = ext4_journal_get_write_access(handle, is.iloc.bh);
++      if (error)
++              goto cleanup;
++
+       if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+               struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
+               memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle,
+               if (flags & XATTR_CREATE)
+                       goto cleanup;
+       }
+-      error = ext4_journal_get_write_access(handle, is.iloc.bh);
+-      if (error)
+-              goto cleanup;
+       if (!value) {
+               if (!is.s.not_found)
+                       error = ext4_xattr_ibody_set(handle, inode, &i, &is);
+
+
+From linux@linux.site Thu Dec 10 21:25:48 2009
+Message-Id: <20091211052547.644399594@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:26 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [14/34] ext4: don't update the superblock in ext4_statfs()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch
+Content-Length: 1341
+Lines: 31
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 3f8fb9490efbd300887470a2a880a64e04dcc3f5)
+
+commit a71ce8c6c9bf269b192f352ea555217815cf027e updated ext4_statfs()
+to update the on-disk superblock counters, but modified this buffer
+directly without any journaling of the change.  This is one of the
+accesses that was causing the crc errors in journal replay as seen in
+kernel.org bugzilla #14354.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3668,13 +3668,11 @@ static int ext4_statfs(struct dentry *de
+       buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
+       buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+                      percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+-      ext4_free_blocks_count_set(es, buf->f_bfree);
+       buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+       if (buf->f_bfree < ext4_r_blocks_count(es))
+               buf->f_bavail = 0;
+       buf->f_files = le32_to_cpu(es->s_inodes_count);
+       buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
+-      es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
+       buf->f_namelen = EXT4_NAME_LEN;
+       fsid = le64_to_cpup((void *)es->s_uuid) ^
+              le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+
+
+From linux@linux.site Thu Dec 10 21:25:48 2009
+Message-Id: <20091211052548.201782286@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:27 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [15/34] ext4: fix uninit block bitmap initialization when s_meta_first_bg is non-zero
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch
+Content-Length: 875
+Lines: 29
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 8dadb198cb70ef811916668fe67eeec82e8858dd)
+
+The number of old-style block group descriptor blocks is
+s_first_meta_bg when the meta_bg feature flag is set.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/balloc.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_met
+ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+-      return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
++      if (!ext4_bg_has_super(sb, group))
++              return 0;
++
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
++              return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
++      else
++              return EXT4_SB(sb)->s_gdb_count;
+ }
+ /**
+
+
+From linux@linux.site Thu Dec 10 21:25:49 2009
+Message-Id: <20091211052548.726431621@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:28 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [16/34] ext4: fix block validity checks so they work correctly with meta_bg
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch
+Content-Length: 1411
+Lines: 39
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 1032988c71f3f85483b2b4319684d1205a704c02)
+
+The block validity checks used by ext4_data_block_valid() weren't
+correctly written to check file systems with the meta_bg feature.  Fix
+this.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/block_validity.c |    2 +-
+ fs/ext4/inode.c          |    5 +----
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_
+               if (ext4_bg_has_super(sb, i) &&
+                   ((i < 5) || ((i % flex_size) == 0)))
+                       add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+-                                      sbi->s_gdb_count + 1);
++                                      ext4_bg_num_gdb(sb, i) + 1);
+               gdp = ext4_get_group_desc(sb, i, NULL);
+               ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+               if (ret)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4883,10 +4883,7 @@ struct inode *ext4_iget(struct super_blo
+       ret = 0;
+       if (ei->i_file_acl &&
+-          ((ei->i_file_acl <
+-            (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+-             EXT4_SB(sb)->s_gdb_count)) ||
+-           (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++          !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+               ext4_error(sb, __func__,
+                          "bad extended attribute block %llu in inode #%lu",
+                          ei->i_file_acl, inode->i_ino);
+
+
+From linux@linux.site Thu Dec 10 21:25:49 2009
+Message-Id: <20091211052549.341684525@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:29 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Jan Kara <jack@suse.cz>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [17/34] ext4: avoid issuing unnecessary barriers
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0013-ext4-avoid-issuing-unnecessary-barriers.patch
+Content-Length: 1115
+Lines: 37
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 6b17d902fdd241adfa4ce780df20547b28bf5801)
+
+We don't need to issue an I/O barrier on an error or if we force commit
+because we are doing data journaling.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/fsync.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -60,7 +60,7 @@ int ext4_sync_file(struct file *file, st
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+-              goto out;
++              return ret;
+       /*
+        * data=writeback:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+@@ -79,10 +79,8 @@ int ext4_sync_file(struct file *file, st
+        *  (they were dirtied by commit).  But that's OK - the blocks are
+        *  safe in-journal, which is all fsync() needs to ensure.
+        */
+-      if (ext4_should_journal_data(inode)) {
+-              ret = ext4_force_commit(inode->i_sb);
+-              goto out;
+-      }
++      if (ext4_should_journal_data(inode))
++              return ext4_force_commit(inode->i_sb);
+       if (!journal)
+               ret = sync_mapping_buffers(inode->i_mapping);
+
+
+From linux@linux.site Thu Dec 10 21:25:50 2009
+Message-Id: <20091211052549.883933582@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:30 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [18/34] ext4: fix error handling in ext4_ind_get_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch
+Content-Length: 733
+Lines: 25
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 2bba702d4f88d7b010ec37e2527b552588404ae7)
+
+When an error happened in ext4_splice_branch we failed to notice that
+in ext4_ind_get_blocks and mapped the buffer anyway. Fix the problem
+by checking for error properly.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1021,7 +1021,7 @@ static int ext4_ind_get_blocks(handle_t
+       if (!err)
+               err = ext4_splice_branch(handle, inode, iblock,
+                                        partial, indirect_blks, count);
+-      else
++      if (err)
+               goto cleanup;
+       set_buffer_new(bh_result);
+
+
+From linux@linux.site Thu Dec 10 21:25:50 2009
+Message-Id: <20091211052550.441771142@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:31 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [19/34] ext4: make trim/discard optional (and off by default)
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0015-ext4-make-trim-discard-optional-and-off-by-default.patch
+Content-Length: 4275
+Lines: 124
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 5328e635315734d42080de9a5a1ee87bf4cae0a4)
+
+It is anticipated that when sb_issue_discard starts doing
+real work on trim-capable devices, we may see issues.  Make
+this mount-time optional, and default it to off until we know
+that things are working out OK.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    6 ++++++
+ fs/ext4/ext4.h                     |    1 +
+ fs/ext4/mballoc.c                  |   21 +++++++++++++--------
+ fs/ext4/super.c                    |   14 +++++++++++++-
+ 4 files changed, 33 insertions(+), 9 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -353,6 +353,12 @@ noauto_da_alloc           replacing existing file
+                       system crashes before the delayed allocation
+                       blocks are forced to disk.
++discard               Controls whether ext4 should issue discard/TRIM
++nodiscard(*)          commands to the underlying block device when
++                      blocks are freed.  This is useful for SSD devices
++                      and sparse/thinly-provisioned LUNs, but it is off
++                      by default until sufficient testing has been done.
++
+ Data Mode
+ =========
+ There are 3 different data modes:
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -750,6 +750,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT_DELALLOC           0x8000000 /* Delalloc support */
+ #define EXT4_MOUNT_DATA_ERR_ABORT     0x10000000 /* Abort on file data write */
+ #define EXT4_MOUNT_BLOCK_VALIDITY     0x20000000 /* Block validity checking */
++#define EXT4_MOUNT_DISCARD            0x40000000 /* Issue DISCARD requests */
+ #define clear_opt(o, opt)             o &= ~EXT4_MOUNT_##opt
+ #define set_opt(o, opt)                       o |= EXT4_MOUNT_##opt
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(jou
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+       struct ext4_free_data *entry;
+-      ext4_fsblk_t discard_block;
+       struct list_head *l, *ltmp;
+       list_for_each_safe(l, ltmp, &txn->t_private_list) {
+@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(jou
+                       page_cache_release(e4b.bd_bitmap_page);
+               }
+               ext4_unlock_group(sb, entry->group);
+-              discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+-                      + entry->start_blk
+-                      + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+-              trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+-                                        entry->count);
+-              sb_issue_discard(sb, discard_block, entry->count);
+-
++              if (test_opt(sb, DISCARD)) {
++                      ext4_fsblk_t discard_block;
++                      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++
++                      discard_block = (ext4_fsblk_t)entry->group *
++                                              EXT4_BLOCKS_PER_GROUP(sb)
++                                      + entry->start_blk
++                                      + le32_to_cpu(es->s_first_data_block);
++                      trace_ext4_discard_blocks(sb,
++                                      (unsigned long long)discard_block,
++                                      entry->count);
++                      sb_issue_discard(sb, discard_block, entry->count);
++              }
+               kmem_cache_free(ext4_free_ext_cachep, entry);
+               ext4_mb_release_desc(&e4b);
+       }
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -899,6 +899,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, NO_AUTO_DA_ALLOC))
+               seq_puts(seq, ",noauto_da_alloc");
++      if (test_opt(sb, DISCARD))
++              seq_puts(seq, ",discard");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1079,7 +1082,8 @@ enum {
+       Opt_usrquota, Opt_grpquota, Opt_i_version,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_block_validity, Opt_noblock_validity,
+-      Opt_inode_readahead_blks, Opt_journal_ioprio
++      Opt_inode_readahead_blks, Opt_journal_ioprio,
++      Opt_discard, Opt_nodiscard,
+ };
+ static const match_table_t tokens = {
+@@ -1144,6 +1148,8 @@ static const match_table_t tokens = {
+       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
++      {Opt_discard, "discard"},
++      {Opt_nodiscard, "nodiscard"},
+       {Opt_err, NULL},
+ };
+@@ -1565,6 +1571,12 @@ set_qf_format:
+                       else
+                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+                       break;
++              case Opt_discard:
++                      set_opt(sbi->s_mount_opt, DISCARD);
++                      break;
++              case Opt_nodiscard:
++                      clear_opt(sbi->s_mount_opt, DISCARD);
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+
+
+From linux@linux.site Thu Dec 10 21:25:51 2009
+Message-Id: <20091211052551.004437667@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:32 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Eric Sandeen <sandeen@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [20/34] ext4: make "norecovery" an alias for "noload"
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0016-ext4-make-norecovery-an-alias-for-noload.patch
+Content-Length: 1856
+Lines: 53
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit e3bb52ae2bb9573e84c17b8e3560378d13a5c798)
+
+Users on the linux-ext4 list recently complained about differences
+across filesystems w.r.t. how to mount without a journal replay.
+
+In the discussion it was noted that xfs's "norecovery" option is
+perhaps more descriptively accurate than "noload," so let's make
+that an alias for ext4.
+
+Also show this status in /proc/mounts
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    4 ++--
+ fs/ext4/super.c                    |    4 ++++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -153,8 +153,8 @@ journal_dev=devnum When the external jou
+                       identified through its new major/minor numbers encoded
+                       in devnum.
+-noload                        Don't load the journal on mounting.  Note that
+-                      if the filesystem was not unmounted cleanly,
++norecovery            Don't load the journal on mounting.  Note that
++noload                        if the filesystem was not unmounted cleanly,
+                       skipping the journal replay will lead to the
+                       filesystem containing inconsistencies that can
+                       lead to any number of problems.
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -902,6 +902,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, DISCARD))
+               seq_puts(seq, ",discard");
++      if (test_opt(sb, NOLOAD))
++              seq_puts(seq, ",norecovery");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+@@ -1108,6 +1111,7 @@ static const match_table_t tokens = {
+       {Opt_acl, "acl"},
+       {Opt_noacl, "noacl"},
+       {Opt_noload, "noload"},
++      {Opt_noload, "norecovery"},
+       {Opt_nobh, "nobh"},
+       {Opt_bh, "bh"},
+       {Opt_commit, "commit=%u"},
+
+
+From linux@linux.site Thu Dec 10 21:25:52 2009
+Message-Id: <20091211052551.564396025@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:33 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [21/34] ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch
+Content-Length: 2565
+Lines: 75
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 94d7c16cbbbd0e03841fcf272bcaf0620ad39618)
+
+At the beginning of ext4_move_extents(), we call
+ext4_discard_preallocations() to discard inode PAs of orig and donor
+inodes.  But in the following case, blocks can be double freed, so
+move ext4_discard_preallocations() to the end of ext4_move_extents().
+
+1. Discard inode PAs of orig and donor inodes with
+   ext4_discard_preallocations() in ext4_move_extents().
+
+   orig : [ DATA1 ]
+   donor: [ DATA2 ]
+
+2. While data blocks are being exchanged between orig and donor inodes,
+   new inode PAs are created for orig by another process's block
+   allocation (since there are semaphore gaps in ext4_move_extents()),
+   and the new inode PAs are partially used (2-1).
+
+   2-1 Create new inode PAs to orig inode
+   orig : [ DATA1 | used PA1 | free PA1 ]
+   donor: [ DATA2 ]
+
+3. The donor inode, which now holds the old orig inode's blocks, is
+   deleted after EXT4_IOC_MOVE_EXT has finished (3-1, 3-2).  So the block
+   bitmap bits corresponding to the old orig inode's blocks are freed.
+
+   3-1 After EXT4_IOC_MOVE_EXT finished
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ DATA1 |  used PA1 ]
+
+   3-2 Delete donor inode
+   orig : [ DATA2 |  free PA1 ]
+   donor: [ FREE SPACE(DATA1) | FREE SPACE(used PA1) ]
+
+4. The double-free of blocks is occurred, when close() is called to
+   orig inode.  Because ext4_discard_preallocations() for orig inode
+   frees used PA1 and free PA1, though used PA1 is already freed in 3.
+
+   4-1 Double-free of blocks is occurred
+   orig : [ DATA2 |  FREE SPACE(free PA1) ]
+   donor: [ FREE SPACE(DATA1) | DOUBLE FREE(used PA1) ]
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -1289,10 +1289,6 @@ ext4_move_extents(struct file *o_filp, s
+                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+                    max(le32_to_cpu(ext_cur->ee_block), block_start);
+-      /* Discard preallocations of two inodes */
+-      ext4_discard_preallocations(orig_inode);
+-      ext4_discard_preallocations(donor_inode);
+-
+       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+               seq_blocks += add_blocks;
+@@ -1410,6 +1406,11 @@ ext4_move_extents(struct file *o_filp, s
+       }
+ out:
++      if (*moved_len) {
++              ext4_discard_preallocations(orig_inode);
++              ext4_discard_preallocations(donor_inode);
++      }
++
+       if (orig_path) {
+               ext4_ext_drop_refs(orig_path);
+               kfree(orig_path);
+
+
+From linux@linux.site Thu Dec 10 21:25:52 2009
+Message-Id: <20091211052552.134440580@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:34 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Kazuya Mio <k-mio@sx.jp.nec.com>,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [22/34] ext4: initialize moved_len before calling ext4_move_extents()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch
+Content-Length: 2439
+Lines: 72
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 446aaa6e7e993b38a6f21c6acfa68f3f1af3dbe3)
+
+The move_extent.moved_len is used to pass back the number of exchanged
+blocks count to user space.  Currently the caller must clear this
+field; but we spend more code space checking for this requirement than
+simply zeroing the field ourselves, so let's just make life easier for
+everyone all around.
+
+Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com>
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |    1 +
+ fs/ext4/move_extent.c |   14 +++-----------
+ 2 files changed, 4 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -239,6 +239,7 @@ setversion_out:
+                       }
+               }
++              me.moved_len = 0;
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+               fput(donor_filp);
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -947,7 +947,6 @@ out2:
+  * @orig_start:               logical start offset in block for orig
+  * @donor_start:      logical start offset in block for donor
+  * @len:              the number of blocks to be moved
+- * @moved_len:                moved block length
+  *
+  * Check the arguments of ext4_move_extents() whether the files can be
+  * exchanged with each other.
+@@ -955,8 +954,8 @@ out2:
+  */
+ static int
+ mext_check_arguments(struct inode *orig_inode,
+-                        struct inode *donor_inode, __u64 orig_start,
+-                        __u64 donor_start, __u64 *len, __u64 moved_len)
++                   struct inode *donor_inode, __u64 orig_start,
++                   __u64 donor_start, __u64 *len)
+ {
+       ext4_lblk_t orig_blocks, donor_blocks;
+       unsigned int blkbits = orig_inode->i_blkbits;
+@@ -1010,13 +1009,6 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
+-      if (moved_len) {
+-              ext4_debug("ext4 move extent: moved_len should be 0 "
+-                      "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+-                      donor_inode->i_ino);
+-              return -EINVAL;
+-      }
+-
+       if ((orig_start > EXT_MAX_BLOCK) ||
+           (donor_start > EXT_MAX_BLOCK) ||
+           (*len > EXT_MAX_BLOCK) ||
+@@ -1226,7 +1218,7 @@ ext4_move_extents(struct file *o_filp, s
+       double_down_write_data_sem(orig_inode, donor_inode);
+       /* Check the filesystem environment whether move_extent can be done */
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+-                                      donor_start, &len, *moved_len);
++                                  donor_start, &len);
+       if (ret1)
+               goto out;
+
+
+From linux@linux.site Thu Dec 10 21:25:53 2009
+Message-Id: <20091211052552.682377360@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:35 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [23/34] ext4: move_extent_per_page() cleanup
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0019-ext4-move_extent_per_page-cleanup.patch
+Content-Length: 2733
+Lines: 87
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit ac48b0a1d068887141581bea8285de5fcab182b0)
+
+Integrate duplicate lines (acquire/release semaphore and invalidate
+extent cache in move_extent_per_page()) into mext_replace_branches(),
+to reduce source and object code size.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/move_extent.c |   30 +++++++++---------------------
+ 1 file changed, 9 insertions(+), 21 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -660,6 +660,9 @@ mext_replace_branches(handle_t *handle,
+       int replaced_count = 0;
+       int dext_alen;
++      /* Protect extent trees against block allocations via delalloc */
++      double_down_write_data_sem(orig_inode, donor_inode);
++
+       /* Get the original extent for the block "orig_off" */
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
+@@ -755,6 +758,11 @@ out:
+               kfree(donor_path);
+       }
++      ext4_ext_invalidate_cache(orig_inode);
++      ext4_ext_invalidate_cache(donor_inode);
++
++      double_up_write_data_sem(orig_inode, donor_inode);
++
+       return replaced_count;
+ }
+@@ -820,19 +828,9 @@ move_extent_per_page(struct file *o_filp
+        * Just swap data blocks between orig and donor.
+        */
+       if (uninit) {
+-              /*
+-               * Protect extent trees against block allocations
+-               * via delalloc
+-               */
+-              double_down_write_data_sem(orig_inode, donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
+-
+-              /* Clear the inode cache not to refer to the old data */
+-              ext4_ext_invalidate_cache(orig_inode);
+-              ext4_ext_invalidate_cache(donor_inode);
+-              double_up_write_data_sem(orig_inode, donor_inode);
+               goto out2;
+       }
+@@ -880,8 +878,6 @@ move_extent_per_page(struct file *o_filp
+       /* Release old bh and drop refs */
+       try_to_release_page(page, 0);
+-      /* Protect extent trees against block allocations via delalloc */
+-      double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+@@ -890,18 +886,10 @@ move_extent_per_page(struct file *o_filp
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+-              } else {
+-                      double_up_write_data_sem(orig_inode, donor_inode);
++              } else
+                       goto out;
+-              }
+       }
+-      /* Clear the inode cache not to refer to the old data */
+-      ext4_ext_invalidate_cache(orig_inode);
+-      ext4_ext_invalidate_cache(donor_inode);
+-
+-      double_up_write_data_sem(orig_inode, donor_inode);
+-
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+
+
+From linux@linux.site Thu Dec 10 21:25:53 2009
+Message-Id: <20091211052553.196951652@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:36 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [24/34] jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch
+Content-Length: 1035
+Lines: 38
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit e6ec116b67f46e0e7808276476554727b2e6240b)
+
+OOM happens.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/jbd2/commit.c  |    4 ++++
+ fs/jbd2/journal.c |    4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(jou
+               JBUFFER_TRACE(jh, "ph3: write metadata");
+               flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+                                                     jh, &new_jh, blocknr);
++              if (flags < 0) {
++                      jbd2_journal_abort(journal, flags);
++                      continue;
++              }
+               set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+               wbuf[bufs++] = jh2bh(new_jh);
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -358,6 +358,10 @@ repeat:
+               jbd_unlock_bh_state(bh_in);
+               tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
++              if (!tmp) {
++                      jbd2_journal_put_journal_head(new_jh);
++                      return -ENOMEM;
++              }
+               jbd_lock_bh_state(bh_in);
+               if (jh_in->b_frozen_data) {
+                       jbd2_free(tmp, bh_in->b_size);
+
+
+From linux@linux.site Thu Dec 10 21:25:54 2009
+Message-Id: <20091211052553.749907435@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:37 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Roel Kluin <roel.kluin@gmail.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [25/34] ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch
+Content-Length: 595
+Lines: 21
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit c09eef305dd43846360944ad072f051f964fa383)
+
+Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/resize.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct
+                       goto exit_bh;
+               if (IS_ERR(gdb = bclean(handle, sb, block))) {
+-                      err = PTR_ERR(bh);
++                      err = PTR_ERR(gdb);
+                       goto exit_bh;
+               }
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
+
+
+From linux@linux.site Thu Dec 10 21:25:54 2009
+Message-Id: <20091211052554.355331485@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:38 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [26/34] ext4: Avoid data / filesystem corruption when write fails to copy data
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch
+Content-Length: 2923
+Lines: 84
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b9a4207d5e911b938f73079a83cc2ae10524ec7f)
+
+When ext4_write_begin fails after allocating some blocks or
+generic_perform_write fails to copy data to write, we truncate blocks
+already instantiated beyond i_size.  Although these blocks were never
+inside i_size, we have to truncate the pagecache of these blocks so
+that corresponding buffers get unmapped.  Otherwise subsequent
+__block_prepare_write (called because we are retrying the write) will
+find the buffers mapped, not call ->get_block, and thus the page will
+be backed by already freed blocks leading to filesystem and data
+corruption.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1534,6 +1534,16 @@ static int do_journal_get_write_access(h
+       return ext4_journal_get_write_access(handle, bh);
+ }
++/*
++ * Truncate blocks that were not used by write. We have to truncate the
++ * pagecache as well so that corresponding buffers get properly unmapped.
++ */
++static void ext4_truncate_failed_write(struct inode *inode)
++{
++      truncate_inode_pages(inode->i_mapping, inode->i_size);
++      ext4_truncate(inode);
++}
++
+ static int ext4_write_begin(struct file *file, struct address_space *mapping,
+                           loff_t pos, unsigned len, unsigned flags,
+                           struct page **pagep, void **fsdata)
+@@ -1599,7 +1609,7 @@ retry:
+               ext4_journal_stop(handle);
+               if (pos + len > inode->i_size) {
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+                       /*
+                        * If truncate failed early the inode might
+                        * still be on the orphan list; we need to
+@@ -1709,7 +1719,7 @@ static int ext4_ordered_write_end(struct
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1751,7 +1761,7 @@ static int ext4_writeback_write_end(stru
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -1814,7 +1824,7 @@ static int ext4_journalled_write_end(str
+       if (!ret)
+               ret = ret2;
+       if (pos + len > inode->i_size) {
+-              ext4_truncate(inode);
++              ext4_truncate_failed_write(inode);
+               /*
+                * If truncate failed early the inode might still be
+                * on the orphan list; we need to make sure the inode
+@@ -3091,7 +3101,7 @@ retry:
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+-                      ext4_truncate(inode);
++                      ext4_truncate_failed_write(inode);
+       }
+       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+
+
+From linux@linux.site Thu Dec 10 21:25:55 2009
+Message-Id: <20091211052554.925382177@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:39 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Josef Bacik <josef@redhat.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [27/34] ext4: wait for log to commit when umounting
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0023-ext4-wait-for-log-to-commit-when-umounting.patch
+Content-Length: 1540
+Lines: 46
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit d4edac314e9ad0b21ba20ba8bc61b61f186f79e1)
+
+There is a potential race when a transaction is committing right when
+the file system is being unmounted.  This could result in a race
+because EXT4_SB(sb)->s_group_info could be freed in ext4_put_super
+before the commit code calls a callback so the mballoc code can
+release freed blocks in the transaction, resulting in a panic trying
+to access the freed s_group_info.
+
+The fix is to wait for the transaction to finish committing before we
+shutdown the multiblock allocator.
+
+Signed-off-by: Josef Bacik <josef@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/super.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_
+       if (sb->s_dirt)
+               ext4_commit_super(sb, 1);
+-      ext4_release_system_zone(sb);
+-      ext4_mb_release(sb);
+-      ext4_ext_release(sb);
+-      ext4_xattr_put_super(sb);
+       if (sbi->s_journal) {
+               err = jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_
+                       ext4_abort(sb, __func__,
+                                  "Couldn't clean up the journal");
+       }
++
++      ext4_release_system_zone(sb);
++      ext4_mb_release(sb);
++      ext4_ext_release(sb);
++      ext4_xattr_put_super(sb);
++
+       if (!(sb->s_flags & MS_RDONLY)) {
+               EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+               es->s_state = cpu_to_le16(sbi->s_mount_state);
+
+
+From linux@linux.site Thu Dec 10 21:25:55 2009
+Message-Id: <20091211052555.487338959@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:40 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Curt Wohlgemuth <curtw@google.com>,
+ "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [28/34] ext4: remove blocks from inode prealloc list on failure
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch
+Content-Length: 1476
+Lines: 49
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b844167edc7fcafda9623955c05e4c1b3c32ebc7)
+
+This fixes a leak of blocks in an inode prealloc list if device failures
+cause ext4_mb_mark_diskspace_used() to fail.
+
+Signed-off-by: Curt Wohlgemuth <curtw@google.com>
+Acked-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3011,6 +3011,24 @@ static void ext4_mb_collect_stats(struct
+ }
+ /*
++ * Called on failure; free up any blocks from the inode PA for this
++ * context.  We don't need this for MB_GROUP_PA because we only change
++ * pa_free in ext4_mb_release_context(), but on failure, we've already
++ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
++ */
++static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
++{
++      struct ext4_prealloc_space *pa = ac->ac_pa;
++      int len;
++
++      if (pa && pa->pa_type == MB_INODE_PA) {
++              len = ac->ac_b_ex.fe_len;
++              pa->pa_free += len;
++      }
++
++}
++
++/*
+  * use blocks preallocated to inode
+  */
+ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+@@ -4295,6 +4313,7 @@ repeat:
+                       ac->ac_status = AC_STATUS_CONTINUE;
+                       goto repeat;
+               } else if (*errp) {
++                      ext4_discard_allocated_blocks(ac);
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
+
+
+From linux@linux.site Thu Dec 10 21:25:56 2009
+Message-Id: <20091211052556.043172197@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:41 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ Mingming Cao <cmm@us.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [29/34] ext4: ext4_get_reserved_space() must return bytes instead of blocks
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch
+Content-Length: 718
+Lines: 23
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 8aa6790f876e81f5a2211fe1711a5fe3fe2d7b20)
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1052,7 +1052,7 @@ qsize_t ext4_get_reserved_space(struct i
+               EXT4_I(inode)->i_reserved_meta_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+-      return total;
++      return (total << inode->i_blkbits);
+ }
+ /*
+  * Calculate the number of metadata blocks need to reserve
+
+
+From linux@linux.site Thu Dec 10 21:25:57 2009
+Message-Id: <20091211052556.560487193@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:42 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ Mingming Cao <cmm@us.ibm.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [30/34] ext4: quota macros cleanup
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0026-ext4-quota-macros-cleanup.patch
+Content-Length: 5167
+Lines: 138
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 5aca07eb7d8f14d90c740834d15ca15277f4820c)
+
+Currently all quota block reservation macros contains hard-coded "2"
+aka MAXQUOTAS value. This is no good because in some places it is not
+obvious to understand what does this digit represent. Let's introduce
+new macro with self descriptive name.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Acked-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4_jbd2.h |    8 ++++++--
+ fs/ext4/extents.c   |    2 +-
+ fs/ext4/inode.c     |    2 +-
+ fs/ext4/migrate.c   |    4 ++--
+ fs/ext4/namei.c     |    8 ++++----
+ 5 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -49,7 +49,7 @@
+ #define EXT4_DATA_TRANS_BLOCKS(sb)    (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
+                                        EXT4_XATTR_TRANS_BLOCKS - 2 + \
+-                                       2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                       EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /*
+  * Define the number of metadata blocks we need to account to modify data.
+@@ -57,7 +57,7 @@
+  * This include super block, inode block, quota blocks and xattr blocks
+  */
+ #define EXT4_META_TRANS_BLOCKS(sb)    (EXT4_XATTR_TRANS_BLOCKS + \
+-                                      2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++                                      EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ /* Delete operations potentially hit one directory's namespace plus an
+  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
+@@ -92,6 +92,7 @@
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+               (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+ #else
+@@ -99,6 +100,9 @@
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+ #endif
++#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2167,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
+                       correct_index = 1;
+                       credits += (ext_depth(inode)) + 1;
+               }
+-              credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++              credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+               err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+               if (err)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5231,7 +5231,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+-              handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
++              handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(hand
+        * So allocate a credit of 3. We may update
+        * quota (user and group).
+        */
+-      needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++      needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+       if (ext4_journal_extend(handle, needed) != 0)
+               retval = ext4_journal_restart(handle, needed);
+@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode
+       handle = ext4_journal_start(inode,
+                                       EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+                                       + 1);
+       if (IS_ERR(handle)) {
+               retval = PTR_ERR(handle);
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1769,7 +1769,7 @@ static int ext4_create(struct inode *dir
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1803,7 +1803,7 @@ static int ext4_mknod(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1840,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir,
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -2253,7 +2253,7 @@ static int ext4_symlink(struct inode *di
+ retry:
+       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+-                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+
+
+From linux@linux.site Thu Dec 10 21:25:57 2009
+Message-Id: <20091211052557.153813326@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:43 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Dmitry Monakhov <dmonakhov@openvz.org>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [31/34] ext4: fix incorrect block reservation on quota transfer.
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch
+Content-Length: 1036
+Lines: 27
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 194074acacebc169ded90a4657193f5180015051)
+
+Inside ->setattr() call both ATTR_UID and ATTR_GID may be valid
+This means that we may end-up with transferring all quotas. And
+we have to reserve QUOTA_DEL_BLOCKS for all quotas, as we do in
+case of QUOTA_INIT_BLOCKS.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Reviewed-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5232,7 +5232,7 @@ int ext4_setattr(struct dentry *dentry,
+               /* (user+group)*(old+new) structure, inode write (sb,
+                * inode block, ? - but truncate inode update has it) */
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+-                                      EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
++                                      EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+               if (IS_ERR(handle)) {
+                       error = PTR_ERR(handle);
+                       goto err_out;
+
+
+From linux@linux.site Thu Dec 10 21:25:58 2009
+Message-Id: <20091211052557.723287400@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:44 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Jan Kara <jack@suse.cz>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [32/34] ext4: Wait for proper transaction commit on fsync
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
+Content-Length: 7849
+Lines: 252
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit b436b9bef84de6893e86346d8fbf7104bc520645)
+
+We cannot rely on buffer dirty bits during fsync because pdflush can come
+before fsync is called and clear dirty bits without forcing a transaction
+commit. What we do is that we track which transaction has last changed
+the inode and which transaction last changed allocation and force it to
+disk on fsync.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h      |    7 +++++++
+ fs/ext4/ext4_jbd2.h |   13 +++++++++++++
+ fs/ext4/extents.c   |   14 ++++++++++++--
+ fs/ext4/fsync.c     |   46 +++++++++++++++++-----------------------------
+ fs/ext4/inode.c     |   29 +++++++++++++++++++++++++++++
+ fs/ext4/super.c     |    2 ++
+ fs/jbd2/journal.c   |    1 +
+ 7 files changed, 81 insertions(+), 31 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -703,6 +703,13 @@ struct ext4_inode_info {
+       struct list_head i_aio_dio_complete_list;
+       /* current io_end structure for async DIO write*/
+       ext4_io_end_t *cur_aio_dio;
++
++      /*
++       * Transactions that contain inode's metadata needed to complete
++       * fsync and fdatasync, respectively.
++       */
++      tid_t i_sync_tid;
++      tid_t i_datasync_tid;
+ };
+ /*
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -258,6 +258,19 @@ static inline int ext4_jbd2_file_inode(h
+       return 0;
+ }
++static inline void ext4_update_inode_fsync_trans(handle_t *handle,
++                                               struct inode *inode,
++                                               int datasync)
++{
++      struct ext4_inode_info *ei = EXT4_I(inode);
++
++      if (ext4_handle_valid(handle)) {
++              ei->i_sync_tid = handle->h_transaction->t_tid;
++              if (datasync)
++                      ei->i_datasync_tid = handle->h_transaction->t_tid;
++      }
++}
++
+ /* super.c */
+ int ext4_force_commit(struct super_block *sb);
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -3064,6 +3064,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
+               ret = ext4_convert_unwritten_extents_dio(handle, inode,
+                                                       path);
++              if (ret >= 0)
++                      ext4_update_inode_fsync_trans(handle, inode, 1);
+               goto out2;
+       }
+       /* buffered IO case */
+@@ -3091,6 +3093,8 @@ ext4_ext_handle_uninitialized_extents(ha
+       ret = ext4_ext_convert_to_initialized(handle, inode,
+                                               path, iblock,
+                                               max_blocks);
++      if (ret >= 0)
++              ext4_update_inode_fsync_trans(handle, inode, 1);
+ out:
+       if (ret <= 0) {
+               err = ret;
+@@ -3329,10 +3333,16 @@ int ext4_ext_get_blocks(handle_t *handle
+       allocated = ext4_ext_get_actual_len(&newex);
+       set_buffer_new(bh_result);
+-      /* Cache only when it is _not_ an uninitialized extent */
+-      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
++      /*
++       * Cache the extent and update transaction to commit on fdatasync only
++       * when it is _not_ an uninitialized extent.
++       */
++      if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
+               ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+                                               EXT4_EXT_CACHE_EXTENT);
++              ext4_update_inode_fsync_trans(handle, inode, 1);
++      } else
++              ext4_update_inode_fsync_trans(handle, inode, 0);
+ out:
+       if (allocated > max_blocks)
+               allocated = max_blocks;
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -51,25 +51,30 @@
+ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ {
+       struct inode *inode = dentry->d_inode;
++      struct ext4_inode_info *ei = EXT4_I(inode);
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+-      int err, ret = 0;
++      int ret;
++      tid_t commit_tid;
+       J_ASSERT(ext4_journal_current_handle() == NULL);
+       trace_ext4_sync_file(file, dentry, datasync);
++      if (inode->i_sb->s_flags & MS_RDONLY)
++              return 0;
++
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+               return ret;
++
++      if (!journal)
++              return simple_fsync(file, dentry, datasync);
++
+       /*
+-       * data=writeback:
++       * data=writeback,ordered:
+        *  The caller's filemap_fdatawrite()/wait will sync the data.
+-       *  sync_inode() will sync the metadata
+-       *
+-       * data=ordered:
+-       *  The caller's filemap_fdatawrite() will write the data and
+-       *  sync_inode() will write the inode if it is dirty.  Then the caller's
+-       *  filemap_fdatawait() will wait on the pages.
++       *  Metadata is in the journal, we wait for proper transaction to
++       *  commit here.
+        *
+        * data=journal:
+        *  filemap_fdatawrite won't do anything (the buffers are clean).
+@@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, st
+       if (ext4_should_journal_data(inode))
+               return ext4_force_commit(inode->i_sb);
+-      if (!journal)
+-              ret = sync_mapping_buffers(inode->i_mapping);
+-
+-      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+-              goto out;
+-
+-      /*
+-       * The VFS has written the file data.  If the inode is unaltered
+-       * then we need not start a commit.
+-       */
+-      if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
+-              struct writeback_control wbc = {
+-                      .sync_mode = WB_SYNC_ALL,
+-                      .nr_to_write = 0, /* sys_fsync did this */
+-              };
+-              err = sync_inode(inode, &wbc);
+-              if (ret == 0)
+-                      ret = err;
+-      }
+-out:
+-      if (journal && (journal->j_flags & JBD2_BARRIER))
++      commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
++      if (jbd2_log_start_commit(journal, commit_tid))
++              jbd2_log_wait_commit(journal, commit_tid);
++      else if (journal->j_flags & JBD2_BARRIER)
+               blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+       return ret;
+ }
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1025,6 +1025,8 @@ static int ext4_ind_get_blocks(handle_t
+               goto cleanup;
+       set_buffer_new(bh_result);
++
++      ext4_update_inode_fsync_trans(handle, inode, 1);
+ got_it:
+       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       if (count > blocks_to_boundary)
+@@ -4794,6 +4796,7 @@ struct inode *ext4_iget(struct super_blo
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei;
+       struct inode *inode;
++      journal_t *journal = EXT4_SB(sb)->s_journal;
+       long ret;
+       int block;
+@@ -4858,6 +4861,31 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_data[block] = raw_inode->i_block[block];
+       INIT_LIST_HEAD(&ei->i_orphan);
++      /*
++       * Set transaction id's of transactions that have to be committed
++       * to finish f[data]sync. We set them to currently running transaction
++       * as we cannot be sure that the inode or some of its metadata isn't
++       * part of the transaction - the inode could have been reclaimed and
++       * now it is reread from disk.
++       */
++      if (journal) {
++              transaction_t *transaction;
++              tid_t tid;
++
++              spin_lock(&journal->j_state_lock);
++              if (journal->j_running_transaction)
++                      transaction = journal->j_running_transaction;
++              else
++                      transaction = journal->j_committing_transaction;
++              if (transaction)
++                      tid = transaction->t_tid;
++              else
++                      tid = journal->j_commit_sequence;
++              spin_unlock(&journal->j_state_lock);
++              ei->i_sync_tid = tid;
++              ei->i_datasync_tid = tid;
++      }
++
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+@@ -5112,6 +5140,7 @@ static int ext4_do_update_inode(handle_t
+               err = rc;
+       ei->i_state &= ~EXT4_STATE_NEW;
++      ext4_update_inode_fsync_trans(handle, inode, 0);
+ out_brelse:
+       brelse(bh);
+       ext4_std_error(inode->i_sb, err);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(st
+       spin_lock_init(&(ei->i_block_reservation_lock));
+       INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+       ei->cur_aio_dio = NULL;
++      ei->i_sync_tid = 0;
++      ei->i_datasync_tid = 0;
+       return &ei->vfs_inode;
+ }
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
+ EXPORT_SYMBOL(jbd2_journal_ack_err);
+ EXPORT_SYMBOL(jbd2_journal_clear_err);
+ EXPORT_SYMBOL(jbd2_log_wait_commit);
++EXPORT_SYMBOL(jbd2_log_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
+ EXPORT_SYMBOL(jbd2_journal_wipe);
+
+
+From linux@linux.site Thu Dec 10 21:25:58 2009
+Message-Id: <20091211052558.272572522@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:45 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ Akira Fujita <a-fujita@rs.jp.nec.com>,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [33/34] ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch
+Content-Length: 2732
+Lines: 94
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit 4a58579b9e4e2a35d57e6c9c8483e52f6f1b7fd6)
+
+This patch fixes three problems in the handling of the
+EXT4_IOC_MOVE_EXT ioctl:
+
+1. In current EXT4_IOC_MOVE_EXT, there are read access mode checks for
+original and donor files, but they allow the illegal write access to
+donor file, since donor file is overwritten by original file data.  To
+fix this problem, change access mode checks of original (r->r/w) and
+donor (r->w) files.
+
+2.  Disallow the use of donor files that have a setuid or setgid bits.
+
+3.  Call mnt_want_write() and mnt_drop_write() before and after
+ext4_move_extents() calling to get write access to a mount.
+
+Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ioctl.c       |   30 ++++++++++++++++++------------
+ fs/ext4/move_extent.c |    7 +++++++
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -221,32 +221,38 @@ setversion_out:
+               struct file *donor_filp;
+               int err;
++              if (!(filp->f_mode & FMODE_READ) ||
++                  !(filp->f_mode & FMODE_WRITE))
++                      return -EBADF;
++
+               if (copy_from_user(&me,
+                       (struct move_extent __user *)arg, sizeof(me)))
+                       return -EFAULT;
++              me.moved_len = 0;
+               donor_filp = fget(me.donor_fd);
+               if (!donor_filp)
+                       return -EBADF;
+-              if (!capable(CAP_DAC_OVERRIDE)) {
+-                      if ((current->real_cred->fsuid != inode->i_uid) ||
+-                              !(inode->i_mode & S_IRUSR) ||
+-                              !(donor_filp->f_dentry->d_inode->i_mode &
+-                              S_IRUSR)) {
+-                              fput(donor_filp);
+-                              return -EACCES;
+-                      }
++              if (!(donor_filp->f_mode & FMODE_WRITE)) {
++                      err = -EBADF;
++                      goto mext_out;
+               }
+-              me.moved_len = 0;
++              err = mnt_want_write(filp->f_path.mnt);
++              if (err)
++                      goto mext_out;
++
+               err = ext4_move_extents(filp, donor_filp, me.orig_start,
+                                       me.donor_start, me.len, &me.moved_len);
+-              fput(donor_filp);
++              mnt_drop_write(filp->f_path.mnt);
++              if (me.moved_len > 0)
++                      file_remove_suid(donor_filp);
+               if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
+-                      return -EFAULT;
+-
++                      err = -EFAULT;
++mext_out:
++              fput(donor_filp);
+               return err;
+       }
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -957,6 +957,13 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
++      if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
++              ext4_debug("ext4 move extent: suid or sgid is set"
++                         " to donor file [ino:orig %lu, donor %lu]\n",
++                         orig_inode->i_ino, donor_inode->i_ino);
++              return -EINVAL;
++      }
++
+       /* Ext4 move extent does not support swapfile */
+       if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+               ext4_debug("ext4 move extent: The argument files should "
+
+
+From linux@linux.site Thu Dec 10 21:25:59 2009
+Message-Id: <20091211052558.863762484@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:46 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk,
+ "Theodore Tso" <tytso@mit.edu>,
+ Greg Kroah-Hartman <gregkh@suse.de>
+Subject: [34/34] ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
+References: <20091211052312.805428372@linux.site>
+Content-Disposition: inline; filename=0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch
+Content-Length: 5029
+Lines: 115
+
+2.6.32-stable review patch.  If anyone has any objections, please let us know.
+
+------------------
+
+(cherry picked from commit fab3a549e204172236779f502eccb4f9bf0dc87d)
+
+Fix the following potential circular locking dependency between
+mm->mmap_sem and ei->i_data_sem:
+
+    =======================================================
+    [ INFO: possible circular locking dependency detected ]
+    2.6.32-04115-gec044c5 #37
+    -------------------------------------------------------
+    ureadahead/1855 is trying to acquire lock:
+     (&mm->mmap_sem){++++++}, at: [<ffffffff81107224>] might_fault+0x5c/0xac
+
+    but task is already holding lock:
+     (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    which lock already depends on the new lock.
+
+    the existing dependency chain (in reverse order) is:
+
+    -> #1 (&ei->i_data_sem){++++..}:
+           [<ffffffff81099bfa>] __lock_acquire+0xb67/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81516633>] down_read+0x51/0x84
+           [<ffffffff811a2414>] ext4_get_blocks+0x50/0x2a5
+           [<ffffffff811a3453>] ext4_get_block+0xab/0xef
+           [<ffffffff81154f39>] do_mpage_readpage+0x198/0x48d
+           [<ffffffff81155360>] mpage_readpages+0xd0/0x114
+           [<ffffffff811a104b>] ext4_readpages+0x1d/0x1f
+           [<ffffffff810f8644>] __do_page_cache_readahead+0x12f/0x1bc
+           [<ffffffff810f86f2>] ra_submit+0x21/0x25
+           [<ffffffff810f0cfd>] filemap_fault+0x19f/0x32c
+           [<ffffffff81107b97>] __do_fault+0x55/0x3a2
+           [<ffffffff81109db0>] handle_mm_fault+0x327/0x734
+           [<ffffffff8151aaa9>] do_page_fault+0x292/0x2aa
+           [<ffffffff81518205>] page_fault+0x25/0x30
+           [<ffffffff812a34d8>] clear_user+0x38/0x3c
+           [<ffffffff81167e16>] padzero+0x20/0x31
+           [<ffffffff81168b47>] load_elf_binary+0x8bc/0x17ed
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff81166d64>] load_script+0x1b8/0x1cc
+           [<ffffffff81130e95>] search_binary_handler+0xc2/0x259
+           [<ffffffff8113255f>] do_execve+0x1ce/0x2cf
+           [<ffffffff81027494>] sys_execve+0x43/0x5a
+           [<ffffffff8102918a>] stub_execve+0x6a/0xc0
+
+    -> #0 (&mm->mmap_sem){++++++}:
+           [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+           [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+           [<ffffffff81107251>] might_fault+0x89/0xac
+           [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+           [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+           [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+           [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+           [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+           [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+           [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+    other info that might help us debug this:
+
+    1 lock held by ureadahead/1855:
+     #0:  (&ei->i_data_sem){++++..}, at: [<ffffffff811be1fd>] ext4_fiemap+0x11b/0x159
+
+    stack backtrace:
+    Pid: 1855, comm: ureadahead Not tainted 2.6.32-04115-gec044c5 #37
+    Call Trace:
+     [<ffffffff81098c70>] print_circular_bug+0xa8/0xb7
+     [<ffffffff81099aa4>] __lock_acquire+0xa11/0xd0f
+     [<ffffffff8102f229>] ? sched_clock+0x9/0xd
+     [<ffffffff81099e7e>] lock_acquire+0xdc/0x102
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81107251>] might_fault+0x89/0xac
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff81124b44>] ? __kmalloc+0x13b/0x18c
+     [<ffffffff81139382>] fiemap_fill_next_extent+0x95/0xda
+     [<ffffffff811bcb43>] ext4_ext_fiemap_cb+0x138/0x157
+     [<ffffffff811bca0b>] ? ext4_ext_fiemap_cb+0x0/0x157
+     [<ffffffff811be069>] ext4_ext_walk_space+0x178/0x1f1
+     [<ffffffff811be21e>] ext4_fiemap+0x13c/0x159
+     [<ffffffff81107224>] ? might_fault+0x5c/0xac
+     [<ffffffff811390e6>] do_vfs_ioctl+0x348/0x4d6
+     [<ffffffff8129f6d0>] ? __up_read+0x8d/0x95
+     [<ffffffff81517fb5>] ? retint_swapgs+0x13/0x1b
+     [<ffffffff811392ca>] sys_ioctl+0x56/0x79
+     [<ffffffff81028cb2>] system_call_fastpath+0x16/0x1b
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *in
+       while (block < last && block != EXT_MAX_BLOCK) {
+               num = last - block;
+               /* find extent for this block */
++              down_read(&EXT4_I(inode)->i_data_sem);
+               path = ext4_ext_find_extent(inode, block, path);
++              up_read(&EXT4_I(inode)->i_data_sem);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;
+@@ -3730,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, str
+                * Walk the extent tree gathering extent information.
+                * ext4_ext_fiemap_cb will push extents back to user.
+                */
+-              down_read(&EXT4_I(inode)->i_data_sem);
+               error = ext4_ext_walk_space(inode, start_blk, len_blks,
+                                         ext4_ext_fiemap_cb, fieinfo);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+       }
+       return error;
+
+
+From linux@linux.site Thu Dec 10 21:25:40 2009
+Message-Id: <20091211052312.805428372@linux.site>
+User-Agent: quilt/0.47-14.9
+Date: Thu, 10 Dec 2009 21:23:12 -0800
+From: Greg KH <gregkh@suse.de>
+To: linux-kernel@vger.kernel.org,
+ stable@kernel.org
+Cc: stable-review@kernel.org,
+ torvalds@linux-foundation.org,
+ akpm@linux-foundation.org,
+ alan@lxorguk.ukuu.org.uk
+Subject: [00/34] 2.6.32.1-stable review
+Content-Length: 2372
+Lines: 51
+
+This is the start of the stable review cycle for the 2.6.32.1 release.
+There are 34 patches in this series, all will be posted as a response to
+this one.  If anyone has any issues with these being applied, please let
+us know.  If anyone is a maintainer of the proper subsystem, and wants
+to add a Signed-off-by: line to the patch, please respond with it.
+
+As was done with the 2.6.31.8-rc1 release, this is not all of the
+patches in the -stable queue, just a huge chunk of ext4 patches here,
+and a few scsi ones, which should all get out sooner rather than later.
+So note that there will be more 2.6.32-stable releases coming, this is
+just the first in the series.
+
+Responses should be made by Sunday, Dec 13 04:00:00 UTC 2009
+Anything received after that time might be too late.
+
+The whole patch series can be found in one patch at:
+       kernel.org/pub/linux/kernel/v2.6/stable-review/patch-2.6.32.1-rc1.gz
+and the diffstat can be found below.
+
+thanks,
+
+greg k-h
+
+ Documentation/filesystems/ext4.txt   |   10 +-
+ Makefile                             |    2 +-
+ drivers/scsi/hosts.c                 |   13 ++-
+ drivers/scsi/lpfc/lpfc_init.c        |    2 +-
+ drivers/scsi/megaraid/megaraid_sas.c |    8 +-
+ drivers/scsi/qla2xxx/qla_attr.c      |    3 +-
+ drivers/scsi/scsi_lib_dma.c          |    4 +-
+ fs/ext4/balloc.c                     |    8 +-
+ fs/ext4/block_validity.c             |    2 +-
+ fs/ext4/ext4.h                       |    8 +
+ fs/ext4/ext4_jbd2.h                  |   21 +++-
+ fs/ext4/extents.c                    |   22 ++-
+ fs/ext4/fsync.c                      |   54 +++----
+ fs/ext4/inode.c                      |   81 +++++++---
+ fs/ext4/ioctl.c                      |   29 +++--
+ fs/ext4/mballoc.c                    |   40 ++++-
+ fs/ext4/migrate.c                    |    4 +-
+ fs/ext4/move_extent.c                |  278 ++++++++++++++++------------------
+ fs/ext4/namei.c                      |   38 ++---
+ fs/ext4/resize.c                     |    2 +-
+ fs/ext4/super.c                      |   40 ++++--
+ fs/ext4/xattr.c                      |    7 +-
+ fs/jbd2/commit.c                     |    4 +
+ fs/jbd2/journal.c                    |    5 +
+ include/linux/sched.h                |   13 ++-
+ include/scsi/osd_protocol.h          |    1 +
+ include/scsi/scsi_host.h             |   16 ++-
+ 27 files changed, 424 insertions(+), 291 deletions(-)
+
diff --git a/review-2.6.32/scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch b/review-2.6.32/scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch
new file mode 100644 (file)
index 0000000..f936137
--- /dev/null
@@ -0,0 +1,51 @@
+From 7b2519afa1abd1b9f63aa1e90879307842422dae Mon Sep 17 00:00:00 2001
+From: Yang, Bo <Bo.Yang@lsi.com>
+Date: Tue, 6 Oct 2009 14:52:20 -0600
+Subject: SCSI: megaraid_sas: fix 64 bit sense pointer truncation
+
+From: Yang, Bo <Bo.Yang@lsi.com>
+
+commit 7b2519afa1abd1b9f63aa1e90879307842422dae upstream.
+
+The current sense pointer is cast to a u32 pointer, which can truncate
+on 64 bits.  Fix by using unsigned long instead.
+
+Signed-off-by: Bo Yang <bo.yang@lsi.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/megaraid/megaraid_sas.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/megaraid/megaraid_sas.c
++++ b/drivers/scsi/megaraid/megaraid_sas.c
+@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+       int error = 0, i;
+       void *sense = NULL;
+       dma_addr_t sense_handle;
+-      u32 *sense_ptr;
++      unsigned long *sense_ptr;
+       memset(kbuff_arr, 0, sizeof(kbuff_arr));
+@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+               }
+               sense_ptr =
+-                  (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
++              (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
+               *sense_ptr = sense_handle;
+       }
+@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+                * sense_ptr points to the location that has the user
+                * sense buffer address
+                */
+-              sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
+-                                   ioc->sense_off);
++              sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
++                              ioc->sense_off);
+               if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
+                                sense, ioc->sense_len)) {
diff --git a/review-2.6.32/scsi-osd_protocol.h-add-missing-include.patch b/review-2.6.32/scsi-osd_protocol.h-add-missing-include.patch
new file mode 100644 (file)
index 0000000..6726c96
--- /dev/null
@@ -0,0 +1,28 @@
+From 0899638688f223fd9e9fee60d662665e11693d12 Mon Sep 17 00:00:00 2001
+From: Martin Michlmayr <tbm@cyrius.com>
+Date: Mon, 16 Nov 2009 20:49:25 +0200
+Subject: SCSI: osd_protocol.h: Add missing #include
+
+From: Martin Michlmayr <tbm@cyrius.com>
+
+commit 0899638688f223fd9e9fee60d662665e11693d12 upstream.
+
+include/scsi/osd_protocol.h uses ALIGN() without an #include
+<linux/kernel.h>, leading to:
+| include/scsi/osd_protocol.h:362: error: implicit declaration of function 'ALIGN'
+
+Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/include/scsi/osd_protocol.h
++++ b/include/scsi/osd_protocol.h
+@@ -17,6 +17,7 @@
+ #define __OSD_PROTOCOL_H__
+ #include <linux/types.h>
++#include <linux/kernel.h>
+ #include <asm/unaligned.h>
+ #include <scsi/scsi.h>
diff --git a/review-2.6.32/scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch b/review-2.6.32/scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch
new file mode 100644 (file)
index 0000000..57cb6dd
--- /dev/null
@@ -0,0 +1,153 @@
+From d139b9bd0e52dda14fd13412e7096e68b56d0076 Mon Sep 17 00:00:00 2001
+From: James Bottomley <James.Bottomley@suse.de>
+Date: Thu, 5 Nov 2009 13:33:12 -0600
+Subject: SCSI: scsi_lib_dma: fix bug with dma maps on nested scsi objects
+
+From: James Bottomley <James.Bottomley@suse.de>
+
+commit d139b9bd0e52dda14fd13412e7096e68b56d0076 upstream.
+
+Some of our virtual SCSI hosts don't have a proper bus parent at the
+top, which can be a problem for doing DMA on them.
+
+This patch makes the host device cache a pointer to the physical bus
+device and provides an extra API for setting it (the normal API picks
+it up from the parent).  This patch also modifies the qla2xxx and lpfc
+vport logic to use the new DMA host setting API.
+
+Acked-By: James Smart  <james.smart@emulex.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/hosts.c            |   13 ++++++++++---
+ drivers/scsi/lpfc/lpfc_init.c   |    2 +-
+ drivers/scsi/qla2xxx/qla_attr.c |    3 ++-
+ drivers/scsi/scsi_lib_dma.c     |    4 ++--
+ include/scsi/scsi_host.h        |   16 +++++++++++++++-
+ 5 files changed, 30 insertions(+), 8 deletions(-)
+
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *
+ EXPORT_SYMBOL(scsi_remove_host);
+ /**
+- * scsi_add_host - add a scsi host
++ * scsi_add_host_with_dma - add a scsi host with dma device
+  * @shost:    scsi host pointer to add
+  * @dev:      a struct device of type scsi class
++ * @dma_dev:  dma device for the host
++ *
++ * Note: You rarely need to worry about this unless you're in a
++ * virtualised host environments, so use the simpler scsi_add_host()
++ * function instead.
+  *
+  * Return value: 
+  *    0 on success / != 0 for error
+  **/
+-int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
++int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
++                         struct device *dma_dev)
+ {
+       struct scsi_host_template *sht = shost->hostt;
+       int error = -EINVAL;
+@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shos
+       if (!shost->shost_gendev.parent)
+               shost->shost_gendev.parent = dev ? dev : &platform_bus;
++      shost->dma_dev = dma_dev;
+       error = device_add(&shost->shost_gendev);
+       if (error)
+@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shos
+  fail:
+       return error;
+ }
+-EXPORT_SYMBOL(scsi_add_host);
++EXPORT_SYMBOL(scsi_add_host_with_dma);
+ static void scsi_host_dev_release(struct device *dev)
+ {
+--- a/drivers/scsi/lpfc/lpfc_init.c
++++ b/drivers/scsi/lpfc/lpfc_init.c
+@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba,
+       vport->els_tmofunc.function = lpfc_els_timeout;
+       vport->els_tmofunc.data = (unsigned long)vport;
+-      error = scsi_add_host(shost, dev);
++      error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
+       if (error)
+               goto out_put_shost;
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc
+                       fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+       }
+-      if (scsi_add_host(vha->host, &fc_vport->dev)) {
++      if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
++                                 &ha->pdev->dev)) {
+               DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
+                       vha->host_no, vha->vp_idx));
+               goto vport_create_failed_2;
+--- a/drivers/scsi/scsi_lib_dma.c
++++ b/drivers/scsi/scsi_lib_dma.c
+@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
+       int nseg = 0;
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                                 cmd->sc_data_direction);
+@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
+ void scsi_dma_unmap(struct scsi_cmnd *cmd)
+ {
+       if (scsi_sg_count(cmd)) {
+-              struct device *dev = cmd->device->host->shost_gendev.parent;
++              struct device *dev = cmd->device->host->dma_dev;
+               dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+                            cmd->sc_data_direction);
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -677,6 +677,12 @@ struct Scsi_Host {
+       void *shost_data;
+       /*
++       * Points to the physical bus device we'd use to do DMA
++       * Needed just in case we have virtual hosts.
++       */
++      struct device *dma_dev;
++
++      /*
+        * We should ensure that this is aligned, both for better performance
+        * and also because some compilers (m68k) don't automatically force
+        * alignment to a long boundary.
+@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_H
+ extern void scsi_flush_work(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
++extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
++                                             struct device *,
++                                             struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+ extern void scsi_rescan_device(struct device *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(
+ extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
++static inline int __must_check scsi_add_host(struct Scsi_Host *host,
++                                           struct device *dev)
++{
++      return scsi_add_host_with_dma(host, dev, dev);
++}
++
+ static inline struct device *scsi_get_device(struct Scsi_Host *shost)
+ {
+         return shost->shost_gendev.parent;
diff --git a/review-2.6.32/series b/review-2.6.32/series
new file mode 100644 (file)
index 0000000..51c3172
--- /dev/null
@@ -0,0 +1,34 @@
+signal-fix-alternate-signal-stack-check.patch
+scsi-scsi_lib_dma-fix-bug-with-dma-maps-on-nested-scsi-objects.patch
+scsi-osd_protocol.h-add-missing-include.patch
+scsi-megaraid_sas-fix-64-bit-sense-pointer-truncation.patch
+0001-ext4-fix-potential-buffer-head-leak-when-add_dirent_.patch
+0002-ext4-avoid-divide-by-zero-when-trying-to-mount-a-cor.patch
+0003-ext4-fix-the-returned-block-count-if-EXT4_IOC_MOVE_E.patch
+0004-ext4-fix-lock-order-problem-in-ext4_move_extents.patch
+0005-ext4-fix-possible-recursive-locking-warning-in-EXT4_.patch
+0006-ext4-plug-a-buffer_head-leak-in-an-error-path-of-ext.patch
+0007-ext4-make-sure-directory-and-symlink-blocks-are-revo.patch
+0008-ext4-fix-i_flags-access-in-ext4_da_writepages_trans_.patch
+0009-ext4-journal-all-modifications-in-ext4_xattr_set_han.patch
+0010-ext4-don-t-update-the-superblock-in-ext4_statfs.patch
+0011-ext4-fix-uninit-block-bitmap-initialization-when-s_m.patch
+0012-ext4-fix-block-validity-checks-so-they-work-correctl.patch
+0013-ext4-avoid-issuing-unnecessary-barriers.patch
+0014-ext4-fix-error-handling-in-ext4_ind_get_blocks.patch
+0015-ext4-make-trim-discard-optional-and-off-by-default.patch
+0016-ext4-make-norecovery-an-alias-for-noload.patch
+0017-ext4-Fix-double-free-of-blocks-with-EXT4_IOC_MOVE_EX.patch
+0018-ext4-initialize-moved_len-before-calling-ext4_move_e.patch
+0019-ext4-move_extent_per_page-cleanup.patch
+0020-jbd2-Add-ENOMEM-checking-in-and-for-jbd2_journal_wri.patch
+0021-ext4-Return-the-PTR_ERR-of-the-correct-pointer-in-se.patch
+0022-ext4-Avoid-data-filesystem-corruption-when-write-fai.patch
+0023-ext4-wait-for-log-to-commit-when-umounting.patch
+0024-ext4-remove-blocks-from-inode-prealloc-list-on-failu.patch
+0025-ext4-ext4_get_reserved_space-must-return-bytes-inste.patch
+0026-ext4-quota-macros-cleanup.patch
+0027-ext4-fix-incorrect-block-reservation-on-quota-transf.patch
+0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
+0029-ext4-Fix-insufficient-checks-in-EXT4_IOC_MOVE_EXT.patch
+0030-ext4-Fix-potential-fiemap-deadlock-mmap_sem-vs.-i_da.patch
diff --git a/review-2.6.32/signal-fix-alternate-signal-stack-check.patch b/review-2.6.32/signal-fix-alternate-signal-stack-check.patch
new file mode 100644 (file)
index 0000000..73cd5fd
--- /dev/null
@@ -0,0 +1,87 @@
+From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+Date: Sun, 25 Oct 2009 15:37:58 +0100
+Subject: signal: Fix alternate signal stack check
+
+From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+
+commit 2a855dd01bc1539111adb7233f587c5c468732ac upstream.
+
+All architectures in the kernel increment/decrement the stack pointer
+before storing values on the stack.
+
+On architectures which have the stack grow down sas_ss_sp == sp is not
+on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is
+on the alternate signal stack.
+
+On architectures which have the stack grow up sas_ss_sp == sp is on
+the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not
+on the alternate signal stack.
+
+The current implementation fails for architectures which have the
+stack grow down on the corner case where sas_ss_sp == sp. This was
+reported as Debian bug #544905 on AMD64.
+Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c
+
+The test case creates the following stack scenario:
+   0xn0300     stack top
+   0xn0200     alt stack pointer top (when switching to alt stack)
+   0xn01ff     alt stack end
+   0xn0100     alt stack start == stack pointer
+
+If the signal is sent the stack pointer is pointing to the base
+address of the alt stack and the kernel erroneously decides that it
+has already switched to the alternate stack because of the current
+check for "sp - sas_ss_sp < sas_ss_size"
+
+On parisc (stack grows up) the scenario would be:
+   0xn0200     stack pointer
+   0xn01ff     alt stack end
+   0xn0100     alt stack start = alt stack pointer base
+                                 (when switching to alt stack)
+   0xn0000     stack base
+
+This is handled correctly by the current implementation.
+
+[ tglx: Modified for archs which have the stack grow up (parisc) which
+       would fail with the correct implementation for stack grows
+       down. Added a check for sp >= current->sas_ss_sp which is
+       strictly not necessary but makes the code symmetric for both
+       variants ]
+
+Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Kyle McMartin <kyle@mcmartin.ca>
+LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/sched.h |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2086,11 +2086,18 @@ static inline int is_si_special(const st
+       return info <= SEND_SIG_FORCED;
+ }
+-/* True if we are on the alternate signal stack.  */
+-
++/*
++ * True if we are on the alternate signal stack.
++ */
+ static inline int on_sig_stack(unsigned long sp)
+ {
+-      return (sp - current->sas_ss_sp < current->sas_ss_size);
++#ifdef CONFIG_STACK_GROWSUP
++      return sp >= current->sas_ss_sp &&
++              sp - current->sas_ss_sp < current->sas_ss_size;
++#else
++      return sp > current->sas_ss_sp &&
++              sp - current->sas_ss_sp <= current->sas_ss_size;
++#endif
+ }
+ static inline int sas_ss_flags(unsigned long sp)