git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
.32 patches
author Greg Kroah-Hartman <gregkh@suse.de>
Thu, 23 Sep 2010 19:17:38 +0000 (12:17 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Thu, 23 Sep 2010 19:17:38 +0000 (12:17 -0700)
28 files changed:
queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch [new file with mode: 0644]
queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch [new file with mode: 0644]
queue-2.6.32/at91-change-dma-resource-index.patch [new file with mode: 0644]
queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch [new file with mode: 0644]
queue-2.6.32/bnx2-fix-netpoll-crash.patch [new file with mode: 0644]
queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch [new file with mode: 0644]
queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch [new file with mode: 0644]
queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch [new file with mode: 0644]
queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch [new file with mode: 0644]
queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch [new file with mode: 0644]
queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch [new file with mode: 0644]
queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch [new file with mode: 0644]
queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch [new file with mode: 0644]
queue-2.6.32/ia64-fix-siglock.patch [new file with mode: 0644]
queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch [new file with mode: 0644]
queue-2.6.32/inotify-send-in_unmount-events.patch [new file with mode: 0644]
queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch [new file with mode: 0644]
queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch [new file with mode: 0644]
queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch [new file with mode: 0644]
queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch [new file with mode: 0644]
queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch [new file with mode: 0644]
queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch [new file with mode: 0644]
queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch [new file with mode: 0644]
queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch [new file with mode: 0644]
queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch [new file with mode: 0644]
queue-2.6.32/series
queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch [new file with mode: 0644]

diff --git a/queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch b/queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch
new file mode 100644 (file)
index 0000000..2d26709
--- /dev/null
@@ -0,0 +1,80 @@
+From 81074e90f5c150ca70ab8dfcc77860cbe76f364d Mon Sep 17 00:00:00 2001
+From: Zhang Rui <rui.zhang@intel.com>
+Date: Mon, 21 Dec 2009 16:13:15 +0800
+Subject: ACPI: disable _OSI(Windows 2009) on Asus K50IJ
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+commit 81074e90f5c150ca70ab8dfcc77860cbe76f364d upstream.
+
+Fix a win7 compatibility issue on Asus K50IJ.
+
+Here is the _BCM method of this laptop:
+                    Method (_BCM, 1, NotSerialized)
+                    {
+                        If (LGreaterEqual (OSFG, OSVT))
+                        {
+                            If (LNotEqual (OSFG, OSW7))
+                            {
+                                Store (One, BCMD)
+                                Store (GCBL (Arg0), Local0)
+                                Subtract (0x0F, Local0, LBTN)
+                                ^^^SBRG.EC0.STBR ()
+                                ...
+                            }
+                            Else
+                            {
+                                DBGR (0x0B, Zero, Zero, Arg0)
+                                Store (Arg0, LBTN)
+                                ^^^SBRG.EC0.STBR ()
+                                ...
+                            }
+                        }
+                    }
+LBTN is used to store the index of the brightness level in the _BCL.
+GCBL is a method that converts the percentage value to the index value.
+If _OSI(Windows 2009) is not disabled, LBTN is assigned a percentage
+value, which is surely beyond the end of the _BCL package.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=14753
+
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Len Brown <len.brown@intel.com>
+Cc: maximilian attems <max@stro.at>
+Cc: Paolo Ornati <ornati@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/acpi/blacklist.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/drivers/acpi/blacklist.c
++++ b/drivers/acpi/blacklist.c
+@@ -185,6 +185,12 @@ static int __init dmi_disable_osi_vista(
+       acpi_osi_setup("!Windows 2006");
+       return 0;
+ }
++static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
++{
++      printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
++      acpi_osi_setup("!Windows 2009");
++      return 0;
++}
+ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
+       {
+@@ -211,6 +217,14 @@ static struct dmi_system_id acpi_osi_dmi
+                    DMI_MATCH(DMI_PRODUCT_NAME, "Sony VGN-SR290J"),
+               },
+       },
++      {
++      .callback = dmi_disable_osi_win7,
++      .ident = "ASUS K50IJ",
++      .matches = {
++                   DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
++                   DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"),
++              },
++      },
+       /*
+        * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
diff --git a/queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch b/queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch
new file mode 100644 (file)
index 0000000..de192fd
--- /dev/null
@@ -0,0 +1,48 @@
+From 75e1c70fc31490ef8a373ea2a4bea2524099b478 Mon Sep 17 00:00:00 2001
+From: Jeff Moyer <jmoyer@redhat.com>
+Date: Fri, 10 Sep 2010 14:16:00 -0700
+Subject: aio: check for multiplication overflow in do_io_submit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Moyer <jmoyer@redhat.com>
+
+commit 75e1c70fc31490ef8a373ea2a4bea2524099b478 upstream.
+
+Tavis Ormandy pointed out that do_io_submit does not do proper bounds
+checking on the passed-in iocb array:
+
+       if (unlikely(nr < 0))
+               return -EINVAL;
+
+       if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(iocbpp)))))
+               return -EFAULT;                      ^^^^^^^^^^^^^^^^^^
+
+The attached patch checks for overflow, and if it is detected, the
+number of iocbs submitted is scaled down to a number that will fit in
+the long.  This is an ok thing to do, as sys_io_submit is documented as
+returning the number of iocbs submitted, so callers should handle a
+return value of less than the 'nr' argument passed in.
+
+Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
+Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/aio.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1639,6 +1639,9 @@ SYSCALL_DEFINE3(io_submit, aio_context_t
+       if (unlikely(nr < 0))
+               return -EINVAL;
++      if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
++              nr = LONG_MAX/sizeof(*iocbpp);
++
+       if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
+               return -EFAULT;
diff --git a/queue-2.6.32/at91-change-dma-resource-index.patch b/queue-2.6.32/at91-change-dma-resource-index.patch
new file mode 100644 (file)
index 0000000..29729a2
--- /dev/null
@@ -0,0 +1,28 @@
+From 8d2602e0778299e2d6084f03086b716d6e7a1e1e Mon Sep 17 00:00:00 2001
+From: Nicolas Ferre <nicolas.ferre@atmel.com>
+Date: Fri, 20 Aug 2010 16:44:33 +0200
+Subject: AT91: change dma resource index
+
+From: Nicolas Ferre <nicolas.ferre@atmel.com>
+
+commit 8d2602e0778299e2d6084f03086b716d6e7a1e1e upstream.
+
+Reported-by: Dan Liang <dan.liang@atmel.com>
+Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/arm/mach-at91/at91sam9g45_devices.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/mach-at91/at91sam9g45_devices.c
++++ b/arch/arm/mach-at91/at91sam9g45_devices.c
+@@ -46,7 +46,7 @@ static struct resource hdmac_resources[]
+               .end    = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+-      [2] = {
++      [1] = {
+               .start  = AT91SAM9G45_ID_DMA,
+               .end    = AT91SAM9G45_ID_DMA,
+               .flags  = IORESOURCE_IRQ,
diff --git a/queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch b/queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch
new file mode 100644 (file)
index 0000000..decf041
--- /dev/null
@@ -0,0 +1,76 @@
+From f048fa9c8686119c3858a463cab6121dced7c0bf Mon Sep 17 00:00:00 2001
+From: Michael Chan <mchan@broadcom.com>
+Date: Tue, 1 Jun 2010 15:05:36 +0000
+Subject: bnx2: Fix hang during rmmod bnx2.
+
+From: Michael Chan <mchan@broadcom.com>
+
+commit f048fa9c8686119c3858a463cab6121dced7c0bf upstream.
+
+The regression is caused by:
+
+commit 4327ba435a56ada13eedf3eb332e583c7a0586a9
+    bnx2: Fix netpoll crash.
+
+If ->open() and ->close() are called multiple times, the same napi structs
+will be added to dev->napi_list multiple times, corrupting the dev->napi_list.
+This causes free_netdev() to hang during rmmod.
+
+We fix this by calling netif_napi_del() during ->close().
+
+Also, bnx2_init_napi() must not be in the __devinit section since it is
+called by ->open().
+
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: Benjamin Li <benli@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/bnx2.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bnx2.c
++++ b/drivers/net/bnx2.c
+@@ -248,6 +248,7 @@ static const struct flash_spec flash_570
+ MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl);
+ static void bnx2_init_napi(struct bnx2 *bp);
++static void bnx2_del_napi(struct bnx2 *bp);
+ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr)
+ {
+@@ -6237,6 +6238,7 @@ open_err:
+       bnx2_free_skbs(bp);
+       bnx2_free_irq(bp);
+       bnx2_free_mem(bp);
++      bnx2_del_napi(bp);
+       return rc;
+ }
+@@ -6444,6 +6446,7 @@ bnx2_close(struct net_device *dev)
+       bnx2_free_irq(bp);
+       bnx2_free_skbs(bp);
+       bnx2_free_mem(bp);
++      bnx2_del_napi(bp);
+       bp->link_up = 0;
+       netif_carrier_off(bp->dev);
+       bnx2_set_power_state(bp, PCI_D3hot);
+@@ -8019,7 +8022,16 @@ bnx2_bus_string(struct bnx2 *bp, char *s
+       return str;
+ }
+-static void __devinit
++static void
++bnx2_del_napi(struct bnx2 *bp)
++{
++      int i;
++
++      for (i = 0; i < bp->irq_nvecs; i++)
++              netif_napi_del(&bp->bnx2_napi[i].napi);
++}
++
++static void
+ bnx2_init_napi(struct bnx2 *bp)
+ {
+       int i;
diff --git a/queue-2.6.32/bnx2-fix-netpoll-crash.patch b/queue-2.6.32/bnx2-fix-netpoll-crash.patch
new file mode 100644 (file)
index 0000000..1daea8d
--- /dev/null
@@ -0,0 +1,63 @@
+From 4327ba435a56ada13eedf3eb332e583c7a0586a9 Mon Sep 17 00:00:00 2001
+From: Benjamin Li <benli@broadcom.com>
+Date: Tue, 23 Mar 2010 13:13:11 +0000
+Subject: bnx2: Fix netpoll crash.
+
+From: Benjamin Li <benli@broadcom.com>
+
+commit 4327ba435a56ada13eedf3eb332e583c7a0586a9 upstream.
+
+The bnx2 driver calls netif_napi_add() for all the NAPI structs during
+->probe() time but not all of them will be used if we're not in MSI-X
+mode.  This creates a problem for netpoll since it will poll all the
+NAPI structs in the dev_list whether or not they are scheduled, resulting
+in a crash when we access structure fields not initialized for that vector.
+
+We fix it by moving the netif_napi_add() call to ->open() after the number
+of IRQ vectors has been determined.
+
+Signed-off-by: Benjamin Li <benli@broadcom.com>
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/bnx2.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/bnx2.c
++++ b/drivers/net/bnx2.c
+@@ -247,6 +247,8 @@ static const struct flash_spec flash_570
+ MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl);
++static void bnx2_init_napi(struct bnx2 *bp);
++
+ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr)
+ {
+       u32 diff;
+@@ -6173,6 +6175,7 @@ bnx2_open(struct net_device *dev)
+       bnx2_disable_int(bp);
+       bnx2_setup_int_mode(bp, disable_msi);
++      bnx2_init_napi(bp);
+       bnx2_napi_enable(bp);
+       rc = bnx2_alloc_mem(bp);
+       if (rc)
+@@ -8021,7 +8024,7 @@ bnx2_init_napi(struct bnx2 *bp)
+ {
+       int i;
+-      for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) {
++      for (i = 0; i < bp->irq_nvecs; i++) {
+               struct bnx2_napi *bnapi = &bp->bnx2_napi[i];
+               int (*poll)(struct napi_struct *, int);
+@@ -8090,7 +8093,6 @@ bnx2_init_one(struct pci_dev *pdev, cons
+       dev->ethtool_ops = &bnx2_ethtool_ops;
+       bp = netdev_priv(dev);
+-      bnx2_init_napi(bp);
+       pci_set_drvdata(pdev, dev);
diff --git a/queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch b/queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch
new file mode 100644 (file)
index 0000000..96fd0e5
--- /dev/null
@@ -0,0 +1,50 @@
+From 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 21 Sep 2010 11:49:01 +0200
+Subject: char: Mark /dev/zero and /dev/kmem as not capable of writeback
+
+From: Jan Kara <jack@suse.cz>
+
+commit 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 upstream.
+
+These devices don't do any writeback, but their device inodes can still get
+dirty, so mark the bdi appropriately so that the bdi code does the right thing
+and files inodes on the lists of the bdi carrying the device inodes.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/char/mem.c |    3 ++-
+ fs/char_dev.c      |    4 +++-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/char/mem.c
++++ b/drivers/char/mem.c
+@@ -822,10 +822,11 @@ static const struct file_operations zero
+ /*
+  * capabilities for /dev/zero
+  * - permits private mappings, "copies" are taken of the source of zeros
++ * - no writeback happens
+  */
+ static struct backing_dev_info zero_bdi = {
+       .name           = "char/mem",
+-      .capabilities   = BDI_CAP_MAP_COPY,
++      .capabilities   = BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK,
+ };
+ static const struct file_operations full_fops = {
+--- a/fs/char_dev.c
++++ b/fs/char_dev.c
+@@ -39,7 +39,9 @@ struct backing_dev_info directly_mappabl
+ #endif
+               /* permit direct mmap, for read, write or exec */
+               BDI_CAP_MAP_DIRECT |
+-              BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
++              BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
++              /* no writeback happens */
++              BDI_CAP_NO_ACCT_AND_WRITEBACK),
+ };
+ static struct kobj_map *cdev_map;
diff --git a/queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch b/queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch
new file mode 100644 (file)
index 0000000..6fa8a82
--- /dev/null
@@ -0,0 +1,31 @@
+From a6f018e324ba91d0464cca6895447c2b89e6d578 Mon Sep 17 00:00:00 2001
+From: Divy Le Ray <divy@chelsio.com>
+Date: Wed, 3 Mar 2010 09:49:47 +0000
+Subject: cxgb3: fix hot plug removal crash
+
+From: Divy Le Ray <divy@chelsio.com>
+
+commit a6f018e324ba91d0464cca6895447c2b89e6d578 upstream.
+
+queue restart tasklets need to be stopped after napi handlers are stopped
+since the latter can restart them.  So stop them after stopping napi.
+
+Signed-off-by: Divy Le Ray <divy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Brandon Philips <bphilips@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/cxgb3/cxgb3_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -1274,6 +1274,7 @@ static void cxgb_down(struct adapter *ad
+       free_irq_resources(adapter);
+       quiesce_rx(adapter);
++      t3_sge_stop(adapter);
+       flush_workqueue(cxgb3_wq);      /* wait for external IRQ handler */
+ }
diff --git a/queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch b/queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch
new file mode 100644 (file)
index 0000000..acab626
--- /dev/null
@@ -0,0 +1,142 @@
+From df08cdc7ef606509debe7677c439be0ca48790e4 Mon Sep 17 00:00:00 2001
+From: Andrew Morton <akpm@linux-foundation.org>
+Date: Wed, 22 Sep 2010 13:05:11 -0700
+Subject: drivers/pci/intel-iommu.c: fix build with older gcc's
+
+From: Andrew Morton <akpm@linux-foundation.org>
+
+commit df08cdc7ef606509debe7677c439be0ca48790e4 upstream.
+
+drivers/pci/intel-iommu.c: In function `__iommu_calculate_agaw':
+drivers/pci/intel-iommu.c:437: sorry, unimplemented: inlining failed in call to 'width_to_agaw': function body not available
+drivers/pci/intel-iommu.c:445: sorry, unimplemented: called from here
+
+Move the offending function (and its siblings) to top-of-file, remove the
+forward declaration.
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=17441
+
+Reported-by: Martin Mokrejs <mmokrejs@ribosome.natur.cuni.cz>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/intel-iommu.c |   90 +++++++++++++++++++++-------------------------
+ 1 file changed, 43 insertions(+), 47 deletions(-)
+
+--- a/drivers/pci/intel-iommu.c
++++ b/drivers/pci/intel-iommu.c
+@@ -71,6 +71,49 @@
+ #define DMA_32BIT_PFN         IOVA_PFN(DMA_BIT_MASK(32))
+ #define DMA_64BIT_PFN         IOVA_PFN(DMA_BIT_MASK(64))
++/* page table handling */
++#define LEVEL_STRIDE          (9)
++#define LEVEL_MASK            (((u64)1 << LEVEL_STRIDE) - 1)
++
++static inline int agaw_to_level(int agaw)
++{
++      return agaw + 2;
++}
++
++static inline int agaw_to_width(int agaw)
++{
++      return 30 + agaw * LEVEL_STRIDE;
++}
++
++static inline int width_to_agaw(int width)
++{
++      return (width - 30) / LEVEL_STRIDE;
++}
++
++static inline unsigned int level_to_offset_bits(int level)
++{
++      return (level - 1) * LEVEL_STRIDE;
++}
++
++static inline int pfn_level_offset(unsigned long pfn, int level)
++{
++      return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
++}
++
++static inline unsigned long level_mask(int level)
++{
++      return -1UL << level_to_offset_bits(level);
++}
++
++static inline unsigned long level_size(int level)
++{
++      return 1UL << level_to_offset_bits(level);
++}
++
++static inline unsigned long align_to_level(unsigned long pfn, int level)
++{
++      return (pfn + level_size(level) - 1) & level_mask(level);
++}
+ /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
+    are never going to work. */
+@@ -449,8 +492,6 @@ void free_iova_mem(struct iova *iova)
+ }
+-static inline int width_to_agaw(int width);
+-
+ static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
+ {
+       unsigned long sagaw;
+@@ -664,51 +705,6 @@ out:
+       spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+-/* page table handling */
+-#define LEVEL_STRIDE          (9)
+-#define LEVEL_MASK            (((u64)1 << LEVEL_STRIDE) - 1)
+-
+-static inline int agaw_to_level(int agaw)
+-{
+-      return agaw + 2;
+-}
+-
+-static inline int agaw_to_width(int agaw)
+-{
+-      return 30 + agaw * LEVEL_STRIDE;
+-
+-}
+-
+-static inline int width_to_agaw(int width)
+-{
+-      return (width - 30) / LEVEL_STRIDE;
+-}
+-
+-static inline unsigned int level_to_offset_bits(int level)
+-{
+-      return (level - 1) * LEVEL_STRIDE;
+-}
+-
+-static inline int pfn_level_offset(unsigned long pfn, int level)
+-{
+-      return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
+-}
+-
+-static inline unsigned long level_mask(int level)
+-{
+-      return -1UL << level_to_offset_bits(level);
+-}
+-
+-static inline unsigned long level_size(int level)
+-{
+-      return 1UL << level_to_offset_bits(level);
+-}
+-
+-static inline unsigned long align_to_level(unsigned long pfn, int level)
+-{
+-      return (pfn + level_size(level) - 1) & level_mask(level);
+-}
+-
+ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+                                     unsigned long pfn)
+ {
diff --git a/queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch
new file mode 100644 (file)
index 0000000..0fee45b
--- /dev/null
@@ -0,0 +1,36 @@
+From fd02db9de73faebc51240619c7c7f99bee9f65c7 Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 22 Sep 2010 13:05:09 -0700
+Subject: drivers/video/sis/sis_main.c: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit fd02db9de73faebc51240619c7c7f99bee9f65c7 upstream.
+
+The FBIOGET_VBLANK device ioctl allows unprivileged users to read 16 bytes
+of uninitialized stack memory, because the "reserved" member of the
+fb_vblank struct declared on the stack is not altered or zeroed before
+being copied back to the user.  This patch takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Cc: Thomas Winischhofer <thomas@winischhofer.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/video/sis/sis_main.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/video/sis/sis_main.c
++++ b/drivers/video/sis/sis_main.c
+@@ -1701,6 +1701,9 @@ static int       sisfb_ioctl(struct fb_info *i
+               break;
+          case FBIOGET_VBLANK:
++
++              memset(&sisvbblank, 0, sizeof(struct fb_vblank));
++
+               sisvbblank.count = 0;
+               sisvbblank.flags = sisfb_setupvbblankflags(ivideo, &sisvbblank.vcount, &sisvbblank.hcount);
diff --git a/queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch
new file mode 100644 (file)
index 0000000..28d326f
--- /dev/null
@@ -0,0 +1,34 @@
+From b4aaa78f4c2f9cde2f335b14f4ca30b01f9651ca Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Sep 2010 19:08:24 -0400
+Subject: drivers/video/via/ioctl.c: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit b4aaa78f4c2f9cde2f335b14f4ca30b01f9651ca upstream.
+
+The VIAFB_GET_INFO device ioctl allows unprivileged users to read 246
+bytes of uninitialized stack memory, because the "reserved" member of
+the viafb_ioctl_info struct declared on the stack is not altered or
+zeroed before being copied back to the user.  This patch takes care of
+it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/video/via/ioctl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/video/via/ioctl.c
++++ b/drivers/video/via/ioctl.c
+@@ -25,6 +25,8 @@ int viafb_ioctl_get_viafb_info(u_long ar
+ {
+       struct viafb_ioctl_info viainfo;
++      memset(&viainfo, 0, sizeof(struct viafb_ioctl_info));
++
+       viainfo.viafb_id = VIAID;
+       viainfo.vendor_id = PCI_VIA_VENDOR_ID;
diff --git a/queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch b/queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch
new file mode 100644 (file)
index 0000000..818b759
--- /dev/null
@@ -0,0 +1,82 @@
+From 84a8dce2710cc425089a2b92acc354d4fbb5788d Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Sat, 5 Jun 2010 11:51:27 -0400
+Subject: ext4: Fix remaining racy updates of EXT4_I(inode)->i_flags
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 84a8dce2710cc425089a2b92acc354d4fbb5788d upstream.
+
+A few functions were still modifying i_flags in a racy manner.
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c |   38 ++++++++++++++++++++++----------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4815,20 +4815,26 @@ void ext4_set_inode_flags(struct inode *
+ /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
+ void ext4_get_inode_flags(struct ext4_inode_info *ei)
+ {
+-      unsigned int flags = ei->vfs_inode.i_flags;
++      unsigned int vfs_fl;
++      unsigned long old_fl, new_fl;
+-      ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+-                      EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
+-      if (flags & S_SYNC)
+-              ei->i_flags |= EXT4_SYNC_FL;
+-      if (flags & S_APPEND)
+-              ei->i_flags |= EXT4_APPEND_FL;
+-      if (flags & S_IMMUTABLE)
+-              ei->i_flags |= EXT4_IMMUTABLE_FL;
+-      if (flags & S_NOATIME)
+-              ei->i_flags |= EXT4_NOATIME_FL;
+-      if (flags & S_DIRSYNC)
+-              ei->i_flags |= EXT4_DIRSYNC_FL;
++      do {
++              vfs_fl = ei->vfs_inode.i_flags;
++              old_fl = ei->i_flags;
++              new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
++                              EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
++                              EXT4_DIRSYNC_FL);
++              if (vfs_fl & S_SYNC)
++                      new_fl |= EXT4_SYNC_FL;
++              if (vfs_fl & S_APPEND)
++                      new_fl |= EXT4_APPEND_FL;
++              if (vfs_fl & S_IMMUTABLE)
++                      new_fl |= EXT4_IMMUTABLE_FL;
++              if (vfs_fl & S_NOATIME)
++                      new_fl |= EXT4_NOATIME_FL;
++              if (vfs_fl & S_DIRSYNC)
++                      new_fl |= EXT4_DIRSYNC_FL;
++      } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
+ }
+ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
+@@ -5067,7 +5073,7 @@ static int ext4_inode_blocks_set(handle_
+                */
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
+               raw_inode->i_blocks_high = 0;
+-              ei->i_flags &= ~EXT4_HUGE_FILE_FL;
++              ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
+               return 0;
+       }
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+@@ -5080,9 +5086,9 @@ static int ext4_inode_blocks_set(handle_
+                */
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
+               raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
+-              ei->i_flags &= ~EXT4_HUGE_FILE_FL;
++              ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
+       } else {
+-              ei->i_flags |= EXT4_HUGE_FILE_FL;
++              ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
+               /* i_block is stored in file system block size */
+               i_blocks = i_blocks >> (inode->i_blkbits - 9);
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
diff --git a/queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch b/queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch
new file mode 100644 (file)
index 0000000..4ca057e
--- /dev/null
@@ -0,0 +1,99 @@
+From f362b73244fb16ea4ae127ced1467dd8adaa7733 Mon Sep 17 00:00:00 2001
+From: Daniel J Blueman <daniel.blueman@gmail.com>
+Date: Tue, 17 Aug 2010 23:56:55 +0100
+Subject: Fix unprotected access to task credentials in waitid()
+
+From: Daniel J Blueman <daniel.blueman@gmail.com>
+
+commit f362b73244fb16ea4ae127ced1467dd8adaa7733 upstream.
+
+Using a program like the following:
+
+       #include <stdlib.h>
+       #include <unistd.h>
+       #include <sys/types.h>
+       #include <sys/wait.h>
+
+       int main() {
+               id_t id;
+               siginfo_t infop;
+               pid_t res;
+
+               id = fork();
+               if (id == 0) { sleep(1); exit(0); }
+               kill(id, SIGSTOP);
+               alarm(1);
+               waitid(P_PID, id, &infop, WCONTINUED);
+               return 0;
+       }
+
+to call waitid() on a stopped process results in access to the child task's
+credentials without the RCU read lock being held - which may be replaced in the
+meantime - eliciting the following warning:
+
+       ===================================================
+       [ INFO: suspicious rcu_dereference_check() usage. ]
+       ---------------------------------------------------
+       kernel/exit.c:1460 invoked rcu_dereference_check() without protection!
+
+       other info that might help us debug this:
+
+       rcu_scheduler_active = 1, debug_locks = 1
+       2 locks held by waitid02/22252:
+        #0:  (tasklist_lock){.?.?..}, at: [<ffffffff81061ce5>] do_wait+0xc5/0x310
+        #1:  (&(&sighand->siglock)->rlock){-.-...}, at: [<ffffffff810611da>]
+       wait_consider_task+0x19a/0xbe0
+
+       stack backtrace:
+       Pid: 22252, comm: waitid02 Not tainted 2.6.35-323cd+ #3
+       Call Trace:
+        [<ffffffff81095da4>] lockdep_rcu_dereference+0xa4/0xc0
+        [<ffffffff81061b31>] wait_consider_task+0xaf1/0xbe0
+        [<ffffffff81061d15>] do_wait+0xf5/0x310
+        [<ffffffff810620b6>] sys_waitid+0x86/0x1f0
+        [<ffffffff8105fce0>] ? child_wait_callback+0x0/0x70
+        [<ffffffff81003282>] system_call_fastpath+0x16/0x1b
+
+This is fixed by holding the RCU read lock in wait_task_continued() to ensure
+that the task's current credentials aren't destroyed between us reading the
+cred pointer and us reading the UID from those credentials.
+
+Furthermore, protect wait_task_stopped() in the same way.
+
+We don't need to keep holding the RCU read lock once we've read the UID from
+the credentials as holding the RCU read lock doesn't stop the target task from
+changing its creds under us - so the credentials may be outdated immediately
+after we've read the pointer, lock or no lock.
+
+Signed-off-by: Daniel J Blueman <daniel.blueman@gmail.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/exit.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1374,8 +1374,7 @@ static int wait_task_stopped(struct wait
+       if (!unlikely(wo->wo_flags & WNOWAIT))
+               *p_code = 0;
+-      /* don't need the RCU readlock here as we're holding a spinlock */
+-      uid = __task_cred(p)->uid;
++      uid = task_uid(p);
+ unlock_sig:
+       spin_unlock_irq(&p->sighand->siglock);
+       if (!exit_code)
+@@ -1448,7 +1447,7 @@ static int wait_task_continued(struct wa
+       }
+       if (!unlikely(wo->wo_flags & WNOWAIT))
+               p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+-      uid = __task_cred(p)->uid;
++      uid = task_uid(p);
+       spin_unlock_irq(&p->sighand->siglock);
+       pid = task_pid_vnr(p);
diff --git a/queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch b/queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch
new file mode 100644 (file)
index 0000000..2b63784
--- /dev/null
@@ -0,0 +1,95 @@
+From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001
+From: Luck, Tony <tony.luck@intel.com>
+Date: Tue, 24 Aug 2010 11:44:18 -0700
+Subject: guard page for stacks that grow upwards
+
+From: Luck, Tony <tony.luck@intel.com>
+
+commit 8ca3eb08097f6839b2206e2242db4179aee3cfb3 upstream.
+
+pa-risc and ia64 have stacks that grow upwards. Check that
+they do not run into other mappings. By making VM_GROWSUP
+0x0 on architectures that do not ever use it, we can avoid
+some unpleasant #ifdefs in check_stack_guard_page().
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: dann frazier <dannf@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/mm.h |    8 +++++++-
+ mm/memory.c        |   15 +++++++++++----
+ mm/mmap.c          |    3 ---
+ 3 files changed, 18 insertions(+), 8 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -77,7 +77,11 @@ extern unsigned int kobjsize(const void
+ #define VM_MAYSHARE   0x00000080
+ #define VM_GROWSDOWN  0x00000100      /* general info on the segment */
++#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
+ #define VM_GROWSUP    0x00000200
++#else
++#define VM_GROWSUP    0x00000000
++#endif
+ #define VM_PFNMAP     0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
+ #define VM_DENYWRITE  0x00000800      /* ETXTBSY on write attempts.. */
+@@ -1195,8 +1199,10 @@ unsigned long ra_submit(struct file_ra_s
+ /* Do stack extension */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+-#ifdef CONFIG_IA64
++#if VM_GROWSUP
+ extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
++#else
++  #define expand_upwards(vma, address) do { } while (0)
+ #endif
+ extern int expand_stack_downwards(struct vm_area_struct *vma,
+                                 unsigned long address);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2630,11 +2630,9 @@ out_release:
+ }
+ /*
+- * This is like a special single-page "expand_downwards()",
+- * except we must first make sure that 'address-PAGE_SIZE'
++ * This is like a special single-page "expand_{down|up}wards()",
++ * except we must first make sure that 'address{-|+}PAGE_SIZE'
+  * doesn't hit another vma.
+- *
+- * The "find_vma()" will do the right thing even if we wrap
+  */
+ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+ {
+@@ -2653,6 +2651,15 @@ static inline int check_stack_guard_page
+               expand_stack(vma, address - PAGE_SIZE);
+       }
++      if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
++              struct vm_area_struct *next = vma->vm_next;
++
++              /* As VM_GROWSDOWN but s/below/above/ */
++              if (next && next->vm_start == address + PAGE_SIZE)
++                      return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
++
++              expand_upwards(vma, address + PAGE_SIZE);
++      }
+       return 0;
+ }
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1600,9 +1600,6 @@ static int acct_stack_growth(struct vm_a
+  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+  * vma is the last one with address > vma->vm_end.  Have to extend vma.
+  */
+-#ifndef CONFIG_IA64
+-static
+-#endif
+ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
+       int error;
diff --git a/queue-2.6.32/ia64-fix-siglock.patch b/queue-2.6.32/ia64-fix-siglock.patch
new file mode 100644 (file)
index 0000000..40d8f88
--- /dev/null
@@ -0,0 +1,101 @@
+From f574c843191728d9407b766a027f779dcd27b272 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Thu, 9 Sep 2010 15:16:56 -0700
+Subject: IA64: fix siglock
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit f574c843191728d9407b766a027f779dcd27b272 upstream.
+
+When ia64 converted to using ticket locks, an inline implementation
+of trylock/unlock in fsys.S was missed.  This was not noticed because
+in most circumstances it simply resulted in using the slow path because
+the siglock was apparently not available (under old spinlock rules).
+
+Problems occur when the ticket spinlock has value 0x0 (when first
+initialised, or when it wraps around). At this point the fsys.S
+code acquires the lock (changing the 0x0 to 0x1). If another process
+attempts to get the lock at this point, it will change the value from
+0x1 to 0x2 (using new ticket lock rules). Then the fsys.S code will
+free the lock using old spinlock rules by writing 0x0 to it. From
+here a variety of bad things can happen.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/ia64/kernel/fsys.S |   46 +++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 39 insertions(+), 7 deletions(-)
+
+--- a/arch/ia64/kernel/fsys.S
++++ b/arch/ia64/kernel/fsys.S
+@@ -424,14 +424,26 @@ EX(.fail_efault, ld8 r14=[r33])                  // r14
+       andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
+ #ifdef CONFIG_SMP
+-      mov r17=1
+-      ;;
+-      cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
++      // __ticket_spin_trylock(r31)
++      ld4 r17=[r31]
+       mov r8=EINVAL                   // default to EINVAL
+       ;;
++      extr r9=r17,17,15
++      ;;
++      xor r18=r17,r9
++      adds r19=1,r17
++      ;;
++      extr.u r18=r18,0,15
++      ;;
++      cmp.eq p0,p7=0,r18
++(p7)  br.cond.spnt.many .lock_contention
++      mov.m ar.ccv=r17
++      ;;
++      cmpxchg4.acq r9=[r31],r19,ar.ccv
++      ;;
++      cmp4.eq p0,p7=r9,r17
++(p7)  br.cond.spnt.many .lock_contention
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+-      cmp4.ne p6,p0=r18,r0
+-(p6)  br.cond.spnt.many .lock_contention
+       ;;
+ #else
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+@@ -490,7 +502,17 @@ EX(.fail_efault, ld8 r14=[r33])                   // r14
+ (p6)  br.cond.spnt.few 1b                     // yes -> retry
+ #ifdef CONFIG_SMP
+-      st4.rel [r31]=r0                        // release the lock
++      // __ticket_spin_unlock(r31)
++      adds r31=2,r31
++      ;;
++      ld2.bias r2=[r31]
++      mov r3=65534
++      ;;
++      adds r2=2,r2
++      ;;
++      and r3=r3,r2
++      ;;
++      st2.rel [r31]=r3
+ #endif
+       SSM_PSR_I(p0, p9, r31)
+       ;;
+@@ -512,7 +534,17 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
+ .sig_pending:
+ #ifdef CONFIG_SMP
+-      st4.rel [r31]=r0                        // release the lock
++      // __ticket_spin_unlock(r31)
++      adds r31=2,r31
++      ;;
++      ld2.bias r2=[r31]
++      mov r3=65534
++      ;;
++      adds r2=2,r2
++      ;;
++      and r3=r3,r2
++      ;;
++      st2.rel [r31]=r3
+ #endif
+       SSM_PSR_I(p0, p9, r17)
+       ;;
diff --git a/queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch b/queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch
new file mode 100644 (file)
index 0000000..44c9a02
--- /dev/null
@@ -0,0 +1,103 @@
+From 2d2b6901649a62977452be85df53eda2412def24 Mon Sep 17 00:00:00 2001
+From: Petr Tesarik <ptesarik@suse.cz>
+Date: Wed, 15 Sep 2010 15:35:48 -0700
+Subject: IA64: Optimize ticket spinlocks in fsys_rt_sigprocmask
+
+From: Petr Tesarik <ptesarik@suse.cz>
+
+commit 2d2b6901649a62977452be85df53eda2412def24 upstream.
+
+Tony's fix (f574c843191728d9407b766a027f779dcd27b272) has a small bug,
+it incorrectly uses "r3" as a scratch register in the first of the two
+unlock paths ... it is also inefficient.  Optimize the fast path again.
+
+Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/ia64/kernel/fsys.S |   42 +++++++++++-------------------------------
+ 1 file changed, 11 insertions(+), 31 deletions(-)
+
+--- a/arch/ia64/kernel/fsys.S
++++ b/arch/ia64/kernel/fsys.S
+@@ -420,34 +420,31 @@ EX(.fail_efault, ld8 r14=[r33])                  // r14
+       ;;
+       RSM_PSR_I(p0, r18, r19)                 // mask interrupt delivery
+-      mov ar.ccv=0
+       andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
++      mov r8=EINVAL                   // default to EINVAL
+ #ifdef CONFIG_SMP
+       // __ticket_spin_trylock(r31)
+       ld4 r17=[r31]
+-      mov r8=EINVAL                   // default to EINVAL
+-      ;;
+-      extr r9=r17,17,15
+       ;;
+-      xor r18=r17,r9
++      mov.m ar.ccv=r17
++      extr.u r9=r17,17,15
+       adds r19=1,r17
++      extr.u r18=r17,0,15
+       ;;
+-      extr.u r18=r18,0,15
++      cmp.eq p6,p7=r9,r18
+       ;;
+-      cmp.eq p0,p7=0,r18
++(p6)  cmpxchg4.acq r9=[r31],r19,ar.ccv
++(p6)  dep.z r20=r19,1,15              // next serving ticket for unlock
+ (p7)  br.cond.spnt.many .lock_contention
+-      mov.m ar.ccv=r17
+-      ;;
+-      cmpxchg4.acq r9=[r31],r19,ar.ccv
+       ;;
+       cmp4.eq p0,p7=r9,r17
++      adds r31=2,r31
+ (p7)  br.cond.spnt.many .lock_contention
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+       ;;
+ #else
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+-      mov r8=EINVAL                   // default to EINVAL
+ #endif
+       add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
+       add r19=IA64_TASK_SIGNAL_OFFSET,r16
+@@ -503,16 +500,8 @@ EX(.fail_efault, ld8 r14=[r33])                   // r14
+ #ifdef CONFIG_SMP
+       // __ticket_spin_unlock(r31)
+-      adds r31=2,r31
+-      ;;
+-      ld2.bias r2=[r31]
+-      mov r3=65534
+-      ;;
+-      adds r2=2,r2
+-      ;;
+-      and r3=r3,r2
+-      ;;
+-      st2.rel [r31]=r3
++      st2.rel [r31]=r20
++      mov r20=0                                       // i must not leak kernel bits...
+ #endif
+       SSM_PSR_I(p0, p9, r31)
+       ;;
+@@ -535,16 +524,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
+ .sig_pending:
+ #ifdef CONFIG_SMP
+       // __ticket_spin_unlock(r31)
+-      adds r31=2,r31
+-      ;;
+-      ld2.bias r2=[r31]
+-      mov r3=65534
+-      ;;
+-      adds r2=2,r2
+-      ;;
+-      and r3=r3,r2
+-      ;;
+-      st2.rel [r31]=r3
++      st2.rel [r31]=r20                       // release the lock
+ #endif
+       SSM_PSR_I(p0, p9, r17)
+       ;;
diff --git a/queue-2.6.32/inotify-send-in_unmount-events.patch b/queue-2.6.32/inotify-send-in_unmount-events.patch
new file mode 100644 (file)
index 0000000..f0b0d27
--- /dev/null
@@ -0,0 +1,36 @@
+From 611da04f7a31b2208e838be55a42c7a1310ae321 Mon Sep 17 00:00:00 2001
+From: Eric Paris <eparis@redhat.com>
+Date: Wed, 28 Jul 2010 10:18:37 -0400
+Subject: inotify: send IN_UNMOUNT events
+
+From: Eric Paris <eparis@redhat.com>
+
+commit 611da04f7a31b2208e838be55a42c7a1310ae321 upstream.
+
+Since the .31 or so notify rewrite inotify has not sent events about
+inodes which are unmounted.  This patch restores those events.
+
+Signed-off-by: Eric Paris <eparis@redhat.com>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/notify/inotify/inotify_user.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -106,8 +106,11 @@ static inline __u32 inotify_arg_to_mask(
+ {
+       __u32 mask;
+-      /* everything should accept their own ignored and cares about children */
+-      mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
++      /*
++       * everything should accept their own ignored, cares about children,
++       * and should receive events when the inode is unmounted
++       */
++      mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
+       /* mask off the flags used to open the fd */
+       mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
diff --git a/queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch b/queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch
new file mode 100644 (file)
index 0000000..b175765
--- /dev/null
@@ -0,0 +1,55 @@
+From 3d96406c7da1ed5811ea52a3b0905f4f0e295376 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 10 Sep 2010 09:59:51 +0100
+Subject: KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring
+
+From: David Howells <dhowells@redhat.com>
+
+commit 3d96406c7da1ed5811ea52a3b0905f4f0e295376 upstream.
+
+Fix a bug in keyctl_session_to_parent() whereby it tries to check the ownership
+of the parent process's session keyring whether or not the parent has a session
+keyring [CVE-2010-2960].
+
+This results in the following oops:
+
+  BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
+  IP: [<ffffffff811ae4dd>] keyctl_session_to_parent+0x251/0x443
+  ...
+  Call Trace:
+   [<ffffffff811ae2f3>] ? keyctl_session_to_parent+0x67/0x443
+   [<ffffffff8109d286>] ? __do_fault+0x24b/0x3d0
+   [<ffffffff811af98c>] sys_keyctl+0xb4/0xb8
+   [<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
+
+if the parent process has no session keyring.
+
+If the system is using pam_keyinit then it is mostly protected against this as all
+processes derived from a login will have inherited the session keyring created
+by pam_keyinit during the log in procedure.
+
+To test this, pam_keyinit calls need to be commented out in /etc/pam.d/.
+
+Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Acked-by: Tavis Ormandy <taviso@cmpxchg8b.com>
+Cc: dann frazier <dannf@debian.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/keyctl.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -1292,7 +1292,8 @@ long keyctl_session_to_parent(void)
+               goto not_permitted;
+       /* the keyrings must have the same UID */
+-      if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
++      if ((pcred->tgcred->session_keyring &&
++           pcred->tgcred->session_keyring->uid != mycred->euid) ||
+           mycred->tgcred->session_keyring->uid != mycred->euid)
+               goto not_permitted;
diff --git a/queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch b/queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch
new file mode 100644 (file)
index 0000000..a40f718
--- /dev/null
@@ -0,0 +1,69 @@
+From 9d1ac65a9698513d00e5608d93fca0c53f536c14 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 10 Sep 2010 09:59:46 +0100
+Subject: KEYS: Fix RCU no-lock warning in keyctl_session_to_parent()
+
+From: David Howells <dhowells@redhat.com>
+
+commit 9d1ac65a9698513d00e5608d93fca0c53f536c14 upstream.
+
+There's an unprotected access to the parent process's credentials in the middle
+of keyctl_session_to_parent().  This results in the following RCU warning:
+
+  ===================================================
+  [ INFO: suspicious rcu_dereference_check() usage. ]
+  ---------------------------------------------------
+  security/keys/keyctl.c:1291 invoked rcu_dereference_check() without protection!
+
+  other info that might help us debug this:
+
+  rcu_scheduler_active = 1, debug_locks = 0
+  1 lock held by keyctl-session-/2137:
+   #0:  (tasklist_lock){.+.+..}, at: [<ffffffff811ae2ec>] keyctl_session_to_parent+0x60/0x236
+
+  stack backtrace:
+  Pid: 2137, comm: keyctl-session- Not tainted 2.6.36-rc2-cachefs+ #1
+  Call Trace:
+   [<ffffffff8105606a>] lockdep_rcu_dereference+0xaa/0xb3
+   [<ffffffff811ae379>] keyctl_session_to_parent+0xed/0x236
+   [<ffffffff811af77e>] sys_keyctl+0xb4/0xb6
+   [<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
+
+The code should take the RCU read lock to make sure the parent's credentials
+don't go away, even though it's holding a spinlock and has IRQs disabled.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: dann frazier <dannf@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/keyctl.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -1259,6 +1259,7 @@ long keyctl_session_to_parent(void)
+       keyring_r = NULL;
+       me = current;
++      rcu_read_lock();
+       write_lock_irq(&tasklist_lock);
+       parent = me->real_parent;
+@@ -1313,6 +1314,7 @@ long keyctl_session_to_parent(void)
+       set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
+       write_unlock_irq(&tasklist_lock);
++      rcu_read_unlock();
+       if (oldcred)
+               put_cred(oldcred);
+       return 0;
+@@ -1321,6 +1323,7 @@ already_same:
+       ret = 0;
+ not_permitted:
+       write_unlock_irq(&tasklist_lock);
++      rcu_read_unlock();
+       put_cred(cred);
+       return ret;
diff --git a/queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch b/queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch
new file mode 100644 (file)
index 0000000..349bb87
--- /dev/null
@@ -0,0 +1,185 @@
+From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001
+From: Christoph Lameter <cl@linux.com>
+Date: Thu, 9 Sep 2010 16:38:17 -0700
+Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake
+
+From: Christoph Lameter <cl@linux.com>
+
+commit aa45484031ddee09b06350ab8528bfe5b2c76d1c upstream.
+
+Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is
+cheaper than scanning a number of lists.  To avoid synchronization
+overhead, counter deltas are maintained on a per-cpu basis and drained
+both periodically and when the delta is above a threshold.  On large CPU
+systems, the difference between the estimated and real value of
+NR_FREE_PAGES can be very high.  If NR_FREE_PAGES is much higher than the
+number of real free pages in buddy, the VM can allocate pages below min
+watermark, at worst reducing the real number of pages to zero.  Even if
+the OOM killer kills some victim for freeing memory, it may not free
+memory if the exit path requires a new page resulting in livelock.
+
+This patch introduces a zone_page_state_snapshot() function (courtesy of
+Christoph) that takes a slightly more accurate view of an arbitrary vmstat
+counter.  It is used to read NR_FREE_PAGES while kswapd is awake to avoid
+the watermark being accidentally broken.  The estimate is not perfect and
+may result in cache line bounces but is expected to be lighter than the
+IPI calls necessary to continually drain the per-cpu counters while kswapd
+is awake.
+
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: Mel Gorman <mel@csn.ul.ie>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ include/linux/mmzone.h |   13 +++++++++++++
+ include/linux/vmstat.h |   22 ++++++++++++++++++++++
+ mm/mmzone.c            |   21 +++++++++++++++++++++
+ mm/page_alloc.c        |    4 ++--
+ mm/vmstat.c            |   15 ++++++++++++++-
+ 5 files changed, 72 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -290,6 +290,13 @@ struct zone {
+       unsigned long watermark[NR_WMARK];
+       /*
++       * When free pages are below this point, additional steps are taken
++       * when reading the number of free pages to avoid per-cpu counter
++       * drift allowing watermarks to be breached
++       */
++      unsigned long percpu_drift_mark;
++
++      /*
+        * We don't know if the memory that we're going to allocate will be freeable
+        * or/and it will be released eventually, so to avoid totally wasting several
+        * GB of ram we must reserve some of the lower zone memory (otherwise we risk
+@@ -460,6 +467,12 @@ static inline int zone_is_oom_locked(con
+       return test_bit(ZONE_OOM_LOCKED, &zone->flags);
+ }
++#ifdef CONFIG_SMP
++unsigned long zone_nr_free_pages(struct zone *zone);
++#else
++#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
++#endif /* CONFIG_SMP */
++
+ /*
+  * The "priority" of VM scanning is how much of the queues we will scan in one
+  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -166,6 +166,28 @@ static inline unsigned long zone_page_st
+       return x;
+ }
++/*
++ * More accurate version that also considers the currently pending
++ * deltas. For that we need to loop over all cpus to find the current
++ * deltas. There is no synchronization so the result cannot be
++ * exactly accurate either.
++ */
++static inline unsigned long zone_page_state_snapshot(struct zone *zone,
++                                      enum zone_stat_item item)
++{
++      long x = atomic_long_read(&zone->vm_stat[item]);
++
++#ifdef CONFIG_SMP
++      int cpu;
++      for_each_online_cpu(cpu)
++              x += zone_pcp(zone, cpu)->vm_stat_diff[item];
++
++      if (x < 0)
++              x = 0;
++#endif
++      return x;
++}
++
+ extern unsigned long global_reclaimable_pages(void);
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+--- a/mm/mmzone.c
++++ b/mm/mmzone.c
+@@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pf
+       return 1;
+ }
+ #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
++
++#ifdef CONFIG_SMP
++/* Called when a more accurate view of NR_FREE_PAGES is needed */
++unsigned long zone_nr_free_pages(struct zone *zone)
++{
++      unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
++
++      /*
++       * While kswapd is awake, it is considered the zone is under some
++       * memory pressure. Under pressure, there is a risk that
++       * per-cpu-counter-drift will allow the min watermark to be breached
++       * potentially causing a live-lock. While kswapd is awake and
++       * free pages are low, get a better estimate for free pages
++       */
++      if (nr_free_pages < zone->percpu_drift_mark &&
++                      !waitqueue_active(&zone->zone_pgdat->kswapd_wait))
++              return zone_page_state_snapshot(zone, NR_FREE_PAGES);
++
++      return nr_free_pages;
++}
++#endif /* CONFIG_SMP */
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1365,7 +1365,7 @@ int zone_watermark_ok(struct zone *z, in
+ {
+       /* free_pages my go negative - that's OK */
+       long min = mark;
+-      long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
++      long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
+       int o;
+       if (alloc_flags & ALLOC_HIGH)
+@@ -2250,7 +2250,7 @@ void show_free_areas(void)
+                       " all_unreclaimable? %s"
+                       "\n",
+                       zone->name,
+-                      K(zone_page_state(zone, NR_FREE_PAGES)),
++                      K(zone_nr_free_pages(zone)),
+                       K(min_wmark_pages(zone)),
+                       K(low_wmark_pages(zone)),
+                       K(high_wmark_pages(zone)),
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -136,10 +136,23 @@ static void refresh_zone_stat_thresholds
+       int threshold;
+       for_each_populated_zone(zone) {
++              unsigned long max_drift, tolerate_drift;
++
+               threshold = calculate_threshold(zone);
+               for_each_online_cpu(cpu)
+                       zone_pcp(zone, cpu)->stat_threshold = threshold;
++
++              /*
++               * Only set percpu_drift_mark if there is a danger that
++               * NR_FREE_PAGES reports the low watermark is ok when in fact
++               * the min watermark could be breached by an allocation
++               */
++              tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
++              max_drift = num_online_cpus() * threshold;
++              if (max_drift > tolerate_drift)
++                      zone->percpu_drift_mark = high_wmark_pages(zone) +
++                                      max_drift;
+       }
+ }
+@@ -715,7 +728,7 @@ static void zoneinfo_show_print(struct s
+                  "\n        scanned  %lu"
+                  "\n        spanned  %lu"
+                  "\n        present  %lu",
+-                 zone_page_state(zone, NR_FREE_PAGES),
++                 zone_nr_free_pages(zone),
+                  min_wmark_pages(zone),
+                  low_wmark_pages(zone),
+                  high_wmark_pages(zone),
diff --git a/queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch b/queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch
new file mode 100644 (file)
index 0000000..615cea0
--- /dev/null
@@ -0,0 +1,79 @@
+From 9ee493ce0a60bf42c0f8fd0b0fe91df5704a1cbf Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mel@csn.ul.ie>
+Date: Thu, 9 Sep 2010 16:38:18 -0700
+Subject: mm: page allocator: drain per-cpu lists after direct reclaim allocation fails
+
+From: Mel Gorman <mel@csn.ul.ie>
+
+commit 9ee493ce0a60bf42c0f8fd0b0fe91df5704a1cbf upstream.
+
+When under significant memory pressure, a process enters direct reclaim
+and immediately afterwards tries to allocate a page.  If it fails and no
+further progress is made, it's possible the system will go OOM.  However,
+on systems with large amounts of memory, it's possible that a significant
+number of pages are on per-cpu lists and inaccessible to the calling
+process.  This leads to a process entering direct reclaim more often than
+it should increasing the pressure on the system and compounding the
+problem.
+
+This patch notes that if direct reclaim is making progress but allocations
+are still failing that the system is already under heavy pressure.  In
+this case, it drains the per-cpu lists and tries the allocation a second
+time before continuing.
+
+Signed-off-by: Mel Gorman <mel@csn.ul.ie>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/page_alloc.c |   20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1681,6 +1681,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m
+       struct page *page = NULL;
+       struct reclaim_state reclaim_state;
+       struct task_struct *p = current;
++      bool drained = false;
+       cond_resched();
+@@ -1699,14 +1700,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m
+       cond_resched();
+-      if (order != 0)
+-              drain_all_pages();
++      if (unlikely(!(*did_some_progress)))
++              return NULL;
+-      if (likely(*did_some_progress))
+-              page = get_page_from_freelist(gfp_mask, nodemask, order,
++retry:
++      page = get_page_from_freelist(gfp_mask, nodemask, order,
+                                       zonelist, high_zoneidx,
+                                       alloc_flags, preferred_zone,
+                                       migratetype);
++
++      /*
++       * If an allocation failed after direct reclaim, it could be because
++       * pages are pinned on the per-cpu lists. Drain them and try again
++       */
++      if (!page && !drained) {
++              drain_all_pages();
++              drained = true;
++              goto retry;
++      }
++
+       return page;
+ }
diff --git a/queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch b/queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch
new file mode 100644 (file)
index 0000000..23afa01
--- /dev/null
@@ -0,0 +1,80 @@
+From 72853e2991a2702ae93aaf889ac7db743a415dd3 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mel@csn.ul.ie>
+Date: Thu, 9 Sep 2010 16:38:16 -0700
+Subject: mm: page allocator: update free page counters after pages are placed on the free list
+
+From: Mel Gorman <mel@csn.ul.ie>
+
+commit 72853e2991a2702ae93aaf889ac7db743a415dd3 upstream.
+
+When allocating a page, the system uses NR_FREE_PAGES counters to
+determine if watermarks would remain intact after the allocation was made.
+This check is made without interrupts disabled or the zone lock held and
+so is race-prone by nature.  Unfortunately, when pages are being freed in
+batch, the counters are updated before the pages are added on the list.
+During this window, the counters are misleading as the pages do not exist
+yet.  When under significant pressure on systems with large numbers of
+CPUs, it's possible for processes to make progress even though they should
+have been stalled.  This is particularly problematic if a number of the
+processes are using GFP_ATOMIC as the min watermark can be accidentally
+breached and in extreme cases, the system can livelock.
+
+This patch updates the counters after the pages have been added to the
+list.  This makes the allocator more cautious with respect to preserving
+the watermarks and mitigates livelock possibilities.
+
+[akpm@linux-foundation.org: avoid modifying incoming args]
+Signed-off-by: Mel Gorman <mel@csn.ul.ie>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/page_alloc.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -531,13 +531,13 @@ static void free_pcppages_bulk(struct zo
+ {
+       int migratetype = 0;
+       int batch_free = 0;
++      int to_free = count;
+       spin_lock(&zone->lock);
+       zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+       zone->pages_scanned = 0;
+-      __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+-      while (count) {
++      while (to_free) {
+               struct page *page;
+               struct list_head *list;
+@@ -562,8 +562,9 @@ static void free_pcppages_bulk(struct zo
+                       /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+                       __free_one_page(page, zone, 0, page_private(page));
+                       trace_mm_page_pcpu_drain(page, 0, page_private(page));
+-              } while (--count && --batch_free && !list_empty(list));
++              } while (--to_free && --batch_free && !list_empty(list));
+       }
++      __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+       spin_unlock(&zone->lock);
+ }
+@@ -574,8 +575,8 @@ static void free_one_page(struct zone *z
+       zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+       zone->pages_scanned = 0;
+-      __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+       __free_one_page(page, zone, order, migratetype);
++      __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+       spin_unlock(&zone->lock);
+ }
diff --git a/queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch b/queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch
new file mode 100644 (file)
index 0000000..e638cfd
--- /dev/null
@@ -0,0 +1,43 @@
+From c33f543d320843e1732534c3931da4bbd18e6c14 Mon Sep 17 00:00:00 2001
+From: Patrick Simmons <linuxrocks123@netscape.net>
+Date: Wed, 8 Sep 2010 10:34:28 -0400
+Subject: oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron 540)
+
+From: Patrick Simmons <linuxrocks123@netscape.net>
+
+commit c33f543d320843e1732534c3931da4bbd18e6c14 upstream.
+
+This patch adds CPU type detection for the Intel Celeron 540, which is
+part of the Core 2 family according to Wikipedia; the family and ID pair
+is absent from the Volume 3B table referenced in the source code
+comments.  I have tested this patch on an Intel Celeron 540 machine
+reporting itself as Family 6 Model 22, and OProfile runs on the machine
+without issue.
+
+Spec:
+
+ http://download.intel.com/design/mobile/SPECUPDT/317667.pdf
+
+Signed-off-by: Patrick Simmons <linuxrocks123@netscape.net>
+Acked-by: Andi Kleen <ak@linux.intel.com>
+Acked-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Robert Richter <robert.richter@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/oprofile/nmi_int.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/oprofile/nmi_int.c
++++ b/arch/x86/oprofile/nmi_int.c
+@@ -621,7 +621,9 @@ static int __init ppro_init(char **cpu_t
+       case 14:
+               *cpu_type = "i386/core";
+               break;
+-      case 15: case 23:
++      case 0x0f:
++      case 0x16:
++      case 0x17:
+               *cpu_type = "i386/core_2";
+               break;
+       case 0x1a:
diff --git a/queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch b/queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch
new file mode 100644 (file)
index 0000000..29d8e1b
--- /dev/null
@@ -0,0 +1,45 @@
+From 46b30ea9bc3698bc1d1e6fd726c9601d46fa0a91 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 21 Sep 2010 07:57:19 +0200
+Subject: percpu: fix pcpu_last_unit_cpu
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 46b30ea9bc3698bc1d1e6fd726c9601d46fa0a91 upstream.
+
+pcpu_first/last_unit_cpu are used to track which cpu has the first and
+last units assigned.  This in turn is used to determine the span of a
+chunk for map/unmap cache flushes and whether an address belongs to
+the first chunk or not in per_cpu_ptr_to_phys().
+
+When the number of possible CPUs isn't a power of two, a chunk may
+contain unassigned units towards the end of a chunk.  The logic to
+determine pcpu_last_unit_cpu was incorrect when there was an unused
+unit at the end of a chunk.  It failed to ignore the unused unit and
+assigned the unused marker NR_CPUS to pcpu_last_unit_cpu.
+
+This was discovered through kdump failure which was caused by
+malfunctioning per_cpu_ptr_to_phys() on a kvm setup with 50 possible
+CPUs by CAI Qian.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/percpu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/percpu.c
++++ b/mm/percpu.c
+@@ -1702,9 +1702,9 @@ int __init pcpu_setup_first_chunk(const
+                       if (pcpu_first_unit_cpu == NR_CPUS)
+                               pcpu_first_unit_cpu = cpu;
++                      pcpu_last_unit_cpu = cpu;
+               }
+       }
+-      pcpu_last_unit_cpu = cpu;
+       pcpu_nr_units = unit;
+       for_each_possible_cpu(cpu)
diff --git a/queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch b/queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch
new file mode 100644 (file)
index 0000000..e98c3a2
--- /dev/null
@@ -0,0 +1,66 @@
+From 950eaaca681c44aab87a46225c9e44f902c080aa Mon Sep 17 00:00:00 2001
+From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Date: Tue, 31 Aug 2010 17:00:18 -0700
+Subject: pid: make setpgid() system call use RCU read-side critical section
+
+From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+commit 950eaaca681c44aab87a46225c9e44f902c080aa upstream.
+
+[   23.584719]
+[   23.584720] ===================================================
+[   23.585059] [ INFO: suspicious rcu_dereference_check() usage. ]
+[   23.585176] ---------------------------------------------------
+[   23.585176] kernel/pid.c:419 invoked rcu_dereference_check() without protection!
+[   23.585176]
+[   23.585176] other info that might help us debug this:
+[   23.585176]
+[   23.585176]
+[   23.585176] rcu_scheduler_active = 1, debug_locks = 1
+[   23.585176] 1 lock held by rc.sysinit/728:
+[   23.585176]  #0:  (tasklist_lock){.+.+..}, at: [<ffffffff8104771f>] sys_setpgid+0x5f/0x193
+[   23.585176]
+[   23.585176] stack backtrace:
+[   23.585176] Pid: 728, comm: rc.sysinit Not tainted 2.6.36-rc2 #2
+[   23.585176] Call Trace:
+[   23.585176]  [<ffffffff8105b436>] lockdep_rcu_dereference+0x99/0xa2
+[   23.585176]  [<ffffffff8104c324>] find_task_by_pid_ns+0x50/0x6a
+[   23.585176]  [<ffffffff8104c35b>] find_task_by_vpid+0x1d/0x1f
+[   23.585176]  [<ffffffff81047727>] sys_setpgid+0x67/0x193
+[   23.585176]  [<ffffffff810029eb>] system_call_fastpath+0x16/0x1b
+[   24.959669] type=1400 audit(1282938522.956:4): avc:  denied  { module_request } for  pid=766 comm="hwclock" kmod="char-major-10-135" scontext=system_u:system_r:hwclock_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclas
+
+It turns out that the setpgid() system call fails to enter an RCU
+read-side critical section before doing a PID-to-task_struct translation.
+This commit therefore does rcu_read_lock() before the translation, and
+also does rcu_read_unlock() after the last use of the returned pointer.
+
+Reported-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Acked-by: David Howells <dhowells@redhat.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sys.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -962,6 +962,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid
+               pgid = pid;
+       if (pgid < 0)
+               return -EINVAL;
++      rcu_read_lock();
+       /* From this point forward we keep holding onto the tasklist lock
+        * so that our parent does not change from under us. -DaveM
+@@ -1015,6 +1016,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid
+ out:
+       /* All paths lead to here, thus we are safe. -DaveM */
+       write_unlock_irq(&tasklist_lock);
++      rcu_read_unlock();
+       return err;
+ }
diff --git a/queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch b/queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch
new file mode 100644 (file)
index 0000000..015d063
--- /dev/null
@@ -0,0 +1,46 @@
+From e75e863dd5c7d96b91ebbd241da5328fc38a78cc Mon Sep 17 00:00:00 2001
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+Date: Tue, 14 Sep 2010 16:35:14 +0200
+Subject: sched: Fix user time incorrectly accounted as system time on 32-bit
+
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+
+commit e75e863dd5c7d96b91ebbd241da5328fc38a78cc upstream.
+
+We have a possible 32-bit variable overflow when multiplying in the
+task_times() and thread_group_times() functions. When the
+overflow happens, the scaled utime value becomes erroneously
+small and the scaled stime becomes erroneously big.
+
+Reported here:
+
+ https://bugzilla.redhat.com/show_bug.cgi?id=633037
+ https://bugzilla.kernel.org/show_bug.cgi?id=16559
+
+Reported-by: Michael Chapman <redhat-bugzilla@very.puzzling.org>
+Reported-by: Ciriaco Garcia de Celis <sysman@etherpilot.com>
+Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+LKML-Reference: <20100914143513.GB8415@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -5341,9 +5341,9 @@ void thread_group_times(struct task_stru
+       rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
+       if (total) {
+-              u64 temp;
++              u64 temp = rtime;
+-              temp = (u64)(rtime * cputime.utime);
++              temp *= cputime.utime;
+               do_div(temp, total);
+               utime = (cputime_t)temp;
+       } else
diff --git a/queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch b/queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch
new file mode 100644 (file)
index 0000000..878d94a
--- /dev/null
@@ -0,0 +1,77 @@
+From 2a1b7e575b80ceb19ea50bfa86ce0053ea57181d Mon Sep 17 00:00:00 2001
+From: Ryan Kuester <rkuester@kspace.net>
+Date: Mon, 26 Apr 2010 18:11:54 -0500
+Subject: SCSI: mptsas: fix hangs caused by ATA pass-through
+
+From: Ryan Kuester <rkuester@kspace.net>
+
+commit 2a1b7e575b80ceb19ea50bfa86ce0053ea57181d upstream.
+
+I may have an explanation for the LSI 1068 HBA hangs provoked by ATA
+pass-through commands, in particular by smartctl.
+
+First, my version of the symptoms.  On an LSI SAS1068E B3 HBA running
+01.29.00.00 firmware, with SATA disks, and with smartd running, I'm seeing
+occasional task, bus, and host resets, some of which lead to hard faults of
+the HBA requiring a reboot.  Abusively looping the smartctl command,
+
+    # while true; do smartctl -a /dev/sdb > /dev/null; done
+
+dramatically increases the frequency of these failures to nearly one per
+minute.  A high IO load through the HBA while looping smartctl seems to
+improve the chance of a full scsi host reset or a non-recoverable hang.
+
+I reduced what smartctl was doing down to a simple test case which
+causes the hang with a single IO when pointed at the sd interface.  See
+the code at the bottom of this e-mail.  It uses an SG_IO ioctl to issue
+a single pass-through ATA identify device command.  If the buffer
+userspace gives for the read data has certain alignments, the task is
+issued to the HBA but the HBA fails to respond.  If run against the sg
+interface, neither the test code nor smartctl causes a hang.
+
+sd and sg handle the SG_IO ioctl slightly differently.  Unless you
+specifically set a flag to do direct IO, sg passes a buffer of its own,
+which is page-aligned, to the block layer and later copies the result
+into the userspace buffer regardless of its alignment.  sd, on the other
+hand, always does direct IO unless the userspace buffer fails an
+alignment test at block/blk-map.c line 57, in which case a page-aligned
+buffer is created and used for the transfer.
+
+The alignment test currently checks for word-alignment, the default
+setup by scsi_lib.c; therefore, userspace buffers of almost any
+alignment are given directly to the HBA as DMA targets.  The LSI 1068
+hardware doesn't seem to like at least a couple of the alignments which
+cross a page boundary (see the test code below).  Curiously, many
+page-boundary-crossing alignments do work just fine.
+
+So, either the hardware has a bug handling certain alignments or the
+hardware has a stricter alignment requirement than the driver is
+advertising.  If stricter alignment is required, then in no case should
+misaligned buffers from userspace be allowed through without being
+bounced or at least causing an error to be returned.
+
+It seems the mptsas driver could use blk_queue_dma_alignment() to advertise
+a stricter alignment requirement.  If it does, sd does the right thing and
+bounces misaligned buffers (see block/blk-map.c line 57).  The following
+patch to 2.6.34-rc5 makes my symptoms go away.  I'm sure this is the wrong
+place for this code, but it gets my idea across.
+
+Acked-by: "Desai, Kashyap" <Kashyap.Desai@lsi.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/message/fusion/mptscsih.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/message/fusion/mptscsih.c
++++ b/drivers/message/fusion/mptscsih.c
+@@ -2439,6 +2439,8 @@ mptscsih_slave_configure(struct scsi_dev
+               ioc->name,sdev->tagged_supported, sdev->simple_tags,
+               sdev->ordered_tags));
++      blk_queue_dma_alignment (sdev->request_queue, 512 - 1);
++
+       return 0;
+ }
index 7b15f8f4d1d38302d5ca4c8dc76772f824f0ed5b..f7ebf8d22d373c6695b02a27078ceece3a36f33a 100644 (file)
@@ -21,3 +21,30 @@ drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch
 bonding-correctly-process-non-linear-skbs.patch
 staging-vt6655-fix-buffer-overflow.patch
 net-llc-make-opt-unsigned-in-llc_ui_setsockopt.patch
+pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch
+sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch
+oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch
+char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch
+drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch
+drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch
+percpu-fix-pcpu_last_unit_cpu.patch
+aio-check-for-multiplication-overflow-in-do_io_submit.patch
+inotify-send-in_unmount-events.patch
+scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch
+ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch
+ia64-fix-siglock.patch
+ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch
+keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch
+keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch
+xfs-prevent-reading-uninitialized-stack-memory.patch
+drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch
+acpi-disable-_osi-windows-2009-on-asus-k50ij.patch
+bnx2-fix-netpoll-crash.patch
+bnx2-fix-hang-during-rmmod-bnx2.patch
+at91-change-dma-resource-index.patch
+cxgb3-fix-hot-plug-removal-crash.patch
+mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch
+mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch
+mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch
+guard-page-for-stacks-that-grow-upwards.patch
+fix-unprotected-access-to-task-credentials-in-waitid.patch
diff --git a/queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch
new file mode 100644 (file)
index 0000000..d718256
--- /dev/null
@@ -0,0 +1,36 @@
+From a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Date: Mon, 6 Sep 2010 18:24:57 -0400
+Subject: xfs: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+
+commit a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 upstream.
+
+The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12
+bytes of uninitialized stack memory, because the fsxattr struct
+declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero)
+the 12-byte fsx_pad member before copying it back to the user.  This
+patch takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Alex Elder <aelder@sgi.com>
+Cc: dann frazier <dannf@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/xfs/linux-2.6/xfs_ioctl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/xfs/linux-2.6/xfs_ioctl.c
++++ b/fs/xfs/linux-2.6/xfs_ioctl.c
+@@ -789,6 +789,8 @@ xfs_ioc_fsgetxattr(
+ {
+       struct fsxattr          fa;
++      memset(&fa, 0, sizeof(struct fsxattr));
++
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       fa.fsx_xflags = xfs_ip2xflags(ip);
+       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;