From: Greg Kroah-Hartman Date: Thu, 23 Sep 2010 19:17:38 +0000 (-0700) Subject: .32 patches X-Git-Tag: v2.6.35.6~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=db71a88068ffc31138e3ecbcc22364900e5031cb;p=thirdparty%2Fkernel%2Fstable-queue.git .32 patches --- diff --git a/queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch b/queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch new file mode 100644 index 00000000000..2d2670908e2 --- /dev/null +++ b/queue-2.6.32/acpi-disable-_osi-windows-2009-on-asus-k50ij.patch @@ -0,0 +1,80 @@ +From 81074e90f5c150ca70ab8dfcc77860cbe76f364d Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Mon, 21 Dec 2009 16:13:15 +0800 +Subject: ACPI: disable _OSI(Windows 2009) on Asus K50IJ + +From: Zhang Rui + +commit 81074e90f5c150ca70ab8dfcc77860cbe76f364d upstream. + +Fix a win7 compability issue on Asus K50IJ. + +Here is the _BCM method of this laptop: + Method (_BCM, 1, NotSerialized) + { + If (LGreaterEqual (OSFG, OSVT)) + { + If (LNotEqual (OSFG, OSW7)) + { + Store (One, BCMD) + Store (GCBL (Arg0), Local0) + Subtract (0x0F, Local0, LBTN) + ^^^SBRG.EC0.STBR () + ... + } + Else + { + DBGR (0x0B, Zero, Zero, Arg0) + Store (Arg0, LBTN) + ^^^SBRG.EC0.STBR () + ... + } + } + } +LBTN is used to store the index of the brightness level in the _BCL. +GCBL is a method that convert the percentage value to the index value. +If _OSI(Windows 2009) is not disabled, LBTN is stored a percentage +value which is surely beyond the end of _BCL package. + +http://bugzilla.kernel.org/show_bug.cgi?id=14753 + +Signed-off-by: Zhang Rui +Signed-off-by: Len Brown +Cc: maximilian attems +Cc: Paolo Ornati +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/blacklist.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/drivers/acpi/blacklist.c ++++ b/drivers/acpi/blacklist.c +@@ -185,6 +185,12 @@ static int __init dmi_disable_osi_vista( + acpi_osi_setup("!Windows 2006"); + return 0; + } ++static int __init dmi_disable_osi_win7(const struct dmi_system_id *d) ++{ ++ printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident); ++ acpi_osi_setup("!Windows 2009"); ++ return 0; ++} + + static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { + { +@@ -211,6 +217,14 @@ static struct dmi_system_id acpi_osi_dmi + DMI_MATCH(DMI_PRODUCT_NAME, "Sony VGN-SR290J"), + }, + }, ++ { ++ .callback = dmi_disable_osi_win7, ++ .ident = "ASUS K50IJ", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"), ++ }, ++ }, + + /* + * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. diff --git a/queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch b/queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch new file mode 100644 index 00000000000..de192fd720e --- /dev/null +++ b/queue-2.6.32/aio-check-for-multiplication-overflow-in-do_io_submit.patch @@ -0,0 +1,48 @@ +From 75e1c70fc31490ef8a373ea2a4bea2524099b478 Mon Sep 17 00:00:00 2001 +From: Jeff Moyer +Date: Fri, 10 Sep 2010 14:16:00 -0700 +Subject: aio: check for multiplication overflow in do_io_submit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jeff Moyer + +commit 75e1c70fc31490ef8a373ea2a4bea2524099b478 upstream. 
+ +Tavis Ormandy pointed out that do_io_submit does not do proper bounds +checking on the passed-in iocb array: + +       if (unlikely(nr < 0)) +               return -EINVAL; + +       if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(iocbpp))))) +               return -EFAULT;                      ^^^^^^^^^^^^^^^^^^ + +The attached patch checks for overflow, and if it is detected, the +number of iocbs submitted is scaled down to a number that will fit in +the long.  This is an ok thing to do, as sys_io_submit is documented as +returning the number of iocbs submitted, so callers should handle a +return value of less than the 'nr' argument passed in. + +Reported-by: Tavis Ormandy +Signed-off-by: Jeff Moyer +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/aio.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -1639,6 +1639,9 @@ SYSCALL_DEFINE3(io_submit, aio_context_t + if (unlikely(nr < 0)) + return -EINVAL; + ++ if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) ++ nr = LONG_MAX/sizeof(*iocbpp); ++ + if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) + return -EFAULT; + diff --git a/queue-2.6.32/at91-change-dma-resource-index.patch b/queue-2.6.32/at91-change-dma-resource-index.patch new file mode 100644 index 00000000000..29729a2d1f6 --- /dev/null +++ b/queue-2.6.32/at91-change-dma-resource-index.patch @@ -0,0 +1,28 @@ +From 8d2602e0778299e2d6084f03086b716d6e7a1e1e Mon Sep 17 00:00:00 2001 +From: Nicolas Ferre +Date: Fri, 20 Aug 2010 16:44:33 +0200 +Subject: AT91: change dma resource index + +From: Nicolas Ferre + +commit 8d2602e0778299e2d6084f03086b716d6e7a1e1e upstream. + +Reported-by: Dan Liang +Signed-off-by: Nicolas Ferre +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/mach-at91/at91sam9g45_devices.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/mach-at91/at91sam9g45_devices.c ++++ b/arch/arm/mach-at91/at91sam9g45_devices.c +@@ -46,7 +46,7 @@ static struct resource hdmac_resources[] + .end = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1, + .flags = IORESOURCE_MEM, + }, +- [2] = { ++ [1] = { + .start = AT91SAM9G45_ID_DMA, + .end = AT91SAM9G45_ID_DMA, + .flags = IORESOURCE_IRQ, diff --git a/queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch b/queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch new file mode 100644 index 00000000000..decf041067c --- /dev/null +++ b/queue-2.6.32/bnx2-fix-hang-during-rmmod-bnx2.patch @@ -0,0 +1,76 @@ +From f048fa9c8686119c3858a463cab6121dced7c0bf Mon Sep 17 00:00:00 2001 +From: Michael Chan +Date: Tue, 1 Jun 2010 15:05:36 +0000 +Subject: bnx2: Fix hang during rmmod bnx2. + +From: Michael Chan + +commit f048fa9c8686119c3858a463cab6121dced7c0bf upstream. + +The regression is caused by: + +commit 4327ba435a56ada13eedf3eb332e583c7a0586a9 + bnx2: Fix netpoll crash. + +If ->open() and ->close() are called multiple times, the same napi structs +will be added to dev->napi_list multiple times, corrupting the dev->napi_list. +This causes free_netdev() to hang during rmmod. + +We fix this by calling netif_napi_del() during ->close(). + +Also, bnx2_init_napi() must not be in the __devinit section since it is +called by ->open(). + +Signed-off-by: Michael Chan +Signed-off-by: Benjamin Li +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/bnx2.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/drivers/net/bnx2.c ++++ b/drivers/net/bnx2.c +@@ -248,6 +248,7 @@ static const struct flash_spec flash_570 + MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); + + static void bnx2_init_napi(struct bnx2 *bp); ++static void bnx2_del_napi(struct bnx2 *bp); + + static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) + { +@@ -6237,6 +6238,7 @@ open_err: + bnx2_free_skbs(bp); + bnx2_free_irq(bp); + bnx2_free_mem(bp); ++ bnx2_del_napi(bp); + return rc; + } + +@@ -6444,6 +6446,7 @@ bnx2_close(struct net_device *dev) + bnx2_free_irq(bp); + bnx2_free_skbs(bp); + bnx2_free_mem(bp); ++ bnx2_del_napi(bp); + bp->link_up = 0; + netif_carrier_off(bp->dev); + bnx2_set_power_state(bp, PCI_D3hot); +@@ -8019,7 +8022,16 @@ bnx2_bus_string(struct bnx2 *bp, char *s + return str; + } + +-static void __devinit ++static void ++bnx2_del_napi(struct bnx2 *bp) ++{ ++ int i; ++ ++ for (i = 0; i < bp->irq_nvecs; i++) ++ netif_napi_del(&bp->bnx2_napi[i].napi); ++} ++ ++static void + bnx2_init_napi(struct bnx2 *bp) + { + int i; diff --git a/queue-2.6.32/bnx2-fix-netpoll-crash.patch b/queue-2.6.32/bnx2-fix-netpoll-crash.patch new file mode 100644 index 00000000000..1daea8d165c --- /dev/null +++ b/queue-2.6.32/bnx2-fix-netpoll-crash.patch @@ -0,0 +1,63 @@ +From 4327ba435a56ada13eedf3eb332e583c7a0586a9 Mon Sep 17 00:00:00 2001 +From: Benjamin Li +Date: Tue, 23 Mar 2010 13:13:11 +0000 +Subject: bnx2: Fix netpoll crash. + +From: Benjamin Li + +commit 4327ba435a56ada13eedf3eb332e583c7a0586a9 upstream. + +The bnx2 driver calls netif_napi_add() for all the NAPI structs during +->probe() time but not all of them will be used if we're not in MSI-X +mode. This creates a problem for netpoll since it will poll all the +NAPI structs in the dev_list whether or not they are scheduled, resulting +in a crash when we access structure fields not initialized for that vector. + +We fix it by moving the netif_napi_add() call to ->open() after the number +of IRQ vectors has been determined. + +Signed-off-by: Benjamin Li +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/bnx2.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/bnx2.c ++++ b/drivers/net/bnx2.c +@@ -247,6 +247,8 @@ static const struct flash_spec flash_570 + + MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); + ++static void bnx2_init_napi(struct bnx2 *bp); ++ + static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) + { + u32 diff; +@@ -6173,6 +6175,7 @@ bnx2_open(struct net_device *dev) + bnx2_disable_int(bp); + + bnx2_setup_int_mode(bp, disable_msi); ++ bnx2_init_napi(bp); + bnx2_napi_enable(bp); + rc = bnx2_alloc_mem(bp); + if (rc) +@@ -8021,7 +8024,7 @@ bnx2_init_napi(struct bnx2 *bp) + { + int i; + +- for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) { ++ for (i = 0; i < bp->irq_nvecs; i++) { + struct bnx2_napi *bnapi = &bp->bnx2_napi[i]; + int (*poll)(struct napi_struct *, int); + +@@ -8090,7 +8093,6 @@ bnx2_init_one(struct pci_dev *pdev, cons + dev->ethtool_ops = &bnx2_ethtool_ops; + + bp = netdev_priv(dev); +- bnx2_init_napi(bp); + + pci_set_drvdata(pdev, dev); + diff --git a/queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch b/queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch new file mode 100644 index 00000000000..96fd0e557ae --- /dev/null +++ b/queue-2.6.32/char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch @@ -0,0 +1,50 @@ +From 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 21 Sep 2010 11:49:01 +0200 +Subject: char: Mark /dev/zero and /dev/kmem as not capable of writeback + +From: Jan Kara + +commit 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 upstream. + +These devices don't do any writeback but their device inodes still can get +dirty so mark bdi appropriately so that bdi code does the right thing and files +inodes to lists of bdi carrying the device inodes. + +Signed-off-by: Jan Kara +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/mem.c | 3 ++- + fs/char_dev.c | 4 +++- + 2 files changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/char/mem.c ++++ b/drivers/char/mem.c +@@ -822,10 +822,11 @@ static const struct file_operations zero + /* + * capabilities for /dev/zero + * - permits private mappings, "copies" are taken of the source of zeros ++ * - no writeback happens + */ + static struct backing_dev_info zero_bdi = { + .name = "char/mem", +- .capabilities = BDI_CAP_MAP_COPY, ++ .capabilities = BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK, + }; + + static const struct file_operations full_fops = { +--- a/fs/char_dev.c ++++ b/fs/char_dev.c +@@ -39,7 +39,9 @@ struct backing_dev_info directly_mappabl + #endif + /* permit direct mmap, for read, write or exec */ + BDI_CAP_MAP_DIRECT | +- BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), ++ BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | ++ /* no writeback happens */ ++ BDI_CAP_NO_ACCT_AND_WRITEBACK), + }; + + static struct kobj_map *cdev_map; diff --git a/queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch b/queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch new file mode 100644 index 00000000000..6fa8a8297c8 --- /dev/null +++ b/queue-2.6.32/cxgb3-fix-hot-plug-removal-crash.patch @@ -0,0 +1,31 @@ +From a6f018e324ba91d0464cca6895447c2b89e6d578 Mon Sep 17 00:00:00 2001 +From: Divy Le Ray +Date: Wed, 3 Mar 2010 09:49:47 +0000 +Subject: cxgb3: fix hot plug removal crash + +From: Divy Le Ray + +commit a6f018e324ba91d0464cca6895447c2b89e6d578 upstream. 
+ +queue restart tasklets need to be stopped after napi handlers are stopped +since the latter can restart them. So stop them after stopping napi. + +Signed-off-by: Divy Le Ray +Signed-off-by: David S. Miller +Signed-off-by: Brandon Philips +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/cxgb3/cxgb3_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -1274,6 +1274,7 @@ static void cxgb_down(struct adapter *ad + + free_irq_resources(adapter); + quiesce_rx(adapter); ++ t3_sge_stop(adapter); + flush_workqueue(cxgb3_wq); /* wait for external IRQ handler */ + } + diff --git a/queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch b/queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch new file mode 100644 index 00000000000..acab62615a0 --- /dev/null +++ b/queue-2.6.32/drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch @@ -0,0 +1,142 @@ +From df08cdc7ef606509debe7677c439be0ca48790e4 Mon Sep 17 00:00:00 2001 +From: Andrew Morton +Date: Wed, 22 Sep 2010 13:05:11 -0700 +Subject: drivers/pci/intel-iommu.c: fix build with older gcc's + +From: Andrew Morton + +commit df08cdc7ef606509debe7677c439be0ca48790e4 upstream. + +drivers/pci/intel-iommu.c: In function `__iommu_calculate_agaw': +drivers/pci/intel-iommu.c:437: sorry, unimplemented: inlining failed in call to 'width_to_agaw': function body not available +drivers/pci/intel-iommu.c:445: sorry, unimplemented: called from here + +Move the offending function (and its siblings) to top-of-file, remove the +forward declaration. + +Addresses https://bugzilla.kernel.org/show_bug.cgi?id=17441 + +Reported-by: Martin Mokrejs +Cc: David Woodhouse +Cc: Jesse Barnes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/intel-iommu.c | 90 +++++++++++++++++++++------------------------- + 1 file changed, 43 insertions(+), 47 deletions(-) + +--- a/drivers/pci/intel-iommu.c ++++ b/drivers/pci/intel-iommu.c +@@ -71,6 +71,49 @@ + #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) + #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) + ++/* page table handling */ ++#define LEVEL_STRIDE (9) ++#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) ++ ++static inline int agaw_to_level(int agaw) ++{ ++ return agaw + 2; ++} ++ ++static inline int agaw_to_width(int agaw) ++{ ++ return 30 + agaw * LEVEL_STRIDE; ++} ++ ++static inline int width_to_agaw(int width) ++{ ++ return (width - 30) / LEVEL_STRIDE; ++} ++ ++static inline unsigned int level_to_offset_bits(int level) ++{ ++ return (level - 1) * LEVEL_STRIDE; ++} ++ ++static inline int pfn_level_offset(unsigned long pfn, int level) ++{ ++ return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; ++} ++ ++static inline unsigned long level_mask(int level) ++{ ++ return -1UL << level_to_offset_bits(level); ++} ++ ++static inline unsigned long level_size(int level) ++{ ++ return 1UL << level_to_offset_bits(level); ++} ++ ++static inline unsigned long align_to_level(unsigned long pfn, int level) ++{ ++ return (pfn + level_size(level) - 1) & level_mask(level); ++} + + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. 
*/ +@@ -449,8 +492,6 @@ void free_iova_mem(struct iova *iova) + } + + +-static inline int width_to_agaw(int width); +- + static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) + { + unsigned long sagaw; +@@ -664,51 +705,6 @@ out: + spin_unlock_irqrestore(&iommu->lock, flags); + } + +-/* page table handling */ +-#define LEVEL_STRIDE (9) +-#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) +- +-static inline int agaw_to_level(int agaw) +-{ +- return agaw + 2; +-} +- +-static inline int agaw_to_width(int agaw) +-{ +- return 30 + agaw * LEVEL_STRIDE; +- +-} +- +-static inline int width_to_agaw(int width) +-{ +- return (width - 30) / LEVEL_STRIDE; +-} +- +-static inline unsigned int level_to_offset_bits(int level) +-{ +- return (level - 1) * LEVEL_STRIDE; +-} +- +-static inline int pfn_level_offset(unsigned long pfn, int level) +-{ +- return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; +-} +- +-static inline unsigned long level_mask(int level) +-{ +- return -1UL << level_to_offset_bits(level); +-} +- +-static inline unsigned long level_size(int level) +-{ +- return 1UL << level_to_offset_bits(level); +-} +- +-static inline unsigned long align_to_level(unsigned long pfn, int level) +-{ +- return (pfn + level_size(level) - 1) & level_mask(level); +-} +- + static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + unsigned long pfn) + { diff --git a/queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..0fee45baefd --- /dev/null +++ b/queue-2.6.32/drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From fd02db9de73faebc51240619c7c7f99bee9f65c7 Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 22 Sep 2010 13:05:09 -0700 +Subject: drivers/video/sis/sis_main.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit fd02db9de73faebc51240619c7c7f99bee9f65c7 upstream. + +The FBIOGET_VBLANK device ioctl allows unprivileged users to read 16 bytes +of uninitialized stack memory, because the "reserved" member of the +fb_vblank struct declared on the stack is not altered or zeroed before +being copied back to the user. This patch takes care of it. 
+ +Signed-off-by: Dan Rosenberg +Cc: Thomas Winischhofer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/sis/sis_main.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/video/sis/sis_main.c ++++ b/drivers/video/sis/sis_main.c +@@ -1701,6 +1701,9 @@ static int sisfb_ioctl(struct fb_info *i + break; + + case FBIOGET_VBLANK: ++ ++ memset(&sisvbblank, 0, sizeof(struct fb_vblank)); ++ + sisvbblank.count = 0; + sisvbblank.flags = sisfb_setupvbblankflags(ivideo, &sisvbblank.vcount, &sisvbblank.hcount); + diff --git a/queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..28d326fa7c1 --- /dev/null +++ b/queue-2.6.32/drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,34 @@ +From b4aaa78f4c2f9cde2f335b14f4ca30b01f9651ca Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 19:08:24 -0400 +Subject: drivers/video/via/ioctl.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit b4aaa78f4c2f9cde2f335b14f4ca30b01f9651ca upstream. + +The VIAFB_GET_INFO device ioctl allows unprivileged users to read 246 +bytes of uninitialized stack memory, because the "reserved" member of +the viafb_ioctl_info struct declared on the stack is not altered or +zeroed before being copied back to the user. This patch takes care of +it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: Florian Tobias Schandinat +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/via/ioctl.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/video/via/ioctl.c ++++ b/drivers/video/via/ioctl.c +@@ -25,6 +25,8 @@ int viafb_ioctl_get_viafb_info(u_long ar + { + struct viafb_ioctl_info viainfo; + ++ memset(&viainfo, 0, sizeof(struct viafb_ioctl_info)); ++ + viainfo.viafb_id = VIAID; + viainfo.vendor_id = PCI_VIA_VENDOR_ID; + diff --git a/queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch b/queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch new file mode 100644 index 00000000000..818b75996f3 --- /dev/null +++ b/queue-2.6.32/ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch @@ -0,0 +1,82 @@ +From 84a8dce2710cc425089a2b92acc354d4fbb5788d Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Sat, 5 Jun 2010 11:51:27 -0400 +Subject: ext4: Fix remaining racy updates of EXT4_I(inode)->i_flags + +From: Dmitry Monakhov + +commit 84a8dce2710cc425089a2b92acc354d4fbb5788d upstream. + +A few functions were still modifying i_flags in a racy manner. 
+ +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 38 ++++++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 16 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4815,20 +4815,26 @@ void ext4_set_inode_flags(struct inode * + /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ + void ext4_get_inode_flags(struct ext4_inode_info *ei) + { +- unsigned int flags = ei->vfs_inode.i_flags; ++ unsigned int vfs_fl; ++ unsigned long old_fl, new_fl; + +- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| +- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); +- if (flags & S_SYNC) +- ei->i_flags |= EXT4_SYNC_FL; +- if (flags & S_APPEND) +- ei->i_flags |= EXT4_APPEND_FL; +- if (flags & S_IMMUTABLE) +- ei->i_flags |= EXT4_IMMUTABLE_FL; +- if (flags & S_NOATIME) +- ei->i_flags |= EXT4_NOATIME_FL; +- if (flags & S_DIRSYNC) +- ei->i_flags |= EXT4_DIRSYNC_FL; ++ do { ++ vfs_fl = ei->vfs_inode.i_flags; ++ old_fl = ei->i_flags; ++ new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| ++ EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| ++ EXT4_DIRSYNC_FL); ++ if (vfs_fl & S_SYNC) ++ new_fl |= EXT4_SYNC_FL; ++ if (vfs_fl & S_APPEND) ++ new_fl |= EXT4_APPEND_FL; ++ if (vfs_fl & S_IMMUTABLE) ++ new_fl |= EXT4_IMMUTABLE_FL; ++ if (vfs_fl & S_NOATIME) ++ new_fl |= EXT4_NOATIME_FL; ++ if (vfs_fl & S_DIRSYNC) ++ new_fl |= EXT4_DIRSYNC_FL; ++ } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); + } + + static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, +@@ -5067,7 +5073,7 @@ static int ext4_inode_blocks_set(handle_ + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = 0; +- ei->i_flags &= ~EXT4_HUGE_FILE_FL; ++ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + return 0; + } + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) +@@ -5080,9 +5086,9 @@ static int ext4_inode_blocks_set(handle_ + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); +- ei->i_flags &= ~EXT4_HUGE_FILE_FL; ++ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + } else { +- ei->i_flags |= EXT4_HUGE_FILE_FL; ++ ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); + /* i_block is stored in file system block size */ + i_blocks = i_blocks >> (inode->i_blkbits - 9); + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); diff --git a/queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch b/queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch new file mode 100644 index 00000000000..4ca057e0219 --- /dev/null +++ b/queue-2.6.32/fix-unprotected-access-to-task-credentials-in-waitid.patch @@ -0,0 +1,99 @@ +From f362b73244fb16ea4ae127ced1467dd8adaa7733 Mon Sep 17 00:00:00 2001 +From: Daniel J Blueman +Date: Tue, 17 Aug 2010 23:56:55 +0100 +Subject: Fix unprotected access to task credentials in waitid() + +From: Daniel J Blueman + +commit f362b73244fb16ea4ae127ced1467dd8adaa7733 upstream. 
+ +Using a program like the following: + + #include + #include + #include + #include + + int main() { + id_t id; + siginfo_t infop; + pid_t res; + + id = fork(); + if (id == 0) { sleep(1); exit(0); } + kill(id, SIGSTOP); + alarm(1); + waitid(P_PID, id, &infop, WCONTINUED); + return 0; + } + +to call waitid() on a stopped process results in access to the child task's +credentials without the RCU read lock being held - which may be replaced in the +meantime - eliciting the following warning: + + =================================================== + [ INFO: suspicious rcu_dereference_check() usage. ] + --------------------------------------------------- + kernel/exit.c:1460 invoked rcu_dereference_check() without protection! + + other info that might help us debug this: + + rcu_scheduler_active = 1, debug_locks = 1 + 2 locks held by waitid02/22252: + #0: (tasklist_lock){.?.?..}, at: [] do_wait+0xc5/0x310 + #1: (&(&sighand->siglock)->rlock){-.-...}, at: [] + wait_consider_task+0x19a/0xbe0 + + stack backtrace: + Pid: 22252, comm: waitid02 Not tainted 2.6.35-323cd+ #3 + Call Trace: + [] lockdep_rcu_dereference+0xa4/0xc0 + [] wait_consider_task+0xaf1/0xbe0 + [] do_wait+0xf5/0x310 + [] sys_waitid+0x86/0x1f0 + [] ? child_wait_callback+0x0/0x70 + [] system_call_fastpath+0x16/0x1b + +This is fixed by holding the RCU read lock in wait_task_continued() to ensure +that the task's current credentials aren't destroyed between us reading the +cred pointer and us reading the UID from those credentials. + +Furthermore, protect wait_task_stopped() in the same way. + +We don't need to keep holding the RCU read lock once we've read the UID from +the credentials as holding the RCU read lock doesn't stop the target task from +changing its creds under us - so the credentials may be outdated immediately +after we've read the pointer, lock or no lock. + +Signed-off-by: Daniel J Blueman +Signed-off-by: David Howells +Acked-by: Paul E. McKenney +Acked-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/exit.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1374,8 +1374,7 @@ static int wait_task_stopped(struct wait + if (!unlikely(wo->wo_flags & WNOWAIT)) + *p_code = 0; + +- /* don't need the RCU readlock here as we're holding a spinlock */ +- uid = __task_cred(p)->uid; ++ uid = task_uid(p); + unlock_sig: + spin_unlock_irq(&p->sighand->siglock); + if (!exit_code) +@@ -1448,7 +1447,7 @@ static int wait_task_continued(struct wa + } + if (!unlikely(wo->wo_flags & WNOWAIT)) + p->signal->flags &= ~SIGNAL_STOP_CONTINUED; +- uid = __task_cred(p)->uid; ++ uid = task_uid(p); + spin_unlock_irq(&p->sighand->siglock); + + pid = task_pid_vnr(p); diff --git a/queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch b/queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch new file mode 100644 index 00000000000..2b637840000 --- /dev/null +++ b/queue-2.6.32/guard-page-for-stacks-that-grow-upwards.patch @@ -0,0 +1,95 @@ +From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001 +From: Luck, Tony +Date: Tue, 24 Aug 2010 11:44:18 -0700 +Subject: guard page for stacks that grow upwards + +From: Luck, Tony + +commit 8ca3eb08097f6839b2206e2242db4179aee3cfb3 upstream. + +pa-risc and ia64 have stacks that grow upwards. Check that +they do not run into other mappings. By making VM_GROWSUP +0x0 on architectures that do not ever use it, we can avoid +some unpleasant #ifdefs in check_stack_guard_page(). 
+ +Signed-off-by: Tony Luck +Signed-off-by: Linus Torvalds +Cc: dann frazier +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm.h | 8 +++++++- + mm/memory.c | 15 +++++++++++---- + mm/mmap.c | 3 --- + 3 files changed, 18 insertions(+), 8 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -77,7 +77,11 @@ extern unsigned int kobjsize(const void + #define VM_MAYSHARE 0x00000080 + + #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ ++#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) + #define VM_GROWSUP 0x00000200 ++#else ++#define VM_GROWSUP 0x00000000 ++#endif + #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ + #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ + +@@ -1195,8 +1199,10 @@ unsigned long ra_submit(struct file_ra_s + + /* Do stack extension */ + extern int expand_stack(struct vm_area_struct *vma, unsigned long address); +-#ifdef CONFIG_IA64 ++#if VM_GROWSUP + extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); ++#else ++ #define expand_upwards(vma, address) do { } while (0) + #endif + extern int expand_stack_downwards(struct vm_area_struct *vma, + unsigned long address); +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2630,11 +2630,9 @@ out_release: + } + + /* +- * This is like a special single-page "expand_downwards()", +- * except we must first make sure that 'address-PAGE_SIZE' ++ * This is like a special single-page "expand_{down|up}wards()", ++ * except we must first make sure that 'address{-|+}PAGE_SIZE' + * doesn't hit another vma. +- * +- * The "find_vma()" will do the right thing even if we wrap + */ + static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) + { +@@ -2653,6 +2651,15 @@ static inline int check_stack_guard_page + + expand_stack(vma, address - PAGE_SIZE); + } ++ if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { ++ struct vm_area_struct *next = vma->vm_next; ++ ++ /* As VM_GROWSDOWN but s/below/above/ */ ++ if (next && next->vm_start == address + PAGE_SIZE) ++ return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; ++ ++ expand_upwards(vma, address + PAGE_SIZE); ++ } + return 0; + } + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1600,9 +1600,6 @@ static int acct_stack_growth(struct vm_a + * PA-RISC uses this for its stack; IA64 for its Register Backing Store. + * vma is the last one with address > vma->vm_end. Have to extend vma. + */ +-#ifndef CONFIG_IA64 +-static +-#endif + int expand_upwards(struct vm_area_struct *vma, unsigned long address) + { + int error; diff --git a/queue-2.6.32/ia64-fix-siglock.patch b/queue-2.6.32/ia64-fix-siglock.patch new file mode 100644 index 00000000000..40d8f8817f2 --- /dev/null +++ b/queue-2.6.32/ia64-fix-siglock.patch @@ -0,0 +1,101 @@ +From f574c843191728d9407b766a027f779dcd27b272 Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Thu, 9 Sep 2010 15:16:56 -0700 +Subject: IA64: fix siglock + +From: Tony Luck + +commit f574c843191728d9407b766a027f779dcd27b272 upstream. + +When ia64 converted to using ticket locks, an inline implementation +of trylock/unlock in fsys.S was missed. This was not noticed because +in most circumstances it simply resulted in using the slow path because +the siglock was apparently not available (under old spinlock rules). + +Problems occur when the ticket spinlock has value 0x0 (when first +initialised, or when it wraps around). At this point the fsys.S +code acquires the lock (changing the 0x0 to 0x1. 
If another process +attempts to get the lock at this point, it will change the value from +0x1 to 0x2 (using new ticket lock rules). Then the fsys.S code will +free the lock using old spinlock rules by writing 0x0 to it. From +here a variety of bad things can happen. + +Signed-off-by: Tony Luck +Signed-off-by: Greg Kroah-Hartman + +--- + arch/ia64/kernel/fsys.S | 46 +++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 39 insertions(+), 7 deletions(-) + +--- a/arch/ia64/kernel/fsys.S ++++ b/arch/ia64/kernel/fsys.S +@@ -424,14 +424,26 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 + andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP + + #ifdef CONFIG_SMP +- mov r17=1 +- ;; +- cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock ++ // __ticket_spin_trylock(r31) ++ ld4 r17=[r31] + mov r8=EINVAL // default to EINVAL + ;; ++ extr r9=r17,17,15 ++ ;; ++ xor r18=r17,r9 ++ adds r19=1,r17 ++ ;; ++ extr.u r18=r18,0,15 ++ ;; ++ cmp.eq p0,p7=0,r18 ++(p7) br.cond.spnt.many .lock_contention ++ mov.m ar.ccv=r17 ++ ;; ++ cmpxchg4.acq r9=[r31],r19,ar.ccv ++ ;; ++ cmp4.eq p0,p7=r9,r17 ++(p7) br.cond.spnt.many .lock_contention + ld8 r3=[r2] // re-read current->blocked now that we hold the lock +- cmp4.ne p6,p0=r18,r0 +-(p6) br.cond.spnt.many .lock_contention + ;; + #else + ld8 r3=[r2] // re-read current->blocked now that we hold the lock +@@ -490,7 +502,17 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 + (p6) br.cond.spnt.few 1b // yes -> retry + + #ifdef CONFIG_SMP +- st4.rel [r31]=r0 // release the lock ++ // __ticket_spin_unlock(r31) ++ adds r31=2,r31 ++ ;; ++ ld2.bias r2=[r31] ++ mov r3=65534 ++ ;; ++ adds r2=2,r2 ++ ;; ++ and r3=r3,r2 ++ ;; ++ st2.rel [r31]=r3 + #endif + SSM_PSR_I(p0, p9, r31) + ;; +@@ -512,7 +534,17 @@ EX(.fail_efault, (p15) st8 [r34]=r3) + + .sig_pending: + #ifdef CONFIG_SMP +- st4.rel [r31]=r0 // release the lock ++ // __ticket_spin_unlock(r31) ++ adds r31=2,r31 ++ ;; ++ ld2.bias r2=[r31] ++ mov r3=65534 ++ ;; ++ adds r2=2,r2 ++ ;; ++ and r3=r3,r2 ++ ;; ++ st2.rel [r31]=r3 + #endif + SSM_PSR_I(p0, p9, r17) + ;; diff --git a/queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch b/queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch new file mode 100644 index 00000000000..44c9a02fe9f --- /dev/null +++ b/queue-2.6.32/ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch @@ -0,0 +1,103 @@ +From 2d2b6901649a62977452be85df53eda2412def24 Mon Sep 17 00:00:00 2001 +From: Petr Tesarik +Date: Wed, 15 Sep 2010 15:35:48 -0700 +Subject: IA64: Optimize ticket spinlocks in fsys_rt_sigprocmask + +From: Petr Tesarik + +commit 2d2b6901649a62977452be85df53eda2412def24 upstream. + +Tony's fix (f574c843191728d9407b766a027f779dcd27b272) has a small bug, +it incorrectly uses "r3" as a scratch register in the first of the two +unlock paths ... it is also inefficient. Optimize the fast path again. 
+ +Signed-off-by: Petr Tesarik +Signed-off-by: Tony Luck +Signed-off-by: Greg Kroah-Hartman + +--- + arch/ia64/kernel/fsys.S | 42 +++++++++++------------------------------- + 1 file changed, 11 insertions(+), 31 deletions(-) + +--- a/arch/ia64/kernel/fsys.S ++++ b/arch/ia64/kernel/fsys.S +@@ -420,34 +420,31 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 + ;; + + RSM_PSR_I(p0, r18, r19) // mask interrupt delivery +- mov ar.ccv=0 + andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP ++ mov r8=EINVAL // default to EINVAL + + #ifdef CONFIG_SMP + // __ticket_spin_trylock(r31) + ld4 r17=[r31] +- mov r8=EINVAL // default to EINVAL +- ;; +- extr r9=r17,17,15 + ;; +- xor r18=r17,r9 ++ mov.m ar.ccv=r17 ++ extr.u r9=r17,17,15 + adds r19=1,r17 ++ extr.u r18=r17,0,15 + ;; +- extr.u r18=r18,0,15 ++ cmp.eq p6,p7=r9,r18 + ;; +- cmp.eq p0,p7=0,r18 ++(p6) cmpxchg4.acq r9=[r31],r19,ar.ccv ++(p6) dep.z r20=r19,1,15 // next serving ticket for unlock + (p7) br.cond.spnt.many .lock_contention +- mov.m ar.ccv=r17 +- ;; +- cmpxchg4.acq r9=[r31],r19,ar.ccv + ;; + cmp4.eq p0,p7=r9,r17 ++ adds r31=2,r31 + (p7) br.cond.spnt.many .lock_contention + ld8 r3=[r2] // re-read current->blocked now that we hold the lock + ;; + #else + ld8 r3=[r2] // re-read current->blocked now that we hold the lock +- mov r8=EINVAL // default to EINVAL + #endif + add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 + add r19=IA64_TASK_SIGNAL_OFFSET,r16 +@@ -503,16 +500,8 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 + + #ifdef CONFIG_SMP + // __ticket_spin_unlock(r31) +- adds r31=2,r31 +- ;; +- ld2.bias r2=[r31] +- mov r3=65534 +- ;; +- adds r2=2,r2 +- ;; +- and r3=r3,r2 +- ;; +- st2.rel [r31]=r3 ++ st2.rel [r31]=r20 ++ mov r20=0 // i must not leak kernel bits... + #endif + SSM_PSR_I(p0, p9, r31) + ;; +@@ -535,16 +524,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3) + .sig_pending: + #ifdef CONFIG_SMP + // __ticket_spin_unlock(r31) +- adds r31=2,r31 +- ;; +- ld2.bias r2=[r31] +- mov r3=65534 +- ;; +- adds r2=2,r2 +- ;; +- and r3=r3,r2 +- ;; +- st2.rel [r31]=r3 ++ st2.rel [r31]=r20 // release the lock + #endif + SSM_PSR_I(p0, p9, r17) + ;; diff --git a/queue-2.6.32/inotify-send-in_unmount-events.patch b/queue-2.6.32/inotify-send-in_unmount-events.patch new file mode 100644 index 00000000000..f0b0d27f54b --- /dev/null +++ b/queue-2.6.32/inotify-send-in_unmount-events.patch @@ -0,0 +1,36 @@ +From 611da04f7a31b2208e838be55a42c7a1310ae321 Mon Sep 17 00:00:00 2001 +From: Eric Paris +Date: Wed, 28 Jul 2010 10:18:37 -0400 +Subject: inotify: send IN_UNMOUNT events + +From: Eric Paris + +commit 611da04f7a31b2208e838be55a42c7a1310ae321 upstream. + +Since the .31 or so notify rewrite inotify has not sent events about +inodes which are unmounted. This patch restores those events. 
+ +Signed-off-by: Eric Paris +Cc: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman + +--- + fs/notify/inotify/inotify_user.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/notify/inotify/inotify_user.c ++++ b/fs/notify/inotify/inotify_user.c +@@ -106,8 +106,11 @@ static inline __u32 inotify_arg_to_mask( + { + __u32 mask; + +- /* everything should accept their own ignored and cares about children */ +- mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); ++ /* ++ * everything should accept their own ignored, cares about children, ++ * and should receive events when the inode is unmounted ++ */ ++ mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); + + /* mask off the flags used to open the fd */ + mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); diff --git a/queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch b/queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch new file mode 100644 index 00000000000..b1757656f61 --- /dev/null +++ b/queue-2.6.32/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch @@ -0,0 +1,55 @@ +From 3d96406c7da1ed5811ea52a3b0905f4f0e295376 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 10 Sep 2010 09:59:51 +0100 +Subject: KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring + +From: David Howells + +commit 3d96406c7da1ed5811ea52a3b0905f4f0e295376 upstream. + +Fix a bug in keyctl_session_to_parent() whereby it tries to check the ownership +of the parent process's session keyring whether or not the parent has a session +keyring [CVE-2010-2960]. + +This results in the following oops: + + BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 + IP: [] keyctl_session_to_parent+0x251/0x443 + ... + Call Trace: + [] ? keyctl_session_to_parent+0x67/0x443 + [] ? __do_fault+0x24b/0x3d0 + [] sys_keyctl+0xb4/0xb8 + [] system_call_fastpath+0x16/0x1b + +if the parent process has no session keyring. + +If the system is using pam_keyinit then it mostly protected against this as all +processes derived from a login will have inherited the session keyring created +by pam_keyinit during the log in procedure. + +To test this, pam_keyinit calls need to be commented out in /etc/pam.d/. 
+ +Reported-by: Tavis Ormandy +Signed-off-by: David Howells +Acked-by: Tavis Ormandy +Cc: dann frazier +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/keyctl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -1292,7 +1292,8 @@ long keyctl_session_to_parent(void) + goto not_permitted; + + /* the keyrings must have the same UID */ +- if (pcred ->tgcred->session_keyring->uid != mycred->euid || ++ if ((pcred->tgcred->session_keyring && ++ pcred->tgcred->session_keyring->uid != mycred->euid) || + mycred->tgcred->session_keyring->uid != mycred->euid) + goto not_permitted; + diff --git a/queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch b/queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch new file mode 100644 index 00000000000..a40f718886b --- /dev/null +++ b/queue-2.6.32/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch @@ -0,0 +1,69 @@ +From 9d1ac65a9698513d00e5608d93fca0c53f536c14 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 10 Sep 2010 09:59:46 +0100 +Subject: KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() + +From: David Howells + +commit 9d1ac65a9698513d00e5608d93fca0c53f536c14 upstream. + +There's an protected access to the parent process's credentials in the middle +of keyctl_session_to_parent(). This results in the following RCU warning: + + =================================================== + [ INFO: suspicious rcu_dereference_check() usage. ] + --------------------------------------------------- + security/keys/keyctl.c:1291 invoked rcu_dereference_check() without protection! + + other info that might help us debug this: + + rcu_scheduler_active = 1, debug_locks = 0 + 1 lock held by keyctl-session-/2137: + #0: (tasklist_lock){.+.+..}, at: [] keyctl_session_to_parent+0x60/0x236 + + stack backtrace: + Pid: 2137, comm: keyctl-session- Not tainted 2.6.36-rc2-cachefs+ #1 + Call Trace: + [] lockdep_rcu_dereference+0xaa/0xb3 + [] keyctl_session_to_parent+0xed/0x236 + [] sys_keyctl+0xb4/0xb6 + [] system_call_fastpath+0x16/0x1b + +The code should take the RCU read lock to make sure the parents credentials +don't go away, even though it's holding a spinlock and has IRQ disabled. 
+ +Signed-off-by: David Howells +Signed-off-by: Linus Torvalds +Cc: dann frazier +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/keyctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -1259,6 +1259,7 @@ long keyctl_session_to_parent(void) + keyring_r = NULL; + + me = current; ++ rcu_read_lock(); + write_lock_irq(&tasklist_lock); + + parent = me->real_parent; +@@ -1313,6 +1314,7 @@ long keyctl_session_to_parent(void) + set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME); + + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + if (oldcred) + put_cred(oldcred); + return 0; +@@ -1321,6 +1323,7 @@ already_same: + ret = 0; + not_permitted: + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + put_cred(cred); + return ret; + diff --git a/queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch b/queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch new file mode 100644 index 00000000000..349bb87d3d3 --- /dev/null +++ b/queue-2.6.32/mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch @@ -0,0 +1,185 @@ +From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001 +From: Christoph Lameter +Date: Thu, 9 Sep 2010 16:38:17 -0700 +Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake + +From: Christoph Lameter + +commit aa45484031ddee09b06350ab8528bfe5b2c76d1c upstream. + +Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is +cheaper than scanning a number of lists. To avoid synchronization +overhead, counter deltas are maintained on a per-cpu basis and drained +both periodically and when the delta is above a threshold. On large CPU +systems, the difference between the estimated and real value of +NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than +number of real free page in buddy, the VM can allocate pages below min +watermark, at worst reducing the real number of pages to zero. Even if +the OOM killer kills some victim for freeing memory, it may not free +memory if the exit path requires a new page resulting in livelock. + +This patch introduces a zone_page_state_snapshot() function (courtesy of +Christoph) that takes a slightly more accurate view of an arbitrary vmstat +counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid +the watermark being accidentally broken. The estimate is not perfect and +may result in cache line bounces but is expected to be lighter than the +IPI calls necessary to continually drain the per-cpu counters while kswapd +is awake. 
+ +Signed-off-by: Christoph Lameter +Signed-off-by: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/mmzone.h | 13 +++++++++++++ + include/linux/vmstat.h | 22 ++++++++++++++++++++++ + mm/mmzone.c | 21 +++++++++++++++++++++ + mm/page_alloc.c | 4 ++-- + mm/vmstat.c | 15 ++++++++++++++- + 5 files changed, 72 insertions(+), 3 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -290,6 +290,13 @@ struct zone { + unsigned long watermark[NR_WMARK]; + + /* ++ * When free pages are below this point, additional steps are taken ++ * when reading the number of free pages to avoid per-cpu counter ++ * drift allowing watermarks to be breached ++ */ ++ unsigned long percpu_drift_mark; ++ ++ /* + * We don't know if the memory that we're going to allocate will be freeable + * or/and it will be released eventually, so to avoid totally wasting several + * GB of ram we must reserve some of the lower zone memory (otherwise we risk +@@ -460,6 +467,12 @@ static inline int zone_is_oom_locked(con + return test_bit(ZONE_OOM_LOCKED, &zone->flags); + } + ++#ifdef CONFIG_SMP ++unsigned long zone_nr_free_pages(struct zone *zone); ++#else ++#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) ++#endif /* CONFIG_SMP */ ++ + /* + * The "priority" of VM scanning is how much of the queues we will scan in one + * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -166,6 +166,28 @@ static inline unsigned long zone_page_st + return x; + } + ++/* ++ * More accurate version that also considers the currently pending ++ * deltas. For that we need to loop over all cpus to find the current ++ * deltas. There is no synchronization so the result cannot be ++ * exactly accurate either. ++ */ ++static inline unsigned long zone_page_state_snapshot(struct zone *zone, ++ enum zone_stat_item item) ++{ ++ long x = atomic_long_read(&zone->vm_stat[item]); ++ ++#ifdef CONFIG_SMP ++ int cpu; ++ for_each_online_cpu(cpu) ++ x += zone_pcp(zone, cpu)->vm_stat_diff[item]; ++ ++ if (x < 0) ++ x = 0; ++#endif ++ return x; ++} ++ + extern unsigned long global_reclaimable_pages(void); + extern unsigned long zone_reclaimable_pages(struct zone *zone); + +--- a/mm/mmzone.c ++++ b/mm/mmzone.c +@@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pf + return 1; + } + #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ ++ ++#ifdef CONFIG_SMP ++/* Called when a more accurate view of NR_FREE_PAGES is needed */ ++unsigned long zone_nr_free_pages(struct zone *zone) ++{ ++ unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); ++ ++ /* ++ * While kswapd is awake, it is considered the zone is under some ++ * memory pressure. Under pressure, there is a risk that ++ * per-cpu-counter-drift will allow the min watermark to be breached ++ * potentially causing a live-lock. 
While kswapd is awake and ++ * free pages are low, get a better estimate for free pages ++ */ ++ if (nr_free_pages < zone->percpu_drift_mark && ++ !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) ++ return zone_page_state_snapshot(zone, NR_FREE_PAGES); ++ ++ return nr_free_pages; ++} ++#endif /* CONFIG_SMP */ +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1365,7 +1365,7 @@ int zone_watermark_ok(struct zone *z, in + { + /* free_pages my go negative - that's OK */ + long min = mark; +- long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; ++ long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; + int o; + + if (alloc_flags & ALLOC_HIGH) +@@ -2250,7 +2250,7 @@ void show_free_areas(void) + " all_unreclaimable? %s" + "\n", + zone->name, +- K(zone_page_state(zone, NR_FREE_PAGES)), ++ K(zone_nr_free_pages(zone)), + K(min_wmark_pages(zone)), + K(low_wmark_pages(zone)), + K(high_wmark_pages(zone)), +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -136,10 +136,23 @@ static void refresh_zone_stat_thresholds + int threshold; + + for_each_populated_zone(zone) { ++ unsigned long max_drift, tolerate_drift; ++ + threshold = calculate_threshold(zone); + + for_each_online_cpu(cpu) + zone_pcp(zone, cpu)->stat_threshold = threshold; ++ ++ /* ++ * Only set percpu_drift_mark if there is a danger that ++ * NR_FREE_PAGES reports the low watermark is ok when in fact ++ * the min watermark could be breached by an allocation ++ */ ++ tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); ++ max_drift = num_online_cpus() * threshold; ++ if (max_drift > tolerate_drift) ++ zone->percpu_drift_mark = high_wmark_pages(zone) + ++ max_drift; + } + } + +@@ -715,7 +728,7 @@ static void zoneinfo_show_print(struct s + "\n scanned %lu" + "\n spanned %lu" + "\n present %lu", +- zone_page_state(zone, NR_FREE_PAGES), ++ zone_nr_free_pages(zone), + min_wmark_pages(zone), + low_wmark_pages(zone), + high_wmark_pages(zone), diff --git a/queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch b/queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch new file mode 100644 index 00000000000..615cea07fd5 --- /dev/null +++ b/queue-2.6.32/mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch @@ -0,0 +1,79 @@ +From 9ee493ce0a60bf42c0f8fd0b0fe91df5704a1cbf Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 9 Sep 2010 16:38:18 -0700 +Subject: mm: page allocator: drain per-cpu lists after direct reclaim allocation fails + +From: Mel Gorman + +commit 9ee493ce0a60bf42c0f8fd0b0fe91df5704a1cbf upstream. + +When under significant memory pressure, a process enters direct reclaim +and immediately afterwards tries to allocate a page. If it fails and no +further progress is made, it's possible the system will go OOM. However, +on systems with large amounts of memory, it's possible that a significant +number of pages are on per-cpu lists and inaccessible to the calling +process. This leads to a process entering direct reclaim more often than +it should increasing the pressure on the system and compounding the +problem. + +This patch notes that if direct reclaim is making progress but allocations +are still failing that the system is already under heavy pressure. In +this case, it drains the per-cpu lists and tries the allocation a second +time before continuing. 
+ +Signed-off-by: Mel Gorman +Reviewed-by: Minchan Kim +Reviewed-by: KAMEZAWA Hiroyuki +Reviewed-by: KOSAKI Motohiro +Reviewed-by: Christoph Lameter +Cc: Dave Chinner +Cc: Wu Fengguang +Cc: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1681,6 +1681,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m + struct page *page = NULL; + struct reclaim_state reclaim_state; + struct task_struct *p = current; ++ bool drained = false; + + cond_resched(); + +@@ -1699,14 +1700,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m + + cond_resched(); + +- if (order != 0) +- drain_all_pages(); ++ if (unlikely(!(*did_some_progress))) ++ return NULL; + +- if (likely(*did_some_progress)) +- page = get_page_from_freelist(gfp_mask, nodemask, order, ++retry: ++ page = get_page_from_freelist(gfp_mask, nodemask, order, + zonelist, high_zoneidx, + alloc_flags, preferred_zone, + migratetype); ++ ++ /* ++ * If an allocation failed after direct reclaim, it could be because ++ * pages are pinned on the per-cpu lists. Drain them and try again ++ */ ++ if (!page && !drained) { ++ drain_all_pages(); ++ drained = true; ++ goto retry; ++ } ++ + return page; + } + diff --git a/queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch b/queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch new file mode 100644 index 00000000000..23afa014dc0 --- /dev/null +++ b/queue-2.6.32/mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch @@ -0,0 +1,80 @@ +From 72853e2991a2702ae93aaf889ac7db743a415dd3 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 9 Sep 2010 16:38:16 -0700 +Subject: mm: page allocator: update free page counters after pages are placed on the free list + +From: Mel Gorman + +commit 72853e2991a2702ae93aaf889ac7db743a415dd3 upstream. + +When allocating a page, the system uses NR_FREE_PAGES counters to +determine if watermarks would remain intact after the allocation was made. +This check is made without interrupts disabled or the zone lock held and +so is race-prone by nature. Unfortunately, when pages are being freed in +batch, the counters are updated before the pages are added on the list. +During this window, the counters are misleading as the pages do not exist +yet. When under significant pressure on systems with large numbers of +CPUs, it's possible for processes to make progress even though they should +have been stalled. This is particularly problematic if a number of the +processes are using GFP_ATOMIC as the min watermark can be accidentally +breached and in extreme cases, the system can livelock. + +This patch updates the counters after the pages have been added to the +list. This makes the allocator more cautious with respect to preserving +the watermarks and mitigates livelock possibilities. 
+ +[akpm@linux-foundation.org: avoid modifying incoming args] +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Reviewed-by: Minchan Kim +Reviewed-by: KAMEZAWA Hiroyuki +Reviewed-by: Christoph Lameter +Reviewed-by: KOSAKI Motohiro +Acked-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -531,13 +531,13 @@ static void free_pcppages_bulk(struct zo + { + int migratetype = 0; + int batch_free = 0; ++ int to_free = count; + + spin_lock(&zone->lock); + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->pages_scanned = 0; + +- __mod_zone_page_state(zone, NR_FREE_PAGES, count); +- while (count) { ++ while (to_free) { + struct page *page; + struct list_head *list; + +@@ -562,8 +562,9 @@ static void free_pcppages_bulk(struct zo + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ + __free_one_page(page, zone, 0, page_private(page)); + trace_mm_page_pcpu_drain(page, 0, page_private(page)); +- } while (--count && --batch_free && !list_empty(list)); ++ } while (--to_free && --batch_free && !list_empty(list)); + } ++ __mod_zone_page_state(zone, NR_FREE_PAGES, count); + spin_unlock(&zone->lock); + } + +@@ -574,8 +575,8 @@ static void free_one_page(struct zone *z + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->pages_scanned = 0; + +- __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); + __free_one_page(page, zone, order, migratetype); ++ __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); + spin_unlock(&zone->lock); + } + diff --git a/queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch b/queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch new file mode 100644 index 00000000000..e638cfd2526 --- /dev/null +++ b/queue-2.6.32/oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch @@ -0,0 +1,43 @@ +From c33f543d320843e1732534c3931da4bbd18e6c14 Mon Sep 17 00:00:00 2001 +From: Patrick Simmons +Date: Wed, 8 Sep 2010 10:34:28 -0400 +Subject: oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron 540) + +From: Patrick Simmons + +commit c33f543d320843e1732534c3931da4bbd18e6c14 upstream. + +This patch adds CPU type detection for the Intel Celeron 540, which is +part of the Core 2 family according to Wikipedia; the family and ID pair +is absent from the Volume 3B table referenced in the source code +comments. I have tested this patch on an Intel Celeron 540 machine +reporting itself as Family 6 Model 22, and OProfile runs on the machine +without issue. 
+ +Spec: + + http://download.intel.com/design/mobile/SPECUPDT/317667.pdf + +Signed-off-by: Patrick Simmons +Acked-by: Andi Kleen +Acked-by: Arnd Bergmann +Signed-off-by: Robert Richter +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/oprofile/nmi_int.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/oprofile/nmi_int.c ++++ b/arch/x86/oprofile/nmi_int.c +@@ -621,7 +621,9 @@ static int __init ppro_init(char **cpu_t + case 14: + *cpu_type = "i386/core"; + break; +- case 15: case 23: ++ case 0x0f: ++ case 0x16: ++ case 0x17: + *cpu_type = "i386/core_2"; + break; + case 0x1a: diff --git a/queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch b/queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch new file mode 100644 index 00000000000..29d8e1b9623 --- /dev/null +++ b/queue-2.6.32/percpu-fix-pcpu_last_unit_cpu.patch @@ -0,0 +1,45 @@ +From 46b30ea9bc3698bc1d1e6fd726c9601d46fa0a91 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Tue, 21 Sep 2010 07:57:19 +0200 +Subject: percpu: fix pcpu_last_unit_cpu + +From: Tejun Heo + +commit 46b30ea9bc3698bc1d1e6fd726c9601d46fa0a91 upstream. + +pcpu_first/last_unit_cpu are used to track which cpu has the first and +last units assigned. This in turn is used to determine the span of a +chunk for man/unmap cache flushes and whether an address belongs to +the first chunk or not in per_cpu_ptr_to_phys(). + +When the number of possible CPUs isn't power of two, a chunk may +contain unassigned units towards the end of a chunk. The logic to +determine pcpu_last_unit_cpu was incorrect when there was an unused +unit at the end of a chunk. It failed to ignore the unused unit and +assigned the unused marker NR_CPUS to pcpu_last_unit_cpu. + +This was discovered through kdump failure which was caused by +malfunctioning per_cpu_ptr_to_phys() on a kvm setup with 50 possible +CPUs by CAI Qian. + +Signed-off-by: Tejun Heo +Reported-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + mm/percpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/percpu.c ++++ b/mm/percpu.c +@@ -1702,9 +1702,9 @@ int __init pcpu_setup_first_chunk(const + + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; ++ pcpu_last_unit_cpu = cpu; + } + } +- pcpu_last_unit_cpu = cpu; + pcpu_nr_units = unit; + + for_each_possible_cpu(cpu) diff --git a/queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch b/queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch new file mode 100644 index 00000000000..e98c3a218c4 --- /dev/null +++ b/queue-2.6.32/pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch @@ -0,0 +1,66 @@ +From 950eaaca681c44aab87a46225c9e44f902c080aa Mon Sep 17 00:00:00 2001 +From: Paul E. McKenney +Date: Tue, 31 Aug 2010 17:00:18 -0700 +Subject: pid: make setpgid() system call use RCU read-side critical section + +From: Paul E. McKenney + +commit 950eaaca681c44aab87a46225c9e44f902c080aa upstream. + +[ 23.584719] +[ 23.584720] =================================================== +[ 23.585059] [ INFO: suspicious rcu_dereference_check() usage. ] +[ 23.585176] --------------------------------------------------- +[ 23.585176] kernel/pid.c:419 invoked rcu_dereference_check() without protection! 
+[ 23.585176] +[ 23.585176] other info that might help us debug this: +[ 23.585176] +[ 23.585176] +[ 23.585176] rcu_scheduler_active = 1, debug_locks = 1 +[ 23.585176] 1 lock held by rc.sysinit/728: +[ 23.585176] #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x5f/0x193 +[ 23.585176] +[ 23.585176] stack backtrace: +[ 23.585176] Pid: 728, comm: rc.sysinit Not tainted 2.6.36-rc2 #2 +[ 23.585176] Call Trace: +[ 23.585176] [] lockdep_rcu_dereference+0x99/0xa2 +[ 23.585176] [] find_task_by_pid_ns+0x50/0x6a +[ 23.585176] [] find_task_by_vpid+0x1d/0x1f +[ 23.585176] [] sys_setpgid+0x67/0x193 +[ 23.585176] [] system_call_fastpath+0x16/0x1b +[ 24.959669] type=1400 audit(1282938522.956:4): avc: denied { module_request } for pid=766 comm="hwclock" kmod="char-major-10-135" scontext=system_u:system_r:hwclock_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclas + +It turns out that the setpgid() system call fails to enter an RCU +read-side critical section before doing a PID-to-task_struct translation. +This commit therefore does rcu_read_lock() before the translation, and +also does rcu_read_unlock() after the last use of the returned pointer. + +Reported-by: Andrew Morton +Signed-off-by: Paul E. McKenney +Acked-by: David Howells +Cc: Jiri Slaby +Cc: Oleg Nesterov +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sys.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -962,6 +962,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid + pgid = pid; + if (pgid < 0) + return -EINVAL; ++ rcu_read_lock(); + + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. -DaveM +@@ -1015,6 +1016,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid + out: + /* All paths lead to here, thus we are safe. -DaveM */ + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + return err; + } + diff --git a/queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch b/queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch new file mode 100644 index 00000000000..015d063a3c8 --- /dev/null +++ b/queue-2.6.32/sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch @@ -0,0 +1,46 @@ +From e75e863dd5c7d96b91ebbd241da5328fc38a78cc Mon Sep 17 00:00:00 2001 +From: Stanislaw Gruszka +Date: Tue, 14 Sep 2010 16:35:14 +0200 +Subject: sched: Fix user time incorrectly accounted as system time on 32-bit + +From: Stanislaw Gruszka + +commit e75e863dd5c7d96b91ebbd241da5328fc38a78cc upstream. + +We have 32-bit variable overflow possibility when multiply in +task_times() and thread_group_times() functions. When the +overflow happens then the scaled utime value becomes erroneously +small and the scaled stime becomes i erroneously big. 
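The wrap is easy to demonstrate outside the kernel. In the sketch below (illustrative only; the cputime values are made up) the first form multiplies in 32 bits and only then widens the already-wrapped result, which is what the old (u64)(rtime * cputime.utime) did on a 32-bit kernel; the second widens first, as the fix below does:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          uint32_t rtime = 2000000;        /* made-up total runtime, cputime units */
          uint32_t utime = 3000;           /* made-up user time                    */

          uint64_t wrapped = (uint64_t)(rtime * utime);   /* old: product wraps at 2^32 */
          uint64_t correct = (uint64_t)rtime * utime;     /* new: full 64-bit product   */

          printf("wrapped=%llu correct=%llu\n",
                 (unsigned long long)wrapped,
                 (unsigned long long)correct);
          return 0;
  }

The scaled utime is then derived from the wrapped product and comes out far too small, so the difference shows up as system time, matching the symptom in the subject line.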
+ +Reported here: + + https://bugzilla.redhat.com/show_bug.cgi?id=633037 + https://bugzilla.kernel.org/show_bug.cgi?id=16559 + +Reported-by: Michael Chapman +Reported-by: Ciriaco Garcia de Celis +Signed-off-by: Stanislaw Gruszka +Signed-off-by: Peter Zijlstra +Cc: Hidetoshi Seto +LKML-Reference: <20100914143513.GB8415@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -5341,9 +5341,9 @@ void thread_group_times(struct task_stru + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + + if (total) { +- u64 temp; ++ u64 temp = rtime; + +- temp = (u64)(rtime * cputime.utime); ++ temp *= cputime.utime; + do_div(temp, total); + utime = (cputime_t)temp; + } else diff --git a/queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch b/queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch new file mode 100644 index 00000000000..878d94a1aea --- /dev/null +++ b/queue-2.6.32/scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch @@ -0,0 +1,77 @@ +From 2a1b7e575b80ceb19ea50bfa86ce0053ea57181d Mon Sep 17 00:00:00 2001 +From: Ryan Kuester +Date: Mon, 26 Apr 2010 18:11:54 -0500 +Subject: SCSI: mptsas: fix hangs caused by ATA pass-through + +From: Ryan Kuester + +commit 2a1b7e575b80ceb19ea50bfa86ce0053ea57181d upstream. + +I may have an explanation for the LSI 1068 HBA hangs provoked by ATA +pass-through commands, in particular by smartctl. + +First, my version of the symptoms. On an LSI SAS1068E B3 HBA running +01.29.00.00 firmware, with SATA disks, and with smartd running, I'm seeing +occasional task, bus, and host resets, some of which lead to hard faults of +the HBA requiring a reboot. Abusively looping the smartctl command, + + # while true; do smartctl -a /dev/sdb > /dev/null; done + +dramatically increases the frequency of these failures to nearly one per +minute. A high IO load through the HBA while looping smartctl seems to +improve the chance of a full scsi host reset or a non-recoverable hang. + +I reduced what smartctl was doing down to a simple test case which +causes the hang with a single IO when pointed at the sd interface. See +the code at the bottom of this e-mail. It uses an SG_IO ioctl to issue +a single pass-through ATA identify device command. If the buffer +userspace gives for the read data has certain alignments, the task is +issued to the HBA but the HBA fails to respond. If run against the sg +interface, neither the test code nor smartctl causes a hang. + +sd and sg handle the SG_IO ioctl slightly differently. Unless you +specifically set a flag to do direct IO, sg passes a buffer of its own, +which is page-aligned, to the block layer and later copies the result +into the userspace buffer regardless of its alignment. sd, on the other +hand, always does direct IO unless the userspace buffer fails an +alignment test at block/blk-map.c line 57, in which case a page-aligned +buffer is created and used for the transfer. + +The alignment test currently checks for word-alignment, the default +setup by scsi_lib.c; therefore, userspace buffers of almost any +alignment are given directly to the HBA as DMA targets. The LSI 1068 +hardware doesn't seem to like at least a couple of the alignments which +cross a page boundary (see the test code below). Curiously, many +page-boundary-crossing alignments do work just fine. 
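A reproducer in the spirit of that test program (an illustrative sketch, not the original code; the device path and the 8-byte misalignment are made up) issues one ATA IDENTIFY DEVICE through the SAT ATA PASS-THROUGH(16) CDB with a data buffer that deliberately straddles a page boundary; run it as root:

  #include <fcntl.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <scsi/sg.h>

  int main(int argc, char **argv)
  {
          /* ATA PASS-THROUGH(16): PIO data-in, 1 sector, IDENTIFY DEVICE (0xec) */
          unsigned char cdb[16] = { 0x85, 0x08, 0x0e, 0, 0, 0, 1,
                                    0, 0, 0, 0, 0, 0, 0, 0xec, 0 };
          unsigned char sense[32];
          unsigned char *page, *buf;
          struct sg_io_hdr io;
          int fd;

          if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)    /* e.g. /dev/sdb */
                  return 1;
          if (posix_memalign((void **)&page, 4096, 2 * 4096))
                  return 1;
          buf = page + 4096 - 8;           /* 512-byte read crosses the page boundary */

          memset(&io, 0, sizeof(io));
          io.interface_id    = 'S';
          io.cmd_len         = sizeof(cdb);
          io.cmdp            = cdb;
          io.dxfer_direction = SG_DXFER_FROM_DEV;
          io.dxferp          = buf;
          io.dxfer_len       = 512;
          io.sbp             = sense;
          io.mx_sb_len       = sizeof(sense);
          io.timeout         = 5000;       /* ms */

          return ioctl(fd, SG_IO, &io) ? 1 : 0;
  }

Run against the sd node the misaligned buffer is handed straight to the HBA; run against the matching sg node it is bounced and the command completes normally, which is the asymmetry described above.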
+ +So, either the hardware has an bug handling certain alignments or the +hardware has a stricter alignment requirement than the driver is +advertising. If stricter alignment is required, then in no case should +misaligned buffers from userspace be allowed through without being +bounced or at least causing an error to be returned. + +It seems the mptsas driver could use blk_queue_dma_alignment() to advertise +a stricter alignment requirement. If it does, sd does the right thing and +bounces misaligned buffers (see block/blk-map.c line 57). The following +patch to 2.6.34-rc5 makes my symptoms go away. I'm sure this is the wrong +place for this code, but it gets my idea across. + +Acked-by: "Desai, Kashyap" +Signed-off-by: James Bottomley +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/message/fusion/mptscsih.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/message/fusion/mptscsih.c ++++ b/drivers/message/fusion/mptscsih.c +@@ -2439,6 +2439,8 @@ mptscsih_slave_configure(struct scsi_dev + ioc->name,sdev->tagged_supported, sdev->simple_tags, + sdev->ordered_tags)); + ++ blk_queue_dma_alignment (sdev->request_queue, 512 - 1); ++ + return 0; + } + diff --git a/queue-2.6.32/series b/queue-2.6.32/series index 7b15f8f4d1d..f7ebf8d22d3 100644 --- a/queue-2.6.32/series +++ b/queue-2.6.32/series @@ -21,3 +21,30 @@ drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch bonding-correctly-process-non-linear-skbs.patch staging-vt6655-fix-buffer-overflow.patch net-llc-make-opt-unsigned-in-llc_ui_setsockopt.patch +pid-make-setpgid-system-call-use-rcu-read-side-critical-section.patch +sched-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch +oprofile-add-support-for-intel-cpu-family-6-model-22-intel-celeron-540.patch +char-mark-dev-zero-and-dev-kmem-as-not-capable-of-writeback.patch +drivers-pci-intel-iommu.c-fix-build-with-older-gcc-s.patch +drivers-video-sis-sis_main.c-prevent-reading-uninitialized-stack-memory.patch +percpu-fix-pcpu_last_unit_cpu.patch +aio-check-for-multiplication-overflow-in-do_io_submit.patch +inotify-send-in_unmount-events.patch +scsi-mptsas-fix-hangs-caused-by-ata-pass-through.patch +ext4-fix-remaining-racy-updates-of-ext4_i-inode-i_flags.patch +ia64-fix-siglock.patch +ia64-optimize-ticket-spinlocks-in-fsys_rt_sigprocmask.patch +keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent.patch +keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring.patch +xfs-prevent-reading-uninitialized-stack-memory.patch +drivers-video-via-ioctl.c-prevent-reading-uninitialized-stack-memory.patch +acpi-disable-_osi-windows-2009-on-asus-k50ij.patch +bnx2-fix-netpoll-crash.patch +bnx2-fix-hang-during-rmmod-bnx2.patch +at91-change-dma-resource-index.patch +cxgb3-fix-hot-plug-removal-crash.patch +mm-page-allocator-drain-per-cpu-lists-after-direct-reclaim-allocation-fails.patch +mm-page-allocator-calculate-a-better-estimate-of-nr_free_pages-when-memory-is-low-and-kswapd-is-awake.patch +mm-page-allocator-update-free-page-counters-after-pages-are-placed-on-the-free-list.patch +guard-page-for-stacks-that-grow-upwards.patch +fix-unprotected-access-to-task-credentials-in-waitid.patch diff --git a/queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..d7182566bab --- /dev/null +++ b/queue-2.6.32/xfs-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 Mon Sep 17 00:00:00 2001 
+From: Dan Rosenberg +Date: Mon, 6 Sep 2010 18:24:57 -0400 +Subject: xfs: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 upstream. + +The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12 +bytes of uninitialized stack memory, because the fsxattr struct +declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero) +the 12-byte fsx_pad member before copying it back to the user. This +patch takes care of it. + +Signed-off-by: Dan Rosenberg +Reviewed-by: Eric Sandeen +Signed-off-by: Alex Elder +Cc: dann frazier +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/linux-2.6/xfs_ioctl.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/xfs/linux-2.6/xfs_ioctl.c ++++ b/fs/xfs/linux-2.6/xfs_ioctl.c +@@ -789,6 +789,8 @@ xfs_ioc_fsgetxattr( + { + struct fsxattr fa; + ++ memset(&fa, 0, sizeof(struct fsxattr)); ++ + xfs_ilock(ip, XFS_ILOCK_SHARED); + fa.fsx_xflags = xfs_ip2xflags(ip); + fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
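A quick way to see the leak from user space is to dump the pad bytes after the ioctl. The sketch below is illustrative only (not the reporter's program) and assumes the xfsprogs headers plus any file on an XFS mount; before the memset() added above, the 12 fsx_pad bytes came back as leftover kernel stack contents, with the fix they read back as zeros.

  #include <stdio.h>
  #include <fcntl.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <xfs/xfs_fs.h>          /* struct fsxattr, XFS_IOC_FSGETXATTR */

  int main(int argc, char **argv)
  {
          struct fsxattr fa;
          unsigned int i;
          int fd;

          if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                  return 1;
          if (ioctl(fd, XFS_IOC_FSGETXATTR, &fa) < 0)
                  return 1;
          for (i = 0; i < sizeof(fa.fsx_pad); i++)
                  printf("%02x ", fa.fsx_pad[i]);
          printf("\n");
          close(fd);
          return 0;
  }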