3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 Jan 2013 19:38:00 +0000 (11:38 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 Jan 2013 19:38:00 +0000 (11:38 -0800)
added patches:
aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch
thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch
udf-don-t-increment-lenextents-while-writing-to-a-hole.patch
udf-fix-memory-leak-while-allocating-blocks-during-write.patch
x86-amd-disable-way-access-filter-on-piledriver-cpus.patch

queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch [new file with mode: 0644]
queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch [new file with mode: 0644]
queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch [new file with mode: 0644]
queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch [new file with mode: 0644]

diff --git a/queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch b/queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch
new file mode 100644 (file)
index 0000000..d397283
--- /dev/null
@@ -0,0 +1,73 @@
+From ecashin@coraid.com  Mon Jan 14 11:25:08 2013
+From: Ed Cashin <ecashin@coraid.com>
+Date: Sat, 12 Jan 2013 06:43:35 -0500
+Subject: aoe: do not call bdi_init after blk_alloc_queue
+To: stable@vger.kernel.org
+Cc: ecashin@coraid.com
+Message-ID: <05215162ab30011eecf7a5c28ceed233@coraid.com>
+
+From: Ed Cashin <ecashin@coraid.com>
+
+commit 0a41409c518083133e79015092585d68915865be upstream; it does not
+apply cleanly, so this version for kernels older than 3.7.x differs.
+
+blk_alloc_queue has already done a bdi_init, so do not bdi_init
+again in aoeblk_gdalloc.  The extra call causes list corruption
+in the per-CPU backing dev info stats lists.
+
+Affected users see console WARNINGs about list_del corruption in
+percpu_counter_destroy when running "rmmod aoe" or "aoeflush -a"
+after AoE targets have been detected and initialized by the
+system.
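
As a minimal user-space sketch (simplified list primitives, not kernel
code) of that failure mode: re-initializing a counter links its list
node into the global percpu_counters list a second time, and the
eventual unlink then finds inconsistent neighbours, which is what the
kernel's list debugging warns about.

    #include <stdio.h>

    struct node { struct node *prev, *next; };

    static struct node head = { &head, &head };

    static void list_add(struct node *n)  /* link n right after head */
    {
        n->next = head.next;
        n->prev = &head;
        head.next->prev = n;
        head.next = n;
    }

    static void list_del(struct node *n)  /* unlink n, with sanity checks */
    {
        if (n->prev->next != n || n->next->prev != n)
            printf("list_del corruption (as in the reported WARNINGs)\n");
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    int main(void)
    {
        struct node a, b;

        list_add(&a);  /* bdi_init() via blk_alloc_queue() */
        list_add(&b);  /* another counter registered in between */
        list_add(&a);  /* the duplicate bdi_init() in aoeblk_gdalloc() */
        list_del(&b);  /* percpu_counter_destroy() now trips the check */
        return 0;
    }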
+
+The patch below applies to v3.6.11, with its v47 aoe driver.  It
+is expected to apply to all currently maintained stable kernels
+except 3.7.y.  A related but different fix has been posted for
+3.7.y.
+
+References:
+
+  Red Hat Bugzilla ticket with the original report
+  https://bugzilla.redhat.com/show_bug.cgi?id=853064
+
+  LKML discussion of bug and fix
+  http://thread.gmane.org/gmane.linux.kernel/1416336/focus=1416497
+
+Reported-by: Josh Boyer <jwboyer@redhat.com>
+Signed-off-by: Ed Cashin <ecashin@coraid.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/aoe/aoe.h    |    2 +-
+ drivers/block/aoe/aoeblk.c |    5 -----
+ 2 files changed, 1 insertion(+), 6 deletions(-)
+
+--- a/drivers/block/aoe/aoe.h
++++ b/drivers/block/aoe/aoe.h
+@@ -1,5 +1,5 @@
+ /* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
+-#define VERSION "47"
++#define VERSION "47q"
+ #define AOE_MAJOR 152
+ #define DEVICE_NAME "aoe"
+--- a/drivers/block/aoe/aoeblk.c
++++ b/drivers/block/aoe/aoeblk.c
+@@ -276,8 +276,6 @@ aoeblk_gdalloc(void *vp)
+               goto err_mempool;
+       blk_queue_make_request(d->blkq, aoeblk_make_request);
+       d->blkq->backing_dev_info.name = "aoe";
+-      if (bdi_init(&d->blkq->backing_dev_info))
+-              goto err_blkq;
+       spin_lock_irqsave(&d->lock, flags);
+       gd->major = AOE_MAJOR;
+       gd->first_minor = d->sysminor * AOE_PARTITIONS;
+@@ -298,9 +296,6 @@ aoeblk_gdalloc(void *vp)
+       aoedisk_add_sysfs(d);
+       return;
+-err_blkq:
+-      blk_cleanup_queue(d->blkq);
+-      d->blkq = NULL;
+ err_mempool:
+       mempool_destroy(d->bufpool);
+ err_disk:
diff --git a/queue-3.4/series b/queue-3.4/series
index 6c1314db591dd2f49646f21d2a5d608abd175cb2..024dea6b4767743e10ca3e8ceca477dd2240c5f3 100644 (file)
--- a/queue-3.4/series
@@ -59,3 +59,8 @@ ext4-check-dioread_nolock-on-remount.patch
 jbd2-fix-assertion-failure-in-jbd2_journal_flush.patch
 ext4-do-not-try-to-write-superblock-on-ro-remount-w-o-journal.patch
 ext4-lock-i_mutex-when-truncating-orphan-inodes.patch
+aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch
+udf-fix-memory-leak-while-allocating-blocks-during-write.patch
+udf-don-t-increment-lenextents-while-writing-to-a-hole.patch
+thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch
+x86-amd-disable-way-access-filter-on-piledriver-cpus.patch
diff --git a/queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch b/queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch
new file mode 100644 (file)
index 0000000..418f069
--- /dev/null
@@ -0,0 +1,90 @@
+From 1f1d06c34f7675026326cd9f39ff91e4555cf355 Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Tue, 29 May 2012 15:06:23 -0700
+Subject: thp, memcg: split hugepage for memcg oom on cow
+
+From: David Rientjes <rientjes@google.com>
+
+commit 1f1d06c34f7675026326cd9f39ff91e4555cf355 upstream.
+
+On COW, a new hugepage is allocated and charged to the memcg.  If the
+system is oom or the charge to the memcg fails, however, the fault
+handler will return VM_FAULT_OOM, which results in an oom kill.
+
+Instead, it's possible to fall back to splitting the hugepage so that
+the COW results only in an order-0 page being allocated and charged to
+the memcg, which has a higher likelihood of succeeding.  This is
+expensive because the hugepage must be split in the page fault handler,
+but it is much better than unnecessarily oom killing a process.
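
A compact user-space sketch (assumed names and sizes, not the kernel
code; the real flow is in the hunks below) of that charge-split-retry
pattern:

    #include <stdbool.h>
    #include <stdio.h>

    #define VM_FAULT_OOM 0x0001

    static long memcg_bytes_left = 4096;  /* room for one order-0 page */

    static bool charge(long bytes)        /* memcg charge, simplified */
    {
        if (bytes > memcg_bytes_left)
            return false;
        memcg_bytes_left -= bytes;
        return true;
    }

    static int huge_cow_fault(bool *huge)
    {
        if (charge(2L << 20))             /* try the 2 MiB copy first */
            return 0;
        *huge = false;                    /* split_huge_page(): back to ptes */
        return VM_FAULT_OOM;              /* caller retries at order 0 */
    }

    static int fault(void)
    {
        bool huge = true;
    retry:
        if (huge) {
            int ret = huge_cow_fault(&huge);
            if (ret & VM_FAULT_OOM)       /* pmd was split; retry on the pte */
                goto retry;
            return ret;
        }
        return charge(4096) ? 0 : VM_FAULT_OOM;
    }

    int main(void)
    {
        printf("fault() = %d\n", fault()); /* 0: no oom kill needed */
        return 0;
    }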
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c |    3 +++
+ mm/memory.c      |   18 +++++++++++++++---
+ 2 files changed, 18 insertions(+), 3 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -950,6 +950,8 @@ int do_huge_pmd_wp_page(struct mm_struct
+               count_vm_event(THP_FAULT_FALLBACK);
+               ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
+                                                  pmd, orig_pmd, page, haddr);
++              if (ret & VM_FAULT_OOM)
++                      split_huge_page(page);
+               put_page(page);
+               goto out;
+       }
+@@ -957,6 +959,7 @@ int do_huge_pmd_wp_page(struct mm_struct
+       if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+               put_page(new_page);
++              split_huge_page(page);
+               put_page(page);
+               ret |= VM_FAULT_OOM;
+               goto out;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3494,6 +3494,7 @@ int handle_mm_fault(struct mm_struct *mm
+       if (unlikely(is_vm_hugetlb_page(vma)))
+               return hugetlb_fault(mm, vma, address, flags);
++retry:
+       pgd = pgd_offset(mm, address);
+       pud = pud_alloc(mm, pgd, address);
+       if (!pud)
+@@ -3507,13 +3508,24 @@ int handle_mm_fault(struct mm_struct *mm
+                                                         pmd, flags);
+       } else {
+               pmd_t orig_pmd = *pmd;
++              int ret;
++
+               barrier();
+               if (pmd_trans_huge(orig_pmd)) {
+                       if (flags & FAULT_FLAG_WRITE &&
+                           !pmd_write(orig_pmd) &&
+-                          !pmd_trans_splitting(orig_pmd))
+-                              return do_huge_pmd_wp_page(mm, vma, address,
+-                                                         pmd, orig_pmd);
++                          !pmd_trans_splitting(orig_pmd)) {
++                              ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
++                                                        orig_pmd);
++                              /*
++                               * If COW results in an oom, the huge pmd will
++                               * have been split, so retry the fault on the
++                               * pte for a smaller charge.
++                               */
++                              if (unlikely(ret & VM_FAULT_OOM))
++                                      goto retry;
++                              return ret;
++                      }
+                       return 0;
+               }
+       }
diff --git a/queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch b/queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch
new file mode 100644 (file)
index 0000000..f444631
--- /dev/null
@@ -0,0 +1,61 @@
+From fb719c59bdb4fca86ee1fd1f42ab3735ca12b6b2 Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <namjae.jeon@samsung.com>
+Date: Wed, 10 Oct 2012 00:09:12 +0900
+Subject: udf: don't increment lenExtents while writing to a hole
+
+From: Namjae Jeon <namjae.jeon@samsung.com>
+
+commit fb719c59bdb4fca86ee1fd1f42ab3735ca12b6b2 upstream.
+
+Incrementing lenExtents even while writing into a hole is bad for
+performance: calls to udf_discard_prealloc and udf_truncate_tail_extent
+can no longer take their early return once isize != lenExtents.
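
A small sketch (simplified; the real helpers live in fs/udf) of the
fast path this keeps intact:

    #include <stdio.h>

    /* Early-return model for the truncate helpers: when the accounted
     * extent length still matches i_size, the extent walk is skipped. */
    static int tail_work_needed(long long i_size, long long i_lenExtents)
    {
        return i_size != i_lenExtents;
    }

    int main(void)
    {
        /* filling a hole must not bump i_lenExtents ... */
        printf("%d\n", tail_work_needed(8192, 8192));  /* 0: fast path kept */
        /* ... or every later call pays for a full extent walk */
        printf("%d\n", tail_work_needed(8192, 12288)); /* 1: fast path lost */
        return 0;
    }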
+
+Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
+Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Shuah Khan <shuah.khan@hp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/udf/inode.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -574,6 +574,7 @@ static sector_t inode_getblk(struct inod
+       struct udf_inode_info *iinfo = UDF_I(inode);
+       int goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
+       int lastblock = 0;
++      bool isBeyondEOF;
+       *err = 0;
+       *new = 0;
+@@ -653,7 +654,7 @@ static sector_t inode_getblk(struct inod
+       /* Are we beyond EOF? */
+       if (etype == -1) {
+               int ret;
+-
++              isBeyondEOF = 1;
+               if (count) {
+                       if (c)
+                               laarr[0] = laarr[1];
+@@ -696,6 +697,7 @@ static sector_t inode_getblk(struct inod
+               endnum = c + 1;
+               lastblock = 1;
+       } else {
++              isBeyondEOF = 0;
+               endnum = startnum = ((count > 2) ? 2 : count);
+               /* if the current extent is in position 0,
+@@ -743,7 +745,8 @@ static sector_t inode_getblk(struct inod
+                       *err = -ENOSPC;
+                       return 0;
+               }
+-              iinfo->i_lenExtents += inode->i_sb->s_blocksize;
++              if (isBeyondEOF)
++                      iinfo->i_lenExtents += inode->i_sb->s_blocksize;
+       }
+       /* if the extent the requsted block is located in contains multiple
diff --git a/queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch b/queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch
new file mode 100644 (file)
index 0000000..31cb3df
--- /dev/null
@@ -0,0 +1,41 @@
+From 2fb7d99d0de3fd8ae869f35ab682581d8455887a Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <namjae.jeon@samsung.com>
+Date: Wed, 10 Oct 2012 00:08:56 +0900
+Subject: udf: fix memory leak while allocating blocks during write
+
+From: Namjae Jeon <namjae.jeon@samsung.com>
+
+commit 2fb7d99d0de3fd8ae869f35ab682581d8455887a upstream.
+
+Need to brelse the buffer_head stored in cur_epos and next_epos.
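
The fix itself just adds the two missing brelse() calls (hunks below).
As a sketch of the idiom that avoids this class of leak in the first
place, with a hypothetical helper and relying on brelse(NULL) being a
safe no-op:

    /* Hypothetical shape, not the real inode_getblk(): one exit label
     * releases every extent position, so an early error return cannot
     * forget one. */
    static int getblk_shape(struct inode *inode, int *err)
    {
        struct extent_position prev = {}, cur = {}, next = {};
        int ret = 0;

        /* ... extent walk that may set prev.bh / cur.bh / next.bh ... */

        if (!allocate_block(inode)) {  /* assumed helper */
            *err = -ENOSPC;
            goto out;                  /* the releases still run */
        }
        /* ... map the new block, set ret ... */
    out:
        brelse(prev.bh);               /* brelse(NULL) is a no-op */
        brelse(cur.bh);
        brelse(next.bh);
        return ret;
    }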
+
+Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
+Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Shuah Khan <shuah.khan@hp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/udf/inode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -738,6 +738,8 @@ static sector_t inode_getblk(struct inod
+                               goal, err);
+               if (!newblocknum) {
+                       brelse(prev_epos.bh);
++                      brelse(cur_epos.bh);
++                      brelse(next_epos.bh);
+                       *err = -ENOSPC;
+                       return 0;
+               }
+@@ -768,6 +770,8 @@ static sector_t inode_getblk(struct inod
+       udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+       brelse(prev_epos.bh);
++      brelse(cur_epos.bh);
++      brelse(next_epos.bh);
+       newblock = udf_get_pblock(inode->i_sb, newblocknum,
+                               iinfo->i_location.partitionReferenceNum, 0);
diff --git a/queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch b/queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch
new file mode 100644 (file)
index 0000000..be9cc2f
--- /dev/null
@@ -0,0 +1,70 @@
+From 2bbf0a1427c377350f001fbc6260995334739ad7 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@amd.com>
+Date: Wed, 31 Oct 2012 17:20:50 +0100
+Subject: x86, amd: Disable way access filter on Piledriver CPUs
+
+From: Andre Przywara <andre.przywara@amd.com>
+
+commit 2bbf0a1427c377350f001fbc6260995334739ad7 upstream.
+
+The Way Access Filter in recent AMD CPUs may hurt the performance of
+some workloads due to aliasing issues in the L1 cache.
+This patch disables it on the affected CPUs.
+
+The issue is similar to last year's:
+http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html
+This new patch does not replace the old one; we just need another
+quirk for newer CPUs.
+
+The performance penalty without the patch depends on the
+circumstances, but is a bit less than last year's 3%.
+
+The affected workloads are those that access code from the same
+physical page under different virtual addresses: for example,
+different processes using the same libraries with ASLR, or multiple
+instances of PIE binaries.  The code needs to be accessed
+simultaneously from both cores of the same compute unit.
+
+More details can be found here:
+http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf
+
+The affected CPUs are those with the core known as Piledriver.
+That includes the newer parts of the AMD A-Series (aka Trinity) and
+the just-released CPUs of the FX-Series (aka Vishera).
+The model numbering is a bit odd here: FX CPUs have model 2, the
+A-Series has model 10h, with possible extensions up to 1Fh; hence
+the range of model IDs.
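
As a user-space sketch of checking whether the running CPU falls into
that family/model window (the MSR itself, 0xc0011021, is only
accessible from ring 0), using the GCC/Clang <cpuid.h> helper:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;

        unsigned int family = (eax >> 8) & 0xf;
        unsigned int model  = (eax >> 4) & 0xf;

        if (family == 0xf) {           /* AMD: extended fields apply */
            family += (eax >> 20) & 0xff;
            model  |= ((eax >> 16) & 0xf) << 4;
        }

        printf("family 0x%x model 0x%x -> %saffected\n", family, model,
               (family == 0x15 && model >= 0x02 && model < 0x20)
                   ? "" : "not ");
        return 0;
    }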
+
+Signed-off-by: Andre Przywara <osp@andrep.de>
+Link: http://lkml.kernel.org/r/1351700450-9277-1-git-send-email-osp@andrep.de
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/amd.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -612,6 +612,20 @@ static void __cpuinit init_amd(struct cp
+               }
+       }
++      /*
++       * The way access filter has a performance penalty on some workloads.
++       * Disable it on the affected CPUs.
++       */
++      if ((c->x86 == 0x15) &&
++          (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
++              u64 val;
++
++              if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
++                      val |= 0x1E;
++                      checking_wrmsrl(0xc0011021, val);
++              }
++      }
++
+       cpu_detect_cache_sizes(c);
+       /* Multi core CPU? */