From 26378e271143daa232a93d4d0cf878c0bf0a175f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Jan 2013 11:38:00 -0800 Subject: [PATCH] 3.4-stable patches added patches: aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch udf-don-t-increment-lenextents-while-writing-to-a-hole.patch udf-fix-memory-leak-while-allocating-blocks-during-write.patch x86-amd-disable-way-access-filter-on-piledriver-cpus.patch --- ...-call-bdi_init-after-blk_alloc_queue.patch | 73 +++++++++++++++ queue-3.4/series | 5 ++ ...-split-hugepage-for-memcg-oom-on-cow.patch | 90 +++++++++++++++++++ ...t-lenextents-while-writing-to-a-hole.patch | 61 +++++++++++++ ...while-allocating-blocks-during-write.patch | 41 +++++++++ ...way-access-filter-on-piledriver-cpus.patch | 70 +++++++++++++++ 6 files changed, 340 insertions(+) create mode 100644 queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch create mode 100644 queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch create mode 100644 queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch create mode 100644 queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch create mode 100644 queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch diff --git a/queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch b/queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch new file mode 100644 index 00000000000..d397283ba3e --- /dev/null +++ b/queue-3.4/aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch @@ -0,0 +1,73 @@ +From ecashin@coraid.com Mon Jan 14 11:25:08 2013 +From: Ed Cashin +Date: Sat, 12 Jan 2013 06:43:35 -0500 +Subject: aoe: do not call bdi_init after blk_alloc_queue +To: stable@vger.kernel.org +Cc: ecashin@coraid.com +Message-ID: <05215162ab30011eecf7a5c28ceed233@coraid.com> + +From: Ed Cashin + +commit 0a41409c518083133e79015092585d68915865be upstream, but doesn't +apply, so this version is different 
for older kernels than 3.7.x + +blk_alloc_queue has already done a bdi_init, so do not bdi_init +again in aoeblk_gdalloc. The extra call causes list corruption +in the per-CPU backing dev info stats lists. + +Affected users see console WARNINGs about list_del corruption on +percpu_counter_destroy when doing "rmmod aoe" or "aoeflush -a" +when AoE targets have been detected and initialized by the +system. + +The patch below applies to v3.6.11, with its v47 aoe driver. It +is expected to apply to all currently maintained stable kernels +except 3.7.y. A related but different fix has been posted for +3.7.y. + +References: + + RedHat bugzilla ticket with original report + https://bugzilla.redhat.com/show_bug.cgi?id=853064 + + LKML discussion of bug and fix + http://thread.gmane.org/gmane.linux.kernel/1416336/focus=1416497 + +Reported-by: Josh Boyer +Signed-off-by: Ed Cashin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/aoe/aoe.h | 2 +- + drivers/block/aoe/aoeblk.c | 5 ----- + 2 files changed, 1 insertion(+), 6 deletions(-) + +--- a/drivers/block/aoe/aoe.h ++++ b/drivers/block/aoe/aoe.h +@@ -1,5 +1,5 @@ + /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. 
*/ +-#define VERSION "47" ++#define VERSION "47q" + #define AOE_MAJOR 152 + #define DEVICE_NAME "aoe" + +--- a/drivers/block/aoe/aoeblk.c ++++ b/drivers/block/aoe/aoeblk.c +@@ -276,8 +276,6 @@ aoeblk_gdalloc(void *vp) + goto err_mempool; + blk_queue_make_request(d->blkq, aoeblk_make_request); + d->blkq->backing_dev_info.name = "aoe"; +- if (bdi_init(&d->blkq->backing_dev_info)) +- goto err_blkq; + spin_lock_irqsave(&d->lock, flags); + gd->major = AOE_MAJOR; + gd->first_minor = d->sysminor * AOE_PARTITIONS; +@@ -298,9 +296,6 @@ aoeblk_gdalloc(void *vp) + aoedisk_add_sysfs(d); + return; + +-err_blkq: +- blk_cleanup_queue(d->blkq); +- d->blkq = NULL; + err_mempool: + mempool_destroy(d->bufpool); + err_disk: diff --git a/queue-3.4/series b/queue-3.4/series index 6c1314db591..024dea6b476 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -59,3 +59,8 @@ ext4-check-dioread_nolock-on-remount.patch jbd2-fix-assertion-failure-in-jbd2_journal_flush.patch ext4-do-not-try-to-write-superblock-on-ro-remount-w-o-journal.patch ext4-lock-i_mutex-when-truncating-orphan-inodes.patch +aoe-do-not-call-bdi_init-after-blk_alloc_queue.patch +udf-fix-memory-leak-while-allocating-blocks-during-write.patch +udf-don-t-increment-lenextents-while-writing-to-a-hole.patch +thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch +x86-amd-disable-way-access-filter-on-piledriver-cpus.patch diff --git a/queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch b/queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch new file mode 100644 index 00000000000..418f069f5f8 --- /dev/null +++ b/queue-3.4/thp-memcg-split-hugepage-for-memcg-oom-on-cow.patch @@ -0,0 +1,90 @@ +From 1f1d06c34f7675026326cd9f39ff91e4555cf355 Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Tue, 29 May 2012 15:06:23 -0700 +Subject: thp, memcg: split hugepage for memcg oom on cow + +From: David Rientjes + +commit 1f1d06c34f7675026326cd9f39ff91e4555cf355 upstream. 
+ +On COW, a new hugepage is allocated and charged to the memcg. If the +system is oom or the charge to the memcg fails, however, the fault +handler will return VM_FAULT_OOM which results in an oom kill. + +Instead, it's possible to fall back to splitting the hugepage so that the +COW results only in an order-0 page being allocated and charged to the +memcg which has a higher likelihood to succeed. This is expensive +because the hugepage must be split in the page fault handler, but it is +much better than unnecessarily oom killing a process. + +Signed-off-by: David Rientjes +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Acked-by: KAMEZAWA Hiroyuki +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 3 +++ + mm/memory.c | 18 +++++++++++++++--- + 2 files changed, 18 insertions(+), 3 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -950,6 +950,8 @@ int do_huge_pmd_wp_page(struct mm_struct + count_vm_event(THP_FAULT_FALLBACK); + ret = do_huge_pmd_wp_page_fallback(mm, vma, address, + pmd, orig_pmd, page, haddr); ++ if (ret & VM_FAULT_OOM) ++ split_huge_page(page); + put_page(page); + goto out; + } +@@ -957,6 +959,7 @@ int do_huge_pmd_wp_page(struct mm_struct + + if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { + put_page(new_page); ++ split_huge_page(page); + put_page(page); + ret |= VM_FAULT_OOM; + goto out; +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3494,6 +3494,7 @@ int handle_mm_fault(struct mm_struct *mm + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, flags); + ++retry: + pgd = pgd_offset(mm, address); + pud = pud_alloc(mm, pgd, address); + if (!pud) +@@ -3507,13 +3508,24 @@ int handle_mm_fault(struct mm_struct *mm + pmd, flags); + } else { + pmd_t orig_pmd = *pmd; ++ int ret; ++ + barrier(); + if (pmd_trans_huge(orig_pmd)) { + if (flags & FAULT_FLAG_WRITE && + !pmd_write(orig_pmd) && +- 
!pmd_trans_splitting(orig_pmd)) +- return do_huge_pmd_wp_page(mm, vma, address, +- pmd, orig_pmd); ++ !pmd_trans_splitting(orig_pmd)) { ++ ret = do_huge_pmd_wp_page(mm, vma, address, pmd, ++ orig_pmd); ++ /* ++ * If COW results in an oom, the huge pmd will ++ * have been split, so retry the fault on the ++ * pte for a smaller charge. ++ */ ++ if (unlikely(ret & VM_FAULT_OOM)) ++ goto retry; ++ return ret; ++ } + return 0; + } + } diff --git a/queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch b/queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch new file mode 100644 index 00000000000..f444631d732 --- /dev/null +++ b/queue-3.4/udf-don-t-increment-lenextents-while-writing-to-a-hole.patch @@ -0,0 +1,61 @@ +From fb719c59bdb4fca86ee1fd1f42ab3735ca12b6b2 Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Wed, 10 Oct 2012 00:09:12 +0900 +Subject: udf: don't increment lenExtents while writing to a hole + +From: Namjae Jeon + +commit fb719c59bdb4fca86ee1fd1f42ab3735ca12b6b2 upstream. + +Incrementing lenExtents even while writing to a hole is bad +for performance as calls to udf_discard_prealloc and +udf_truncate_tail_extent would not return from start if +isize != lenExtents + +Signed-off-by: Namjae Jeon +Signed-off-by: Ashish Sangwan +Signed-off-by: Jan Kara +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + fs/udf/inode.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -574,6 +574,7 @@ static sector_t inode_getblk(struct inod + struct udf_inode_info *iinfo = UDF_I(inode); + int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; + int lastblock = 0; ++ bool isBeyondEOF; + + *err = 0; + *new = 0; +@@ -653,7 +654,7 @@ static sector_t inode_getblk(struct inod + /* Are we beyond EOF? 
*/ + if (etype == -1) { + int ret; +- ++ isBeyondEOF = 1; + if (count) { + if (c) + laarr[0] = laarr[1]; +@@ -696,6 +697,7 @@ static sector_t inode_getblk(struct inod + endnum = c + 1; + lastblock = 1; + } else { ++ isBeyondEOF = 0; + endnum = startnum = ((count > 2) ? 2 : count); + + /* if the current extent is in position 0, +@@ -743,7 +745,8 @@ static sector_t inode_getblk(struct inod + *err = -ENOSPC; + return 0; + } +- iinfo->i_lenExtents += inode->i_sb->s_blocksize; ++ if (isBeyondEOF) ++ iinfo->i_lenExtents += inode->i_sb->s_blocksize; + } + + /* if the extent the requsted block is located in contains multiple diff --git a/queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch b/queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch new file mode 100644 index 00000000000..31cb3df85ca --- /dev/null +++ b/queue-3.4/udf-fix-memory-leak-while-allocating-blocks-during-write.patch @@ -0,0 +1,41 @@ +From 2fb7d99d0de3fd8ae869f35ab682581d8455887a Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Wed, 10 Oct 2012 00:08:56 +0900 +Subject: udf: fix memory leak while allocating blocks during write + +From: Namjae Jeon + +commit 2fb7d99d0de3fd8ae869f35ab682581d8455887a upstream. + +Need to brelse the buffer_head stored in cur_epos and next_epos. 
+ +Signed-off-by: Namjae Jeon +Signed-off-by: Ashish Sangwan +Signed-off-by: Jan Kara +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + fs/udf/inode.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -738,6 +738,8 @@ static sector_t inode_getblk(struct inod + goal, err); + if (!newblocknum) { + brelse(prev_epos.bh); ++ brelse(cur_epos.bh); ++ brelse(next_epos.bh); + *err = -ENOSPC; + return 0; + } +@@ -768,6 +770,8 @@ static sector_t inode_getblk(struct inod + udf_update_extents(inode, laarr, startnum, endnum, &prev_epos); + + brelse(prev_epos.bh); ++ brelse(cur_epos.bh); ++ brelse(next_epos.bh); + + newblock = udf_get_pblock(inode->i_sb, newblocknum, + iinfo->i_location.partitionReferenceNum, 0); diff --git a/queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch b/queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch new file mode 100644 index 00000000000..be9cc2f04d3 --- /dev/null +++ b/queue-3.4/x86-amd-disable-way-access-filter-on-piledriver-cpus.patch @@ -0,0 +1,70 @@ +From 2bbf0a1427c377350f001fbc6260995334739ad7 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Wed, 31 Oct 2012 17:20:50 +0100 +Subject: x86, amd: Disable way access filter on Piledriver CPUs + +From: Andre Przywara + +commit 2bbf0a1427c377350f001fbc6260995334739ad7 upstream. + +The Way Access Filter in recent AMD CPUs may hurt the performance of +some workloads, caused by aliasing issues in the L1 cache. +This patch disables it on the affected CPUs. + +The issue is similar to that one of last year: +http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html +This new patch does not replace the old one, we just need another +quirk for newer CPUs. + +The performance penalty without the patch depends on the +circumstances, but is a bit less than the last year's 3%. 
+ +The workloads affected would be those that access code from the same +physical page under different virtual addresses, so different +processes using the same libraries with ASLR or multiple instances of +PIE-binaries. The code needs to be accessed simultaneously from both +cores of the same compute unit. + +More details can be found here: +http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf + +CPUs affected are anything with the core known as Piledriver. +That includes the new parts of the AMD A-Series (aka Trinity) and the +just released new CPUs of the FX-Series (aka Vishera). +The model numbering is a bit odd here: FX CPUs have model 2, +A-Series has model 10h, with possible extensions to 1Fh. Hence the +range of model ids. + +Signed-off-by: Andre Przywara +Link: http://lkml.kernel.org/r/1351700450-9277-1-git-send-email-osp@andrep.de +Signed-off-by: H. Peter Anvin +Signed-off-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/amd.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -612,6 +612,20 @@ static void __cpuinit init_amd(struct cp + } + } + ++ /* ++ * The way access filter has a performance penalty on some workloads. ++ * Disable it on the affected CPUs. ++ */ ++ if ((c->x86 == 0x15) && ++ (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { ++ u64 val; ++ ++ if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { ++ val |= 0x1E; ++ checking_wrmsrl(0xc0011021, val); ++ } ++ } ++ + cpu_detect_cache_sizes(c); + + /* Multi core CPU? */ -- 2.47.3