From 3627b65b0790d7f3bf27e10c6208aa5fb7758cae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 2 Oct 2013 19:48:08 -0700 Subject: [PATCH] 3.4-stable patches added patches: alsa-compress-fix-compress-device-unregister.patch hwmon-applesmc-check-key-count-before-proceeding.patch mm-fix-aio-performance-regression-for-database-caused-by-thp.patch mm-memcg-give-exiting-processes-access-to-memory-reserves.patch --- ...press-fix-compress-device-unregister.patch | 34 ++++ ...mc-check-key-count-before-proceeding.patch | 53 ++++++ ...egression-for-database-caused-by-thp.patch | 172 ++++++++++++++++++ ...-processes-access-to-memory-reserves.patch | 69 +++++++ queue-3.4/series | 4 + 5 files changed, 332 insertions(+) create mode 100644 queue-3.4/alsa-compress-fix-compress-device-unregister.patch create mode 100644 queue-3.4/hwmon-applesmc-check-key-count-before-proceeding.patch create mode 100644 queue-3.4/mm-fix-aio-performance-regression-for-database-caused-by-thp.patch create mode 100644 queue-3.4/mm-memcg-give-exiting-processes-access-to-memory-reserves.patch diff --git a/queue-3.4/alsa-compress-fix-compress-device-unregister.patch b/queue-3.4/alsa-compress-fix-compress-device-unregister.patch new file mode 100644 index 00000000000..15a6d6256f1 --- /dev/null +++ b/queue-3.4/alsa-compress-fix-compress-device-unregister.patch @@ -0,0 +1,34 @@ +From 4028b6c4c03f213260e9290ff3a6b5439aad07ce Mon Sep 17 00:00:00 2001 +From: Liam Girdwood +Date: Fri, 13 Sep 2013 17:43:17 +0100 +Subject: ALSA: compress: Fix compress device unregister. + +From: Liam Girdwood + +commit 4028b6c4c03f213260e9290ff3a6b5439aad07ce upstream. + +snd_unregister_device() should return the device type and not stream +direction. 
+ +Signed-off-by: Liam Girdwood +Acked-by: Vinod Koul +Tested-by: Vinod Koul +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/compress_offload.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/sound/core/compress_offload.c ++++ b/sound/core/compress_offload.c +@@ -663,7 +663,8 @@ static int snd_compress_dev_disconnect(s + struct snd_compr *compr; + + compr = device->device_data; +- snd_unregister_device(compr->direction, compr->card, compr->device); ++ snd_unregister_device(SNDRV_DEVICE_TYPE_COMPRESS, compr->card, ++ compr->device); + return 0; + } + diff --git a/queue-3.4/hwmon-applesmc-check-key-count-before-proceeding.patch b/queue-3.4/hwmon-applesmc-check-key-count-before-proceeding.patch new file mode 100644 index 00000000000..ef4475ebc65 --- /dev/null +++ b/queue-3.4/hwmon-applesmc-check-key-count-before-proceeding.patch @@ -0,0 +1,53 @@ +From 5f4513864304672e6ea9eac60583eeac32e679f2 Mon Sep 17 00:00:00 2001 +From: Henrik Rydberg +Date: Thu, 26 Sep 2013 08:33:16 +0200 +Subject: hwmon: (applesmc) Check key count before proceeding + +From: Henrik Rydberg + +commit 5f4513864304672e6ea9eac60583eeac32e679f2 upstream. + +After reports from Chris and Josh Boyer of a rare crash in applesmc, +Guenter pointed at the initialization problem fixed below. The patch +has not been verified to fix the crash, but should be applied +regardless. 
+ +Reported-by: +Suggested-by: Guenter Roeck +Signed-off-by: Henrik Rydberg +Signed-off-by: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hwmon/applesmc.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -489,16 +489,25 @@ static int applesmc_init_smcreg_try(void + { + struct applesmc_registers *s = &smcreg; + bool left_light_sensor, right_light_sensor; ++ unsigned int count; + u8 tmp[1]; + int ret; + + if (s->init_complete) + return 0; + +- ret = read_register_count(&s->key_count); ++ ret = read_register_count(&count); + if (ret) + return ret; + ++ if (s->cache && s->key_count != count) { ++ pr_warn("key count changed from %d to %d\n", ++ s->key_count, count); ++ kfree(s->cache); ++ s->cache = NULL; ++ } ++ s->key_count = count; ++ + if (!s->cache) + s->cache = kcalloc(s->key_count, sizeof(*s->cache), GFP_KERNEL); + if (!s->cache) diff --git a/queue-3.4/mm-fix-aio-performance-regression-for-database-caused-by-thp.patch b/queue-3.4/mm-fix-aio-performance-regression-for-database-caused-by-thp.patch new file mode 100644 index 00000000000..0f3fae00a31 --- /dev/null +++ b/queue-3.4/mm-fix-aio-performance-regression-for-database-caused-by-thp.patch @@ -0,0 +1,172 @@ +From khalid.aziz@oracle.com Wed Oct 2 19:38:33 2013 +From: Khalid Aziz +Date: Mon, 23 Sep 2013 13:54:09 -0600 +Subject: mm: fix aio performance regression for database caused by THP +To: bhutchings@solarflare.com, gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, pshelar@nicira.com, cl@linux.com, aarcange@redhat.com, hannes@cmpxchg.org, mel@csn.ul.ie, riel@redhat.com, minchan@kernel.org, andi@firstfloor.org, akpm@linux-foundation.org, torvalds@linux-foundation.org +Message-ID: <1379966049.30551.9.camel@concerto> + +From: Khalid Aziz + +commit 7cb2ef56e6a8b7b368b2e883a0a47d02fed66911 upstream. + +This patch needed to be backported due to changes to mm/swap.c some time +after 3.6 kernel. 
+ +I am working with a tool that simulates oracle database I/O workload. +This tool (orion to be specific - +) +allocates hugetlbfs pages using shmget() with SHM_HUGETLB flag. It then +does aio into these pages from flash disks using various common block +sizes used by database. I am looking at performance with two of the most +common block sizes - 1M and 64K. aio performance with these two block +sizes plunged after Transparent HugePages was introduced in the kernel. +Here are performance numbers: + + pre-THP 2.6.39 3.11-rc5 +1M read 8384 MB/s 5629 MB/s 6501 MB/s +64K read 7867 MB/s 4576 MB/s 4251 MB/s + +I have narrowed the performance impact down to the overheads introduced by +THP in __get_page_tail() and put_compound_page() routines. perf top shows +>40% of cycles being spent in these two routines. Every time direct I/O +to hugetlbfs pages starts, kernel calls get_page() to grab a reference to +the pages and calls put_page() when I/O completes to put the reference +away. THP introduced significant amount of locking overhead to get_page() +and put_page() when dealing with compound pages because hugepages can be +split underneath get_page() and put_page(). It added this overhead +irrespective of whether it is dealing with hugetlbfs pages or transparent +hugepages. This resulted in 20%-45% drop in aio performance when using +hugetlbfs pages. + +Since hugetlbfs pages can not be split, there is no reason to go through +all the locking overhead for these pages from what I can see. I added +code to __get_page_tail() and put_compound_page() to bypass all the +locking code when working with hugetlbfs pages. This improved performance +significantly. Performance numbers with this patch: + + pre-THP 3.11-rc5 3.11-rc5 + Patch +1M read 8384 MB/s 6501 MB/s 8371 MB/s +64K read 7867 MB/s 4251 MB/s 6510 MB/s + +Performance with 64K read is still lower than what it was before THP, but +still a 53% improvement. 
It does mean there is more work to be done but I +will take a 53% improvement for now. + +Please take a look at the following patch and let me know if it looks +reasonable. + +[akpm@linux-foundation.org: tweak comments] +Signed-off-by: Khalid Aziz +Cc: Pravin B Shelar +Cc: Christoph Lameter +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Mel Gorman +Cc: Rik van Riel +Cc: Minchan Kim +Cc: Andi Kleen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/swap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 47 insertions(+), 18 deletions(-) + +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -40,6 +40,8 @@ static DEFINE_PER_CPU(struct pagevec[NR_ + static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); + static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); + ++int PageHuge(struct page *page); ++ + /* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. +@@ -68,13 +70,26 @@ static void __put_compound_page(struct p + { + compound_page_dtor *dtor; + +- __page_cache_release(page); ++ if (!PageHuge(page)) ++ __page_cache_release(page); + dtor = get_compound_page_dtor(page); + (*dtor)(page); + } + + static void put_compound_page(struct page *page) + { ++ /* ++ * hugetlbfs pages can not be split from under us. So if this ++ * is a hugetlbfs page, check refcount on head page and release ++ * the page if refcount is zero. ++ */ ++ if (PageHuge(page)) { ++ page = compound_head(page); ++ if (put_page_testzero(page)) ++ __put_compound_page(page); ++ return; ++ } ++ + if (unlikely(PageTail(page))) { + /* __split_huge_page_refcount can run under us */ + struct page *page_head = compound_trans_head(page); +@@ -157,26 +172,40 @@ bool __get_page_tail(struct page *page) + * proper PT lock that already serializes against + * split_huge_page(). 
+ */ +- unsigned long flags; + bool got = false; +- struct page *page_head = compound_trans_head(page); + +- if (likely(page != page_head && get_page_unless_zero(page_head))) { +- /* +- * page_head wasn't a dangling pointer but it +- * may not be a head page anymore by the time +- * we obtain the lock. That is ok as long as it +- * can't be freed from under us. +- */ +- flags = compound_lock_irqsave(page_head); +- /* here __split_huge_page_refcount won't run anymore */ +- if (likely(PageTail(page))) { +- __get_page_tail_foll(page, false); +- got = true; ++ /* ++ * If this is a hugetlbfs page, it can not be split under ++ * us. Simply increment counts for tail page and its head page ++ */ ++ if (PageHuge(page)) { ++ struct page *page_head; ++ ++ page_head = compound_head(page); ++ atomic_inc(&page_head->_count); ++ got = true; ++ } else { ++ struct page *page_head = compound_trans_head(page); ++ unsigned long flags; ++ ++ if (likely(page != page_head && ++ get_page_unless_zero(page_head))) { ++ /* ++ * page_head wasn't a dangling pointer but it ++ * may not be a head page anymore by the time ++ * we obtain the lock. That is ok as long as it ++ * can't be freed from under us. 
++ */ ++ flags = compound_lock_irqsave(page_head); ++ /* here __split_huge_page_refcount won't run anymore */ ++ if (likely(PageTail(page))) { ++ __get_page_tail_foll(page, false); ++ got = true; ++ } ++ compound_unlock_irqrestore(page_head, flags); ++ if (unlikely(!got)) ++ put_page(page_head); + } +- compound_unlock_irqrestore(page_head, flags); +- if (unlikely(!got)) +- put_page(page_head); + } + return got; + } diff --git a/queue-3.4/mm-memcg-give-exiting-processes-access-to-memory-reserves.patch b/queue-3.4/mm-memcg-give-exiting-processes-access-to-memory-reserves.patch new file mode 100644 index 00000000000..f3656cf9b5b --- /dev/null +++ b/queue-3.4/mm-memcg-give-exiting-processes-access-to-memory-reserves.patch @@ -0,0 +1,69 @@ +From h.huangqiang@huawei.com Wed Oct 2 19:36:01 2013 +From: David Rientjes +Date: Fri, 27 Sep 2013 17:08:49 +0800 +Subject: mm, memcg: give exiting processes access to memory reserves +To: +Cc: Andrew Morton , Michal Hocko , , Li Zefan +Message-ID: <52454B21.3010207@huawei.com> + + +From: David Rientjes + +commit 465adcf1ea7b2e49b2e0899366624f5532b64012 upstream. + +A memcg may livelock when oom if the process that grabs the hierarchy's +oom lock is never the first process with PF_EXITING set in the memcg's +task iteration. + +The oom killer, both global and memcg, will defer if it finds an +eligible process that is in the process of exiting and it is not being +ptraced. The idea is to allow it to exit without using memory reserves +before needlessly killing another process. + +This normally works fine except in the memcg case with a large number of +threads attached to the oom memcg. In this case, the memcg oom killer +only gets called for the process that grabs the hierarchy's oom lock; +all others end up blocked on the memcg's oom waitqueue. Thus, if the +process that grabs the hierarchy's oom lock is never the first +PF_EXITING process in the memcg's task iteration, the oom killer is +constantly deferred without anything making progress. 
+ +The fix is to give PF_EXITING processes access to memory reserves so +that we've marked them as oom killed without any iteration. This allows +__mem_cgroup_try_charge() to succeed so that the process may exit. This +makes the memcg oom killer exemption for TIF_MEMDIE tasks, now +immediately granted for processes with pending SIGKILLs and those in the +exit path, to be equivalent to what is done for the global oom killer. + +Signed-off-by: David Rientjes +Acked-by: Michal Hocko +Acked-by: KAMEZAWA Hiroyuki +Acked-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[Qiang: backported to 3.4: + - move the changes from memcontrol.c to oom_kill.c] +Signed-off-by: Qiang Huang +Signed-off-by: Greg Kroah-Hartman +--- + mm/oom_kill.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -562,11 +562,11 @@ void mem_cgroup_out_of_memory(struct mem + struct task_struct *p; + + /* +- * If current has a pending SIGKILL, then automatically select it. The +- * goal is to allow it to allocate so that it may quickly exit and free +- * its memory. ++ * If current has a pending SIGKILL or is exiting, then automatically ++ * select it. The goal is to allow it to allocate so that it may ++ * quickly exit and free its memory. 
+ */ +- if (fatal_signal_pending(current)) { ++ if (fatal_signal_pending(current) || current->flags & PF_EXITING) { + set_thread_flag(TIF_MEMDIE); + return; + } diff --git a/queue-3.4/series b/queue-3.4/series index 86951c11d0f..c054f6bc05e 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -9,3 +9,7 @@ dm-snapshot-workaround-for-a-false-positive-lockdep-warning.patch dm-snapshot-fix-performance-degradation-due-to-small-hash-size.patch drm-i915-dp-increase-i2c-over-aux-retry-interval-on-aux-defer.patch drm-radeon-disable-tests-benchmarks-if-accel-is-disabled.patch +hwmon-applesmc-check-key-count-before-proceeding.patch +alsa-compress-fix-compress-device-unregister.patch +mm-memcg-give-exiting-processes-access-to-memory-reserves.patch +mm-fix-aio-performance-regression-for-database-caused-by-thp.patch -- 2.47.2