--- /dev/null
+From 1431574a1c4c669a0c198e4763627837416e4443 Mon Sep 17 00:00:00 2001
+From: Alexandre Courbot <acourbot@nvidia.com>
+Date: Wed, 11 Sep 2013 14:23:53 -0700
+Subject: lib/decompressors: fix "no limit" output buffer length
+
+From: Alexandre Courbot <acourbot@nvidia.com>
+
+commit 1431574a1c4c669a0c198e4763627837416e4443 upstream.
+
+When decompressing into memory, the output buffer length is set to an
+arbitrarily high value (0x7fffffff) to indicate that the output is
+virtually unlimited in size.
+
+The problem with this is that some platforms have their physical memory
+at high addresses (0x80000000 or more), and the output buffer address
+and its "unlimited" length cannot be added without overflowing.
+An example of this can be found in inflate_fast():
+
+/* next_out is the output buffer address */
+out = strm->next_out - OFF;
+/* avail_out is the output buffer size. end will overflow if the output
+ * address is >= 0x80000104 */
+end = out + (strm->avail_out - 257);
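+
+For instance (illustrative numbers only), on a 32-bit platform where
+next_out is 0x90000000 and avail_out is 0x7fffffff:
+
+/* end = 0x90000000 + (0x7fffffff - 257)
+ *     = 0x90000000 + 0x7ffffefe, which wraps around to 0x0ffffefe,
+ * so end < out before a single byte has been processed */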
+
+This has huge consequences for the performance of kernel decompression,
+since the following loop condition in inflate_fast() is now always
+false:
+
+} while (in < last && out < end);
+
+Indeed, "end" has overflowed and is always lower than "out", so the
+loop exits after its first iteration. As a result, inflate_fast()
+returns after processing a single byte of input data and thus needs to
+be called an unreasonably high number of times. This probably went
+unnoticed because kernel decompression is fast enough even with this
+issue.
+
+Nonetheless, adjusting the output buffer length so that the above
+pointer arithmetic never overflows makes kernel decompression about 3
+times faster on affected machines.
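+
+A minimal user-space sketch of the idea (illustrative only, not the
+kernel code; the function name is made up):
+
+#include <stdint.h>
+#include <stddef.h>
+
+/*
+ * The largest length for which buf + len cannot wrap: buf + len then
+ * equals SIZE_MAX, the very top of the address space. This is exactly
+ * what the one-line fix below computes as ((size_t)~0) - (size_t)out_buf.
+ */
+static size_t no_limit_len(const unsigned char *buf)
+{
+	return SIZE_MAX - (uintptr_t)buf;
+}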
+
+Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+Tested-by: Jon Medhurst <tixy@linaro.org>
+Cc: Stephen Warren <swarren@wwwdotorg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/decompress_inflate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/decompress_inflate.c
++++ b/lib/decompress_inflate.c
+@@ -48,7 +48,7 @@ STATIC int INIT gunzip(unsigned char *bu
+ out_len = 0x8000; /* 32 K */
+ out_buf = malloc(out_len);
+ } else {
+- out_len = 0x7fffffff; /* no limit */
++ out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */
+ }
+ if (!out_buf) {
+ error("Out of memory while allocating output buffer");
--- /dev/null
+From 27c73ae759774e63313c1fbfeb17ba076cea64c5 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Thu, 21 Nov 2013 14:32:02 -0800
+Subject: mm: hugetlbfs: fix hugetlbfs optimization
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 27c73ae759774e63313c1fbfeb17ba076cea64c5 upstream.
+
+Commit 7cb2ef56e6a8 ("mm: fix aio performance regression for database
+caused by THP") can cause a dangling pointer to be dereferenced if
+split_huge_page runs during PageHuge() while the tail_page->private
+field is being updated.
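+
+In outline (simplified; the removed hunk in mm/swap.c below shows the
+real code):
+
+	/* CPU A: old put_compound_page() fast path */
+	if (PageHuge(page)) {			/* looks up the head page */
+		page = compound_head(page);	/* looks it up again */
+		...
+	}
+
+	/* CPU B: __split_huge_page_refcount() can run between the two
+	 * lookups and free the head, so the second lookup may chase a
+	 * dangling pointer. */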
+
+It also runs compound_head twice for hugetlbfs pages, and runs both
+compound_head and compound_trans_head for THP, when a single call is
+needed in either case.
+
+The new code within the PageSlab() check doesn't need to verify that
+the THP page size is never bigger than the smallest hugetlbfs page size
+in order to avoid memory corruption: PageHeadHuge() identifies a
+hugetlbfs head directly by its compound destructor (see mm/hugetlb.c
+below) rather than by page-size assumptions.
+
+A longstanding theoretical race condition was found while fixing the
+above (see the change right after the skip_lock label; it is relevant
+for the compound_lock path too).
+
+By re-establishing the _mapcount tail refcounting for all compound
+pages, this also fixes the problem below (a sketch of the tail-pin
+accounting follows the trace):
+
+ echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+ BUG: Bad page state in process bash pfn:59a01
+ page:ffffea000139b038 count:0 mapcount:10 mapping: (null) index:0x0
+ page flags: 0x1c00000000008000(tail)
+ Modules linked in:
+ CPU: 6 PID: 2018 Comm: bash Not tainted 3.12.0+ #25
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+ Call Trace:
+ dump_stack+0x55/0x76
+ bad_page+0xd5/0x130
+ free_pages_prepare+0x213/0x280
+ __free_pages+0x36/0x80
+ update_and_free_page+0xc1/0xd0
+ free_pool_huge_page+0xc2/0xe0
+ set_max_huge_pages.part.58+0x14c/0x220
+ nr_hugepages_store_common.isra.60+0xd0/0xf0
+ nr_hugepages_store+0x13/0x20
+ kobj_attr_store+0xf/0x20
+ sysfs_write_file+0x189/0x1e0
+ vfs_write+0xc5/0x1f0
+ SyS_write+0x55/0xb0
+ system_call_fastpath+0x16/0x1b
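+
+The mapcount:10 in the trace above is the signature of unbalanced tail
+pins. In these kernels a pin on a compound tail is taken roughly as
+(sketch of mm/internal.h's __get_page_tail_foll() helper):
+
+	atomic_inc(&page->first_page->_count);	/* pin the head */
+	atomic_inc(&page->_mapcount);		/* record the tail pin */
+
+and must be released symmetrically. Restoring the atomic_dec() of the
+tail's _mapcount on the release side for hugetlbfs pages is what lets
+the mapcount drop back before the huge page is freed.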
+
+Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Tested-by: Khalid Aziz <khalid.aziz@oracle.com>
+Cc: Pravin Shelar <pshelar@nicira.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Ben Hutchings <bhutchings@solarflare.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Guillaume Morin <guillaume@morinfr.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/hugetlb.h | 6 ++
+ mm/hugetlb.c | 17 +++++
+ mm/swap.c | 143 +++++++++++++++++++++++++++---------------------
+ 3 files changed, 106 insertions(+), 60 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_su
+ void hugepage_put_subpool(struct hugepage_subpool *spool);
+
+ int PageHuge(struct page *page);
++int PageHeadHuge(struct page *page_head);
+
+ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+ int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+@@ -97,6 +98,11 @@ static inline int PageHuge(struct page *
+ {
+ return 0;
+ }
++
++static inline int PageHeadHuge(struct page *page_head)
++{
++ return 0;
++}
+
+ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+ {
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -690,6 +690,23 @@ int PageHuge(struct page *page)
+ }
+ EXPORT_SYMBOL_GPL(PageHuge);
+
++/*
++ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
++ * normal or transparent huge pages.
++ */
++int PageHeadHuge(struct page *page_head)
++{
++ compound_page_dtor *dtor;
++
++ if (!PageHead(page_head))
++ return 0;
++
++ dtor = get_compound_page_dtor(page_head);
++
++ return dtor == free_huge_page;
++}
++EXPORT_SYMBOL_GPL(PageHeadHuge);
++
+ pgoff_t __basepage_index(struct page *page)
+ {
+ struct page *page_head = compound_head(page);
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -79,19 +79,6 @@ static void __put_compound_page(struct p
+
+ static void put_compound_page(struct page *page)
+ {
+- /*
+- * hugetlbfs pages cannot be split from under us. If this is a
+- * hugetlbfs page, check refcount on head page and release the page if
+- * the refcount becomes zero.
+- */
+- if (PageHuge(page)) {
+- page = compound_head(page);
+- if (put_page_testzero(page))
+- __put_compound_page(page);
+-
+- return;
+- }
+-
+ if (unlikely(PageTail(page))) {
+ /* __split_huge_page_refcount can run under us */
+ struct page *page_head = compound_trans_head(page);
+@@ -108,14 +95,31 @@ static void put_compound_page(struct pag
+ * still hot on arches that do not support
+ * this_cpu_cmpxchg_double().
+ */
+- if (PageSlab(page_head)) {
+- if (PageTail(page)) {
++ if (PageSlab(page_head) || PageHeadHuge(page_head)) {
++ if (likely(PageTail(page))) {
++ /*
++ * __split_huge_page_refcount
++ * cannot race here.
++ */
++ VM_BUG_ON(!PageHead(page_head));
++ atomic_dec(&page->_mapcount);
+ if (put_page_testzero(page_head))
+ VM_BUG_ON(1);
+-
+- atomic_dec(&page->_mapcount);
+- goto skip_lock_tail;
++ if (put_page_testzero(page_head))
++ __put_compound_page(page_head);
++ return;
+ } else
++ /*
++ * __split_huge_page_refcount
++ * run before us, "page" was a
++ * THP tail. The split
++ * page_head has been freed
++ * and reallocated as slab or
++ * hugetlbfs page of smaller
++ * order (only possible if
++ * reallocated as slab on
++ * x86).
++ */
+ goto skip_lock;
+ }
+ /*
+@@ -129,8 +133,27 @@ static void put_compound_page(struct pag
+ /* __split_huge_page_refcount run before us */
+ compound_unlock_irqrestore(page_head, flags);
+ skip_lock:
+- if (put_page_testzero(page_head))
+- __put_single_page(page_head);
++ if (put_page_testzero(page_head)) {
++ /*
++ * The head page may have been
++ * freed and reallocated as a
++ * compound page of smaller
++ * order and then freed again.
++ * All we know is that it
++ * cannot have become: a THP
++ * page, a compound page of
++ * higher order, a tail page.
++ * That is because we still
++ * hold the refcount of the
++ * split THP tail and
++ * page_head was the THP head
++ * before the split.
++ */
++ if (PageHead(page_head))
++ __put_compound_page(page_head);
++ else
++ __put_single_page(page_head);
++ }
+ out_put_single:
+ if (put_page_testzero(page))
+ __put_single_page(page);
+@@ -152,7 +175,6 @@ out_put_single:
+ VM_BUG_ON(atomic_read(&page->_count) != 0);
+ compound_unlock_irqrestore(page_head, flags);
+
+-skip_lock_tail:
+ if (put_page_testzero(page_head)) {
+ if (PageHead(page_head))
+ __put_compound_page(page_head);
+@@ -195,51 +217,52 @@ bool __get_page_tail(struct page *page)
+ * proper PT lock that already serializes against
+ * split_huge_page().
+ */
++ unsigned long flags;
+ bool got = false;
+- struct page *page_head;
+-
+- /*
+- * If this is a hugetlbfs page it cannot be split under us. Simply
+- * increment refcount for the head page.
+- */
+- if (PageHuge(page)) {
+- page_head = compound_head(page);
+- atomic_inc(&page_head->_count);
+- got = true;
+- } else {
+- unsigned long flags;
+-
+- page_head = compound_trans_head(page);
+- if (likely(page != page_head &&
+- get_page_unless_zero(page_head))) {
+-
+- /* Ref to put_compound_page() comment. */
+- if (PageSlab(page_head)) {
+- if (likely(PageTail(page))) {
+- __get_page_tail_foll(page, false);
+- return true;
+- } else {
+- put_page(page_head);
+- return false;
+- }
+- }
++ struct page *page_head = compound_trans_head(page);
+
+- /*
+- * page_head wasn't a dangling pointer but it
+- * may not be a head page anymore by the time
+- * we obtain the lock. That is ok as long as it
+- * can't be freed from under us.
+- */
+- flags = compound_lock_irqsave(page_head);
+- /* here __split_huge_page_refcount won't run anymore */
++ if (likely(page != page_head && get_page_unless_zero(page_head))) {
++ /* Ref to put_compound_page() comment. */
++ if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+ if (likely(PageTail(page))) {
++ /*
++ * This is a hugetlbfs page or a slab
++ * page. __split_huge_page_refcount
++ * cannot race here.
++ */
++ VM_BUG_ON(!PageHead(page_head));
+ __get_page_tail_foll(page, false);
+- got = true;
+- }
+- compound_unlock_irqrestore(page_head, flags);
+- if (unlikely(!got))
++ return true;
++ } else {
++ /*
++ * __split_huge_page_refcount run
++ * before us, "page" was a THP
++ * tail. The split page_head has been
++ * freed and reallocated as slab or
++ * hugetlbfs page of smaller order
++ * (only possible if reallocated as
++ * slab on x86).
++ */
+ put_page(page_head);
++ return false;
++ }
++ }
++
++ /*
++ * page_head wasn't a dangling pointer but it
++ * may not be a head page anymore by the time
++ * we obtain the lock. That is ok as long as it
++ * can't be freed from under us.
++ */
++ flags = compound_lock_irqsave(page_head);
++ /* here __split_huge_page_refcount won't run anymore */
++ if (likely(PageTail(page))) {
++ __get_page_tail_foll(page, false);
++ got = true;
+ }
++ compound_unlock_irqrestore(page_head, flags);
++ if (unlikely(!got))
++ put_page(page_head);
+ }
+ return got;
+ }