]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.1 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Fri, 9 Dec 2011 20:15:34 +0000 (12:15 -0800)
committerGreg Kroah-Hartman <gregkh@suse.de>
Fri, 9 Dec 2011 20:15:34 +0000 (12:15 -0800)
added patches:
drivers-rtc-rtc-s3c.c-fix-driver-clock-enable-disable-balance-issues.patch
fs-proc-meminfo.c-fix-compilation-error.patch
lockdep-kmemcheck-annotate-lock-in-lockdep_init_map.patch
mm-ensure-that-pfn_valid-is-called-once-per-pageblock-when-reserving-pageblocks.patch
mm-vmalloc-check-for-page-allocation-failure-before-vmlist-insertion.patch
ptp-fix-clock_getres-implementation.patch
thp-add-compound-tail-page-_mapcount-when-mapped.patch
thp-set-compound-tail-page-_count-to-zero.patch

queue-3.1/drivers-rtc-rtc-s3c.c-fix-driver-clock-enable-disable-balance-issues.patch [new file with mode: 0644]
queue-3.1/fs-proc-meminfo.c-fix-compilation-error.patch [new file with mode: 0644]
queue-3.1/lockdep-kmemcheck-annotate-lock-in-lockdep_init_map.patch [new file with mode: 0644]
queue-3.1/mm-ensure-that-pfn_valid-is-called-once-per-pageblock-when-reserving-pageblocks.patch [new file with mode: 0644]
queue-3.1/mm-vmalloc-check-for-page-allocation-failure-before-vmlist-insertion.patch [new file with mode: 0644]
queue-3.1/ptp-fix-clock_getres-implementation.patch [new file with mode: 0644]
queue-3.1/series
queue-3.1/thp-add-compound-tail-page-_mapcount-when-mapped.patch [new file with mode: 0644]
queue-3.1/thp-set-compound-tail-page-_count-to-zero.patch [new file with mode: 0644]

diff --git a/queue-3.1/drivers-rtc-rtc-s3c.c-fix-driver-clock-enable-disable-balance-issues.patch b/queue-3.1/drivers-rtc-rtc-s3c.c-fix-driver-clock-enable-disable-balance-issues.patch
new file mode 100644 (file)
index 0000000..beb563d
--- /dev/null
@@ -0,0 +1,41 @@
+From 2dbcd05f1e9e0932833d16dab1696176fc164b07 Mon Sep 17 00:00:00 2001
+From: Jonghwan Choi <jhbird.choi@samsung.com>
+Date: Thu, 8 Dec 2011 14:34:02 -0800
+Subject: drivers/rtc/rtc-s3c.c: fix driver clock enable/disable balance issues
+
+From: Jonghwan Choi <jhbird.choi@samsung.com>
+
+commit 2dbcd05f1e9e0932833d16dab1696176fc164b07 upstream.
+
+If an error occurs after the clock is enabled, the enable/disable state
+can become unbalanced.
+
+Signed-off-by: Jonghwan Choi <jhbird.choi@samsung.com>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Acked-by: Kukjin Kim <kgene.kim@samsung.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/rtc/rtc-s3c.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/rtc-s3c.c
++++ b/drivers/rtc/rtc-s3c.c
+@@ -202,7 +202,6 @@ static int s3c_rtc_settime(struct device
+       void __iomem *base = s3c_rtc_base;
+       int year = tm->tm_year - 100;
+-      clk_enable(rtc_clk);
+       pr_debug("set time %04d.%02d.%02d %02d:%02d:%02d\n",
+                1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+                tm->tm_hour, tm->tm_min, tm->tm_sec);
+@@ -214,6 +213,7 @@ static int s3c_rtc_settime(struct device
+               return -EINVAL;
+       }
++      clk_enable(rtc_clk);
+       writeb(bin2bcd(tm->tm_sec),  base + S3C2410_RTCSEC);
+       writeb(bin2bcd(tm->tm_min),  base + S3C2410_RTCMIN);
+       writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
diff --git a/queue-3.1/fs-proc-meminfo.c-fix-compilation-error.patch b/queue-3.1/fs-proc-meminfo.c-fix-compilation-error.patch
new file mode 100644 (file)
index 0000000..1d6d288
--- /dev/null
@@ -0,0 +1,41 @@
+From b53fc7c2974a50913f49e1d800fe904a28c338e3 Mon Sep 17 00:00:00 2001
+From: Claudio Scordino <claudio@evidence.eu.com>
+Date: Thu, 8 Dec 2011 14:33:56 -0800
+Subject: fs/proc/meminfo.c: fix compilation error
+
+From: Claudio Scordino <claudio@evidence.eu.com>
+
+commit b53fc7c2974a50913f49e1d800fe904a28c338e3 upstream.
+
+Fix the error message "directives may not be used inside a macro argument"
+which appears when the kernel is compiled for the cris architecture.
+
+Signed-off-by: Claudio Scordino <claudio@evidence.eu.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/meminfo.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/proc/meminfo.c
++++ b/fs/proc/meminfo.c
+@@ -131,12 +131,13 @@ static int meminfo_proc_show(struct seq_
+               K(i.freeswap),
+               K(global_page_state(NR_FILE_DIRTY)),
+               K(global_page_state(NR_WRITEBACK)),
+-              K(global_page_state(NR_ANON_PAGES)
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
++              K(global_page_state(NR_ANON_PAGES)
+                 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+-                HPAGE_PMD_NR
++                HPAGE_PMD_NR),
++#else
++              K(global_page_state(NR_ANON_PAGES)),
+ #endif
+-                ),
+               K(global_page_state(NR_FILE_MAPPED)),
+               K(global_page_state(NR_SHMEM)),
+               K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/queue-3.1/lockdep-kmemcheck-annotate-lock-in-lockdep_init_map.patch b/queue-3.1/lockdep-kmemcheck-annotate-lock-in-lockdep_init_map.patch
new file mode 100644 (file)
index 0000000..c3bf04e
--- /dev/null
@@ -0,0 +1,72 @@
+From a33caeb118198286309859f014c0662f3ed54ed4 Mon Sep 17 00:00:00 2001
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Wed, 9 Nov 2011 16:04:51 +0800
+Subject: lockdep, kmemcheck: Annotate ->lock in lockdep_init_map()
+
+From: Yong Zhang <yong.zhang0@gmail.com>
+
+commit a33caeb118198286309859f014c0662f3ed54ed4 upstream.
+
+Since commit f59de89 ("lockdep: Clear whole lockdep_map on initialization"),
+lockdep_init_map() will clear all the struct. But it will break
+lock_set_class()/lock_set_subclass(). A typical race condition
+is like below:
+
+     CPU A                                   CPU B
+lock_set_subclass(lockA);
+ lock_set_class(lockA);
+   lockdep_init_map(lockA);
+     /* lockA->name is cleared */
+     memset(lockA);
+                                     __lock_acquire(lockA);
+                                       /* lockA->class_cache[] is cleared */
+                                       register_lock_class(lockA);
+                                         look_up_lock_class(lockA);
+                                           WARN_ON_ONCE(class->name !=
+                                                     lock->name);
+
+     lock->name = name;
+
+So restore to what we have done before commit f59de89 but annotate
+->lock with kmemcheck_mark_initialized() to suppress the kmemcheck
+warning reported in commit f59de89.
+
+Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Reported-by: Borislav Petkov <bp@alien8.de>
+Suggested-by: Vegard Nossum <vegard.nossum@gmail.com>
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/20111109080451.GB8124@zhy
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/lockdep.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/kernel/lockdep.c
++++ b/kernel/lockdep.c
+@@ -44,6 +44,7 @@
+ #include <linux/stringify.h>
+ #include <linux/bitops.h>
+ #include <linux/gfp.h>
++#include <linux/kmemcheck.h>
+ #include <asm/sections.h>
+@@ -2874,7 +2875,12 @@ static int mark_lock(struct task_struct
+ void lockdep_init_map(struct lockdep_map *lock, const char *name,
+                     struct lock_class_key *key, int subclass)
+ {
+-      memset(lock, 0, sizeof(*lock));
++      int i;
++
++      kmemcheck_mark_initialized(lock, sizeof(*lock));
++
++      for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
++              lock->class_cache[i] = NULL;
+ #ifdef CONFIG_LOCK_STAT
+       lock->cpu = raw_smp_processor_id();
diff --git a/queue-3.1/mm-ensure-that-pfn_valid-is-called-once-per-pageblock-when-reserving-pageblocks.patch b/queue-3.1/mm-ensure-that-pfn_valid-is-called-once-per-pageblock-when-reserving-pageblocks.patch
new file mode 100644 (file)
index 0000000..9acaffd
--- /dev/null
@@ -0,0 +1,83 @@
+From d021563888312018ca65681096f62e36c20e63cc Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.cz>
+Date: Thu, 8 Dec 2011 14:34:27 -0800
+Subject: mm: Ensure that pfn_valid() is called once per pageblock when reserving pageblocks
+
+From: Michal Hocko <mhocko@suse.cz>
+
+commit d021563888312018ca65681096f62e36c20e63cc upstream.
+
+setup_zone_migrate_reserve() expects that zone->start_pfn starts at
+pageblock_nr_pages aligned pfn otherwise we could access beyond an
+existing memblock resulting in the following panic if
+CONFIG_HOLES_IN_ZONE is not configured and we do not check pfn_valid:
+
+  IP: [<c02d331d>] setup_zone_migrate_reserve+0xcd/0x180
+  *pdpt = 0000000000000000 *pde = f000ff53f000ff53
+  Oops: 0000 [#1] SMP
+  Pid: 1, comm: swapper Not tainted 3.0.7-0.7-pae #1 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform
+  EIP: 0060:[<c02d331d>] EFLAGS: 00010006 CPU: 0
+  EIP is at setup_zone_migrate_reserve+0xcd/0x180
+  EAX: 000c0000 EBX: f5801fc0 ECX: 000c0000 EDX: 00000000
+  ESI: 000c01fe EDI: 000c01fe EBP: 00140000 ESP: f2475f58
+  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
+  Process swapper (pid: 1, ti=f2474000 task=f2472cd0 task.ti=f2474000)
+  Call Trace:
+  [<c02d389c>] __setup_per_zone_wmarks+0xec/0x160
+  [<c02d3a1f>] setup_per_zone_wmarks+0xf/0x20
+  [<c08a771c>] init_per_zone_wmark_min+0x27/0x86
+  [<c020111b>] do_one_initcall+0x2b/0x160
+  [<c086639d>] kernel_init+0xbe/0x157
+  [<c05cae26>] kernel_thread_helper+0x6/0xd
+  Code: a5 39 f5 89 f7 0f 46 fd 39 cf 76 40 8b 03 f6 c4 08 74 32 eb 91 90 89 c8 c1 e8 0e 0f be 80 80 2f 86 c0 8b 14 85 60 2f 86 c0 89 c8 <2b> 82 b4 12 00 00 c1 e0 05 03 82 ac 12 00 00 8b 00 f6 c4 08 0f
+  EIP: [<c02d331d>] setup_zone_migrate_reserve+0xcd/0x180 SS:ESP 0068:f2475f58
+  CR2: 00000000000012b4
+
+We crashed in pageblock_is_reserved() when accessing pfn 0xc0000 because
+highstart_pfn = 0x36ffe.
+
+The issue was introduced in 3.0-rc1 by 6d3163ce ("mm: check if any page
+in a pageblock is reserved before marking it MIGRATE_RESERVE").
+
+Make sure that start_pfn is always aligned to pageblock_nr_pages to
+ensure that pfn_valid is always called at the start of each pageblock.
+Architectures with holes in pageblocks will be correctly handled by
+pfn_valid_within in pageblock_is_reserved.
+
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Tested-by: Dang Bo <bdang@vmware.com>
+Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Arve Hjønnevåg <arve@android.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/page_alloc.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3370,9 +3370,15 @@ static void setup_zone_migrate_reserve(s
+       unsigned long block_migratetype;
+       int reserve;
+-      /* Get the start pfn, end pfn and the number of blocks to reserve */
++      /*
++       * Get the start pfn, end pfn and the number of blocks to reserve
++       * We have to be careful to be aligned to pageblock_nr_pages to
++       * make sure that we always check pfn_valid for the first page in
++       * the block.
++       */
+       start_pfn = zone->zone_start_pfn;
+       end_pfn = start_pfn + zone->spanned_pages;
++      start_pfn = roundup(start_pfn, pageblock_nr_pages);
+       reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
+                                                       pageblock_order;
diff --git a/queue-3.1/mm-vmalloc-check-for-page-allocation-failure-before-vmlist-insertion.patch b/queue-3.1/mm-vmalloc-check-for-page-allocation-failure-before-vmlist-insertion.patch
new file mode 100644 (file)
index 0000000..4595bc1
--- /dev/null
@@ -0,0 +1,49 @@
+From 1368edf0647ac112d8cfa6ce47257dc950c50f5c Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Thu, 8 Dec 2011 14:34:30 -0800
+Subject: mm: vmalloc: check for page allocation failure before vmlist insertion
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 1368edf0647ac112d8cfa6ce47257dc950c50f5c upstream.
+
+Commit f5252e00 ("mm: avoid null pointer access in vm_struct via
+/proc/vmallocinfo") adds newly allocated vm_structs to the vmlist after
+it is fully initialised.  Unfortunately, it did not check that
+__vmalloc_area_node() successfully populated the area.  In the event of
+allocation failure, the vmalloc area is freed but the pointer to freed
+memory is inserted into the vmlist leading to a crash later in
+get_vmalloc_info().
+
+This patch adds a check for __vmalloc_area_node() failure within
+__vmalloc_node_range.  It does not use "goto fail" as in the previous
+error path as a warning was already displayed by __vmalloc_area_node()
+before it called vfree in its failure path.
+
+Credit goes to Luciano Chavez for doing all the real work of identifying
+exactly where the problem was.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reported-by: Luciano Chavez <lnx1138@linux.vnet.ibm.com>
+Tested-by: Luciano Chavez <lnx1138@linux.vnet.ibm.com>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/vmalloc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1634,6 +1634,8 @@ void *__vmalloc_node_range(unsigned long
+               return NULL;
+       addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
++      if (!addr)
++              return NULL;
+       /*
+        * In this function, newly allocated vm_struct is not added
diff --git a/queue-3.1/ptp-fix-clock_getres-implementation.patch b/queue-3.1/ptp-fix-clock_getres-implementation.patch
new file mode 100644 (file)
index 0000000..49c9577
--- /dev/null
@@ -0,0 +1,34 @@
+From d68fb11c3dae75c8331538dcf083a65e697cc034 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 5 Dec 2011 21:16:06 +0100
+Subject: ptp: Fix clock_getres() implementation
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit d68fb11c3dae75c8331538dcf083a65e697cc034 upstream.
+
+The clock_getres() function must return the resolution in the timespec
+argument and return 0 for success.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: John Stultz <john.stultz@linaro.org>
+Cc: Richard Cochran <richard.cochran@omicron.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ptp/ptp_clock.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -101,7 +101,9 @@ static s32 scaled_ppm_to_ppb(long ppm)
+ static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp)
+ {
+-      return 1; /* always round timer functions to one nanosecond */
++      tp->tv_sec = 0;
++      tp->tv_nsec = 1;
++      return 0;
+ }
+ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp)
index 909d792f9a5dc4055f972bd943f39b13b8505165..c5fb9eb3a5876075f8f6db719dd11d3a86f5f2c4 100644 (file)
@@ -8,3 +8,11 @@ arm-at91-fix-clock-conid-for-atmel_tcb.1-on-9260-9g20.patch
 arm-at91-fix-usb-at91-gadget-registration.patch
 arm-davinci-dm646x-evm-wrong-register-used-in-setup_vpif_input_channel_mode.patch
 asoc-provide-a-more-complete-dma-driver-stub.patch
+drivers-rtc-rtc-s3c.c-fix-driver-clock-enable-disable-balance-issues.patch
+fs-proc-meminfo.c-fix-compilation-error.patch
+thp-add-compound-tail-page-_mapcount-when-mapped.patch
+thp-set-compound-tail-page-_count-to-zero.patch
+lockdep-kmemcheck-annotate-lock-in-lockdep_init_map.patch
+ptp-fix-clock_getres-implementation.patch
+mm-ensure-that-pfn_valid-is-called-once-per-pageblock-when-reserving-pageblocks.patch
+mm-vmalloc-check-for-page-allocation-failure-before-vmlist-insertion.patch
diff --git a/queue-3.1/thp-add-compound-tail-page-_mapcount-when-mapped.patch b/queue-3.1/thp-add-compound-tail-page-_mapcount-when-mapped.patch
new file mode 100644 (file)
index 0000000..6e09015
--- /dev/null
@@ -0,0 +1,67 @@
+From b6999b19120931ede364fa3b685e698a61fed31d Mon Sep 17 00:00:00 2001
+From: Youquan Song <youquan.song@intel.com>
+Date: Thu, 8 Dec 2011 14:34:16 -0800
+Subject: thp: add compound tail page _mapcount when mapped
+
+From: Youquan Song <youquan.song@intel.com>
+
+commit b6999b19120931ede364fa3b685e698a61fed31d upstream.
+
+With the 3.2-rc kernel, IOMMU 2M pages in KVM works.  But when I tried
+to use IOMMU 1GB pages in KVM, I encountered an oops and the 1GB page
+failed to be used.
+
+The root cause is that 1GB page allocation calls gup_huge_pud() while 2M
+page calls gup_huge_pmd.  If compound pages are used and the page is a
+tail page, gup_huge_pmd() increases _mapcount to record tail page are
+mapped while gup_huge_pud does not do that.
+
+So when the mapped page is released, it will result in kernel oops
+because the page is not marked mapped.
+
+This patch adds tail processing for compound page in 1GB huge page which
+keeps the same process as 2M page.
+
+Reproduce like:
+1. Add grub boot option: hugepagesz=1G hugepages=8
+2. mount -t hugetlbfs -o pagesize=1G hugetlbfs /dev/hugepages
+3. qemu-kvm -m 2048 -hda os-kvm.img -cpu kvm64 -smp 4 -mem-path /dev/hugepages
+       -net none -device pci-assign,host=07:00.1
+
+  kernel BUG at mm/swap.c:114!
+  invalid opcode: 0000 [#1] SMP
+  Call Trace:
+    put_page+0x15/0x37
+    kvm_release_pfn_clean+0x31/0x36
+    kvm_iommu_put_pages+0x94/0xb1
+    kvm_iommu_unmap_memslots+0x80/0xb6
+    kvm_assign_device+0xba/0x117
+    kvm_vm_ioctl_assigned_device+0x301/0xa47
+    kvm_vm_ioctl+0x36c/0x3a2
+    do_vfs_ioctl+0x49e/0x4e4
+    sys_ioctl+0x5a/0x7c
+    system_call_fastpath+0x16/0x1b
+  RIP  put_compound_page+0xd4/0x168
+
+Signed-off-by: Youquan Song <youquan.song@intel.com>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/mm/gup.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/mm/gup.c
++++ b/arch/x86/mm/gup.c
+@@ -201,6 +201,8 @@ static noinline int gup_huge_pud(pud_t p
+       do {
+               VM_BUG_ON(compound_head(page) != head);
+               pages[*nr] = page;
++              if (PageTail(page))
++                      get_huge_page_tail(page);
+               (*nr)++;
+               page++;
+               refs++;
diff --git a/queue-3.1/thp-set-compound-tail-page-_count-to-zero.patch b/queue-3.1/thp-set-compound-tail-page-_count-to-zero.patch
new file mode 100644 (file)
index 0000000..eb43c86
--- /dev/null
@@ -0,0 +1,66 @@
+From 58a84aa92723d1ac3e1cc4e3b0ff49291663f7e1 Mon Sep 17 00:00:00 2001
+From: Youquan Song <youquan.song@intel.com>
+Date: Thu, 8 Dec 2011 14:34:18 -0800
+Subject: thp: set compound tail page _count to zero
+
+From: Youquan Song <youquan.song@intel.com>
+
+commit 58a84aa92723d1ac3e1cc4e3b0ff49291663f7e1 upstream.
+
+Commit 70b50f94f1644 ("mm: thp: tail page refcounting fix") keeps all
+page_tail->_count zero at all times.  But the current kernel does not
+set page_tail->_count to zero if a 1GB page is utilized.  So when an
+IOMMU 1GB page is used by KVM, it will result in a kernel oops because a
+tail page's _count does not equal zero.
+
+  kernel BUG at include/linux/mm.h:386!
+  invalid opcode: 0000 [#1] SMP
+  Call Trace:
+    gup_pud_range+0xb8/0x19d
+    get_user_pages_fast+0xcb/0x192
+    ? trace_hardirqs_off+0xd/0xf
+    hva_to_pfn+0x119/0x2f2
+    gfn_to_pfn_memslot+0x2c/0x2e
+    kvm_iommu_map_pages+0xfd/0x1c1
+    kvm_iommu_map_memslots+0x7c/0xbd
+    kvm_iommu_map_guest+0xaa/0xbf
+    kvm_vm_ioctl_assigned_device+0x2ef/0xa47
+    kvm_vm_ioctl+0x36c/0x3a2
+    do_vfs_ioctl+0x49e/0x4e4
+    sys_ioctl+0x5a/0x7c
+    system_call_fastpath+0x16/0x1b
+  RIP  gup_huge_pud+0xf2/0x159
+
+Signed-off-by: Youquan Song <youquan.song@intel.com>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/hugetlb.c    |    1 +
+ mm/page_alloc.c |    2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(
+       __SetPageHead(page);
+       for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+               __SetPageTail(p);
++              set_page_count(p, 0);
+               p->first_page = page;
+       }
+ }
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -355,8 +355,8 @@ void prep_compound_page(struct page *pag
+       __SetPageHead(page);
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
+-
+               __SetPageTail(p);
++              set_page_count(p, 0);
+               p->first_page = page;
+       }
+ }