git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
.29 fun
author Greg Kroah-Hartman <gregkh@suse.de>
Wed, 6 May 2009 06:31:26 +0000 (23:31 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Wed, 6 May 2009 06:31:26 +0000 (23:31 -0700)
queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch [new file with mode: 0644]
queue-2.6.29/committed_as-for-2.6.29.2.patch [new file with mode: 0644]
queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch [new file with mode: 0644]
queue-2.6.29/series
rejects/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch [moved from queue-2.6.29/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch with 100% similarity]

diff --git a/queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch b/queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch
new file mode 100644 (file)
index 0000000..5d044c3
--- /dev/null
@@ -0,0 +1,53 @@
+From 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 Mon Sep 17 00:00:00 2001
+From: john stultz <johnstul@us.ibm.com>
+Date: Fri, 1 May 2009 13:10:25 -0700
+Subject: clockevents: prevent endless loop in tick_handle_periodic()
+
+From: john stultz <johnstul@us.ibm.com>
+
+commit 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 upstream.
+
+tick_handle_periodic() can lock up hard when a one-shot clock event
+device is used in combination with the jiffies clocksource.
+
+Avoid the endless loop by requiring that a clocksource valid for highres
+timekeeping be installed before we call tick_periodic() in a loop when
+using ONESHOT mode.  The result is that we only increment jiffies once
+per interrupt until a continuous hardware clocksource is available.
+
+Without this, we can run into an endless loop: on each pass through
+the loop, jiffies is updated, which advances time by tick_period or
+more (due to clock steering).  That can make the event programming
+think the next event is before the newly incremented time, so it
+fails, tick_periodic() is called again, and the whole process loops
+forever.
+
+[ Impact: prevent hard lock up ]
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
+       for (;;) {
+               if (!clockevents_program_event(dev, next, ktime_get()))
+                       return;
+-              tick_periodic(cpu);
++              /*
++               * Have to be careful here. If we're in oneshot mode,
++               * before we call tick_periodic() in a loop, we need
++               * to be sure we're using a real hardware clocksource.
++               * Otherwise we could get trapped in an infinite
++               * loop, as the tick_periodic() increments jiffies,
++               * which will then increment time, possibly causing
++               * the loop to trigger again and again.
++               */
++              if (timekeeping_valid_for_hres())
++                      tick_periodic(cpu);
+               next = ktime_add(next, tick_period);
+       }
+ }
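
For illustration only, the following userspace C sketch models the failure mode this patch guards against; every name in it (program_event(), toy_tick_periodic(), hres_clocksource_valid) is made up and only mimics the behaviour described in the changelog, it is not kernel code. When the tick itself is what advances time (the jiffies-clocksource case), each pass through the reprogramming loop pushes "now" forward by at least a full period, so the freshly programmed event is always already in the past; skipping the time update in that case, as the patch does via timekeeping_valid_for_hres(), lets the loop terminate.

#include <stdio.h>
#include <stdbool.h>

#define TICK_PERIOD 10

static long now;                    /* stands in for ktime_get() */
static bool hres_clocksource_valid; /* stands in for timekeeping_valid_for_hres() */

/* Like clockevents_program_event(): non-zero (failure) if the event is in the past. */
static int program_event(long next)
{
	return next <= now ? -1 : 0;
}

/* With only the jiffies clocksource, the tick itself is what moves time,
 * and clock steering can push it forward by more than one period. */
static void toy_tick_periodic(void)
{
	if (!hres_clocksource_valid)
		now += TICK_PERIOD + 5;
}

static int handle_periodic(long next, bool guarded)
{
	int iterations = 0;

	for (;;) {
		if (!program_event(next))
			return iterations;          /* reprogramming succeeded */
		if (!guarded || hres_clocksource_valid)
			toy_tick_periodic();        /* the patch adds this guard */
		next += TICK_PERIOD;
		if (++iterations > 1000)
			return -1;                  /* demo-only escape hatch */
	}
}

int main(void)
{
	hres_clocksource_valid = false;     /* jiffies is the clocksource */

	now = 100;
	printf("unguarded loop: %d\n", handle_periodic(50, false));  /* -1: never converges */

	now = 100;
	printf("guarded loop:   %d\n", handle_periodic(50, true));   /* small, finite count */
	return 0;
}
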
diff --git a/queue-2.6.29/committed_as-for-2.6.29.2.patch b/queue-2.6.29/committed_as-for-2.6.29.2.patch
new file mode 100644 (file)
index 0000000..dd3faa1
--- /dev/null
@@ -0,0 +1,225 @@
+From foo@baz Tue May  5 23:28:23 PDT 2009
+Date: Tue Apr 28 22:48:11 2009 +0200
+To: Greg KH <greg@kroah.com>
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 00a62ce91e554198ef28234c91c36f850f5a3bc9 upstream
+
+The Committed_AS field can underflow in certain situations:
+
+>         # while true; do cat /proc/meminfo  | grep _AS; sleep 1; done | uniq -c
+>               1 Committed_AS: 18446744073709323392 kB
+>              11 Committed_AS: 18446744073709455488 kB
+>               6 Committed_AS:    35136 kB
+>               5 Committed_AS: 18446744073709454400 kB
+>               7 Committed_AS:    35904 kB
+>               3 Committed_AS: 18446744073709453248 kB
+>               2 Committed_AS:    34752 kB
+>               9 Committed_AS: 18446744073709453248 kB
+>               8 Committed_AS:    34752 kB
+>               3 Committed_AS: 18446744073709320960 kB
+>               7 Committed_AS: 18446744073709454080 kB
+>               3 Committed_AS: 18446744073709320960 kB
+>               5 Committed_AS: 18446744073709454080 kB
+>               6 Committed_AS: 18446744073709320960 kB
+
+This can happen because NR_CPUS can be greater than 1000 and
+meminfo_proc_show() does not check for underflow.
+
+But a threshold proportional to NR_CPUS is not a good calculation.  In
+general, the likelihood of lock contention is proportional to the number
+of online cpus, not the theoretical maximum (NR_CPUS).
+
+The current kernel has generic percpu_counter infrastructure; using it
+is the right way.  It simplifies the code, and
+percpu_counter_read_positive() cannot return an underflowed value.
+
+Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Eric B Munson <ebmunson@us.ibm.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Christoph Lameter <cl@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/meminfo.c    |    2 +-
+ include/linux/mman.h |    9 +++------
+ mm/mmap.c            |   12 ++++++------
+ mm/nommu.c           |   13 +++++++------
+ mm/swap.c            |   46 ----------------------------------------------
+ 5 files changed, 17 insertions(+), 65 deletions(-)
+
+--- a/fs/proc/meminfo.c
++++ b/fs/proc/meminfo.c
+@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+       si_meminfo(&i);
+       si_swapinfo(&i);
+-      committed = atomic_long_read(&vm_committed_space);
++      committed = percpu_counter_read_positive(&vm_committed_as);
+       allowed = ((totalram_pages - hugetlb_total_pages())
+               * sysctl_overcommit_ratio / 100) + total_swap_pages;
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -12,21 +12,18 @@
+ #ifdef __KERNEL__
+ #include <linux/mm.h>
++#include <linux/percpu_counter.h>
+ #include <asm/atomic.h>
+ extern int sysctl_overcommit_memory;
+ extern int sysctl_overcommit_ratio;
+-extern atomic_long_t vm_committed_space;
++extern struct percpu_counter vm_committed_as;
+-#ifdef CONFIG_SMP
+-extern void vm_acct_memory(long pages);
+-#else
+ static inline void vm_acct_memory(long pages)
+ {
+-      atomic_long_add(pages, &vm_committed_space);
++      percpu_counter_add(&vm_committed_as, pages);
+ }
+-#endif
+ static inline void vm_unacct_memory(long pages)
+ {
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50;     /* default is 50% */
+ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ /*
+  * Check that a process has enough memory to allocate a new virtual
+@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct 
+       if (mm)
+               allowed -= mm->total_vm / 32;
+-      /*
+-       * cast `allowed' as a signed long because vm_committed_space
+-       * sometimes has a negative value
+-       */
+-      if (atomic_long_read(&vm_committed_space) < (long)allowed)
++      if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+               return 0;
+ error:
+       vm_unacct_memory(pages);
+@@ -2477,6 +2473,10 @@ void mm_drop_all_locks(struct mm_struct 
+  */
+ void __init mmap_init(void)
+ {
++      int ret;
++
++      ret = percpu_counter_init(&vm_committed_as, 0);
++      VM_BUG_ON(ret);
+       vm_area_cachep = kmem_cache_create("vm_area_struct",
+                       sizeof(struct vm_area_struct), 0,
+                       SLAB_PANIC, NULL);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -62,7 +62,7 @@ void *high_memory;
+ struct page *mem_map;
+ unsigned long max_mapnr;
+ unsigned long num_physpages;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+  */
+ void __init mmap_init(void)
+ {
++      int ret;
++
++      ret = percpu_counter_init(&vm_committed_as, 0);
++      VM_BUG_ON(ret);
+       vm_region_jar = kmem_cache_create("vm_region_jar",
+                                         sizeof(struct vm_region), 0,
+                                         SLAB_PANIC, NULL);
+@@ -1849,12 +1853,9 @@ int __vm_enough_memory(struct mm_struct 
+       if (mm)
+               allowed -= mm->total_vm / 32;
+-      /*
+-       * cast `allowed' as a signed long because vm_committed_space
+-       * sometimes has a negative value
+-       */
+-      if (atomic_long_read(&vm_committed_space) < (long)allowed)
++      if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+               return 0;
++
+ error:
+       vm_unacct_memory(pages);
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -514,49 +514,6 @@ unsigned pagevec_lookup_tag(struct pagev
+ EXPORT_SYMBOL(pagevec_lookup_tag);
+-#ifdef CONFIG_SMP
+-/*
+- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
+- * CPUs
+- */
+-#define ACCT_THRESHOLD        max(16, NR_CPUS * 2)
+-
+-static DEFINE_PER_CPU(long, committed_space);
+-
+-void vm_acct_memory(long pages)
+-{
+-      long *local;
+-
+-      preempt_disable();
+-      local = &__get_cpu_var(committed_space);
+-      *local += pages;
+-      if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
+-              atomic_long_add(*local, &vm_committed_space);
+-              *local = 0;
+-      }
+-      preempt_enable();
+-}
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-
+-/* Drop the CPU's cached committed space back into the central pool. */
+-static int cpu_swap_callback(struct notifier_block *nfb,
+-                           unsigned long action,
+-                           void *hcpu)
+-{
+-      long *committed;
+-
+-      committed = &per_cpu(committed_space, (long)hcpu);
+-      if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+-              atomic_long_add(*committed, &vm_committed_space);
+-              *committed = 0;
+-              drain_cpu_pagevecs((long)hcpu);
+-      }
+-      return NOTIFY_OK;
+-}
+-#endif /* CONFIG_HOTPLUG_CPU */
+-#endif /* CONFIG_SMP */
+-
+ /*
+  * Perform any setup for the swap system
+  */
+@@ -577,7 +534,4 @@ void __init swap_setup(void)
+        * Right now other parts of the system means that we
+        * _really_ don't want to cluster much more
+        */
+-#ifdef CONFIG_HOTPLUG_CPU
+-      hotcpu_notifier(cpu_swap_callback, 0);
+-#endif
+ }
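
As a rough illustration of the mechanism (a userspace sketch, not kernel code; the names, the fixed NR_CPUS_TOY value, and the 4 kB page size are assumptions): the old scheme caches per-cpu deltas and only folds them into the shared counter once they exceed a threshold proportional to NR_CPUS, so positive charges can sit in one cpu's cache while negative ones from another cpu get folded in, driving the shared counter negative. Printed as an unsigned quantity that shows up as the huge Committed_AS values quoted above; a clamped read in the style of percpu_counter_read_positive() never reports a negative value.

#include <stdio.h>

#define NR_CPUS_TOY	4096
#define ACCT_THRESHOLD	(NR_CPUS_TOY * 2)	/* mirrors max(16, NR_CPUS * 2) */

static long global_committed;			/* the shared counter    */
static long local_committed[NR_CPUS_TOY];	/* per-cpu cached deltas */

/* Old-style accounting: only fold into the global counter past the threshold. */
static void toy_vm_acct_memory(int cpu, long pages)
{
	local_committed[cpu] += pages;
	if (local_committed[cpu] > ACCT_THRESHOLD ||
	    local_committed[cpu] < -ACCT_THRESHOLD) {
		global_committed += local_committed[cpu];
		local_committed[cpu] = 0;
	}
}

static long read_raw(void)      { return global_committed; }
static long read_positive(void) { return global_committed < 0 ? 0 : global_committed; }

int main(void)
{
	toy_vm_acct_memory(0,  5000);	/* charge on cpu 0: stays cached locally */
	toy_vm_acct_memory(1, -5000);	/* uncharge on cpu 1: also cached...     */
	toy_vm_acct_memory(1, -4000);	/* ...until it crosses the threshold and */
					/* folds -9000 into the global counter   */

	/* meminfo scales by the page size in kB; assume 4 kB pages here */
	printf("raw read:     %lu kB\n", (unsigned long)read_raw() * 4);
	printf("clamped read: %lu kB\n", (unsigned long)read_positive() * 4);
	return 0;
}
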
diff --git a/queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch b/queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch
new file mode 100644 (file)
index 0000000..30f78cc
--- /dev/null
@@ -0,0 +1,47 @@
+From a425a638c858fd10370b573bde81df3ba500e271 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mel@csn.ul.ie>
+Date: Tue, 5 May 2009 16:37:17 +0100
+Subject: Ignore madvise(MADV_WILLNEED) for hugetlbfs-backed regions
+
+From: Mel Gorman <mel@csn.ul.ie>
+
+commit a425a638c858fd10370b573bde81df3ba500e271 upstream.
+
+madvise(MADV_WILLNEED) forces page cache readahead on a range of memory
+backed by a file.  The assumption is made that the page required is
+order-0 and "normal" page cache.
+
+On hugetlbfs, this assumption is not true: order-0 pages are
+allocated and inserted into the hugetlbfs page cache.  This leaks
+hugetlbfs page reservations and can trigger BUGs related to
+corrupted page tables.
+
+This patch causes MADV_WILLNEED to be ignored for hugetlbfs-backed
+regions.
+
+Signed-off-by: Mel Gorman <mel@csn.ul.ie>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/madvise.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -112,6 +112,14 @@ static long madvise_willneed(struct vm_a
+       if (!file)
+               return -EBADF;
++      /*
++       * Page cache readahead assumes page cache pages are order-0 which
++       * is not the case for hugetlbfs. Do not give a bad return value
++       * but ignore the advice.
++       */
++      if (vma->vm_flags & VM_HUGETLB)
++              return 0;
++
+       if (file->f_mapping->a_ops->get_xip_mem) {
+               /* no bad return value, but ignore advice */
+               return 0;
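
From userspace the change is invisible except that the advice is now accepted and silently ignored. A minimal sketch follows; the hugetlbfs mount point and file name are assumptions, and on 2.6.29 a hugetlb region is obtained by mapping a file that lives on a hugetlbfs mount with huge pages reserved.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define HUGEPAGE_SZ	(2UL * 1024 * 1024)	/* assumes 2 MB huge pages */

int main(void)
{
	/* hypothetical path; hugetlbfs must be mounted there */
	int fd = open("/mnt/huge/willneed-demo", O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	void *p = mmap(NULL, HUGEPAGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* With the patch applied this is ignored and returns 0; previously it
	 * pushed order-0 readahead pages into the hugetlbfs page cache and
	 * leaked huge page reservations. */
	if (madvise(p, HUGEPAGE_SZ, MADV_WILLNEED) != 0)
		perror("madvise");

	munmap(p, HUGEPAGE_SZ);
	close(fd);
	return 0;
}
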
diff --git a/queue-2.6.29/series b/queue-2.6.29/series
index 0932fafef6c080ed5cce1acd18f224d4e1d60499..5ac8862397cf0e36c9f3272edd3ae20c7e04fa45 100644 (file)
--- a/queue-2.6.29/series
+++ b/queue-2.6.29/series
@@ -24,7 +24,6 @@ asoc-fix-offset-of-freqmode-in-wm8580-pll-configuration.patch
 pci-quirk-disable-msi-on-via-vt3364-chipsets.patch
 bio-fix-memcpy-corruption-in-bio_copy_user_iov.patch
 drm-i915-allow-tiled-front-buffers-on-965.patch
-mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch
 pagemap-require-aligned-length-non-null-reads-of-proc-pid-pagemap.patch
 kbuild-fix-module.markers-permission-error-under-cygwin.patch
 ptrace-ptrace_attach-fix-the-usage-of-cred_exec_mutex.patch
@@ -50,3 +49,6 @@ cs5536-define-dma_sff_read_status-method.patch
 intel-iommu-fix-device-to-iommu-mapping-for-pci-pci-bridges.patch
 intel-iommu-fix-oops-in-device_to_iommu-when-devices-not-found.patch
 intel-iommu-avoid-panic-for-drhd-at-address-zero.patch
+clockevents-prevent-endless-loop-in-tick_handle_periodic.patch
+ignore-madvise-for-hugetlbfs-backed-regions.patch
+committed_as-for-2.6.29.2.patch