From: Greg Kroah-Hartman Date: Wed, 6 May 2009 06:31:26 +0000 (-0700) Subject: .29 fun X-Git-Tag: v2.6.27.23~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8191bd5b679f21b120537c16cb073ef770f42116;p=thirdparty%2Fkernel%2Fstable-queue.git .29 fun --- diff --git a/queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch b/queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch new file mode 100644 index 00000000000..5d044c36ac6 --- /dev/null +++ b/queue-2.6.29/clockevents-prevent-endless-loop-in-tick_handle_periodic.patch @@ -0,0 +1,53 @@ +From 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 Mon Sep 17 00:00:00 2001 +From: john stultz +Date: Fri, 1 May 2009 13:10:25 -0700 +Subject: clockevents: prevent endless loop in tick_handle_periodic() + +From: john stultz + +commit 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 upstream. + +tick_handle_periodic() can lock up hard when a one shot clock event +device is used in combination with the jiffies clocksource. + +Avoid an endless loop issue by requiring that a highres valid +clocksource be installed before we call tick_periodic() in a loop when +using ONESHOT mode. The result is we will only increment jiffies once +per interrupt until a continuous hardware clocksource is available. + +Without this, we can run into an endless loop, where each cycle through +the loop, jiffies is updated which increments time by tick_period or +more (due to clock steering), which can cause the event programming to +think the next event was before the newly incremented time and fail +causing tick_periodic() to be called again and the whole process loops +forever.
+ +[ Impact: prevent hard lock up ] + +Signed-off-by: John Stultz +Signed-off-by: Andrew Morton +Signed-off-by: Thomas Gleixner +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev) + for (;;) { + if (!clockevents_program_event(dev, next, ktime_get())) + return; +- tick_periodic(cpu); ++ /* ++ * Have to be careful here. If we're in oneshot mode, ++ * before we call tick_periodic() in a loop, we need ++ * to be sure we're using a real hardware clocksource. ++ * Otherwise we could get trapped in an infinite ++ * loop, as the tick_periodic() increments jiffies, ++ * which then will increment time, possibly causing ++ * the loop to trigger again and again. ++ */ ++ if (timekeeping_valid_for_hres()) ++ tick_periodic(cpu); + next = ktime_add(next, tick_period); + } + } diff --git a/queue-2.6.29/committed_as-for-2.6.29.2.patch b/queue-2.6.29/committed_as-for-2.6.29.2.patch new file mode 100644 index 00000000000..dd3faa1b106 --- /dev/null +++ b/queue-2.6.29/committed_as-for-2.6.29.2.patch @@ -0,0 +1,225 @@ +From foo@baz Tue May 5 23:28:23 PDT 2009 +Date: Tue Apr 28 22:48:11 2009 +0200 +To: Greg KH +From: KOSAKI Motohiro +Subject: mm: fix Committed_AS underflow on large NR_CPUS environment + +From: KOSAKI Motohiro + +commit 00a62ce91e554198ef28234c91c36f850f5a3bc9 upstream + +The Committed_AS field can underflow in certain situations: + +> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c +> 1 Committed_AS: 18446744073709323392 kB +> 11 Committed_AS: 18446744073709455488 kB +> 6 Committed_AS: 35136 kB +> 5 Committed_AS: 18446744073709454400 kB +> 7 Committed_AS: 35904 kB +> 3 Committed_AS: 18446744073709453248 kB +> 2 Committed_AS: 34752 kB +> 9 Committed_AS: 18446744073709453248 kB +> 8 Committed_AS: 34752 kB +> 3 Committed_AS: 18446744073709320960 kB +> 7 Committed_AS: 18446744073709454080 kB +> 3
Committed_AS: 18446744073709320960 kB +> 5 Committed_AS: 18446744073709454080 kB +> 6 Committed_AS: 18446744073709320960 kB + +This happens because NR_CPUS can be greater than 1000 and +meminfo_proc_show() does not check for underflow. + +But a threshold proportional to NR_CPUS is not a good calculation. In +general, the likelihood of lock contention is proportional to the number +of online CPUs, not the theoretical maximum number of CPUs (NR_CPUS). + +The current kernel has a generic percpu_counter facility. Using it is +the right approach: it simplifies the code, and +percpu_counter_read_positive() does not have the underflow issue. + +Reported-by: Dave Hansen +Signed-off-by: KOSAKI Motohiro +Cc: Eric B Munson +Cc: Mel Gorman +Cc: Christoph Lameter +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/meminfo.c | 2 +- + include/linux/mman.h | 9 +++------ + mm/mmap.c | 12 ++++++------ + mm/nommu.c | 13 +++++++------ + mm/swap.c | 46 ---------------------------------------------- + 5 files changed, 17 insertions(+), 65 deletions(-) + +--- a/fs/proc/meminfo.c ++++ b/fs/proc/meminfo.c +@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_ + #define K(x) ((x) << (PAGE_SHIFT - 10)) + si_meminfo(&i); + si_swapinfo(&i); +- committed = atomic_long_read(&vm_committed_space); ++ committed = percpu_counter_read_positive(&vm_committed_as); + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -12,21 +12,18 @@ + + #ifdef __KERNEL__ + #include ++#include + + #include + + extern int sysctl_overcommit_memory; + extern int sysctl_overcommit_ratio; +-extern atomic_long_t vm_committed_space; ++extern struct percpu_counter vm_committed_as; + +-#ifdef CONFIG_SMP +-extern void vm_acct_memory(long pages); +-#else + static inline void vm_acct_memory(long pages) + { +- atomic_long_add(pages, &vm_committed_space); ++ percpu_counter_add(&vm_committed_as, pages); + } +-#endif + + static inline void
vm_unacct_memory(long pages) + { +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot); + int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ + int sysctl_overcommit_ratio = 50; /* default is 50% */ + int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; +-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); ++struct percpu_counter vm_committed_as; + + /* + * Check that a process has enough memory to allocate a new virtual +@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct + if (mm) + allowed -= mm->total_vm / 32; + +- /* +- * cast `allowed' as a signed long because vm_committed_space +- * sometimes has a negative value +- */ +- if (atomic_long_read(&vm_committed_space) < (long)allowed) ++ if (percpu_counter_read_positive(&vm_committed_as) < allowed) + return 0; + error: + vm_unacct_memory(pages); +@@ -2477,6 +2473,10 @@ void mm_drop_all_locks(struct mm_struct + */ + void __init mmap_init(void) + { ++ int ret; ++ ++ ret = percpu_counter_init(&vm_committed_as, 0); ++ VM_BUG_ON(ret); + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, + SLAB_PANIC, NULL); +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -62,7 +62,7 @@ void *high_memory; + struct page *mem_map; + unsigned long max_mapnr; + unsigned long num_physpages; +-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); ++struct percpu_counter vm_committed_as; + int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ + int sysctl_overcommit_ratio = 50; /* default is 50% */ + int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; +@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) + */ + void __init mmap_init(void) + { ++ int ret; ++ ++ ret = percpu_counter_init(&vm_committed_as, 0); ++ VM_BUG_ON(ret); + vm_region_jar = kmem_cache_create("vm_region_jar", + sizeof(struct vm_region), 0, + SLAB_PANIC, NULL); +@@ -1849,12 +1853,9 @@ int __vm_enough_memory(struct mm_struct + if (mm) + 
allowed -= mm->total_vm / 32; + +- /* +- * cast `allowed' as a signed long because vm_committed_space +- * sometimes has a negative value +- */ +- if (atomic_long_read(&vm_committed_space) < (long)allowed) ++ if (percpu_counter_read_positive(&vm_committed_as) < allowed) + return 0; ++ + error: + vm_unacct_memory(pages); + +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -514,49 +514,6 @@ unsigned pagevec_lookup_tag(struct pagev + + EXPORT_SYMBOL(pagevec_lookup_tag); + +-#ifdef CONFIG_SMP +-/* +- * We tolerate a little inaccuracy to avoid ping-ponging the counter between +- * CPUs +- */ +-#define ACCT_THRESHOLD max(16, NR_CPUS * 2) +- +-static DEFINE_PER_CPU(long, committed_space); +- +-void vm_acct_memory(long pages) +-{ +- long *local; +- +- preempt_disable(); +- local = &__get_cpu_var(committed_space); +- *local += pages; +- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { +- atomic_long_add(*local, &vm_committed_space); +- *local = 0; +- } +- preempt_enable(); +-} +- +-#ifdef CONFIG_HOTPLUG_CPU +- +-/* Drop the CPU's cached committed space back into the central pool. 
*/ +-static int cpu_swap_callback(struct notifier_block *nfb, +- unsigned long action, +- void *hcpu) +-{ +- long *committed; +- +- committed = &per_cpu(committed_space, (long)hcpu); +- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { +- atomic_long_add(*committed, &vm_committed_space); +- *committed = 0; +- drain_cpu_pagevecs((long)hcpu); +- } +- return NOTIFY_OK; +-} +-#endif /* CONFIG_HOTPLUG_CPU */ +-#endif /* CONFIG_SMP */ +- + /* + * Perform any setup for the swap system + */ +@@ -577,7 +534,4 @@ void __init swap_setup(void) + * Right now other parts of the system means that we + * _really_ don't want to cluster much more + */ +-#ifdef CONFIG_HOTPLUG_CPU +- hotcpu_notifier(cpu_swap_callback, 0); +-#endif + } diff --git a/queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch b/queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch new file mode 100644 index 00000000000..30f78cc2df0 --- /dev/null +++ b/queue-2.6.29/ignore-madvise-for-hugetlbfs-backed-regions.patch @@ -0,0 +1,47 @@ +From a425a638c858fd10370b573bde81df3ba500e271 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Tue, 5 May 2009 16:37:17 +0100 +Subject: Ignore madvise(MADV_WILLNEED) for hugetlbfs-backed regions + +From: Mel Gorman + +commit a425a638c858fd10370b573bde81df3ba500e271 upstream. + +madvise(MADV_WILLNEED) forces page cache readahead on a range of memory +backed by a file. The assumption is made that the page required is +order-0 and "normal" page cache. + +On hugetlbfs, this assumption is not true and order-0 pages are +allocated and inserted into the hugetlbfs page cache. This leaks +hugetlbfs page reservations and can cause BUGs to trigger related to +corrupted page tables. + +This patch causes MADV_WILLNEED to be ignored for hugetlbfs-backed +regions. 
+ +Signed-off-by: Mel Gorman +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -112,6 +112,14 @@ static long madvise_willneed(struct vm_a + if (!file) + return -EBADF; + ++ /* ++ * Page cache readahead assumes page cache pages are order-0 which ++ * is not the case for hugetlbfs. Do not give a bad return value ++ * but ignore the advice. ++ */ ++ if (vma->vm_flags & VM_HUGETLB) ++ return 0; ++ + if (file->f_mapping->a_ops->get_xip_mem) { + /* no bad return value, but ignore advice */ + return 0; diff --git a/queue-2.6.29/series b/queue-2.6.29/series index 0932fafef6c..5ac8862397c 100644 --- a/queue-2.6.29/series +++ b/queue-2.6.29/series @@ -24,7 +24,6 @@ asoc-fix-offset-of-freqmode-in-wm8580-pll-configuration.patch pci-quirk-disable-msi-on-via-vt3364-chipsets.patch bio-fix-memcpy-corruption-in-bio_copy_user_iov.patch drm-i915-allow-tiled-front-buffers-on-965.patch -mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch pagemap-require-aligned-length-non-null-reads-of-proc-pid-pagemap.patch kbuild-fix-module.markers-permission-error-under-cygwin.patch ptrace-ptrace_attach-fix-the-usage-of-cred_exec_mutex.patch @@ -50,3 +49,6 @@ cs5536-define-dma_sff_read_status-method.patch intel-iommu-fix-device-to-iommu-mapping-for-pci-pci-bridges.patch intel-iommu-fix-oops-in-device_to_iommu-when-devices-not-found.patch intel-iommu-avoid-panic-for-drhd-at-address-zero.patch +clockevents-prevent-endless-loop-in-tick_handle_periodic.patch +ignore-madvise-for-hugetlbfs-backed-regions.patch +committed_as-for-2.6.29.2.patch diff --git a/queue-2.6.29/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch b/rejects/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch similarity index 100% rename from queue-2.6.29/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch rename to 
rejects/mm-fix-committed_as-underflow-on-large-nr_cpus-environment.patch