From dfe4d62a39f58e57ab737c71c1933d549faf1805 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 May 2012 14:27:16 -0700 Subject: [PATCH] 3.3-stable patches added patches: exit_signal-fix-the-parent-has-changed-security-domain-logic.patch exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch sched-fix-nohz-load-accounting-again.patch --- ...nt-has-changed-security-domain-logic.patch | 72 ++++++++++ ...-have-changed-execution-domain-logic.patch | 70 ++++++++++ ...es-used-for-hibernate-thaw-buffering.patch | 97 +++++++++++++ ...n-t-call-of_platform_bus_probe-twice.patch | 99 +++++++++++++ ...sched-fix-nohz-load-accounting-again.patch | 131 ++++++++++++++++++ queue-3.3/series | 5 + 6 files changed, 474 insertions(+) create mode 100644 queue-3.3/exit_signal-fix-the-parent-has-changed-security-domain-logic.patch create mode 100644 queue-3.3/exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch create mode 100644 queue-3.3/pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch create mode 100644 queue-3.3/powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch create mode 100644 queue-3.3/sched-fix-nohz-load-accounting-again.patch diff --git a/queue-3.3/exit_signal-fix-the-parent-has-changed-security-domain-logic.patch b/queue-3.3/exit_signal-fix-the-parent-has-changed-security-domain-logic.patch new file mode 100644 index 00000000000..1b212e0e952 --- /dev/null +++ b/queue-3.3/exit_signal-fix-the-parent-has-changed-security-domain-logic.patch @@ -0,0 +1,72 @@ +From b6e238dceed36891cc633167afe7151f1f3d83c5 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Mon, 19 Mar 2012 17:03:41 +0100 +Subject: exit_signal: fix the "parent has changed security domain" logic + +From: Oleg Nesterov + +commit b6e238dceed36891cc633167afe7151f1f3d83c5 upstream. 
+ +exit_notify() changes ->exit_signal if the parent already did exec. +This doesn't really work, we are not going to send the signal now +if there is another live thread or the exiting task is traced. The +parent can exec before the last dies or the tracer detaches. + +Move this check into do_notify_parent() which actually sends the +signal. + +The user-visible change is that we do not change ->exit_signal, +and thus the exiting task is still "clone children" for +do_wait()->eligible_child(__WCLONE). Hopefully this is fine, the +current logic is racy anyway. + +Signed-off-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/exit.c | 14 -------------- + kernel/signal.c | 9 +++++++++ + 2 files changed, 9 insertions(+), 14 deletions(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -818,20 +818,6 @@ static void exit_notify(struct task_stru + if (group_dead) + kill_orphaned_pgrp(tsk->group_leader, NULL); + +- /* Let father know we died +- * +- * Thread signals are configurable, but you aren't going to use +- * that to send signals to arbitrary processes. +- * That stops right now. +- * +- * If the parent exec id doesn't match the exec id we saved +- * when we started then we know the parent has changed security +- * domain. +- */ +- if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && +- tsk->parent_exec_id != tsk->real_parent->self_exec_id) +- tsk->exit_signal = SIGCHLD; +- + if (unlikely(tsk->ptrace)) { + int sig = thread_group_leader(tsk) && + thread_group_empty(tsk) && +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -1642,6 +1642,15 @@ bool do_notify_parent(struct task_struct + BUG_ON(!tsk->ptrace && + (tsk->group_leader != tsk || !thread_group_empty(tsk))); + ++ if (sig != SIGCHLD) { ++ /* ++ * This is only possible if parent == real_parent. ++ * Check if it has changed security domain. 
++ */ ++ if (tsk->parent_exec_id != tsk->parent->self_exec_id) ++ sig = SIGCHLD; ++ } ++ + info.si_signo = sig; + info.si_errno = 0; + /* diff --git a/queue-3.3/exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch b/queue-3.3/exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch new file mode 100644 index 00000000000..e0ef0eb4e7d --- /dev/null +++ b/queue-3.3/exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch @@ -0,0 +1,70 @@ +From e636825346b36a07ccfc8e30946d52855e21f681 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Mon, 19 Mar 2012 17:03:22 +0100 +Subject: exit_signal: simplify the "we have changed execution domain" logic + +From: Oleg Nesterov + +commit e636825346b36a07ccfc8e30946d52855e21f681 upstream. + +exit_notify() checks "tsk->self_exec_id != tsk->parent_exec_id" +to handle the "we have changed execution domain" case. + +We can change do_thread() to always set ->exit_signal = SIGCHLD +and remove this check to simplify the code. + +We could change setup_new_exec() instead, this looks more logical +because it increments ->self_exec_id. But note that de_thread() +already resets ->exit_signal if it changes the leader, let's keep +both changes close to each other. + +Note that we change ->exit_signal lockless, this changes the rules. +Thereafter ->exit_signal is not stable under tasklist but this is +fine, the only possible change is OLDSIG -> SIGCHLD. This can race +with eligible_child() but the race is harmless. We can race with +reparent_leader() which changes our ->exit_signal in parallel, but +it does the same change to SIGCHLD. + +The noticeable user-visible change is that the execing task is not +"visible" to do_wait()->eligible_child(__WCLONE) right after exec. +To me this looks more logical, and this is consistent with mt case. 
+ +Signed-off-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 3 +++ + kernel/exit.c | 7 +------ + 2 files changed, 4 insertions(+), 6 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -975,6 +975,9 @@ static int de_thread(struct task_struct + sig->notify_count = 0; + + no_thread_group: ++ /* we have changed execution domain */ ++ tsk->exit_signal = SIGCHLD; ++ + if (current->mm) + setmax_mm_hiwater_rss(&sig->maxrss, current->mm); + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -827,14 +827,9 @@ static void exit_notify(struct task_stru + * If the parent exec id doesn't match the exec id we saved + * when we started then we know the parent has changed security + * domain. +- * +- * If our self_exec id doesn't match our parent_exec_id then +- * we have changed execution domain as these two values started +- * the same after a fork. + */ + if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && +- (tsk->parent_exec_id != tsk->real_parent->self_exec_id || +- tsk->self_exec_id != tsk->parent_exec_id)) ++ tsk->parent_exec_id != tsk->real_parent->self_exec_id) + tsk->exit_signal = SIGCHLD; + + if (unlikely(tsk->ptrace)) { diff --git a/queue-3.3/pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch b/queue-3.3/pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch new file mode 100644 index 00000000000..f78bb097551 --- /dev/null +++ b/queue-3.3/pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch @@ -0,0 +1,97 @@ +From f8262d476823a7ea1eb497ff9676d1eab2393c75 Mon Sep 17 00:00:00 2001 +From: Bojan Smojver +Date: Tue, 24 Apr 2012 23:53:28 +0200 +Subject: PM / Hibernate: fix the number of pages used for hibernate/thaw buffering + +From: Bojan Smojver + +commit f8262d476823a7ea1eb497ff9676d1eab2393c75 upstream. + +Hibernation regression fix, since 3.2. 
+ +Calculate the number of required free pages based on non-high memory +pages only, because that is where the buffers will come from. + +Commit 081a9d043c983f161b78fdc4671324d1342b86bc introduced a new buffer +page allocation logic during hibernation, in order to improve the +performance. The amount of pages allocated was calculated based on total +amount of pages available, although only non-high memory pages are +usable for this purpose. This caused hibernation code to attempt to over +allocate pages on platforms that have high memory, which led to hangs. + +Signed-off-by: Bojan Smojver +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/power/swap.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +--- a/kernel/power/swap.c ++++ b/kernel/power/swap.c +@@ -51,6 +51,23 @@ + + #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) + ++/* ++ * Number of free pages that are not high. ++ */ ++static inline unsigned long low_free_pages(void) ++{ ++ return nr_free_pages() - nr_free_highpages(); ++} ++ ++/* ++ * Number of pages required to be kept free while writing the image. Always ++ * half of all available low pages before the writing starts. 
++ */ ++static inline unsigned long reqd_free_pages(void) ++{ ++ return low_free_pages() / 2; ++} ++ + struct swap_map_page { + sector_t entries[MAP_PAGE_ENTRIES]; + sector_t next_swap; +@@ -72,7 +89,7 @@ struct swap_map_handle { + sector_t cur_swap; + sector_t first_sector; + unsigned int k; +- unsigned long nr_free_pages, written; ++ unsigned long reqd_free_pages; + u32 crc32; + }; + +@@ -316,8 +333,7 @@ static int get_swap_writer(struct swap_m + goto err_rel; + } + handle->k = 0; +- handle->nr_free_pages = nr_free_pages() >> 1; +- handle->written = 0; ++ handle->reqd_free_pages = reqd_free_pages(); + handle->first_sector = handle->cur_swap; + return 0; + err_rel: +@@ -352,11 +368,11 @@ static int swap_write_page(struct swap_m + handle->cur_swap = offset; + handle->k = 0; + } +- if (bio_chain && ++handle->written > handle->nr_free_pages) { ++ if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; +- handle->written = 0; ++ handle->reqd_free_pages = reqd_free_pages(); + } + out: + return error; +@@ -618,7 +634,7 @@ static int save_image_lzo(struct swap_ma + * Adjust number of free pages after all allocations have been done. + * We don't want to run out of pages when writing. + */ +- handle->nr_free_pages = nr_free_pages() >> 1; ++ handle->reqd_free_pages = reqd_free_pages(); + + /* + * Start the CRC32 thread. diff --git a/queue-3.3/powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch b/queue-3.3/powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch new file mode 100644 index 00000000000..a1da80f8143 --- /dev/null +++ b/queue-3.3/powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch @@ -0,0 +1,99 @@ +From 8a95bc8dfe06982fc2b8a0a2adda7baa2346a17b Mon Sep 17 00:00:00 2001 +From: Timur Tabi +Date: Wed, 30 Nov 2011 10:19:17 -0600 +Subject: powerpc/85xx: don't call of_platform_bus_probe() twice + +From: Timur Tabi + +commit 8a95bc8dfe06982fc2b8a0a2adda7baa2346a17b upstream. 
+ +Commit 46d026ac ("powerpc/85xx: consolidate of_platform_bus_probe calls") +replaced platform-specific of_device_id tables with a single function +that probes the most of the busses in 85xx device trees. If a specific +platform needed additional busses probed, then it could call +of_platform_bus_probe() again. Typically, the additional platform-specific +busses are children of existing busses that have already been probed. +of_platform_bus_probe() does not handle those child busses automatically. + +Unfortunately, this doesn't actually work. The second (platform-specific) +call to of_platform_bus_probe() never finds any of the busses it's asked +to find. + +To remedy this, the platform-specific of_device_id tables are eliminated, +and their entries are merged into mpc85xx_common_ids[], so that all busses +are probed at once. + +Signed-off-by: Timur Tabi +Signed-off-by: Kumar Gala +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/85xx/common.c | 6 ++++++ + arch/powerpc/platforms/85xx/mpc85xx_mds.c | 11 +---------- + arch/powerpc/platforms/85xx/p1022_ds.c | 13 +------------ + 3 files changed, 8 insertions(+), 22 deletions(-) + +--- a/arch/powerpc/platforms/85xx/common.c ++++ b/arch/powerpc/platforms/85xx/common.c +@@ -21,6 +21,12 @@ static struct of_device_id __initdata mp + { .compatible = "fsl,qe", }, + { .compatible = "fsl,cpm2", }, + { .compatible = "fsl,srio", }, ++ /* So that the DMA channel nodes can be probed individually: */ ++ { .compatible = "fsl,eloplus-dma", }, ++ /* For the PMC driver */ ++ { .compatible = "fsl,mpc8548-guts", }, ++ /* Probably unnecessary? 
*/ ++ { .compatible = "gpio-leds", }, + {}, + }; + +--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c ++++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c +@@ -405,12 +405,6 @@ static int __init board_fixups(void) + machine_arch_initcall(mpc8568_mds, board_fixups); + machine_arch_initcall(mpc8569_mds, board_fixups); + +-static struct of_device_id mpc85xx_ids[] = { +- { .compatible = "fsl,mpc8548-guts", }, +- { .compatible = "gpio-leds", }, +- {}, +-}; +- + static int __init mpc85xx_publish_devices(void) + { + if (machine_is(mpc8568_mds)) +@@ -418,10 +412,7 @@ static int __init mpc85xx_publish_device + if (machine_is(mpc8569_mds)) + simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio"); + +- mpc85xx_common_publish_devices(); +- of_platform_bus_probe(NULL, mpc85xx_ids, NULL); +- +- return 0; ++ return mpc85xx_common_publish_devices(); + } + + machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices); +--- a/arch/powerpc/platforms/85xx/p1022_ds.c ++++ b/arch/powerpc/platforms/85xx/p1022_ds.c +@@ -303,18 +303,7 @@ static void __init p1022_ds_setup_arch(v + pr_info("Freescale P1022 DS reference board\n"); + } + +-static struct of_device_id __initdata p1022_ds_ids[] = { +- /* So that the DMA channel nodes can be probed individually: */ +- { .compatible = "fsl,eloplus-dma", }, +- {}, +-}; +- +-static int __init p1022_ds_publish_devices(void) +-{ +- mpc85xx_common_publish_devices(); +- return of_platform_bus_probe(NULL, p1022_ds_ids, NULL); +-} +-machine_device_initcall(p1022_ds, p1022_ds_publish_devices); ++machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices); + + machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier); + diff --git a/queue-3.3/sched-fix-nohz-load-accounting-again.patch b/queue-3.3/sched-fix-nohz-load-accounting-again.patch new file mode 100644 index 00000000000..43ab7fc07aa --- /dev/null +++ b/queue-3.3/sched-fix-nohz-load-accounting-again.patch @@ -0,0 +1,131 @@ +From c308b56b5398779cd3da0f62ab26b0453494c3d4 Mon Sep 17 00:00:00 2001 +From: 
Peter Zijlstra +Date: Thu, 1 Mar 2012 15:04:46 +0100 +Subject: sched: Fix nohz load accounting -- again! +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Peter Zijlstra + +commit c308b56b5398779cd3da0f62ab26b0453494c3d4 upstream. + +Various people reported nohz load tracking still being wrecked, but Doug +spotted the actual problem. We fold the nohz remainder in too soon, +causing us to loose samples and under-account. + +So instead of playing catch-up up-front, always do a single load-fold +with whatever state we encounter and only then fold the nohz remainder +and play catch-up. + +Reported-by: Doug Smythies +Reported-by: Lesław Kopeć +Reported-by: Aman Gupta +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/n/tip-4v31etnhgg9kwd6ocgx3rxl8@git.kernel.org +Signed-off-by: Ingo Molnar +Cc: Kerin Millar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 53 +++++++++++++++++++++++++--------------------------- + 1 file changed, 26 insertions(+), 27 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -2266,13 +2266,10 @@ calc_load_n(unsigned long load, unsigned + * Once we've updated the global active value, we need to apply the exponential + * weights adjusted to the number of cycles missed. + */ +-static void calc_global_nohz(unsigned long ticks) ++static void calc_global_nohz(void) + { + long delta, active, n; + +- if (time_before(jiffies, calc_load_update)) +- return; +- + /* + * If we crossed a calc_load_update boundary, make sure to fold + * any pending idle changes, the respective CPUs might have +@@ -2284,31 +2281,25 @@ static void calc_global_nohz(unsigned lo + atomic_long_add(delta, &calc_load_tasks); + + /* +- * If we were idle for multiple load cycles, apply them. ++ * It could be the one fold was all it took, we done!
+ */ +- if (ticks >= LOAD_FREQ) { +- n = ticks / LOAD_FREQ; ++ if (time_before(jiffies, calc_load_update + 10)) ++ return; + +- active = atomic_long_read(&calc_load_tasks); +- active = active > 0 ? active * FIXED_1 : 0; ++ /* ++ * Catch-up, fold however many we are behind still ++ */ ++ delta = jiffies - calc_load_update - 10; ++ n = 1 + (delta / LOAD_FREQ); + +- avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); +- avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); +- avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); ++ active = atomic_long_read(&calc_load_tasks); ++ active = active > 0 ? active * FIXED_1 : 0; + +- calc_load_update += n * LOAD_FREQ; +- } ++ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); ++ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); ++ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + +- /* +- * Its possible the remainder of the above division also crosses +- * a LOAD_FREQ period, the regular check in calc_global_load() +- * which comes after this will take care of that. +- * +- * Consider us being 11 ticks before a cycle completion, and us +- * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will +- * age us 4 cycles, and the test in calc_global_load() will +- * pick up the final one. 
+- */ ++ calc_load_update += n * LOAD_FREQ; + } + #else + void calc_load_account_idle(struct rq *this_rq) +@@ -2320,7 +2311,7 @@ static inline long calc_load_fold_idle(v + return 0; + } + +-static void calc_global_nohz(unsigned long ticks) ++static void calc_global_nohz(void) + { + } + #endif +@@ -2348,8 +2339,6 @@ void calc_global_load(unsigned long tick + { + long active; + +- calc_global_nohz(ticks); +- + if (time_before(jiffies, calc_load_update + 10)) + return; + +@@ -2361,6 +2350,16 @@ void calc_global_load(unsigned long tick + avenrun[2] = calc_load(avenrun[2], EXP_15, active); + + calc_load_update += LOAD_FREQ; ++ ++ /* ++ * Account one period with whatever state we found before ++ * folding in the nohz state and ageing the entire idle period. ++ * ++ * This avoids loosing a sample when we go idle between ++ * calc_load_account_active() (10 ticks ago) and now and thus ++ * under-accounting. ++ */ ++ calc_global_nohz(); + } + + /* diff --git a/queue-3.3/series b/queue-3.3/series index 500dc089bbb..7e261a644f0 100644 --- a/queue-3.3/series +++ b/queue-3.3/series @@ -51,3 +51,8 @@ x86-boot-restrict-cflags-for-hostprogs.patch x86-efi-fix-endian-issues-and-unaligned-accesses.patch x86-boot-correct-cflags-for-hostprogs.patch x86-efi-add-dedicated-efi-stub-entry-point.patch +powerpc-85xx-don-t-call-of_platform_bus_probe-twice.patch +pm-hibernate-fix-the-number-of-pages-used-for-hibernate-thaw-buffering.patch +sched-fix-nohz-load-accounting-again.patch +exit_signal-simplify-the-we-have-changed-execution-domain-logic.patch +exit_signal-fix-the-parent-has-changed-security-domain-logic.patch -- 2.47.3