From: Greg Kroah-Hartman
Date: Mon, 10 Nov 2014 04:19:43 +0000 (+0900)
Subject: 3.14-stable patches
X-Git-Tag: v3.10.60~48
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8ad83e7003be2a55c93ec1fee0fd2ed8e3c122c0;p=thirdparty%2Fkernel%2Fstable-queue.git

3.14-stable patches

added patches:
	freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
	intel_pstate-correct-byt-vid-values.patch
	intel_pstate-fix-byt-frequency-reporting.patch
	oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch
---

diff --git a/queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch b/queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
new file mode 100644
index 00000000000..45c3bcc32fd
--- /dev/null
+++ b/queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
@@ -0,0 +1,54 @@
+From 51fae6da640edf9d266c94f36bc806c63c301991 Mon Sep 17 00:00:00 2001
+From: Cong Wang
+Date: Tue, 21 Oct 2014 09:27:12 +0200
+Subject: freezer: Do not freeze tasks killed by OOM killer
+
+From: Cong Wang
+
+commit 51fae6da640edf9d266c94f36bc806c63c301991 upstream.
+
+Since f660daac474c6f (oom: thaw threads if oom killed thread is frozen
+before deferring) the OOM killer relies on being able to thaw a frozen
+task to handle an OOM situation, but a3201227f803 (freezer: make
+freezing() test freeze conditions in effect instead of TIF_FREEZE)
+reorganized the code and stopped clearing the freeze flag in
+__thaw_task.  This means that the target task only wakes up and goes
+into the fridge again, because the freezing condition hasn't changed
+for it.  This reintroduces the bug fixed by f660daac474c6f.
+
+Fix the issue by checking for the TIF_MEMDIE thread flag in
+freezing_slow_path and excluding the task from freezing completely.
+If a task was already frozen, it gets woken by __thaw_task from the
+OOM killer and leaves the freezer after rechecking freezing().
+
+Changes since v1:
+- put the TIF_MEMDIE check into freezing_slow_path rather than into
+  __refrigerator, as per Oleg
+- return __thaw_task to oom_scan_process_thread, because
+  oom_kill_process will not wake a task in the fridge, as it is
+  sleeping uninterruptibly
+
+[mhocko@suse.cz: rewrote the changelog]
+Fixes: a3201227f803 (freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE)
+Signed-off-by: Cong Wang
+Signed-off-by: Michal Hocko
+Acked-by: Oleg Nesterov
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/freezer.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/freezer.c
++++ b/kernel/freezer.c
+@@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_stru
+ 	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
+ 		return false;
+
++	if (test_thread_flag(TIF_MEMDIE))
++		return false;
++
+ 	if (pm_nosig_freezing || cgroup_freezing(p))
+ 		return true;
+
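To see what the one-line check buys in isolation, here is a minimal userspace model of the fixed decision logic. It is a sketch only: the struct, the flag constants and the pm_nosig_freezing variable are simplified stand-ins for the kernel's task_struct, TIF_/PF_ bits and freezer state, not the kernel API, and the real freezing_slow_path() checks more conditions than shown here.

/* Minimal model of the fixed decision logic in freezing_slow_path().
 * Simplified stand-in types; not the kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

#define PF_NOFREEZE     0x1
#define PF_SUSPEND_TASK 0x2
#define TIF_MEMDIE      0x1   /* task was chosen by the OOM killer */

struct task {
	unsigned flags;        /* PF_* */
	unsigned thread_flags; /* TIF_* */
	bool cgroup_freezing;
};

static bool pm_nosig_freezing;

/* Returns true if the task should (still) be frozen. */
static bool freezing_slow_path(const struct task *p)
{
	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
		return false;

	/* The fix: an OOM-killed task must stay runnable so it can die
	 * and release its memory, instead of bouncing back into the fridge. */
	if (p->thread_flags & TIF_MEMDIE)
		return false;

	if (pm_nosig_freezing || p->cgroup_freezing)
		return true;

	return false;
}

int main(void)
{
	struct task victim = { .thread_flags = TIF_MEMDIE };

	pm_nosig_freezing = true; /* system-wide freeze in progress */
	printf("freeze OOM victim? %s\n",
	       freezing_slow_path(&victim) ? "yes" : "no"); /* prints "no" */
	return 0;
}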
diff --git a/queue-3.14/intel_pstate-correct-byt-vid-values.patch b/queue-3.14/intel_pstate-correct-byt-vid-values.patch
new file mode 100644
index 00000000000..1660c7299e6
--- /dev/null
+++ b/queue-3.14/intel_pstate-correct-byt-vid-values.patch
@@ -0,0 +1,55 @@
+From d022a65ed2473fac4a600e3424503dc571160a3e Mon Sep 17 00:00:00 2001
+From: Dirk Brandewie
+Date: Mon, 13 Oct 2014 08:37:44 -0700
+Subject: intel_pstate: Correct BYT VID values.
+
+From: Dirk Brandewie
+
+commit d022a65ed2473fac4a600e3424503dc571160a3e upstream.
+
+Using a VID value that is not high enough for the requested P state can
+cause machine checks.  Add a ceiling function to ensure that calculated
+VIDs with fractional values are set to the next highest integer VID
+value.
+
+The algorithm for calculating the non-turbo VID from the BIOS writer's
+guide is:
+   vid_ratio = (vid_max - vid_min) / (max_pstate - min_pstate)
+   vid = ceiling(vid_min + (req_pstate - min_pstate) * vid_ratio)
+
+Signed-off-by: Dirk Brandewie
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/cpufreq/intel_pstate.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -55,6 +55,17 @@ static inline int32_t div_fp(int32_t x,
+ 	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
+ }
+
++static inline int ceiling_fp(int32_t x)
++{
++	int mask, ret;
++
++	ret = fp_toint(x);
++	mask = (1 << FRAC_BITS) - 1;
++	if (x & mask)
++		ret += 1;
++	return ret;
++}
++
+ struct sample {
+ 	int32_t core_pct_busy;
+ 	u64 aperf;
+@@ -399,7 +410,7 @@ static void byt_set_pstate(struct cpudat
+ 			cpudata->vid.ratio);
+
+ 	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
+-	vid = fp_toint(vid_fp);
++	vid = ceiling_fp(vid_fp);
+
+ 	if (pstate > cpudata->pstate.max_pstate)
+ 		vid = cpudata->vid.turbo;
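The following standalone sketch reproduces the fixed-point ceiling and the VID formula above, so the difference between truncating and rounding up is visible outside the driver. FRAC_BITS = 8 and the pstate/VID numbers in main() are assumptions for illustration; the hunk does not show the driver's actual FRAC_BITS value or real BYT calibration data.

/* Standalone sketch of the VID calculation described above, using the
 * same fixed-point scheme as the driver.  FRAC_BITS = 8 is an assumption;
 * the hunk does not show the driver's actual value.
 */
#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8
#define int_tofp(X) ((int32_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

static int32_t mul_fp(int32_t x, int32_t y)
{
	return (int32_t)(((int64_t)x * (int64_t)y) >> FRAC_BITS);
}

static int32_t div_fp(int32_t x, int32_t y)
{
	return (int32_t)(((int64_t)x << FRAC_BITS) / y);
}

/* Round a fixed-point value up to the next integer, as in the patch. */
static int ceiling_fp(int32_t x)
{
	int ret = fp_toint(x);

	if (x & ((1 << FRAC_BITS) - 1))
		ret += 1;
	return ret;
}

int main(void)
{
	/* Hypothetical BYT-like numbers, for illustration only. */
	int min_pstate = 10, max_pstate = 22;
	int32_t vid_min = int_tofp(40), vid_max = int_tofp(56);
	int32_t ratio = div_fp(vid_max - vid_min,
			       int_tofp(max_pstate - min_pstate));

	for (int pstate = min_pstate; pstate <= max_pstate; pstate++) {
		int32_t vid_fp = vid_min +
			mul_fp(int_tofp(pstate - min_pstate), ratio);
		/* fp_toint() would truncate a fractional VID down, which can
		 * be too low for the P state; ceiling_fp() rounds up. */
		printf("pstate %2d -> vid %d (truncated: %d)\n",
		       pstate, ceiling_fp(vid_fp), fp_toint(vid_fp));
	}
	return 0;
}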
diff --git a/queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch b/queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch
new file mode 100644
index 00000000000..c677f23b180
--- /dev/null
+++ b/queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch
@@ -0,0 +1,146 @@
+From b27580b05e6f5253228debc60b8ff4a786ff573a Mon Sep 17 00:00:00 2001
+From: Dirk Brandewie
+Date: Mon, 13 Oct 2014 08:37:43 -0700
+Subject: intel_pstate: Fix BYT frequency reporting
+
+From: Dirk Brandewie
+
+commit b27580b05e6f5253228debc60b8ff4a786ff573a upstream.
+
+BYT has a different conversion from P state to frequency than the core
+processors.  This causes the min/max and current frequency to be
+misreported on some BYT SKUs.  Tested on BYT N2820, Ivybridge and
+Haswell processors.
+
+Link: https://bugzilla.yoctoproject.org/show_bug.cgi?id=6663
+Signed-off-by: Dirk Brandewie
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/cpufreq/intel_pstate.c |   42 +++++++++++++++++++++++++++++++++++------
+ 1 file changed, 36 insertions(+), 6 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -67,6 +67,7 @@ struct pstate_data {
+ 	int	current_pstate;
+ 	int	min_pstate;
+ 	int	max_pstate;
++	int	scaling;
+ 	int	turbo_pstate;
+ };
+
+@@ -118,6 +119,7 @@ struct pstate_funcs {
+ 	int (*get_max)(void);
+ 	int (*get_min)(void);
+ 	int (*get_turbo)(void);
++	int (*get_scaling)(void);
+ 	void (*set)(struct cpudata*, int pstate);
+ 	void (*get_vid)(struct cpudata *);
+ };
+
+@@ -407,6 +409,22 @@ static void byt_set_pstate(struct cpudat
+ 	wrmsrl(MSR_IA32_PERF_CTL, val);
+ }
+
++#define BYT_BCLK_FREQS 5
++static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
++
++static int byt_get_scaling(void)
++{
++	u64 value;
++	int i;
++
++	rdmsrl(MSR_FSB_FREQ, value);
++	i = value & 0x3;
++
++	BUG_ON(i > BYT_BCLK_FREQS);
++
++	return byt_freq_table[i] * 100;
++}
++
+ static void byt_get_vid(struct cpudata *cpudata)
+ {
+ 	u64 value;
+
+@@ -451,6 +469,11 @@ static int core_get_turbo_pstate(void)
+ 	return ret;
+ }
+
++static inline int core_get_scaling(void)
++{
++	return 100000;
++}
++
+ static void core_set_pstate(struct cpudata *cpudata, int pstate)
+ {
+ 	u64 val;
+
+@@ -475,6 +498,7 @@ static struct cpu_defaults core_params =
+ 		.get_max = core_get_max_pstate,
+ 		.get_min = core_get_min_pstate,
+ 		.get_turbo = core_get_turbo_pstate,
++		.get_scaling = core_get_scaling,
+ 		.set = core_set_pstate,
+ 	},
+ };
+
+@@ -493,6 +517,7 @@ static struct cpu_defaults byt_params =
+ 		.get_min = byt_get_min_pstate,
+ 		.get_turbo = byt_get_turbo_pstate,
+ 		.set = byt_set_pstate,
++		.get_scaling = byt_get_scaling,
+ 		.get_vid = byt_get_vid,
+ 	},
+ };
+
+@@ -526,7 +551,7 @@ static void intel_pstate_set_pstate(stru
+ 	if (pstate == cpu->pstate.current_pstate)
+ 		return;
+
+-	trace_cpu_frequency(pstate * 100000, cpu->cpu);
++	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+
+ 	cpu->pstate.current_pstate = pstate;
+
+@@ -555,6 +580,7 @@ static void intel_pstate_get_cpu_pstates
+ 	cpu->pstate.min_pstate = pstate_funcs.get_min();
+ 	cpu->pstate.max_pstate = pstate_funcs.get_max();
+ 	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
++	cpu->pstate.scaling = pstate_funcs.get_scaling();
+
+ 	if (pstate_funcs.get_vid)
+ 		pstate_funcs.get_vid(cpu);
+
+@@ -574,7 +600,9 @@ static inline void intel_pstate_calc_bus
+ 		core_pct += 1;
+
+ 	sample->freq = fp_toint(
+-		mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
++		mul_fp(int_tofp(
++			cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
++			core_pct));
+
+ 	sample->core_pct_busy = (int32_t)core_pct;
+ }
+
+@@ -817,12 +845,13 @@ static int intel_pstate_cpu_init(struct
+ 	else
+ 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+-	policy->min = cpu->pstate.min_pstate * 100000;
+-	policy->max = cpu->pstate.turbo_pstate * 100000;
++	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
++	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+ 	/* cpuinfo and default policy values */
+-	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
+-	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
++	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
++	policy->cpuinfo.max_freq =
++		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+ 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ 	cpumask_set_cpu(policy->cpu, policy->cpus);
+
+@@ -880,6 +909,7 @@ static void copy_cpu_funcs(struct pstate
+ 	pstate_funcs.get_max = funcs->get_max;
+ 	pstate_funcs.get_min = funcs->get_min;
+ 	pstate_funcs.get_turbo = funcs->get_turbo;
++	pstate_funcs.get_scaling = funcs->get_scaling;
+ 	pstate_funcs.set = funcs->set;
+ 	pstate_funcs.get_vid = funcs->get_vid;
+ }
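The change is easiest to see reduced to the reporting arithmetic: frequency in kHz = pstate * scaling, where core CPUs keep the old hard-coded 100000 and BYT derives scaling from the FSB frequency MSR. In this userspace sketch the bus-clock table and the 0x3 mask are copied from the hunk, while the rdmsrl(MSR_FSB_FREQ, ...) read is replaced by a stand-in variable, since MSRs cannot be read from ordinary user code.

/* Sketch of the reporting change: frequency = pstate * scaling (kHz). */
#include <stdint.h>
#include <stdio.h>

#define BYT_BCLK_FREQS 5
static const int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800 };

/* Stand-in for rdmsrl(MSR_FSB_FREQ, value); 2 selects a 133.3 MHz bclk. */
static uint64_t fake_msr_fsb_freq = 2;

static int byt_get_scaling(void)
{
	int i = fake_msr_fsb_freq & 0x3; /* mask copied from the hunk */

	return byt_freq_table[i] * 100;  /* kHz per pstate unit */
}

static int core_get_scaling(void)
{
	return 100000; /* the old hard-coded assumption: 100 MHz per unit */
}

int main(void)
{
	int pstate = 8;

	/* Before the patch, BYT was reported with the core conversion. */
	printf("core: pstate %d -> %d kHz\n", pstate, pstate * core_get_scaling());
	printf("byt:  pstate %d -> %d kHz\n", pstate, pstate * byt_get_scaling());
	return 0;
}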
diff --git a/queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch b/queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch
new file mode 100644
index 00000000000..cff1d6da12e
--- /dev/null
+++ b/queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch
@@ -0,0 +1,171 @@
+From 5695be142e203167e3cb515ef86a88424f3524eb Mon Sep 17 00:00:00 2001
+From: Michal Hocko
+Date: Mon, 20 Oct 2014 18:12:32 +0200
+Subject: OOM, PM: OOM killed task shouldn't escape PM suspend
+
+From: Michal Hocko
+
+commit 5695be142e203167e3cb515ef86a88424f3524eb upstream.
+
+The PM freezer relies on having all tasks frozen by the time devices
+are getting frozen, so that no task will touch them while they are
+being frozen.  But the OOM killer is allowed to kill an already frozen
+task in order to handle an OOM situation.  To protect against late
+wake-ups, the OOM killer is disabled after all tasks are frozen.  This,
+however, still leaves a window open where a killed task didn't manage
+to die by the time freeze_processes finishes.
+
+Reduce the race window by checking all tasks after the OOM killer has
+been disabled.  This is still not completely race free, unfortunately,
+because oom_killer_disable cannot stop an already ongoing OOM killer,
+so a task might still wake up from the fridge and get killed without
+freeze_processes noticing.  Full synchronization of OOM and freezer is,
+however, too heavyweight for this highly unlikely case.
+
+Introduce and check an oom_kills counter which gets incremented early
+when the allocator enters the __alloc_pages_may_oom path, and only
+check all the tasks if the counter changes during the freezing attempt.
+The counter is updated so early to reduce the race window, since the
+allocator has already checked oom_killer_disabled, which is set by the
+PM-freezing code.  A false positive will push the PM freezer into a
+slow path, but that is not a big deal.
+
+Changes since v1:
+- push the re-check loop out of freeze_processes into
+  check_frozen_processes and invert the condition to make the code more
+  readable, as per Rafael
+
+Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring)
+Signed-off-by: Michal Hocko
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/oom.h    |    3 +++
+ kernel/power/process.c |   40 +++++++++++++++++++++++++++++++++++++++-
+ mm/oom_kill.c          |   17 +++++++++++++++++
+ mm/page_alloc.c        |    8 ++++++++
+ 4 files changed, 67 insertions(+), 1 deletion(-)
+
+--- a/include/linux/oom.h
++++ b/include/linux/oom.h
+@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const
+ extern unsigned long oom_badness(struct task_struct *p,
+ 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
+ 		unsigned long totalpages);
++
++extern int oom_kills_count(void);
++extern void note_oom_kill(void);
+ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+ 			     unsigned int points, unsigned long totalpages,
+ 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
+--- a/kernel/power/process.c
++++ b/kernel/power/process.c
+@@ -107,6 +107,28 @@ static int try_to_freeze_tasks(bool user
+ 	return todo ? -EBUSY : 0;
+ }
+
++/*
++ * Returns true if all freezable tasks (except for current) are frozen already
++ */
++static bool check_frozen_processes(void)
++{
++	struct task_struct *g, *p;
++	bool ret = true;
++
++	read_lock(&tasklist_lock);
++	for_each_process_thread(g, p) {
++		if (p != current && !freezer_should_skip(p) &&
++		    !frozen(p)) {
++			ret = false;
++			goto done;
++		}
++	}
++done:
++	read_unlock(&tasklist_lock);
++
++	return ret;
++}
++
+ /**
+  * freeze_processes - Signal user space processes to enter the refrigerator.
+  * The current thread will not be frozen.  The same process that calls
+@@ -117,6 +139,7 @@ static int try_to_freeze_tasks(bool user
+ int freeze_processes(void)
+ {
+ 	int error;
++	int oom_kills_saved;
+
+ 	error = __usermodehelper_disable(UMH_FREEZING);
+ 	if (error)
+@@ -130,12 +153,27 @@ int freeze_processes(void)
+
+ 	printk("Freezing user space processes ... ");
+ 	pm_freezing = true;
++	oom_kills_saved = oom_kills_count();
+ 	error = try_to_freeze_tasks(true);
+ 	if (!error) {
+-		printk("done.");
+ 		__usermodehelper_set_disable_depth(UMH_DISABLED);
+ 		oom_killer_disable();
++
++		/*
++		 * There might have been an OOM kill while we were
++		 * freezing tasks and the killed task might be still
++		 * on the way out so we have to double check for race.
++		 */
++		if (oom_kills_count() != oom_kills_saved &&
++		    !check_frozen_processes()) {
++			__usermodehelper_set_disable_depth(UMH_ENABLED);
++			printk("OOM in progress.");
++			error = -EBUSY;
++			goto done;
++		}
++		printk("done.");
+ 	}
++done:
+ 	printk("\n");
+ 	BUG_ON(in_atomic());
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -406,6 +406,23 @@ static void dump_header(struct task_stru
+ 	dump_tasks(memcg, nodemask);
+ }
+
++/*
++ * Number of OOM killer invocations (including memcg OOM killer).
++ * Primarily used by PM freezer to check for potential races with
++ * OOM killed frozen task.
++ */
++static atomic_t oom_kills = ATOMIC_INIT(0);
++
++int oom_kills_count(void)
++{
++	return atomic_read(&oom_kills);
++}
++
++void note_oom_kill(void)
++{
++	atomic_inc(&oom_kills);
++}
++
+ #define K(x) ((x) << (PAGE_SHIFT-10))
+ /*
+  * Must be called while holding a reference to p, which will be released upon
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2196,6 +2196,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, un
+ 	}
+
+ 	/*
++	 * PM-freezer should be notified that there might be an OOM killer on
++	 * its way to kill and wake somebody up. This is too early and we might
++	 * end up not killing anything but false positives are acceptable.
++	 * See freeze_processes.
++	 */
++	note_oom_kill();
++
++	/*
+ 	 * Go through the zonelist yet one more time, keep very high watermark
+ 	 * here, this is only to catch a parallel oom killing, we must fail if
+ 	 * we're still under heavy pressure.
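Stripped of the freezer machinery, the patch is a snapshot-and-recheck handshake on an atomic counter: the allocator side bumps the counter before it may OOM-kill, and the freezer side snapshots it around the freezing attempt to detect a racing kill. The sketch below models just that pattern with C11 atomics; the function names mirror the patch, but try_to_freeze_tasks, oom_killer_disable and the tasklist walk are reduced to a boolean parameter.

/* Bare model of the oom_kills counter handshake; not the kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int oom_kills;

static int oom_kills_count(void) { return atomic_load(&oom_kills); }
static void note_oom_kill(void)  { atomic_fetch_add(&oom_kills, 1); }

/* Allocator path (simplified __alloc_pages_may_oom). */
static void alloc_may_oom(void)
{
	note_oom_kill(); /* deliberately early: false positives are cheap */
	/* ... last-chance allocation, then possibly kill a task ... */
}

/* Freezer path (simplified freeze_processes). */
static int freeze_processes(bool all_tasks_frozen)
{
	int oom_kills_saved = oom_kills_count();

	/* ... try_to_freeze_tasks(true), oom_killer_disable() ... */

	/* A kill raced with the freeze and some task is not frozen yet. */
	if (oom_kills_count() != oom_kills_saved && !all_tasks_frozen)
		return -16; /* -EBUSY on Linux */
	return 0;
}

int main(void)
{
	alloc_may_oom(); /* an OOM kill races with the first freeze */
	printf("freeze -> %d\n", freeze_processes(false)); /* -16 */
	/* No further kills: the snapshot matches and the freeze succeeds. */
	printf("freeze -> %d\n", freeze_processes(false)); /* 0 */
	return 0;
}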
diff --git a/queue-3.14/series b/queue-3.14/series
index d54fb787ca1..a939c9e8f5c 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -119,3 +119,7 @@ mmc-sdhci-pci-sdio-host-controller-support-for-intel-quark.patch
 x86-platform-intel-iosf-add-braswell-pci-id.patch
 alsa-hda-add-pci-ids-for-intel-braswell.patch
 alsa-hda-add-codec-id-for-braswell-display-audio-codec.patch
+intel_pstate-fix-byt-frequency-reporting.patch
+intel_pstate-correct-byt-vid-values.patch
+freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
+oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch