git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 Nov 2014 04:19:43 +0000 (13:19 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 Nov 2014 04:19:43 +0000 (13:19 +0900)
added patches:
freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
intel_pstate-correct-byt-vid-values.patch
intel_pstate-fix-byt-frequency-reporting.patch
oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch

queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch [new file with mode: 0644]
queue-3.14/intel_pstate-correct-byt-vid-values.patch [new file with mode: 0644]
queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch [new file with mode: 0644]
queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch [new file with mode: 0644]
queue-3.14/series

diff --git a/queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch b/queue-3.14/freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
new file mode 100644 (file)
index 0000000..45c3bcc
--- /dev/null
@@ -0,0 +1,54 @@
+From 51fae6da640edf9d266c94f36bc806c63c301991 Mon Sep 17 00:00:00 2001
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Tue, 21 Oct 2014 09:27:12 +0200
+Subject: freezer: Do not freeze tasks killed by OOM killer
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+commit 51fae6da640edf9d266c94f36bc806c63c301991 upstream.
+
+Since f660daac474c6f (oom: thaw threads if oom killed thread is frozen
+before deferring), the OOM killer has relied on being able to thaw a
+frozen task to handle an OOM situation, but a3201227f803 (freezer: make
+freezing() test freeze conditions in effect instead of TIF_FREEZE)
+reorganized the code and stopped clearing the freeze flag in
+__thaw_task. This means that the target task merely wakes up and goes
+back into the fridge, because the freezing condition hasn't changed for
+it. This reintroduces the bug fixed by f660daac474c6f.
+
+Fix the issue by checking for the TIF_MEMDIE thread flag in
+freezing_slow_path and excluding the task from freezing completely. If
+a task was already frozen, it will be woken by __thaw_task from the OOM
+killer and leave the freezer after rechecking freezing().
+
+Changes since v1
+- put the TIF_MEMDIE check into freezing_slow_path rather than into
+  __refrigerator, as per Oleg
+- return __thaw_task to oom_scan_process_thread because
+  oom_kill_process will not wake a task in the fridge, as it is
+  sleeping uninterruptibly
+
+[mhocko@suse.cz: rewrote the changelog]
+Fixes: a3201227f803 (freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE)
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/freezer.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/freezer.c
++++ b/kernel/freezer.c
+@@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_stru
+       if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
+               return false;
++      if (test_thread_flag(TIF_MEMDIE))
++              return false;
++
+       if (pm_nosig_freezing || cgroup_freezing(p))
+               return true;
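
Why the placement of the new check matters: below is a minimal userspace
model of freezing_slow_path()'s decision order after this patch, with the
task flags reduced to plain booleans. It is a sketch, not kernel code; the
trailing pm_freezing/PF_KTHREAD handling of the real function is omitted
since it falls outside the hunk shown above.

#include <stdbool.h>
#include <stdio.h>

/* Userspace model of the post-patch freezing_slow_path() decision order. */
struct task {
	bool pf_nofreeze;      /* PF_NOFREEZE | PF_SUSPEND_TASK */
	bool tif_memdie;       /* task was picked by the OOM killer */
	bool cgroup_freezing;  /* frozen via the cgroup freezer */
};

static bool pm_nosig_freezing = true;  /* a suspend is in progress */

static bool freezing_slow_path(const struct task *p)
{
	if (p->pf_nofreeze)
		return false;
	/* The new check: an OOM-killed task must not be considered
	 * freezing, or it would go straight back into the fridge after
	 * __thaw_task() instead of exiting and releasing memory. */
	if (p->tif_memdie)
		return false;
	if (pm_nosig_freezing || p->cgroup_freezing)
		return true;
	return false;
}

int main(void)
{
	struct task victim = { .tif_memdie = true };
	struct task normal = { 0 };

	printf("OOM victim freezes: %d\n", freezing_slow_path(&victim)); /* 0 */
	printf("normal task freezes: %d\n", freezing_slow_path(&normal)); /* 1 */
	return 0;
}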
diff --git a/queue-3.14/intel_pstate-correct-byt-vid-values.patch b/queue-3.14/intel_pstate-correct-byt-vid-values.patch
new file mode 100644 (file)
index 0000000..1660c72
--- /dev/null
@@ -0,0 +1,55 @@
+From d022a65ed2473fac4a600e3424503dc571160a3e Mon Sep 17 00:00:00 2001
+From: Dirk Brandewie <dirk.j.brandewie@intel.com>
+Date: Mon, 13 Oct 2014 08:37:44 -0700
+Subject: intel_pstate: Correct BYT VID values.
+
+From: Dirk Brandewie <dirk.j.brandewie@intel.com>
+
+commit d022a65ed2473fac4a600e3424503dc571160a3e upstream.
+
+Using a VID value that is not high enough for the requested P state can
+cause machine checks. Add a ceiling function to ensure calculated VIDs
+with fractional values are set to the next highest integer VID value.
+
+The algorithm for calculating the non-turbo VID from the BIOS writer's
+guide is:
+ vid_ratio = (vid_max - vid_min) / (max_pstate - min_pstate)
+ vid = ceiling(vid_min + (req_pstate - min_pstate) * vid_ratio)
+
+Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpufreq/intel_pstate.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -55,6 +55,17 @@ static inline int32_t div_fp(int32_t x,
+       return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
+ }
++static inline int ceiling_fp(int32_t x)
++{
++      int mask, ret;
++
++      ret = fp_toint(x);
++      mask = (1 << FRAC_BITS) - 1;
++      if (x & mask)
++              ret += 1;
++      return ret;
++}
++
+ struct sample {
+       int32_t core_pct_busy;
+       u64 aperf;
+@@ -399,7 +410,7 @@ static void byt_set_pstate(struct cpudat
+               cpudata->vid.ratio);
+       vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
+-      vid = fp_toint(vid_fp);
++      vid = ceiling_fp(vid_fp);
+       if (pstate > cpudata->pstate.max_pstate)
+               vid = cpudata->vid.turbo;
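
The rounding behaviour is easy to check in isolation. The standalone
program below uses the driver's 8-bit fixed-point convention (FRAC_BITS
is 8 in the 3.14 intel_pstate.c); the VID range and P states are made-up
illustration values, not real BYT numbers.

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8
#define int_tofp(X) ((int32_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

/* Same rounding as the patch: round a fixed-point value up. */
static int ceiling_fp(int32_t x)
{
	int mask = (1 << FRAC_BITS) - 1;
	int ret = fp_toint(x);

	if (x & mask)
		ret += 1;
	return ret;
}

int main(void)
{
	/* Illustration values: VIDs 48..60 over P states 10..20. */
	int32_t vid_min = int_tofp(48), vid_max = int_tofp(60);
	int min_pstate = 10, max_pstate = 20, req_pstate = 13;

	/* vid_ratio = (60 - 48) / (20 - 10) = 1.2 in fixed point */
	int32_t vid_ratio = (vid_max - vid_min) / (max_pstate - min_pstate);
	int32_t vid_fp = vid_min + (req_pstate - min_pstate) * vid_ratio;

	/* 48 + 3 * 1.2 = 51.6: truncation gives an unsafe 51,
	 * ceiling_fp gives the safe 52. */
	printf("truncated: %d, ceiling: %d\n",
	       fp_toint(vid_fp), ceiling_fp(vid_fp));
	return 0;
}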
diff --git a/queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch b/queue-3.14/intel_pstate-fix-byt-frequency-reporting.patch
new file mode 100644 (file)
index 0000000..c677f23
--- /dev/null
@@ -0,0 +1,146 @@
+From b27580b05e6f5253228debc60b8ff4a786ff573a Mon Sep 17 00:00:00 2001
+From: Dirk Brandewie <dirk.j.brandewie@intel.com>
+Date: Mon, 13 Oct 2014 08:37:43 -0700
+Subject: intel_pstate: Fix BYT frequency reporting
+
+From: Dirk Brandewie <dirk.j.brandewie@intel.com>
+
+commit b27580b05e6f5253228debc60b8ff4a786ff573a upstream.
+
+BYT has a different conversion from P state to frequency than the core
+processors.  This causes the min/max and current frequency to be
+misreported on some BYT SKUs. Tested on BYT N2820, Ivybridge and
+Haswell processors.
+
+Link: https://bugzilla.yoctoproject.org/show_bug.cgi?id=6663
+Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpufreq/intel_pstate.c |   42 +++++++++++++++++++++++++++++++++++------
+ 1 file changed, 36 insertions(+), 6 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -67,6 +67,7 @@ struct pstate_data {
+       int     current_pstate;
+       int     min_pstate;
+       int     max_pstate;
++      int     scaling;
+       int     turbo_pstate;
+ };
+@@ -118,6 +119,7 @@ struct pstate_funcs {
+       int (*get_max)(void);
+       int (*get_min)(void);
+       int (*get_turbo)(void);
++      int (*get_scaling)(void);
+       void (*set)(struct cpudata*, int pstate);
+       void (*get_vid)(struct cpudata *);
+ };
+@@ -407,6 +409,22 @@ static void byt_set_pstate(struct cpudat
+       wrmsrl(MSR_IA32_PERF_CTL, val);
+ }
++#define BYT_BCLK_FREQS 5
++static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
++
++static int byt_get_scaling(void)
++{
++      u64 value;
++      int i;
++
++      rdmsrl(MSR_FSB_FREQ, value);
++      i = value & 0x3;
++
++      BUG_ON(i > BYT_BCLK_FREQS);
++
++      return byt_freq_table[i] * 100;
++}
++
+ static void byt_get_vid(struct cpudata *cpudata)
+ {
+       u64 value;
+@@ -451,6 +469,11 @@ static int core_get_turbo_pstate(void)
+       return ret;
+ }
++static inline int core_get_scaling(void)
++{
++      return 100000;
++}
++
+ static void core_set_pstate(struct cpudata *cpudata, int pstate)
+ {
+       u64 val;
+@@ -475,6 +498,7 @@ static struct cpu_defaults core_params =
+               .get_max = core_get_max_pstate,
+               .get_min = core_get_min_pstate,
+               .get_turbo = core_get_turbo_pstate,
++              .get_scaling = core_get_scaling,
+               .set = core_set_pstate,
+       },
+ };
+@@ -493,6 +517,7 @@ static struct cpu_defaults byt_params =
+               .get_min = byt_get_min_pstate,
+               .get_turbo = byt_get_turbo_pstate,
+               .set = byt_set_pstate,
++              .get_scaling = byt_get_scaling,
+               .get_vid = byt_get_vid,
+       },
+ };
+@@ -526,7 +551,7 @@ static void intel_pstate_set_pstate(stru
+       if (pstate == cpu->pstate.current_pstate)
+               return;
+-      trace_cpu_frequency(pstate * 100000, cpu->cpu);
++      trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+       cpu->pstate.current_pstate = pstate;
+@@ -555,6 +580,7 @@ static void intel_pstate_get_cpu_pstates
+       cpu->pstate.min_pstate = pstate_funcs.get_min();
+       cpu->pstate.max_pstate = pstate_funcs.get_max();
+       cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
++      cpu->pstate.scaling = pstate_funcs.get_scaling();
+       if (pstate_funcs.get_vid)
+               pstate_funcs.get_vid(cpu);
+@@ -574,7 +600,9 @@ static inline void intel_pstate_calc_bus
+               core_pct += 1;
+       sample->freq = fp_toint(
+-              mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
++              mul_fp(int_tofp(
++                      cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
++                      core_pct));
+       sample->core_pct_busy = (int32_t)core_pct;
+ }
+@@ -817,12 +845,13 @@ static int intel_pstate_cpu_init(struct
+       else
+               policy->policy = CPUFREQ_POLICY_POWERSAVE;
+-      policy->min = cpu->pstate.min_pstate * 100000;
+-      policy->max = cpu->pstate.turbo_pstate * 100000;
++      policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
++      policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+       /* cpuinfo and default policy values */
+-      policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
+-      policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
++      policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
++      policy->cpuinfo.max_freq =
++              cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+       policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+       cpumask_set_cpu(policy->cpu, policy->cpus);
+@@ -880,6 +909,7 @@ static void copy_cpu_funcs(struct pstate
+       pstate_funcs.get_max   = funcs->get_max;
+       pstate_funcs.get_min   = funcs->get_min;
+       pstate_funcs.get_turbo = funcs->get_turbo;
++      pstate_funcs.get_scaling = funcs->get_scaling;
+       pstate_funcs.set       = funcs->set;
+       pstate_funcs.get_vid   = funcs->get_vid;
+ }
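
The numeric effect of the per-CPU scaling factor is easiest to see side by
side. Below is a standalone sketch of the two get_scaling() callbacks; the
MSR read is replaced by a hypothetical raw value passed as a parameter,
and the P state is made up.

#include <stdint.h>
#include <stdio.h>

/* BYT bus-clock table from the patch; the low bits of MSR_FSB_FREQ
 * select an entry, in units of 100 kHz per P-state step. */
static const int byt_freq_table[] = { 833, 1000, 1333, 1167, 800 };

static int core_get_scaling(void)
{
	return 100000;  /* core parts: 100 MHz per P-state step */
}

static int byt_get_scaling(uint64_t fsb_freq_msr)  /* MSR value stubbed */
{
	return byt_freq_table[fsb_freq_msr & 0x3] * 100;
}

int main(void)
{
	int pstate = 16;

	/* A core CPU at P state 16 reports 1600000 kHz. */
	printf("core: %d kHz\n", pstate * core_get_scaling());

	/* A BYT part with an 83.3 MHz bclk (hypothetical MSR value 0)
	 * reports 1332800 kHz; the old hard-coded 100000 would have
	 * shown 1600000 kHz here too, hence the misreporting. */
	printf("byt:  %d kHz\n", pstate * byt_get_scaling(0));
	return 0;
}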
diff --git a/queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch b/queue-3.14/oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch
new file mode 100644 (file)
index 0000000..cff1d6d
--- /dev/null
@@ -0,0 +1,171 @@
+From 5695be142e203167e3cb515ef86a88424f3524eb Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.cz>
+Date: Mon, 20 Oct 2014 18:12:32 +0200
+Subject: OOM, PM: OOM killed task shouldn't escape PM suspend
+
+From: Michal Hocko <mhocko@suse.cz>
+
+commit 5695be142e203167e3cb515ef86a88424f3524eb upstream.
+
+The PM freezer relies on having all tasks frozen by the time devices
+are frozen, so that no task will touch them afterwards. But the OOM
+killer is allowed to kill an already frozen task in order to handle an
+OOM situation. To protect against late wake-ups, the OOM killer is
+disabled after all tasks are frozen. This, however, still leaves a
+window open where a killed task didn't manage to die by the time
+freeze_processes finishes.
+
+Reduce the race window by checking all tasks after the OOM killer has
+been disabled. Unfortunately, this is still not completely race free,
+because oom_killer_disable cannot stop an already ongoing OOM kill, so
+a task might still wake up from the fridge and get killed without
+freeze_processes noticing. Full synchronization of the OOM killer and
+the freezer is, however, too heavyweight for this highly unlikely case.
+
+Introduce an oom_kills counter which is incremented early, when the
+allocator enters the __alloc_pages_may_oom path, and recheck all tasks
+only if the counter changes during the freezing attempt. The counter is
+updated that early to shrink the race window after the allocator has
+checked oom_killer_disabled, which is set by the PM-freezing code. A
+false positive pushes the PM freezer into a slow path; not a big deal.
+
+Changes since v1
+- push the re-check loop out of freeze_processes into
+  check_frozen_processes and invert the condition to make the code more
+  readable as per Rafael
+
+Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring)
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/oom.h    |    3 +++
+ kernel/power/process.c |   40 +++++++++++++++++++++++++++++++++++++++-
+ mm/oom_kill.c          |   17 +++++++++++++++++
+ mm/page_alloc.c        |    8 ++++++++
+ 4 files changed, 67 insertions(+), 1 deletion(-)
+
+--- a/include/linux/oom.h
++++ b/include/linux/oom.h
+@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const
+ extern unsigned long oom_badness(struct task_struct *p,
+               struct mem_cgroup *memcg, const nodemask_t *nodemask,
+               unsigned long totalpages);
++
++extern int oom_kills_count(void);
++extern void note_oom_kill(void);
+ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+                            unsigned int points, unsigned long totalpages,
+                            struct mem_cgroup *memcg, nodemask_t *nodemask,
+--- a/kernel/power/process.c
++++ b/kernel/power/process.c
+@@ -107,6 +107,28 @@ static int try_to_freeze_tasks(bool user
+       return todo ? -EBUSY : 0;
+ }
++/*
++ * Returns true if all freezable tasks (except for current) are frozen already
++ */
++static bool check_frozen_processes(void)
++{
++      struct task_struct *g, *p;
++      bool ret = true;
++
++      read_lock(&tasklist_lock);
++      for_each_process_thread(g, p) {
++              if (p != current && !freezer_should_skip(p) &&
++                  !frozen(p)) {
++                      ret = false;
++                      goto done;
++              }
++      }
++done:
++      read_unlock(&tasklist_lock);
++
++      return ret;
++}
++
+ /**
+  * freeze_processes - Signal user space processes to enter the refrigerator.
+  * The current thread will not be frozen.  The same process that calls
+@@ -117,6 +139,7 @@ static int try_to_freeze_tasks(bool user
+ int freeze_processes(void)
+ {
+       int error;
++      int oom_kills_saved;
+       error = __usermodehelper_disable(UMH_FREEZING);
+       if (error)
+@@ -130,12 +153,27 @@ int freeze_processes(void)
+       printk("Freezing user space processes ... ");
+       pm_freezing = true;
++      oom_kills_saved = oom_kills_count();
+       error = try_to_freeze_tasks(true);
+       if (!error) {
+-              printk("done.");
+               __usermodehelper_set_disable_depth(UMH_DISABLED);
+               oom_killer_disable();
++
++              /*
++               * There might have been an OOM kill while we were
++               * freezing tasks and the killed task might be still
++               * on the way out so we have to double check for race.
++               */
++              if (oom_kills_count() != oom_kills_saved &&
++                              !check_frozen_processes()) {
++                      __usermodehelper_set_disable_depth(UMH_ENABLED);
++                      printk("OOM in progress.");
++                      error = -EBUSY;
++                      goto done;
++              }
++              printk("done.");
+       }
++done:
+       printk("\n");
+       BUG_ON(in_atomic());
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -406,6 +406,23 @@ static void dump_header(struct task_stru
+               dump_tasks(memcg, nodemask);
+ }
++/*
++ * Number of OOM killer invocations (including memcg OOM killer).
++ * Primarily used by PM freezer to check for potential races with
++ * OOM killed frozen task.
++ */
++static atomic_t oom_kills = ATOMIC_INIT(0);
++
++int oom_kills_count(void)
++{
++      return atomic_read(&oom_kills);
++}
++
++void note_oom_kill(void)
++{
++      atomic_inc(&oom_kills);
++}
++
+ #define K(x) ((x) << (PAGE_SHIFT-10))
+ /*
+  * Must be called while holding a reference to p, which will be released upon
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2196,6 +2196,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, un
+       }
+       /*
++       * PM-freezer should be notified that there might be an OOM killer on
++       * its way to kill and wake somebody up. This is too early and we might
++       * end up not killing anything but false positives are acceptable.
++       * See freeze_processes.
++       */
++      note_oom_kill();
++
++      /*
+        * Go through the zonelist yet one more time, keep very high watermark
+        * here, this is only to catch a parallel oom killing, we must fail if
+        * we're still under heavy pressure.
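
The counter handshake the patch builds is an ordinary sample-and-recheck
pattern. Here it is reduced to a standalone C11 sketch, with the tasklist
scan stubbed out and the function names borrowed from the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int oom_kills;

/* Allocator side: bump the counter before a kill may happen. */
static void note_oom_kill(void)
{
	atomic_fetch_add(&oom_kills, 1);
}

static int oom_kills_count(void)
{
	return atomic_load(&oom_kills);
}

/* Stub for the tasklist walk; pretend a killed task is still exiting. */
static bool check_frozen_processes(void)
{
	return false;
}

/* Freezer side: sample, freeze, then recheck only if the counter moved. */
static int freeze_processes_sketch(void)
{
	int oom_kills_saved = oom_kills_count();

	/* ... try_to_freeze_tasks(true), oom_killer_disable() ... */
	if (oom_kills_count() != oom_kills_saved && !check_frozen_processes())
		return -1;  /* -EBUSY in the kernel: abort the suspend */
	return 0;
}

int main(void)
{
	note_oom_kill();  /* simulate an OOM kill racing with freezing */
	printf("freeze_processes: %d\n", freeze_processes_sketch());
	return 0;
}

In the common case the counter has not moved and the extra tasklist walk
is skipped entirely, which is why the counter is bumped on entry to
__alloc_pages_may_oom rather than at kill time.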
diff --git a/queue-3.14/series b/queue-3.14/series
index d54fb787ca17431c55ec716a570663085530c3de..a939c9e8f5c064c00edfc73dc8f79cc6ff16cb1d 100644 (file)
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -119,3 +119,7 @@ mmc-sdhci-pci-sdio-host-controller-support-for-intel-quark.patch
 x86-platform-intel-iosf-add-braswell-pci-id.patch
 alsa-hda-add-pci-ids-for-intel-braswell.patch
 alsa-hda-add-codec-id-for-braswell-display-audio-codec.patch
+intel_pstate-fix-byt-frequency-reporting.patch
+intel_pstate-correct-byt-vid-values.patch
+freezer-do-not-freeze-tasks-killed-by-oom-killer.patch
+oom-pm-oom-killed-task-shouldn-t-escape-pm-suspend.patch