git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 9 Jun 2014 22:37:19 +0000 (15:37 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 9 Jun 2014 22:37:19 +0000 (15:37 -0700)
added patches:
arm-perf-hook-up-perf_sample_event_took-around-pmu-irq-handling.patch
netfilter-fix-potential-use-after-free-in-ip6_route_me_harder.patch
perf-drop-sample-rate-when-sampling-is-too-slow.patch
perf-enforce-1-as-lower-limit-for-perf_event_max_sample_rate.patch
perf-fix-interrupt-handler-timing-harness.patch

queue-3.10/arm-perf-hook-up-perf_sample_event_took-around-pmu-irq-handling.patch [new file with mode: 0644]
queue-3.10/netfilter-fix-potential-use-after-free-in-ip6_route_me_harder.patch [new file with mode: 0644]
queue-3.10/perf-drop-sample-rate-when-sampling-is-too-slow.patch [new file with mode: 0644]
queue-3.10/perf-enforce-1-as-lower-limit-for-perf_event_max_sample_rate.patch [new file with mode: 0644]
queue-3.10/perf-fix-interrupt-handler-timing-harness.patch [new file with mode: 0644]
queue-3.10/series

diff --git a/queue-3.10/arm-perf-hook-up-perf_sample_event_took-around-pmu-irq-handling.patch b/queue-3.10/arm-perf-hook-up-perf_sample_event_took-around-pmu-irq-handling.patch
new file mode 100644 (file)
index 0000000..04a9a15
--- /dev/null
@@ -0,0 +1,46 @@
+From 5f5092e72cc25a6a5785308270e0085b2b2772cc Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 11 Feb 2014 18:08:41 +0000
+Subject: ARM: perf: hook up perf_sample_event_took around pmu irq handling
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 5f5092e72cc25a6a5785308270e0085b2b2772cc upstream.
+
+Since we indirect all of our PMU IRQ handling through a dispatcher, it's
+trivial to hook up perf_sample_event_took to prevent applications such
+as oprofile from generating interrupt storms due to an unrealistically
+low sample period.
+
+Reported-by: Robert Richter <rric@kernel.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Weng Meiling <wengmeiling.weng@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kernel/perf_event.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/kernel/perf_event.c
++++ b/arch/arm/kernel/perf_event.c
+@@ -303,11 +303,18 @@ static irqreturn_t armpmu_dispatch_irq(i
+       struct arm_pmu *armpmu = (struct arm_pmu *) dev;
+       struct platform_device *plat_device = armpmu->plat_device;
+       struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
++      int ret;
++      u64 start_clock, finish_clock;
++      start_clock = sched_clock();
+       if (plat && plat->handle_irq)
+-              return plat->handle_irq(irq, dev, armpmu->handle_irq);
++              ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
+       else
+-              return armpmu->handle_irq(irq, dev);
++              ret = armpmu->handle_irq(irq, dev);
++      finish_clock = sched_clock();
++
++      perf_sample_event_took(finish_clock - start_clock);
++      return ret;
+ }
+ static void
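
The dispatcher change above is small enough to restate outside the kernel. Below is a minimal userspace sketch of the same pattern, under the assumption that now_ns(), report_sample_took() and dummy_handler() are illustrative stand-ins for sched_clock(), perf_sample_event_took() and armpmu->handle_irq(); it is not kernel code.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef int (*irq_handler_t)(int irq, void *dev);

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void report_sample_took(uint64_t len_ns)
{
	/* in the kernel this duration feeds perf_sample_event_took() */
	printf("handler took %llu ns\n", (unsigned long long)len_ns);
}

static int dispatch_irq(int irq, void *dev, irq_handler_t handle)
{
	uint64_t start = now_ns(), finish;
	int ret;

	ret = handle(irq, dev);		/* the real PMU handler runs here */
	finish = now_ns();

	report_sample_took(finish - start);
	return ret;
}

static int dummy_handler(int irq, void *dev)
{
	(void)irq; (void)dev;
	return 1;			/* pretend IRQ_HANDLED */
}

int main(void)
{
	return dispatch_irq(42, NULL, dummy_handler) == 1 ? 0 : 1;
}

Timing around the single dispatch point means every PMU backend gets the measurement for free, which is why the hook lands in armpmu_dispatch_irq() rather than in each driver.
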
diff --git a/queue-3.10/netfilter-fix-potential-use-after-free-in-ip6_route_me_harder.patch b/queue-3.10/netfilter-fix-potential-use-after-free-in-ip6_route_me_harder.patch
new file mode 100644 (file)
index 0000000..67a1820
--- /dev/null
@@ -0,0 +1,42 @@
+From a8951d5814e1373807a94f79f7ccec7041325470 Mon Sep 17 00:00:00 2001
+From: Sergey Popovich <popovich_sergei@mail.ru>
+Date: Thu, 8 May 2014 16:22:35 +0300
+Subject: netfilter: Fix potential use after free in ip6_route_me_harder()
+
+From: Sergey Popovich <popovich_sergei@mail.ru>
+
+commit a8951d5814e1373807a94f79f7ccec7041325470 upstream.
+
+Dst is released one line before we access it again with dst->error.
+
+Fixes: 58e35d147128 ("netfilter: ipv6: propagate routing errors from ip6_route_me_harder()")
+
+Signed-off-by: Sergey Popovich <popovich_sergei@mail.ru>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv6/netfilter.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *
+               .daddr = iph->daddr,
+               .saddr = iph->saddr,
+       };
++      int err;
+       dst = ip6_route_output(net, skb->sk, &fl6);
+-      if (dst->error) {
++      err = dst->error;
++      if (err) {
+               IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+               LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
+               dst_release(dst);
+-              return dst->error;
++              return err;
+       }
+       /* Drop old route. */
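
The pattern behind this fix generalizes beyond netfilter; the following is a small, self-contained C sketch of the bug class and of the cure used above (cache the field before dropping the reference). struct route and route_put() are illustrative stand-ins, not kernel APIs.

#include <stdlib.h>

struct route { int refcnt; int error; };	/* illustrative, not struct dst_entry */

static void route_put(struct route *rt)		/* stand-in for dst_release() */
{
	if (--rt->refcnt == 0)
		free(rt);
}

static int harden_broken(struct route *rt)
{
	if (rt->error) {
		route_put(rt);
		return rt->error;	/* use after free if that was the last reference */
	}
	return 0;
}

static int harden_fixed(struct route *rt)
{
	int err = rt->error;		/* read while the reference is still held */

	if (err) {
		route_put(rt);
		return err;
	}
	return 0;
}

int main(void)
{
	struct route *rt = calloc(1, sizeof(*rt));

	(void)harden_broken;		/* shown for contrast only, never called */
	rt->refcnt = 1;
	rt->error = -113;		/* say, -EHOSTUNREACH */
	return harden_fixed(rt) == -113 ? 0 : 1;
}
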
diff --git a/queue-3.10/perf-drop-sample-rate-when-sampling-is-too-slow.patch b/queue-3.10/perf-drop-sample-rate-when-sampling-is-too-slow.patch
new file mode 100644 (file)
index 0000000..eaa965a
--- /dev/null
@@ -0,0 +1,257 @@
+From 14c63f17b1fde5a575a28e96547a22b451c71fb5 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Fri, 21 Jun 2013 08:51:36 -0700
+Subject: perf: Drop sample rate when sampling is too slow
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 14c63f17b1fde5a575a28e96547a22b451c71fb5 upstream.
+
+This patch keeps track of how long perf's NMI handler is taking,
+and also calculates how many samples perf can take a second.  If
+the sample length times the expected max number of samples
+exceeds a configurable threshold, it drops the sample rate.
+
+This way, we don't have a runaway sampling process eating up the
+CPU.
+
+This patch can tend to drop the sample rate down to a level where
+perf doesn't work very well.  *BUT* the alternative is that my
+system hangs because it spends all of its time handling NMIs.
+
+I'll take a busted performance tool over an entire system that's
+busted and undebuggable any day.
+
+BTW, my suspicion is that there's still an underlying bug here.
+Using the HPET instead of the TSC is definitely a contributing
+factor, but I suspect there are some other things going on.
+But, I can't go dig down on a bug like that with my machine
+hanging all the time.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: paulus@samba.org
+Cc: acme@ghostprotocols.net
+Cc: Dave Hansen <dave@sr71.net>
+[ Prettified it a bit. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Weng Meiling <wengmeiling.weng@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/sysctl/kernel.txt  |   26 +++++++++++
+ arch/x86/kernel/cpu/perf_event.c |   12 ++++-
+ include/linux/perf_event.h       |    7 ++
+ kernel/events/core.c             |   92 +++++++++++++++++++++++++++++++++++++--
+ kernel/sysctl.c                  |    9 +++
+ 5 files changed, 141 insertions(+), 5 deletions(-)
+
+--- a/Documentation/sysctl/kernel.txt
++++ b/Documentation/sysctl/kernel.txt
+@@ -438,6 +438,32 @@ This file shows up if CONFIG_DEBUG_STACK
+ ==============================================================
++perf_cpu_time_max_percent:
++
++Hints to the kernel how much CPU time it should be allowed to
++use to handle perf sampling events.  If the perf subsystem
++is informed that its samples are exceeding this limit, it
++will drop its sampling frequency to attempt to reduce its CPU
++usage.
++
++Some perf sampling happens in NMIs.  If these samples
++unexpectedly take too long to execute, the NMIs can become
++stacked up next to each other so much that nothing else is
++allowed to execute.
++
++0: disable the mechanism.  Do not monitor or correct perf's
++   sampling rate no matter how much CPU time it takes.
++
++1-100: attempt to throttle perf's sample rate to this
++   percentage of CPU.  Note: the kernel calculates an
++   "expected" length of each sample event.  100 here means
++   100% of that expected length.  Even if this is set to
++   100, you may still see sample throttling if this
++   length is exceeded.  Set to 0 if you truly do not care
++   how much CPU is consumed.
++
++==============================================================
++
+ pid_max:
+--- a/arch/x86/kernel/cpu/perf_event.c
++++ b/arch/x86/kernel/cpu/perf_event.c
+@@ -1252,10 +1252,20 @@ void perf_events_lapic_init(void)
+ static int __kprobes
+ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+ {
++      int ret;
++      u64 start_clock;
++      u64 finish_clock;
++
+       if (!atomic_read(&active_events))
+               return NMI_DONE;
+-      return x86_pmu.handle_irq(regs);
++      start_clock = local_clock();
++      ret = x86_pmu.handle_irq(regs);
++      finish_clock = local_clock();
++
++      perf_sample_event_took(finish_clock - start_clock);
++
++      return ret;
+ }
+ struct event_constraint emptyconstraint;
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -695,10 +695,17 @@ static inline void perf_callchain_store(
+ extern int sysctl_perf_event_paranoid;
+ extern int sysctl_perf_event_mlock;
+ extern int sysctl_perf_event_sample_rate;
++extern int sysctl_perf_cpu_time_max_percent;
++
++extern void perf_sample_event_took(u64 sample_len_ns);
+ extern int perf_proc_update_handler(struct ctl_table *table, int write,
+               void __user *buffer, size_t *lenp,
+               loff_t *ppos);
++extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
++              void __user *buffer, size_t *lenp,
++              loff_t *ppos);
++
+ static inline bool perf_paranoid_tracepoint_raw(void)
+ {
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -165,10 +165,26 @@ int sysctl_perf_event_mlock __read_mostl
+ /*
+  * max perf event sample rate
+  */
+-#define DEFAULT_MAX_SAMPLE_RATE 100000
+-int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
+-static int max_samples_per_tick __read_mostly =
+-      DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
++#define DEFAULT_MAX_SAMPLE_RATE               100000
++#define DEFAULT_SAMPLE_PERIOD_NS      (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
++#define DEFAULT_CPU_TIME_MAX_PERCENT  25
++
++int sysctl_perf_event_sample_rate __read_mostly       = DEFAULT_MAX_SAMPLE_RATE;
++
++static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
++static int perf_sample_period_ns __read_mostly        = DEFAULT_SAMPLE_PERIOD_NS;
++
++static atomic_t perf_sample_allowed_ns __read_mostly =
++      ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
++
++void update_perf_cpu_limits(void)
++{
++      u64 tmp = perf_sample_period_ns;
++
++      tmp *= sysctl_perf_cpu_time_max_percent;
++      tmp = do_div(tmp, 100);
++      atomic_set(&perf_sample_allowed_ns, tmp);
++}
+ int perf_proc_update_handler(struct ctl_table *table, int write,
+               void __user *buffer, size_t *lenp,
+@@ -180,10 +196,78 @@ int perf_proc_update_handler(struct ctl_
+               return ret;
+       max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
++      perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
++      update_perf_cpu_limits();
++
++      return 0;
++}
++
++int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
++
++int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
++                              void __user *buffer, size_t *lenp,
++                              loff_t *ppos)
++{
++      int ret = proc_dointvec(table, write, buffer, lenp, ppos);
++
++      if (ret || !write)
++              return ret;
++
++      update_perf_cpu_limits();
+       return 0;
+ }
++/*
++ * perf samples are done in some very critical code paths (NMIs).
++ * If they take too much CPU time, the system can lock up and not
++ * get any real work done.  This will drop the sample rate when
++ * we detect that events are taking too long.
++ */
++#define NR_ACCUMULATED_SAMPLES 128
++DEFINE_PER_CPU(u64, running_sample_length);
++
++void perf_sample_event_took(u64 sample_len_ns)
++{
++      u64 avg_local_sample_len;
++      u64 local_samples_len = __get_cpu_var(running_sample_length);
++
++      if (atomic_read(&perf_sample_allowed_ns) == 0)
++              return;
++
++      /* decay the counter by 1 average sample */
++      local_samples_len = __get_cpu_var(running_sample_length);
++      local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
++      local_samples_len += sample_len_ns;
++      __get_cpu_var(running_sample_length) = local_samples_len;
++
++      /*
++       * note: this will be biased artificially low until we have
++       * seen NR_ACCUMULATED_SAMPLES.  Doing it this way keeps us
++       * from having to maintain a count.
++       */
++      avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
++
++      if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
++              return;
++
++      if (max_samples_per_tick <= 1)
++              return;
++
++      max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
++      sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
++      perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
++
++      printk_ratelimited(KERN_WARNING
++                      "perf samples too long (%lld > %d), lowering "
++                      "kernel.perf_event_max_sample_rate to %d\n",
++                      avg_local_sample_len,
++                      atomic_read(&perf_sample_allowed_ns),
++                      sysctl_perf_event_sample_rate);
++
++      update_perf_cpu_limits();
++}
++
+ static atomic64_t perf_event_id;
+ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1050,6 +1050,15 @@ static struct ctl_table kern_table[] = {
+               .mode           = 0644,
+               .proc_handler   = perf_proc_update_handler,
+       },
++      {
++              .procname       = "perf_cpu_time_max_percent",
++              .data           = &sysctl_perf_cpu_time_max_percent,
++              .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
++              .mode           = 0644,
++              .proc_handler   = perf_cpu_time_max_percent_handler,
++              .extra1         = &zero,
++              .extra2         = &one_hundred,
++      },
+ #endif
+ #ifdef CONFIG_KMEMCHECK
+       {
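
A compact userspace sketch of the arithmetic this patch adds may help: the time allowed per sample is the sample period scaled by perf_cpu_time_max_percent, and the running sample length is a decaying average over 128 samples. HZ = 250 and the simulated 50 us NMI cost are assumptions for illustration, not values taken from the patch.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC		1000000000ULL
#define HZ			250
#define NR_ACCUMULATED_SAMPLES	128
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static uint64_t sample_rate = 100000;	/* kernel.perf_event_max_sample_rate */
static uint64_t cpu_max_percent = 25;	/* kernel.perf_cpu_time_max_percent */
static uint64_t max_samples_per_tick;
static uint64_t running_len_ns;		/* per-CPU in the kernel */

static uint64_t allowed_ns(void)
{
	/* 10,000 ns per sample at 100 kHz; 25% of that is 2,500 ns */
	return (NSEC_PER_SEC / sample_rate) * cpu_max_percent / 100;
}

static void sample_event_took(uint64_t len_ns)
{
	uint64_t avg;

	/* decay the accumulator by one average sample, then add this one */
	running_len_ns -= running_len_ns / NR_ACCUMULATED_SAMPLES;
	running_len_ns += len_ns;
	avg = running_len_ns / NR_ACCUMULATED_SAMPLES;

	if (avg <= allowed_ns() || max_samples_per_tick <= 1)
		return;

	/* too slow: halve the per-tick budget and recompute the rate */
	max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
	sample_rate = max_samples_per_tick * HZ;
	printf("avg %llu ns > %llu ns allowed, lowering max_sample_rate to %llu\n",
	       (unsigned long long)avg, (unsigned long long)allowed_ns(),
	       (unsigned long long)sample_rate);
}

int main(void)
{
	max_samples_per_tick = DIV_ROUND_UP(sample_rate, HZ);
	for (int i = 0; i < 1024; i++)
		sample_event_took(50000);	/* pretend every NMI took 50 us */
	return 0;
}

With the defaults (100000 samples/sec, 25%), each sample is allowed 2,500 ns, so a 50 us handler triggers repeated halving of the rate until the budget is met.
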
diff --git a/queue-3.10/perf-enforce-1-as-lower-limit-for-perf_event_max_sample_rate.patch b/queue-3.10/perf-enforce-1-as-lower-limit-for-perf_event_max_sample_rate.patch
new file mode 100644 (file)
index 0000000..268a2a1
--- /dev/null
@@ -0,0 +1,50 @@
+From 723478c8a471403c53cf144999701f6e0c4bbd11 Mon Sep 17 00:00:00 2001
+From: Knut Petersen <Knut_Petersen@t-online.de>
+Date: Wed, 25 Sep 2013 14:29:37 +0200
+Subject: perf: Enforce 1 as lower limit for perf_event_max_sample_rate
+
+From: Knut Petersen <Knut_Petersen@t-online.de>
+
+commit 723478c8a471403c53cf144999701f6e0c4bbd11 upstream.
+
+/proc/sys/kernel/perf_event_max_sample_rate will accept
+negative values as well as 0.
+
+Negative values are unreasonable, and 0 causes a
+divide by zero exception in perf_proc_update_handler.
+
+This patch enforces a lower limit of 1.
+
+Signed-off-by: Knut Petersen <Knut_Petersen@t-online.de>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/5242DB0C.4070005@t-online.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Weng Meiling <wengmeiling.weng@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c |    2 +-
+ kernel/sysctl.c      |    1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -190,7 +190,7 @@ int perf_proc_update_handler(struct ctl_
+               void __user *buffer, size_t *lenp,
+               loff_t *ppos)
+ {
+-      int ret = proc_dointvec(table, write, buffer, lenp, ppos);
++      int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret || !write)
+               return ret;
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1049,6 +1049,7 @@ static struct ctl_table kern_table[] = {
+               .maxlen         = sizeof(sysctl_perf_event_sample_rate),
+               .mode           = 0644,
+               .proc_handler   = perf_proc_update_handler,
++              .extra1         = &one,
+       },
+       {
+               .procname       = "perf_cpu_time_max_percent",
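
Why the lower bound matters can be shown in a few lines: with plain proc_dointvec a write of 0 reached perf_proc_update_handler and NSEC_PER_SEC / sysctl_perf_event_sample_rate divided by zero. The sketch below emulates the minmax behaviour in userspace; write_sample_rate() is an illustrative stand-in, not the kernel handler.

#include <errno.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000

static int one = 1;			/* mirrors the sysctl's .extra1 = &one */
static int sample_rate = 100000;
static int sample_period_ns = NSEC_PER_SEC / 100000;

/* stand-in for proc_dointvec_minmax() followed by perf_proc_update_handler() */
static int write_sample_rate(int value)
{
	if (value < one)
		return -EINVAL;		/* rejected before any recomputation */
	sample_rate = value;
	sample_period_ns = NSEC_PER_SEC / sample_rate;	/* safe: value >= 1 */
	return 0;
}

int main(void)
{
	int ret0 = write_sample_rate(0);
	int ret1 = write_sample_rate(50000);

	printf("write 0     -> %d\n", ret0);	/* -EINVAL, no divide by zero */
	printf("write 50000 -> %d, period now %d ns\n", ret1, sample_period_ns);
	return 0;
}
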
diff --git a/queue-3.10/perf-fix-interrupt-handler-timing-harness.patch b/queue-3.10/perf-fix-interrupt-handler-timing-harness.patch
new file mode 100644 (file)
index 0000000..783f6c3
--- /dev/null
@@ -0,0 +1,55 @@
+From e5302920da9ef23f9d19d4e9ac85704cc25bee7a Mon Sep 17 00:00:00 2001
+From: Stephane Eranian <eranian@google.com>
+Date: Fri, 5 Jul 2013 00:30:11 +0200
+Subject: perf: Fix interrupt handler timing harness
+
+From: Stephane Eranian <eranian@google.com>
+
+commit e5302920da9ef23f9d19d4e9ac85704cc25bee7a upstream.
+
+This patch fixes a serious bug in:
+
+  14c63f17b1fd perf: Drop sample rate when sampling is too slow
+
+There was an misunderstanding on the API of the do_div()
+macro. It returns the remainder of the division and this
+was not what the function expected leading to disabling the
+interrupt latency watchdog.
+
+This patch also remove a duplicate assignment in
+perf_sample_event_took().
+
+Signed-off-by: Stephane Eranian <eranian@google.com>
+Cc: peterz@infradead.org
+Cc: dave.hansen@linux.intel.com
+Cc: ak@linux.intel.com
+Cc: jolsa@redhat.com
+Link: http://lkml.kernel.org/r/20130704223010.GA30625@quad
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Weng Meiling <wengmeiling.weng@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -182,7 +182,7 @@ void update_perf_cpu_limits(void)
+       u64 tmp = perf_sample_period_ns;
+       tmp *= sysctl_perf_cpu_time_max_percent;
+-      tmp = do_div(tmp, 100);
++      do_div(tmp, 100);
+       atomic_set(&perf_sample_allowed_ns, tmp);
+ }
+@@ -230,7 +230,7 @@ DEFINE_PER_CPU(u64, running_sample_lengt
+ void perf_sample_event_took(u64 sample_len_ns)
+ {
+       u64 avg_local_sample_len;
+-      u64 local_samples_len = __get_cpu_var(running_sample_length);
++      u64 local_samples_len;
+       if (atomic_read(&perf_sample_allowed_ns) == 0)
+               return;
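
The do_div() contract that was misread is easy to demonstrate in userspace: the macro divides its 64-bit dividend in place and returns the remainder. The sketch below (a simplified statement-expression emulation, not the kernel macro) shows that the buggy form keeps the remainder, 0 here, which then trips the perf_sample_allowed_ns == 0 early return and disables the watchdog, while the fixed form keeps the quotient.

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for the kernel's do_div(): divide the 64-bit
 * dividend in place, return the remainder (GCC statement expression) */
#define do_div(n, base) ({				\
	uint32_t __rem = (uint64_t)(n) % (base);	\
	(n) = (uint64_t)(n) / (base);			\
	__rem;						\
})

int main(void)
{
	uint64_t wrong = 10000 * 25;	/* sample period (ns) * max percent */
	uint64_t right = 10000 * 25;

	wrong = do_div(wrong, 100);	/* buggy form: stores the remainder, 0 */
	do_div(right, 100);		/* fixed form: right is now 2500 ns */

	printf("wrong=%llu right=%llu\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}
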
diff --git a/queue-3.10/series b/queue-3.10/series
index 122237d5da0f090ba7e95aea47747d80147875bf..e8f6b7b92dbd38fedefbb7859b8fe1d339e711d9 100644 (file)
@@ -33,3 +33,8 @@ usb-io_ti-fix-firmware-download-on-big-endian-machines-part-2.patch
 usb-avoid-runtime-suspend-loops-for-hcds-that-can-t-handle-suspend-resume.patch
 mm-rmap-fix-use-after-free-in-__put_anon_vma.patch
 iser-target-add-missing-target_put_sess_cmd-for-immedatedata-failure.patch
+perf-drop-sample-rate-when-sampling-is-too-slow.patch
+perf-fix-interrupt-handler-timing-harness.patch
+perf-enforce-1-as-lower-limit-for-perf_event_max_sample_rate.patch
+arm-perf-hook-up-perf_sample_event_took-around-pmu-irq-handling.patch
+netfilter-fix-potential-use-after-free-in-ip6_route_me_harder.patch