+++ /dev/null
-From: Andi Kleen <andi@firstfloor.org>
-Date: Thu, 12 Feb 2009 12:39:29 +0000 (+0100)
-Subject: x86, mce: switch machine check polling to per CPU timer
-Patch-mainline: 2.6.30-rc1
-Git-commit: 52d168e28bc11dd026b620fe1767cadde5a747cd
-References: bnc#507557
-
-x86, mce: switch machine check polling to per CPU timer
-
-Impact: Higher priority bug fix
-
-The machine check poller runs a single timer and then broadcasts an
-IPI to all CPUs to check them. This leads to unnecessary
-synchronization between CPUs. The original CPU running the timer has
-to wait potentially a long time for all other CPUs to answer. This is
-also real-time unfriendly and in general inefficient.
-
-This was especially a problem on systems with a lot of events, where
-the poller runs at a higher frequency after processing some events.
-More and more CPU time could be wasted this way, to
-the point of significantly slowing down machines.
-
-The machine check polling is actually fully independent per CPU, so
-there's no reason to not just do this all with per CPU timers. This
-patch implements that.
-
-Also switch the poller to use standard timers instead of work
-queues. It was using work queues to be able to execute a user program
-on an event, but mce_notify_user() handles this case now with a
-separate callback. So instead always run the poll code in a
-standard per CPU timer, which means that in the common case of not
-having to execute a trigger there will be less overhead.
-
-This allows the initialization to be cleaned up significantly, because
-standard timers are already up when machine checks get init'ed.
-Multiple initialization functions are no longer needed.
-
-Thanks to Thomas Gleixner for some help.
-
-Cc: thockin@google.com
-v2: Use del_timer_sync() on cpu shutdown and don't try to handle
-migrated timers.
-v3: Add WARN_ON for timer running on unexpected CPU
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-Acked-by: Jeff Mahoney <jeffm@suse.com>
----
- arch/x86/kernel/cpu/mcheck/mce_64.c | 68 +++++++++++++++++++++++-------------
- 1 file changed, 45 insertions(+), 23 deletions(-)
-
---- a/arch/x86/kernel/cpu/mcheck/mce_64.c
-+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
-@@ -353,18 +353,17 @@ void mce_log_therm_throt_event(unsigned
-
- static int check_interval = 5 * 60; /* 5 minutes */
- static int next_interval; /* in jiffies */
--static void mcheck_timer(struct work_struct *work);
--static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
-+static void mcheck_timer(unsigned long);
-+static DEFINE_PER_CPU(struct timer_list, mce_timer);
-
--static void mcheck_check_cpu(void *info)
-+static void mcheck_timer(unsigned long data)
- {
-+ struct timer_list *t = &per_cpu(mce_timer, data);
-+
-+ WARN_ON(smp_processor_id() != data);
-+
- if (mce_available(&current_cpu_data))
- do_machine_check(NULL, 0);
--}
--
--static void mcheck_timer(struct work_struct *work)
--{
-- on_each_cpu(mcheck_check_cpu, NULL, 1);
-
- /*
- * Alert userspace if needed. If we logged an MCE, reduce the
-@@ -377,7 +376,8 @@ static void mcheck_timer(struct work_str
- (int)round_jiffies_relative(check_interval*HZ));
- }
-
-- schedule_delayed_work(&mcheck_work, next_interval);
-+ t->expires = jiffies + next_interval;
-+ add_timer(t);
- }
-
- /*
-@@ -425,16 +425,11 @@ static struct notifier_block mce_idle_no
-
- static __init int periodic_mcheck_init(void)
- {
-- next_interval = check_interval * HZ;
-- if (next_interval)
-- schedule_delayed_work(&mcheck_work,
-- round_jiffies_relative(next_interval));
-- idle_notifier_register(&mce_idle_notifier);
-- return 0;
-+ idle_notifier_register(&mce_idle_notifier);
-+ return 0;
- }
- __initcall(periodic_mcheck_init);
-
--
- /*
- * Initialize Machine Checks for a CPU.
- */
-@@ -504,6 +499,20 @@ static void __cpuinit mce_cpu_features(s
- }
- }
-
-+static void mce_init_timer(void)
-+{
-+ struct timer_list *t = &__get_cpu_var(mce_timer);
-+
-+ /* data race harmless because everyone sets to the same value */
-+ if (!next_interval)
-+ next_interval = check_interval * HZ;
-+ if (!next_interval)
-+ return;
-+ setup_timer(t, mcheck_timer, smp_processor_id());
-+ t->expires = round_jiffies_relative(jiffies + next_interval);
-+ add_timer(t);
-+}
-+
- /*
- * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
-@@ -521,6 +530,7 @@ void __cpuinit mcheck_init(struct cpuinf
-
- mce_init(NULL);
- mce_cpu_features(c);
-+ mce_init_timer();
- }
-
- /*
-@@ -740,17 +750,19 @@ static int mce_resume(struct sys_device
- return 0;
- }
-
-+static void mce_cpu_restart(void *data)
-+{
-+ del_timer_sync(&__get_cpu_var(mce_timer));
-+ if (mce_available(&current_cpu_data))
-+ mce_init(NULL);
-+ mce_init_timer();
-+}
-+
- /* Reinit MCEs after user configuration changes */
- static void mce_restart(void)
- {
-- if (next_interval)
-- cancel_delayed_work(&mcheck_work);
-- /* Timer race is harmless here */
-- on_each_cpu(mce_init, NULL, 1);
- next_interval = check_interval * HZ;
-- if (next_interval)
-- schedule_delayed_work(&mcheck_work,
-- round_jiffies_relative(next_interval));
-+ on_each_cpu(mce_cpu_restart, NULL, 1);
- }
-
- static struct sysdev_class mce_sysclass = {
-@@ -879,6 +891,7 @@ static int __cpuinit mce_cpu_callback(st
- unsigned long action, void *hcpu)
- {
- unsigned int cpu = (unsigned long)hcpu;
-+ struct timer_list *t = &per_cpu(mce_timer, cpu);
-
- switch (action) {
- case CPU_ONLINE:
-@@ -893,6 +906,15 @@ static int __cpuinit mce_cpu_callback(st
- threshold_cpu_callback(action, cpu);
- mce_remove_device(cpu);
- break;
-+ case CPU_DOWN_PREPARE:
-+ case CPU_DOWN_PREPARE_FROZEN:
-+ del_timer_sync(t);
-+ break;
-+ case CPU_DOWN_FAILED:
-+ case CPU_DOWN_FAILED_FROZEN:
-+ t->expires = round_jiffies_relative(jiffies + next_interval);
-+ add_timer_on(t, cpu);
-+ break;
- }
- return NOTIFY_OK;
- }