]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Andi Kleen <andi@firstfloor.org> |
2 | Date: Thu, 12 Feb 2009 12:39:29 +0000 (+0100) | |
3 | Subject: x86, mce: switch machine check polling to per CPU timer | |
4 | Patch-mainline: 2.6.30-rc1 | |
5 | Git-commit: 52d168e28bc11dd026b620fe1767cadde5a747cd | |
6 | References: bnc#507557 | |
7 | ||
8 | x86, mce: switch machine check polling to per CPU timer | |
9 | ||
10 | Impact: Higher priority bug fix | |
11 | ||
16 | The machine check poller runs a single timer and then broadcasts an | |
13 | IPI to all CPUs to check them. This leads to unnecessary | |
14 | synchronization between CPUs. The original CPU running the timer has | |
15 | to wait potentially a long time for all other CPUs answering. This is | |
16 | also real time unfriendly and in general inefficient. | |
17 | ||
18 | This was especially a problem on systems with a lot of events where | |
19 | the poller run with a higher frequency after processing some events. | |
20 | There could be more and more CPU time wasted with this, to | |
21 | the point of significantly slowing down machines. | |
22 | ||
23 | The machine check polling is actually fully independent per CPU, so | |
24 | there's no reason to not just do this all with per CPU timers. This | |
25 | patch implements that. | |
26 | ||
27 | Also switch the poller to use standard timers instead of work | |
28 | queues. It was using work queues to be able to execute a user program | |
29 | on an event, but mce_notify_user() handles this case now with a | |
30 | separate callback. So instead always run the poll code in a | |
31 | standard per CPU timer, which means that in the common case of not | |
32 | having to execute a trigger there will be less overhead. | |
33 | ||
34 | This allows cleaning up the initialization significantly, because | |
35 | standard timers are already up when machine checks get init'ed. No | |
36 | multiple initialization functions. | |
37 | ||
38 | Thanks to Thomas Gleixner for some help. | |
39 | ||
40 | Cc: thockin@google.com | |
41 | v2: Use del_timer_sync() on cpu shutdown and don't try to handle | |
42 | migrated timers. | |
43 | v3: Add WARN_ON for timer running on unexpected CPU | |
44 | ||
45 | Signed-off-by: Andi Kleen <ak@linux.intel.com> | |
46 | Acked-by: Thomas Gleixner <tglx@linutronix.de> | |
47 | Signed-off-by: H. Peter Anvin <hpa@zytor.com> | |
48 | Acked-by: Jeff Mahoney <jeffm@suse.com> | |
49 | --- | |
50 | arch/x86/kernel/cpu/mcheck/mce_64.c | 68 +++++++++++++++++++++++------------- | |
51 | 1 file changed, 45 insertions(+), 23 deletions(-) | |
52 | ||
53 | --- a/arch/x86/kernel/cpu/mcheck/mce_64.c | |
54 | +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |
55 | @@ -353,18 +353,17 @@ void mce_log_therm_throt_event(unsigned | |
56 | ||
57 | static int check_interval = 5 * 60; /* 5 minutes */ | |
58 | static int next_interval; /* in jiffies */ | |
59 | -static void mcheck_timer(struct work_struct *work); | |
60 | -static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); | |
61 | +static void mcheck_timer(unsigned long); | |
62 | +static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
63 | ||
64 | -static void mcheck_check_cpu(void *info) | |
65 | +static void mcheck_timer(unsigned long data) | |
66 | { | |
67 | + struct timer_list *t = &per_cpu(mce_timer, data); | |
68 | + | |
69 | + WARN_ON(smp_processor_id() != data); | |
70 | + | |
71 | if (mce_available(¤t_cpu_data)) | |
72 | do_machine_check(NULL, 0); | |
73 | -} | |
74 | - | |
75 | -static void mcheck_timer(struct work_struct *work) | |
76 | -{ | |
77 | - on_each_cpu(mcheck_check_cpu, NULL, 1); | |
78 | ||
79 | /* | |
80 | * Alert userspace if needed. If we logged an MCE, reduce the | |
81 | @@ -377,7 +376,8 @@ static void mcheck_timer(struct work_str | |
82 | (int)round_jiffies_relative(check_interval*HZ)); | |
83 | } | |
84 | ||
85 | - schedule_delayed_work(&mcheck_work, next_interval); | |
86 | + t->expires = jiffies + next_interval; | |
87 | + add_timer(t); | |
88 | } | |
89 | ||
90 | /* | |
91 | @@ -425,16 +425,11 @@ static struct notifier_block mce_idle_no | |
92 | ||
93 | static __init int periodic_mcheck_init(void) | |
94 | { | |
95 | - next_interval = check_interval * HZ; | |
96 | - if (next_interval) | |
97 | - schedule_delayed_work(&mcheck_work, | |
98 | - round_jiffies_relative(next_interval)); | |
99 | - idle_notifier_register(&mce_idle_notifier); | |
100 | - return 0; | |
101 | + idle_notifier_register(&mce_idle_notifier); | |
102 | + return 0; | |
103 | } | |
104 | __initcall(periodic_mcheck_init); | |
105 | ||
106 | - | |
107 | /* | |
108 | * Initialize Machine Checks for a CPU. | |
109 | */ | |
110 | @@ -504,6 +499,20 @@ static void __cpuinit mce_cpu_features(s | |
111 | } | |
112 | } | |
113 | ||
114 | +static void mce_init_timer(void) | |
115 | +{ | |
116 | + struct timer_list *t = &__get_cpu_var(mce_timer); | |
117 | + | |
118 | + /* data race harmless because everyone sets to the same value */ | |
119 | + if (!next_interval) | |
120 | + next_interval = check_interval * HZ; | |
121 | + if (!next_interval) | |
122 | + return; | |
123 | + setup_timer(t, mcheck_timer, smp_processor_id()); | |
124 | + t->expires = round_jiffies_relative(jiffies + next_interval); | |
125 | + add_timer(t); | |
126 | +} | |
127 | + | |
128 | /* | |
129 | * Called for each booted CPU to set up machine checks. | |
130 | * Must be called with preempt off. | |
131 | @@ -521,6 +530,7 @@ void __cpuinit mcheck_init(struct cpuinf | |
132 | ||
133 | mce_init(NULL); | |
134 | mce_cpu_features(c); | |
135 | + mce_init_timer(); | |
136 | } | |
137 | ||
138 | /* | |
139 | @@ -740,17 +750,19 @@ static int mce_resume(struct sys_device | |
140 | return 0; | |
141 | } | |
142 | ||
143 | +static void mce_cpu_restart(void *data) | |
144 | +{ | |
145 | + del_timer_sync(&__get_cpu_var(mce_timer)); | |
146 | + if (mce_available(¤t_cpu_data)) | |
147 | + mce_init(NULL); | |
148 | + mce_init_timer(); | |
149 | +} | |
150 | + | |
151 | /* Reinit MCEs after user configuration changes */ | |
152 | static void mce_restart(void) | |
153 | { | |
154 | - if (next_interval) | |
155 | - cancel_delayed_work(&mcheck_work); | |
156 | - /* Timer race is harmless here */ | |
157 | - on_each_cpu(mce_init, NULL, 1); | |
158 | next_interval = check_interval * HZ; | |
159 | - if (next_interval) | |
160 | - schedule_delayed_work(&mcheck_work, | |
161 | - round_jiffies_relative(next_interval)); | |
162 | + on_each_cpu(mce_cpu_restart, NULL, 1); | |
163 | } | |
164 | ||
165 | static struct sysdev_class mce_sysclass = { | |
166 | @@ -879,6 +891,7 @@ static int __cpuinit mce_cpu_callback(st | |
167 | unsigned long action, void *hcpu) | |
168 | { | |
169 | unsigned int cpu = (unsigned long)hcpu; | |
170 | + struct timer_list *t = &per_cpu(mce_timer, cpu); | |
171 | ||
172 | switch (action) { | |
173 | case CPU_ONLINE: | |
174 | @@ -893,6 +906,15 @@ static int __cpuinit mce_cpu_callback(st | |
175 | threshold_cpu_callback(action, cpu); | |
176 | mce_remove_device(cpu); | |
177 | break; | |
178 | + case CPU_DOWN_PREPARE: | |
179 | + case CPU_DOWN_PREPARE_FROZEN: | |
180 | + del_timer_sync(t); | |
181 | + break; | |
182 | + case CPU_DOWN_FAILED: | |
183 | + case CPU_DOWN_FAILED_FROZEN: | |
184 | + t->expires = round_jiffies_relative(jiffies + next_interval); | |
185 | + add_timer_on(t, cpu); | |
186 | + break; | |
187 | } | |
188 | return NOTIFY_OK; | |
189 | } |