]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From: Cliff Wickman <cpw@sgi.com> |
2 | Subject: perfmon2 | |
3 | References: bnc#430298 | |
4 | Patch-mainline: never | |
5 | ||
6 | This is Stephane Eranian's patch | |
7 | from http://perfmon2.sourceforge.net/ | |
8 | but backfitted to the SuSE KOTD for 10/20/2008 | |
9 | ||
10 | [greg's note: I really don't like this, as perfmon2 has been rejected | |
11 | from upstream, and perfmon3 is being worked on. This should be going | |
12 | away for SP1, and no one should count on the userspace interface | |
13 | remaining the same...] | |
14 | ||
15 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
16 | ||
17 | --- | |
18 | Documentation/ABI/testing/sysfs-perfmon | 87 + | |
19 | Documentation/ABI/testing/sysfs-perfmon-fmt | 18 | |
20 | Documentation/ABI/testing/sysfs-perfmon-pmu | 46 | |
21 | Documentation/kernel-parameters.txt | 3 | |
22 | Documentation/perfmon2-debugfs.txt | 126 ++ | |
23 | Documentation/perfmon2.txt | 213 +++ | |
24 | MAINTAINERS | 8 | |
25 | Makefile | 1 | |
26 | arch/ia64/Kconfig | 10 | |
27 | arch/ia64/Makefile | 1 | |
28 | arch/ia64/configs/generic_defconfig | 11 | |
29 | arch/ia64/include/asm/Kbuild | 4 | |
30 | arch/ia64/include/asm/hw_irq.h | 2 | |
31 | arch/ia64/include/asm/perfmon.h | 302 ----- | |
32 | arch/ia64/include/asm/perfmon_compat.h | 167 +++ | |
33 | arch/ia64/include/asm/perfmon_default_smpl.h | 121 +- | |
34 | arch/ia64/include/asm/perfmon_kern.h | 356 ++++++ | |
35 | arch/ia64/include/asm/processor.h | 10 | |
36 | arch/ia64/include/asm/system.h | 18 | |
37 | arch/ia64/include/asm/thread_info.h | 4 | |
38 | arch/ia64/include/asm/unistd.h | 14 | |
39 | arch/ia64/kernel/Makefile | 3 | |
40 | arch/ia64/kernel/entry.S | 12 | |
41 | arch/ia64/kernel/irq_ia64.c | 7 | |
42 | arch/ia64/kernel/perfmon_default_smpl.c | 296 ----- | |
43 | arch/ia64/kernel/perfmon_generic.h | 45 | |
44 | arch/ia64/kernel/perfmon_itanium.h | 115 -- | |
45 | arch/ia64/kernel/perfmon_mckinley.h | 187 --- | |
46 | arch/ia64/kernel/perfmon_montecito.h | 269 ----- | |
47 | arch/ia64/kernel/process.c | 98 - | |
48 | arch/ia64/kernel/ptrace.c | 8 | |
49 | arch/ia64/kernel/setup.c | 3 | |
50 | arch/ia64/kernel/smpboot.c | 10 | |
51 | arch/ia64/kernel/sys_ia64.c | 8 | |
52 | arch/ia64/lib/Makefile | 1 | |
53 | arch/ia64/oprofile/init.c | 8 | |
54 | arch/ia64/oprofile/perfmon.c | 39 | |
55 | arch/ia64/perfmon/Kconfig | 67 + | |
56 | arch/ia64/perfmon/Makefile | 11 | |
57 | arch/ia64/perfmon/perfmon.c | 946 +++++++++++++++++ | |
58 | arch/ia64/perfmon/perfmon_compat.c | 1210 ++++++++++++++++++++++ | |
59 | arch/ia64/perfmon/perfmon_default_smpl.c | 273 +++++ | |
60 | arch/ia64/perfmon/perfmon_generic.c | 148 ++ | |
61 | arch/ia64/perfmon/perfmon_itanium.c | 232 ++++ | |
62 | arch/ia64/perfmon/perfmon_mckinley.c | 290 +++++ | |
63 | arch/ia64/perfmon/perfmon_montecito.c | 412 +++++++ | |
64 | arch/mips/Kconfig | 2 | |
65 | arch/mips/Makefile | 6 | |
66 | arch/mips/kernel/process.c | 4 | |
67 | arch/mips/kernel/scall32-o32.S | 12 | |
68 | arch/mips/kernel/scall64-64.S | 12 | |
69 | arch/mips/kernel/scall64-n32.S | 16 | |
70 | arch/mips/kernel/scall64-o32.S | 12 | |
71 | arch/mips/kernel/signal.c | 6 | |
72 | arch/mips/kernel/time.c | 3 | |
73 | arch/mips/kernel/traps.c | 16 | |
74 | arch/mips/mti-malta/malta-time.c | 1 | |
75 | arch/mips/perfmon/Kconfig | 61 + | |
76 | arch/mips/perfmon/Makefile | 2 | |
77 | arch/mips/perfmon/perfmon.c | 313 +++++ | |
78 | arch/mips/perfmon/perfmon_mips64.c | 218 ++++ | |
79 | arch/powerpc/Kconfig | 2 | |
80 | arch/powerpc/Makefile | 1 | |
81 | arch/powerpc/include/asm/Kbuild | 1 | |
82 | arch/powerpc/include/asm/cell-pmu.h | 5 | |
83 | arch/powerpc/include/asm/cell-regs.h | 30 | |
84 | arch/powerpc/include/asm/paca.h | 4 | |
85 | arch/powerpc/include/asm/perfmon.h | 33 | |
86 | arch/powerpc/include/asm/perfmon_kern.h | 390 +++++++ | |
87 | arch/powerpc/include/asm/reg.h | 1 | |
88 | arch/powerpc/include/asm/systbl.h | 12 | |
89 | arch/powerpc/include/asm/thread_info.h | 4 | |
90 | arch/powerpc/include/asm/unistd.h | 14 | |
91 | arch/powerpc/kernel/entry_32.S | 2 | |
92 | arch/powerpc/kernel/entry_64.S | 4 | |
93 | arch/powerpc/kernel/irq.c | 31 | |
94 | arch/powerpc/kernel/process.c | 10 | |
95 | arch/powerpc/perfmon/Kconfig | 67 + | |
96 | arch/powerpc/perfmon/Makefile | 6 | |
97 | arch/powerpc/perfmon/perfmon.c | 334 ++++++ | |
98 | arch/powerpc/perfmon/perfmon_cell.c | 1449 +++++++++++++++++++++++++++ | |
99 | arch/powerpc/perfmon/perfmon_power4.c | 309 +++++ | |
100 | arch/powerpc/perfmon/perfmon_power5.c | 326 ++++++ | |
101 | arch/powerpc/perfmon/perfmon_power6.c | 520 +++++++++ | |
102 | arch/powerpc/perfmon/perfmon_ppc32.c | 340 ++++++ | |
103 | arch/powerpc/platforms/cell/cbe_regs.c | 27 | |
104 | arch/sparc/include/asm/hypervisor.h | 24 | |
105 | arch/sparc/include/asm/irq_64.h | 3 | |
106 | arch/sparc/include/asm/perfmon.h | 11 | |
107 | arch/sparc/include/asm/perfmon_kern.h | 286 +++++ | |
108 | arch/sparc/include/asm/system_64.h | 34 | |
109 | arch/sparc/include/asm/thread_info_64.h | 28 | |
110 | arch/sparc/include/asm/unistd_32.h | 14 | |
111 | arch/sparc/include/asm/unistd_64.h | 14 | |
112 | arch/sparc/kernel/systbls.S | 4 | |
113 | arch/sparc64/Kconfig | 2 | |
114 | arch/sparc64/Makefile | 2 | |
115 | arch/sparc64/kernel/cpu.c | 47 | |
116 | arch/sparc64/kernel/hvcalls.S | 41 | |
117 | arch/sparc64/kernel/irq.c | 63 + | |
118 | arch/sparc64/kernel/process.c | 26 | |
119 | arch/sparc64/kernel/rtrap.S | 51 | |
120 | arch/sparc64/kernel/setup.c | 2 | |
121 | arch/sparc64/kernel/signal.c | 4 | |
122 | arch/sparc64/kernel/sys_sparc.c | 101 - | |
123 | arch/sparc64/kernel/syscalls.S | 23 | |
124 | arch/sparc64/kernel/systbls.S | 8 | |
125 | arch/sparc64/kernel/traps.c | 158 +- | |
126 | arch/sparc64/kernel/ttable.S | 2 | |
127 | arch/sparc64/perfmon/Kconfig | 26 | |
128 | arch/sparc64/perfmon/Makefile | 1 | |
129 | arch/sparc64/perfmon/perfmon.c | 422 +++++++ | |
130 | arch/x86/Kconfig | 2 | |
131 | arch/x86/Makefile | 2 | |
132 | arch/x86/ia32/ia32entry.S | 12 | |
133 | arch/x86/kernel/apic_32.c | 5 | |
134 | arch/x86/kernel/apic_64.c | 1 | |
135 | arch/x86/kernel/cpu/common.c | 3 | |
136 | arch/x86/kernel/entry_32.S | 2 | |
137 | arch/x86/kernel/entry_64.S | 8 | |
138 | arch/x86/kernel/irqinit_64.c | 5 | |
139 | arch/x86/kernel/process_32.c | 10 | |
140 | arch/x86/kernel/process_64.c | 10 | |
141 | arch/x86/kernel/signal_32.c | 5 | |
142 | arch/x86/kernel/signal_64.c | 6 | |
143 | arch/x86/kernel/smpboot.c | 2 | |
144 | arch/x86/kernel/syscall_table_32.S | 12 | |
145 | arch/x86/oprofile/nmi_int.c | 10 | |
146 | arch/x86/perfmon/Kconfig | 89 + | |
147 | arch/x86/perfmon/Makefile | 13 | |
148 | arch/x86/perfmon/perfmon.c | 761 ++++++++++++++ | |
149 | arch/x86/perfmon/perfmon_amd64.c | 754 ++++++++++++++ | |
150 | arch/x86/perfmon/perfmon_intel_arch.c | 610 +++++++++++ | |
151 | arch/x86/perfmon/perfmon_intel_atom.c | 541 ++++++++++ | |
152 | arch/x86/perfmon/perfmon_intel_core.c | 449 ++++++++ | |
153 | arch/x86/perfmon/perfmon_p4.c | 913 +++++++++++++++++ | |
154 | arch/x86/perfmon/perfmon_p6.c | 310 +++++ | |
155 | arch/x86/perfmon/perfmon_pebs_core_smpl.c | 256 ++++ | |
156 | arch/x86/perfmon/perfmon_pebs_p4_smpl.c | 253 ++++ | |
157 | include/asm-mips/Kbuild | 1 | |
158 | include/asm-mips/perfmon.h | 34 | |
159 | include/asm-mips/perfmon_kern.h | 412 +++++++ | |
160 | include/asm-mips/system.h | 4 | |
161 | include/asm-mips/thread_info.h | 4 | |
162 | include/asm-mips/unistd.h | 46 | |
163 | include/asm-x86/Kbuild | 1 | |
164 | include/asm-x86/ia32_unistd.h | 13 | |
165 | include/asm-x86/irq_vectors.h | 5 | |
166 | include/asm-x86/mach-default/entry_arch.h | 4 | |
167 | include/asm-x86/perfmon.h | 34 | |
168 | include/asm-x86/perfmon_kern.h | 548 ++++++++++ | |
169 | include/asm-x86/perfmon_pebs_core_smpl.h | 164 +++ | |
170 | include/asm-x86/perfmon_pebs_p4_smpl.h | 193 +++ | |
171 | include/asm-x86/thread_info.h | 8 | |
172 | include/asm-x86/unistd_32.h | 14 | |
173 | include/asm-x86/unistd_64.h | 25 | |
174 | include/linux/Kbuild | 2 | |
175 | include/linux/perfmon.h | 213 +++ | |
176 | include/linux/perfmon_dfl_smpl.h | 78 + | |
177 | include/linux/perfmon_fmt.h | 74 + | |
178 | include/linux/perfmon_kern.h | 551 ++++++++++ | |
179 | include/linux/perfmon_pmu.h | 192 +++ | |
180 | include/linux/sched.h | 4 | |
181 | include/linux/syscalls.h | 30 | |
182 | kernel/sched.c | 1 | |
183 | kernel/sys_ni.c | 13 | |
184 | perfmon/Makefile | 12 | |
185 | perfmon/perfmon_activate.c | 265 ++++ | |
186 | perfmon/perfmon_attach.c | 474 ++++++++ | |
187 | perfmon/perfmon_ctx.c | 314 +++++ | |
188 | perfmon/perfmon_ctxsw.c | 342 ++++++ | |
189 | perfmon/perfmon_debugfs.c | 168 +++ | |
190 | perfmon/perfmon_dfl_smpl.c | 298 +++++ | |
191 | perfmon/perfmon_file.c | 751 +++++++++++++ | |
192 | perfmon/perfmon_fmt.c | 219 ++++ | |
193 | perfmon/perfmon_hotplug.c | 151 ++ | |
194 | perfmon/perfmon_init.c | 131 ++ | |
195 | perfmon/perfmon_intr.c | 648 ++++++++++++ | |
196 | perfmon/perfmon_msg.c | 229 ++++ | |
197 | perfmon/perfmon_pmu.c | 590 ++++++++++ | |
198 | perfmon/perfmon_priv.h | 182 +++ | |
199 | perfmon/perfmon_res.c | 450 ++++++++ | |
200 | perfmon/perfmon_rw.c | 733 +++++++++++++ | |
201 | perfmon/perfmon_sets.c | 873 ++++++++++++++++ | |
202 | perfmon/perfmon_smpl.c | 865 ++++++++++++++++ | |
203 | perfmon/perfmon_syscalls.c | 1060 +++++++++++++++++++ | |
204 | perfmon/perfmon_sysfs.c | 525 +++++++++ | |
205 | 187 files changed, 27484 insertions(+), 1731 deletions(-) | |
206 | ||
207 | --- /dev/null | |
208 | +++ b/Documentation/ABI/testing/sysfs-perfmon | |
209 | @@ -0,0 +1,87 @@ | |
210 | +What: /sys/kernel/perfmon | |
211 | +Date: Nov 2007 | |
212 | +KernelVersion: 2.6.24 | |
213 | +Contact: eranian@gmail.com | |
214 | + | |
215 | +Description: provide the configuration interface for the perfmon2 subsystems. | |
216 | + The tree contains information about the detected hardware, current | |
217 | + state of the subsystem as well as some configuration parameters. | |
218 | + | |
219 | + The tree consists of the following entries: | |
220 | + | |
221 | + /sys/kernel/perfmon/debug (read-write): | |
222 | + | |
223 | + Enable perfmon2 debugging output via klogd. Debug messages produced during | |
224 | + PMU interrupt handling are not controlled by this entry. The traces are rate-limited | |
225 | + to avoid flooding of the console. It is possible to change the throttling | |
226 | + via /proc/sys/kernel/printk_ratelimit. The value is interpreted as a bitmask. | |
227 | + Each bit enables a particular type of debug messages. Refer to the file | |
228 | + include/linux/perfmon_kern.h for more information | |
229 | + | |
230 | + /sys/kernel/perfmon/pmc_max_fast_arg (read-only): | |
231 | + | |
232 | + Number of perfmon2 syscall arguments copied directly onto the | |
233 | + stack (copy_from_user) for pfm_write_pmcs(). Copying to the stack avoids | |
234 | + having to allocate a buffer. The unit is the number of pfarg_pmc_t | |
235 | + structures. | |
236 | + | |
237 | + /sys/kernel/perfmon/pmd_max_fast_arg (read-only): | |
238 | + | |
239 | + Number of perfmon2 syscall arguments copied directly onto the | |
240 | + stack (copy_from_user) for pfm_write_pmds()/pfm_read_pmds(). Copying | |
241 | + to the stack avoids having to allocate a buffer. The unit is the number | |
242 | + of pfarg_pmd_t structures. | |
243 | + | |
244 | + | |
245 | + /sys/kernel/perfmon/reset_stats (write-only): | |
246 | + | |
247 | + Reset the statistics collected by perfmon2. Stats are available | |
248 | + per-cpu via debugfs. | |
249 | + | |
250 | + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only): | |
251 | + | |
252 | + Reports the amount of memory currently dedicated to sampling | |
253 | + buffers by the kernel. The unit is byte. | |
254 | + | |
255 | + /sys/kernel/perfmon/smpl_buffer_mem_max (read-write): | |
256 | + | |
257 | + Maximum amount of kernel memory usable for sampling buffers. -1 means | |
258 | + everything that is available. Unit is byte. | |
259 | + | |
260 | + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only): | |
261 | + | |
262 | + Current utilization of kernel memory in bytes. | |
263 | + | |
264 | + /sys/kernel/perfmon/sys_group (read-write): | |
265 | + | |
266 | + Users group allowed to create a system-wide perfmon2 context (session). | |
267 | + -1 means any group. This control will be kept until we find a package | |
268 | + able to control capabilities via PAM. | |
269 | + | |
270 | + /sys/kernel/perfmon/task_group (read-write): | |
271 | + | |
272 | + Users group allowed to create a per-thread context (session). | |
273 | + -1 means any group. This control will be kept until we find a | |
274 | + package able to control capabilities via PAM. | |
275 | + | |
276 | + /sys/kernel/perfmon/sys_sessions_count (read-only): | |
277 | + | |
278 | + Number of system-wide contexts currently attached to CPUs. | |
279 | + | |
280 | + /sys/kernel/perfmon/task_sessions_count (read-only): | |
281 | + | |
282 | + Number of per-thread contexts currently attached to threads. | |
283 | + | |
284 | + /sys/kernel/perfmon/version (read-only): | |
285 | + | |
286 | + Perfmon2 interface revision number. | |
287 | + | |
288 | + /sys/kernel/perfmon/arg_mem_max(read-write): | |
289 | + | |
290 | + Maximum size of vector arguments expressed in bytes. Can be modified | |
291 | + | |
292 | + /sys/kernel/perfmon/mode(read-write): | |
293 | + | |
294 | + Bitmask to enable/disable certain perfmon2 features. | |
295 | + Currently defined: | |
296 | + - bit 0: if set, then reserved bitfield are ignored on PMC writes | |
297 | --- /dev/null | |
298 | +++ b/Documentation/ABI/testing/sysfs-perfmon-fmt | |
299 | @@ -0,0 +1,18 @@ | |
300 | +What: /sys/kernel/perfmon/formats | |
301 | +Date: 2007 | |
302 | +KernelVersion: 2.6.24 | |
303 | +Contact: eranian@gmail.com | |
304 | + | |
305 | +Description: provide description of available perfmon2 custom sampling buffer formats | |
306 | + which are implemented as independent kernel modules. Each format gets | |
307 | + a subdir with a few entries. | |
308 | + | |
309 | + The name of the subdir is the name of the sampling format. The same name | |
310 | + must be passed to pfm_create_context() to use the format. | |
311 | + | |
312 | + Each subdir XX contains the following entries: | |
313 | + | |
314 | + /sys/kernel/perfmon/formats/XX/version (read-only): | |
315 | + | |
316 | + Version number of the format in clear text and null terminated. | |
317 | + | |
318 | --- /dev/null | |
319 | +++ b/Documentation/ABI/testing/sysfs-perfmon-pmu | |
320 | @@ -0,0 +1,46 @@ | |
321 | +What: /sys/kernel/perfmon/pmu | |
322 | +Date: Nov 2007 | |
323 | +KernelVersion: 2.6.24 | |
324 | +Contact: eranian@gmail.com | |
325 | + | |
326 | +Description: provide information about the currently loaded PMU description module. | |
327 | + The module contains the mapping of the actual performance counter registers | |
328 | + onto the logical PMU exposed by perfmon. There is at most one PMU description | |
329 | + module loaded at any time. | |
330 | + | |
331 | + The sysfs PMU tree provides a description of the mapping for each register. | |
332 | + There is one subdir per config and data registers along an entry for the | |
333 | + name of the PMU model. | |
334 | + | |
335 | + The model entry is as follows: | |
336 | + | |
337 | + /sys/kernel/perfmon/pmu_desc/model (read-only): | |
338 | + | |
339 | + Name of the PMU model in clear text and zero terminated. | |
340 | + | |
341 | + Then each logical PMU register, XX, gets a subtree with the following entries: | |
342 | + | |
343 | + /sys/kernel/perfmon/pmu_desc/pm*XX/addr (read-only): | |
344 | + | |
345 | + The physical address or index of the actual underlying hardware register. | |
346 | + On Itanium, it corresponds to the index. But on X86 processor, this is | |
347 | + the actual MSR address. | |
348 | + | |
349 | + /sys/kernel/perfmon/pmu_desc/pm*XX/dfl_val (read-only): | |
350 | + | |
351 | + The default value of the register in hexadecimal. | |
352 | + | |
353 | + /sys/kernel/perfmon/pmu_desc/pm*XX/name (read-only): | |
354 | + | |
355 | + The name of the hardware register. | |
356 | + | |
357 | + /sys/kernel/perfmon/pmu_desc/pm*XX/rsvd_msk (read-only): | |
358 | + | |
359 | + The bitmask of reserved bits, i.e., bits which cannot be changed by | |
360 | + applications. When a bit is set, it means the corresponding bit in the | |
361 | + actual register is reserved. | |
362 | + | |
363 | + /sys/kernel/perfmon/pmu_desc/pm*XX/width (read-only): | |
364 | + | |
365 | + the width in bits of the registers. This field is only relevant for counter | |
366 | + registers. | |
367 | --- a/Documentation/kernel-parameters.txt | |
368 | +++ b/Documentation/kernel-parameters.txt | |
369 | @@ -1698,6 +1698,9 @@ and is between 256 and 4096 characters. | |
370 | Format: { 0 | 1 } | |
371 | See arch/parisc/kernel/pdc_chassis.c | |
372 | ||
373 | + perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed | |
374 | + to see traces of the early startup phase. | |
375 | + | |
376 | pf. [PARIDE] | |
377 | See Documentation/paride.txt. | |
378 | ||
379 | --- /dev/null | |
380 | +++ b/Documentation/perfmon2-debugfs.txt | |
381 | @@ -0,0 +1,126 @@ | |
382 | + The perfmon2 debug and statistics interface | |
383 | + ------------------------------------------ | |
384 | + Stephane Eranian | |
385 | + <eranian@gmail.com> | |
386 | + | |
387 | +The perfmon2 interfaces exports a set of statistics which are used to tune and | |
388 | +debug the implementation. The data is composed of a set of very simple metrics | |
389 | +mostly aggregated counts and durations. They instrument key points in the | |
390 | +perfmon2 code, such as context switch and interrupt handling. | |
391 | + | |
392 | +The data is accessible via the debug filesystem (debugfs). Thus you need to | |
393 | +have the filesystem support enabled in your kernel. Furthermore, since 2.6.25, | |
394 | +the perfmon2 statistics interface is an optional component. It needs to be | |
395 | +explicitly enabled in the kernel config file (CONFIG_PERFMON_DEBUG_FS). | |
396 | + | |
397 | +To access the data, the debugfs filesystem must be mounted. Supposing the mount | |
398 | +point is /debugfs, you would need to do: | |
399 | + $ mount -t debugfs none /debugfs | |
400 | + | |
401 | +The data is located under the perfmon subdirectory and is organized per CPU. | |
402 | +For each CPU, the same set of metrics is available, one metric per file in | |
403 | +clear ASCII text. | |
404 | + | |
405 | +The metrics are as follows: | |
406 | + | |
407 | + ctxswin_count (read-only): | |
408 | + | |
409 | + Number of PMU context switch in. | |
410 | + | |
411 | + ctxswin_ns (read-only): | |
412 | + | |
413 | + Number of nanoseconds spent in the PMU context switch in | |
414 | + routine. Dividing this number by the value of ctxswin_count, | |
415 | + yields average cost of the PMU context switch in. | |
416 | + | |
417 | + ctxswout_count (read-only): | |
418 | + | |
419 | + Number of PMU context switch out. | |
420 | + | |
421 | + ctxswout_ns (read-only): | |
422 | + | |
423 | + Number of nanoseconds spent in the PMU context switch in | |
424 | + routine. Dividing this number by the value of ctxswout_count, | |
425 | + yields average cost of the PMU context switch out. | |
426 | + | |
427 | + fmt_handler_calls (read-only): | |
428 | + | |
429 | + Number of calls to the sampling format routine that handles | |
430 | + PMU interrupts, i.e., typically the routine that records a | |
431 | + sample. | |
432 | + | |
433 | + fmt_handler_ns (read-only): | |
434 | + | |
435 | + Number of nanoseconds spent in the routine that handle PMU | |
436 | + interrupt in the sampling format. Dividing this number by | |
437 | + the number of calls provided by fmt_handler_calls, yields | |
438 | + average time spent in this routine. | |
439 | + | |
440 | + ovfl_intr_all_count (read-only): | |
441 | + | |
442 | + Number of PMU interrupts received by the kernel. | |
443 | + | |
444 | + | |
445 | + ovfl_intr_nmi_count (read-only): | |
446 | + | |
447 | + Number of Non-Maskable Interrupts (NMIs) received by the kernel | |
448 | + for perfmon. This is relevant only on X86 hardware. | |
449 | + | |
450 | + ovfl_intr_ns (read-only): | |
451 | + | |
452 | + Number of nanoseconds spent in the perfmon2 PMU interrupt | |
453 | + handler routine. Dividing this number by ovfl_intr_all_count | |
454 | + yields the average time to handle one PMU interrupt. | |
455 | + | |
456 | + ovfl_intr_regular_count (read-only): | |
457 | + | |
458 | + Number of PMU interrupts which are actually processed by | |
459 | + the perfmon interrupt handler. There may be spurious or replay | |
460 | + interrupts. | |
461 | + | |
462 | + ovfl_intr_replay_count (read-only): | |
463 | + | |
464 | + Number of PMU interrupts which were replayed on context switch | |
465 | + in or on event set switching. Interrupts get replayed when they | |
466 | + were in flight at the time monitoring had to be stopped. | |
467 | + | |
468 | + perfmon/ovfl_intr_spurious_count (read-only): | |
469 | + | |
470 | + Number of PMU interrupts which were dropped because there was | |
471 | + no active context (session). | |
472 | + | |
473 | + ovfl_notify_count (read-only): | |
474 | + | |
475 | + Number of user level notifications sent. Notifications are | |
476 | + appended as messages to the context queue. Notifications may | |
477 | + be sent on PMU interrupts. | |
478 | + | |
479 | + pfm_restart_count (read-only): | |
480 | + | |
481 | + Number of times pfm_restart() is called. | |
482 | + | |
483 | + reset_pmds_count (read-only): | |
484 | + | |
485 | + Number of times pfm_reset_pmds() is called. | |
486 | + | |
487 | + set_switch_count (read-only): | |
488 | + | |
489 | + Number of event set switches. | |
490 | + | |
491 | + set_switch_ns (read-only): | |
492 | + | |
493 | + Number of nanoseconds spent in the set switching routine. | |
494 | + Dividing this number by set_switch_count yields the average | |
495 | + cost of switching sets. | |
496 | + | |
497 | + handle_timeout_count (read-only): | |
498 | + | |
499 | + Number of times the pfm_handle_timeout() routine is called. | |
500 | + It is used for timeout-based set switching. | |
501 | + | |
502 | + handle_work_count (read-only): | |
503 | + | |
504 | + Number of times pfm_handle_work() is called. The routine | |
505 | + handles asynchronous perfmon2 work for per-thread contexts | |
506 | + (sessions). | |
507 | + | |
508 | --- /dev/null | |
509 | +++ b/Documentation/perfmon2.txt | |
510 | @@ -0,0 +1,213 @@ | |
511 | + The perfmon2 hardware monitoring interface | |
512 | + ------------------------------------------ | |
513 | + Stephane Eranian | |
514 | + <eranian@gmail.com> | |
515 | + | |
516 | +I/ Introduction | |
517 | + | |
518 | + The perfmon2 interface provides access to the hardware performance counters of | |
519 | + major processors. Nowadays, all processors implement some flavors of performance | |
520 | + counters which capture micro-architectural level information such as the number | |
521 | + of elapsed cycles, number of cache misses, and so on. | |
522 | + | |
523 | + The interface is implemented as a set of new system calls and a set of config files | |
524 | + in /sys. | |
525 | + | |
526 | + It is possible to monitor a single thread or a CPU. In either mode, applications | |
527 | + can count or collect samples. System-wide monitoring is supported by running a | |
528 | + monitoring session on each CPU. The interface support event-based sampling where the | |
529 | + sampling period is expressed as the number of occurrences of an event, instead of just a | |
530 | + timeout. This approach provides a much better granularity and flexibility. | |
531 | + | |
532 | + For performance reason, it is possible to use a kernel-level sampling buffer to minimize | |
533 | + the overhead incurred by sampling. The format of the buffer, i.e., what is recorded, how | |
534 | + it is recorded, and how it is exported to user-land is controlled by a kernel module called | |
535 | + a custom sampling format. The current implementation comes with a default format but | |
536 | + it is possible to create additional formats. There is an in-kernel registration | |
537 | + interface for formats. Each format is identified by a simple string which a tool | |
538 | + can pass when a monitoring session is created. | |
539 | + | |
540 | + The interface also provides support for event set and multiplexing to work around | |
541 | + hardware limitations in the number of available counters or in how events can be | |
542 | + combined. Each set defines as many counters as the hardware can support. The kernel | |
543 | + then multiplexes the sets. The interface supports time-base switching but also | |
544 | + overflow based switching, i.e., after n overflows of designated counters. | |
545 | + | |
546 | + Applications never manipulate the actual performance counter registers. Instead they see | |
547 | + a logical Performance Monitoring Unit (PMU) composed of a set of config register (PMC) | |
548 | + and a set of data registers (PMD). Note that PMD are not necessarily counters, they | |
549 | + can be buffers. The logical PMU is then mapped onto the actual PMU using a mapping | |
550 | + table which is implemented as a kernel module. The mapping is chosen once for each | |
551 | + new processor. It is visible in /sys/kernel/perfmon/pmu_desc. The kernel module | |
552 | + is automatically loaded on first use. | |
553 | + | |
554 | + A monitoring session, or context, is uniquely identified by a file descriptor | |
555 | + obtained when the context is created. File sharing semantics apply to access | |
556 | + the context inside a process. A context is never inherited across fork. The file | |
557 | + descriptor can be used to receive counter overflow notifications or when the | |
558 | + sampling buffer is full. It is possible to use poll/select on the descriptor | |
559 | + to wait for notifications from multiplex contexts. Similarly, the descriptor | |
560 | + supports asynchronous notification via SIGIO. | |
561 | + | |
562 | + Counters are always exported as being 64-bit wide regardless of what the underlying | |
563 | + hardware implements. | |
564 | + | |
565 | +II/ Kernel compilation | |
566 | + | |
567 | + To enable perfmon2, you need to enable CONFIG_PERFMON | |
568 | + | |
569 | +III/ OProfile interactions | |
570 | + | |
571 | + The set of features offered by perfmon2 is rich enough to support migrating | |
572 | + Oprofile on top of it. That means that PMU programming and low-level interrupt | |
573 | + handling could be done by perfmon2. The Oprofile sampling buffer management code | |
574 | + in the kernel as well as how samples are exported to users could remain through | |
575 | + the use of a custom sampling buffer format. This is how Oprofile works on Itanium. | |
576 | + | |
577 | + The current interactions with Oprofile are: | |
578 | + - on X86: Both subsystems can be compiled into the same kernel. There is enforced | |
579 | + mutual exclusion between the two subsystems. When there is an Oprofile | |
580 | + session, no perfmon2 session can exist and vice-versa. Perfmon2 session | |
581 | + encapsulates both per-thread and system-wide sessions here. | |
582 | + | |
583 | + - On IA-64: Oprofile works on top of perfmon2. Oprofile being a system-wide monitoring | |
584 | + tool, the regular per-thread vs. system-wide session restrictions apply. | |
585 | + | |
586 | + - on PPC: no integration yet. You need to enable/disable one of the two subsystems | |
587 | + - on MIPS: no integration yet. You need to enable/disable one of the two subsystems | |
588 | + | |
589 | +IV/ User tools | |
590 | + | |
591 | + We have released a simple monitoring tool to demonstrate the feature of the | |
592 | + interface. The tool is called pfmon and it comes with a simple helper library | |
593 | + called libpfm. The library comes with a set of examples to show how to use the | |
594 | + kernel perfmon2 interface. Visit http://perfmon2.sf.net for details. | |
595 | + | |
596 | + There maybe other tools available for perfmon2. | |
597 | + | |
598 | +V/ How to program? | |
599 | + | |
600 | + The best way to learn how to program perfmon2, is to take a look at the source | |
601 | + code for the examples in libpfm. The source code is available from: | |
602 | + http://perfmon2.sf.net | |
603 | + | |
604 | +VI/ System calls overview | |
605 | + | |
606 | + The interface is implemented by the following system calls: | |
607 | + | |
608 | + * int pfm_create_context(pfarg_ctx_t *ctx, char *fmt, void *arg, size_t arg_size) | |
609 | + | |
610 | + This function creates a perfmon2 context. The type of context is per-thread by | |
611 | + default unless PFM_FL_SYSTEM_WIDE is passed in ctx. The sampling format name | |
612 | + is passed in fmt. Arguments to the format are passed in arg which is of size | |
613 | + arg_size. Upon successful return, the file descriptor identifying the context | |
614 | + is returned. | |
615 | + | |
616 | + * int pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int n) | |
617 | + | |
618 | + This function is used to program the PMD registers. It is possible to pass | |
619 | + vectors of PMDs. | |
620 | + | |
621 | + * int pfm_write_pmcs(int fd, pfarg_pmc_t *pmcs, int n) | |
622 | + | |
623 | + This function is used to program the PMC registers. It is possible to pass | |
624 | + vectors of PMCs. | |
625 | + | |
626 | + * int pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int n) | |
627 | + | |
628 | + This function is used to read the PMD registers. It is possible to pass | |
629 | + vectors of PMDs. | |
630 | + | |
631 | + * int pfm_load_context(int fd, pfarg_load_t *load) | |
632 | + | |
633 | + This function is used to attach the context to a thread or CPU. | |
634 | + Thread means kernel-visible thread (NPTL). The thread identification | |
635 | + as obtained by gettid must be passed to load->load_target. | |
636 | + | |
637 | + To operate on another thread (not self), it is mandatory that the thread | |
638 | + be stopped via ptrace(). | |
639 | + | |
640 | + To attach to a CPU, the CPU number must be specified in load->load_target | |
641 | + AND the call must be issued on that CPU. To monitor a CPU, a thread MUST | |
642 | + be pinned on that CPU. | |
643 | + | |
644 | + Until the context is attached, the actual counters are not accessed. | |
645 | + | |
646 | + * int pfm_unload_context(int fd) | |
647 | + | |
648 | + The context is detached from the thread or CPU it was attached to. | |
649 | + As a consequence monitoring is stopped. | |
650 | + | |
651 | + When monitoring another thread, the thread MUST be stopped via ptrace() | |
652 | + for this function to succeed. | |
653 | + | |
654 | + * int pfm_start(int fd, pfarg_start_t *st) | |
655 | + | |
656 | + Start monitoring. The context must be attached for this function to succeed. | |
657 | + Optionally, it is possible to specify the event set on which to start using the | |
658 | + st argument, otherwise just pass NULL. | |
659 | + | |
660 | + When monitoring another thread, the thread MUST be stopped via ptrace() | |
661 | + for this function to succeed. | |
662 | + | |
663 | + * int pfm_stop(int fd) | |
664 | + | |
665 | + Stop monitoring. The context must be attached for this function to succeed. | |
666 | + | |
667 | + When monitoring another thread, the thread MUST be stopped via ptrace() | |
668 | + for this function to succeed. | |
669 | + | |
670 | + | |
671 | + * int pfm_create_evtsets(int fd, pfarg_setdesc_t *sets, int n) | |
672 | + | |
673 | + This function is used to create or change event sets. By default set 0 exists. | |
674 | + It is possible to create/change multiple sets in one call. | |
675 | + | |
676 | + The context must be detached for this call to succeed. | |
677 | + | |
678 | + Sets are identified by a 16-bit integer. They are sorted based on this | |
679 | + set and switching occurs in a round-robin fashion. | |
680 | + | |
681 | + * int pfm_delete_evtsets(int fd, pfarg_setdesc_t *sets, int n) | |
682 | + | |
683 | + Delete event sets. The context must be detached for this call to succeed. | |
684 | + | |
685 | + | |
686 | + * int pfm_getinfo_evtsets(int fd, pfarg_setinfo_t *sets, int n) | |
687 | + | |
688 | + Retrieve information about event sets. In particular it is possible | |
689 | + to get the number of activations of a set. It is possible to retrieve | |
690 | + information about multiple sets in one call. | |
691 | + | |
692 | + | |
693 | + * int pfm_restart(int fd) | |
694 | + | |
695 | + Indicate to the kernel that the application is done processing an overflow | |
696 | + notification. A consequence of this call could be that monitoring resumes. | |
697 | + | |
698 | + * int read(fd, pfm_msg_t *msg, sizeof(pfm_msg_t)) | |
699 | + | |
700 | + the regular read() system call can be used with the context file descriptor to | |
701 | + receive overflow notification messages. Non-blocking read() is supported. | |
702 | + | |
703 | + Each message carries information about the overflow such as which counter overflowed | |
704 | + and where the program was (interrupted instruction pointer). | |
705 | + | |
706 | + * int close(int fd) | |
707 | + | |
708 | + To destroy a context, the regular close() system call is used. | |
709 | + | |
710 | + | |
711 | +VII/ /sys interface overview | |
712 | + | |
713 | + Refer to Documentation/ABI/testing/sysfs-perfmon-* for a detailed description | |
714 | + of the sysfs interface of perfmon2. | |
715 | + | |
716 | +VIII/ debugfs interface overview | |
717 | + | |
718 | + Refer to Documentation/perfmon2-debugfs.txt for a detailed description of the | |
719 | + debug and statistics interface of perfmon2. | |
720 | + | |
721 | +IX/ Documentation | |
722 | + | |
723 | + Visit http://perfmon2.sf.net | |
724 | --- a/MAINTAINERS | |
725 | +++ b/MAINTAINERS | |
726 | @@ -3244,6 +3244,14 @@ M: balbir@linux.vnet.ibm.com | |
727 | L: linux-kernel@vger.kernel.org | |
728 | S: Maintained | |
729 | ||
730 | +PERFMON SUBSYSTEM | |
731 | +P: Stephane Eranian | |
732 | +M: eranian@gmail.com | |
733 | +L: perfmon2-devel@lists.sf.net | |
734 | +W: http://perfmon2.sf.net | |
735 | +T: git kernel.org:/pub/scm/linux/kernel/git/eranian/linux-2.6 | |
736 | +S: Maintained | |
737 | + | |
738 | PERSONALITY HANDLING | |
739 | P: Christoph Hellwig | |
740 | M: hch@infradead.org | |
741 | --- a/Makefile | |
742 | +++ b/Makefile | |
743 | @@ -651,6 +651,7 @@ export mod_strip_cmd | |
744 | ifeq ($(KBUILD_EXTMOD),) | |
745 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ | |
746 | core-$(CONFIG_KDB) += kdb/ | |
747 | +core-$(CONFIG_PERFMON) += perfmon/ | |
748 | ||
749 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | |
750 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | |
751 | --- a/arch/ia64/Kconfig | |
752 | +++ b/arch/ia64/Kconfig | |
753 | @@ -479,14 +479,6 @@ config IA64_CPE_MIGRATE | |
754 | build this functionality as a kernel loadable module. Installing | |
755 | the module will turn on the functionality. | |
756 | ||
757 | -config PERFMON | |
758 | - bool "Performance monitor support" | |
759 | - help | |
760 | - Selects whether support for the IA-64 performance monitor hardware | |
761 | - is included in the kernel. This makes some kernel data-structures a | |
762 | - little bigger and slows down execution a bit, but it is generally | |
763 | - a good idea to turn this on. If you're unsure, say Y. | |
764 | - | |
765 | config IA64_PALINFO | |
766 | tristate "/proc/pal support" | |
767 | help | |
768 | @@ -558,6 +550,8 @@ source "drivers/firmware/Kconfig" | |
769 | ||
770 | source "fs/Kconfig.binfmt" | |
771 | ||
772 | +source "arch/ia64/perfmon/Kconfig" | |
773 | + | |
774 | endmenu | |
775 | ||
776 | menu "Power management and ACPI" | |
777 | --- a/arch/ia64/Makefile | |
778 | +++ b/arch/ia64/Makefile | |
779 | @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia6 | |
780 | core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ | |
781 | core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ | |
782 | core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ | |
783 | +core-$(CONFIG_PERFMON) += arch/ia64/perfmon/ | |
784 | core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ | |
785 | core-$(CONFIG_KVM) += arch/ia64/kvm/ | |
786 | ||
787 | --- a/arch/ia64/configs/generic_defconfig | |
788 | +++ b/arch/ia64/configs/generic_defconfig | |
789 | @@ -209,7 +209,6 @@ CONFIG_IA32_SUPPORT=y | |
790 | CONFIG_COMPAT=y | |
791 | CONFIG_COMPAT_FOR_U64_ALIGNMENT=y | |
792 | CONFIG_IA64_MCA_RECOVERY=y | |
793 | -CONFIG_PERFMON=y | |
794 | CONFIG_IA64_PALINFO=y | |
795 | # CONFIG_IA64_MC_ERR_INJECT is not set | |
796 | CONFIG_SGI_SN=y | |
797 | @@ -234,6 +233,16 @@ CONFIG_BINFMT_ELF=y | |
798 | CONFIG_BINFMT_MISC=m | |
799 | ||
800 | # | |
801 | +# Hardware Performance Monitoring support | |
802 | +# | |
803 | +CONFIG_PERFMON=y | |
804 | +CONFIG_IA64_PERFMON_COMPAT=y | |
805 | +CONFIG_IA64_PERFMON_GENERIC=m | |
806 | +CONFIG_IA64_PERFMON_ITANIUM=y | |
807 | +CONFIG_IA64_PERFMON_MCKINLEY=y | |
808 | +CONFIG_IA64_PERFMON_MONTECITO=y | |
809 | + | |
810 | +# | |
811 | # Power management and ACPI | |
812 | # | |
813 | CONFIG_PM=y | |
814 | --- a/arch/ia64/include/asm/Kbuild | |
815 | +++ b/arch/ia64/include/asm/Kbuild | |
816 | @@ -5,10 +5,12 @@ header-y += fpu.h | |
817 | header-y += fpswa.h | |
818 | header-y += ia64regs.h | |
819 | header-y += intel_intrin.h | |
820 | -header-y += perfmon_default_smpl.h | |
821 | header-y += ptrace_offsets.h | |
822 | header-y += rse.h | |
823 | header-y += ucontext.h | |
824 | +header-y += perfmon.h | |
825 | +header-y += perfmon_compat.h | |
826 | +header-y += perfmon_default_smpl.h | |
827 | ||
828 | unifdef-y += gcc_intrin.h | |
829 | unifdef-y += intrinsics.h | |
830 | --- a/arch/ia64/include/asm/hw_irq.h | |
831 | +++ b/arch/ia64/include/asm/hw_irq.h | |
832 | @@ -67,9 +67,9 @@ extern int ia64_last_device_vector; | |
833 | #define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1) | |
834 | ||
835 | #define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */ | |
836 | -#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */ | |
837 | #define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ | |
838 | #define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ | |
839 | +#define IA64_PERFMON_VECTOR 0xf1 /* performance monitor interrupt vector */ | |
840 | #define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */ | |
841 | #define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */ | |
842 | #define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ | |
843 | --- a/arch/ia64/include/asm/perfmon.h | |
844 | +++ b/arch/ia64/include/asm/perfmon.h | |
845 | @@ -1,279 +1,59 @@ | |
846 | /* | |
847 | - * Copyright (C) 2001-2003 Hewlett-Packard Co | |
848 | - * Stephane Eranian <eranian@hpl.hp.com> | |
849 | - */ | |
850 | - | |
851 | -#ifndef _ASM_IA64_PERFMON_H | |
852 | -#define _ASM_IA64_PERFMON_H | |
853 | - | |
854 | -/* | |
855 | - * perfmon comamnds supported on all CPU models | |
856 | - */ | |
857 | -#define PFM_WRITE_PMCS 0x01 | |
858 | -#define PFM_WRITE_PMDS 0x02 | |
859 | -#define PFM_READ_PMDS 0x03 | |
860 | -#define PFM_STOP 0x04 | |
861 | -#define PFM_START 0x05 | |
862 | -#define PFM_ENABLE 0x06 /* obsolete */ | |
863 | -#define PFM_DISABLE 0x07 /* obsolete */ | |
864 | -#define PFM_CREATE_CONTEXT 0x08 | |
865 | -#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */ | |
866 | -#define PFM_RESTART 0x0a | |
867 | -#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ | |
868 | -#define PFM_GET_FEATURES 0x0c | |
869 | -#define PFM_DEBUG 0x0d | |
870 | -#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ | |
871 | -#define PFM_GET_PMC_RESET_VAL 0x0f | |
872 | -#define PFM_LOAD_CONTEXT 0x10 | |
873 | -#define PFM_UNLOAD_CONTEXT 0x11 | |
874 | - | |
875 | -/* | |
876 | - * PMU model specific commands (may not be supported on all PMU models) | |
877 | - */ | |
878 | -#define PFM_WRITE_IBRS 0x20 | |
879 | -#define PFM_WRITE_DBRS 0x21 | |
880 | - | |
881 | -/* | |
882 | - * context flags | |
883 | - */ | |
884 | -#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */ | |
885 | -#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ | |
886 | -#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */ | |
887 | - | |
888 | -/* | |
889 | - * event set flags | |
890 | - */ | |
891 | -#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */ | |
892 | - | |
893 | -/* | |
894 | - * PMC flags | |
895 | - */ | |
896 | -#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ | |
897 | -#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */ | |
898 | - | |
899 | -/* | |
900 | - * PMD/PMC/IBR/DBR return flags (ignored on input) | |
901 | + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. | |
902 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
903 | * | |
904 | - * Those flags are used on output and must be checked in case EAGAIN is returned | |
905 | - * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure. | |
906 | - */ | |
907 | -#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */ | |
908 | -#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */ | |
909 | -#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) | |
910 | - | |
911 | -#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) | |
912 | - | |
913 | -typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */ | |
914 | - | |
915 | -/* | |
916 | - * Request structure used to define a context | |
917 | - */ | |
918 | -typedef struct { | |
919 | - pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */ | |
920 | - unsigned long ctx_flags; /* noblock/block */ | |
921 | - unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */ | |
922 | - unsigned short ctx_reserved1; /* for future use */ | |
923 | - int ctx_fd; /* return arg: unique identification for context */ | |
924 | - void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */ | |
925 | - unsigned long ctx_reserved2[11];/* for future use */ | |
926 | -} pfarg_context_t; | |
927 | - | |
928 | -/* | |
929 | - * Request structure used to write/read a PMC or PMD | |
930 | - */ | |
931 | -typedef struct { | |
932 | - unsigned int reg_num; /* which register */ | |
933 | - unsigned short reg_set; /* event set for this register */ | |
934 | - unsigned short reg_reserved1; /* for future use */ | |
935 | - | |
936 | - unsigned long reg_value; /* initial pmc/pmd value */ | |
937 | - unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */ | |
938 | - | |
939 | - unsigned long reg_long_reset; /* reset after buffer overflow notification */ | |
940 | - unsigned long reg_short_reset; /* reset after counter overflow */ | |
941 | - | |
942 | - unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ | |
943 | - unsigned long reg_random_seed; /* seed value when randomization is used */ | |
944 | - unsigned long reg_random_mask; /* bitmask used to limit random value */ | |
945 | - unsigned long reg_last_reset_val;/* return: PMD last reset value */ | |
946 | - | |
947 | - unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */ | |
948 | - unsigned long reg_smpl_eventid; /* opaque sampling event identifier */ | |
949 | - | |
950 | - unsigned long reg_reserved2[3]; /* for future use */ | |
951 | -} pfarg_reg_t; | |
952 | - | |
953 | -typedef struct { | |
954 | - unsigned int dbreg_num; /* which debug register */ | |
955 | - unsigned short dbreg_set; /* event set for this register */ | |
956 | - unsigned short dbreg_reserved1; /* for future use */ | |
957 | - unsigned long dbreg_value; /* value for debug register */ | |
958 | - unsigned long dbreg_flags; /* return: dbreg error */ | |
959 | - unsigned long dbreg_reserved2[1]; /* for future use */ | |
960 | -} pfarg_dbreg_t; | |
961 | - | |
962 | -typedef struct { | |
963 | - unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */ | |
964 | - unsigned int ft_reserved; /* reserved for future use */ | |
965 | - unsigned long reserved[4]; /* for future use */ | |
966 | -} pfarg_features_t; | |
967 | - | |
968 | -typedef struct { | |
969 | - pid_t load_pid; /* process to load the context into */ | |
970 | - unsigned short load_set; /* first event set to load */ | |
971 | - unsigned short load_reserved1; /* for future use */ | |
972 | - unsigned long load_reserved2[3]; /* for future use */ | |
973 | -} pfarg_load_t; | |
974 | - | |
975 | -typedef struct { | |
976 | - int msg_type; /* generic message header */ | |
977 | - int msg_ctx_fd; /* generic message header */ | |
978 | - unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ | |
979 | - unsigned short msg_active_set; /* active set at the time of overflow */ | |
980 | - unsigned short msg_reserved1; /* for future use */ | |
981 | - unsigned int msg_reserved2; /* for future use */ | |
982 | - unsigned long msg_tstamp; /* for perf tuning/debug */ | |
983 | -} pfm_ovfl_msg_t; | |
984 | - | |
985 | -typedef struct { | |
986 | - int msg_type; /* generic message header */ | |
987 | - int msg_ctx_fd; /* generic message header */ | |
988 | - unsigned long msg_tstamp; /* for perf tuning */ | |
989 | -} pfm_end_msg_t; | |
990 | - | |
991 | -typedef struct { | |
992 | - int msg_type; /* type of the message */ | |
993 | - int msg_ctx_fd; /* unique identifier for the context */ | |
994 | - unsigned long msg_tstamp; /* for perf tuning */ | |
995 | -} pfm_gen_msg_t; | |
996 | - | |
997 | -#define PFM_MSG_OVFL 1 /* an overflow happened */ | |
998 | -#define PFM_MSG_END 2 /* task to which context was attached ended */ | |
999 | - | |
1000 | -typedef union { | |
1001 | - pfm_ovfl_msg_t pfm_ovfl_msg; | |
1002 | - pfm_end_msg_t pfm_end_msg; | |
1003 | - pfm_gen_msg_t pfm_gen_msg; | |
1004 | -} pfm_msg_t; | |
1005 | - | |
1006 | -/* | |
1007 | - * Define the version numbers for both perfmon as a whole and the sampling buffer format. | |
1008 | + * This file contains Itanium Processor Family specific definitions | |
1009 | + * for the perfmon interface. | |
1010 | + * | |
1011 | + * This program is free software; you can redistribute it and/or | |
1012 | + * modify it under the terms of version 2 of the GNU General Public | |
1013 | + * License as published by the Free Software Foundation. | |
1014 | + * | |
1015 | + * This program is distributed in the hope that it will be useful, | |
1016 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1017 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1018 | + * General Public License for more details. | |
1019 | + * | |
1020 | + * You should have received a copy of the GNU General Public License | |
1021 | + * along with this program; if not, write to the Free Software | |
1022 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1023 | + * 02111-1307 USA | |
1024 | */ | |
1025 | -#define PFM_VERSION_MAJ 2U | |
1026 | -#define PFM_VERSION_MIN 0U | |
1027 | -#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) | |
1028 | -#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) | |
1029 | -#define PFM_VERSION_MINOR(x) ((x) & 0xffff) | |
1030 | - | |
1031 | +#ifndef _ASM_IA64_PERFMON_H_ | |
1032 | +#define _ASM_IA64_PERFMON_H_ | |
1033 | ||
1034 | /* | |
1035 | - * miscellaneous architected definitions | |
1036 | + * arch-specific user visible interface definitions | |
1037 | */ | |
1038 | -#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */ | |
1039 | -#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */ | |
1040 | -#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */ | |
1041 | - | |
1042 | -#ifdef __KERNEL__ | |
1043 | - | |
1044 | -extern long perfmonctl(int fd, int cmd, void *arg, int narg); | |
1045 | - | |
1046 | -typedef struct { | |
1047 | - void (*handler)(int irq, void *arg, struct pt_regs *regs); | |
1048 | -} pfm_intr_handler_desc_t; | |
1049 | - | |
1050 | -extern void pfm_save_regs (struct task_struct *); | |
1051 | -extern void pfm_load_regs (struct task_struct *); | |
1052 | ||
1053 | -extern void pfm_exit_thread(struct task_struct *); | |
1054 | -extern int pfm_use_debug_registers(struct task_struct *); | |
1055 | -extern int pfm_release_debug_registers(struct task_struct *); | |
1056 | -extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); | |
1057 | -extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs); | |
1058 | -extern void pfm_init_percpu(void); | |
1059 | -extern void pfm_handle_work(void); | |
1060 | -extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); | |
1061 | -extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); | |
1062 | +#define PFM_ARCH_MAX_PMCS (256+64) | |
1063 | +#define PFM_ARCH_MAX_PMDS (256+64) | |
1064 | ||
1065 | - | |
1066 | - | |
1067 | -/* | |
1068 | - * Reset PMD register flags | |
1069 | - */ | |
1070 | -#define PFM_PMD_SHORT_RESET 0 | |
1071 | -#define PFM_PMD_LONG_RESET 1 | |
1072 | - | |
1073 | -typedef union { | |
1074 | - unsigned int val; | |
1075 | - struct { | |
1076 | - unsigned int notify_user:1; /* notify user program of overflow */ | |
1077 | - unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */ | |
1078 | - unsigned int block_task:1; /* block monitored task on kernel exit */ | |
1079 | - unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */ | |
1080 | - unsigned int reserved:28; /* for future use */ | |
1081 | - } bits; | |
1082 | -} pfm_ovfl_ctrl_t; | |
1083 | - | |
1084 | -typedef struct { | |
1085 | - unsigned char ovfl_pmd; /* index of overflowed PMD */ | |
1086 | - unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */ | |
1087 | - unsigned short active_set; /* event set active at the time of the overflow */ | |
1088 | - pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */ | |
1089 | - | |
1090 | - unsigned long pmd_last_reset; /* last reset value of of the PMD */ | |
1091 | - unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */ | |
1092 | - unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */ | |
1093 | - unsigned long pmd_value; /* current 64-bit value of the PMD */ | |
1094 | - unsigned long pmd_eventid; /* eventid associated with PMD */ | |
1095 | -} pfm_ovfl_arg_t; | |
1096 | - | |
1097 | - | |
1098 | -typedef struct { | |
1099 | - char *fmt_name; | |
1100 | - pfm_uuid_t fmt_uuid; | |
1101 | - size_t fmt_arg_size; | |
1102 | - unsigned long fmt_flags; | |
1103 | - | |
1104 | - int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg); | |
1105 | - int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size); | |
1106 | - int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg); | |
1107 | - int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp); | |
1108 | - int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); | |
1109 | - int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); | |
1110 | - int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs); | |
1111 | - | |
1112 | - struct list_head fmt_list; | |
1113 | -} pfm_buffer_fmt_t; | |
1114 | - | |
1115 | -extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt); | |
1116 | -extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid); | |
1117 | +#define PFM_ARCH_PMD_STK_ARG 8 | |
1118 | +#define PFM_ARCH_PMC_STK_ARG 8 | |
1119 | ||
1120 | /* | |
1121 | - * perfmon interface exported to modules | |
1122 | + * Itanium specific context flags | |
1123 | + * | |
1124 | + * bits[00-15]: generic flags (see asm/perfmon.h) | |
1125 | + * bits[16-31]: arch-specific flags | |
1126 | */ | |
1127 | -extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); | |
1128 | -extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); | |
1129 | -extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); | |
1130 | -extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); | |
1131 | +#define PFM_ITA_FL_INSECURE 0x10000 /* clear psr.sp on non system, non self */ | |
1132 | ||
1133 | /* | |
1134 | - * describe the content of the local_cpu_date->pfm_syst_info field | |
1135 | + * Itanium specific public event set flags (set_flags) | |
1136 | + * | |
1137 | + * event set flags layout: | |
1138 | + * bits[00-15] : generic flags | |
1139 | + * bits[16-31] : arch-specific flags | |
1140 | */ | |
1141 | -#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ | |
1142 | -#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ | |
1143 | -#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */ | |
1144 | +#define PFM_ITA_SETFL_EXCL_INTR 0x10000 /* exclude interrupt execution */ | |
1145 | +#define PFM_ITA_SETFL_INTR_ONLY 0x20000 /* include only interrupt execution */ | |
1146 | +#define PFM_ITA_SETFL_IDLE_EXCL 0x40000 /* stop monitoring in idle loop */ | |
1147 | ||
1148 | /* | |
1149 | - * sysctl control structure. visible to sampling formats | |
1150 | + * compatibility for version v2.0 of the interface | |
1151 | */ | |
1152 | -typedef struct { | |
1153 | - int debug; /* turn on/off debugging via syslog */ | |
1154 | - int debug_ovfl; /* turn on/off debug printk in overflow handler */ | |
1155 | - int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ | |
1156 | - int expert_mode; /* turn on/off value checking */ | |
1157 | -} pfm_sysctl_t; | |
1158 | -extern pfm_sysctl_t pfm_sysctl; | |
1159 | - | |
1160 | - | |
1161 | -#endif /* __KERNEL__ */ | |
1162 | +#include <asm/perfmon_compat.h> | |
1163 | ||
1164 | -#endif /* _ASM_IA64_PERFMON_H */ | |
1165 | +#endif /* _ASM_IA64_PERFMON_H_ */ | |
1166 | --- /dev/null | |
1167 | +++ b/arch/ia64/include/asm/perfmon_compat.h | |
1168 | @@ -0,0 +1,167 @@ | |
1169 | +/* | |
1170 | + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. | |
1171 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
1172 | + * | |
1173 | + * This header file contains perfmon interface definition | |
1174 | + * that are now obsolete and should be dropped in favor | |
1175 | + * of their equivalent functions as explained below. | |
1176 | + * | |
1177 | + * This program is free software; you can redistribute it and/or | |
1178 | + * modify it under the terms of version 2 of the GNU General Public | |
1179 | + * License as published by the Free Software Foundation. | |
1180 | + * | |
1181 | + * This program is distributed in the hope that it will be useful, | |
1182 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1183 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1184 | + * General Public License for more details. | |
1185 | + * | |
1186 | + * You should have received a copy of the GNU General Public License | |
1187 | + * along with this program; if not, write to the Free Software | |
1188 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1189 | + * 02111-1307 USA | |
1190 | + */ | |
1191 | + | |
1192 | +#ifndef _ASM_IA64_PERFMON_COMPAT_H_ | |
1193 | +#define _ASM_IA64_PERFMON_COMPAT_H_ | |
1194 | + | |
1195 | +/* | |
1196 | + * custom sampling buffer identifier type | |
1197 | + */ | |
1198 | +typedef __u8 pfm_uuid_t[16]; | |
1199 | + | |
1200 | +/* | |
1201 | + * obsolete perfmon commands. Supported only on IA-64 for | |
1202 | + * backward compatibility reasons with perfmon v2.0. | |
1203 | + */ | |
1204 | +#define PFM_WRITE_PMCS 0x01 /* use pfm_write_pmcs */ | |
1205 | +#define PFM_WRITE_PMDS 0x02 /* use pfm_write_pmds */ | |
1206 | +#define PFM_READ_PMDS 0x03 /* use pfm_read_pmds */ | |
1207 | +#define PFM_STOP 0x04 /* use pfm_stop */ | |
1208 | +#define PFM_START 0x05 /* use pfm_start */ | |
1209 | +#define PFM_ENABLE 0x06 /* obsolete */ | |
1210 | +#define PFM_DISABLE 0x07 /* obsolete */ | |
1211 | +#define PFM_CREATE_CONTEXT 0x08 /* use pfm_create_context */ | |
1212 | +#define PFM_DESTROY_CONTEXT 0x09 /* use close() */ | |
1213 | +#define PFM_RESTART 0x0a /* use pfm_restart */ | |
1214 | +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ | |
1215 | +#define PFM_GET_FEATURES 0x0c /* use /proc/sys/perfmon */ | |
1216 | +#define PFM_DEBUG 0x0d /* /proc/sys/kernel/perfmon/debug */ | |
1217 | +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ | |
1218 | +#define PFM_GET_PMC_RESET_VAL 0x0f /* use /proc/perfmon_map */ | |
1219 | +#define PFM_LOAD_CONTEXT 0x10 /* use pfm_load_context */ | |
1220 | +#define PFM_UNLOAD_CONTEXT 0x11 /* use pfm_unload_context */ | |
1221 | + | |
1222 | +/* | |
1223 | + * PMU model specific commands (may not be supported on all PMU models) | |
1224 | + */ | |
1225 | +#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263]*/ | |
1226 | +#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271]*/ | |
1227 | + | |
1228 | +/* | |
1229 | + * argument to PFM_CREATE_CONTEXT | |
1230 | + */ | |
1231 | +struct pfarg_context { | |
1232 | + pfm_uuid_t ctx_smpl_buf_id; /* buffer format to use */ | |
1233 | + unsigned long ctx_flags; /* noblock/block */ | |
1234 | + unsigned int ctx_reserved1; /* for future use */ | |
1235 | + int ctx_fd; /* return: fildesc */ | |
1236 | + void *ctx_smpl_vaddr; /* return: vaddr of buffer */ | |
1237 | + unsigned long ctx_reserved3[11];/* for future use */ | |
1238 | +}; | |
1239 | + | |
1240 | +/* | |
1241 | + * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_READ_PMDS | |
1242 | + */ | |
1243 | +struct pfarg_reg { | |
1244 | + unsigned int reg_num; /* which register */ | |
1245 | + unsigned short reg_set; /* event set for this register */ | |
1246 | + unsigned short reg_reserved1; /* for future use */ | |
1247 | + | |
1248 | + unsigned long reg_value; /* initial pmc/pmd value */ | |
1249 | + unsigned long reg_flags; /* input: flags, ret: error */ | |
1250 | + | |
1251 | + unsigned long reg_long_reset; /* reset value after notification */ | |
1252 | + unsigned long reg_short_reset; /* reset after counter overflow */ | |
1253 | + | |
1254 | + unsigned long reg_reset_pmds[4]; /* registers to reset on overflow */ | |
1255 | + unsigned long reg_random_seed; /* seed for randomization */ | |
1256 | + unsigned long reg_random_mask; /* random range limit */ | |
1257 | + unsigned long reg_last_reset_val;/* return: PMD last reset value */ | |
1258 | + | |
1259 | + unsigned long reg_smpl_pmds[4]; /* pmds to be saved on overflow */ | |
1260 | + unsigned long reg_smpl_eventid; /* opaque sampling event id */ | |
1261 | + unsigned long reg_ovfl_switch_cnt;/* #overflows to switch */ | |
1262 | + | |
1263 | + unsigned long reg_reserved2[2]; /* for future use */ | |
1264 | +}; | |
1265 | + | |
1266 | +/* | |
1267 | + * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS | |
1268 | + */ | |
1269 | +struct pfarg_dbreg { | |
1270 | + unsigned int dbreg_num; /* which debug register */ | |
1271 | + unsigned short dbreg_set; /* event set */ | |
1272 | + unsigned short dbreg_reserved1; /* for future use */ | |
1273 | + unsigned long dbreg_value; /* value for debug register */ | |
1274 | + unsigned long dbreg_flags; /* return: dbreg error */ | |
1275 | + unsigned long dbreg_reserved2[1]; /* for future use */ | |
1276 | +}; | |
1277 | + | |
1278 | +/* | |
1279 | + * argument to PFM_GET_FEATURES | |
1280 | + */ | |
1281 | +struct pfarg_features { | |
1282 | + unsigned int ft_version; /* major [16-31], minor [0-15] */ | |
1283 | + unsigned int ft_reserved; /* reserved for future use */ | |
1284 | + unsigned long reserved[4]; /* for future use */ | |
1285 | +}; | |
1286 | + | |
1287 | +typedef struct { | |
1288 | + int msg_type; /* generic message header */ | |
1289 | + int msg_ctx_fd; /* generic message header */ | |
1290 | + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ | |
1291 | + unsigned short msg_active_set; /* active set on overflow */ | |
1292 | + unsigned short msg_reserved1; /* for future use */ | |
1293 | + unsigned int msg_reserved2; /* for future use */ | |
1294 | + unsigned long msg_tstamp; /* for perf tuning/debug */ | |
1295 | +} pfm_ovfl_msg_t; | |
1296 | + | |
1297 | +typedef struct { | |
1298 | + int msg_type; /* generic message header */ | |
1299 | + int msg_ctx_fd; /* generic message header */ | |
1300 | + unsigned long msg_tstamp; /* for perf tuning */ | |
1301 | +} pfm_end_msg_t; | |
1302 | + | |
1303 | +typedef struct { | |
1304 | + int msg_type; /* type of the message */ | |
1305 | + int msg_ctx_fd; /* context file descriptor */ | |
1306 | + unsigned long msg_tstamp; /* for perf tuning */ | |
1307 | +} pfm_gen_msg_t; | |
1308 | + | |
1309 | +typedef union { | |
1310 | + int type; | |
1311 | + pfm_ovfl_msg_t pfm_ovfl_msg; | |
1312 | + pfm_end_msg_t pfm_end_msg; | |
1313 | + pfm_gen_msg_t pfm_gen_msg; | |
1314 | +} pfm_msg_t; | |
1315 | + | |
1316 | +/* | |
1317 | + * PMD/PMC return flags in case of error (ignored on input) | |
1318 | + * | |
1319 | + * reg_flags layout: | |
1320 | + * bit 00-15 : generic flags | |
1321 | + * bits[16-23] : arch-specific flags (see asm/perfmon.h) | |
1322 | + * bit 24-31 : error codes | |
1323 | + * | |
1324 | + * Those flags are used on output and must be checked in case EINVAL is | |
1325 | + * returned by a command accepting a vector of values and each has a flag | |
1326 | + * field, such as pfarg_reg or pfarg_dbreg | |
1327 | + */ | |
1328 | +#define PFM_REG_RETFL_NOTAVAIL (1<<31) /* not implemented or inaccessible */ | |
1329 | +#define PFM_REG_RETFL_EINVAL (1<<30) /* entry is invalid */ | |
1330 | +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|\ | |
1331 | + PFM_REG_RETFL_EINVAL) | |
1332 | + | |
1333 | +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) | |
1334 | + | |
1335 | +#endif /* _ASM_IA64_PERFMON_COMPAT_H_ */ | |
1336 | --- a/arch/ia64/include/asm/perfmon_default_smpl.h | |
1337 | +++ b/arch/ia64/include/asm/perfmon_default_smpl.h | |
1338 | @@ -1,83 +1,106 @@ | |
1339 | /* | |
1340 | - * Copyright (C) 2002-2003 Hewlett-Packard Co | |
1341 | - * Stephane Eranian <eranian@hpl.hp.com> | |
1342 | + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. | |
1343 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
1344 | * | |
1345 | - * This file implements the default sampling buffer format | |
1346 | - * for Linux/ia64 perfmon subsystem. | |
1347 | + * This file implements the old default sampling buffer format | |
1348 | + * for the perfmon2 subsystem. For IA-64 only. | |
1349 | + * | |
1350 | + * It requires the use of the perfmon_compat.h header. It is recommended | |
1351 | + * that applications be ported to the new format instead. | |
1352 | + * | |
1353 | + * This program is free software; you can redistribute it and/or | |
1354 | + * modify it under the terms of version 2 of the GNU General Public | |
1355 | + * License as published by the Free Software Foundation. | |
1356 | + * | |
1357 | + * This program is distributed in the hope that it will be useful, | |
1358 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1359 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1360 | + * General Public License for more details. | |
1361 | + * | |
1362 | + * You should have received a copy of the GNU General Public License | |
1363 | + * along with this program; if not, write to the Free Software | |
1364 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1365 | + * 02111-1307 USA | |
1366 | */ | |
1367 | -#ifndef __PERFMON_DEFAULT_SMPL_H__ | |
1368 | -#define __PERFMON_DEFAULT_SMPL_H__ 1 | |
1369 | +#ifndef __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ | |
1370 | +#define __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ 1 | |
1371 | + | |
1372 | +#ifndef __ia64__ | |
1373 | +#error "this file must be used for compatibility reasons only on IA-64" | |
1374 | +#endif | |
1375 | ||
1376 | #define PFM_DEFAULT_SMPL_UUID { \ | |
1377 | - 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} | |
1378 | + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\ | |
1379 | + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} | |
1380 | ||
1381 | /* | |
1382 | * format specific parameters (passed at context creation) | |
1383 | */ | |
1384 | -typedef struct { | |
1385 | +struct pfm_default_smpl_arg { | |
1386 | unsigned long buf_size; /* size of the buffer in bytes */ | |
1387 | unsigned int flags; /* buffer specific flags */ | |
1388 | unsigned int res1; /* for future use */ | |
1389 | unsigned long reserved[2]; /* for future use */ | |
1390 | -} pfm_default_smpl_arg_t; | |
1391 | +}; | |
1392 | ||
1393 | /* | |
1394 | * combined context+format specific structure. Can be passed | |
1395 | - * to PFM_CONTEXT_CREATE | |
1396 | + * to PFM_CONTEXT_CREATE (not PFM_CONTEXT_CREATE2) | |
1397 | */ | |
1398 | -typedef struct { | |
1399 | - pfarg_context_t ctx_arg; | |
1400 | - pfm_default_smpl_arg_t buf_arg; | |
1401 | -} pfm_default_smpl_ctx_arg_t; | |
1402 | +struct pfm_default_smpl_ctx_arg { | |
1403 | + struct pfarg_context ctx_arg; | |
1404 | + struct pfm_default_smpl_arg buf_arg; | |
1405 | +}; | |
1406 | ||
1407 | /* | |
1408 | * This header is at the beginning of the sampling buffer returned to the user. | |
1409 | * It is directly followed by the first record. | |
1410 | */ | |
1411 | -typedef struct { | |
1412 | - unsigned long hdr_count; /* how many valid entries */ | |
1413 | - unsigned long hdr_cur_offs; /* current offset from top of buffer */ | |
1414 | - unsigned long hdr_reserved2; /* reserved for future use */ | |
1415 | - | |
1416 | - unsigned long hdr_overflows; /* how many times the buffer overflowed */ | |
1417 | - unsigned long hdr_buf_size; /* how many bytes in the buffer */ | |
1418 | - | |
1419 | - unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */ | |
1420 | - unsigned int hdr_reserved1; /* for future use */ | |
1421 | - unsigned long hdr_reserved[10]; /* for future use */ | |
1422 | -} pfm_default_smpl_hdr_t; | |
1423 | +struct pfm_default_smpl_hdr { | |
1424 | + u64 hdr_count; /* how many valid entries */ | |
1425 | + u64 hdr_cur_offs; /* current offset from top of buffer */ | |
1426 | + u64 dr_reserved2; /* reserved for future use */ | |
1427 | + | |
1428 | + u64 hdr_overflows; /* how many times the buffer overflowed */ | |
1429 | + u64 hdr_buf_size; /* how many bytes in the buffer */ | |
1430 | + | |
1431 | + u32 hdr_version; /* smpl format version*/ | |
1432 | + u32 hdr_reserved1; /* for future use */ | |
1433 | + u64 hdr_reserved[10]; /* for future use */ | |
1434 | +}; | |
1435 | ||
1436 | /* | |
1437 | * Entry header in the sampling buffer. The header is directly followed | |
1438 | - * with the values of the PMD registers of interest saved in increasing | |
1439 | - * index order: PMD4, PMD5, and so on. How many PMDs are present depends | |
1440 | + * with the values of the PMD registers of interest saved in increasing | |
1441 | + * index order: PMD4, PMD5, and so on. How many PMDs are present depends | |
1442 | * on how the session was programmed. | |
1443 | * | |
1444 | * In the case where multiple counters overflow at the same time, multiple | |
1445 | * entries are written consecutively. | |
1446 | * | |
1447 | - * last_reset_value member indicates the initial value of the overflowed PMD. | |
1448 | + * last_reset_value member indicates the initial value of the overflowed PMD. | |
1449 | */ | |
1450 | -typedef struct { | |
1451 | - int pid; /* thread id (for NPTL, this is gettid()) */ | |
1452 | - unsigned char reserved1[3]; /* reserved for future use */ | |
1453 | - unsigned char ovfl_pmd; /* index of overflowed PMD */ | |
1454 | - | |
1455 | - unsigned long last_reset_val; /* initial value of overflowed PMD */ | |
1456 | - unsigned long ip; /* where did the overflow interrupt happened */ | |
1457 | - unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */ | |
1458 | - | |
1459 | - unsigned short cpu; /* cpu on which the overfow occured */ | |
1460 | - unsigned short set; /* event set active when overflow ocurred */ | |
1461 | - int tgid; /* thread group id (for NPTL, this is getpid()) */ | |
1462 | -} pfm_default_smpl_entry_t; | |
1463 | - | |
1464 | -#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */ | |
1465 | -#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS)) | |
1466 | -#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE) | |
1467 | +struct pfm_default_smpl_entry { | |
1468 | + pid_t pid; /* thread id (for NPTL, this is gettid()) */ | |
1469 | + uint8_t reserved1[3]; /* for future use */ | |
1470 | + uint8_t ovfl_pmd; /* overflow pmd for this sample */ | |
1471 | + u64 last_reset_val; /* initial value of overflowed PMD */ | |
1472 | + unsigned long ip; /* where did the overflow interrupt happened */ | |
1473 | + u64 tstamp; /* overflow timestamp */ | |
1474 | + u16 cpu; /* cpu on which the overflow occurred */ | |
1475 | + u16 set; /* event set active when overflow occurred */ | |
1476 | + pid_t tgid; /* thread group id (for NPTL, this is getpid()) */ | |
1477 | +}; | |
1478 | + | |
1479 | +#define PFM_DEFAULT_MAX_PMDS 64 /* #pmds supported */ | |
1480 | +#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(struct pfm_default_smpl_entry)+\ | |
1481 | + (sizeof(u64)*PFM_DEFAULT_MAX_PMDS)) | |
1482 | +#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(struct pfm_default_smpl_hdr)+\ | |
1483 | + PFM_DEFAULT_MAX_ENTRY_SIZE) | |
1484 | ||
1485 | #define PFM_DEFAULT_SMPL_VERSION_MAJ 2U | |
1486 | -#define PFM_DEFAULT_SMPL_VERSION_MIN 0U | |
1487 | -#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff)) | |
1488 | +#define PFM_DEFAULT_SMPL_VERSION_MIN 1U | |
1489 | +#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|\ | |
1490 | + (PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff)) | |
1491 | ||
1492 | -#endif /* __PERFMON_DEFAULT_SMPL_H__ */ | |
1493 | +#endif /* __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ */ | |
1494 | --- /dev/null | |
1495 | +++ b/arch/ia64/include/asm/perfmon_kern.h | |
1496 | @@ -0,0 +1,356 @@ | |
1497 | +/* | |
1498 | + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. | |
1499 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
1500 | + * | |
1501 | + * This file contains Itanium Processor Family specific definitions | |
1502 | + * for the perfmon interface. | |
1503 | + * | |
1504 | + * This program is free software; you can redistribute it and/or | |
1505 | + * modify it under the terms of version 2 of the GNU General Public | |
1506 | + * License as published by the Free Software Foundation. | |
1507 | + * | |
1508 | + * This program is distributed in the hope that it will be useful, | |
1509 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1510 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1511 | + * General Public License for more details. | |
1512 | + * | |
1513 | + * You should have received a copy of the GNU General Public License | |
1514 | + * along with this program; if not, write to the Free Software | |
1515 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1516 | + * 02111-1307 USA | |
1517 | + */ | |
1518 | +#ifndef _ASM_IA64_PERFMON_KERN_H_ | |
1519 | +#define _ASM_IA64_PERFMON_KERN_H_ | |
1520 | + | |
1521 | +#ifdef __KERNEL__ | |
1522 | + | |
1523 | +#ifdef CONFIG_PERFMON | |
1524 | +#include <asm/unistd.h> | |
1525 | +#include <asm/hw_irq.h> | |
1526 | + | |
1527 | +/* | |
1528 | + * describe the content of the pfm_syst_info field | |
1529 | + * layout: | |
1530 | + * bits[00-15] : generic flags | |
1531 | + * bits[16-31] : arch-specific flags | |
1532 | + */ | |
1533 | +#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */ | |
1534 | + | |
1535 | +/* | |
1536 | + * For some CPUs, the upper bits of a counter must be set in order for the | |
1537 | + * overflow interrupt to happen. On overflow, the counter has wrapped around, | |
1538 | + * and the upper bits are cleared. This function may be used to set them back. | |
1539 | + */ | |
1540 | +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, | |
1541 | + unsigned int cnum) | |
1542 | +{} | |
1543 | + | |
1544 | +/* | |
1545 | + * called from __pfm_interrupt_handler(). ctx is not NULL. | |
1546 | + * ctx is locked. PMU interrupt is masked. | |
1547 | + * | |
1548 | + * must stop all monitoring to ensure handler has consistent view. | |
1549 | + * must collect overflowed PMDs bitmask into povfls_pmds and | |
1550 | + * npend_ovfls. If no interrupt detected then npend_ovfls | |
1551 | + * must be set to zero. | |
1552 | + */ | |
1553 | +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, | |
1554 | + struct pfm_event_set *set) | |
1555 | +{ | |
1556 | + u64 tmp; | |
1557 | + | |
1558 | + /* | |
1559 | + * do not overwrite existing value, must | |
1560 | + * process those first (coming from context switch replay) | |
1561 | + */ | |
1562 | + if (set->npend_ovfls) | |
1563 | + return; | |
1564 | + | |
1565 | + ia64_srlz_d(); | |
1566 | + | |
1567 | + tmp = ia64_get_pmc(0) & ~0xf; | |
1568 | + | |
1569 | + set->povfl_pmds[0] = tmp; | |
1570 | + | |
1571 | + set->npend_ovfls = ia64_popcnt(tmp); | |
1572 | +} | |
1573 | + | |
1574 | +static inline int pfm_arch_init_pmu_config(void) | |
1575 | +{ | |
1576 | + return 0; | |
1577 | +} | |
1578 | + | |
1579 | +static inline void pfm_arch_resend_irq(struct pfm_context *ctx) | |
1580 | +{ | |
1581 | + ia64_resend_irq(IA64_PERFMON_VECTOR); | |
1582 | +} | |
1583 | + | |
1584 | +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, | |
1585 | + struct pfm_event_set *set) | |
1586 | +{} | |
1587 | + | |
1588 | +static inline void pfm_arch_serialize(void) | |
1589 | +{ | |
1590 | + ia64_srlz_d(); | |
1591 | +} | |
1592 | + | |
1593 | +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) | |
1594 | +{ | |
1595 | + PFM_DBG_ovfl("state=%d", ctx->state); | |
1596 | + ia64_set_pmc(0, 0); | |
1597 | + /* no serialization */ | |
1598 | +} | |
1599 | + | |
1600 | +static inline void pfm_arch_write_pmc(struct pfm_context *ctx, | |
1601 | + unsigned int cnum, u64 value) | |
1602 | +{ | |
1603 | + if (cnum < 256) { | |
1604 | + ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value); | |
1605 | + } else if (cnum < 264) { | |
1606 | + ia64_set_ibr(cnum-256, value); | |
1607 | + ia64_dv_serialize_instruction(); | |
1608 | + } else { | |
1609 | + ia64_set_dbr(cnum-264, value); | |
1610 | + ia64_dv_serialize_instruction(); | |
1611 | + } | |
1612 | +} | |
1613 | + | |
1614 | +/* | |
1615 | + * On IA-64, for per-thread context which have the ITA_FL_INSECURE | |
1616 | + * flag, it is possible to start/stop monitoring directly from user level | |
1617 | + * without calling pfm_start()/pfm_stop. This allows very lightweight | |
1618 | + * control yet the kernel sometimes needs to know if monitoring is actually | |
1619 | + * on or off. | |
1620 | + * | |
1621 | + * Tracking of this information is normally done by pfm_start/pfm_stop | |
1622 | + * in flags.started. Here we need to compensate by checking actual | |
1623 | + * psr bit. | |
1624 | + */ | |
1625 | +static inline int pfm_arch_is_active(struct pfm_context *ctx) | |
1626 | +{ | |
1627 | + return ctx->flags.started | |
1628 | + || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP); | |
1629 | +} | |
1630 | + | |
1631 | +static inline void pfm_arch_write_pmd(struct pfm_context *ctx, | |
1632 | + unsigned int cnum, u64 value) | |
1633 | +{ | |
1634 | + /* | |
1635 | + * for a counting PMD, overflow bit must be cleared | |
1636 | + */ | |
1637 | + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64) | |
1638 | + value &= pfm_pmu_conf->ovfl_mask; | |
1639 | + | |
1640 | + /* | |
1641 | + * for counters, write to upper bits are ignored, no need to mask | |
1642 | + */ | |
1643 | + ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value); | |
1644 | +} | |
1645 | + | |
1646 | +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
1647 | +{ | |
1648 | + return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr); | |
1649 | +} | |
1650 | + | |
1651 | +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) | |
1652 | +{ | |
1653 | + return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr); | |
1654 | +} | |
1655 | + | |
1656 | +static inline void pfm_arch_ctxswout_sys(struct task_struct *task, | |
1657 | + struct pfm_context *ctx) | |
1658 | +{ | |
1659 | + struct pt_regs *regs; | |
1660 | + | |
1661 | + regs = task_pt_regs(task); | |
1662 | + ia64_psr(regs)->pp = 0; | |
1663 | +} | |
1664 | + | |
1665 | +static inline void pfm_arch_ctxswin_sys(struct task_struct *task, | |
1666 | + struct pfm_context *ctx) | |
1667 | +{ | |
1668 | + struct pt_regs *regs; | |
1669 | + | |
1670 | + if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) { | |
1671 | + regs = task_pt_regs(task); | |
1672 | + ia64_psr(regs)->pp = 1; | |
1673 | + } | |
1674 | +} | |
1675 | + | |
1676 | +/* | |
1677 | + * On IA-64, the PMDs are NOT saved by pfm_arch_freeze_pmu() | |
1678 | + * when entering the PMU interrupt handler, thus, we need | |
1679 | + * to save them in pfm_switch_sets_from_intr() | |
1680 | + */ | |
1681 | +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, | |
1682 | + struct pfm_event_set *set) | |
1683 | +{ | |
1684 | + pfm_save_pmds(ctx, set); | |
1685 | +} | |
1686 | + | |
1687 | +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags); | |
1688 | + | |
1689 | +static inline void pfm_arch_context_free(struct pfm_context *ctx) | |
1690 | +{} | |
1691 | + | |
1692 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); | |
1693 | +void pfm_arch_ctxswin_thread(struct task_struct *task, | |
1694 | + struct pfm_context *ctx); | |
1695 | + | |
1696 | +void pfm_arch_unload_context(struct pfm_context *ctx); | |
1697 | +int pfm_arch_load_context(struct pfm_context *ctx); | |
1698 | +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags); | |
1699 | + | |
1700 | +void pfm_arch_mask_monitoring(struct pfm_context *ctx, | |
1701 | + struct pfm_event_set *set); | |
1702 | +void pfm_arch_unmask_monitoring(struct pfm_context *ctx, | |
1703 | + struct pfm_event_set *set); | |
1704 | + | |
1705 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
1706 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
1707 | + | |
1708 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); | |
1709 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); | |
1710 | + | |
1711 | +int pfm_arch_init(void); | |
1712 | +void pfm_arch_init_percpu(void); | |
1713 | +char *pfm_arch_get_pmu_module_name(void); | |
1714 | + | |
1715 | +int __pfm_use_dbregs(struct task_struct *task); | |
1716 | +int __pfm_release_dbregs(struct task_struct *task); | |
1717 | +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx, | |
1718 | + struct pfm_event_set *set); | |
1719 | + | |
1720 | +void pfm_arch_show_session(struct seq_file *m); | |
1721 | + | |
1722 | +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) | |
1723 | +{ | |
1724 | + return 0; | |
1725 | +} | |
1726 | + | |
1727 | +static inline void pfm_arch_pmu_release(void) | |
1728 | +{} | |
1729 | + | |
1730 | +/* not necessary on IA-64 */ | |
1731 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
1732 | +{} | |
1733 | + | |
1734 | +/* | |
1735 | + * miscellaneous architected definitions | |
1736 | + */ | |
1737 | +#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */ | |
1738 | + | |
1739 | +/* | |
1740 | + * private event set flags (set_priv_flags) | |
1741 | + */ | |
1742 | +#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */ | |
1743 | + | |
1744 | + | |
1745 | +/* | |
1746 | + * Itanium-specific data structures | |
1747 | + */ | |
1748 | +struct pfm_ia64_context_flags { | |
1749 | + unsigned int use_dbr:1; /* use range restrictions (debug registers) */ | |
1750 | + unsigned int insecure:1; /* insecure monitoring for non-self session */ | |
1751 | + unsigned int reserved:30;/* for future use */ | |
1752 | +}; | |
1753 | + | |
1754 | +struct pfm_arch_context { | |
1755 | + struct pfm_ia64_context_flags flags; /* arch specific ctx flags */ | |
1756 | + u64 ctx_saved_psr_up;/* storage for psr_up */ | |
1757 | +#ifdef CONFIG_IA64_PERFMON_COMPAT | |
1758 | + void *ctx_smpl_vaddr; /* vaddr of user mapping */ | |
1759 | +#endif | |
1760 | +}; | |
1761 | + | |
1762 | +#ifdef CONFIG_IA64_PERFMON_COMPAT | |
1763 | +ssize_t pfm_arch_compat_read(struct pfm_context *ctx, | |
1764 | + char __user *buf, | |
1765 | + int non_block, | |
1766 | + size_t size); | |
1767 | +int pfm_ia64_compat_init(void); | |
1768 | +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, | |
1769 | + size_t rsize, struct file *filp); | |
1770 | +#else | |
1771 | +static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx, | |
1772 | + char __user *buf, | |
1773 | + int non_block, | |
1774 | + size_t size) | |
1775 | +{ | |
1776 | + return -EINVAL; | |
1777 | +} | |
1778 | + | |
1779 | +static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, | |
1780 | + size_t rsize, struct file *filp) | |
1781 | +{ | |
1782 | + return -EINVAL; | |
1783 | +} | |
1784 | +#endif | |
1785 | + | |
1786 | +static inline void pfm_arch_arm_handle_work(struct task_struct *task) | |
1787 | +{ | |
1788 | + /* | |
1789 | + * On IA-64, we ran out of bits in the bottom 7 bits of the | |
1790 | + * threadinfo bitmask. Thus we used a 2-stage approach by piggybacking | |
1791 | + * on NOTIFY_RESUME and then in do_notify_resume() we demultiplex and | |
1792 | + * call pfm_handle_work() if needed | |
1793 | + */ | |
1794 | + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME); | |
1795 | +} | |
1796 | + | |
1797 | +static inline void pfm_arch_disarm_handle_work(struct task_struct *task) | |
1798 | +{ | |
1799 | + /* | |
1800 | + * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are | |
1801 | + * piggybacked onto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE | |
1802 | + * | |
1803 | + * The tsk_clear_notify_resume() checks if any of those are set before | |
1804 | + * clearing the bit | |
1805 | + */ | |
1806 | + tsk_clear_notify_resume(task); | |
1807 | +} | |
1808 | + | |
1809 | +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) | |
1810 | +{ | |
1811 | + return 0; | |
1812 | +} | |
1813 | + | |
1814 | +extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info; | |
1815 | + | |
1816 | +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context)) | |
1817 | + | |
1818 | +/* | |
1819 | + * IA-64 does not need extra alignment requirements for the sampling buffer | |
1820 | + */ | |
1821 | +#define PFM_ARCH_SMPL_ALIGN_SIZE 0 | |
1822 | + | |
1823 | + | |
1824 | +static inline void pfm_release_dbregs(struct task_struct *task) | |
1825 | +{ | |
1826 | + if (task->thread.flags & IA64_THREAD_DBG_VALID) | |
1827 | + __pfm_release_dbregs(task); | |
1828 | +} | |
1829 | + | |
1830 | +#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t) | |
1831 | + | |
1832 | +static inline int pfm_arch_get_base_syscall(void) | |
1833 | +{ | |
1834 | + return __NR_pfm_create_context; | |
1835 | +} | |
1836 | + | |
1837 | +struct pfm_arch_pmu_info { | |
1838 | + unsigned long mask_pmcs[PFM_PMC_BV]; /* modified when masking */ | |
1839 | +}; | |
1840 | + | |
1841 | +DECLARE_PER_CPU(u32, pfm_syst_info); | |
1842 | +#else /* !CONFIG_PERFMON */ | |
1843 | +/* | |
1844 | + * perfmon ia64-specific hooks | |
1845 | + */ | |
1846 | +#define pfm_release_dbregs(_t) do { } while (0) | |
1847 | +#define pfm_use_dbregs(_t) (0) | |
1848 | + | |
1849 | +#endif /* CONFIG_PERFMON */ | |
1850 | + | |
1851 | +#endif /* __KERNEL__ */ | |
1852 | +#endif /* _ASM_IA64_PERFMON_KERN_H_ */ | |
1853 | --- a/arch/ia64/include/asm/processor.h | |
1854 | +++ b/arch/ia64/include/asm/processor.h | |
1855 | @@ -42,7 +42,6 @@ | |
1856 | ||
1857 | #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ | |
1858 | #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ | |
1859 | -#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ | |
1860 | #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ | |
1861 | #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ | |
1862 | #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration | |
1863 | @@ -321,14 +320,6 @@ struct thread_struct { | |
1864 | #else | |
1865 | # define INIT_THREAD_IA32 | |
1866 | #endif /* CONFIG_IA32_SUPPORT */ | |
1867 | -#ifdef CONFIG_PERFMON | |
1868 | - void *pfm_context; /* pointer to detailed PMU context */ | |
1869 | - unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */ | |
1870 | -# define INIT_THREAD_PM .pfm_context = NULL, \ | |
1871 | - .pfm_needs_checking = 0UL, | |
1872 | -#else | |
1873 | -# define INIT_THREAD_PM | |
1874 | -#endif | |
1875 | __u64 dbr[IA64_NUM_DBG_REGS]; | |
1876 | __u64 ibr[IA64_NUM_DBG_REGS]; | |
1877 | struct ia64_fpreg fph[96]; /* saved/loaded on demand */ | |
1878 | @@ -343,7 +334,6 @@ struct thread_struct { | |
1879 | .task_size = DEFAULT_TASK_SIZE, \ | |
1880 | .last_fph_cpu = -1, \ | |
1881 | INIT_THREAD_IA32 \ | |
1882 | - INIT_THREAD_PM \ | |
1883 | .dbr = {0, }, \ | |
1884 | .ibr = {0, }, \ | |
1885 | .fph = {{{{0}}}, } \ | |
1886 | --- a/arch/ia64/include/asm/system.h | |
1887 | +++ b/arch/ia64/include/asm/system.h | |
1888 | @@ -217,6 +217,7 @@ struct task_struct; | |
1889 | extern void ia64_save_extra (struct task_struct *task); | |
1890 | extern void ia64_load_extra (struct task_struct *task); | |
1891 | ||
1892 | + | |
1893 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | |
1894 | extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next); | |
1895 | # define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n) | |
1896 | @@ -224,16 +225,9 @@ extern void ia64_account_on_switch (stru | |
1897 | # define IA64_ACCOUNT_ON_SWITCH(p,n) | |
1898 | #endif | |
1899 | ||
1900 | -#ifdef CONFIG_PERFMON | |
1901 | - DECLARE_PER_CPU(unsigned long, pfm_syst_info); | |
1902 | -# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) | |
1903 | -#else | |
1904 | -# define PERFMON_IS_SYSWIDE() (0) | |
1905 | -#endif | |
1906 | - | |
1907 | -#define IA64_HAS_EXTRA_STATE(t) \ | |
1908 | - ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \ | |
1909 | - || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE()) | |
1910 | +#define IA64_HAS_EXTRA_STATE(t) \ | |
1911 | + (((t)->thread.flags & IA64_THREAD_DBG_VALID) \ | |
1912 | + || IS_IA32_PROCESS(task_pt_regs(t))) | |
1913 | ||
1914 | #define __switch_to(prev,next,last) do { \ | |
1915 | IA64_ACCOUNT_ON_SWITCH(prev, next); \ | |
1916 | @@ -241,6 +235,10 @@ extern void ia64_account_on_switch (stru | |
1917 | ia64_save_extra(prev); \ | |
1918 | if (IA64_HAS_EXTRA_STATE(next)) \ | |
1919 | ia64_load_extra(next); \ | |
1920 | + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ | |
1921 | + pfm_ctxsw_out(prev, next); \ | |
1922 | + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ | |
1923 | + pfm_ctxsw_in(prev, next); \ | |
1924 | ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ | |
1925 | (last) = ia64_switch_to((next)); \ | |
1926 | } while (0) | |
1927 | --- a/arch/ia64/include/asm/thread_info.h | |
1928 | +++ b/arch/ia64/include/asm/thread_info.h | |
1929 | @@ -110,6 +110,8 @@ extern void tsk_clear_notify_resume(stru | |
1930 | #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ | |
1931 | #define TIF_FREEZE 20 /* is freezing for suspend */ | |
1932 | #define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ | |
1933 | +#define TIF_PERFMON_CTXSW 22 /* perfmon needs ctxsw calls */ | |
1934 | +#define TIF_PERFMON_WORK 23 /* work for pfm_handle_work() */ | |
1935 | ||
1936 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1937 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
1938 | @@ -123,6 +125,8 @@ extern void tsk_clear_notify_resume(stru | |
1939 | #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) | |
1940 | #define _TIF_FREEZE (1 << TIF_FREEZE) | |
1941 | #define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) | |
1942 | +#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW) | |
1943 | +#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK) | |
1944 | ||
1945 | /* "work to do on user-return" bits */ | |
1946 | #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ | |
1947 | --- a/arch/ia64/include/asm/unistd.h | |
1948 | +++ b/arch/ia64/include/asm/unistd.h | |
1949 | @@ -308,11 +308,23 @@ | |
1950 | #define __NR_dup3 1316 | |
1951 | #define __NR_pipe2 1317 | |
1952 | #define __NR_inotify_init1 1318 | |
1953 | +#define __NR_pfm_create_context 1319 | |
1954 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
1955 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
1956 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
1957 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
1958 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
1959 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
1960 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
1961 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
1962 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
1963 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
1964 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
1965 | ||
1966 | #ifdef __KERNEL__ | |
1967 | ||
1968 | ||
1969 | -#define NR_syscalls 295 /* length of syscall table */ | |
1970 | +#define NR_syscalls 307 /* length of syscall table */ | |
1971 | ||
1972 | /* | |
1973 | * The following defines stop scripts/checksyscalls.sh from complaining about | |
1974 | --- a/arch/ia64/kernel/Makefile | |
1975 | +++ b/arch/ia64/kernel/Makefile | |
1976 | @@ -5,7 +5,7 @@ | |
1977 | extra-y := head.o init_task.o vmlinux.lds | |
1978 | ||
1979 | obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ | |
1980 | - irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ | |
1981 | + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \ | |
1982 | salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ | |
1983 | unwind.o mca.o mca_asm.o topology.o | |
1984 | ||
1985 | @@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o | |
1986 | obj-$(CONFIG_MODULES) += module.o | |
1987 | obj-$(CONFIG_SMP) += smp.o smpboot.o | |
1988 | obj-$(CONFIG_NUMA) += numa.o | |
1989 | -obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o | |
1990 | obj-$(CONFIG_IA64_CYCLONE) += cyclone.o | |
1991 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | |
1992 | obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o | |
1993 | --- a/arch/ia64/kernel/entry.S | |
1994 | +++ b/arch/ia64/kernel/entry.S | |
1995 | @@ -1697,6 +1697,18 @@ sys_call_table: | |
1996 | data8 sys_dup3 | |
1997 | data8 sys_pipe2 | |
1998 | data8 sys_inotify_init1 | |
1999 | + data8 sys_pfm_create_context | |
2000 | + data8 sys_pfm_write_pmcs // 1320 | |
2001 | + data8 sys_pfm_write_pmds | |
2002 | + data8 sys_pfm_read_pmds | |
2003 | + data8 sys_pfm_load_context | |
2004 | + data8 sys_pfm_start | |
2005 | + data8 sys_pfm_stop // 1325 | |
2006 | + data8 sys_pfm_restart | |
2007 | + data8 sys_pfm_create_evtsets | |
2008 | + data8 sys_pfm_getinfo_evtsets | |
2009 | + data8 sys_pfm_delete_evtsets | |
2010 | + data8 sys_pfm_unload_context // 1330 | |
2011 | ||
2012 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls | |
2013 | #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ | |
2014 | --- a/arch/ia64/kernel/irq_ia64.c | |
2015 | +++ b/arch/ia64/kernel/irq_ia64.c | |
2016 | @@ -40,10 +40,6 @@ | |
2017 | #include <asm/system.h> | |
2018 | #include <asm/tlbflush.h> | |
2019 | ||
2020 | -#ifdef CONFIG_PERFMON | |
2021 | -# include <asm/perfmon.h> | |
2022 | -#endif | |
2023 | - | |
2024 | #define IRQ_DEBUG 0 | |
2025 | ||
2026 | #define IRQ_VECTOR_UNASSIGNED (0) | |
2027 | @@ -660,9 +656,6 @@ init_IRQ (void) | |
2028 | } | |
2029 | #endif | |
2030 | #endif | |
2031 | -#ifdef CONFIG_PERFMON | |
2032 | - pfm_init_percpu(); | |
2033 | -#endif | |
2034 | platform_irq_init(); | |
2035 | } | |
2036 | ||
2037 | --- a/arch/ia64/kernel/perfmon_default_smpl.c | |
2038 | +++ /dev/null | |
2039 | @@ -1,296 +0,0 @@ | |
2040 | -/* | |
2041 | - * Copyright (C) 2002-2003 Hewlett-Packard Co | |
2042 | - * Stephane Eranian <eranian@hpl.hp.com> | |
2043 | - * | |
2044 | - * This file implements the default sampling buffer format | |
2045 | - * for the Linux/ia64 perfmon-2 subsystem. | |
2046 | - */ | |
2047 | -#include <linux/kernel.h> | |
2048 | -#include <linux/types.h> | |
2049 | -#include <linux/module.h> | |
2050 | -#include <linux/init.h> | |
2051 | -#include <asm/delay.h> | |
2052 | -#include <linux/smp.h> | |
2053 | - | |
2054 | -#include <asm/perfmon.h> | |
2055 | -#include <asm/perfmon_default_smpl.h> | |
2056 | - | |
2057 | -MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
2058 | -MODULE_DESCRIPTION("perfmon default sampling format"); | |
2059 | -MODULE_LICENSE("GPL"); | |
2060 | - | |
2061 | -#define DEFAULT_DEBUG 1 | |
2062 | - | |
2063 | -#ifdef DEFAULT_DEBUG | |
2064 | -#define DPRINT(a) \ | |
2065 | - do { \ | |
2066 | - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ | |
2067 | - } while (0) | |
2068 | - | |
2069 | -#define DPRINT_ovfl(a) \ | |
2070 | - do { \ | |
2071 | - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ | |
2072 | - } while (0) | |
2073 | - | |
2074 | -#else | |
2075 | -#define DPRINT(a) | |
2076 | -#define DPRINT_ovfl(a) | |
2077 | -#endif | |
2078 | - | |
2079 | -static int | |
2080 | -default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) | |
2081 | -{ | |
2082 | - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; | |
2083 | - int ret = 0; | |
2084 | - | |
2085 | - if (data == NULL) { | |
2086 | - DPRINT(("[%d] no argument passed\n", task_pid_nr(task))); | |
2087 | - return -EINVAL; | |
2088 | - } | |
2089 | - | |
2090 | - DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu)); | |
2091 | - | |
2092 | - /* | |
2093 | - * must hold at least the buffer header + one minimally sized entry | |
2094 | - */ | |
2095 | - if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; | |
2096 | - | |
2097 | - DPRINT(("buf_size=%lu\n", arg->buf_size)); | |
2098 | - | |
2099 | - return ret; | |
2100 | -} | |
2101 | - | |
2102 | -static int | |
2103 | -default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) | |
2104 | -{ | |
2105 | - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; | |
2106 | - | |
2107 | - /* | |
2108 | - * size has been validated in default_validate | |
2109 | - */ | |
2110 | - *size = arg->buf_size; | |
2111 | - | |
2112 | - return 0; | |
2113 | -} | |
2114 | - | |
2115 | -static int | |
2116 | -default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) | |
2117 | -{ | |
2118 | - pfm_default_smpl_hdr_t *hdr; | |
2119 | - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; | |
2120 | - | |
2121 | - hdr = (pfm_default_smpl_hdr_t *)buf; | |
2122 | - | |
2123 | - hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; | |
2124 | - hdr->hdr_buf_size = arg->buf_size; | |
2125 | - hdr->hdr_cur_offs = sizeof(*hdr); | |
2126 | - hdr->hdr_overflows = 0UL; | |
2127 | - hdr->hdr_count = 0UL; | |
2128 | - | |
2129 | - DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", | |
2130 | - task_pid_nr(task), | |
2131 | - buf, | |
2132 | - hdr->hdr_buf_size, | |
2133 | - sizeof(*hdr), | |
2134 | - hdr->hdr_version, | |
2135 | - hdr->hdr_cur_offs)); | |
2136 | - | |
2137 | - return 0; | |
2138 | -} | |
2139 | - | |
2140 | -static int | |
2141 | -default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) | |
2142 | -{ | |
2143 | - pfm_default_smpl_hdr_t *hdr; | |
2144 | - pfm_default_smpl_entry_t *ent; | |
2145 | - void *cur, *last; | |
2146 | - unsigned long *e, entry_size; | |
2147 | - unsigned int npmds, i; | |
2148 | - unsigned char ovfl_pmd; | |
2149 | - unsigned char ovfl_notify; | |
2150 | - | |
2151 | - if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { | |
2152 | - DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg)); | |
2153 | - return -EINVAL; | |
2154 | - } | |
2155 | - | |
2156 | - hdr = (pfm_default_smpl_hdr_t *)buf; | |
2157 | - cur = buf+hdr->hdr_cur_offs; | |
2158 | - last = buf+hdr->hdr_buf_size; | |
2159 | - ovfl_pmd = arg->ovfl_pmd; | |
2160 | - ovfl_notify = arg->ovfl_notify; | |
2161 | - | |
2162 | - /* | |
2163 | - * precheck for sanity | |
2164 | - */ | |
2165 | - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; | |
2166 | - | |
2167 | - npmds = hweight64(arg->smpl_pmds[0]); | |
2168 | - | |
2169 | - ent = (pfm_default_smpl_entry_t *)cur; | |
2170 | - | |
2171 | - prefetch(arg->smpl_pmds_values); | |
2172 | - | |
2173 | - entry_size = sizeof(*ent) + (npmds << 3); | |
2174 | - | |
2175 | - /* position for first pmd */ | |
2176 | - e = (unsigned long *)(ent+1); | |
2177 | - | |
2178 | - hdr->hdr_count++; | |
2179 | - | |
2180 | - DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", | |
2181 | - task->pid, | |
2182 | - hdr->hdr_count, | |
2183 | - cur, last, | |
2184 | - last-cur, | |
2185 | - ovfl_pmd, | |
2186 | - ovfl_notify, npmds)); | |
2187 | - | |
2188 | - /* | |
2189 | - * current = task running at the time of the overflow. | |
2190 | - * | |
2191 | - * per-task mode: | |
2192 | - * - this is ususally the task being monitored. | |
2193 | - * Under certain conditions, it might be a different task | |
2194 | - * | |
2195 | - * system-wide: | |
2196 | - * - this is not necessarily the task controlling the session | |
2197 | - */ | |
2198 | - ent->pid = current->pid; | |
2199 | - ent->ovfl_pmd = ovfl_pmd; | |
2200 | - ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; | |
2201 | - | |
2202 | - /* | |
2203 | - * where did the fault happen (includes slot number) | |
2204 | - */ | |
2205 | - ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); | |
2206 | - | |
2207 | - ent->tstamp = stamp; | |
2208 | - ent->cpu = smp_processor_id(); | |
2209 | - ent->set = arg->active_set; | |
2210 | - ent->tgid = current->tgid; | |
2211 | - | |
2212 | - /* | |
2213 | - * selectively store PMDs in increasing index number | |
2214 | - */ | |
2215 | - if (npmds) { | |
2216 | - unsigned long *val = arg->smpl_pmds_values; | |
2217 | - for(i=0; i < npmds; i++) { | |
2218 | - *e++ = *val++; | |
2219 | - } | |
2220 | - } | |
2221 | - | |
2222 | - /* | |
2223 | - * update position for next entry | |
2224 | - */ | |
2225 | - hdr->hdr_cur_offs += entry_size; | |
2226 | - cur += entry_size; | |
2227 | - | |
2228 | - /* | |
2229 | - * post check to avoid losing the last sample | |
2230 | - */ | |
2231 | - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; | |
2232 | - | |
2233 | - /* | |
2234 | - * keep same ovfl_pmds, ovfl_notify | |
2235 | - */ | |
2236 | - arg->ovfl_ctrl.bits.notify_user = 0; | |
2237 | - arg->ovfl_ctrl.bits.block_task = 0; | |
2238 | - arg->ovfl_ctrl.bits.mask_monitoring = 0; | |
2239 | - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ | |
2240 | - | |
2241 | - return 0; | |
2242 | -full: | |
2243 | - DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); | |
2244 | - | |
2245 | - /* | |
2246 | - * increment number of buffer overflow. | |
2247 | - * important to detect duplicate set of samples. | |
2248 | - */ | |
2249 | - hdr->hdr_overflows++; | |
2250 | - | |
2251 | - /* | |
2252 | - * if no notification requested, then we saturate the buffer | |
2253 | - */ | |
2254 | - if (ovfl_notify == 0) { | |
2255 | - arg->ovfl_ctrl.bits.notify_user = 0; | |
2256 | - arg->ovfl_ctrl.bits.block_task = 0; | |
2257 | - arg->ovfl_ctrl.bits.mask_monitoring = 1; | |
2258 | - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; | |
2259 | - } else { | |
2260 | - arg->ovfl_ctrl.bits.notify_user = 1; | |
2261 | - arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ | |
2262 | - arg->ovfl_ctrl.bits.mask_monitoring = 1; | |
2263 | - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ | |
2264 | - } | |
2265 | - return -1; /* we are full, sorry */ | |
2266 | -} | |
2267 | - | |
2268 | -static int | |
2269 | -default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) | |
2270 | -{ | |
2271 | - pfm_default_smpl_hdr_t *hdr; | |
2272 | - | |
2273 | - hdr = (pfm_default_smpl_hdr_t *)buf; | |
2274 | - | |
2275 | - hdr->hdr_count = 0UL; | |
2276 | - hdr->hdr_cur_offs = sizeof(*hdr); | |
2277 | - | |
2278 | - ctrl->bits.mask_monitoring = 0; | |
2279 | - ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ | |
2280 | - | |
2281 | - return 0; | |
2282 | -} | |
2283 | - | |
2284 | -static int | |
2285 | -default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) | |
2286 | -{ | |
2287 | - DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf)); | |
2288 | - return 0; | |
2289 | -} | |
2290 | - | |
2291 | -static pfm_buffer_fmt_t default_fmt={ | |
2292 | - .fmt_name = "default_format", | |
2293 | - .fmt_uuid = PFM_DEFAULT_SMPL_UUID, | |
2294 | - .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), | |
2295 | - .fmt_validate = default_validate, | |
2296 | - .fmt_getsize = default_get_size, | |
2297 | - .fmt_init = default_init, | |
2298 | - .fmt_handler = default_handler, | |
2299 | - .fmt_restart = default_restart, | |
2300 | - .fmt_restart_active = default_restart, | |
2301 | - .fmt_exit = default_exit, | |
2302 | -}; | |
2303 | - | |
2304 | -static int __init | |
2305 | -pfm_default_smpl_init_module(void) | |
2306 | -{ | |
2307 | - int ret; | |
2308 | - | |
2309 | - ret = pfm_register_buffer_fmt(&default_fmt); | |
2310 | - if (ret == 0) { | |
2311 | - printk("perfmon_default_smpl: %s v%u.%u registered\n", | |
2312 | - default_fmt.fmt_name, | |
2313 | - PFM_DEFAULT_SMPL_VERSION_MAJ, | |
2314 | - PFM_DEFAULT_SMPL_VERSION_MIN); | |
2315 | - } else { | |
2316 | - printk("perfmon_default_smpl: %s cannot register ret=%d\n", | |
2317 | - default_fmt.fmt_name, | |
2318 | - ret); | |
2319 | - } | |
2320 | - | |
2321 | - return ret; | |
2322 | -} | |
2323 | - | |
2324 | -static void __exit | |
2325 | -pfm_default_smpl_cleanup_module(void) | |
2326 | -{ | |
2327 | - int ret; | |
2328 | - ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); | |
2329 | - | |
2330 | - printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); | |
2331 | -} | |
2332 | - | |
2333 | -module_init(pfm_default_smpl_init_module); | |
2334 | -module_exit(pfm_default_smpl_cleanup_module); | |
2335 | - | |
2336 | --- a/arch/ia64/kernel/perfmon_generic.h | |
2337 | +++ /dev/null | |
2338 | @@ -1,45 +0,0 @@ | |
2339 | -/* | |
2340 | - * This file contains the generic PMU register description tables | |
2341 | - * and pmc checker used by perfmon.c. | |
2342 | - * | |
2343 | - * Copyright (C) 2002-2003 Hewlett Packard Co | |
2344 | - * Stephane Eranian <eranian@hpl.hp.com> | |
2345 | - */ | |
2346 | - | |
2347 | -static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ | |
2348 | -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2349 | -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2350 | -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2351 | -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2352 | -/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2353 | -/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2354 | -/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2355 | -/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2356 | - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2357 | -}; | |
2358 | - | |
2359 | -static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ | |
2360 | -/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | |
2361 | -/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | |
2362 | -/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | |
2363 | -/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | |
2364 | -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | |
2365 | -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | |
2366 | -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | |
2367 | -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | |
2368 | - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2369 | -}; | |
2370 | - | |
2371 | -/* | |
2372 | - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
2373 | - */ | |
2374 | -static pmu_config_t pmu_conf_gen={ | |
2375 | - .pmu_name = "Generic", | |
2376 | - .pmu_family = 0xff, /* any */ | |
2377 | - .ovfl_val = (1UL << 32) - 1, | |
2378 | - .num_ibrs = 0, /* does not use */ | |
2379 | - .num_dbrs = 0, /* does not use */ | |
2380 | - .pmd_desc = pfm_gen_pmd_desc, | |
2381 | - .pmc_desc = pfm_gen_pmc_desc | |
2382 | -}; | |
2383 | - | |
2384 | --- a/arch/ia64/kernel/perfmon_itanium.h | |
2385 | +++ /dev/null | |
2386 | @@ -1,115 +0,0 @@ | |
2387 | -/* | |
2388 | - * This file contains the Itanium PMU register description tables | |
2389 | - * and pmc checker used by perfmon.c. | |
2390 | - * | |
2391 | - * Copyright (C) 2002-2003 Hewlett Packard Co | |
2392 | - * Stephane Eranian <eranian@hpl.hp.com> | |
2393 | - */ | |
2394 | -static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); | |
2395 | - | |
2396 | -static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ | |
2397 | -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2398 | -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2399 | -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2400 | -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2401 | -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2402 | -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2403 | -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2404 | -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2405 | -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2406 | -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2407 | -/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2408 | -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2409 | -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2410 | -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2411 | - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2412 | -}; | |
2413 | - | |
2414 | -static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ | |
2415 | -/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | |
2416 | -/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | |
2417 | -/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2418 | -/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2419 | -/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | |
2420 | -/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | |
2421 | -/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | |
2422 | -/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | |
2423 | -/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2424 | -/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2425 | -/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2426 | -/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2427 | -/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2428 | -/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2429 | -/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2430 | -/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2431 | -/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2432 | -/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2433 | - { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2434 | -}; | |
2435 | - | |
2436 | -static int | |
2437 | -pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) | |
2438 | -{ | |
2439 | - int ret; | |
2440 | - int is_loaded; | |
2441 | - | |
2442 | - /* sanitfy check */ | |
2443 | - if (ctx == NULL) return -EINVAL; | |
2444 | - | |
2445 | - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; | |
2446 | - | |
2447 | - /* | |
2448 | - * we must clear the (instruction) debug registers if pmc13.ta bit is cleared | |
2449 | - * before they are written (fl_using_dbreg==0) to avoid picking up stale information. | |
2450 | - */ | |
2451 | - if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { | |
2452 | - | |
2453 | - DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); | |
2454 | - | |
2455 | - /* don't mix debug with perfmon */ | |
2456 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2457 | - | |
2458 | - /* | |
2459 | - * a count of 0 will mark the debug registers as in use and also | |
2460 | - * ensure that they are properly cleared. | |
2461 | - */ | |
2462 | - ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); | |
2463 | - if (ret) return ret; | |
2464 | - } | |
2465 | - | |
2466 | - /* | |
2467 | - * we must clear the (data) debug registers if pmc11.pt bit is cleared | |
2468 | - * before they are written (fl_using_dbreg==0) to avoid picking up stale information. | |
2469 | - */ | |
2470 | - if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { | |
2471 | - | |
2472 | - DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); | |
2473 | - | |
2474 | - /* don't mix debug with perfmon */ | |
2475 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2476 | - | |
2477 | - /* | |
2478 | - * a count of 0 will mark the debug registers as in use and also | |
2479 | - * ensure that they are properly cleared. | |
2480 | - */ | |
2481 | - ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); | |
2482 | - if (ret) return ret; | |
2483 | - } | |
2484 | - return 0; | |
2485 | -} | |
2486 | - | |
2487 | -/* | |
2488 | - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
2489 | - */ | |
2490 | -static pmu_config_t pmu_conf_ita={ | |
2491 | - .pmu_name = "Itanium", | |
2492 | - .pmu_family = 0x7, | |
2493 | - .ovfl_val = (1UL << 32) - 1, | |
2494 | - .pmd_desc = pfm_ita_pmd_desc, | |
2495 | - .pmc_desc = pfm_ita_pmc_desc, | |
2496 | - .num_ibrs = 8, | |
2497 | - .num_dbrs = 8, | |
2498 | - .use_rr_dbregs = 1, /* debug register are use for range retrictions */ | |
2499 | -}; | |
2500 | - | |
2501 | - | |
2502 | --- a/arch/ia64/kernel/perfmon_mckinley.h | |
2503 | +++ /dev/null | |
2504 | @@ -1,187 +0,0 @@ | |
2505 | -/* | |
2506 | - * This file contains the McKinley PMU register description tables | |
2507 | - * and pmc checker used by perfmon.c. | |
2508 | - * | |
2509 | - * Copyright (C) 2002-2003 Hewlett Packard Co | |
2510 | - * Stephane Eranian <eranian@hpl.hp.com> | |
2511 | - */ | |
2512 | -static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); | |
2513 | - | |
2514 | -static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ | |
2515 | -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2516 | -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2517 | -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2518 | -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2519 | -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2520 | -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2521 | -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2522 | -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2523 | -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2524 | -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2525 | -/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2526 | -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2527 | -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2528 | -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2529 | -/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2530 | -/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | |
2531 | - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2532 | -}; | |
2533 | - | |
2534 | -static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ | |
2535 | -/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | |
2536 | -/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | |
2537 | -/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2538 | -/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2539 | -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | |
2540 | -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | |
2541 | -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | |
2542 | -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | |
2543 | -/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2544 | -/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2545 | -/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2546 | -/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2547 | -/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2548 | -/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2549 | -/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2550 | -/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2551 | -/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | |
2552 | -/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | |
2553 | - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2554 | -}; | |
2555 | - | |
2556 | -/* | |
2557 | - * PMC reserved fields must have their power-up values preserved | |
2558 | - */ | |
2559 | -static int | |
2560 | -pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) | |
2561 | -{ | |
2562 | - unsigned long tmp1, tmp2, ival = *val; | |
2563 | - | |
2564 | - /* remove reserved areas from user value */ | |
2565 | - tmp1 = ival & PMC_RSVD_MASK(cnum); | |
2566 | - | |
2567 | - /* get reserved fields values */ | |
2568 | - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); | |
2569 | - | |
2570 | - *val = tmp1 | tmp2; | |
2571 | - | |
2572 | - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", | |
2573 | - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); | |
2574 | - return 0; | |
2575 | -} | |
2576 | - | |
2577 | -/* | |
2578 | - * task can be NULL if the context is unloaded | |
2579 | - */ | |
2580 | -static int | |
2581 | -pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) | |
2582 | -{ | |
2583 | - int ret = 0, check_case1 = 0; | |
2584 | - unsigned long val8 = 0, val14 = 0, val13 = 0; | |
2585 | - int is_loaded; | |
2586 | - | |
2587 | - /* first preserve the reserved fields */ | |
2588 | - pfm_mck_reserved(cnum, val, regs); | |
2589 | - | |
2590 | - /* sanitfy check */ | |
2591 | - if (ctx == NULL) return -EINVAL; | |
2592 | - | |
2593 | - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; | |
2594 | - | |
2595 | - /* | |
2596 | - * we must clear the debug registers if pmc13 has a value which enable | |
2597 | - * memory pipeline event constraints. In this case we need to clear the | |
2598 | - * the debug registers if they have not yet been accessed. This is required | |
2599 | - * to avoid picking stale state. | |
2600 | - * PMC13 is "active" if: | |
2601 | - * one of the pmc13.cfg_dbrpXX field is different from 0x3 | |
2602 | - * AND | |
2603 | - * at the corresponding pmc13.ena_dbrpXX is set. | |
2604 | - */ | |
2605 | - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); | |
2606 | - | |
2607 | - if (cnum == 13 && is_loaded | |
2608 | - && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { | |
2609 | - | |
2610 | - DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); | |
2611 | - | |
2612 | - /* don't mix debug with perfmon */ | |
2613 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2614 | - | |
2615 | - /* | |
2616 | - * a count of 0 will mark the debug registers as in use and also | |
2617 | - * ensure that they are properly cleared. | |
2618 | - */ | |
2619 | - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); | |
2620 | - if (ret) return ret; | |
2621 | - } | |
2622 | - /* | |
2623 | - * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled | |
2624 | - * before they are (fl_using_dbreg==0) to avoid picking up stale information. | |
2625 | - */ | |
2626 | - if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { | |
2627 | - | |
2628 | - DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); | |
2629 | - | |
2630 | - /* don't mix debug with perfmon */ | |
2631 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2632 | - | |
2633 | - /* | |
2634 | - * a count of 0 will mark the debug registers as in use and also | |
2635 | - * ensure that they are properly cleared. | |
2636 | - */ | |
2637 | - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); | |
2638 | - if (ret) return ret; | |
2639 | - | |
2640 | - } | |
2641 | - | |
2642 | - switch(cnum) { | |
2643 | - case 4: *val |= 1UL << 23; /* force power enable bit */ | |
2644 | - break; | |
2645 | - case 8: val8 = *val; | |
2646 | - val13 = ctx->ctx_pmcs[13]; | |
2647 | - val14 = ctx->ctx_pmcs[14]; | |
2648 | - check_case1 = 1; | |
2649 | - break; | |
2650 | - case 13: val8 = ctx->ctx_pmcs[8]; | |
2651 | - val13 = *val; | |
2652 | - val14 = ctx->ctx_pmcs[14]; | |
2653 | - check_case1 = 1; | |
2654 | - break; | |
2655 | - case 14: val8 = ctx->ctx_pmcs[8]; | |
2656 | - val13 = ctx->ctx_pmcs[13]; | |
2657 | - val14 = *val; | |
2658 | - check_case1 = 1; | |
2659 | - break; | |
2660 | - } | |
2661 | - /* check illegal configuration which can produce inconsistencies in tagging | |
2662 | - * i-side events in L1D and L2 caches | |
2663 | - */ | |
2664 | - if (check_case1) { | |
2665 | - ret = ((val13 >> 45) & 0xf) == 0 | |
2666 | - && ((val8 & 0x1) == 0) | |
2667 | - && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) | |
2668 | - ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); | |
2669 | - | |
2670 | - if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); | |
2671 | - } | |
2672 | - | |
2673 | - return ret ? -EINVAL : 0; | |
2674 | -} | |
2675 | - | |
2676 | -/* | |
2677 | - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
2678 | - */ | |
2679 | -static pmu_config_t pmu_conf_mck={ | |
2680 | - .pmu_name = "Itanium 2", | |
2681 | - .pmu_family = 0x1f, | |
2682 | - .flags = PFM_PMU_IRQ_RESEND, | |
2683 | - .ovfl_val = (1UL << 47) - 1, | |
2684 | - .pmd_desc = pfm_mck_pmd_desc, | |
2685 | - .pmc_desc = pfm_mck_pmc_desc, | |
2686 | - .num_ibrs = 8, | |
2687 | - .num_dbrs = 8, | |
2688 | - .use_rr_dbregs = 1 /* debug register are use for range restrictions */ | |
2689 | -}; | |
2690 | - | |
2691 | - | |
2692 | --- a/arch/ia64/kernel/perfmon_montecito.h | |
2693 | +++ /dev/null | |
2694 | @@ -1,269 +0,0 @@ | |
2695 | -/* | |
2696 | - * This file contains the Montecito PMU register description tables | |
2697 | - * and pmc checker used by perfmon.c. | |
2698 | - * | |
2699 | - * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
2700 | - * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
2701 | - */ | |
2702 | -static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); | |
2703 | - | |
2704 | -#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\ | |
2705 | - RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63)) | |
2706 | -#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36)) | |
2707 | -#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35)) | |
2708 | - | |
2709 | -static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ | |
2710 | -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2711 | -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2712 | -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2713 | -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2714 | -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}}, | |
2715 | -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}}, | |
2716 | -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}}, | |
2717 | -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}}, | |
2718 | -/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}}, | |
2719 | -/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}}, | |
2720 | -/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}}, | |
2721 | -/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}}, | |
2722 | -/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}}, | |
2723 | -/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}}, | |
2724 | -/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}}, | |
2725 | -/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}}, | |
2726 | -/* pmc16 */ { PFM_REG_NOTIMPL, }, | |
2727 | -/* pmc17 */ { PFM_REG_NOTIMPL, }, | |
2728 | -/* pmc18 */ { PFM_REG_NOTIMPL, }, | |
2729 | -/* pmc19 */ { PFM_REG_NOTIMPL, }, | |
2730 | -/* pmc20 */ { PFM_REG_NOTIMPL, }, | |
2731 | -/* pmc21 */ { PFM_REG_NOTIMPL, }, | |
2732 | -/* pmc22 */ { PFM_REG_NOTIMPL, }, | |
2733 | -/* pmc23 */ { PFM_REG_NOTIMPL, }, | |
2734 | -/* pmc24 */ { PFM_REG_NOTIMPL, }, | |
2735 | -/* pmc25 */ { PFM_REG_NOTIMPL, }, | |
2736 | -/* pmc26 */ { PFM_REG_NOTIMPL, }, | |
2737 | -/* pmc27 */ { PFM_REG_NOTIMPL, }, | |
2738 | -/* pmc28 */ { PFM_REG_NOTIMPL, }, | |
2739 | -/* pmc29 */ { PFM_REG_NOTIMPL, }, | |
2740 | -/* pmc30 */ { PFM_REG_NOTIMPL, }, | |
2741 | -/* pmc31 */ { PFM_REG_NOTIMPL, }, | |
2742 | -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2743 | -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2744 | -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2745 | -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2746 | -/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2747 | -/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, | |
2748 | -/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2749 | -/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, | |
2750 | -/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, | |
2751 | -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, | |
2752 | -/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, | |
2753 | - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2754 | -}; | |
2755 | - | |
2756 | -static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={ | |
2757 | -/* pmd0 */ { PFM_REG_NOTIMPL, }, | |
2758 | -/* pmd1 */ { PFM_REG_NOTIMPL, }, | |
2759 | -/* pmd2 */ { PFM_REG_NOTIMPL, }, | |
2760 | -/* pmd3 */ { PFM_REG_NOTIMPL, }, | |
2761 | -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}}, | |
2762 | -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}}, | |
2763 | -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}}, | |
2764 | -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}}, | |
2765 | -/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, | |
2766 | -/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}}, | |
2767 | -/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}}, | |
2768 | -/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}}, | |
2769 | -/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}}, | |
2770 | -/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}}, | |
2771 | -/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}}, | |
2772 | -/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}}, | |
2773 | -/* pmd16 */ { PFM_REG_NOTIMPL, }, | |
2774 | -/* pmd17 */ { PFM_REG_NOTIMPL, }, | |
2775 | -/* pmd18 */ { PFM_REG_NOTIMPL, }, | |
2776 | -/* pmd19 */ { PFM_REG_NOTIMPL, }, | |
2777 | -/* pmd20 */ { PFM_REG_NOTIMPL, }, | |
2778 | -/* pmd21 */ { PFM_REG_NOTIMPL, }, | |
2779 | -/* pmd22 */ { PFM_REG_NOTIMPL, }, | |
2780 | -/* pmd23 */ { PFM_REG_NOTIMPL, }, | |
2781 | -/* pmd24 */ { PFM_REG_NOTIMPL, }, | |
2782 | -/* pmd25 */ { PFM_REG_NOTIMPL, }, | |
2783 | -/* pmd26 */ { PFM_REG_NOTIMPL, }, | |
2784 | -/* pmd27 */ { PFM_REG_NOTIMPL, }, | |
2785 | -/* pmd28 */ { PFM_REG_NOTIMPL, }, | |
2786 | -/* pmd29 */ { PFM_REG_NOTIMPL, }, | |
2787 | -/* pmd30 */ { PFM_REG_NOTIMPL, }, | |
2788 | -/* pmd31 */ { PFM_REG_NOTIMPL, }, | |
2789 | -/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, | |
2790 | -/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, | |
2791 | -/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}}, | |
2792 | -/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}}, | |
2793 | -/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}}, | |
2794 | -/* pmd37 */ { PFM_REG_NOTIMPL, }, | |
2795 | -/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2796 | -/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2797 | -/* pmd40 */ { PFM_REG_NOTIMPL, }, | |
2798 | -/* pmd41 */ { PFM_REG_NOTIMPL, }, | |
2799 | -/* pmd42 */ { PFM_REG_NOTIMPL, }, | |
2800 | -/* pmd43 */ { PFM_REG_NOTIMPL, }, | |
2801 | -/* pmd44 */ { PFM_REG_NOTIMPL, }, | |
2802 | -/* pmd45 */ { PFM_REG_NOTIMPL, }, | |
2803 | -/* pmd46 */ { PFM_REG_NOTIMPL, }, | |
2804 | -/* pmd47 */ { PFM_REG_NOTIMPL, }, | |
2805 | -/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2806 | -/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2807 | -/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2808 | -/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2809 | -/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2810 | -/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2811 | -/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2812 | -/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2813 | -/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2814 | -/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2815 | -/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2816 | -/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2817 | -/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2818 | -/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2819 | -/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2820 | -/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, | |
2821 | - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ | |
2822 | -}; | |
2823 | - | |
2824 | -/* | |
2825 | - * PMC reserved fields must have their power-up values preserved | |
2826 | - */ | |
2827 | -static int | |
2828 | -pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) | |
2829 | -{ | |
2830 | - unsigned long tmp1, tmp2, ival = *val; | |
2831 | - | |
2832 | - /* remove reserved areas from user value */ | |
2833 | - tmp1 = ival & PMC_RSVD_MASK(cnum); | |
2834 | - | |
2835 | - /* get reserved fields values */ | |
2836 | - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); | |
2837 | - | |
2838 | - *val = tmp1 | tmp2; | |
2839 | - | |
2840 | - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", | |
2841 | - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); | |
2842 | - return 0; | |
2843 | -} | |
2844 | - | |
2845 | -/* | |
2846 | - * task can be NULL if the context is unloaded | |
2847 | - */ | |
2848 | -static int | |
2849 | -pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) | |
2850 | -{ | |
2851 | - int ret = 0; | |
2852 | - unsigned long val32 = 0, val38 = 0, val41 = 0; | |
2853 | - unsigned long tmpval; | |
2854 | - int check_case1 = 0; | |
2855 | - int is_loaded; | |
2856 | - | |
2857 | - /* first preserve the reserved fields */ | |
2858 | - pfm_mont_reserved(cnum, val, regs); | |
2859 | - | |
2860 | - tmpval = *val; | |
2861 | - | |
2862 | - /* sanity check */ | |
2863 | - if (ctx == NULL) return -EINVAL; | |
2864 | - | |
2865 | - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; | |
2866 | - | |
2867 | - /* | |
2868 | - * we must clear the debug registers if pmc41 has a value which enable | |
2869 | - * memory pipeline event constraints. In this case we need to clear the | |
2870 | - * the debug registers if they have not yet been accessed. This is required | |
2871 | - * to avoid picking stale state. | |
2872 | - * PMC41 is "active" if: | |
2873 | - * one of the pmc41.cfg_dtagXX field is different from 0x3 | |
2874 | - * AND | |
2875 | - * at the corresponding pmc41.en_dbrpXX is set. | |
2876 | - * AND | |
2877 | - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) | |
2878 | - */ | |
2879 | - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); | |
2880 | - | |
2881 | - if (cnum == 41 && is_loaded | |
2882 | - && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { | |
2883 | - | |
2884 | - DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); | |
2885 | - | |
2886 | - /* don't mix debug with perfmon */ | |
2887 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2888 | - | |
2889 | - /* | |
2890 | - * a count of 0 will mark the debug registers if: | |
2891 | - * AND | |
2892 | - */ | |
2893 | - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); | |
2894 | - if (ret) return ret; | |
2895 | - } | |
2896 | - /* | |
2897 | - * we must clear the (instruction) debug registers if: | |
2898 | - * pmc38.ig_ibrpX is 0 (enabled) | |
2899 | - * AND | |
2900 | - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) | |
2901 | - */ | |
2902 | - if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) { | |
2903 | - | |
2904 | - DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval)); | |
2905 | - | |
2906 | - /* don't mix debug with perfmon */ | |
2907 | - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | |
2908 | - | |
2909 | - /* | |
2910 | - * a count of 0 will mark the debug registers as in use and also | |
2911 | - * ensure that they are properly cleared. | |
2912 | - */ | |
2913 | - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); | |
2914 | - if (ret) return ret; | |
2915 | - | |
2916 | - } | |
2917 | - switch(cnum) { | |
2918 | - case 32: val32 = *val; | |
2919 | - val38 = ctx->ctx_pmcs[38]; | |
2920 | - val41 = ctx->ctx_pmcs[41]; | |
2921 | - check_case1 = 1; | |
2922 | - break; | |
2923 | - case 38: val38 = *val; | |
2924 | - val32 = ctx->ctx_pmcs[32]; | |
2925 | - val41 = ctx->ctx_pmcs[41]; | |
2926 | - check_case1 = 1; | |
2927 | - break; | |
2928 | - case 41: val41 = *val; | |
2929 | - val32 = ctx->ctx_pmcs[32]; | |
2930 | - val38 = ctx->ctx_pmcs[38]; | |
2931 | - check_case1 = 1; | |
2932 | - break; | |
2933 | - } | |
2934 | - /* check illegal configuration which can produce inconsistencies in tagging | |
2935 | - * i-side events in L1D and L2 caches | |
2936 | - */ | |
2937 | - if (check_case1) { | |
2938 | - ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) | |
2939 | - && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) | |
2940 | - || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); | |
2941 | - if (ret) { | |
2942 | - DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32)); | |
2943 | - return -EINVAL; | |
2944 | - } | |
2945 | - } | |
2946 | - *val = tmpval; | |
2947 | - return 0; | |
2948 | -} | |
2949 | - | |
2950 | -/* | |
2951 | - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
2952 | - */ | |
2953 | -static pmu_config_t pmu_conf_mont={ | |
2954 | - .pmu_name = "Montecito", | |
2955 | - .pmu_family = 0x20, | |
2956 | - .flags = PFM_PMU_IRQ_RESEND, | |
2957 | - .ovfl_val = (1UL << 47) - 1, | |
2958 | - .pmd_desc = pfm_mont_pmd_desc, | |
2959 | - .pmc_desc = pfm_mont_pmc_desc, | |
2960 | - .num_ibrs = 8, | |
2961 | - .num_dbrs = 8, | |
2962 | - .use_rr_dbregs = 1 /* debug register are use for range retrictions */ | |
2963 | -}; | |
2964 | --- a/arch/ia64/kernel/process.c | |
2965 | +++ b/arch/ia64/kernel/process.c | |
2966 | @@ -28,6 +28,7 @@ | |
2967 | #include <linux/delay.h> | |
2968 | #include <linux/kdebug.h> | |
2969 | #include <linux/utsname.h> | |
2970 | +#include <linux/perfmon_kern.h> | |
2971 | ||
2972 | #include <asm/cpu.h> | |
2973 | #include <asm/delay.h> | |
2974 | @@ -45,10 +46,6 @@ | |
2975 | ||
2976 | #include "entry.h" | |
2977 | ||
2978 | -#ifdef CONFIG_PERFMON | |
2979 | -# include <asm/perfmon.h> | |
2980 | -#endif | |
2981 | - | |
2982 | #include "sigframe.h" | |
2983 | ||
2984 | void (*ia64_mark_idle)(int); | |
2985 | @@ -162,10 +159,8 @@ show_regs (struct pt_regs *regs) | |
2986 | ||
2987 | void tsk_clear_notify_resume(struct task_struct *tsk) | |
2988 | { | |
2989 | -#ifdef CONFIG_PERFMON | |
2990 | - if (tsk->thread.pfm_needs_checking) | |
2991 | + if (test_ti_thread_flag(task_thread_info(tsk), TIF_PERFMON_WORK)) | |
2992 | return; | |
2993 | -#endif | |
2994 | if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE)) | |
2995 | return; | |
2996 | clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); | |
2997 | @@ -188,14 +183,9 @@ do_notify_resume_user(sigset_t *unused, | |
2998 | return; | |
2999 | } | |
3000 | ||
3001 | -#ifdef CONFIG_PERFMON | |
3002 | - if (current->thread.pfm_needs_checking) | |
3003 | - /* | |
3004 | - * Note: pfm_handle_work() allow us to call it with interrupts | |
3005 | - * disabled, and may enable interrupts within the function. | |
3006 | - */ | |
3007 | - pfm_handle_work(); | |
3008 | -#endif | |
3009 | + /* process perfmon asynchronous work (e.g. block thread or reset) */ | |
3010 | + if (test_thread_flag(TIF_PERFMON_WORK)) | |
3011 | + pfm_handle_work(task_pt_regs(current)); | |
3012 | ||
3013 | /* deal with pending signal delivery */ | |
3014 | if (test_thread_flag(TIF_SIGPENDING)) { | |
3015 | @@ -212,22 +202,15 @@ do_notify_resume_user(sigset_t *unused, | |
3016 | local_irq_disable(); /* force interrupt disable */ | |
3017 | } | |
3018 | ||
3019 | -static int pal_halt = 1; | |
3020 | static int can_do_pal_halt = 1; | |
3021 | ||
3022 | static int __init nohalt_setup(char * str) | |
3023 | { | |
3024 | - pal_halt = can_do_pal_halt = 0; | |
3025 | + can_do_pal_halt = 0; | |
3026 | return 1; | |
3027 | } | |
3028 | __setup("nohalt", nohalt_setup); | |
3029 | ||
3030 | -void | |
3031 | -update_pal_halt_status(int status) | |
3032 | -{ | |
3033 | - can_do_pal_halt = pal_halt && status; | |
3034 | -} | |
3035 | - | |
3036 | /* | |
3037 | * We use this if we don't have any better idle routine.. | |
3038 | */ | |
3039 | @@ -236,6 +219,22 @@ default_idle (void) | |
3040 | { | |
3041 | local_irq_enable(); | |
3042 | while (!need_resched()) { | |
3043 | +#ifdef CONFIG_PERFMON | |
3044 | + u64 psr = 0; | |
3045 | + /* | |
3046 | + * If requested, we stop the PMU to avoid | |
3047 | + * measuring across the core idle loop. | |
3048 | + * | |
3049 | + * dcr.pp is not modified on purpose | |
3050 | + * it is used when coming out of | |
3051 | + * safe_halt() via interrupt | |
3052 | + */ | |
3053 | + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { | |
3054 | + psr = ia64_getreg(_IA64_REG_PSR); | |
3055 | + if (psr & IA64_PSR_PP) | |
3056 | + ia64_rsm(IA64_PSR_PP); | |
3057 | + } | |
3058 | +#endif | |
3059 | if (can_do_pal_halt) { | |
3060 | local_irq_disable(); | |
3061 | if (!need_resched()) { | |
3062 | @@ -244,6 +243,12 @@ default_idle (void) | |
3063 | local_irq_enable(); | |
3064 | } else | |
3065 | cpu_relax(); | |
3066 | +#ifdef CONFIG_PERFMON | |
3067 | + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { | |
3068 | + if (psr & IA64_PSR_PP) | |
3069 | + ia64_ssm(IA64_PSR_PP); | |
3070 | + } | |
3071 | +#endif | |
3072 | } | |
3073 | } | |
3074 | ||
3075 | @@ -344,22 +349,9 @@ cpu_idle (void) | |
3076 | void | |
3077 | ia64_save_extra (struct task_struct *task) | |
3078 | { | |
3079 | -#ifdef CONFIG_PERFMON | |
3080 | - unsigned long info; | |
3081 | -#endif | |
3082 | - | |
3083 | if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) | |
3084 | ia64_save_debug_regs(&task->thread.dbr[0]); | |
3085 | ||
3086 | -#ifdef CONFIG_PERFMON | |
3087 | - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) | |
3088 | - pfm_save_regs(task); | |
3089 | - | |
3090 | - info = __get_cpu_var(pfm_syst_info); | |
3091 | - if (info & PFM_CPUINFO_SYST_WIDE) | |
3092 | - pfm_syst_wide_update_task(task, info, 0); | |
3093 | -#endif | |
3094 | - | |
3095 | #ifdef CONFIG_IA32_SUPPORT | |
3096 | if (IS_IA32_PROCESS(task_pt_regs(task))) | |
3097 | ia32_save_state(task); | |
3098 | @@ -369,22 +361,9 @@ ia64_save_extra (struct task_struct *tas | |
3099 | void | |
3100 | ia64_load_extra (struct task_struct *task) | |
3101 | { | |
3102 | -#ifdef CONFIG_PERFMON | |
3103 | - unsigned long info; | |
3104 | -#endif | |
3105 | - | |
3106 | if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) | |
3107 | ia64_load_debug_regs(&task->thread.dbr[0]); | |
3108 | ||
3109 | -#ifdef CONFIG_PERFMON | |
3110 | - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) | |
3111 | - pfm_load_regs(task); | |
3112 | - | |
3113 | - info = __get_cpu_var(pfm_syst_info); | |
3114 | - if (info & PFM_CPUINFO_SYST_WIDE) | |
3115 | - pfm_syst_wide_update_task(task, info, 1); | |
3116 | -#endif | |
3117 | - | |
3118 | #ifdef CONFIG_IA32_SUPPORT | |
3119 | if (IS_IA32_PROCESS(task_pt_regs(task))) | |
3120 | ia32_load_state(task); | |
3121 | @@ -510,8 +489,7 @@ copy_thread (int nr, unsigned long clone | |
3122 | * call behavior where scratch registers are preserved across | |
3123 | * system calls (unless used by the system call itself). | |
3124 | */ | |
3125 | -# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ | |
3126 | - | IA64_THREAD_PM_VALID) | |
3127 | +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID) | |
3128 | # define THREAD_FLAGS_TO_SET 0 | |
3129 | p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | |
3130 | | THREAD_FLAGS_TO_SET); | |
3131 | @@ -533,10 +511,8 @@ copy_thread (int nr, unsigned long clone | |
3132 | } | |
3133 | #endif | |
3134 | ||
3135 | -#ifdef CONFIG_PERFMON | |
3136 | - if (current->thread.pfm_context) | |
3137 | - pfm_inherit(p, child_ptregs); | |
3138 | -#endif | |
3139 | + pfm_copy_thread(p); | |
3140 | + | |
3141 | return retval; | |
3142 | } | |
3143 | ||
3144 | @@ -745,15 +721,13 @@ exit_thread (void) | |
3145 | { | |
3146 | ||
3147 | ia64_drop_fpu(current); | |
3148 | -#ifdef CONFIG_PERFMON | |
3149 | - /* if needed, stop monitoring and flush state to perfmon context */ | |
3150 | - if (current->thread.pfm_context) | |
3151 | - pfm_exit_thread(current); | |
3152 | + | |
3153 | + /* if needed, stop monitoring and flush state to perfmon context */ | |
3154 | + pfm_exit_thread(); | |
3155 | ||
3156 | /* free debug register resources */ | |
3157 | - if (current->thread.flags & IA64_THREAD_DBG_VALID) | |
3158 | - pfm_release_debug_registers(current); | |
3159 | -#endif | |
3160 | + pfm_release_dbregs(current); | |
3161 | + | |
3162 | if (IS_IA32_PROCESS(task_pt_regs(current))) | |
3163 | ia32_drop_ia64_partial_page_list(current); | |
3164 | } | |
3165 | --- a/arch/ia64/kernel/ptrace.c | |
3166 | +++ b/arch/ia64/kernel/ptrace.c | |
3167 | @@ -20,6 +20,7 @@ | |
3168 | #include <linux/security.h> | |
3169 | #include <linux/audit.h> | |
3170 | #include <linux/signal.h> | |
3171 | +#include <linux/perfmon_kern.h> | |
3172 | #include <linux/regset.h> | |
3173 | #include <linux/elf.h> | |
3174 | ||
3175 | @@ -30,9 +31,6 @@ | |
3176 | #include <asm/system.h> | |
3177 | #include <asm/uaccess.h> | |
3178 | #include <asm/unwind.h> | |
3179 | -#ifdef CONFIG_PERFMON | |
3180 | -#include <asm/perfmon.h> | |
3181 | -#endif | |
3182 | ||
3183 | #include "entry.h" | |
3184 | ||
3185 | @@ -2124,7 +2122,6 @@ access_uarea(struct task_struct *child, | |
3186 | "address 0x%lx\n", addr); | |
3187 | return -1; | |
3188 | } | |
3189 | -#ifdef CONFIG_PERFMON | |
3190 | /* | |
3191 | * Check if debug registers are used by perfmon. This | |
3192 | * test must be done once we know that we can do the | |
3193 | @@ -2142,9 +2139,8 @@ access_uarea(struct task_struct *child, | |
3194 | * IA64_THREAD_DBG_VALID. The registers are restored | |
3195 | * by the PMU context switch code. | |
3196 | */ | |
3197 | - if (pfm_use_debug_registers(child)) | |
3198 | + if (pfm_use_dbregs(child)) | |
3199 | return -1; | |
3200 | -#endif | |
3201 | ||
3202 | if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { | |
3203 | child->thread.flags |= IA64_THREAD_DBG_VALID; | |
3204 | --- a/arch/ia64/kernel/setup.c | |
3205 | +++ b/arch/ia64/kernel/setup.c | |
3206 | @@ -45,6 +45,7 @@ | |
3207 | #include <linux/cpufreq.h> | |
3208 | #include <linux/kexec.h> | |
3209 | #include <linux/crash_dump.h> | |
3210 | +#include <linux/perfmon_kern.h> | |
3211 | ||
3212 | #include <asm/ia32.h> | |
3213 | #include <asm/machvec.h> | |
3214 | @@ -1052,6 +1053,8 @@ cpu_init (void) | |
3215 | } | |
3216 | platform_cpu_init(); | |
3217 | pm_idle = default_idle; | |
3218 | + | |
3219 | + pfm_init_percpu(); | |
3220 | } | |
3221 | ||
3222 | void __init | |
3223 | --- a/arch/ia64/kernel/smpboot.c | |
3224 | +++ b/arch/ia64/kernel/smpboot.c | |
3225 | @@ -39,6 +39,7 @@ | |
3226 | #include <linux/efi.h> | |
3227 | #include <linux/percpu.h> | |
3228 | #include <linux/bitops.h> | |
3229 | +#include <linux/perfmon_kern.h> | |
3230 | ||
3231 | #include <asm/atomic.h> | |
3232 | #include <asm/cache.h> | |
3233 | @@ -381,10 +382,6 @@ smp_callin (void) | |
3234 | extern void ia64_init_itm(void); | |
3235 | extern volatile int time_keeper_id; | |
3236 | ||
3237 | -#ifdef CONFIG_PERFMON | |
3238 | - extern void pfm_init_percpu(void); | |
3239 | -#endif | |
3240 | - | |
3241 | cpuid = smp_processor_id(); | |
3242 | phys_id = hard_smp_processor_id(); | |
3243 | itc_master = time_keeper_id; | |
3244 | @@ -410,10 +407,6 @@ smp_callin (void) | |
3245 | ||
3246 | ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ | |
3247 | ||
3248 | -#ifdef CONFIG_PERFMON | |
3249 | - pfm_init_percpu(); | |
3250 | -#endif | |
3251 | - | |
3252 | local_irq_enable(); | |
3253 | ||
3254 | if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { | |
3255 | @@ -749,6 +742,7 @@ int __cpu_disable(void) | |
3256 | fixup_irqs(); | |
3257 | local_flush_tlb_all(); | |
3258 | cpu_clear(cpu, cpu_callin_map); | |
3259 | + pfm_cpu_disable(); | |
3260 | return 0; | |
3261 | } | |
3262 | ||
3263 | --- a/arch/ia64/kernel/sys_ia64.c | |
3264 | +++ b/arch/ia64/kernel/sys_ia64.c | |
3265 | @@ -293,3 +293,11 @@ sys_pciconfig_write (unsigned long bus, | |
3266 | } | |
3267 | ||
3268 | #endif /* CONFIG_PCI */ | |
3269 | + | |
3270 | +#ifndef CONFIG_IA64_PERFMON_COMPAT | |
3271 | +asmlinkage long | |
3272 | +sys_perfmonctl (int fd, int cmd, void __user *arg, int count) | |
3273 | +{ | |
3274 | + return -ENOSYS; | |
3275 | +} | |
3276 | +#endif | |
3277 | --- a/arch/ia64/lib/Makefile | |
3278 | +++ b/arch/ia64/lib/Makefile | |
3279 | @@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3 | |
3280 | ||
3281 | obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o | |
3282 | obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o | |
3283 | -lib-$(CONFIG_PERFMON) += carta_random.o | |
3284 | ||
3285 | AFLAGS___divdi3.o = | |
3286 | AFLAGS___udivdi3.o = -DUNSIGNED | |
3287 | --- a/arch/ia64/oprofile/init.c | |
3288 | +++ b/arch/ia64/oprofile/init.c | |
3289 | @@ -12,8 +12,8 @@ | |
3290 | #include <linux/init.h> | |
3291 | #include <linux/errno.h> | |
3292 | ||
3293 | -extern int perfmon_init(struct oprofile_operations * ops); | |
3294 | -extern void perfmon_exit(void); | |
3295 | +extern int op_perfmon_init(struct oprofile_operations * ops); | |
3296 | +extern void op_perfmon_exit(void); | |
3297 | extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); | |
3298 | ||
3299 | int __init oprofile_arch_init(struct oprofile_operations * ops) | |
3300 | @@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct opr | |
3301 | ||
3302 | #ifdef CONFIG_PERFMON | |
3303 | /* perfmon_init() can fail, but we have no way to report it */ | |
3304 | - ret = perfmon_init(ops); | |
3305 | + ret = op_perfmon_init(ops); | |
3306 | #endif | |
3307 | ops->backtrace = ia64_backtrace; | |
3308 | ||
3309 | @@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct opr | |
3310 | void oprofile_arch_exit(void) | |
3311 | { | |
3312 | #ifdef CONFIG_PERFMON | |
3313 | - perfmon_exit(); | |
3314 | + op_perfmon_exit(); | |
3315 | #endif | |
3316 | } | |
3317 | --- a/arch/ia64/oprofile/perfmon.c | |
3318 | +++ b/arch/ia64/oprofile/perfmon.c | |
3319 | @@ -10,25 +10,30 @@ | |
3320 | #include <linux/kernel.h> | |
3321 | #include <linux/oprofile.h> | |
3322 | #include <linux/sched.h> | |
3323 | -#include <asm/perfmon.h> | |
3324 | +#include <linux/module.h> | |
3325 | +#include <linux/perfmon_kern.h> | |
3326 | #include <asm/ptrace.h> | |
3327 | #include <asm/errno.h> | |
3328 | ||
3329 | static int allow_ints; | |
3330 | ||
3331 | static int | |
3332 | -perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, | |
3333 | - struct pt_regs *regs, unsigned long stamp) | |
3334 | +perfmon_handler(struct pfm_context *ctx, | |
3335 | + unsigned long ip, u64 stamp, void *data) | |
3336 | { | |
3337 | - int event = arg->pmd_eventid; | |
3338 | + struct pt_regs *regs; | |
3339 | + struct pfm_ovfl_arg *arg; | |
3340 | + | |
3341 | + regs = data; | |
3342 | + arg = &ctx->ovfl_arg; | |
3343 | ||
3344 | - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; | |
3345 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
3346 | ||
3347 | /* the owner of the oprofile event buffer may have exited | |
3348 | * without perfmon being shutdown (e.g. SIGSEGV) | |
3349 | */ | |
3350 | if (allow_ints) | |
3351 | - oprofile_add_sample(regs, event); | |
3352 | + oprofile_add_sample(regs, arg->pmd_eventid); | |
3353 | return 0; | |
3354 | } | |
3355 | ||
3356 | @@ -45,17 +50,13 @@ static void perfmon_stop(void) | |
3357 | allow_ints = 0; | |
3358 | } | |
3359 | ||
3360 | - | |
3361 | -#define OPROFILE_FMT_UUID { \ | |
3362 | - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } | |
3363 | - | |
3364 | -static pfm_buffer_fmt_t oprofile_fmt = { | |
3365 | - .fmt_name = "oprofile_format", | |
3366 | - .fmt_uuid = OPROFILE_FMT_UUID, | |
3367 | - .fmt_handler = perfmon_handler, | |
3368 | +static struct pfm_smpl_fmt oprofile_fmt = { | |
3369 | + .fmt_name = "OProfile", | |
3370 | + .fmt_handler = perfmon_handler, | |
3371 | + .fmt_flags = PFM_FMT_BUILTIN_FLAG, | |
3372 | + .owner = THIS_MODULE | |
3373 | }; | |
3374 | ||
3375 | - | |
3376 | static char * get_cpu_type(void) | |
3377 | { | |
3378 | __u8 family = local_cpu_data->family; | |
3379 | @@ -75,9 +76,9 @@ static char * get_cpu_type(void) | |
3380 | ||
3381 | static int using_perfmon; | |
3382 | ||
3383 | -int perfmon_init(struct oprofile_operations * ops) | |
3384 | +int __init op_perfmon_init(struct oprofile_operations * ops) | |
3385 | { | |
3386 | - int ret = pfm_register_buffer_fmt(&oprofile_fmt); | |
3387 | + int ret = pfm_fmt_register(&oprofile_fmt); | |
3388 | if (ret) | |
3389 | return -ENODEV; | |
3390 | ||
3391 | @@ -90,10 +91,10 @@ int perfmon_init(struct oprofile_operati | |
3392 | } | |
3393 | ||
3394 | ||
3395 | -void perfmon_exit(void) | |
3396 | +void op_perfmon_exit(void) | |
3397 | { | |
3398 | if (!using_perfmon) | |
3399 | return; | |
3400 | ||
3401 | - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); | |
3402 | + pfm_fmt_unregister(&oprofile_fmt); | |
3403 | } | |
3404 | --- /dev/null | |
3405 | +++ b/arch/ia64/perfmon/Kconfig | |
3406 | @@ -0,0 +1,67 @@ | |
3407 | +menu "Hardware Performance Monitoring support" | |
3408 | +config PERFMON | |
3409 | + bool "Perfmon2 performance monitoring interface" | |
3410 | + default n | |
3411 | + help | |
3412 | + Enables the perfmon2 interface to access the hardware | |
3413 | + performance counters. See <http://perfmon2.sf.net/> for | |
3414 | + more details. | |
3415 | + | |
3416 | +config PERFMON_DEBUG | |
3417 | + bool "Perfmon debugging" | |
3418 | + default n | |
3419 | + depends on PERFMON | |
3420 | + help | |
3421 | + Enables perfmon debugging support | |
3422 | + | |
3423 | +config PERFMON_DEBUG_FS | |
3424 | + bool "Enable perfmon statistics reporting via debugfs" | |
3425 | + default y | |
3426 | + depends on PERFMON && DEBUG_FS | |
3427 | + help | |
3428 | + Enable collection and reporting of perfmon timing statistics under | |
3429 | + debugfs. This is used for debugging and performance analysis of the | |
3430 | + subsystem. The debugfs filesystem must be mounted. | |
3431 | + | |
3432 | +config IA64_PERFMON_COMPAT | |
3433 | + bool "Enable old perfmon-2 compatibility mode" | |
3434 | + default n | |
3435 | + depends on PERFMON | |
3436 | + help | |
3437 | + Enable this option to allow performance tools which used the old | |
3438 | + perfmon-2 interface to continue to work. Old tools are those using | |
3439 | + the obsolete commands and arguments. Check your programs and look | |
3440 | + in include/asm-ia64/perfmon_compat.h for more information. | |
3441 | + | |
3442 | +config IA64_PERFMON_GENERIC | |
3443 | + tristate "Generic IA-64 PMU support" | |
3444 | + depends on PERFMON | |
3445 | + default n | |
3446 | + help | |
3447 | + Enables generic IA-64 PMU support. | |
3448 | + The generic PMU is defined by the IA-64 architecture document. | |
3449 | + This option should only be necessary when running with a PMU that | |
3450 | + is not yet explicitly supported. Even then, there is no guarantee | |
3451 | + that this support will work. | |
3452 | + | |
3453 | +config IA64_PERFMON_ITANIUM | |
3454 | + tristate "Itanium (Merced) Performance Monitoring support" | |
3455 | + depends on PERFMON | |
3456 | + default n | |
3457 | + help | |
3458 | + Enables Itanium (Merced) PMU support. | |
3459 | + | |
3460 | +config IA64_PERFMON_MCKINLEY | |
3461 | + tristate "Itanium 2 (McKinley) Performance Monitoring support" | |
3462 | + depends on PERFMON | |
3463 | + default n | |
3464 | + help | |
3465 | + Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support. | |
3466 | + | |
3467 | +config IA64_PERFMON_MONTECITO | |
3468 | + tristate "Itanium 2 9000 (Montecito) Performance Monitoring support" | |
3469 | + depends on PERFMON | |
3470 | + default n | |
3471 | + help | |
3472 | + Enables support for Itanium 2 9000 (Montecito) PMU. | |
3473 | +endmenu | |
3474 | --- /dev/null | |
3475 | +++ b/arch/ia64/perfmon/Makefile | |
3476 | @@ -0,0 +1,11 @@ | |
3477 | +# | |
3478 | +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
3479 | +# Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
3480 | +# | |
3481 | +obj-$(CONFIG_PERFMON) += perfmon.o | |
3482 | +obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \ | |
3483 | + perfmon_compat.o | |
3484 | +obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o | |
3485 | +obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o | |
3486 | +obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o | |
3487 | +obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o | |
3488 | --- /dev/null | |
3489 | +++ b/arch/ia64/perfmon/perfmon.c | |
3490 | @@ -0,0 +1,946 @@ | |
3491 | +/* | |
3492 | + * This file implements the IA-64 specific | |
3493 | + * support for the perfmon2 interface | |
3494 | + * | |
3495 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
3496 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
3497 | + * | |
3498 | + * This program is free software; you can redistribute it and/or | |
3499 | + * modify it under the terms of version 2 of the GNU General Public | |
3500 | + * License as published by the Free Software Foundation. | |
3501 | + * | |
3502 | + * This program is distributed in the hope that it will be useful, | |
3503 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
3504 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
3505 | + * General Public License for more details. | |
3506 | + * | |
3507 | + * You should have received a copy of the GNU General Public License | |
3508 | + * along with this program; if not, write to the Free Software | |
3509 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
3510 | + * 02111-1307 USA | |
3511 | + */ | |
3512 | +#include <linux/module.h> | |
3513 | +#include <linux/perfmon_kern.h> | |
3514 | + | |
3515 | +struct pfm_arch_session { | |
3516 | + u32 pfs_sys_use_dbr; /* syswide session uses dbr */ | |
3517 | + u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/ | |
3518 | +}; | |
3519 | + | |
3520 | +DEFINE_PER_CPU(u32, pfm_syst_info); | |
3521 | + | |
3522 | +static struct pfm_arch_session pfm_arch_sessions; | |
3523 | +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock); | |
3524 | + | |
3525 | +static inline void pfm_clear_psr_pp(void) | |
3526 | +{ | |
3527 | + ia64_rsm(IA64_PSR_PP); | |
3528 | +} | |
3529 | + | |
3530 | +static inline void pfm_set_psr_pp(void) | |
3531 | +{ | |
3532 | + ia64_ssm(IA64_PSR_PP); | |
3533 | +} | |
3534 | + | |
3535 | +static inline void pfm_clear_psr_up(void) | |
3536 | +{ | |
3537 | + ia64_rsm(IA64_PSR_UP); | |
3538 | +} | |
3539 | + | |
3540 | +static inline void pfm_set_psr_up(void) | |
3541 | +{ | |
3542 | + ia64_ssm(IA64_PSR_UP); | |
3543 | +} | |
3544 | + | |
3545 | +static inline void pfm_set_psr_l(u64 val) | |
3546 | +{ | |
3547 | + ia64_setreg(_IA64_REG_PSR_L, val); | |
3548 | +} | |
3549 | + | |
3550 | +static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs) | |
3551 | +{ | |
3552 | + unsigned int i; | |
3553 | + | |
3554 | + for (i = 0; i < nibrs; i++) { | |
3555 | + ia64_set_ibr(i, ibrs[i]); | |
3556 | + ia64_dv_serialize_instruction(); | |
3557 | + } | |
3558 | + ia64_srlz_i(); | |
3559 | +} | |
3560 | + | |
3561 | +static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs) | |
3562 | +{ | |
3563 | + unsigned int i; | |
3564 | + | |
3565 | + for (i = 0; i < ndbrs; i++) { | |
3566 | + ia64_set_dbr(i, dbrs[i]); | |
3567 | + ia64_dv_serialize_data(); | |
3568 | + } | |
3569 | + ia64_srlz_d(); | |
3570 | +} | |
3571 | + | |
3572 | +irqreturn_t pmu_interrupt_handler(int irq, void *arg) | |
3573 | +{ | |
3574 | + struct pt_regs *regs; | |
3575 | + regs = get_irq_regs(); | |
3576 | + irq_enter(); | |
3577 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
3578 | + irq_exit(); | |
3579 | + return IRQ_HANDLED; | |
3580 | +} | |
3581 | +static struct irqaction perfmon_irqaction = { | |
3582 | + .handler = pmu_interrupt_handler, | |
3583 | + .flags = IRQF_DISABLED, /* means keep interrupts masked */ | |
3584 | + .name = "perfmon" | |
3585 | +}; | |
3586 | + | |
3587 | +void pfm_arch_quiesce_pmu_percpu(void) | |
3588 | +{ | |
3589 | + u64 dcr; | |
3590 | + /* | |
3591 | + * make sure no measurement is active | |
3592 | + * (may inherit programmed PMCs from EFI). | |
3593 | + */ | |
3594 | + pfm_clear_psr_pp(); | |
3595 | + pfm_clear_psr_up(); | |
3596 | + | |
3597 | + /* | |
3598 | + * ensure dcr.pp is cleared | |
3599 | + */ | |
3600 | + dcr = ia64_getreg(_IA64_REG_CR_DCR); | |
3601 | + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); | |
3602 | + | |
3603 | + /* | |
3604 | + * we run with the PMU not frozen at all times | |
3605 | + */ | |
3606 | + ia64_set_pmc(0, 0); | |
3607 | + ia64_srlz_d(); | |
3608 | +} | |
3609 | + | |
3610 | +void pfm_arch_init_percpu(void) | |
3611 | +{ | |
3612 | + pfm_arch_quiesce_pmu_percpu(); | |
3613 | + /* | |
3614 | + * program PMU interrupt vector | |
3615 | + */ | |
3616 | + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); | |
3617 | + ia64_srlz_d(); | |
3618 | +} | |
3619 | + | |
3620 | +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags) | |
3621 | +{ | |
3622 | + struct pfm_arch_context *ctx_arch; | |
3623 | + | |
3624 | + ctx_arch = pfm_ctx_arch(ctx); | |
3625 | + | |
3626 | + ctx_arch->flags.use_dbr = 0; | |
3627 | + ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0; | |
3628 | + | |
3629 | + PFM_DBG("insecure=%d", ctx_arch->flags.insecure); | |
3630 | + | |
3631 | + return 0; | |
3632 | +} | |
3633 | + | |
3634 | +/* | |
3635 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
3636 | + * Context is locked. Interrupts are masked. Monitoring may be active. | |
3637 | + * PMU access is guaranteed. PMC and PMD registers are live in PMU. | |
3638 | + * | |
3639 | + * Return: | |
3640 | + * non-zero : did not save PMDs (as part of stopping the PMU) | |
3641 | + * 0 : saved PMDs (no need to save them in caller) | |
3642 | + */ | |
3643 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) | |
3644 | +{ | |
3645 | + struct pfm_arch_context *ctx_arch; | |
3646 | + struct pfm_event_set *set; | |
3647 | + u64 psr, tmp; | |
3648 | + | |
3649 | + ctx_arch = pfm_ctx_arch(ctx); | |
3650 | + set = ctx->active_set; | |
3651 | + | |
3652 | + /* | |
3653 | + * save current PSR: needed because we modify it | |
3654 | + */ | |
3655 | + ia64_srlz_d(); | |
3656 | + psr = ia64_getreg(_IA64_REG_PSR); | |
3657 | + | |
3658 | + /* | |
3659 | + * stop monitoring: | |
3660 | + * This is the last instruction which may generate an overflow | |
3661 | + * | |
3662 | + * we do not clear ipsr.up | |
3663 | + */ | |
3664 | + pfm_clear_psr_up(); | |
3665 | + ia64_srlz_d(); | |
3666 | + | |
3667 | + /* | |
3668 | + * extract overflow status bits | |
3669 | + */ | |
3670 | + tmp = ia64_get_pmc(0) & ~0xf; | |
3671 | + | |
3672 | + /* | |
3673 | + * keep a copy of psr.up (for reload) | |
3674 | + */ | |
3675 | + ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP; | |
3676 | + | |
3677 | + /* | |
3678 | + * save overflow status bits | |
3679 | + */ | |
3680 | + set->povfl_pmds[0] = tmp; | |
3681 | + | |
3682 | + /* | |
3683 | + * record how many pending overflows | |
3684 | + * XXX: assume identity mapping for counters | |
3685 | + */ | |
3686 | + set->npend_ovfls = ia64_popcnt(tmp); | |
3687 | + | |
3688 | + /* | |
3689 | + * make sure the PMU is unfrozen for the next task | |
3690 | + */ | |
3691 | + if (set->npend_ovfls) { | |
3692 | + ia64_set_pmc(0, 0); | |
3693 | + ia64_srlz_d(); | |
3694 | + } | |
3695 | + return 1; | |
3696 | +} | |
3697 | + | |
3698 | +/* | |
3699 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
3700 | + * set cannot be NULL. Context is locked. Interrupts are masked. | |
3701 | + * Caller has already restored all PMD and PMC registers. | |
3702 | + * | |
3703 | + * must reactivate monitoring | |
3704 | + */ | |
3705 | +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) | |
3706 | +{ | |
3707 | + struct pfm_arch_context *ctx_arch; | |
3708 | + | |
3709 | + ctx_arch = pfm_ctx_arch(ctx); | |
3710 | + | |
3711 | + /* | |
3712 | + * when monitoring is not explicitly started | |
3713 | + * then psr_up = 0, in which case we do not | |
3714 | + * need to restore | |
3715 | + */ | |
3716 | + if (likely(ctx_arch->ctx_saved_psr_up)) { | |
3717 | + pfm_set_psr_up(); | |
3718 | + ia64_srlz_d(); | |
3719 | + } | |
3720 | +} | |
3721 | + | |
3722 | +int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu) | |
3723 | +{ | |
3724 | + struct pfm_arch_context *ctx_arch; | |
3725 | + int is_system; | |
3726 | + int ret = 0; | |
3727 | + | |
3728 | + ctx_arch = pfm_ctx_arch(ctx); | |
3729 | + is_system = ctx->flags.system; | |
3730 | + | |
3731 | + spin_lock(&pfm_arch_sessions_lock); | |
3732 | + | |
3733 | + if (is_system && ctx_arch->flags.use_dbr) { | |
3734 | + PFM_DBG("syswide context uses dbregs"); | |
3735 | + | |
3736 | + if (pfm_arch_sessions.pfs_ptrace_use_dbr) { | |
3737 | + PFM_DBG("cannot reserve syswide context: " | |
3738 | + "dbregs in use by ptrace"); | |
3739 | + ret = -EBUSY; | |
3740 | + } else { | |
3741 | + pfm_arch_sessions.pfs_sys_use_dbr++; | |
3742 | + } | |
3743 | + } | |
3744 | + spin_unlock(&pfm_arch_sessions_lock); | |
3745 | + | |
3746 | + return ret; | |
3747 | +} | |
3748 | + | |
3749 | +void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu) | |
3750 | +{ | |
3751 | + struct pfm_arch_context *ctx_arch; | |
3752 | + int is_system; | |
3753 | + | |
3754 | + ctx_arch = pfm_ctx_arch(ctx); | |
3755 | + is_system = ctx->flags.system; | |
3756 | + | |
3757 | + spin_lock(&pfm_arch_sessions_lock); | |
3758 | + | |
3759 | + if (is_system && ctx_arch->flags.use_dbr) | |
3760 | + pfm_arch_sessions.pfs_sys_use_dbr--; | |
3761 | + spin_unlock(&pfm_arch_sessions_lock); | |
3762 | +} | |
3763 | + | |
3764 | +/* | |
3765 | + * function called from pfm_load_context_*(). Task is not guaranteed to be | |
3766 | + * current task. If not then other task is guaranteed stopped and off any CPU. | |
3767 | + * context is locked and interrupts are masked. | |
3768 | + * | |
3769 | + * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped. | |
3770 | + * | |
3771 | + * For system-wide task is NULL | |
3772 | + */ | |
3773 | +int pfm_arch_load_context(struct pfm_context *ctx) | |
3774 | +{ | |
3775 | + struct pfm_arch_context *ctx_arch; | |
3776 | + struct pt_regs *regs; | |
3777 | + int ret = 0; | |
3778 | + | |
3779 | + ctx_arch = pfm_ctx_arch(ctx); | |
3780 | + | |
3781 | + /* | |
3782 | + * cannot load a context which is using range restrictions, | |
3783 | + * into a thread that is being debugged. | |
3784 | + * | |
3785 | + * if one set out of several is using the debug registers, then | |
3786 | + * we assume the context as whole is using them. | |
3787 | + */ | |
3788 | + if (ctx_arch->flags.use_dbr) { | |
3789 | + if (ctx->flags.system) { | |
3790 | + spin_lock(&pfm_arch_sessions_lock); | |
3791 | + | |
3792 | + if (pfm_arch_sessions.pfs_ptrace_use_dbr) { | |
3793 | + PFM_DBG("cannot reserve syswide context: " | |
3794 | + "dbregs in use by ptrace"); | |
3795 | + ret = -EBUSY; | |
3796 | + } else { | |
3797 | + pfm_arch_sessions.pfs_sys_use_dbr++; | |
3798 | + PFM_DBG("pfs_sys_use_dbr=%u", | |
3799 | + pfm_arch_sessions.pfs_sys_use_dbr); | |
3800 | + } | |
3801 | + spin_unlock(&pfm_arch_sessions_lock); | |
3802 | + | |
3803 | + } else if (ctx->task->thread.flags & IA64_THREAD_DBG_VALID) { | |
3804 | + PFM_DBG("load_pid [%d] thread is debugged, cannot " | |
3805 | + "use range restrictions", ctx->task->pid); | |
3806 | + ret = -EBUSY; | |
3807 | + } | |
3808 | + if (ret) | |
3809 | + return ret; | |
3810 | + } | |
3811 | + | |
3812 | + /* | |
3813 | + * We need to intervene on context switch to toggle the | |
3814 | + * psr.pp bit in system-wide. As such, we set the TIF | |
3815 | + * flag so that pfm_arch_ctxswout_sys() and the | |
3816 | + * pfm_arch_ctxswin_sys() functions get called | |
3817 | + * from pfm_ctxsw_sys(); | |
3818 | + */ | |
3819 | + if (ctx->flags.system) { | |
3820 | + set_thread_flag(TIF_PERFMON_CTXSW); | |
3821 | + PFM_DBG("[%d] set TIF", current->pid); | |
3822 | + return 0; | |
3823 | + } | |
3824 | + | |
3825 | + regs = task_pt_regs(ctx->task); | |
3826 | + | |
3827 | + /* | |
3828 | + * self-monitoring systematically allows user level control | |
3829 | + */ | |
3830 | + if (ctx->task != current) { | |
3831 | + /* | |
3832 | + * when not current, task is stopped, so this is safe | |
3833 | + */ | |
3834 | + ctx_arch->ctx_saved_psr_up = 0; | |
3835 | + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; | |
3836 | + } else | |
3837 | + ctx_arch->flags.insecure = 1; | |
3838 | + | |
3839 | + /* | |
3840 | + * allow user level control (start/stop/read pmd) if: | |
3841 | + * - self-monitoring | |
3842 | + * - requested at context creation (PFM_IA64_FL_INSECURE) | |
3843 | + * | |
3844 | + * There is no security hole with PFM_IA64_FL_INSECURE because | |
3845 | + * when not self-monitored, the caller must have permissions to | |
3846 | + * attach to the task. | |
3847 | + */ | |
3848 | + if (ctx_arch->flags.insecure) { | |
3849 | + ia64_psr(regs)->sp = 0; | |
3850 | + PFM_DBG("clearing psr.sp for [%d]", ctx->task->pid); | |
3851 | + } | |
3852 | + return 0; | |
3853 | +} | |
3854 | + | |
3855 | +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) | |
3856 | +{ | |
3857 | +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH) | |
3858 | +#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\ | |
3859 | + PFM_ITA_SETFL_EXCL_INTR) | |
3860 | + | |
3861 | +/* exclude return value field */ | |
3862 | +#define PFM_SETFL_ALL_MASK (PFM_ITA_SETFL_BOTH_INTR \ | |
3863 | + | PFM_SETFL_BOTH_SWITCH \ | |
3864 | + | PFM_ITA_SETFL_IDLE_EXCL) | |
3865 | + | |
3866 | + if ((flags & ~PFM_SETFL_ALL_MASK)) { | |
3867 | + PFM_DBG("invalid flags=0x%x", flags); | |
3868 | + return -EINVAL; | |
3869 | + } | |
3870 | + | |
3871 | + if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) { | |
3872 | + PFM_DBG("both excl intr and ontr only are set"); | |
3873 | + return -EINVAL; | |
3874 | + } | |
3875 | + | |
3876 | + if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) { | |
3877 | + PFM_DBG("idle exclude flag only for system-wide context"); | |
3878 | + return -EINVAL; | |
3879 | + } | |
3880 | + return 0; | |
3881 | +} | |
3882 | + | |
3883 | +/* | |
3884 | + * function called from pfm_unload_context_*(). Context is locked. | |
3885 | + * interrupts are masked. task is not guaranteed to be current task. | |
3886 | + * Access to PMU is not guaranteed. | |
3887 | + * | |
3888 | + * function must do whatever arch-specific action is required on unload | |
3889 | + * of a context. | |
3890 | + * | |
3891 | + * called for both system-wide and per-thread. task is NULL for system-wide | |
3892 | + */ | |
3893 | +void pfm_arch_unload_context(struct pfm_context *ctx) | |
3894 | +{ | |
3895 | + struct pfm_arch_context *ctx_arch; | |
3896 | + struct pt_regs *regs; | |
3897 | + | |
3898 | + ctx_arch = pfm_ctx_arch(ctx); | |
3899 | + | |
3900 | + if (ctx->flags.system) { | |
3901 | + /* | |
3902 | + * disable context switch hook | |
3903 | + */ | |
3904 | + clear_thread_flag(TIF_PERFMON_CTXSW); | |
3905 | + | |
3906 | + if (ctx_arch->flags.use_dbr) { | |
3907 | + spin_lock(&pfm_arch_sessions_lock); | |
3908 | + pfm_arch_sessions.pfs_sys_use_dbr--; | |
3909 | + PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr); | |
3910 | + spin_unlock(&pfm_arch_sessions_lock); | |
3911 | + } | |
3912 | + } else { | |
3913 | + regs = task_pt_regs(ctx->task); | |
3914 | + | |
3915 | + /* | |
3916 | + * cancel user level control for per-task context | |
3917 | + */ | |
3918 | + ia64_psr(regs)->sp = 1; | |
3919 | + PFM_DBG("setting psr.sp for [%d]", ctx->task->pid); | |
3920 | + } | |
3921 | +} | |
3922 | + | |
3923 | +/* | |
3924 | + * mask monitoring by setting the privilege level to 0 | |
3925 | + * we cannot use psr.pp/psr.up for this, it is controlled by | |
3926 | + * the user | |
3927 | + */ | |
3928 | +void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) | |
3929 | +{ | |
3930 | + struct pfm_arch_pmu_info *arch_info; | |
3931 | + unsigned long mask; | |
3932 | + unsigned int i; | |
3933 | + | |
3934 | + arch_info = pfm_pmu_info(); | |
3935 | + /* | |
3936 | + * as an optimization we look at the first 64 PMC | |
3937 | + * registers only starting at PMC4. | |
3938 | + */ | |
3939 | + mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; | |
3940 | + for (i = PFM_ITA_FCNTR; mask; i++, mask >>= 1) { | |
3941 | + if (likely(mask & 0x1)) | |
3942 | + ia64_set_pmc(i, set->pmcs[i] & ~0xfUL); | |
3943 | + } | |
3944 | + /* | |
3945 | + * make changes visible | |
3946 | + */ | |
3947 | + ia64_srlz_d(); | |
3948 | +} | |
3949 | + | |
3950 | +/* | |
3951 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
3952 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
3953 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
3954 | + * Access to the PMU is guaranteed. | |
3955 | + * | |
3956 | + * function must restore all PMD registers from set. | |
3957 | + */ | |
3958 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
3959 | +{ | |
3960 | + struct pfm_arch_context *ctx_arch; | |
3961 | + unsigned long *mask; | |
3962 | + u16 i, num; | |
3963 | + | |
3964 | + ctx_arch = pfm_ctx_arch(ctx); | |
3965 | + | |
3966 | + if (ctx_arch->flags.insecure) { | |
3967 | + num = ctx->regs.num_rw_pmd; | |
3968 | + mask = ctx->regs.rw_pmds; | |
3969 | + } else { | |
3970 | + num = set->nused_pmds; | |
3971 | + mask = set->used_pmds; | |
3972 | + } | |
3973 | + /* | |
3974 | + * must restore all implemented read-write PMDS to avoid leaking | |
3975 | + * information especially when PFM_IA64_FL_INSECURE is set. | |
3976 | + * | |
3977 | + * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead | |
3978 | + */ | |
3979 | + for (i = 0; num; i++) { | |
3980 | + if (likely(test_bit(i, mask))) { | |
3981 | + pfm_arch_write_pmd(ctx, i, set->pmds[i].value); | |
3982 | + num--; | |
3983 | + } | |
3984 | + } | |
3985 | + ia64_srlz_d(); | |
3986 | +} | |
3987 | + | |
3988 | +/* | |
3989 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
3990 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
3991 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
3992 | + * Access to the PMU is guaranteed. | |
3993 | + * | |
3994 | + * function must restore all PMC registers from set if needed | |
3995 | + */ | |
3996 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
3997 | +{ | |
3998 | + struct pfm_arch_pmu_info *arch_info; | |
3999 | + u64 mask2 = 0, val, plm; | |
4000 | + unsigned long impl_mask, mask_pmcs; | |
4001 | + unsigned int i; | |
4002 | + | |
4003 | + arch_info = pfm_pmu_info(); | |
4004 | + /* | |
4005 | + * as an optimization we only look at the first 64 | |
4006 | + * PMC registers. In fact, we should never scan the | |
4007 | + * entire impl_pmcs because ibr/dbr are implemented | |
4008 | + * separately. | |
4009 | + * | |
4010 | + * always skip PMC0-PMC3. PMC0 taken care of when saving | |
4011 | + * state. PMC1-PMC3 not used until we get counters in | |
4012 | + * the 60 and above index range. | |
4013 | + */ | |
4014 | + impl_mask = ctx->regs.pmcs[0] >> PFM_ITA_FCNTR; | |
4015 | + mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; | |
4016 | + plm = ctx->state == PFM_CTX_MASKED ? ~0xf : ~0x0; | |
4017 | + | |
4018 | + for (i = PFM_ITA_FCNTR; | |
4019 | + impl_mask; | |
4020 | + i++, impl_mask >>= 1, mask_pmcs >>= 1) { | |
4021 | + if (likely(impl_mask & 0x1)) { | |
4022 | + mask2 = mask_pmcs & 0x1 ? plm : ~0; | |
4023 | + val = set->pmcs[i] & mask2; | |
4024 | + ia64_set_pmc(i, val); | |
4025 | + PFM_DBG_ovfl("pmc%u=0x%lx", i, val); | |
4026 | + } | |
4027 | + } | |
4028 | + /* | |
4029 | + * restore DBR/IBR | |
4030 | + */ | |
4031 | + if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) { | |
4032 | + pfm_restore_ibrs(set->pmcs+256, 8); | |
4033 | + pfm_restore_dbrs(set->pmcs+264, 8); | |
4034 | + } | |
4035 | + ia64_srlz_d(); | |
4036 | +} | |
4037 | + | |
4038 | +void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) | |
4039 | +{ | |
4040 | + u64 psr; | |
4041 | + int is_system; | |
4042 | + | |
4043 | + is_system = ctx->flags.system; | |
4044 | + | |
4045 | + psr = ia64_getreg(_IA64_REG_PSR); | |
4046 | + | |
4047 | + /* | |
4048 | + * monitoring is masked via the PMC.plm | |
4049 | + * | |
4050 | + * As we restore their value, we do not want each counter to | |
4051 | + * restart right away. We stop monitoring using the PSR, | |
4052 | + * restore the PMC (and PMD) and then re-establish the psr | |
4053 | + * as it was. Note that there can be no pending overflow at | |
4054 | + * this point, because monitoring is still MASKED. | |
4055 | + * | |
4056 | + * Because interrupts are masked we can avoid changing | |
4057 | + * DCR.pp. | |
4058 | + */ | |
4059 | + if (is_system) | |
4060 | + pfm_clear_psr_pp(); | |
4061 | + else | |
4062 | + pfm_clear_psr_up(); | |
4063 | + | |
4064 | + ia64_srlz_d(); | |
4065 | + | |
4066 | + pfm_arch_restore_pmcs(ctx, set); | |
4067 | + | |
4068 | + /* | |
4069 | + * restore psr | |
4070 | + * | |
4071 | + * monitoring may start right now but interrupts | |
4072 | + * are still masked | |
4073 | + */ | |
4074 | + pfm_set_psr_l(psr); | |
4075 | + ia64_srlz_d(); | |
4076 | +} | |
4077 | + | |
4078 | +/* | |
4079 | + * Called from pfm_stop() | |
4080 | + * | |
4081 | + * For per-thread: | |
4082 | + * task is not necessarily current. If not current task, then | |
4083 | + * task is guaranteed stopped and off any cpu. Access to PMU | |
4084 | + * is not guaranteed. Interrupts are masked. Context is locked. | |
4085 | + * Set is the active set. | |
4086 | + * | |
4087 | + * must disable active monitoring. ctx cannot be NULL | |
4088 | + */ | |
4089 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) | |
4090 | +{ | |
4091 | + struct pfm_arch_context *ctx_arch; | |
4092 | + struct pt_regs *regs; | |
4093 | + u64 dcr, psr; | |
4094 | + | |
4095 | + ctx_arch = pfm_ctx_arch(ctx); | |
4096 | + regs = task_pt_regs(task); | |
4097 | + | |
4098 | + if (!ctx->flags.system) { | |
4099 | + /* | |
4100 | + * in ZOMBIE state we always have task == current due to | |
4101 | + * pfm_exit_thread() | |
4102 | + */ | |
4103 | + ia64_psr(regs)->up = 0; | |
4104 | + ctx_arch->ctx_saved_psr_up = 0; | |
4105 | + | |
4106 | + /* | |
4107 | + * in case of ZOMBIE state, there is no unload to clear | |
4108 | + * insecure monitoring, so we do it in stop instead. | |
4109 | + */ | |
4110 | + if (ctx->state == PFM_CTX_ZOMBIE) | |
4111 | + ia64_psr(regs)->sp = 1; | |
4112 | + | |
4113 | + if (task == current) { | |
4114 | + pfm_clear_psr_up(); | |
4115 | + ia64_srlz_d(); | |
4116 | + } | |
4117 | + } else if (ctx->flags.started) { /* do not stop twice */ | |
4118 | + dcr = ia64_getreg(_IA64_REG_CR_DCR); | |
4119 | + psr = ia64_getreg(_IA64_REG_PSR); | |
4120 | + | |
4121 | + ia64_psr(regs)->pp = 0; | |
4122 | + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); | |
4123 | + pfm_clear_psr_pp(); | |
4124 | + ia64_srlz_d(); | |
4125 | + | |
4126 | + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) { | |
4127 | + PFM_DBG("disabling idle exclude"); | |
4128 | + __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL; | |
4129 | + } | |
4130 | + } | |
4131 | +} | |
4132 | + | |
4133 | +/* | |
4134 | + * called from pfm_start() | |
4135 | + * | |
4136 | + * Interrupts are masked. Context is locked. Set is the active set. | |
4137 | + * | |
4138 | + * For per-thread: | |
4139 | + * Task is not necessarily current. If not current task, then task | |
4140 | + * is guaranteed stopped and off any cpu. No access to PMU is task | |
4141 | + * is not current. | |
4142 | + * | |
4143 | + * For system-wide: | |
4144 | + * task is always current | |
4145 | + * | |
4146 | + * must enable active monitoring. | |
4147 | + */ | |
4148 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) | |
4149 | +{ | |
4150 | + struct pfm_arch_context *ctx_arch; | |
4151 | + struct pt_regs *regs; | |
4152 | + u64 dcr, dcr_pp, psr_pp; | |
4153 | + u32 flags; | |
4154 | + | |
4155 | + ctx_arch = pfm_ctx_arch(ctx); | |
4156 | + regs = task_pt_regs(task); | |
4157 | + flags = ctx->active_set->flags; | |
4158 | + | |
4159 | + /* | |
4160 | + * per-thread mode | |
4161 | + */ | |
4162 | + if (!ctx->flags.system) { | |
4163 | + | |
4164 | + ia64_psr(regs)->up = 1; | |
4165 | + | |
4166 | + if (task == current) { | |
4167 | + pfm_set_psr_up(); | |
4168 | + ia64_srlz_d(); | |
4169 | + } else { | |
4170 | + /* | |
4171 | + * activate monitoring at next ctxswin | |
4172 | + */ | |
4173 | + ctx_arch->ctx_saved_psr_up = IA64_PSR_UP; | |
4174 | + } | |
4175 | + return; | |
4176 | + } | |
4177 | + | |
4178 | + /* | |
4179 | + * system-wide mode | |
4180 | + */ | |
4181 | + dcr = ia64_getreg(_IA64_REG_CR_DCR); | |
4182 | + if (flags & PFM_ITA_SETFL_INTR_ONLY) { | |
4183 | + dcr_pp = 1; | |
4184 | + psr_pp = 0; | |
4185 | + } else if (flags & PFM_ITA_SETFL_EXCL_INTR) { | |
4186 | + dcr_pp = 0; | |
4187 | + psr_pp = 1; | |
4188 | + } else { | |
4189 | + dcr_pp = psr_pp = 1; | |
4190 | + } | |
4191 | + PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp); | |
4192 | + | |
4193 | + /* | |
4194 | + * update dcr_pp and psr_pp | |
4195 | + */ | |
4196 | + if (dcr_pp) | |
4197 | + ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP); | |
4198 | + else | |
4199 | + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); | |
4200 | + | |
4201 | + if (psr_pp) { | |
4202 | + pfm_set_psr_pp(); | |
4203 | + ia64_psr(regs)->pp = 1; | |
4204 | + } else { | |
4205 | + pfm_clear_psr_pp(); | |
4206 | + ia64_psr(regs)->pp = 0; | |
4207 | + } | |
4208 | + ia64_srlz_d(); | |
4209 | + | |
4210 | + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) { | |
4211 | + PFM_DBG("enable idle exclude"); | |
4212 | + __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL; | |
4213 | + } | |
4214 | +} | |
4215 | + | |
4216 | +/* | |
4217 | + * Only call this function when a process is trying to | |
4218 | + * write the debug registers (reading is always allowed) | |
4219 | + * called from arch/ia64/kernel/ptrace.c:access_uarea() | |
4220 | + */ | |
4221 | +int __pfm_use_dbregs(struct task_struct *task) | |
4222 | +{ | |
4223 | + struct pfm_arch_context *ctx_arch; | |
4224 | + struct pfm_context *ctx; | |
4225 | + unsigned long flags; | |
4226 | + int ret = 0; | |
4227 | + | |
4228 | + PFM_DBG("called for [%d]", task->pid); | |
4229 | + | |
4230 | + ctx = task->pfm_context; | |
4231 | + | |
4232 | + /* | |
4233 | + * do it only once | |
4234 | + */ | |
4235 | + if (task->thread.flags & IA64_THREAD_DBG_VALID) { | |
4236 | + PFM_DBG("IA64_THREAD_DBG_VALID already set"); | |
4237 | + return 0; | |
4238 | + } | |
4239 | + if (ctx) { | |
4240 | + spin_lock_irqsave(&ctx->lock, flags); | |
4241 | + ctx_arch = pfm_ctx_arch(ctx); | |
4242 | + | |
4243 | + if (ctx_arch->flags.use_dbr == 1) { | |
4244 | + PFM_DBG("PMU using dbregs already, no ptrace access"); | |
4245 | + ret = -1; | |
4246 | + } | |
4247 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
4248 | + if (ret) | |
4249 | + return ret; | |
4250 | + } | |
4251 | + | |
4252 | + spin_lock(&pfm_arch_sessions_lock); | |
4253 | + | |
4254 | + /* | |
4255 | + * We cannot allow setting breakpoints when system wide monitoring | |
4256 | + * sessions are using the debug registers. | |
4257 | + */ | |
4258 | + if (!pfm_arch_sessions.pfs_sys_use_dbr) | |
4259 | + pfm_arch_sessions.pfs_ptrace_use_dbr++; | |
4260 | + else | |
4261 | + ret = -1; | |
4262 | + | |
4263 | + PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d", | |
4264 | + pfm_arch_sessions.pfs_ptrace_use_dbr, | |
4265 | + pfm_arch_sessions.pfs_sys_use_dbr, | |
4266 | + task->pid, ret); | |
4267 | + | |
4268 | + spin_unlock(&pfm_arch_sessions_lock); | |
4269 | + if (ret) | |
4270 | + return ret; | |
4271 | +#ifndef CONFIG_SMP | |
4272 | + /* | |
4273 | + * in UP, we need to check whether the current | |
4274 | + * owner of the PMU is not using the debug registers | |
4275 | + * for monitoring. Because we are using a lazy | |
4276 | + * save on ctxswout, we must force a save in this | |
4277 | + * case because the debug registers are being | |
4278 | + * modified by another task. We save the current | |
4279 | + * PMD registers, and clear ownership. In ctxswin, | |
4280 | + * full state will be reloaded. | |
4281 | + * | |
4282 | + * Note: we overwrite task. | |
4283 | + */ | |
4284 | + task = __get_cpu_var(pmu_owner); | |
4285 | + ctx = __get_cpu_var(pmu_ctx); | |
4286 | + | |
4287 | + if (task == NULL) | |
4288 | + return 0; | |
4289 | + | |
4290 | + ctx_arch = pfm_ctx_arch(ctx); | |
4291 | + | |
4292 | + if (ctx_arch->flags.use_dbr) | |
4293 | + pfm_save_pmds_release(ctx); | |
4294 | +#endif | |
4295 | + return 0; | |
4296 | +} | |
4297 | + | |
4298 | +/* | |
4299 | + * This function is called for every task that exits with the | |
4300 | + * IA64_THREAD_DBG_VALID set. This indicates a task which was | |
4301 | + * able to use the debug registers for debugging purposes via | |
4302 | + * ptrace(). Therefore we know it was not using them for | |
4303 | + * performance monitoring, so we only decrement the number | |
4304 | + * of "ptraced" debug register users to keep the count up to date | |
4305 | + */ | |
4306 | +int __pfm_release_dbregs(struct task_struct *task) | |
4307 | +{ | |
4308 | + int ret; | |
4309 | + | |
4310 | + spin_lock(&pfm_arch_sessions_lock); | |
4311 | + | |
4312 | + if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) { | |
4313 | + PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid); | |
4314 | + ret = -1; | |
4315 | + } else { | |
4316 | + pfm_arch_sessions.pfs_ptrace_use_dbr--; | |
4317 | + ret = 0; | |
4318 | + } | |
4319 | + spin_unlock(&pfm_arch_sessions_lock); | |
4320 | + | |
4321 | + return ret; | |
4322 | +} | |
4323 | + | |
4324 | +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx, | |
4325 | + struct pfm_event_set *set) | |
4326 | +{ | |
4327 | + struct pfm_arch_context *ctx_arch; | |
4328 | + struct task_struct *task; | |
4329 | + struct thread_struct *thread; | |
4330 | + int ret = 0, state; | |
4331 | + int i, can_access_pmu = 0; | |
4332 | + int is_loaded, is_system; | |
4333 | + | |
4334 | + ctx_arch = pfm_ctx_arch(ctx); | |
4335 | + state = ctx->state; | |
4336 | + task = ctx->task; | |
4337 | + is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED; | |
4338 | + is_system = ctx->flags.system; | |
4339 | + can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system; | |
4340 | + | |
4341 | + if (is_loaded == 0) | |
4342 | + goto done; | |
4343 | + | |
4344 | + if (is_system == 0) { | |
4345 | + thread = &(task->thread); | |
4346 | + | |
4347 | + /* | |
4348 | + * cannot use debug registers for montioring if they are | |
4349 | + * already used for debugging | |
4350 | + */ | |
4351 | + if (thread->flags & IA64_THREAD_DBG_VALID) { | |
4352 | + PFM_DBG("debug registers already in use for [%d]", | |
4353 | + task->pid); | |
4354 | + return -EBUSY; | |
4355 | + } | |
4356 | + } | |
4357 | + | |
4358 | + /* | |
4359 | + * check for debug registers in system wide mode | |
4360 | + */ | |
4361 | + spin_lock(&pfm_arch_sessions_lock); | |
4362 | + | |
4363 | + if (is_system) { | |
4364 | + if (pfm_arch_sessions.pfs_ptrace_use_dbr) | |
4365 | + ret = -EBUSY; | |
4366 | + else | |
4367 | + pfm_arch_sessions.pfs_sys_use_dbr++; | |
4368 | + } | |
4369 | + | |
4370 | + spin_unlock(&pfm_arch_sessions_lock); | |
4371 | + | |
4372 | + if (ret != 0) | |
4373 | + return ret; | |
4374 | + | |
4375 | + /* | |
4376 | + * clear hardware registers to make sure we don't | |
4377 | + * pick up stale state. | |
4378 | + */ | |
4379 | + if (can_access_pmu) { | |
4380 | + PFM_DBG("clearing ibrs, dbrs"); | |
4381 | + for (i = 0; i < 8; i++) { | |
4382 | + ia64_set_ibr(i, 0); | |
4383 | + ia64_dv_serialize_instruction(); | |
4384 | + } | |
4385 | + ia64_srlz_i(); | |
4386 | + for (i = 0; i < 8; i++) { | |
4387 | + ia64_set_dbr(i, 0); | |
4388 | + ia64_dv_serialize_data(); | |
4389 | + } | |
4390 | + ia64_srlz_d(); | |
4391 | + } | |
4392 | +done: | |
4393 | + /* | |
4394 | + * debug registers are now in use | |
4395 | + */ | |
4396 | + ctx_arch->flags.use_dbr = 1; | |
4397 | + set->priv_flags |= PFM_ITA_SETFL_USE_DBR; | |
4398 | + PFM_DBG("set%u use_dbr=1", set->id); | |
4399 | + return 0; | |
4400 | +} | |
4401 | +EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used); | |
4402 | + | |
4403 | +char *pfm_arch_get_pmu_module_name(void) | |
4404 | +{ | |
4405 | + switch (local_cpu_data->family) { | |
4406 | + case 0x07: | |
4407 | + return "perfmon_itanium"; | |
4408 | + case 0x1f: | |
4409 | + return "perfmon_mckinley"; | |
4410 | + case 0x20: | |
4411 | + return "perfmon_montecito"; | |
4412 | + default: | |
4413 | + return "perfmon_generic"; | |
4414 | + } | |
4415 | + return NULL; | |
4416 | +} | |
4417 | + | |
4418 | +/* | |
4419 | + * global arch-specific intialization, called only once | |
4420 | + */ | |
4421 | +int __init pfm_arch_init(void) | |
4422 | +{ | |
4423 | + int ret; | |
4424 | + | |
4425 | + spin_lock_init(&pfm_arch_sessions_lock); | |
4426 | + | |
4427 | +#ifdef CONFIG_IA64_PERFMON_COMPAT | |
4428 | + ret = pfm_ia64_compat_init(); | |
4429 | + if (ret) | |
4430 | + return ret; | |
4431 | +#endif | |
4432 | + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); | |
4433 | + | |
4434 | + | |
4435 | + return 0; | |
4436 | +} | |
4437 | --- /dev/null | |
4438 | +++ b/arch/ia64/perfmon/perfmon_compat.c | |
4439 | @@ -0,0 +1,1210 @@ | |
4440 | +/* | |
4441 | + * This file implements the IA-64 specific | |
4442 | + * support for the perfmon2 interface | |
4443 | + * | |
4444 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
4445 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
4446 | + * | |
4447 | + * This program is free software; you can redistribute it and/or | |
4448 | + * modify it under the terms of version 2 of the GNU General Public | |
4449 | + * License as published by the Free Software Foundation. | |
4450 | + * | |
4451 | + * This program is distributed in the hope that it will be useful, | |
4452 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
4453 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
4454 | + * General Public License for more details. | |
4455 | + * | |
4456 | + * You should have received a copy of the GNU General Public License | |
4457 | + * along with this program; if not, write to the Free Software | |
4458 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
4459 | + * 02111-1307 USA | |
4460 | + */ | |
4461 | +#include <linux/interrupt.h> | |
4462 | +#include <linux/module.h> | |
4463 | +#include <linux/file.h> | |
4464 | +#include <linux/fdtable.h> | |
4465 | +#include <linux/seq_file.h> | |
4466 | +#include <linux/vmalloc.h> | |
4467 | +#include <linux/proc_fs.h> | |
4468 | +#include <linux/perfmon_kern.h> | |
4469 | +#include <linux/uaccess.h> | |
4470 | + | |
4471 | +asmlinkage long sys_pfm_stop(int fd); | |
4472 | +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st); | |
4473 | +asmlinkage long sys_pfm_unload_context(int fd); | |
4474 | +asmlinkage long sys_pfm_restart(int fd); | |
4475 | +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld); | |
4476 | + | |
4477 | +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what); | |
4478 | + | |
4479 | +extern ssize_t __pfm_read(struct pfm_context *ctx, | |
4480 | + union pfarg_msg *msg_buf, | |
4481 | + int non_block); | |
4482 | +/* | |
4483 | + * function providing some help for backward compatiblity with old IA-64 | |
4484 | + * applications. In the old model, certain attributes of a counter were | |
4485 | + * passed via the PMC, now they are passed via the PMD. | |
4486 | + */ | |
4487 | +static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum, | |
4488 | + u32 rflags, | |
4489 | + unsigned long *smpl_pmds, | |
4490 | + unsigned long *reset_pmds, | |
4491 | + u64 eventid) | |
4492 | +{ | |
4493 | + struct pfm_event_set *set; | |
4494 | + int is_counting; | |
4495 | + unsigned long *impl_pmds; | |
4496 | + u32 flags = 0; | |
4497 | + u16 max_pmd; | |
4498 | + | |
4499 | + impl_pmds = ctx->regs.pmds; | |
4500 | + max_pmd = ctx->regs.max_pmd; | |
4501 | + | |
4502 | + /* | |
4503 | + * given that we do not maintain PMC ->PMD dependencies | |
4504 | + * we cannot figure out what to do in case PMCxx != PMDxx | |
4505 | + */ | |
4506 | + if (cnum > max_pmd) | |
4507 | + return 0; | |
4508 | + | |
4509 | + /* | |
4510 | + * assumes PMCxx controls PMDxx which is always true for counters | |
4511 | + * on Itanium PMUs. | |
4512 | + */ | |
4513 | + is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64; | |
4514 | + set = pfm_find_set(ctx, set_id, 0); | |
4515 | + | |
4516 | + /* | |
4517 | + * for v2.0, we only allowed counting PMD to generate | |
4518 | + * user-level notifications. Same thing with randomization. | |
4519 | + */ | |
4520 | + if (is_counting) { | |
4521 | + if (rflags & PFM_REGFL_OVFL_NOTIFY) | |
4522 | + flags |= PFM_REGFL_OVFL_NOTIFY; | |
4523 | + if (rflags & PFM_REGFL_RANDOM) | |
4524 | + flags |= PFM_REGFL_RANDOM; | |
4525 | + /* | |
4526 | + * verify validity of smpl_pmds | |
4527 | + */ | |
4528 | + if (unlikely(bitmap_subset(smpl_pmds, | |
4529 | + impl_pmds, max_pmd) == 0)) { | |
4530 | + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u", | |
4531 | + (unsigned long long)smpl_pmds[0], cnum); | |
4532 | + return -EINVAL; | |
4533 | + } | |
4534 | + /* | |
4535 | + * verify validity of reset_pmds | |
4536 | + */ | |
4537 | + if (unlikely(bitmap_subset(reset_pmds, | |
4538 | + impl_pmds, max_pmd) == 0)) { | |
4539 | + PFM_DBG("invalid reset_pmds=0x%lx for pmd%u", | |
4540 | + reset_pmds[0], cnum); | |
4541 | + return -EINVAL; | |
4542 | + } | |
4543 | + /* | |
4544 | + * ensures that a PFM_READ_PMDS succeeds with a | |
4545 | + * corresponding PFM_WRITE_PMDS | |
4546 | + */ | |
4547 | + __set_bit(cnum, set->used_pmds); | |
4548 | + | |
4549 | + } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { | |
4550 | + PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum); | |
4551 | + return -EINVAL; | |
4552 | + } | |
4553 | + | |
4554 | + set->pmds[cnum].flags = flags; | |
4555 | + | |
4556 | + if (is_counting) { | |
4557 | + bitmap_copy(set->pmds[cnum].reset_pmds, | |
4558 | + reset_pmds, | |
4559 | + max_pmd); | |
4560 | + | |
4561 | + bitmap_copy(set->pmds[cnum].smpl_pmds, | |
4562 | + smpl_pmds, | |
4563 | + max_pmd); | |
4564 | + | |
4565 | + set->pmds[cnum].eventid = eventid; | |
4566 | + | |
4567 | + /* | |
4568 | + * update ovfl_notify | |
4569 | + */ | |
4570 | + if (rflags & PFM_REGFL_OVFL_NOTIFY) | |
4571 | + __set_bit(cnum, set->ovfl_notify); | |
4572 | + else | |
4573 | + __clear_bit(cnum, set->ovfl_notify); | |
4574 | + | |
4575 | + } | |
4576 | + PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx", | |
4577 | + cnum, flags, | |
4578 | + eventid, | |
4579 | + reset_pmds[0], | |
4580 | + smpl_pmds[0]); | |
4581 | + | |
4582 | + return 0; | |
4583 | +} | |
4584 | + | |
4585 | + | |
4586 | +int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count) | |
4587 | +{ | |
4588 | + struct pfarg_dbreg *req = arg; | |
4589 | + struct pfarg_pmc pmc; | |
4590 | + int i, ret = 0; | |
4591 | + | |
4592 | + memset(&pmc, 0, sizeof(pmc)); | |
4593 | + | |
4594 | + for (i = 0; i < count; i++, req++) { | |
4595 | + pmc.reg_num = 256+req->dbreg_num; | |
4596 | + pmc.reg_value = req->dbreg_value; | |
4597 | + pmc.reg_flags = 0; | |
4598 | + pmc.reg_set = req->dbreg_set; | |
4599 | + | |
4600 | + ret = __pfm_write_pmcs(ctx, &pmc, 1); | |
4601 | + | |
4602 | + req->dbreg_flags &= ~PFM_REG_RETFL_MASK; | |
4603 | + req->dbreg_flags |= pmc.reg_flags; | |
4604 | + | |
4605 | + if (ret) | |
4606 | + return ret; | |
4607 | + } | |
4608 | + return 0; | |
4609 | +} | |
4610 | + | |
4611 | +static long pfm_write_ibrs_old(int fd, void __user *ureq, int count) | |
4612 | +{ | |
4613 | + struct pfm_context *ctx; | |
4614 | + struct task_struct *task; | |
4615 | + struct file *filp; | |
4616 | + struct pfarg_dbreg *req = NULL; | |
4617 | + void *fptr, *resume; | |
4618 | + unsigned long flags; | |
4619 | + size_t sz; | |
4620 | + int ret, fput_needed; | |
4621 | + | |
4622 | + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) | |
4623 | + return -EINVAL; | |
4624 | + | |
4625 | + sz = count*sizeof(*req); | |
4626 | + | |
4627 | + filp = fget_light(fd, &fput_needed); | |
4628 | + if (unlikely(filp == NULL)) { | |
4629 | + PFM_DBG("invalid fd %d", fd); | |
4630 | + return -EBADF; | |
4631 | + } | |
4632 | + | |
4633 | + ctx = filp->private_data; | |
4634 | + ret = -EBADF; | |
4635 | + | |
4636 | + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { | |
4637 | + PFM_DBG("fd %d not related to perfmon", fd); | |
4638 | + goto error; | |
4639 | + } | |
4640 | + | |
4641 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
4642 | + if (ret) | |
4643 | + goto error; | |
4644 | + | |
4645 | + spin_lock_irqsave(&ctx->lock, flags); | |
4646 | + | |
4647 | + task = ctx->task; | |
4648 | + | |
4649 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
4650 | + if (ret == 0) | |
4651 | + ret = __pfm_write_ibrs_old(ctx, req, count); | |
4652 | + | |
4653 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
4654 | + | |
4655 | + if (resume) | |
4656 | + pfm_resume_task(task, resume); | |
4657 | + | |
4658 | + if (copy_to_user(ureq, req, sz)) | |
4659 | + ret = -EFAULT; | |
4660 | + | |
4661 | + kfree(fptr); | |
4662 | +error: | |
4663 | + fput_light(filp, fput_needed); | |
4664 | + return ret; | |
4665 | +} | |
4666 | + | |
4667 | +int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count) | |
4668 | +{ | |
4669 | + struct pfarg_dbreg *req = arg; | |
4670 | + struct pfarg_pmc pmc; | |
4671 | + int i, ret = 0; | |
4672 | + | |
4673 | + memset(&pmc, 0, sizeof(pmc)); | |
4674 | + | |
4675 | + for (i = 0; i < count; i++, req++) { | |
4676 | + pmc.reg_num = 264+req->dbreg_num; | |
4677 | + pmc.reg_value = req->dbreg_value; | |
4678 | + pmc.reg_flags = 0; | |
4679 | + pmc.reg_set = req->dbreg_set; | |
4680 | + | |
4681 | + ret = __pfm_write_pmcs(ctx, &pmc, 1); | |
4682 | + | |
4683 | + req->dbreg_flags &= ~PFM_REG_RETFL_MASK; | |
4684 | + req->dbreg_flags |= pmc.reg_flags; | |
4685 | + if (ret) | |
4686 | + return ret; | |
4687 | + } | |
4688 | + return 0; | |
4689 | +} | |
4690 | + | |
4691 | +static long pfm_write_dbrs_old(int fd, void __user *ureq, int count) | |
4692 | +{ | |
4693 | + struct pfm_context *ctx; | |
4694 | + struct task_struct *task; | |
4695 | + struct file *filp; | |
4696 | + struct pfarg_dbreg *req = NULL; | |
4697 | + void *fptr, *resume; | |
4698 | + unsigned long flags; | |
4699 | + size_t sz; | |
4700 | + int ret, fput_needed; | |
4701 | + | |
4702 | + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) | |
4703 | + return -EINVAL; | |
4704 | + | |
4705 | + sz = count*sizeof(*req); | |
4706 | + | |
4707 | + filp = fget_light(fd, &fput_needed); | |
4708 | + if (unlikely(filp == NULL)) { | |
4709 | + PFM_DBG("invalid fd %d", fd); | |
4710 | + return -EBADF; | |
4711 | + } | |
4712 | + | |
4713 | + ctx = filp->private_data; | |
4714 | + ret = -EBADF; | |
4715 | + | |
4716 | + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { | |
4717 | + PFM_DBG("fd %d not related to perfmon", fd); | |
4718 | + goto error; | |
4719 | + } | |
4720 | + | |
4721 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
4722 | + if (ret) | |
4723 | + goto error; | |
4724 | + | |
4725 | + spin_lock_irqsave(&ctx->lock, flags); | |
4726 | + | |
4727 | + task = ctx->task; | |
4728 | + | |
4729 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
4730 | + if (ret == 0) | |
4731 | + ret = __pfm_write_dbrs_old(ctx, req, count); | |
4732 | + | |
4733 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
4734 | + | |
4735 | + if (resume) | |
4736 | + pfm_resume_task(task, resume); | |
4737 | + | |
4738 | + if (copy_to_user(ureq, req, sz)) | |
4739 | + ret = -EFAULT; | |
4740 | + | |
4741 | + kfree(fptr); | |
4742 | +error: | |
4743 | + fput_light(filp, fput_needed); | |
4744 | + return ret; | |
4745 | +} | |
4746 | + | |
4747 | +int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old, | |
4748 | + int count) | |
4749 | +{ | |
4750 | + struct pfarg_pmc req; | |
4751 | + unsigned int i; | |
4752 | + int ret, error_code; | |
4753 | + | |
4754 | + memset(&req, 0, sizeof(req)); | |
4755 | + | |
4756 | + for (i = 0; i < count; i++, req_old++) { | |
4757 | + req.reg_num = req_old->reg_num; | |
4758 | + req.reg_set = req_old->reg_set; | |
4759 | + req.reg_flags = 0; | |
4760 | + req.reg_value = req_old->reg_value; | |
4761 | + | |
4762 | + ret = __pfm_write_pmcs(ctx, (void *)&req, 1); | |
4763 | + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; | |
4764 | + req_old->reg_flags |= req.reg_flags; | |
4765 | + | |
4766 | + if (ret) | |
4767 | + return ret; | |
4768 | + | |
4769 | + ret = pfm_compat_update_pmd(ctx, req_old->reg_set, | |
4770 | + req_old->reg_num, | |
4771 | + (u32)req_old->reg_flags, | |
4772 | + req_old->reg_smpl_pmds, | |
4773 | + req_old->reg_reset_pmds, | |
4774 | + req_old->reg_smpl_eventid); | |
4775 | + | |
4776 | + error_code = ret ? PFM_REG_RETFL_EINVAL : 0; | |
4777 | + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; | |
4778 | + req_old->reg_flags |= error_code; | |
4779 | + | |
4780 | + if (ret) | |
4781 | + return ret; | |
4782 | + } | |
4783 | + return 0; | |
4784 | +} | |
4785 | + | |
4786 | +static long pfm_write_pmcs_old(int fd, void __user *ureq, int count) | |
4787 | +{ | |
4788 | + struct pfm_context *ctx; | |
4789 | + struct task_struct *task; | |
4790 | + struct file *filp; | |
4791 | + struct pfarg_reg *req = NULL; | |
4792 | + void *fptr, *resume; | |
4793 | + unsigned long flags; | |
4794 | + size_t sz; | |
4795 | + int ret, fput_needed; | |
4796 | + | |
4797 | + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) | |
4798 | + return -EINVAL; | |
4799 | + | |
4800 | + sz = count*sizeof(*req); | |
4801 | + | |
4802 | + filp = fget_light(fd, &fput_needed); | |
4803 | + if (unlikely(filp == NULL)) { | |
4804 | + PFM_DBG("invalid fd %d", fd); | |
4805 | + return -EBADF; | |
4806 | + } | |
4807 | + | |
4808 | + ctx = filp->private_data; | |
4809 | + ret = -EBADF; | |
4810 | + | |
4811 | + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { | |
4812 | + PFM_DBG("fd %d not related to perfmon", fd); | |
4813 | + goto error; | |
4814 | + } | |
4815 | + | |
4816 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
4817 | + if (ret) | |
4818 | + goto error; | |
4819 | + | |
4820 | + spin_lock_irqsave(&ctx->lock, flags); | |
4821 | + | |
4822 | + task = ctx->task; | |
4823 | + | |
4824 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
4825 | + if (ret == 0) | |
4826 | + ret = __pfm_write_pmcs_old(ctx, req, count); | |
4827 | + | |
4828 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
4829 | + | |
4830 | + if (resume) | |
4831 | + pfm_resume_task(task, resume); | |
4832 | + | |
4833 | + if (copy_to_user(ureq, req, sz)) | |
4834 | + ret = -EFAULT; | |
4835 | + | |
4836 | + kfree(fptr); | |
4837 | + | |
4838 | +error: | |
4839 | + fput_light(filp, fput_needed); | |
4840 | + return ret; | |
4841 | +} | |
4842 | + | |
4843 | +int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, | |
4844 | + int count) | |
4845 | +{ | |
4846 | + struct pfarg_pmd req; | |
4847 | + int i, ret; | |
4848 | + | |
4849 | + memset(&req, 0, sizeof(req)); | |
4850 | + | |
4851 | + for (i = 0; i < count; i++, req_old++) { | |
4852 | + req.reg_num = req_old->reg_num; | |
4853 | + req.reg_set = req_old->reg_set; | |
4854 | + req.reg_value = req_old->reg_value; | |
4855 | + /* flags passed with pmcs in v2.0 */ | |
4856 | + | |
4857 | + req.reg_long_reset = req_old->reg_long_reset; | |
4858 | + req.reg_short_reset = req_old->reg_short_reset; | |
4859 | + req.reg_random_mask = req_old->reg_random_mask; | |
4860 | + /* | |
4861 | + * reg_random_seed is ignored since v2.3 | |
4862 | + */ | |
4863 | + | |
4864 | + /* | |
4865 | + * skip last_reset_val not used for writing | |
4866 | + * skip smpl_pmds, reset_pmds, eventid, ovfl_swtch_cnt | |
4867 | + * as set in pfm_write_pmcs_old. | |
4868 | + * | |
4869 | + * ovfl_switch_cnt ignored, not implemented in v2.0 | |
4870 | + */ | |
4871 | + ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1); | |
4872 | + | |
4873 | + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; | |
4874 | + req_old->reg_flags |= req.reg_flags; | |
4875 | + | |
4876 | + if (ret) | |
4877 | + return ret; | |
4878 | + } | |
4879 | + return 0; | |
4880 | +} | |
4881 | + | |
4882 | +static long pfm_write_pmds_old(int fd, void __user *ureq, int count) | |
4883 | +{ | |
4884 | + struct pfm_context *ctx; | |
4885 | + struct task_struct *task; | |
4886 | + struct file *filp; | |
4887 | + struct pfarg_reg *req = NULL; | |
4888 | + void *fptr, *resume; | |
4889 | + unsigned long flags; | |
4890 | + size_t sz; | |
4891 | + int ret, fput_needed; | |
4892 | + | |
4893 | + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) | |
4894 | + return -EINVAL; | |
4895 | + | |
4896 | + sz = count*sizeof(*req); | |
4897 | + | |
4898 | + filp = fget_light(fd, &fput_needed); | |
4899 | + if (unlikely(filp == NULL)) { | |
4900 | + PFM_DBG("invalid fd %d", fd); | |
4901 | + return -EBADF; | |
4902 | + } | |
4903 | + | |
4904 | + ctx = filp->private_data; | |
4905 | + ret = -EBADF; | |
4906 | + | |
4907 | + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { | |
4908 | + PFM_DBG("fd %d not related to perfmon", fd); | |
4909 | + goto error; | |
4910 | + } | |
4911 | + | |
4912 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
4913 | + if (ret) | |
4914 | + goto error; | |
4915 | + | |
4916 | + spin_lock_irqsave(&ctx->lock, flags); | |
4917 | + | |
4918 | + task = ctx->task; | |
4919 | + | |
4920 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
4921 | + if (ret == 0) | |
4922 | + ret = __pfm_write_pmds_old(ctx, req, count); | |
4923 | + | |
4924 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
4925 | + | |
4926 | + if (copy_to_user(ureq, req, sz)) | |
4927 | + ret = -EFAULT; | |
4928 | + | |
4929 | + if (resume) | |
4930 | + pfm_resume_task(task, resume); | |
4931 | + | |
4932 | + kfree(fptr); | |
4933 | +error: | |
4934 | + fput_light(filp, fput_needed); | |
4935 | + return ret; | |
4936 | +} | |
4937 | + | |
4938 | +int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, | |
4939 | + int count) | |
4940 | +{ | |
4941 | + struct pfarg_pmd req; | |
4942 | + int i, ret; | |
4943 | + | |
4944 | + memset(&req, 0, sizeof(req)); | |
4945 | + | |
4946 | + for (i = 0; i < count; i++, req_old++) { | |
4947 | + req.reg_num = req_old->reg_num; | |
4948 | + req.reg_set = req_old->reg_set; | |
4949 | + | |
4950 | + /* skip value not used for reading */ | |
4951 | + req.reg_flags = req_old->reg_flags; | |
4952 | + | |
4953 | + /* skip short/long_reset not used for reading */ | |
4954 | + /* skip last_reset_val not used for reading */ | |
4955 | + /* skip ovfl_switch_cnt not used for reading */ | |
4956 | + | |
4957 | + ret = __pfm_read_pmds(ctx, (void *)&req, 1); | |
4958 | + | |
4959 | + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; | |
4960 | + req_old->reg_flags |= req.reg_flags; | |
4961 | + if (ret) | |
4962 | + return ret; | |
4963 | + | |
4964 | + /* update fields */ | |
4965 | + req_old->reg_value = req.reg_value; | |
4966 | + | |
4967 | + req_old->reg_last_reset_val = req.reg_last_reset_val; | |
4968 | + req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt; | |
4969 | + } | |
4970 | + return 0; | |
4971 | +} | |
4972 | + | |
4973 | +static long pfm_read_pmds_old(int fd, void __user *ureq, int count) | |
4974 | +{ | |
4975 | + struct pfm_context *ctx; | |
4976 | + struct task_struct *task; | |
4977 | + struct file *filp; | |
4978 | + struct pfarg_reg *req = NULL; | |
4979 | + void *fptr, *resume; | |
4980 | + unsigned long flags; | |
4981 | + size_t sz; | |
4982 | + int ret, fput_needed; | |
4983 | + | |
4984 | + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) | |
4985 | + return -EINVAL; | |
4986 | + | |
4987 | + sz = count*sizeof(*req); | |
4988 | + | |
4989 | + filp = fget_light(fd, &fput_needed); | |
4990 | + if (unlikely(filp == NULL)) { | |
4991 | + PFM_DBG("invalid fd %d", fd); | |
4992 | + return -EBADF; | |
4993 | + } | |
4994 | + | |
4995 | + ctx = filp->private_data; | |
4996 | + ret = -EBADF; | |
4997 | + | |
4998 | + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { | |
4999 | + PFM_DBG("fd %d not related to perfmon", fd); | |
5000 | + goto error; | |
5001 | + } | |
5002 | + | |
5003 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
5004 | + if (ret) | |
5005 | + goto error; | |
5006 | + | |
5007 | + spin_lock_irqsave(&ctx->lock, flags); | |
5008 | + | |
5009 | + task = ctx->task; | |
5010 | + | |
5011 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
5012 | + if (ret == 0) | |
5013 | + ret = __pfm_read_pmds_old(ctx, req, count); | |
5014 | + | |
5015 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
5016 | + | |
5017 | + if (resume) | |
5018 | + pfm_resume_task(task, resume); | |
5019 | + | |
5020 | + if (copy_to_user(ureq, req, sz)) | |
5021 | + ret = -EFAULT; | |
5022 | + | |
5023 | + kfree(fptr); | |
5024 | +error: | |
5025 | + fput_light(filp, fput_needed); | |
5026 | + return ret; | |
5027 | +} | |
5028 | + | |
5029 | +/* | |
5030 | + * OBSOLETE: use /proc/perfmon_map instead | |
5031 | + */ | |
5032 | +static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count) | |
5033 | +{ | |
5034 | + struct pfarg_reg *req = NULL; | |
5035 | + void *fptr; | |
5036 | + size_t sz; | |
5037 | + int ret, i; | |
5038 | + unsigned int cnum; | |
5039 | + | |
5040 | + if (count < 1) | |
5041 | + return -EINVAL; | |
5042 | + | |
5043 | + /* | |
5044 | + * ensure the pfm_pmu_conf does not disappear while | |
5045 | + * we use it | |
5046 | + */ | |
5047 | + ret = pfm_pmu_conf_get(1); | |
5048 | + if (ret) | |
5049 | + return ret; | |
5050 | + | |
5051 | + sz = count*sizeof(*ureq); | |
5052 | + | |
5053 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
5054 | + if (ret) | |
5055 | + goto error; | |
5056 | + | |
5057 | + | |
5058 | + for (i = 0; i < count; i++, req++) { | |
5059 | + cnum = req->reg_num; | |
5060 | + | |
5061 | + if (i >= PFM_MAX_PMCS || | |
5062 | + (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) { | |
5063 | + req->reg_flags = PFM_REG_RETFL_EINVAL; | |
5064 | + break; | |
5065 | + } | |
5066 | + req->reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val; | |
5067 | + req->reg_flags = 0; | |
5068 | + | |
5069 | + PFM_DBG("pmc[%u]=0x%lx", cnum, req->reg_value); | |
5070 | + } | |
5071 | + | |
5072 | + if (copy_to_user(ureq, req, sz)) | |
5073 | + ret = -EFAULT; | |
5074 | + | |
5075 | + kfree(fptr); | |
5076 | +error: | |
5077 | + pfm_pmu_conf_put(); | |
5078 | + | |
5079 | + return ret; | |
5080 | +} | |
5081 | + | |
5082 | +/* | |
5083 | + * allocate a sampling buffer and remaps it into the user address space of | |
5084 | + * the task. This is only in compatibility mode | |
5085 | + * | |
5086 | + * function called ONLY on current task | |
5087 | + */ | |
5088 | +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, size_t rsize, | |
5089 | + struct file *filp) | |
5090 | +{ | |
5091 | + struct mm_struct *mm = current->mm; | |
5092 | + struct vm_area_struct *vma = NULL; | |
5093 | + struct pfm_arch_context *ctx_arch; | |
5094 | + size_t size; | |
5095 | + int ret; | |
5096 | + extern struct vm_operations_struct pfm_buf_map_vm_ops; | |
5097 | + | |
5098 | + ctx_arch = pfm_ctx_arch(ctx); | |
5099 | + | |
5100 | + /* | |
5101 | + * allocate buffer + map desc | |
5102 | + */ | |
5103 | + ret = pfm_smpl_buf_alloc(ctx, rsize); | |
5104 | + if (ret) | |
5105 | + return ret; | |
5106 | + | |
5107 | + size = ctx->smpl_size; | |
5108 | + | |
5109 | + | |
5110 | + /* allocate vma */ | |
5111 | + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); | |
5112 | + if (!vma) { | |
5113 | + PFM_DBG("Cannot allocate vma"); | |
5114 | + goto error_kmem; | |
5115 | + } | |
5116 | + memset(vma, 0, sizeof(*vma)); | |
5117 | + | |
5118 | + /* | |
5119 | + * partially initialize the vma for the sampling buffer | |
5120 | + */ | |
5121 | + vma->vm_mm = mm; | |
5122 | + vma->vm_flags = VM_READ | VM_MAYREAD | VM_RESERVED; | |
5123 | + vma->vm_page_prot = PAGE_READONLY; | |
5124 | + vma->vm_ops = &pfm_buf_map_vm_ops; | |
5125 | + vma->vm_file = filp; | |
5126 | + vma->vm_private_data = ctx; | |
5127 | + vma->vm_pgoff = 0; | |
5128 | + | |
5129 | + /* | |
5130 | + * simulate effect of mmap() | |
5131 | + */ | |
5132 | + get_file(filp); | |
5133 | + | |
5134 | + /* | |
5135 | + * Let's do the difficult operations next. | |
5136 | + * | |
5137 | + * now we atomically find some area in the address space and | |
5138 | + * remap the buffer into it. | |
5139 | + */ | |
5140 | + down_write(¤t->mm->mmap_sem); | |
5141 | + | |
5142 | + /* find some free area in address space, must have mmap sem held */ | |
5143 | + vma->vm_start = get_unmapped_area(NULL, 0, size, 0, | |
5144 | + MAP_PRIVATE|MAP_ANONYMOUS); | |
5145 | + if (vma->vm_start == 0) { | |
5146 | + PFM_DBG("cannot find unmapped area of size %zu", size); | |
5147 | + up_write(¤t->mm->mmap_sem); | |
5148 | + goto error; | |
5149 | + } | |
5150 | + vma->vm_end = vma->vm_start + size; | |
5151 | + | |
5152 | + PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start); | |
5153 | + /* | |
5154 | + * now insert the vma in the vm list for the process, must be | |
5155 | + * done with mmap lock held | |
5156 | + */ | |
5157 | + insert_vm_struct(mm, vma); | |
5158 | + | |
5159 | + mm->total_vm += size >> PAGE_SHIFT; | |
5160 | + | |
5161 | + up_write(¤t->mm->mmap_sem); | |
5162 | + | |
5163 | + /* | |
5164 | + * IMPORTANT: we do not issue the fput() | |
5165 | + * because we want to increase the ref count | |
5166 | + * on the descriptor to simulate what mmap() | |
5167 | + * would do | |
5168 | + */ | |
5169 | + | |
5170 | + /* | |
5171 | + * used to propagate vaddr to syscall stub | |
5172 | + */ | |
5173 | + ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start; | |
5174 | + | |
5175 | + return 0; | |
5176 | +error: | |
5177 | + kmem_cache_free(vm_area_cachep, vma); | |
5178 | +error_kmem: | |
5179 | + pfm_smpl_buf_space_release(ctx, ctx->smpl_size); | |
5180 | + vfree(ctx->smpl_addr); | |
5181 | + return -ENOMEM; | |
5182 | +} | |
5183 | + | |
5184 | +#define PFM_DEFAULT_SMPL_UUID { \ | |
5185 | + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\ | |
5186 | + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} | |
5187 | + | |
5188 | +static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID; | |
5189 | +static pfm_uuid_t null_uuid; | |
5190 | + | |
5191 | +/* | |
5192 | + * function invoked in case, pfm_context_create fails | |
5193 | + * at the last operation, copy_to_user. It needs to | |
5194 | + * undo memory allocations and free the file descriptor | |
5195 | + */ | |
5196 | +static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx) | |
5197 | +{ | |
5198 | + struct files_struct *files = current->files; | |
5199 | + struct file *file; | |
5200 | + int fput_needed; | |
5201 | + | |
5202 | + file = fget_light(fd, &fput_needed); | |
5203 | + /* | |
5204 | + * there is no fd_uninstall(), so we do it | |
5205 | + * here. put_unused_fd() does not remove the | |
5206 | + * effect of fd_install(). | |
5207 | + */ | |
5208 | + | |
5209 | + spin_lock(&files->file_lock); | |
5210 | + files->fd_array[fd] = NULL; | |
5211 | + spin_unlock(&files->file_lock); | |
5212 | + | |
5213 | + fput_light(file, fput_needed); | |
5214 | + | |
5215 | + /* | |
5216 | + * decrement ref count and kill file | |
5217 | + */ | |
5218 | + put_filp(file); | |
5219 | + | |
5220 | + put_unused_fd(fd); | |
5221 | + | |
5222 | + pfm_free_context(ctx); | |
5223 | +} | |
5224 | + | |
5225 | +static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg, | |
5226 | + size_t usize, void **arg, | |
5227 | + struct pfm_smpl_fmt **fmt) | |
5228 | +{ | |
5229 | + struct pfm_smpl_fmt *f; | |
5230 | + void *addr = NULL; | |
5231 | + size_t sz; | |
5232 | + int ret; | |
5233 | + | |
5234 | + if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t))) | |
5235 | + return 0; | |
5236 | + | |
5237 | + if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) { | |
5238 | + PFM_DBG("compatibility mode supports only default sampling format"); | |
5239 | + return -EINVAL; | |
5240 | + } | |
5241 | + /* | |
5242 | + * find fmt and increase refcount | |
5243 | + */ | |
5244 | + f = pfm_smpl_fmt_get("default-old"); | |
5245 | + if (f == NULL) { | |
5246 | + PFM_DBG("default-old buffer format not found"); | |
5247 | + return -EINVAL; | |
5248 | + } | |
5249 | + | |
5250 | + /* | |
5251 | + * expected format argument size | |
5252 | + */ | |
5253 | + sz = f->fmt_arg_size; | |
5254 | + | |
5255 | + /* | |
5256 | + * check user size matches expected size | |
5257 | + * usize = -1 is for IA-64 backward compatibility | |
5258 | + */ | |
5259 | + ret = -EINVAL; | |
5260 | + if (sz != usize && usize != -1) { | |
5261 | + PFM_DBG("invalid arg size %zu, format expects %zu", | |
5262 | + usize, sz); | |
5263 | + goto error; | |
5264 | + } | |
5265 | + | |
5266 | + ret = -ENOMEM; | |
5267 | + addr = kmalloc(sz, GFP_KERNEL); | |
5268 | + if (addr == NULL) | |
5269 | + goto error; | |
5270 | + | |
5271 | + ret = -EFAULT; | |
5272 | + if (copy_from_user(addr, fmt_uarg, sz)) | |
5273 | + goto error; | |
5274 | + | |
5275 | + *arg = addr; | |
5276 | + *fmt = f; | |
5277 | + return 0; | |
5278 | + | |
5279 | +error: | |
5280 | + kfree(addr); | |
5281 | + pfm_smpl_fmt_put(f); | |
5282 | + return ret; | |
5283 | +} | |
5284 | + | |
5285 | +static long pfm_create_context_old(int fd, void __user *ureq, int count) | |
5286 | +{ | |
5287 | + struct pfm_context *new_ctx; | |
5288 | + struct pfm_arch_context *ctx_arch; | |
5289 | + struct pfm_smpl_fmt *fmt = NULL; | |
5290 | + struct pfarg_context req_old; | |
5291 | + void __user *usmpl_arg; | |
5292 | + void *smpl_arg = NULL; | |
5293 | + struct pfarg_ctx req; | |
5294 | + int ret; | |
5295 | + | |
5296 | + if (count != 1) | |
5297 | + return -EINVAL; | |
5298 | + | |
5299 | + if (copy_from_user(&req_old, ureq, sizeof(req_old))) | |
5300 | + return -EFAULT; | |
5301 | + | |
5302 | + memset(&req, 0, sizeof(req)); | |
5303 | + | |
5304 | + /* | |
5305 | + * sampling format args are following pfarg_context | |
5306 | + */ | |
5307 | + usmpl_arg = ureq+sizeof(req_old); | |
5308 | + | |
5309 | + ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1, | |
5310 | + &smpl_arg, &fmt); | |
5311 | + if (ret) | |
5312 | + return ret; | |
5313 | + | |
5314 | + req.ctx_flags = req_old.ctx_flags; | |
5315 | + | |
5316 | + /* returns file descriptor if >= 0 (new_ctx is then valid), or a | |
5317 | + * negative error code; only on success may we write back and undo */ | |
5318 | + ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx); | |
5319 | + if (ret >= 0) { | |
5320 | + ctx_arch = pfm_ctx_arch(new_ctx); | |
5321 | + req_old.ctx_fd = ret; | |
5322 | + req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr; | |
5323 | + } | |
5324 | + | |
5325 | + if (ret >= 0 && copy_to_user(ureq, &req_old, sizeof(req_old))) { | |
5326 | + pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx); | |
5327 | + ret = -EFAULT; | |
5328 | + } | |
5329 | + | |
5330 | + kfree(smpl_arg); | |
5331 | + | |
5332 | + return ret; | |
5333 | +} | |
5334 | + | |
5335 | +/* | |
5336 | + * obsolete call: use /proc/perfmon | |
5337 | + */ | |
5338 | +static long pfm_get_features_old(int fd, void __user *arg, int count) | |
5339 | +{ | |
5340 | + struct pfarg_features req; | |
5341 | + int ret = 0; | |
5342 | + | |
5343 | + if (count != 1) | |
5344 | + return -EINVAL; | |
5345 | + | |
5346 | + memset(&req, 0, sizeof(req)); | |
5347 | + | |
5348 | + req.ft_version = PFM_VERSION; | |
5349 | + | |
5350 | + if (copy_to_user(arg, &req, sizeof(req))) | |
5351 | + ret = -EFAULT; | |
5352 | + | |
5353 | + return ret; | |
5354 | +} | |
5355 | + | |
5356 | +static long pfm_debug_old(int fd, void __user *arg, int count) | |
5357 | +{ | |
5358 | + int m; | |
5359 | + | |
5360 | + if (count != 1) | |
5361 | + return -EINVAL; | |
5362 | + | |
5363 | + if (get_user(m, (int __user *)arg)) | |
5364 | + return -EFAULT; | |
5365 | + | |
5366 | + | |
5367 | + pfm_controls.debug = m == 0 ? 0 : 1; | |
5368 | + | |
5369 | + PFM_INFO("debugging %s (timing reset)", | |
5370 | + pfm_controls.debug ? "on" : "off"); | |
5371 | + | |
5372 | + if (m == 0) | |
5373 | + for_each_online_cpu(m) { | |
5374 | + memset(&per_cpu(pfm_stats, m), 0, | |
5375 | + sizeof(struct pfm_stats)); | |
5376 | + } | |
5377 | + return 0; | |
5378 | +} | |
5379 | + | |
5380 | +static long pfm_unload_context_old(int fd, void __user *arg, int count) | |
5381 | +{ | |
5382 | + if (count) | |
5383 | + return -EINVAL; | |
5384 | + | |
5385 | + return sys_pfm_unload_context(fd); | |
5386 | +} | |
5387 | + | |
5388 | +static long pfm_restart_old(int fd, void __user *arg, int count) | |
5389 | +{ | |
5390 | + if (count) | |
5391 | + return -EINVAL; | |
5392 | + | |
5393 | + return sys_pfm_restart(fd); | |
5394 | +} | |
5395 | + | |
5396 | +static long pfm_stop_old(int fd, void __user *arg, int count) | |
5397 | +{ | |
5398 | + if (count) | |
5399 | + return -EINVAL; | |
5400 | + | |
5401 | + return sys_pfm_stop(fd); | |
5402 | +} | |
5403 | + | |
5404 | +static long pfm_start_old(int fd, void __user *arg, int count) | |
5405 | +{ | |
5406 | + if (count > 1) | |
5407 | + return -EINVAL; | |
5408 | + | |
5409 | + return sys_pfm_start(fd, arg); | |
5410 | +} | |
5411 | + | |
5412 | +static long pfm_load_context_old(int fd, void __user *ureq, int count) | |
5413 | +{ | |
5414 | + if (count != 1) | |
5415 | + return -EINVAL; | |
5416 | + | |
5417 | + return sys_pfm_load_context(fd, ureq); | |
5418 | +} | |
5419 | + | |
5420 | +/* | |
5421 | + * perfmon command descriptions | |
5422 | + */ | |
5423 | +struct pfm_cmd_desc { | |
5424 | + long (*cmd_func)(int fd, void __user *arg, int count); | |
5425 | +}; | |
5426 | + | |
5427 | +/* | |
5428 | + * functions MUST be listed in the increasing order of | |
5429 | + * their index (see permfon.h) | |
5430 | + */ | |
5431 | +#define PFM_CMD(name) \ | |
5432 | + { .cmd_func = name, \ | |
5433 | + } | |
5434 | +#define PFM_CMD_NONE \ | |
5435 | + { .cmd_func = NULL \ | |
5436 | + } | |
5437 | + | |
5438 | +static struct pfm_cmd_desc pfm_cmd_tab[] = { | |
5439 | +/* 0 */PFM_CMD_NONE, | |
5440 | +/* 1 */PFM_CMD(pfm_write_pmcs_old), | |
5441 | +/* 2 */PFM_CMD(pfm_write_pmds_old), | |
5442 | +/* 3 */PFM_CMD(pfm_read_pmds_old), | |
5443 | +/* 4 */PFM_CMD(pfm_stop_old), | |
5444 | +/* 5 */PFM_CMD(pfm_start_old), | |
5445 | +/* 6 */PFM_CMD_NONE, | |
5446 | +/* 7 */PFM_CMD_NONE, | |
5447 | +/* 8 */PFM_CMD(pfm_create_context_old), | |
5448 | +/* 9 */PFM_CMD_NONE, | |
5449 | +/* 10 */PFM_CMD(pfm_restart_old), | |
5450 | +/* 11 */PFM_CMD_NONE, | |
5451 | +/* 12 */PFM_CMD(pfm_get_features_old), | |
5452 | +/* 13 */PFM_CMD(pfm_debug_old), | |
5453 | +/* 14 */PFM_CMD_NONE, | |
5454 | +/* 15 */PFM_CMD(pfm_get_default_pmcs_old), | |
5455 | +/* 16 */PFM_CMD(pfm_load_context_old), | |
5456 | +/* 17 */PFM_CMD(pfm_unload_context_old), | |
5457 | +/* 18 */PFM_CMD_NONE, | |
5458 | +/* 19 */PFM_CMD_NONE, | |
5459 | +/* 20 */PFM_CMD_NONE, | |
5460 | +/* 21 */PFM_CMD_NONE, | |
5461 | +/* 22 */PFM_CMD_NONE, | |
5462 | +/* 23 */PFM_CMD_NONE, | |
5463 | +/* 24 */PFM_CMD_NONE, | |
5464 | +/* 25 */PFM_CMD_NONE, | |
5465 | +/* 26 */PFM_CMD_NONE, | |
5466 | +/* 27 */PFM_CMD_NONE, | |
5467 | +/* 28 */PFM_CMD_NONE, | |
5468 | +/* 29 */PFM_CMD_NONE, | |
5469 | +/* 30 */PFM_CMD_NONE, | |
5470 | +/* 31 */PFM_CMD_NONE, | |
5471 | +/* 32 */PFM_CMD(pfm_write_ibrs_old), | |
5472 | +/* 33 */PFM_CMD(pfm_write_dbrs_old), | |
5473 | +}; | |
5474 | +#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab) | |
5475 | + | |
5476 | +/* | |
5477 | + * system-call entry point (must return long) | |
5478 | + */ | |
5479 | +asmlinkage long sys_perfmonctl(int fd, int cmd, void __user *arg, int count) | |
5480 | +{ | |
5481 | + if (perfmon_disabled) | |
5482 | + return -ENOSYS; | |
5483 | + | |
5484 | + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT | |
5485 | + || pfm_cmd_tab[cmd].cmd_func == NULL)) { | |
5486 | + PFM_DBG("invalid cmd=%d", cmd); | |
5487 | + return -EINVAL; | |
5488 | + } | |
5489 | + return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count); | |
5490 | +} | |
5491 | + | |
5492 | +/* | |
5493 | + * Called from pfm_read() for a perfmon v2.0 context. | |
5494 | + * | |
5495 | + * compatibility mode pfm_read() routine. We need a separate | |
5496 | + * routine because the definition of the message has changed. | |
5497 | + * The pfm_msg and pfarg_msg structures are different. | |
5498 | + * | |
5499 | + * return: sizeof(pfm_msg_t) on success, -errno otherwise | |
5500 | + */ | |
5501 | +ssize_t pfm_arch_compat_read(struct pfm_context *ctx, | |
5502 | + char __user *buf, | |
5503 | + int non_block, | |
5504 | + size_t size) | |
5505 | +{ | |
5506 | + union pfarg_msg msg_buf; | |
5507 | + pfm_msg_t old_msg_buf; | |
5508 | + pfm_ovfl_msg_t *o_msg; | |
5509 | + struct pfarg_ovfl_msg *n_msg; | |
5510 | + int ret; | |
5511 | + | |
5512 | + PFM_DBG("msg=%p size=%zu", buf, size); | |
5513 | + | |
5514 | + /* | |
5515 | + * cannot extract partial messages. | |
5516 | + * check even when there is no message | |
5517 | + * | |
5518 | + * cannot extract more than one message per call. Bytes | |
5519 | + * above sizeof(msg) are ignored. | |
5520 | + */ | |
5521 | + if (size < sizeof(old_msg_buf)) { | |
5522 | + PFM_DBG("message is too small size=%zu must be >=%zu)", | |
5523 | + size, | |
5524 | + sizeof(old_msg_buf)); | |
5525 | + return -EINVAL; | |
5526 | + } | |
5527 | + | |
5528 | + ret = __pfm_read(ctx, &msg_buf, non_block); | |
5529 | + if (ret < 1) | |
5530 | + return ret; | |
5531 | + | |
5532 | + /* | |
5533 | + * force return value to old message size | |
5534 | + */ | |
5535 | + ret = sizeof(old_msg_buf); | |
5536 | + | |
5537 | + o_msg = &old_msg_buf.pfm_ovfl_msg; | |
5538 | + n_msg = &msg_buf.pfm_ovfl_msg; | |
5539 | + | |
5540 | + switch (msg_buf.type) { | |
5541 | + case PFM_MSG_OVFL: | |
5542 | + o_msg->msg_type = PFM_MSG_OVFL; | |
5543 | + o_msg->msg_ctx_fd = 0; | |
5544 | + o_msg->msg_active_set = n_msg->msg_active_set; | |
5545 | + o_msg->msg_tstamp = 0; | |
5546 | + | |
5547 | + o_msg->msg_ovfl_pmds[0] = n_msg->msg_ovfl_pmds[0]; | |
5548 | + o_msg->msg_ovfl_pmds[1] = n_msg->msg_ovfl_pmds[1]; | |
5549 | + o_msg->msg_ovfl_pmds[2] = n_msg->msg_ovfl_pmds[2]; | |
5550 | + o_msg->msg_ovfl_pmds[3] = n_msg->msg_ovfl_pmds[3]; | |
5551 | + break; | |
5552 | + case PFM_MSG_END: | |
5553 | + o_msg->msg_type = PFM_MSG_END; | |
5554 | + o_msg->msg_ctx_fd = 0; | |
5555 | + o_msg->msg_tstamp = 0; | |
5556 | + break; | |
5557 | + default: | |
5558 | + PFM_DBG("unknown msg type=%d", msg_buf.type); | |
5559 | + } | |
5560 | + if (copy_to_user(buf, &old_msg_buf, sizeof(old_msg_buf))) | |
5561 | + ret = -EFAULT; | |
5562 | + PFM_DBG_ovfl("ret=%d", ret); | |
5563 | + return ret; | |
5564 | +} | |
5565 | + | |
5566 | +/* | |
5567 | + * legacy /proc/perfmon simplified interface (we only maintain the | |
5568 | + * global information (no more per-cpu stats, use | |
5569 | + * /sys/devices/system/cpu/cpuXX/perfmon) | |
5570 | + */ | |
5571 | +static struct proc_dir_entry *perfmon_proc; | |
5572 | + | |
5573 | +static void *pfm_proc_start(struct seq_file *m, loff_t *pos) | |
5574 | +{ | |
5575 | + if (*pos == 0) | |
5576 | + return (void *)1; | |
5577 | + | |
5578 | + return NULL; | |
5579 | +} | |
5580 | + | |
5581 | +static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) | |
5582 | +{ | |
5583 | + ++*pos; | |
5584 | + return pfm_proc_start(m, pos); | |
5585 | +} | |
5586 | + | |
5587 | +static void pfm_proc_stop(struct seq_file *m, void *v) | |
5588 | +{ | |
5589 | +} | |
5590 | + | |
5591 | +/* | |
5592 | + * this is a simplified version of the legacy /proc/perfmon. | |
5593 | + * We have retained ONLY the key information that tools are actually | |
5594 | + * using | |
5595 | + */ | |
5596 | +static void pfm_proc_show_header(struct seq_file *m) | |
5597 | +{ | |
5598 | + char buf[128]; | |
5599 | + | |
5600 | + pfm_sysfs_res_show(buf, sizeof(buf), 3); | |
5601 | + | |
5602 | + seq_printf(m, "perfmon version : %u.%u\n", | |
5603 | + PFM_VERSION_MAJ, PFM_VERSION_MIN); | |
5604 | + | |
5605 | + seq_printf(m, "model : %s", buf); | |
5606 | +} | |
5607 | + | |
5608 | +static int pfm_proc_show(struct seq_file *m, void *v) | |
5609 | +{ | |
5610 | + pfm_proc_show_header(m); | |
5611 | + return 0; | |
5612 | +} | |
5613 | + | |
5614 | +struct seq_operations pfm_proc_seq_ops = { | |
5615 | + .start = pfm_proc_start, | |
5616 | + .next = pfm_proc_next, | |
5617 | + .stop = pfm_proc_stop, | |
5618 | + .show = pfm_proc_show | |
5619 | +}; | |
5620 | + | |
5621 | +static int pfm_proc_open(struct inode *inode, struct file *file) | |
5622 | +{ | |
5623 | + return seq_open(file, &pfm_proc_seq_ops); | |
5624 | +} | |
5625 | + | |
5626 | + | |
5627 | +static struct file_operations pfm_proc_fops = { | |
5628 | + .open = pfm_proc_open, | |
5629 | + .read = seq_read, | |
5630 | + .llseek = seq_lseek, | |
5631 | + .release = seq_release, | |
5632 | +}; | |
5633 | + | |
5634 | +/* | |
5635 | + * called from pfm_arch_init(), global initialization, called once | |
5636 | + */ | |
5637 | +int __init pfm_ia64_compat_init(void) | |
5638 | +{ | |
5639 | + /* | |
5640 | + * create /proc/perfmon | |
5641 | + */ | |
5642 | + perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL); | |
5643 | + if (perfmon_proc == NULL) { | |
5644 | + PFM_ERR("cannot create /proc entry, perfmon disabled"); | |
5645 | + return -1; | |
5646 | + } | |
5647 | + perfmon_proc->proc_fops = &pfm_proc_fops; | |
5648 | + return 0; | |
5649 | +} | |
5650 | --- /dev/null | |
5651 | +++ b/arch/ia64/perfmon/perfmon_default_smpl.c | |
5652 | @@ -0,0 +1,273 @@ | |
5653 | +/* | |
5654 | + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. | |
5655 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
5656 | + * | |
5657 | + * This file implements the old default sampling buffer format | |
5658 | + * for the Linux/ia64 perfmon-2 subsystem. This is for backward | |
5659 | + * compatibility only. use the new default format in perfmon/ | |
5660 | + * | |
5661 | + * This program is free software; you can redistribute it and/or | |
5662 | + * modify it under the terms of version 2 of the GNU General Public | |
5663 | + * License as published by the Free Software Foundation. | |
5664 | + * | |
5665 | + * This program is distributed in the hope that it will be useful, | |
5666 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5667 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5668 | + * General Public License for more details. | |
5669 | + * | |
5670 | + * You should have received a copy of the GNU General Public License | |
5671 | + * along with this program; if not, write to the Free Software | |
5672 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
5673 | + * 02111-1307 USA | |
5674 | + */ | |
5675 | +#include <linux/kernel.h> | |
5676 | +#include <linux/types.h> | |
5677 | +#include <linux/module.h> | |
5678 | +#include <linux/init.h> | |
5679 | +#include <linux/delay.h> | |
5680 | +#include <linux/smp.h> | |
5681 | +#include <linux/sysctl.h> | |
5682 | + | |
5683 | +#ifdef MODULE | |
5684 | +#define FMT_FLAGS 0 | |
5685 | +#else | |
5686 | +#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN | |
5687 | +#endif | |
5688 | + | |
5689 | +#include <linux/perfmon_kern.h> | |
5690 | +#include <asm/perfmon_default_smpl.h> | |
5691 | + | |
5692 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
5693 | +MODULE_DESCRIPTION("perfmon old default sampling format"); | |
5694 | +MODULE_LICENSE("GPL"); | |
5695 | + | |
5696 | +static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data) | |
5697 | +{ | |
5698 | + struct pfm_default_smpl_arg *arg = data; | |
5699 | + size_t min_buf_size; | |
5700 | + | |
5701 | + if (data == NULL) { | |
5702 | + PFM_DBG("no argument passed"); | |
5703 | + return -EINVAL; | |
5704 | + } | |
5705 | + | |
5706 | + /* | |
5707 | + * compute min buf size. All PMD are manipulated as 64bit entities | |
5708 | + */ | |
5709 | + min_buf_size = sizeof(struct pfm_default_smpl_hdr) | |
5710 | + + (sizeof(struct pfm_default_smpl_entry) + (npmds*sizeof(u64))); | |
5711 | + | |
5712 | + PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%lu " | |
5713 | + "buf_size=%lu CPU%d", flags, npmds, min_buf_size, | |
5714 | + arg->buf_size, smp_processor_id()); | |
5715 | + | |
5716 | + /* | |
5717 | + * must hold at least the buffer header + one minimally sized entry | |
5718 | + */ | |
5719 | + if (arg->buf_size < min_buf_size) | |
5720 | + return -EINVAL; | |
5721 | + | |
5722 | + return 0; | |
5723 | +} | |
5724 | + | |
5725 | +static int pfm_default_fmt_get_size(unsigned int flags, void *data, | |
5726 | + size_t *size) | |
5727 | +{ | |
5728 | + struct pfm_default_smpl_arg *arg = data; | |
5729 | + | |
5730 | + /* | |
5731 | + * size has been validated in default_validate | |
5732 | + */ | |
5733 | + *size = arg->buf_size; | |
5734 | + | |
5735 | + return 0; | |
5736 | +} | |
5737 | + | |
5738 | +static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf, | |
5739 | + u32 flags, u16 npmds, void *data) | |
5740 | +{ | |
5741 | + struct pfm_default_smpl_hdr *hdr; | |
5742 | + struct pfm_default_smpl_arg *arg = data; | |
5743 | + | |
5744 | + hdr = buf; | |
5745 | + | |
5746 | + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; | |
5747 | + hdr->hdr_buf_size = arg->buf_size; | |
5748 | + hdr->hdr_cur_offs = sizeof(*hdr); | |
5749 | + hdr->hdr_overflows = 0; | |
5750 | + hdr->hdr_count = 0; | |
5751 | + | |
5752 | + PFM_DBG("buffer=%p buf_size=%lu hdr_size=%lu " | |
5753 | + "hdr_version=%u cur_offs=%lu", | |
5754 | + buf, | |
5755 | + hdr->hdr_buf_size, | |
5756 | + sizeof(*hdr), | |
5757 | + hdr->hdr_version, | |
5758 | + hdr->hdr_cur_offs); | |
5759 | + | |
5760 | + return 0; | |
5761 | +} | |
5762 | + | |
5763 | +static int pfm_default_fmt_handler(struct pfm_context *ctx, | |
5764 | + unsigned long ip, u64 tstamp, void *data) | |
5765 | +{ | |
5766 | + struct pfm_default_smpl_hdr *hdr; | |
5767 | + struct pfm_default_smpl_entry *ent; | |
5768 | + void *cur, *last, *buf; | |
5769 | + u64 *e; | |
5770 | + size_t entry_size; | |
5771 | + u16 npmds, i, ovfl_pmd; | |
5772 | + struct pfm_ovfl_arg *arg; | |
5773 | + | |
5774 | + hdr = ctx->smpl_addr; | |
5775 | + arg = &ctx->ovfl_arg; | |
5776 | + | |
5777 | + buf = hdr; | |
5778 | + cur = buf+hdr->hdr_cur_offs; | |
5779 | + last = buf+hdr->hdr_buf_size; | |
5780 | + ovfl_pmd = arg->ovfl_pmd; | |
5781 | + | |
5782 | + /* | |
5783 | + * precheck for sanity | |
5784 | + */ | |
5785 | + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) | |
5786 | + goto full; | |
5787 | + | |
5788 | + npmds = arg->num_smpl_pmds; | |
5789 | + | |
5790 | + ent = cur; | |
5791 | + | |
5792 | + prefetch(arg->smpl_pmds_values); | |
5793 | + | |
5794 | + entry_size = sizeof(*ent) + (npmds << 3); | |
5795 | + | |
5796 | + /* position for first pmd */ | |
5797 | + e = (unsigned long *)(ent+1); | |
5798 | + | |
5799 | + hdr->hdr_count++; | |
5800 | + | |
5801 | + PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu " | |
5802 | + "ovfl_pmd=%d npmds=%u", | |
5803 | + hdr->hdr_count, | |
5804 | + cur, last, | |
5805 | + last-cur, | |
5806 | + ovfl_pmd, | |
5807 | + npmds); | |
5808 | + | |
5809 | + /* | |
5810 | + * current = task running at the time of the overflow. | |
5811 | + * | |
5812 | + * per-task mode: | |
5813 | + * - this is usually the task being monitored. | |
5814 | + * Under certain conditions, it might be a different task | |
5815 | + * | |
5816 | + * system-wide: | |
5817 | + * - this is not necessarily the task controlling the session | |
5818 | + */ | |
5819 | + ent->pid = current->pid; | |
5820 | + ent->ovfl_pmd = ovfl_pmd; | |
5821 | + ent->last_reset_val = arg->pmd_last_reset; | |
5822 | + | |
5823 | + /* | |
5824 | + * where did the fault happen (includes slot number) | |
5825 | + */ | |
5826 | + ent->ip = ip; | |
5827 | + | |
5828 | + ent->tstamp = tstamp; | |
5829 | + ent->cpu = smp_processor_id(); | |
5830 | + ent->set = arg->active_set; | |
5831 | + ent->tgid = current->tgid; | |
5832 | + | |
5833 | + /* | |
5834 | + * selectively store PMDs in increasing index number | |
5835 | + */ | |
5836 | + if (npmds) { | |
5837 | + u64 *val = arg->smpl_pmds_values; | |
5838 | + for (i = 0; i < npmds; i++) | |
5839 | + *e++ = *val++; | |
5840 | + } | |
5841 | + | |
5842 | + /* | |
5843 | + * update position for next entry | |
5844 | + */ | |
5845 | + hdr->hdr_cur_offs += entry_size; | |
5846 | + cur += entry_size; | |
5847 | + | |
5848 | + /* | |
5849 | + * post check to avoid losing the last sample | |
5850 | + */ | |
5851 | + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) | |
5852 | + goto full; | |
5853 | + | |
5854 | + /* | |
5855 | + * reset before returning from interrupt handler | |
5856 | + */ | |
5857 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
5858 | + return 0; | |
5859 | +full: | |
5860 | + PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu", | |
5861 | + last-cur, hdr->hdr_count); | |
5862 | + | |
5863 | + /* | |
5864 | + * increment number of buffer overflow. | |
5865 | + * important to detect duplicate set of samples. | |
5866 | + */ | |
5867 | + hdr->hdr_overflows++; | |
5868 | + | |
5869 | + /* | |
5870 | + * request notification and masking of monitoring. | |
5871 | + * Notification is still subject to the overflowed | |
5872 | + */ | |
5873 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; | |
5874 | + | |
5875 | + return -ENOBUFS; /* we are full, sorry */ | |
5876 | +} | |
5877 | + | |
5878 | +static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf) | |
5879 | +{ | |
5880 | + struct pfm_default_smpl_hdr *hdr; | |
5881 | + | |
5882 | + hdr = buf; | |
5883 | + | |
5884 | + hdr->hdr_count = 0; | |
5885 | + hdr->hdr_cur_offs = sizeof(*hdr); | |
5886 | + | |
5887 | + *ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
5888 | + | |
5889 | + return 0; | |
5890 | +} | |
5891 | + | |
5892 | +static int pfm_default_fmt_exit(void *buf) | |
5893 | +{ | |
5894 | + return 0; | |
5895 | +} | |
5896 | + | |
5897 | +static struct pfm_smpl_fmt default_fmt = { | |
5898 | + .fmt_name = "default-old", | |
5899 | + .fmt_version = 0x10000, | |
5900 | + .fmt_arg_size = sizeof(struct pfm_default_smpl_arg), | |
5901 | + .fmt_validate = pfm_default_fmt_validate, | |
5902 | + .fmt_getsize = pfm_default_fmt_get_size, | |
5903 | + .fmt_init = pfm_default_fmt_init, | |
5904 | + .fmt_handler = pfm_default_fmt_handler, | |
5905 | + .fmt_restart = pfm_default_fmt_restart, | |
5906 | + .fmt_exit = pfm_default_fmt_exit, | |
5907 | + .fmt_flags = FMT_FLAGS, | |
5908 | + .owner = THIS_MODULE | |
5909 | +}; | |
5910 | + | |
5911 | +static int pfm_default_fmt_init_module(void) | |
5912 | +{ | |
5913 | + /* | |
5914 | + * propagate the pfm_fmt_register() result directly | |
5915 | + */ | |
5916 | + return pfm_fmt_register(&default_fmt); | |
5917 | +} | |
5918 | + | |
5919 | +static void pfm_default_fmt_cleanup_module(void) | |
5920 | +{ | |
5921 | + pfm_fmt_unregister(&default_fmt); | |
5922 | +} | |
5923 | + | |
5924 | +module_init(pfm_default_fmt_init_module); | |
5925 | +module_exit(pfm_default_fmt_cleanup_module); | |
5926 | --- /dev/null | |
5927 | +++ b/arch/ia64/perfmon/perfmon_generic.c | |
5928 | @@ -0,0 +1,148 @@ | |
5929 | +/* | |
5930 | + * This file contains the generic PMU register description tables | |
5931 | + * and pmc checker used by perfmon.c. | |
5932 | + * | |
5933 | + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. | |
5934 | + * contributed by Stephane Eranian <eranian@hpl.hp.com> | |
5935 | + * | |
5936 | + * This program is free software; you can redistribute it and/or | |
5937 | + * modify it under the terms of version 2 of the GNU General Public | |
5938 | + * License as published by the Free Software Foundation. | |
5939 | + * | |
5940 | + * This program is distributed in the hope that it will be useful, | |
5941 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5942 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5943 | + * General Public License for more details. | |
5944 | + * | |
5945 | + * You should have received a copy of the GNU General Public License | |
5946 | + * along with this program; if not, write to the Free Software | |
5947 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
5948 | + * 02111-1307 USA | |
5949 | + */ | |
5950 | +#include <linux/module.h> | |
5951 | +#include <linux/perfmon_kern.h> | |
5952 | +#include <asm/pal.h> | |
5953 | + | |
5954 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
5955 | +MODULE_DESCRIPTION("Generic IA-64 PMU description tables"); | |
5956 | +MODULE_LICENSE("GPL"); | |
5957 | + | |
5958 | +#define RDEP(x) (1UL << (x)) | |
5959 | + | |
5960 | +#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)) | |
5961 | +#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL) | |
5962 | +#define PFM_IA64GEN_NO64 (1UL<<5) | |
5963 | + | |
5964 | +/* forward declaration */ | |
5965 | +static struct pfm_pmu_config pfm_ia64gen_pmu_conf; | |
5966 | + | |
5967 | +static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info = { | |
5968 | + .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,}, | |
5969 | +}; | |
5970 | + | |
5971 | +static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[] = { | |
5972 | +/* pmc0 */ PMX_NA, | |
5973 | +/* pmc1 */ PMX_NA, | |
5974 | +/* pmc2 */ PMX_NA, | |
5975 | +/* pmc3 */ PMX_NA, | |
5976 | +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4), | |
5977 | +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5), | |
5978 | +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6), | |
5979 | +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7) | |
5980 | +}; | |
5981 | +#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc) | |
5982 | + | |
5983 | +static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[] = { | |
5984 | +/* pmd0 */ PMX_NA, | |
5985 | +/* pmd1 */ PMX_NA, | |
5986 | +/* pmd2 */ PMX_NA, | |
5987 | +/* pmd3 */ PMX_NA, | |
5988 | +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), | |
5989 | +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), | |
5990 | +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), | |
5991 | +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7) | |
5992 | +}; | |
5993 | +#define PFM_IA64GEN_NUM_PMDS ARRAY_SIZE(pfm_ia64gen_pmd_desc) | |
5994 | + | |
5995 | +static int pfm_ia64gen_pmc_check(struct pfm_context *ctx, | |
5996 | + struct pfm_event_set *set, | |
5997 | + struct pfarg_pmc *req) | |
5998 | +{ | |
5999 | +#define PFM_IA64GEN_PMC_PM_POS6 (1UL<<6) | |
6000 | + u64 tmpval; | |
6001 | + int is_system; | |
6002 | + | |
6003 | + is_system = ctx->flags.system; | |
6004 | + tmpval = req->reg_value; | |
6005 | + | |
6006 | + switch (req->reg_num) { | |
6007 | + case 4: | |
6008 | + case 5: | |
6009 | + case 6: | |
6010 | + case 7: | |
6011 | + /* set pmc.oi for 64-bit emulation */ | |
6012 | + tmpval |= 1UL << 5; | |
6013 | + | |
6014 | + if (is_system) | |
6015 | + tmpval |= PFM_IA64GEN_PMC_PM_POS6; | |
6016 | + else | |
6017 | + tmpval &= ~PFM_IA64GEN_PMC_PM_POS6; | |
6018 | + break; | |
6019 | + | |
6020 | + } | |
6021 | + req->reg_value = tmpval; | |
6022 | + | |
6023 | + return 0; | |
6024 | +} | |
6025 | + | |
6026 | +/* | |
6027 | + * matches anything | |
6028 | + */ | |
6029 | +static int pfm_ia64gen_probe_pmu(void) | |
6030 | +{ | |
6031 | + u64 pm_buffer[16]; | |
6032 | + pal_perf_mon_info_u_t pm_info; | |
6033 | + | |
6034 | + /* | |
6035 | + * call PAL_PERFMON_INFO to retrieve counter width which | |
6036 | + * is implementation specific | |
6037 | + */ | |
6038 | + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info)) | |
6039 | + return -1; | |
6040 | + | |
6041 | + pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width; | |
6042 | + | |
6043 | + return 0; | |
6044 | +} | |
6045 | + | |
6046 | +/* | |
6047 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
6048 | + */ | |
6049 | +static struct pfm_pmu_config pfm_ia64gen_pmu_conf = { | |
6050 | + .pmu_name = "Generic IA-64", | |
6051 | + .counter_width = 0, /* computed from PAL_PERFMON_INFO */ | |
6052 | + .pmd_desc = pfm_ia64gen_pmd_desc, | |
6053 | + .pmc_desc = pfm_ia64gen_pmc_desc, | |
6054 | + .probe_pmu = pfm_ia64gen_probe_pmu, | |
6055 | + .num_pmc_entries = PFM_IA64GEN_NUM_PMCS, | |
6056 | + .num_pmd_entries = PFM_IA64GEN_NUM_PMDS, | |
6057 | + .pmc_write_check = pfm_ia64gen_pmc_check, | |
6058 | + .version = "1.0", | |
6059 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
6060 | + .owner = THIS_MODULE, | |
6061 | + .pmu_info = &pfm_ia64gen_pmu_info | |
6062 | + /* no read/write checkers */ | |
6063 | +}; | |
6064 | + | |
6065 | +static int __init pfm_gen_pmu_init_module(void) | |
6066 | +{ | |
6067 | + return pfm_pmu_register(&pfm_ia64gen_pmu_conf); | |
6068 | +} | |
6069 | + | |
6070 | +static void __exit pfm_gen_pmu_cleanup_module(void) | |
6071 | +{ | |
6072 | + pfm_pmu_unregister(&pfm_ia64gen_pmu_conf); | |
6073 | +} | |
6074 | + | |
6075 | +module_init(pfm_gen_pmu_init_module); | |
6076 | +module_exit(pfm_gen_pmu_cleanup_module); | |
6077 | --- /dev/null | |
6078 | +++ b/arch/ia64/perfmon/perfmon_itanium.c | |
6079 | @@ -0,0 +1,232 @@ | |
6080 | +/* | |
6081 | + * This file contains the Itanium PMU register description tables | |
6082 | + * and pmc checker used by perfmon.c. | |
6083 | + * | |
6084 | + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. | |
6085 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
6086 | + * | |
6087 | + * This program is free software; you can redistribute it and/or | |
6088 | + * modify it under the terms of version 2 of the GNU General Public | |
6089 | + * License as published by the Free Software Foundation. | |
6090 | + * | |
6091 | + * This program is distributed in the hope that it will be useful, | |
6092 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
6093 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
6094 | + * General Public License for more details. | |
6095 | + * | |
6096 | + * You should have received a copy of the GNU General Public License | |
6097 | + * along with this program; if not, write to the Free Software | |
6098 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
6099 | + * 02111-1307 USA | |
6100 | + */ | |
6101 | +#include <linux/module.h> | |
6102 | +#include <linux/perfmon_kern.h> | |
6103 | + | |
6104 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
6105 | +MODULE_DESCRIPTION("Itanium (Merced) PMU description tables"); | |
6106 | +MODULE_LICENSE("GPL"); | |
6107 | + | |
6108 | +#define RDEP(x) (1ULL << (x)) | |
6109 | + | |
6110 | +#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ | |
6111 | + RDEP(12)) | |
6112 | + | |
6113 | +#define PFM_ITA_NO64 (1ULL<<5) | |
6114 | + | |
6115 | +static struct pfm_arch_pmu_info pfm_ita_pmu_info = { | |
6116 | + .mask_pmcs = {PFM_ITA_MASK_PMCS,}, | |
6117 | +}; | |
6118 | +/* reserved bits are 1 in the mask */ | |
6119 | +#define PFM_ITA_RSVD 0xfffffffffc8000a0UL | |
6120 | +/* | |
6121 | + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using | |
6122 | + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information | |
6123 | + * but this is fine because they are handled separately in the IA-64 specific | |
6124 | + * code. | |
6125 | + */ | |
6126 | +static struct pfm_regmap_desc pfm_ita_pmc_desc[] = { | |
6127 | +/* pmc0 */ PMX_NA, | |
6128 | +/* pmc1 */ PMX_NA, | |
6129 | +/* pmc2 */ PMX_NA, | |
6130 | +/* pmc3 */ PMX_NA, | |
6131 | +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4), | |
6132 | +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5), | |
6133 | +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6), | |
6134 | +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7), | |
6135 | +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8), | |
6136 | +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9), | |
6137 | +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10), | |
6138 | +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11), | |
6139 | +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12), | |
6140 | +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13), | |
6141 | +/* pmc14 */ PMX_NA, | |
6142 | +/* pmc15 */ PMX_NA, | |
6143 | +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6144 | +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6145 | +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6146 | +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6147 | +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6148 | +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6149 | +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6150 | +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6151 | +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6152 | +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6153 | +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6154 | +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6155 | +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6156 | +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6157 | +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6158 | +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6159 | +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6160 | +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6161 | +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6162 | +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6163 | +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6164 | +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6165 | +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6166 | +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6167 | +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6168 | +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6169 | +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6170 | +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6171 | +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6172 | +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6173 | +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), | |
6174 | +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), | |
6175 | +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), | |
6176 | +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), | |
6177 | +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), | |
6178 | +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), | |
6179 | +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), | |
6180 | +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7), | |
6181 | +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), | |
6182 | +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), | |
6183 | +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), | |
6184 | +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), | |
6185 | +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), | |
6186 | +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), | |
6187 | +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), | |
6188 | +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) | |
6189 | +}; | |
6190 | +#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc) | |
6191 | + | |
6192 | +static struct pfm_regmap_desc pfm_ita_pmd_desc[] = { | |
6193 | +/* pmd0 */ PMD_DP(PFM_REG_I , "PMD0", 0, 1ull << 10), | |
6194 | +/* pmd1 */ PMD_DP(PFM_REG_I , "PMD1", 1, 1ull << 10), | |
6195 | +/* pmd2 */ PMD_DP(PFM_REG_I , "PMD2", 2, 1ull << 11), | |
6196 | +/* pmd3 */ PMD_DP(PFM_REG_I , "PMD3", 3, 1ull << 11), | |
6197 | +/* pmd4 */ PMD_DP(PFM_REG_C , "PMD4", 4, 1ull << 4), | |
6198 | +/* pmd5 */ PMD_DP(PFM_REG_C , "PMD5", 5, 1ull << 5), | |
6199 | +/* pmd6 */ PMD_DP(PFM_REG_C , "PMD6", 6, 1ull << 6), | |
6200 | +/* pmd7 */ PMD_DP(PFM_REG_C , "PMD7", 7, 1ull << 7), | |
6201 | +/* pmd8 */ PMD_DP(PFM_REG_I , "PMD8", 8, 1ull << 12), | |
6202 | +/* pmd9 */ PMD_DP(PFM_REG_I , "PMD9", 9, 1ull << 12), | |
6203 | +/* pmd10 */ PMD_DP(PFM_REG_I , "PMD10", 10, 1ull << 12), | |
6204 | +/* pmd11 */ PMD_DP(PFM_REG_I , "PMD11", 11, 1ull << 12), | |
6205 | +/* pmd12 */ PMD_DP(PFM_REG_I , "PMD12", 12, 1ull << 12), | |
6206 | +/* pmd13 */ PMD_DP(PFM_REG_I , "PMD13", 13, 1ull << 12), | |
6207 | +/* pmd14 */ PMD_DP(PFM_REG_I , "PMD14", 14, 1ull << 12), | |
6208 | +/* pmd15 */ PMD_DP(PFM_REG_I , "PMD15", 15, 1ull << 12), | |
6209 | +/* pmd16 */ PMD_DP(PFM_REG_I , "PMD16", 16, 1ull << 12), | |
6210 | +/* pmd17 */ PMD_DP(PFM_REG_I , "PMD17", 17, 1ull << 11) | |
6211 | +}; | |
6212 | +#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc) | |
6213 | + | |
6214 | +static int pfm_ita_pmc_check(struct pfm_context *ctx, | |
6215 | + struct pfm_event_set *set, | |
6216 | + struct pfarg_pmc *req) | |
6217 | +{ | |
6218 | +#define PFM_ITA_PMC_PM_POS6 (1UL<<6) | |
6219 | + struct pfm_arch_context *ctx_arch; | |
6220 | + u64 tmpval; | |
6221 | + u16 cnum; | |
6222 | + int ret = 0, is_system; | |
6223 | + | |
6224 | + tmpval = req->reg_value; | |
6225 | + cnum = req->reg_num; | |
6226 | + ctx_arch = pfm_ctx_arch(ctx); | |
6227 | + is_system = ctx->flags.system; | |
6228 | + | |
6229 | + switch (cnum) { | |
6230 | + case 4: | |
6231 | + case 5: | |
6232 | + case 6: | |
6233 | + case 7: | |
6234 | + case 10: | |
6235 | + case 11: | |
6236 | + case 12: | |
6237 | + if (is_system) | |
6238 | + tmpval |= PFM_ITA_PMC_PM_POS6; | |
6239 | + else | |
6240 | + tmpval &= ~PFM_ITA_PMC_PM_POS6; | |
6241 | + break; | |
6242 | + } | |
6243 | + | |
6244 | + /* | |
6245 | + * we must clear the (instruction) debug registers if pmc13.ta bit is | |
6246 | + * cleared before they are written (fl_using_dbreg==0) to avoid | |
6247 | + * picking up stale information. | |
6248 | + */ | |
6249 | + if (cnum == 13 && ((tmpval & 0x1) == 0) | |
6250 | + && ctx_arch->flags.use_dbr == 0) { | |
6251 | + PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr"); | |
6252 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6253 | + if (ret) | |
6254 | + return ret; | |
6255 | + } | |
6256 | + | |
6257 | + /* | |
6258 | + * we must clear the (data) debug registers if pmc11.pt bit is cleared | |
6259 | + * before they are written (fl_using_dbreg==0) to avoid picking up | |
6260 | + * stale information. | |
6261 | + */ | |
6262 | + if (cnum == 11 && ((tmpval >> 28) & 0x1) == 0 | |
6263 | + && ctx_arch->flags.use_dbr == 0) { | |
6264 | + PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr"); | |
6265 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6266 | + if (ret) | |
6267 | + return ret; | |
6268 | + } | |
6269 | + | |
6270 | + req->reg_value = tmpval; | |
6271 | + | |
6272 | + return 0; | |
6273 | +} | |
6274 | + | |
6275 | +static int pfm_ita_probe_pmu(void) | |
6276 | +{ | |
6277 | + return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim") | |
6278 | + ? 0 : -1; | |
6279 | +} | |
6280 | + | |
6281 | +/* | |
6282 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
6283 | + */ | |
6284 | +static struct pfm_pmu_config pfm_ita_pmu_conf = { | |
6285 | + .pmu_name = "Itanium", | |
6286 | + .counter_width = 32, | |
6287 | + .pmd_desc = pfm_ita_pmd_desc, | |
6288 | + .pmc_desc = pfm_ita_pmc_desc, | |
6289 | + .pmc_write_check = pfm_ita_pmc_check, | |
6290 | + .num_pmc_entries = PFM_ITA_NUM_PMCS, | |
6291 | + .num_pmd_entries = PFM_ITA_NUM_PMDS, | |
6292 | + .probe_pmu = pfm_ita_probe_pmu, | |
6293 | + .version = "1.0", | |
6294 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
6295 | + .owner = THIS_MODULE, | |
6296 | + .pmu_info = &pfm_ita_pmu_info | |
6297 | +}; | |
6298 | + | |
6299 | +static int __init pfm_ita_pmu_init_module(void) | |
6300 | +{ | |
6301 | + return pfm_pmu_register(&pfm_ita_pmu_conf); | |
6302 | +} | |
6303 | + | |
6304 | +static void __exit pfm_ita_pmu_cleanup_module(void) | |
6305 | +{ | |
6306 | + pfm_pmu_unregister(&pfm_ita_pmu_conf); | |
6307 | +} | |
6308 | + | |
6309 | +module_init(pfm_ita_pmu_init_module); | |
6310 | +module_exit(pfm_ita_pmu_cleanup_module); | |
6311 | + | |
6312 | --- /dev/null | |
6313 | +++ b/arch/ia64/perfmon/perfmon_mckinley.c | |
6314 | @@ -0,0 +1,290 @@ | |
6315 | +/* | |
6316 | + * This file contains the McKinley PMU register description tables | |
6317 | + * and pmc checker used by perfmon.c. | |
6318 | + * | |
6319 | + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. | |
6320 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
6321 | + * | |
6322 | + * This program is free software; you can redistribute it and/or | |
6323 | + * modify it under the terms of version 2 of the GNU General Public | |
6324 | + * License as published by the Free Software Foundation. | |
6325 | + * | |
6326 | + * This program is distributed in the hope that it will be useful, | |
6327 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
6328 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
6329 | + * General Public License for more details. | |
6330 | + * | |
6331 | + * You should have received a copy of the GNU General Public License | |
6332 | + * along with this program; if not, write to the Free Software | |
6333 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
6334 | + * 02111-1307 USA | |
6335 | + */ | |
6336 | +#include <linux/module.h> | |
6337 | +#include <linux/perfmon_kern.h> | |
6338 | + | |
6339 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
6340 | +MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables"); | |
6341 | +MODULE_LICENSE("GPL"); | |
6342 | + | |
6343 | +#define RDEP(x) (1UL << (x)) | |
6344 | + | |
6345 | +#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ | |
6346 | + RDEP(12)) | |
6347 | + | |
6348 | +#define PFM_MCK_NO64 (1UL<<5) | |
6349 | + | |
6350 | +static struct pfm_arch_pmu_info pfm_mck_pmu_info = { | |
6351 | + .mask_pmcs = {PFM_MCK_MASK_PMCS,}, | |
6352 | +}; | |
6353 | + | |
6354 | +/* reserved bits are 1 in the mask */ | |
6355 | +#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL | |
6356 | + | |
6357 | +/* | |
6358 | + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using | |
6359 | + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information | |
6360 | + * but this is fine because they are handled separately in the IA-64 specific | |
6361 | + * code. | |
6362 | + */ | |
6363 | +static struct pfm_regmap_desc pfm_mck_pmc_desc[] = { | |
6364 | +/* pmc0 */ PMX_NA, | |
6365 | +/* pmc1 */ PMX_NA, | |
6366 | +/* pmc2 */ PMX_NA, | |
6367 | +/* pmc3 */ PMX_NA, | |
6368 | +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, 0xfffffffffc8000a0, PFM_MCK_NO64, 4), | |
6369 | +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5), | |
6370 | +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6), | |
6371 | +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7), | |
6372 | +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8), | |
6373 | +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9), | |
6374 | +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10), | |
6375 | +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11), | |
6376 | +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12), | |
6377 | +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13), | |
6378 | +/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14), | |
6379 | +/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15), | |
6380 | +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6381 | +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6382 | +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6383 | +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6384 | +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6385 | +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6386 | +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6387 | +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6388 | +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6389 | +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6390 | +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6391 | +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6392 | +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6393 | +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6394 | +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6395 | +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6396 | +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6397 | +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6398 | +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6399 | +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6400 | +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6401 | +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6402 | +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6403 | +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6404 | +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6405 | +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6406 | +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6407 | +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6408 | +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6409 | +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6410 | +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), | |
6411 | +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), | |
6412 | +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), | |
6413 | +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), | |
6414 | +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), | |
6415 | +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), | |
6416 | +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), | |
6417 | +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7), | |
6418 | +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), | |
6419 | +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), | |
6420 | +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), | |
6421 | +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), | |
6422 | +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), | |
6423 | +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), | |
6424 | +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), | |
6425 | +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) | |
6426 | +}; | |
6427 | +#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc) | |
6428 | + | |
6429 | +static struct pfm_regmap_desc pfm_mck_pmd_desc[] = { | |
6430 | +/* pmd0 */ PMD_DP(PFM_REG_I, "PMD0", 0, 1ull << 10), | |
6431 | +/* pmd1 */ PMD_DP(PFM_REG_I, "PMD1", 1, 1ull << 10), | |
6432 | +/* pmd2 */ PMD_DP(PFM_REG_I, "PMD2", 2, 1ull << 11), | |
6433 | +/* pmd3 */ PMD_DP(PFM_REG_I, "PMD3", 3, 1ull << 11), | |
6434 | +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), | |
6435 | +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), | |
6436 | +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), | |
6437 | +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7), | |
6438 | +/* pmd8 */ PMD_DP(PFM_REG_I, "PMD8", 8, 1ull << 12), | |
6439 | +/* pmd9 */ PMD_DP(PFM_REG_I, "PMD9", 9, 1ull << 12), | |
6440 | +/* pmd10 */ PMD_DP(PFM_REG_I, "PMD10", 10, 1ull << 12), | |
6441 | +/* pmd11 */ PMD_DP(PFM_REG_I, "PMD11", 11, 1ull << 12), | |
6442 | +/* pmd12 */ PMD_DP(PFM_REG_I, "PMD12", 12, 1ull << 12), | |
6443 | +/* pmd13 */ PMD_DP(PFM_REG_I, "PMD13", 13, 1ull << 12), | |
6444 | +/* pmd14 */ PMD_DP(PFM_REG_I, "PMD14", 14, 1ull << 12), | |
6445 | +/* pmd15 */ PMD_DP(PFM_REG_I, "PMD15", 15, 1ull << 12), | |
6446 | +/* pmd16 */ PMD_DP(PFM_REG_I, "PMD16", 16, 1ull << 12), | |
6447 | +/* pmd17 */ PMD_DP(PFM_REG_I, "PMD17", 17, 1ull << 11) | |
6448 | +}; | |
6449 | +#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc) | |
6450 | + | |
6451 | +static int pfm_mck_pmc_check(struct pfm_context *ctx, | |
6452 | + struct pfm_event_set *set, | |
6453 | + struct pfarg_pmc *req) | |
6454 | +{ | |
6455 | + struct pfm_arch_context *ctx_arch; | |
6456 | + u64 val8 = 0, val14 = 0, val13 = 0; | |
6457 | + u64 tmpval; | |
6458 | + u16 cnum; | |
6459 | + int ret = 0, check_case1 = 0; | |
6460 | + int is_system; | |
6461 | + | |
6462 | + tmpval = req->reg_value; | |
6463 | + cnum = req->reg_num; | |
6464 | + ctx_arch = pfm_ctx_arch(ctx); | |
6465 | + is_system = ctx->flags.system; | |
6466 | + | |
6467 | +#define PFM_MCK_PMC_PM_POS6 (1UL<<6) | |
6468 | +#define PFM_MCK_PMC_PM_POS4 (1UL<<4) | |
6469 | + | |
6470 | + switch (cnum) { | |
6471 | + case 4: | |
6472 | + case 5: | |
6473 | + case 6: | |
6474 | + case 7: | |
6475 | + case 11: | |
6476 | + case 12: | |
6477 | + if (is_system) | |
6478 | + tmpval |= PFM_MCK_PMC_PM_POS6; | |
6479 | + else | |
6480 | + tmpval &= ~PFM_MCK_PMC_PM_POS6; | |
6481 | + break; | |
6482 | + | |
6483 | + case 8: | |
6484 | + val8 = tmpval; | |
6485 | + val13 = set->pmcs[13]; | |
6486 | + val14 = set->pmcs[14]; | |
6487 | + check_case1 = 1; | |
6488 | + break; | |
6489 | + | |
6490 | + case 10: | |
6491 | + if (is_system) | |
6492 | + tmpval |= PFM_MCK_PMC_PM_POS4; | |
6493 | + else | |
6494 | + tmpval &= ~PFM_MCK_PMC_PM_POS4; | |
6495 | + break; | |
6496 | + | |
6497 | + case 13: | |
6498 | + val8 = set->pmcs[8]; | |
6499 | + val13 = tmpval; | |
6500 | + val14 = set->pmcs[14]; | |
6501 | + check_case1 = 1; | |
6502 | + break; | |
6503 | + | |
6504 | + case 14: | |
6505 | + val8 = set->pmcs[8]; | |
6506 | + val13 = set->pmcs[13]; | |
6507 | + val14 = tmpval; | |
6508 | + check_case1 = 1; | |
6509 | + break; | |
6510 | + } | |
6511 | + | |
6512 | + /* | |
6513 | + * check illegal configuration which can produce inconsistencies | |
6514 | + * in tagging i-side events in L1D and L2 caches | |
6515 | + */ | |
6516 | + if (check_case1) { | |
6517 | + ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0)) | |
6518 | + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) | |
6519 | + || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); | |
6520 | + | |
6521 | + if (ret) { | |
6522 | + PFM_DBG("perfmon: invalid config pmc8=0x%lx " | |
6523 | + "pmc13=0x%lx pmc14=0x%lx", | |
6524 | + val8, val13, val14); | |
6525 | + return -EINVAL; | |
6526 | + } | |
6527 | + } | |
6528 | + | |
6529 | + /* | |
6530 | + * check if configuration implicitely activates the use of | |
6531 | + * the debug registers. If true, then we ensure that this is | |
6532 | + * possible and that we do not pick up stale value in the HW | |
6533 | + * registers. | |
6534 | + * | |
6535 | + * We postpone the checks of pmc13 and pmc14 to avoid side effects | |
6536 | + * in case of errors | |
6537 | + */ | |
6538 | + | |
6539 | + /* | |
6540 | + * pmc13 is "active" if: | |
6541 | + * one of the pmc13.cfg_dbrpXX field is different from 0x3 | |
6542 | + * AND | |
6543 | + * at the corresponding pmc13.ena_dbrpXX is set. | |
6544 | + */ | |
6545 | + if (cnum == 13 && (tmpval & 0x1e00000000000UL) | |
6546 | + && (tmpval & 0x18181818UL) != 0x18181818UL | |
6547 | + && ctx_arch->flags.use_dbr == 0) { | |
6548 | + PFM_DBG("pmc13=0x%lx active", tmpval); | |
6549 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6550 | + if (ret) | |
6551 | + return ret; | |
6552 | + } | |
6553 | + | |
6554 | + /* | |
6555 | + * if any pmc14.ibrpX bit is enabled we must clear the ibrs | |
6556 | + */ | |
6557 | + if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL) | |
6558 | + && ctx_arch->flags.use_dbr == 0) { | |
6559 | + PFM_DBG("pmc14=0x%lx active", tmpval); | |
6560 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6561 | + if (ret) | |
6562 | + return ret; | |
6563 | + } | |
6564 | + | |
6565 | + req->reg_value = tmpval; | |
6566 | + | |
6567 | + return 0; | |
6568 | +} | |
6569 | + | |
6570 | +static int pfm_mck_probe_pmu(void) | |
6571 | +{ | |
6572 | + return local_cpu_data->family == 0x1f ? 0 : -1; | |
6573 | +} | |
6574 | + | |
6575 | +/* | |
6576 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
6577 | + */ | |
6578 | +static struct pfm_pmu_config pfm_mck_pmu_conf = { | |
6579 | + .pmu_name = "Itanium 2", | |
6580 | + .counter_width = 47, | |
6581 | + .pmd_desc = pfm_mck_pmd_desc, | |
6582 | + .pmc_desc = pfm_mck_pmc_desc, | |
6583 | + .pmc_write_check = pfm_mck_pmc_check, | |
6584 | + .num_pmc_entries = PFM_MCK_NUM_PMCS, | |
6585 | + .num_pmd_entries = PFM_MCK_NUM_PMDS, | |
6586 | + .probe_pmu = pfm_mck_probe_pmu, | |
6587 | + .version = "1.0", | |
6588 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
6589 | + .owner = THIS_MODULE, | |
6590 | + .pmu_info = &pfm_mck_pmu_info, | |
6591 | +}; | |
6592 | + | |
6593 | +static int __init pfm_mck_pmu_init_module(void) | |
6594 | +{ | |
6595 | + return pfm_pmu_register(&pfm_mck_pmu_conf); | |
6596 | +} | |
6597 | + | |
6598 | +static void __exit pfm_mck_pmu_cleanup_module(void) | |
6599 | +{ | |
6600 | + pfm_pmu_unregister(&pfm_mck_pmu_conf); | |
6601 | +} | |
6602 | + | |
6603 | +module_init(pfm_mck_pmu_init_module); | |
6604 | +module_exit(pfm_mck_pmu_cleanup_module); | |
6605 | --- /dev/null | |
6606 | +++ b/arch/ia64/perfmon/perfmon_montecito.c | |
6607 | @@ -0,0 +1,412 @@ | |
6608 | +/* | |
6609 | + * This file contains the McKinley PMU register description tables | |
6610 | + * and pmc checker used by perfmon.c. | |
6611 | + * | |
6612 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
6613 | + * Contributed Stephane Eranian <eranian@hpl.hp.com> | |
6614 | + * | |
6615 | + * This program is free software; you can redistribute it and/or | |
6616 | + * modify it under the terms of version 2 of the GNU General Public | |
6617 | + * License as published by the Free Software Foundation. | |
6618 | + * | |
6619 | + * This program is distributed in the hope that it will be useful, | |
6620 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
6621 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
6622 | + * General Public License for more details. | |
6623 | + * | |
6624 | + * You should have received a copy of the GNU General Public License | |
6625 | + * along with this program; if not, write to the Free Software | |
6626 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
6627 | + * 02111-1307 USA | |
6628 | + */ | |
6629 | +#include <linux/module.h> | |
6630 | +#include <linux/smp.h> | |
6631 | +#include <linux/perfmon_kern.h> | |
6632 | + | |
6633 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
6634 | +MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table"); | |
6635 | +MODULE_LICENSE("GPL"); | |
6636 | + | |
6637 | +#define RDEP(x) (1UL << (x)) | |
6638 | + | |
6639 | +#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\ | |
6640 | + RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\ | |
6641 | + RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\ | |
6642 | + RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42)) | |
6643 | + | |
6644 | +#define PFM_MONT_NO64 (1UL<<5) | |
6645 | + | |
6646 | +static struct pfm_arch_pmu_info pfm_mont_pmu_info = { | |
6647 | + .mask_pmcs = {PFM_MONT_MASK_PMCS,}, | |
6648 | +}; | |
6649 | + | |
6650 | +#define PFM_MONT_RSVD 0xffffffff838000a0UL | |
6651 | +/* | |
6652 | + * | |
6653 | + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using | |
6654 | + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information | |
6655 | + * but this is fine because they are handled separately in the IA-64 specific | |
6656 | + * code. | |
6657 | + * | |
6658 | + * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only) | |
6659 | + */ | |
6660 | +static struct pfm_regmap_desc pfm_mont_pmc_desc[] = { | |
6661 | +/* pmc0 */ PMX_NA, | |
6662 | +/* pmc1 */ PMX_NA, | |
6663 | +/* pmc2 */ PMX_NA, | |
6664 | +/* pmc3 */ PMX_NA, | |
6665 | +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4), | |
6666 | +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5), | |
6667 | +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6), | |
6668 | +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7), | |
6669 | +/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8), | |
6670 | +/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9), | |
6671 | +/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10), | |
6672 | +/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11), | |
6673 | +/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12), | |
6674 | +/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13), | |
6675 | +/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14), | |
6676 | +/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15), | |
6677 | +/* pmc16 */ PMX_NA, | |
6678 | +/* pmc17 */ PMX_NA, | |
6679 | +/* pmc18 */ PMX_NA, | |
6680 | +/* pmc19 */ PMX_NA, | |
6681 | +/* pmc20 */ PMX_NA, | |
6682 | +/* pmc21 */ PMX_NA, | |
6683 | +/* pmc22 */ PMX_NA, | |
6684 | +/* pmc23 */ PMX_NA, | |
6685 | +/* pmc24 */ PMX_NA, | |
6686 | +/* pmc25 */ PMX_NA, | |
6687 | +/* pmc26 */ PMX_NA, | |
6688 | +/* pmc27 */ PMX_NA, | |
6689 | +/* pmc28 */ PMX_NA, | |
6690 | +/* pmc29 */ PMX_NA, | |
6691 | +/* pmc30 */ PMX_NA, | |
6692 | +/* pmc31 */ PMX_NA, | |
6693 | +/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32), | |
6694 | +/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33), | |
6695 | +/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34), | |
6696 | +/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35), | |
6697 | +/* pmc36 */ PMC_D(PFM_REG_W , "PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 36), | |
6698 | +/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37), | |
6699 | +/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38), | |
6700 | +/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39), | |
6701 | +/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40), | |
6702 | +/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41), | |
6703 | +/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42), | |
6704 | +/* pmc43 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6705 | +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6706 | +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6707 | +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6708 | +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6709 | +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6710 | +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6711 | +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6712 | +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6713 | +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6714 | +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6715 | +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6716 | +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6717 | +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6718 | +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6719 | +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6720 | +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6721 | +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6722 | +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6723 | +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6724 | +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6725 | +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6726 | +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6727 | +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6728 | +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6729 | +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6730 | +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
6731 | +/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0), | |
6732 | +/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1), | |
6733 | +/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2), | |
6734 | +/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3), | |
6735 | +/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4), | |
6736 | +/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5), | |
6737 | +/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6), | |
6738 | +/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7), | |
6739 | +/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0), | |
6740 | +/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1), | |
6741 | +/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2), | |
6742 | +/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3), | |
6743 | +/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4), | |
6744 | +/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5), | |
6745 | +/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6), | |
6746 | +/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7) | |
6747 | +}; | |
6748 | +#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc) | |
6749 | + | |
6750 | +static struct pfm_regmap_desc pfm_mont_pmd_desc[] = { | |
6751 | +/* pmd0 */ PMX_NA, | |
6752 | +/* pmd1 */ PMX_NA, | |
6753 | +/* pmd2 */ PMX_NA, | |
6754 | +/* pmd3 */ PMX_NA, | |
6755 | +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), | |
6756 | +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), | |
6757 | +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), | |
6758 | +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7), | |
6759 | +/* pmd8 */ PMD_DP(PFM_REG_C, "PMD8", 8, 1ull << 8), | |
6760 | +/* pmd9 */ PMD_DP(PFM_REG_C, "PMD9", 9, 1ull << 9), | |
6761 | +/* pmd10 */ PMD_DP(PFM_REG_C, "PMD10", 10, 1ull << 10), | |
6762 | +/* pmd11 */ PMD_DP(PFM_REG_C, "PMD11", 11, 1ull << 11), | |
6763 | +/* pmd12 */ PMD_DP(PFM_REG_C, "PMD12", 12, 1ull << 12), | |
6764 | +/* pmd13 */ PMD_DP(PFM_REG_C, "PMD13", 13, 1ull << 13), | |
6765 | +/* pmd14 */ PMD_DP(PFM_REG_C, "PMD14", 14, 1ull << 14), | |
6766 | +/* pmd15 */ PMD_DP(PFM_REG_C, "PMD15", 15, 1ull << 15), | |
6767 | +/* pmd16 */ PMX_NA, | |
6768 | +/* pmd17 */ PMX_NA, | |
6769 | +/* pmd18 */ PMX_NA, | |
6770 | +/* pmd19 */ PMX_NA, | |
6771 | +/* pmd20 */ PMX_NA, | |
6772 | +/* pmd21 */ PMX_NA, | |
6773 | +/* pmd22 */ PMX_NA, | |
6774 | +/* pmd23 */ PMX_NA, | |
6775 | +/* pmd24 */ PMX_NA, | |
6776 | +/* pmd25 */ PMX_NA, | |
6777 | +/* pmd26 */ PMX_NA, | |
6778 | +/* pmd27 */ PMX_NA, | |
6779 | +/* pmd28 */ PMX_NA, | |
6780 | +/* pmd29 */ PMX_NA, | |
6781 | +/* pmd30 */ PMX_NA, | |
6782 | +/* pmd31 */ PMX_NA, | |
6783 | +/* pmd32 */ PMD_DP(PFM_REG_I, "PMD32", 32, 1ull << 40), | |
6784 | +/* pmd33 */ PMD_DP(PFM_REG_I, "PMD33", 33, 1ull << 40), | |
6785 | +/* pmd34 */ PMD_DP(PFM_REG_I, "PMD34", 34, 1ull << 37), | |
6786 | +/* pmd35 */ PMD_DP(PFM_REG_I, "PMD35", 35, 1ull << 37), | |
6787 | +/* pmd36 */ PMD_DP(PFM_REG_I, "PMD36", 36, 1ull << 40), | |
6788 | +/* pmd37 */ PMX_NA, | |
6789 | +/* pmd38 */ PMD_DP(PFM_REG_I, "PMD38", 38, (1ull<<39)|(1ull<<42)), | |
6790 | +/* pmd39 */ PMD_DP(PFM_REG_I, "PMD39", 39, (1ull<<39)|(1ull<<42)), | |
6791 | +/* pmd40 */ PMX_NA, | |
6792 | +/* pmd41 */ PMX_NA, | |
6793 | +/* pmd42 */ PMX_NA, | |
6794 | +/* pmd43 */ PMX_NA, | |
6795 | +/* pmd44 */ PMX_NA, | |
6796 | +/* pmd45 */ PMX_NA, | |
6797 | +/* pmd46 */ PMX_NA, | |
6798 | +/* pmd47 */ PMX_NA, | |
6799 | +/* pmd48 */ PMD_DP(PFM_REG_I, "PMD48", 48, (1ull<<39)|(1ull<<42)), | |
6800 | +/* pmd49 */ PMD_DP(PFM_REG_I, "PMD49", 49, (1ull<<39)|(1ull<<42)), | |
6801 | +/* pmd50 */ PMD_DP(PFM_REG_I, "PMD50", 50, (1ull<<39)|(1ull<<42)), | |
6802 | +/* pmd51 */ PMD_DP(PFM_REG_I, "PMD51", 51, (1ull<<39)|(1ull<<42)), | |
6803 | +/* pmd52 */ PMD_DP(PFM_REG_I, "PMD52", 52, (1ull<<39)|(1ull<<42)), | |
6804 | +/* pmd53 */ PMD_DP(PFM_REG_I, "PMD53", 53, (1ull<<39)|(1ull<<42)), | |
6805 | +/* pmd54 */ PMD_DP(PFM_REG_I, "PMD54", 54, (1ull<<39)|(1ull<<42)), | |
6806 | +/* pmd55 */ PMD_DP(PFM_REG_I, "PMD55", 55, (1ull<<39)|(1ull<<42)), | |
6807 | +/* pmd56 */ PMD_DP(PFM_REG_I, "PMD56", 56, (1ull<<39)|(1ull<<42)), | |
6808 | +/* pmd57 */ PMD_DP(PFM_REG_I, "PMD57", 57, (1ull<<39)|(1ull<<42)), | |
6809 | +/* pmd58 */ PMD_DP(PFM_REG_I, "PMD58", 58, (1ull<<39)|(1ull<<42)), | |
6810 | +/* pmd59 */ PMD_DP(PFM_REG_I, "PMD59", 59, (1ull<<39)|(1ull<<42)), | |
6811 | +/* pmd60 */ PMD_DP(PFM_REG_I, "PMD60", 60, (1ull<<39)|(1ull<<42)), | |
6812 | +/* pmd61 */ PMD_DP(PFM_REG_I, "PMD61", 61, (1ull<<39)|(1ull<<42)), | |
6813 | +/* pmd62 */ PMD_DP(PFM_REG_I, "PMD62", 62, (1ull<<39)|(1ull<<42)), | |
6814 | +/* pmd63 */ PMD_DP(PFM_REG_I, "PMD63", 63, (1ull<<39)|(1ull<<42)) | |
6815 | +}; | |
6816 | +#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc) | |
6817 | + | |
6818 | +static int pfm_mont_has_ht; | |
6819 | + | |
6820 | +static int pfm_mont_pmc_check(struct pfm_context *ctx, | |
6821 | + struct pfm_event_set *set, | |
6822 | + struct pfarg_pmc *req) | |
6823 | +{ | |
6824 | + struct pfm_arch_context *ctx_arch; | |
6825 | + u64 val32 = 0, val38 = 0, val41 = 0; | |
6826 | + u64 tmpval; | |
6827 | + u16 cnum; | |
6828 | + int ret = 0, check_case1 = 0; | |
6829 | + int is_system; | |
6830 | + | |
6831 | + tmpval = req->reg_value; | |
6832 | + cnum = req->reg_num; | |
6833 | + ctx_arch = pfm_ctx_arch(ctx); | |
6834 | + is_system = ctx->flags.system; | |
6835 | + | |
6836 | +#define PFM_MONT_PMC_PM_POS6 (1UL<<6) | |
6837 | +#define PFM_MONT_PMC_PM_POS4 (1UL<<4) | |
6838 | + | |
6839 | + switch (cnum) { | |
6840 | + case 4: | |
6841 | + case 5: | |
6842 | + case 6: | |
6843 | + case 7: | |
6844 | + case 8: | |
6845 | + case 9: | |
6846 | + if (is_system) | |
6847 | + tmpval |= PFM_MONT_PMC_PM_POS6; | |
6848 | + else | |
6849 | + tmpval &= ~PFM_MONT_PMC_PM_POS6; | |
6850 | + break; | |
6851 | + case 10: | |
6852 | + case 11: | |
6853 | + case 12: | |
6854 | + case 13: | |
6855 | + case 14: | |
6856 | + case 15: | |
6857 | + if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) { | |
6858 | + if (pfm_mont_has_ht) { | |
6859 | + PFM_INFO("perfmon: Errata 121 PMD10/PMD15 cannot be used to overflow" | |
6860 | + "when threads on on"); | |
6861 | + return -EINVAL; | |
6862 | + } | |
6863 | + } | |
6864 | + if (is_system) | |
6865 | + tmpval |= PFM_MONT_PMC_PM_POS6; | |
6866 | + else | |
6867 | + tmpval &= ~PFM_MONT_PMC_PM_POS6; | |
6868 | + break; | |
6869 | + case 39: | |
6870 | + case 40: | |
6871 | + case 42: | |
6872 | + if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) { | |
6873 | + PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on"); | |
6874 | + return -EINVAL; | |
6875 | + } | |
6876 | + if (is_system) | |
6877 | + tmpval |= PFM_MONT_PMC_PM_POS6; | |
6878 | + else | |
6879 | + tmpval &= ~PFM_MONT_PMC_PM_POS6; | |
6880 | + break; | |
6881 | + | |
6882 | + case 32: | |
6883 | + val32 = tmpval; | |
6884 | + val38 = set->pmcs[38]; | |
6885 | + val41 = set->pmcs[41]; | |
6886 | + check_case1 = 1; | |
6887 | + break; | |
6888 | + | |
6889 | + case 37: | |
6890 | + if (is_system) | |
6891 | + tmpval |= PFM_MONT_PMC_PM_POS4; | |
6892 | + else | |
6893 | + tmpval &= ~PFM_MONT_PMC_PM_POS4; | |
6894 | + break; | |
6895 | + | |
6896 | + case 38: | |
6897 | + val38 = tmpval; | |
6898 | + val32 = set->pmcs[32]; | |
6899 | + val41 = set->pmcs[41]; | |
6900 | + check_case1 = 1; | |
6901 | + break; | |
6902 | + case 41: | |
6903 | + val41 = tmpval; | |
6904 | + val32 = set->pmcs[32]; | |
6905 | + val38 = set->pmcs[38]; | |
6906 | + check_case1 = 1; | |
6907 | + break; | |
6908 | + } | |
6909 | + | |
6910 | + if (check_case1) { | |
6911 | + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) | |
6912 | + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) | |
6913 | + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); | |
6914 | + if (ret) { | |
6915 | + PFM_DBG("perfmon: invalid config pmc38=0x%lx " | |
6916 | + "pmc41=0x%lx pmc32=0x%lx", | |
6917 | + val38, val41, val32); | |
6918 | + return -EINVAL; | |
6919 | + } | |
6920 | + } | |
6921 | + | |
6922 | + /* | |
6923 | + * check if configuration implicitely activates the use of the | |
6924 | + * debug registers. If true, then we ensure that this is possible | |
6925 | + * and that we do not pick up stale value in the HW registers. | |
6926 | + */ | |
6927 | + | |
6928 | + /* | |
6929 | + * | |
6930 | + * pmc41 is "active" if: | |
6931 | + * one of the pmc41.cfgdtagXX field is different from 0x3 | |
6932 | + * AND | |
6933 | + * the corsesponding pmc41.en_dbrpXX is set. | |
6934 | + * AND | |
6935 | + * ctx_fl_use_dbr (dbr not yet used) | |
6936 | + */ | |
6937 | + if (cnum == 41 | |
6938 | + && (tmpval & 0x1e00000000000) | |
6939 | + && (tmpval & 0x18181818) != 0x18181818 | |
6940 | + && ctx_arch->flags.use_dbr == 0) { | |
6941 | + PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval); | |
6942 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6943 | + if (ret) | |
6944 | + return ret; | |
6945 | + } | |
6946 | + /* | |
6947 | + * we must clear the (instruction) debug registers if: | |
6948 | + * pmc38.ig_ibrpX is 0 (enabled) | |
6949 | + * and | |
6950 | + * fl_use_dbr == 0 (dbr not yet used) | |
6951 | + */ | |
6952 | + if (cnum == 38 && ((tmpval & 0x492) != 0x492) | |
6953 | + && ctx_arch->flags.use_dbr == 0) { | |
6954 | + PFM_DBG("pmc38=0x%lx active pmc38, clearing ibr", tmpval); | |
6955 | + ret = pfm_ia64_mark_dbregs_used(ctx, set); | |
6956 | + if (ret) | |
6957 | + return ret; | |
6958 | + | |
6959 | + } | |
6960 | + req->reg_value = tmpval; | |
6961 | + return 0; | |
6962 | +} | |
6963 | + | |
6964 | +static void pfm_handle_errata(void) | |
6965 | +{ | |
6966 | + pfm_mont_has_ht = 1; | |
6967 | + | |
6968 | + PFM_INFO("activating workaround for errata 120 " | |
6969 | + "(Disable IP-EAR when threads are on)"); | |
6970 | + | |
6971 | + PFM_INFO("activating workaround for Errata 121 " | |
6972 | + "(PMC10-PMC15 cannot be used to overflow" | |
6973 | + " when threads are on"); | |
6974 | +} | |
6975 | +static int pfm_mont_probe_pmu(void) | |
6976 | +{ | |
6977 | + if (local_cpu_data->family != 0x20) | |
6978 | + return -1; | |
6979 | + | |
6980 | + /* | |
6981 | + * the 2 errata must be activated when | |
6982 | + * threads are/can be enabled | |
6983 | + */ | |
6984 | + if (is_multithreading_enabled()) | |
6985 | + pfm_handle_errata(); | |
6986 | + | |
6987 | + return 0; | |
6988 | +} | |
6989 | + | |
6990 | +/* | |
6991 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
6992 | + */ | |
6993 | +static struct pfm_pmu_config pfm_mont_pmu_conf = { | |
6994 | + .pmu_name = "Montecito", | |
6995 | + .counter_width = 47, | |
6996 | + .pmd_desc = pfm_mont_pmd_desc, | |
6997 | + .pmc_desc = pfm_mont_pmc_desc, | |
6998 | + .num_pmc_entries = PFM_MONT_NUM_PMCS, | |
6999 | + .num_pmd_entries = PFM_MONT_NUM_PMDS, | |
7000 | + .pmc_write_check = pfm_mont_pmc_check, | |
7001 | + .probe_pmu = pfm_mont_probe_pmu, | |
7002 | + .version = "1.0", | |
7003 | + .pmu_info = &pfm_mont_pmu_info, | |
7004 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
7005 | + .owner = THIS_MODULE | |
7006 | +}; | |
7007 | + | |
7008 | +static int __init pfm_mont_pmu_init_module(void) | |
7009 | +{ | |
7010 | + return pfm_pmu_register(&pfm_mont_pmu_conf); | |
7011 | +} | |
7012 | + | |
7013 | +static void __exit pfm_mont_pmu_cleanup_module(void) | |
7014 | +{ | |
7015 | + pfm_pmu_unregister(&pfm_mont_pmu_conf); | |
7016 | +} | |
7017 | + | |
7018 | +module_init(pfm_mont_pmu_init_module); | |
7019 | +module_exit(pfm_mont_pmu_cleanup_module); | |
7020 | --- a/arch/mips/Kconfig | |
7021 | +++ b/arch/mips/Kconfig | |
7022 | @@ -1858,6 +1858,8 @@ config SECCOMP | |
7023 | ||
7024 | If unsure, say Y. Only embedded should say N here. | |
7025 | ||
7026 | +source "arch/mips/perfmon/Kconfig" | |
7027 | + | |
7028 | endmenu | |
7029 | ||
7030 | config RWSEM_GENERIC_SPINLOCK | |
7031 | --- a/arch/mips/Makefile | |
7032 | +++ b/arch/mips/Makefile | |
7033 | @@ -154,6 +154,12 @@ endif | |
7034 | endif | |
7035 | ||
7036 | # | |
7037 | +# Perfmon support | |
7038 | +# | |
7039 | + | |
7040 | +core-$(CONFIG_PERFMON) += arch/mips/perfmon/ | |
7041 | + | |
7042 | +# | |
7043 | # Firmware support | |
7044 | # | |
7045 | libs-$(CONFIG_ARC) += arch/mips/fw/arc/ | |
7046 | --- a/arch/mips/kernel/process.c | |
7047 | +++ b/arch/mips/kernel/process.c | |
7048 | @@ -27,6 +27,7 @@ | |
7049 | #include <linux/completion.h> | |
7050 | #include <linux/kallsyms.h> | |
7051 | #include <linux/random.h> | |
7052 | +#include <linux/perfmon_kern.h> | |
7053 | ||
7054 | #include <asm/asm.h> | |
7055 | #include <asm/bootinfo.h> | |
7056 | @@ -94,6 +95,7 @@ void start_thread(struct pt_regs * regs, | |
7057 | ||
7058 | void exit_thread(void) | |
7059 | { | |
7060 | + pfm_exit_thread(); | |
7061 | } | |
7062 | ||
7063 | void flush_thread(void) | |
7064 | @@ -162,6 +164,8 @@ int copy_thread(int nr, unsigned long cl | |
7065 | if (clone_flags & CLONE_SETTLS) | |
7066 | ti->tp_value = regs->regs[7]; | |
7067 | ||
7068 | + pfm_copy_thread(p); | |
7069 | + | |
7070 | return 0; | |
7071 | } | |
7072 | ||
7073 | --- a/arch/mips/kernel/scall32-o32.S | |
7074 | +++ b/arch/mips/kernel/scall32-o32.S | |
7075 | @@ -653,6 +653,18 @@ einval: li v0, -EINVAL | |
7076 | sys sys_dup3 3 | |
7077 | sys sys_pipe2 2 | |
7078 | sys sys_inotify_init1 1 | |
7079 | + sys sys_pfm_create_context 4 /* 4330 */ | |
7080 | + sys sys_pfm_write_pmcs 3 | |
7081 | + sys sys_pfm_write_pmds 4 | |
7082 | + sys sys_pfm_read_pmds 3 | |
7083 | + sys sys_pfm_load_context 2 | |
7084 | + sys sys_pfm_start 2 /* 4335 */ | |
7085 | + sys sys_pfm_stop 1 | |
7086 | + sys sys_pfm_restart 1 | |
7087 | + sys sys_pfm_create_evtsets 3 | |
7088 | + sys sys_pfm_getinfo_evtsets 3 | |
7089 | + sys sys_pfm_delete_evtsets 3 /* 4340 */ | |
7090 | + sys sys_pfm_unload_context 1 | |
7091 | .endm | |
7092 | ||
7093 | /* We pre-compute the number of _instruction_ bytes needed to | |
7094 | --- a/arch/mips/kernel/scall64-64.S | |
7095 | +++ b/arch/mips/kernel/scall64-64.S | |
7096 | @@ -487,4 +487,16 @@ sys_call_table: | |
7097 | PTR sys_dup3 | |
7098 | PTR sys_pipe2 | |
7099 | PTR sys_inotify_init1 | |
7100 | + PTR sys_pfm_create_context | |
7101 | + PTR sys_pfm_write_pmcs /* 5290 */ | |
7102 | + PTR sys_pfm_write_pmds | |
7103 | + PTR sys_pfm_read_pmds | |
7104 | + PTR sys_pfm_load_context | |
7105 | + PTR sys_pfm_start | |
7106 | + PTR sys_pfm_stop /* 5295 */ | |
7107 | + PTR sys_pfm_restart | |
7108 | + PTR sys_pfm_create_evtsets | |
7109 | + PTR sys_pfm_getinfo_evtsets | |
7110 | + PTR sys_pfm_delete_evtsets | |
7111 | + PTR sys_pfm_unload_context /* 5300 */ | |
7112 | .size sys_call_table,.-sys_call_table | |
7113 | --- a/arch/mips/kernel/scall64-n32.S | |
7114 | +++ b/arch/mips/kernel/scall64-n32.S | |
7115 | @@ -400,12 +400,12 @@ EXPORT(sysn32_call_table) | |
7116 | PTR sys_ioprio_set | |
7117 | PTR sys_ioprio_get | |
7118 | PTR compat_sys_utimensat | |
7119 | - PTR compat_sys_signalfd /* 5280 */ | |
7120 | + PTR compat_sys_signalfd /* 6280 */ | |
7121 | PTR sys_ni_syscall | |
7122 | PTR sys_eventfd | |
7123 | PTR sys_fallocate | |
7124 | PTR sys_timerfd_create | |
7125 | - PTR sys_timerfd_gettime /* 5285 */ | |
7126 | + PTR sys_timerfd_gettime /* 6285 */ | |
7127 | PTR sys_timerfd_settime | |
7128 | PTR sys_signalfd4 | |
7129 | PTR sys_eventfd2 | |
7130 | @@ -413,4 +413,16 @@ EXPORT(sysn32_call_table) | |
7131 | PTR sys_dup3 /* 5290 */ | |
7132 | PTR sys_pipe2 | |
7133 | PTR sys_inotify_init1 | |
7134 | + PTR sys_pfm_create_context | |
7135 | + PTR sys_pfm_write_pmcs | |
7136 | + PTR sys_pfm_write_pmds /* 6295 */ | |
7137 | + PTR sys_pfm_read_pmds | |
7138 | + PTR sys_pfm_load_context | |
7139 | + PTR sys_pfm_start | |
7140 | + PTR sys_pfm_stop | |
7141 | + PTR sys_pfm_restart /* 6300 */ | |
7142 | + PTR sys_pfm_create_evtsets | |
7143 | + PTR sys_pfm_getinfo_evtsets | |
7144 | + PTR sys_pfm_delete_evtsets | |
7145 | + PTR sys_pfm_unload_context | |
7146 | .size sysn32_call_table,.-sysn32_call_table | |
7147 | --- a/arch/mips/kernel/scall64-o32.S | |
7148 | +++ b/arch/mips/kernel/scall64-o32.S | |
7149 | @@ -535,4 +535,16 @@ sys_call_table: | |
7150 | PTR sys_dup3 | |
7151 | PTR sys_pipe2 | |
7152 | PTR sys_inotify_init1 | |
7153 | + PTR sys_pfm_create_context /* 4330 */ | |
7154 | + PTR sys_pfm_write_pmcs | |
7155 | + PTR sys_pfm_write_pmds | |
7156 | + PTR sys_pfm_read_pmds | |
7157 | + PTR sys_pfm_load_context | |
7158 | + PTR sys_pfm_start /* 4335 */ | |
7159 | + PTR sys_pfm_stop | |
7160 | + PTR sys_pfm_restart | |
7161 | + PTR sys_pfm_create_evtsets | |
7162 | + PTR sys_pfm_getinfo_evtsets | |
7163 | + PTR sys_pfm_delete_evtsets /* 4340 */ | |
7164 | + PTR sys_pfm_unload_context | |
7165 | .size sys_call_table,.-sys_call_table | |
7166 | --- a/arch/mips/kernel/signal.c | |
7167 | +++ b/arch/mips/kernel/signal.c | |
7168 | @@ -21,6 +21,7 @@ | |
7169 | #include <linux/compiler.h> | |
7170 | #include <linux/syscalls.h> | |
7171 | #include <linux/uaccess.h> | |
7172 | +#include <linux/perfmon_kern.h> | |
7173 | ||
7174 | #include <asm/abi.h> | |
7175 | #include <asm/asm.h> | |
7176 | @@ -695,8 +696,11 @@ static void do_signal(struct pt_regs *re | |
7177 | * - triggered by the TIF_WORK_MASK flags | |
7178 | */ | |
7179 | asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, | |
7180 | - __u32 thread_info_flags) | |
7181 | + __u32 thread_info_flags) | |
7182 | { | |
7183 | + if (thread_info_flags & _TIF_PERFMON_WORK) | |
7184 | + pfm_handle_work(regs); | |
7185 | + | |
7186 | /* deal with pending signal delivery */ | |
7187 | if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) | |
7188 | do_signal(regs); | |
7189 | --- a/arch/mips/kernel/time.c | |
7190 | +++ b/arch/mips/kernel/time.c | |
7191 | @@ -49,10 +49,11 @@ int update_persistent_clock(struct times | |
7192 | return rtc_mips_set_mmss(now.tv_sec); | |
7193 | } | |
7194 | ||
7195 | -static int null_perf_irq(void) | |
7196 | +int null_perf_irq(void) | |
7197 | { | |
7198 | return 0; | |
7199 | } | |
7200 | +EXPORT_SYMBOL(null_perf_irq); | |
7201 | ||
7202 | int (*perf_irq)(void) = null_perf_irq; | |
7203 | ||
7204 | --- a/arch/mips/kernel/traps.c | |
7205 | +++ b/arch/mips/kernel/traps.c | |
7206 | @@ -92,17 +92,15 @@ static void show_raw_backtrace(unsigned | |
7207 | #ifdef CONFIG_KALLSYMS | |
7208 | printk("\n"); | |
7209 | #endif | |
7210 | - while (!kstack_end(sp)) { | |
7211 | - unsigned long __user *p = | |
7212 | - (unsigned long __user *)(unsigned long)sp++; | |
7213 | - if (__get_user(addr, p)) { | |
7214 | - printk(" (Bad stack address)"); | |
7215 | - break; | |
7216 | +#define IS_KVA01(a) ((((unsigned long)a) & 0xc0000000) == 0x80000000) | |
7217 | + if (IS_KVA01(sp)) { | |
7218 | + while (!kstack_end(sp)) { | |
7219 | + addr = *sp++; | |
7220 | + if (__kernel_text_address(addr)) | |
7221 | + print_ip_sym(addr); | |
7222 | } | |
7223 | - if (__kernel_text_address(addr)) | |
7224 | - print_ip_sym(addr); | |
7225 | + printk("\n"); | |
7226 | } | |
7227 | - printk("\n"); | |
7228 | } | |
7229 | ||
7230 | #ifdef CONFIG_KALLSYMS | |
7231 | --- a/arch/mips/mti-malta/malta-time.c | |
7232 | +++ b/arch/mips/mti-malta/malta-time.c | |
7233 | @@ -27,6 +27,7 @@ | |
7234 | #include <linux/time.h> | |
7235 | #include <linux/timex.h> | |
7236 | #include <linux/mc146818rtc.h> | |
7237 | +#include <linux/perfmon_kern.h> | |
7238 | ||
7239 | #include <asm/mipsregs.h> | |
7240 | #include <asm/mipsmtregs.h> | |
7241 | --- /dev/null | |
7242 | +++ b/arch/mips/perfmon/Kconfig | |
7243 | @@ -0,0 +1,61 @@ | |
7244 | +menu "Hardware Performance Monitoring support" | |
7245 | +config PERFMON | |
7246 | + bool "Perfmon2 performance monitoring interface" | |
7247 | + default n | |
7248 | + help | |
7249 | + Enables the perfmon2 interface to access the hardware | |
7250 | + performance counters. See <http://perfmon2.sf.net/> for | |
7251 | + more details. | |
7252 | + | |
7253 | +config PERFMON_DEBUG | |
7254 | + bool "Perfmon debugging" | |
7255 | + default n | |
7256 | + depends on PERFMON | |
7257 | + help | |
7258 | + Enables perfmon debugging support | |
7259 | + | |
7260 | +config PERFMON_DEBUG_FS | |
7261 | + bool "Enable perfmon statistics reporting via debugfs" | |
7262 | + default y | |
7263 | + depends on PERFMON && DEBUG_FS | |
7264 | + help | |
7265 | + Enable collection and reporting of perfmon timing statistics under | |
7266 | + debugfs. This is used for debugging and performance analysis of the | |
7267 | + subsystem. The debugfs filesystem must be mounted. | |
7268 | + | |
7269 | +config PERFMON_FLUSH | |
7270 | + bool "Flush sampling buffer when modified" | |
7271 | + depends on PERFMON | |
7272 | + default n | |
7273 | + help | |
7274 | + On some MIPS models, cache aliasing may cause invalid | |
7275 | + data to be read from the perfmon sampling buffer. Use this option | |
7276 | + to flush the buffer when it is modified to ensure valid data is | |
7277 | + visible at the user level. | |
7278 | + | |
7279 | +config PERFMON_ALIGN | |
7280 | + bool "Align sampling buffer to avoid cache aliasing" | |
7281 | + depends on PERFMON | |
7282 | + default n | |
7283 | + help | |
7284 | + On some MIPS models, cache aliasing may cause invalid | |
7285 | + data to be read from the perfmon sampling buffer. By forcing a bigger | |
7286 | + page alignment (4-page), one can guarantee the buffer virtual address | |
7287 | + will conflict in the cache with the user level mapping of the buffer | |
7288 | + thereby ensuring a consistent view by user programs. | |
7289 | + | |
7290 | +config PERFMON_DEBUG | |
7291 | + bool "Perfmon debugging" | |
7292 | + depends on PERFMON | |
7293 | + default n | |
7294 | + depends on PERFMON | |
7295 | + help | |
7296 | + Enables perfmon debugging support | |
7297 | + | |
7298 | +config PERFMON_MIPS64 | |
7299 | + tristate "Support for MIPS64 hardware performance counters" | |
7300 | + depends on PERFMON | |
7301 | + default n | |
7302 | + help | |
7303 | + Enables support for the MIPS64 hardware performance counters" | |
7304 | +endmenu | |
7305 | --- /dev/null | |
7306 | +++ b/arch/mips/perfmon/Makefile | |
7307 | @@ -0,0 +1,2 @@ | |
7308 | +obj-$(CONFIG_PERFMON) += perfmon.o | |
7309 | +obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o | |
7310 | --- /dev/null | |
7311 | +++ b/arch/mips/perfmon/perfmon.c | |
7312 | @@ -0,0 +1,313 @@ | |
7313 | +/* | |
7314 | + * This file implements the MIPS64 specific | |
7315 | + * support for the perfmon2 interface | |
7316 | + * | |
7317 | + * Copyright (c) 2005 Philip J. Mucci | |
7318 | + * | |
7319 | + * based on versions for other architectures: | |
7320 | + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. | |
7321 | + * Contributed by Stephane Eranian <eranian@htrpl.hp.com> | |
7322 | + * | |
7323 | + * This program is free software; you can redistribute it and/or | |
7324 | + * modify it under the terms of version 2 of the GNU General Public | |
7325 | + * License as published by the Free Software Foundation. | |
7326 | + * | |
7327 | + * This program is distributed in the hope that it will be useful, | |
7328 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
7329 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
7330 | + * General Public License for more details. | |
7331 | + * | |
7332 | + * You should have received a copy of the GNU General Public License | |
7333 | + * along with this program; if not, write to the Free Software | |
7334 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
7335 | + * 02111-1307 USA | |
7336 | + */ | |
7337 | +#include <linux/interrupt.h> | |
7338 | +#include <linux/module.h> | |
7339 | +#include <linux/perfmon_kern.h> | |
7340 | + | |
7341 | +/* | |
7342 | + * collect pending overflowed PMDs. Called from pfm_ctxsw() | |
7343 | + * and from PMU interrupt handler. Must fill in set->povfl_pmds[] | |
7344 | + * and set->npend_ovfls. Interrupts are masked | |
7345 | + */ | |
7346 | +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
7347 | +{ | |
7348 | + u64 new_val, wmask; | |
7349 | + u64 *used_mask, *intr_pmds; | |
7350 | + u64 mask[PFM_PMD_BV]; | |
7351 | + unsigned int i, max; | |
7352 | + | |
7353 | + max = ctx->regs.max_intr_pmd; | |
7354 | + intr_pmds = ctx->regs.intr_pmds; | |
7355 | + used_mask = set->used_pmds; | |
7356 | + | |
7357 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
7358 | + | |
7359 | + bitmap_and(cast_ulp(mask), | |
7360 | + cast_ulp(intr_pmds), | |
7361 | + cast_ulp(used_mask), | |
7362 | + max); | |
7363 | + | |
7364 | + /* | |
7365 | + * check all PMD that can generate interrupts | |
7366 | + * (that includes counters) | |
7367 | + */ | |
7368 | + for (i = 0; i < max; i++) { | |
7369 | + if (test_bit(i, mask)) { | |
7370 | + new_val = pfm_arch_read_pmd(ctx, i); | |
7371 | + | |
7372 | + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n", | |
7373 | + i, (unsigned long long)new_val, | |
7374 | + (new_val&wmask) ? 1 : 0); | |
7375 | + | |
7376 | + if (new_val & wmask) { | |
7377 | + __set_bit(i, set->povfl_pmds); | |
7378 | + set->npend_ovfls++; | |
7379 | + } | |
7380 | + } | |
7381 | + } | |
7382 | +} | |
7383 | + | |
7384 | +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx, | |
7385 | + struct pfm_event_set *set) | |
7386 | +{ | |
7387 | + unsigned int i, max; | |
7388 | + | |
7389 | + max = ctx->regs.max_pmc; | |
7390 | + | |
7391 | + /* | |
7392 | + * clear enable bits, assume all pmcs are enable pmcs | |
7393 | + */ | |
7394 | + for (i = 0; i < max; i++) { | |
7395 | + if (test_bit(i, set->used_pmcs)) | |
7396 | + pfm_arch_write_pmc(ctx, i, 0); | |
7397 | + } | |
7398 | + | |
7399 | + if (set->npend_ovfls) | |
7400 | + return; | |
7401 | + | |
7402 | + __pfm_get_ovfl_pmds(ctx, set); | |
7403 | +} | |
7404 | + | |
7405 | +/* | |
7406 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
7407 | + * Context is locked. Interrupts are masked. Monitoring is active. | |
7408 | + * PMU access is guaranteed. PMC and PMD registers are live in PMU. | |
7409 | + * | |
7410 | + * for per-thread: | |
7411 | + * must stop monitoring for the task | |
7412 | + * | |
7413 | + * Return: | |
7414 | + * non-zero : did not save PMDs (as part of stopping the PMU) | |
7415 | + * 0 : saved PMDs (no need to save them in caller) | |
7416 | + */ | |
7417 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) | |
7418 | +{ | |
7419 | + /* | |
7420 | + * disable lazy restore of PMC registers. | |
7421 | + */ | |
7422 | + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; | |
7423 | + | |
7424 | + /* | |
7425 | + * if masked, monitoring is stopped, thus there is no | |
7426 | + * need to stop the PMU again and there is no need to | |
7427 | + * check for pending overflows. This is not just an | |
7428 | + * optimization, this is also for correctness as you | |
7429 | + * may end up detecting overflows twice. | |
7430 | + */ | |
7431 | + if (ctx->state == PFM_CTX_MASKED) | |
7432 | + return 1; | |
7433 | + | |
7434 | + pfm_stop_active(task, ctx, ctx->active_set); | |
7435 | + | |
7436 | + return 1; | |
7437 | +} | |
7438 | + | |
7439 | +/* | |
7440 | + * Called from pfm_stop() and pfm_ctxsw() | |
7441 | + * Interrupts are masked. Context is locked. Set is the active set. | |
7442 | + * | |
7443 | + * For per-thread: | |
7444 | + * task is not necessarily current. If not current task, then | |
7445 | + * task is guaranteed stopped and off any cpu. Access to PMU | |
7446 | + * is not guaranteed. Interrupts are masked. Context is locked. | |
7447 | + * Set is the active set. | |
7448 | + * | |
7449 | + * For system-wide: | |
7450 | + * task is current | |
7451 | + * | |
7452 | + * must disable active monitoring. ctx cannot be NULL | |
7453 | + */ | |
7454 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) | |
7455 | +{ | |
7456 | + /* | |
7457 | + * no need to go through stop_save() | |
7458 | + * if we are already stopped | |
7459 | + */ | |
7460 | + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) | |
7461 | + return; | |
7462 | + | |
7463 | + /* | |
7464 | + * stop live registers and collect pending overflow | |
7465 | + */ | |
7466 | + if (task == current) | |
7467 | + pfm_stop_active(task, ctx, ctx->active_set); | |
7468 | +} | |
7469 | + | |
7470 | +/* | |
7471 | + * called from pfm_start() or pfm_ctxsw() when idle task and | |
7472 | + * EXCL_IDLE is on. | |
7473 | + * | |
7474 | + * Interrupts are masked. Context is locked. Set is the active set. | |
7475 | + * | |
7476 | + * For per-trhead: | |
7477 | + * Task is not necessarily current. If not current task, then task | |
7478 | + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed. | |
7479 | + * | |
7480 | + * For system-wide: | |
7481 | + * task is always current | |
7482 | + * | |
7483 | + * must enable active monitoring. | |
7484 | + */ | |
7485 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) | |
7486 | +{ | |
7487 | + struct pfm_event_set *set; | |
7488 | + unsigned int i, max_pmc; | |
7489 | + | |
7490 | + if (task != current) | |
7491 | + return; | |
7492 | + | |
7493 | + set = ctx->active_set; | |
7494 | + max_pmc = ctx->regs.max_pmc; | |
7495 | + | |
7496 | + for (i = 0; i < max_pmc; i++) { | |
7497 | + if (test_bit(i, set->used_pmcs)) | |
7498 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
7499 | + } | |
7500 | +} | |
7501 | + | |
7502 | +/* | |
7503 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
7504 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
7505 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
7506 | + * Access to the PMU is guaranteed. | |
7507 | + * | |
7508 | + * function must restore all PMD registers from set. | |
7509 | + */ | |
7510 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
7511 | +{ | |
7512 | + u64 ovfl_mask, val; | |
7513 | + u64 *impl_pmds; | |
7514 | + unsigned int i; | |
7515 | + unsigned int max_pmd; | |
7516 | + | |
7517 | + max_pmd = ctx->regs.max_pmd; | |
7518 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
7519 | + impl_pmds = ctx->regs.pmds; | |
7520 | + | |
7521 | + /* | |
7522 | + * must restore all pmds to avoid leaking | |
7523 | + * information to user. | |
7524 | + */ | |
7525 | + for (i = 0; i < max_pmd; i++) { | |
7526 | + | |
7527 | + if (test_bit(i, impl_pmds) == 0) | |
7528 | + continue; | |
7529 | + | |
7530 | + val = set->pmds[i].value; | |
7531 | + | |
7532 | + /* | |
7533 | + * set upper bits for counter to ensure | |
7534 | + * overflow will trigger | |
7535 | + */ | |
7536 | + val &= ovfl_mask; | |
7537 | + | |
7538 | + pfm_arch_write_pmd(ctx, i, val); | |
7539 | + } | |
7540 | +} | |
7541 | + | |
7542 | +/* | |
7543 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
7544 | + * pfm_context_load_sys(), pfm_ctxsw(). | |
7545 | + * Context is locked. Interrupts are masked. set cannot be NULL. | |
7546 | + * Access to the PMU is guaranteed. | |
7547 | + * | |
7548 | + * function must restore all PMC registers from set, if needed. | |
7549 | + */ | |
7550 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
7551 | +{ | |
7552 | + u64 *impl_pmcs; | |
7553 | + unsigned int i, max_pmc; | |
7554 | + | |
7555 | + max_pmc = ctx->regs.max_pmc; | |
7556 | + impl_pmcs = ctx->regs.pmcs; | |
7557 | + | |
7558 | + /* | |
7559 | + * - by default no PMCS measures anything | |
7560 | + * - on ctxswout, all used PMCs are disabled (cccr enable bit cleared) | |
7561 | + * hence when masked we do not need to restore anything | |
7562 | + */ | |
7563 | + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) | |
7564 | + return; | |
7565 | + | |
7566 | + /* | |
7567 | + * restore all pmcs | |
7568 | + */ | |
7569 | + for (i = 0; i < max_pmc; i++) | |
7570 | + if (test_bit(i, impl_pmcs)) | |
7571 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
7572 | +} | |
7573 | + | |
7574 | +char *pfm_arch_get_pmu_module_name(void) | |
7575 | +{ | |
7576 | + switch (cpu_data->cputype) { | |
7577 | +#ifndef CONFIG_SMP | |
7578 | + case CPU_34K: | |
7579 | +#if defined(CPU_74K) | |
7580 | + case CPU_74K: | |
7581 | +#endif | |
7582 | +#endif | |
7583 | + case CPU_SB1: | |
7584 | + case CPU_SB1A: | |
7585 | + case CPU_R12000: | |
7586 | + case CPU_25KF: | |
7587 | + case CPU_24K: | |
7588 | + case CPU_20KC: | |
7589 | + case CPU_5KC: | |
7590 | + return "perfmon_mips64"; | |
7591 | + default: | |
7592 | + return NULL; | |
7593 | + } | |
7594 | + return NULL; | |
7595 | +} | |
7596 | + | |
7597 | +int perfmon_perf_irq(void) | |
7598 | +{ | |
7599 | + /* BLATANTLY STOLEN FROM OPROFILE, then modified */ | |
7600 | + struct pt_regs *regs; | |
7601 | + unsigned int counters = pfm_pmu_conf->regs_all.max_pmc; | |
7602 | + unsigned int control; | |
7603 | + unsigned int counter; | |
7604 | + | |
7605 | + regs = get_irq_regs(); | |
7606 | + switch (counters) { | |
7607 | +#define HANDLE_COUNTER(n) \ | |
7608 | + case n + 1: \ | |
7609 | + control = read_c0_perfctrl ## n(); \ | |
7610 | + counter = read_c0_perfcntr ## n(); \ | |
7611 | + if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \ | |
7612 | + (counter & MIPS64_PMD_INTERRUPT)) { \ | |
7613 | + pfm_interrupt_handler(instruction_pointer(regs),\ | |
7614 | + regs); \ | |
7615 | + return(1); \ | |
7616 | + } | |
7617 | + HANDLE_COUNTER(3) | |
7618 | + HANDLE_COUNTER(2) | |
7619 | + HANDLE_COUNTER(1) | |
7620 | + HANDLE_COUNTER(0) | |
7621 | + } | |
7622 | + | |
7623 | + return 0; | |
7624 | +} | |
7625 | +EXPORT_SYMBOL(perfmon_perf_irq); | |
7626 | --- /dev/null | |
7627 | +++ b/arch/mips/perfmon/perfmon_mips64.c | |
7628 | @@ -0,0 +1,218 @@ | |
7629 | +/* | |
7630 | + * This file contains the MIPS64 and descendant PMU register description tables | |
7631 | + * and pmc checker used by perfmon.c. | |
7632 | + * | |
7633 | + * Copyright (c) 2005 Philip Mucci | |
7634 | + * | |
7635 | + * Based on perfmon_p6.c: | |
7636 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
7637 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
7638 | + * | |
7639 | + * This program is free software; you can redistribute it and/or | |
7640 | + * modify it under the terms of version 2 of the GNU General Public | |
7641 | + * License as published by the Free Software Foundation. | |
7642 | + * | |
7643 | + * This program is distributed in the hope that it will be useful, | |
7644 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
7645 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
7646 | + * General Public License for more details. | |
7647 | + * | |
7648 | + * You should have received a copy of the GNU General Public License | |
7649 | + * along with this program; if not, write to the Free Software | |
7650 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
7651 | + * 02111-1307 USA | |
7652 | + */ | |
7653 | +#include <linux/module.h> | |
7654 | +#include <linux/perfmon_kern.h> | |
7655 | + | |
7656 | +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>"); | |
7657 | +MODULE_DESCRIPTION("MIPS64 PMU description tables"); | |
7658 | +MODULE_LICENSE("GPL"); | |
7659 | + | |
7660 | +/* | |
7661 | + * reserved: | |
7662 | + * - bit 63-9 | |
7663 | + * RSVD: reserved bits must be 1 | |
7664 | + */ | |
7665 | +#define PFM_MIPS64_PMC_RSVD 0xfffffffffffff810ULL | |
7666 | +#define PFM_MIPS64_PMC_VAL (1ULL<<4) | |
7667 | + | |
7668 | +extern int null_perf_irq(struct pt_regs *regs); | |
7669 | +extern int (*perf_irq)(struct pt_regs *regs); | |
7670 | +extern int perfmon_perf_irq(struct pt_regs *regs); | |
7671 | + | |
7672 | +static struct pfm_arch_pmu_info pfm_mips64_pmu_info; | |
7673 | + | |
7674 | +static struct pfm_regmap_desc pfm_mips64_pmc_desc[] = { | |
7675 | +/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0), | |
7676 | +/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1), | |
7677 | +/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2), | |
7678 | +/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3) | |
7679 | +}; | |
7680 | +#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc) | |
7681 | + | |
7682 | +static struct pfm_regmap_desc pfm_mips64_pmd_desc[] = { | |
7683 | +/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0), | |
7684 | +/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1), | |
7685 | +/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2), | |
7686 | +/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3) | |
7687 | +}; | |
7688 | +#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc) | |
7689 | + | |
7690 | +static int pfm_mips64_probe_pmu(void) | |
7691 | +{ | |
7692 | + struct cpuinfo_mips *c = ¤t_cpu_data; | |
7693 | + | |
7694 | + switch (c->cputype) { | |
7695 | +#ifndef CONFIG_SMP | |
7696 | + case CPU_34K: | |
7697 | +#if defined(CPU_74K) | |
7698 | + case CPU_74K: | |
7699 | +#endif | |
7700 | +#endif | |
7701 | + case CPU_SB1: | |
7702 | + case CPU_SB1A: | |
7703 | + case CPU_R12000: | |
7704 | + case CPU_25KF: | |
7705 | + case CPU_24K: | |
7706 | + case CPU_20KC: | |
7707 | + case CPU_5KC: | |
7708 | + return 0; | |
7709 | + break; | |
7710 | + default: | |
7711 | + PFM_INFO("Unknown cputype 0x%x", c->cputype); | |
7712 | + } | |
7713 | + return -1; | |
7714 | +} | |
7715 | + | |
7716 | +/* | |
7717 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
7718 | + */ | |
7719 | +static struct pfm_pmu_config pfm_mips64_pmu_conf = { | |
7720 | + .pmu_name = "MIPS", /* placeholder */ | |
7721 | + .counter_width = 31, | |
7722 | + .pmd_desc = pfm_mips64_pmd_desc, | |
7723 | + .pmc_desc = pfm_mips64_pmc_desc, | |
7724 | + .num_pmc_entries = PFM_MIPS64_NUM_PMCS, | |
7725 | + .num_pmd_entries = PFM_MIPS64_NUM_PMDS, | |
7726 | + .probe_pmu = pfm_mips64_probe_pmu, | |
7727 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
7728 | + .owner = THIS_MODULE, | |
7729 | + .pmu_info = &pfm_mips64_pmu_info | |
7730 | +}; | |
7731 | + | |
7732 | +static inline int n_counters(void) | |
7733 | +{ | |
7734 | + if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK)) | |
7735 | + return 0; | |
7736 | + if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK)) | |
7737 | + return 1; | |
7738 | + if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK)) | |
7739 | + return 2; | |
7740 | + if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK)) | |
7741 | + return 3; | |
7742 | + return 4; | |
7743 | +} | |
7744 | + | |
7745 | +static int __init pfm_mips64_pmu_init_module(void) | |
7746 | +{ | |
7747 | + struct cpuinfo_mips *c = ¤t_cpu_data; | |
7748 | + int i, ret, num; | |
7749 | + u64 temp_mask; | |
7750 | + | |
7751 | + switch (c->cputype) { | |
7752 | + case CPU_5KC: | |
7753 | + pfm_mips64_pmu_conf.pmu_name = "MIPS5KC"; | |
7754 | + break; | |
7755 | + case CPU_R12000: | |
7756 | + pfm_mips64_pmu_conf.pmu_name = "MIPSR12000"; | |
7757 | + break; | |
7758 | + case CPU_20KC: | |
7759 | + pfm_mips64_pmu_conf.pmu_name = "MIPS20KC"; | |
7760 | + break; | |
7761 | + case CPU_24K: | |
7762 | + pfm_mips64_pmu_conf.pmu_name = "MIPS24K"; | |
7763 | + break; | |
7764 | + case CPU_25KF: | |
7765 | + pfm_mips64_pmu_conf.pmu_name = "MIPS25KF"; | |
7766 | + break; | |
7767 | + case CPU_SB1: | |
7768 | + pfm_mips64_pmu_conf.pmu_name = "SB1"; | |
7769 | + break; | |
7770 | + case CPU_SB1A: | |
7771 | + pfm_mips64_pmu_conf.pmu_name = "SB1A"; | |
7772 | + break; | |
7773 | +#ifndef CONFIG_SMP | |
7774 | + case CPU_34K: | |
7775 | + pfm_mips64_pmu_conf.pmu_name = "MIPS34K"; | |
7776 | + break; | |
7777 | +#if defined(CPU_74K) | |
7778 | + case CPU_74K: | |
7779 | + pfm_mips64_pmu_conf.pmu_name = "MIPS74K"; | |
7780 | + break; | |
7781 | +#endif | |
7782 | +#endif | |
7783 | + default: | |
7784 | + PFM_INFO("Unknown cputype 0x%x", c->cputype); | |
7785 | + return -1; | |
7786 | + } | |
7787 | + | |
7788 | + /* The R14k and older performance counters have to */ | |
7789 | + /* be hard-coded, as there is no support for auto-detection */ | |
7790 | + if ((c->cputype == CPU_R12000) || (c->cputype == CPU_R14000)) | |
7791 | + num = 4; | |
7792 | + else if (c->cputype == CPU_R10000) | |
7793 | + num = 2; | |
7794 | + else | |
7795 | + num = n_counters(); | |
7796 | + | |
7797 | + if (num == 0) { | |
7798 | + PFM_INFO("cputype 0x%x has no counters", c->cputype); | |
7799 | + return -1; | |
7800 | + } | |
7801 | + /* mark remaining counters unavailable */ | |
7802 | + for (i = num; i < PFM_MIPS64_NUM_PMCS; i++) | |
7803 | + pfm_mips64_pmc_desc[i].type = PFM_REG_NA; | |
7804 | + | |
7805 | + for (i = num; i < PFM_MIPS64_NUM_PMDS; i++) | |
7806 | + pfm_mips64_pmd_desc[i].type = PFM_REG_NA; | |
7807 | + | |
7808 | + /* set the PMC_RSVD mask */ | |
7809 | + switch (c->cputype) { | |
7810 | + case CPU_5KC: | |
7811 | + case CPU_R10000: | |
7812 | + case CPU_20KC: | |
7813 | + /* 4-bits for event */ | |
7814 | + temp_mask = 0xfffffffffffffe10ULL; | |
7815 | + break; | |
7816 | + case CPU_R12000: | |
7817 | + case CPU_R14000: | |
7818 | + /* 5-bits for event */ | |
7819 | + temp_mask = 0xfffffffffffffc10ULL; | |
7820 | + break; | |
7821 | + default: | |
7822 | + /* 6-bits for event */ | |
7823 | + temp_mask = 0xfffffffffffff810ULL; | |
7824 | + } | |
7825 | + for (i = 0; i < PFM_MIPS64_NUM_PMCS; i++) | |
7826 | + pfm_mips64_pmc_desc[i].rsvd_msk = temp_mask; | |
7827 | + | |
7828 | + pfm_mips64_pmu_conf.num_pmc_entries = num; | |
7829 | + pfm_mips64_pmu_conf.num_pmd_entries = num; | |
7830 | + | |
7831 | + pfm_mips64_pmu_info.pmu_style = c->cputype; | |
7832 | + | |
7833 | + ret = pfm_pmu_register(&pfm_mips64_pmu_conf); | |
7834 | + if (ret == 0) | |
7835 | + perf_irq = perfmon_perf_irq; | |
7836 | + return ret; | |
7837 | +} | |
7838 | + | |
7839 | +static void __exit pfm_mips64_pmu_cleanup_module(void) | |
7840 | +{ | |
7841 | + pfm_pmu_unregister(&pfm_mips64_pmu_conf); | |
7842 | + perf_irq = null_perf_irq; | |
7843 | +} | |
7844 | + | |
7845 | +module_init(pfm_mips64_pmu_init_module); | |
7846 | +module_exit(pfm_mips64_pmu_cleanup_module); | |
7847 | --- a/arch/powerpc/Kconfig | |
7848 | +++ b/arch/powerpc/Kconfig | |
7849 | @@ -231,6 +231,8 @@ source "init/Kconfig" | |
7850 | source "arch/powerpc/sysdev/Kconfig" | |
7851 | source "arch/powerpc/platforms/Kconfig" | |
7852 | ||
7853 | +source "arch/powerpc/perfmon/Kconfig" | |
7854 | + | |
7855 | menu "Kernel options" | |
7856 | ||
7857 | config HIGHMEM | |
7858 | --- a/arch/powerpc/Makefile | |
7859 | +++ b/arch/powerpc/Makefile | |
7860 | @@ -148,6 +148,7 @@ core-y += arch/powerpc/kernel/ \ | |
7861 | arch/powerpc/platforms/ | |
7862 | core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ | |
7863 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ | |
7864 | +core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/ | |
7865 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ | |
7866 | ||
7867 | drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ | |
7868 | --- a/arch/powerpc/include/asm/Kbuild | |
7869 | +++ b/arch/powerpc/include/asm/Kbuild | |
7870 | @@ -21,6 +21,7 @@ header-y += resource.h | |
7871 | header-y += sigcontext.h | |
7872 | header-y += statfs.h | |
7873 | header-y += ps3fb.h | |
7874 | +header-y += perfmon.h | |
7875 | ||
7876 | unifdef-y += bootx.h | |
7877 | unifdef-y += byteorder.h | |
7878 | --- a/arch/powerpc/include/asm/cell-pmu.h | |
7879 | +++ b/arch/powerpc/include/asm/cell-pmu.h | |
7880 | @@ -61,6 +61,11 @@ | |
7881 | ||
7882 | /* Macros for the pm_status register. */ | |
7883 | #define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7))) | |
7884 | +#define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff) | |
7885 | +#define CBE_PM_ALL_OVERFLOW_INTR 0xff000000 | |
7886 | +#define CBE_PM_INTERVAL_INTR 0x00800000 | |
7887 | +#define CBE_PM_TRACE_BUFFER_FULL_INTR 0x00400000 | |
7888 | +#define CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR 0x00200000 | |
7889 | ||
7890 | enum pm_reg_name { | |
7891 | group_control, | |
7892 | --- a/arch/powerpc/include/asm/cell-regs.h | |
7893 | +++ b/arch/powerpc/include/asm/cell-regs.h | |
7894 | @@ -117,8 +117,9 @@ struct cbe_pmd_regs { | |
7895 | u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */ | |
7896 | #define CBE_PMD_FIR_MODE_M8 0x00800 | |
7897 | u64 fir_enable_mask; /* 0x0c20 */ | |
7898 | - | |
7899 | - u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */ | |
7900 | + u8 pad_0x0c28_0x0c98 [0x0c98 - 0x0c28]; /* 0x0c28 */ | |
7901 | + u64 on_ramp_trace; /* 0x0c98 */ | |
7902 | + u64 pad_0x0ca0; /* 0x0ca0 */ | |
7903 | u64 ras_esc_0; /* 0x0ca8 */ | |
7904 | u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */ | |
7905 | }; | |
7906 | @@ -218,7 +219,11 @@ extern struct cbe_iic_regs __iomem *cbe_ | |
7907 | ||
7908 | ||
7909 | struct cbe_mic_tm_regs { | |
7910 | - u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */ | |
7911 | + u8 pad_0x0000_0x0010[0x0010 - 0x0000]; /* 0x0000 */ | |
7912 | + | |
7913 | + u64 MBL_debug; /* 0x0010 */ | |
7914 | + | |
7915 | + u8 pad_0x0018_0x0040[0x0040 - 0x0018]; /* 0x0018 */ | |
7916 | ||
7917 | u64 mic_ctl_cnfg2; /* 0x0040 */ | |
7918 | #define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL | |
7919 | @@ -303,6 +308,25 @@ struct cbe_mic_tm_regs { | |
7920 | extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np); | |
7921 | extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu); | |
7922 | ||
7923 | +/* | |
7924 | + * | |
7925 | + * PPE Privileged MMIO Registers definition. (offset 0x500000 - 0x500fff) | |
7926 | + * | |
7927 | + */ | |
7928 | +struct cbe_ppe_priv_regs { | |
7929 | + u8 pad_0x0000_0x0858[0x0858 - 0x0000]; /* 0x0000 */ | |
7930 | + | |
7931 | + u64 L2_debug1; /* 0x0858 */ | |
7932 | + | |
7933 | + u8 pad_0x0860_0x0958[0x0958 - 0x0860]; /* 0x0860 */ | |
7934 | + | |
7935 | + u64 ciu_dr1; /* 0x0958 */ | |
7936 | + | |
7937 | + u8 pad_0x0960_0x1000[0x1000 - 0x0960]; /* 0x0960 */ | |
7938 | +}; | |
7939 | + | |
7940 | +extern struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu); | |
7941 | + | |
7942 | /* some utility functions to deal with SMT */ | |
7943 | extern u32 cbe_get_hw_thread_id(int cpu); | |
7944 | extern u32 cbe_cpu_to_node(int cpu); | |
7945 | --- a/arch/powerpc/include/asm/paca.h | |
7946 | +++ b/arch/powerpc/include/asm/paca.h | |
7947 | @@ -97,6 +97,10 @@ struct paca_struct { | |
7948 | u8 soft_enabled; /* irq soft-enable flag */ | |
7949 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | |
7950 | u8 io_sync; /* writel() needs spin_unlock sync */ | |
7951 | +#ifdef CONFIG_PERFMON | |
7952 | + u8 pmu_except_pending; /* PMU exception occurred while soft | |
7953 | + * disabled */ | |
7954 | +#endif | |
7955 | ||
7956 | /* Stuff for accurate time accounting */ | |
7957 | u64 user_time; /* accumulated usermode TB ticks */ | |
7958 | --- /dev/null | |
7959 | +++ b/arch/powerpc/include/asm/perfmon.h | |
7960 | @@ -0,0 +1,33 @@ | |
7961 | +/* | |
7962 | + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. | |
7963 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
7964 | + * | |
7965 | + * This file contains powerpc specific definitions for the perfmon | |
7966 | + * interface. | |
7967 | + * | |
7968 | + * This file MUST never be included directly. Use linux/perfmon.h. | |
7969 | + * | |
7970 | + * This program is free software; you can redistribute it and/or | |
7971 | + * modify it under the terms of version 2 of the GNU General Public | |
7972 | + * License as published by the Free Software Foundation. | |
7973 | + * | |
7974 | + * This program is distributed in the hope that it will be useful, | |
7975 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
7976 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
7977 | + * General Public License for more details. | |
7978 | + * | |
7979 | + * You should have received a copy of the GNU General Public License | |
7980 | + * along with this program; if not, write to the Free Software | |
7981 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
7982 | + * 02111-1307 USA | |
7983 | + */ | |
7984 | +#ifndef _ASM_POWERPC_PERFMON_H_ | |
7985 | +#define _ASM_POWERPC_PERFMON_H_ | |
7986 | + | |
7987 | +/* | |
7988 | + * arch-specific user visible interface definitions | |
7989 | + */ | |
7990 | +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ | |
7991 | +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ | |
7992 | + | |
7993 | +#endif /* _ASM_POWERPC_PERFMON_H_ */ | |
7994 | --- /dev/null | |
7995 | +++ b/arch/powerpc/include/asm/perfmon_kern.h | |
7996 | @@ -0,0 +1,390 @@ | |
7997 | +/* | |
7998 | + * Copyright (c) 2005 David Gibson, IBM Corporation. | |
7999 | + * | |
8000 | + * Based on other versions: | |
8001 | + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. | |
8002 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
8003 | + * | |
8004 | + * This file contains powerpc specific definitions for the perfmon | |
8005 | + * interface. | |
8006 | + * | |
8007 | + * This program is free software; you can redistribute it and/or | |
8008 | + * modify it under the terms of version 2 of the GNU General Public | |
8009 | + * License as published by the Free Software Foundation. | |
8010 | + * | |
8011 | + * This program is distributed in the hope that it will be useful, | |
8012 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8013 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
8014 | + * General Public License for more details. | |
8015 | + * | |
8016 | + * You should have received a copy of the GNU General Public License | |
8017 | + * along with this program; if not, write to the Free Software | |
8018 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
8019 | + * 02111-1307 USA | |
8020 | + */ | |
8021 | +#ifndef _ASM_POWERPC_PERFMON_KERN_H_ | |
8022 | +#define _ASM_POWERPC_PERFMON_KERN_H_ | |
8023 | + | |
8024 | +#ifdef __KERNEL__ | |
8025 | + | |
8026 | +#ifdef CONFIG_PERFMON | |
8027 | + | |
8028 | +#include <asm/pmc.h> | |
8029 | +#include <asm/unistd.h> | |
8030 | + | |
8031 | +#define HID0_PMC5_6_GR_MODE (1UL << (63 - 40)) | |
8032 | + | |
8033 | +enum powerpc_pmu_type { | |
8034 | + PFM_POWERPC_PMU_NONE, | |
8035 | + PFM_POWERPC_PMU_604, | |
8036 | + PFM_POWERPC_PMU_604e, | |
8037 | + PFM_POWERPC_PMU_750, /* XXX: Minor event set diffs between IBM and Moto. */ | |
8038 | + PFM_POWERPC_PMU_7400, | |
8039 | + PFM_POWERPC_PMU_7450, | |
8040 | + PFM_POWERPC_PMU_POWER4, | |
8041 | + PFM_POWERPC_PMU_POWER5, | |
8042 | + PFM_POWERPC_PMU_POWER5p, | |
8043 | + PFM_POWERPC_PMU_POWER6, | |
8044 | + PFM_POWERPC_PMU_CELL, | |
8045 | +}; | |
8046 | + | |
8047 | +struct pfm_arch_pmu_info { | |
8048 | + enum powerpc_pmu_type pmu_style; | |
8049 | + | |
8050 | + void (*write_pmc)(unsigned int cnum, u64 value); | |
8051 | + void (*write_pmd)(unsigned int cnum, u64 value); | |
8052 | + | |
8053 | + u64 (*read_pmd)(unsigned int cnum); | |
8054 | + | |
8055 | + void (*enable_counters)(struct pfm_context *ctx, | |
8056 | + struct pfm_event_set *set); | |
8057 | + void (*disable_counters)(struct pfm_context *ctx, | |
8058 | + struct pfm_event_set *set); | |
8059 | + | |
8060 | + void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx); | |
8061 | + void (*get_ovfl_pmds)(struct pfm_context *ctx, | |
8062 | + struct pfm_event_set *set); | |
8063 | + | |
8064 | + /* The following routines are optional. */ | |
8065 | + void (*restore_pmcs)(struct pfm_context *ctx, | |
8066 | + struct pfm_event_set *set); | |
8067 | + void (*restore_pmds)(struct pfm_context *ctx, | |
8068 | + struct pfm_event_set *set); | |
8069 | + | |
8070 | + int (*ctxswout_thread)(struct task_struct *task, | |
8071 | + struct pfm_context *ctx, | |
8072 | + struct pfm_event_set *set); | |
8073 | + void (*ctxswin_thread)(struct task_struct *task, | |
8074 | + struct pfm_context *ctx, | |
8075 | + struct pfm_event_set *set); | |
8076 | + int (*load_context)(struct pfm_context *ctx); | |
8077 | + void (*unload_context)(struct pfm_context *ctx); | |
8078 | + int (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds); | |
8079 | + void (*release_pmu)(void); | |
8080 | + void *platform_info; | |
8081 | + void (*resend_irq)(struct pfm_context *ctx); | |
8082 | +}; | |
8083 | + | |
8084 | +#ifdef CONFIG_PPC32 | |
8085 | +#define PFM_ARCH_PMD_STK_ARG 6 /* conservative value */ | |
8086 | +#define PFM_ARCH_PMC_STK_ARG 6 /* conservative value */ | |
8087 | +#else | |
8088 | +#define PFM_ARCH_PMD_STK_ARG 8 /* conservative value */ | |
8089 | +#define PFM_ARCH_PMC_STK_ARG 8 /* conservative value */ | |
8090 | +#endif | |
8091 | + | |
8092 | +static inline void pfm_arch_resend_irq(struct pfm_context *ctx) | |
8093 | +{ | |
8094 | + struct pfm_arch_pmu_info *arch_info; | |
8095 | + | |
8096 | + arch_info = pfm_pmu_info(); | |
8097 | + arch_info->resend_irq(ctx); | |
8098 | +} | |
8099 | + | |
8100 | +static inline void pfm_arch_serialize(void) | |
8101 | +{} | |
8102 | + | |
8103 | +static inline void pfm_arch_write_pmc(struct pfm_context *ctx, | |
8104 | + unsigned int cnum, | |
8105 | + u64 value) | |
8106 | +{ | |
8107 | + struct pfm_arch_pmu_info *arch_info; | |
8108 | + | |
8109 | + arch_info = pfm_pmu_info(); | |
8110 | + | |
8111 | + /* | |
8112 | + * we only write to the actual register when monitoring is | |
8113 | + * active (pfm_start was issued) | |
8114 | + */ | |
8115 | + if (ctx && ctx->flags.started == 0) | |
8116 | + return; | |
8117 | + | |
8118 | + BUG_ON(!arch_info->write_pmc); | |
8119 | + | |
8120 | + arch_info->write_pmc(cnum, value); | |
8121 | +} | |
8122 | + | |
8123 | +static inline void pfm_arch_write_pmd(struct pfm_context *ctx, | |
8124 | + unsigned int cnum, u64 value) | |
8125 | +{ | |
8126 | + struct pfm_arch_pmu_info *arch_info; | |
8127 | + | |
8128 | + arch_info = pfm_pmu_info(); | |
8129 | + | |
8130 | + value &= pfm_pmu_conf->ovfl_mask; | |
8131 | + | |
8132 | + BUG_ON(!arch_info->write_pmd); | |
8133 | + | |
8134 | + arch_info->write_pmd(cnum, value); | |
8135 | +} | |
8136 | + | |
8137 | +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
8138 | +{ | |
8139 | + struct pfm_arch_pmu_info *arch_info; | |
8140 | + | |
8141 | + arch_info = pfm_pmu_info(); | |
8142 | + | |
8143 | + BUG_ON(!arch_info->read_pmd); | |
8144 | + | |
8145 | + return arch_info->read_pmd(cnum); | |
8146 | +} | |
8147 | + | |
8148 | +/* | |
8149 | + * For some CPUs, the upper bits of a counter must be set in order for the | |
8150 | + * overflow interrupt to happen. On overflow, the counter has wrapped around, | |
8151 | + * and the upper bits are cleared. This function may be used to set them back. | |
8152 | + */ | |
8153 | +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, | |
8154 | + unsigned int cnum) | |
8155 | +{ | |
8156 | + u64 val = pfm_arch_read_pmd(ctx, cnum); | |
8157 | + | |
8158 | + /* This masks out overflow bit 31 */ | |
8159 | + pfm_arch_write_pmd(ctx, cnum, val); | |
8160 | +} | |
8161 | + | |
8162 | +/* | |
8163 | + * At certain points, perfmon needs to know if monitoring has been | |
8164 | + * explicitly started/stopped by user via pfm_start/pfm_stop. The | |
8165 | + * information is tracked in flags.started. However on certain | |
8166 | + * architectures, it may be possible to start/stop directly from | |
8167 | + * user level with a single assembly instruction bypassing | |
8168 | + * the kernel. This function must be used to determine by | |
8169 | + * an arch-specific means if monitoring is actually started/stopped. | |
8170 | + */ | |
8171 | +static inline int pfm_arch_is_active(struct pfm_context *ctx) | |
8172 | +{ | |
8173 | + return ctx->flags.started; | |
8174 | +} | |
8175 | + | |
8176 | +static inline void pfm_arch_ctxswout_sys(struct task_struct *task, | |
8177 | + struct pfm_context *ctx) | |
8178 | +{} | |
8179 | + | |
8180 | +static inline void pfm_arch_ctxswin_sys(struct task_struct *task, | |
8181 | + struct pfm_context *ctx) | |
8182 | +{} | |
8183 | + | |
8184 | +void pfm_arch_init_percpu(void); | |
8185 | +int pfm_arch_is_monitoring_active(struct pfm_context *ctx); | |
8186 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); | |
8187 | +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx); | |
8188 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); | |
8189 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); | |
8190 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
8191 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
8192 | +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, struct pfm_event_set *set); | |
8193 | +int pfm_arch_get_ovfl_pmds(struct pfm_context *ctx, | |
8194 | + struct pfm_event_set *set); | |
8195 | +char *pfm_arch_get_pmu_module_name(void); | |
8196 | +/* | |
8197 | + * called from __pfm_interrupt_handler(). ctx is not NULL. | |
8198 | + * ctx is locked. PMU interrupt is masked. | |
8199 | + * | |
8200 | + * must stop all monitoring to ensure handler has consistent view. | |
8201 | + * must collect overflowed PMDs bitmask into povfls_pmds and | |
8202 | + * npend_ovfls. If no interrupt detected then npend_ovfls | |
8203 | + * must be set to zero. | |
8204 | + */ | |
8205 | +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set) | |
8206 | +{ | |
8207 | + pfm_arch_stop(current, ctx); | |
8208 | +} | |
8209 | + | |
8210 | +void powerpc_irq_handler(struct pt_regs *regs); | |
8211 | + | |
8212 | +/* | |
8213 | + * unfreeze PMU from pfm_do_interrupt_handler() | |
8214 | + * ctx may be NULL for spurious | |
8215 | + */ | |
8216 | +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) | |
8217 | +{ | |
8218 | + struct pfm_arch_pmu_info *arch_info; | |
8219 | + | |
8220 | + if (!ctx) | |
8221 | + return; | |
8222 | + | |
8223 | + PFM_DBG_ovfl("state=%d", ctx->state); | |
8224 | + | |
8225 | + ctx->flags.started = 1; | |
8226 | + | |
8227 | + if (ctx->state == PFM_CTX_MASKED) | |
8228 | + return; | |
8229 | + | |
8230 | + arch_info = pfm_pmu_info(); | |
8231 | + BUG_ON(!arch_info->enable_counters); | |
8232 | + arch_info->enable_counters(ctx, ctx->active_set); | |
8233 | +} | |
8234 | + | |
8235 | +/* | |
8236 | + * PowerPC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus | |
8237 | + * this routine needs to do it when switching sets on overflow | |
8238 | + */ | |
8239 | +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, | |
8240 | + struct pfm_event_set *set) | |
8241 | +{ | |
8242 | + pfm_save_pmds(ctx, set); | |
8243 | +} | |
8244 | + | |
8245 | +/* | |
8246 | + * this function is called from the PMU interrupt handler ONLY. | |
8247 | + * On PPC, the PMU is frozen via arch_stop, masking would be implemented | |
8248 | + * via arch-stop as well. Given that the PMU is already stopped when | |
8249 | + * entering the interrupt handler, we do not need to stop it again, so | |
8250 | + * this function is a nop. | |
8251 | + */ | |
8252 | +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, | |
8253 | + struct pfm_event_set *set) | |
8254 | +{} | |
8255 | + | |
8256 | +/* | |
8257 | + * Simply need to start the context in order to unmask. | |
8258 | + */ | |
8259 | +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, | |
8260 | + struct pfm_event_set *set) | |
8261 | +{ | |
8262 | + pfm_arch_start(current, ctx); | |
8263 | +} | |
8264 | + | |
8265 | + | |
8266 | +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) | |
8267 | +{ | |
8268 | + return 0; | |
8269 | +} | |
8270 | + | |
8271 | +static inline int pfm_arch_context_create(struct pfm_context *ctx, | |
8272 | + u32 ctx_flags) | |
8273 | +{ | |
8274 | + return 0; | |
8275 | +} | |
8276 | + | |
8277 | +static inline void pfm_arch_context_free(struct pfm_context *ctx) | |
8278 | +{} | |
8279 | + | |
8280 | +/* not necessary on PowerPC */ | |
8281 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
8282 | +{} | |
8283 | + | |
8284 | +/* | |
8285 | + * function called from pfm_setfl_sane(). Context is locked | |
8286 | + * and interrupts are masked. | |
8287 | + * The value of flags is the value of ctx_flags as passed by | |
8288 | + * user. | |
8289 | + * | |
8290 | + * function must check arch-specific set flags. | |
8291 | + * Return: | |
8292 | + * 1 when flags are valid | |
8293 | + * 0 on error | |
8294 | + */ | |
8295 | +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) | |
8296 | +{ | |
8297 | + return 0; | |
8298 | +} | |
8299 | + | |
8300 | +static inline int pfm_arch_init(void) | |
8301 | +{ | |
8302 | + return 0; | |
8303 | +} | |
8304 | + | |
8305 | +static inline int pfm_arch_load_context(struct pfm_context *ctx) | |
8306 | +{ | |
8307 | + struct pfm_arch_pmu_info *arch_info; | |
8308 | + int rc = 0; | |
8309 | + | |
8310 | + arch_info = pfm_pmu_info(); | |
8311 | + if (arch_info->load_context) | |
8312 | + rc = arch_info->load_context(ctx); | |
8313 | + | |
8314 | + return rc; | |
8315 | +} | |
8316 | + | |
8317 | +static inline void pfm_arch_unload_context(struct pfm_context *ctx) | |
8318 | +{ | |
8319 | + struct pfm_arch_pmu_info *arch_info; | |
8320 | + | |
8321 | + arch_info = pfm_pmu_info(); | |
8322 | + if (arch_info->unload_context) | |
8323 | + arch_info->unload_context(ctx); | |
8324 | +} | |
8325 | + | |
8326 | +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) | |
8327 | +{ | |
8328 | + struct pfm_arch_pmu_info *arch_info; | |
8329 | + int rc = 0; | |
8330 | + | |
8331 | + arch_info = pfm_pmu_info(); | |
8332 | + if (arch_info->acquire_pmu) { | |
8333 | + rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds); | |
8334 | + if (rc) | |
8335 | + return rc; | |
8336 | + } | |
8337 | + | |
8338 | + return reserve_pmc_hardware(powerpc_irq_handler); | |
8339 | +} | |
8340 | + | |
8341 | +static inline void pfm_arch_pmu_release(void) | |
8342 | +{ | |
8343 | + struct pfm_arch_pmu_info *arch_info; | |
8344 | + | |
8345 | + arch_info = pfm_pmu_info(); | |
8346 | + if (arch_info->release_pmu) | |
8347 | + arch_info->release_pmu(); | |
8348 | + | |
8349 | + release_pmc_hardware(); | |
8350 | +} | |
8351 | + | |
8352 | +static inline void pfm_arch_arm_handle_work(struct task_struct *task) | |
8353 | +{} | |
8354 | + | |
8355 | +static inline void pfm_arch_disarm_handle_work(struct task_struct *task) | |
8356 | +{} | |
8357 | + | |
8358 | +static inline int pfm_arch_get_base_syscall(void) | |
8359 | +{ | |
8360 | + return __NR_pfm_create_context; | |
8361 | +} | |
8362 | + | |
8363 | +struct pfm_arch_context { | |
8364 | + /* Cell: Most recent value of the pm_status | |
8365 | + * register read by the interrupt handler. | |
8366 | + * | |
8367 | + * Interrupt handler sets last_read_updated if it | |
8368 | + * just read and updated last_read_pm_status | |
8369 | + */ | |
8370 | + u32 last_read_pm_status; | |
8371 | + u32 last_read_updated; | |
8372 | + u64 powergs_pmc5, powergs_pmc6; | |
8373 | + u64 delta_tb, delta_tb_start; | |
8374 | + u64 delta_purr, delta_purr_start; | |
8375 | +}; | |
8376 | + | |
8377 | +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) | |
8378 | +/* | |
8379 | + * PowerPC does not need extra alignment requirements for the sampling buffer | |
8380 | + */ | |
8381 | +#define PFM_ARCH_SMPL_ALIGN_SIZE 0 | |
8382 | + | |
8383 | +#endif /* CONFIG_PERFMON */ | |
8384 | + | |
8385 | +#endif /* __KERNEL__ */ | |
8386 | +#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */ | |
8387 | --- a/arch/powerpc/include/asm/reg.h | |
8388 | +++ b/arch/powerpc/include/asm/reg.h | |
8389 | @@ -698,6 +698,7 @@ | |
8390 | #define PV_POWER5 0x003A | |
8391 | #define PV_POWER5p 0x003B | |
8392 | #define PV_970FX 0x003C | |
8393 | +#define PV_POWER6 0x003E | |
8394 | #define PV_630 0x0040 | |
8395 | #define PV_630p 0x0041 | |
8396 | #define PV_970MP 0x0044 | |
8397 | --- a/arch/powerpc/include/asm/systbl.h | |
8398 | +++ b/arch/powerpc/include/asm/systbl.h | |
8399 | @@ -322,3 +322,15 @@ SYSCALL_SPU(epoll_create1) | |
8400 | SYSCALL_SPU(dup3) | |
8401 | SYSCALL_SPU(pipe2) | |
8402 | SYSCALL(inotify_init1) | |
8403 | +SYSCALL(pfm_create_context) | |
8404 | +SYSCALL(pfm_write_pmcs) | |
8405 | +SYSCALL(pfm_write_pmds) | |
8406 | +SYSCALL(pfm_read_pmds) | |
8407 | +SYSCALL(pfm_load_context) | |
8408 | +SYSCALL(pfm_start) | |
8409 | +SYSCALL(pfm_stop) | |
8410 | +SYSCALL(pfm_restart) | |
8411 | +SYSCALL(pfm_create_evtsets) | |
8412 | +SYSCALL(pfm_getinfo_evtsets) | |
8413 | +SYSCALL(pfm_delete_evtsets) | |
8414 | +SYSCALL(pfm_unload_context) | |
8415 | --- a/arch/powerpc/include/asm/thread_info.h | |
8416 | +++ b/arch/powerpc/include/asm/thread_info.h | |
8417 | @@ -130,10 +130,12 @@ static inline struct thread_info *curren | |
8418 | #define _TIF_FREEZE (1<<TIF_FREEZE) | |
8419 | #define _TIF_RUNLATCH (1<<TIF_RUNLATCH) | |
8420 | #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) | |
8421 | +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK) | |
8422 | +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW) | |
8423 | #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP) | |
8424 | ||
8425 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
8426 | - _TIF_NOTIFY_RESUME) | |
8427 | + _TIF_NOTIFY_RESUME | _TIF_PERFMON_WORK) | |
8428 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
8429 | ||
8430 | /* Bits in local_flags */ | |
8431 | --- a/arch/powerpc/include/asm/unistd.h | |
8432 | +++ b/arch/powerpc/include/asm/unistd.h | |
8433 | @@ -341,10 +341,22 @@ | |
8434 | #define __NR_dup3 316 | |
8435 | #define __NR_pipe2 317 | |
8436 | #define __NR_inotify_init1 318 | |
8437 | +#define __NR_pfm_create_context 319 | |
8438 | +#define __NR_pfm_write_pmcs 320 | |
8439 | +#define __NR_pfm_write_pmds 321 | |
8440 | +#define __NR_pfm_read_pmds 322 | |
8441 | +#define __NR_pfm_load_context 323 | |
8442 | +#define __NR_pfm_start 324 | |
8443 | +#define __NR_pfm_stop 325 | |
8444 | +#define __NR_pfm_restart 326 | |
8445 | +#define __NR_pfm_create_evtsets 327 | |
8446 | +#define __NR_pfm_getinfo_evtsets 328 | |
8447 | +#define __NR_pfm_delete_evtsets 329 | |
8448 | +#define __NR_pfm_unload_context 330 | |
8449 | ||
8450 | #ifdef __KERNEL__ | |
8451 | ||
8452 | -#define __NR_syscalls 319 | |
8453 | +#define __NR_syscalls 331 | |
8454 | ||
8455 | #define __NR__exit __NR_exit | |
8456 | #define NR_syscalls __NR_syscalls | |
8457 | --- a/arch/powerpc/kernel/entry_32.S | |
8458 | +++ b/arch/powerpc/kernel/entry_32.S | |
8459 | @@ -39,7 +39,7 @@ | |
8460 | * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. | |
8461 | */ | |
8462 | #if MSR_KERNEL >= 0x10000 | |
8463 | -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l | |
8464 | +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@ha; ori r,r,(x)@l | |
8465 | #else | |
8466 | #define LOAD_MSR_KERNEL(r, x) li r,(x) | |
8467 | #endif | |
8468 | --- a/arch/powerpc/kernel/entry_64.S | |
8469 | +++ b/arch/powerpc/kernel/entry_64.S | |
8470 | @@ -643,6 +643,10 @@ user_work: | |
8471 | b .ret_from_except_lite | |
8472 | ||
8473 | 1: bl .save_nvgprs | |
8474 | +#ifdef CONFIG_PERFMON | |
8475 | + addi r3,r1,STACK_FRAME_OVERHEAD | |
8476 | + bl .pfm_handle_work | |
8477 | +#endif /* CONFIG_PERFMON */ | |
8478 | addi r3,r1,STACK_FRAME_OVERHEAD | |
8479 | bl .do_signal | |
8480 | b .ret_from_except | |
8481 | --- a/arch/powerpc/kernel/irq.c | |
8482 | +++ b/arch/powerpc/kernel/irq.c | |
8483 | @@ -104,6 +104,24 @@ static inline notrace void set_soft_enab | |
8484 | : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); | |
8485 | } | |
8486 | ||
8487 | +#ifdef CONFIG_PERFMON | |
8488 | +static inline unsigned long get_pmu_except_pending(void) | |
8489 | +{ | |
8490 | + unsigned long pending; | |
8491 | + | |
8492 | + __asm__ __volatile__("lbz %0,%1(13)" | |
8493 | + : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending))); | |
8494 | + | |
8495 | + return pending; | |
8496 | +} | |
8497 | + | |
8498 | +static inline void set_pmu_except_pending(unsigned long pending) | |
8499 | +{ | |
8500 | + __asm__ __volatile__("stb %0,%1(13)" | |
8501 | + : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending))); | |
8502 | +} | |
8503 | +#endif /* CONFIG_PERFMON */ | |
8504 | + | |
8505 | notrace void raw_local_irq_restore(unsigned long en) | |
8506 | { | |
8507 | /* | |
8508 | @@ -162,6 +180,19 @@ notrace void raw_local_irq_restore(unsig | |
8509 | lv1_get_version_info(&tmp); | |
8510 | } | |
8511 | ||
8512 | +#ifdef CONFIG_PERFMON | |
8513 | + /* | |
8514 | + * If a PMU exception occurred while interrupts were soft disabled, | |
8515 | + * force a PMU exception. | |
8516 | + */ | |
8517 | + if (get_pmu_except_pending()) { | |
8518 | + set_pmu_except_pending(0); | |
8519 | + /* Make sure we trigger the edge detection circuitry */ | |
8520 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); | |
8521 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); | |
8522 | + } | |
8523 | +#endif /* CONFIG_PERFMON */ | |
8524 | + | |
8525 | __hard_irq_enable(); | |
8526 | } | |
8527 | EXPORT_SYMBOL(raw_local_irq_restore); | |
8528 | --- a/arch/powerpc/kernel/process.c | |
8529 | +++ b/arch/powerpc/kernel/process.c | |
8530 | @@ -33,6 +33,7 @@ | |
8531 | #include <linux/mqueue.h> | |
8532 | #include <linux/hardirq.h> | |
8533 | #include <linux/utsname.h> | |
8534 | +#include <linux/perfmon_kern.h> | |
8535 | ||
8536 | #include <asm/pgtable.h> | |
8537 | #include <asm/uaccess.h> | |
8538 | @@ -393,9 +394,14 @@ struct task_struct *__switch_to(struct t | |
8539 | new_thread->start_tb = current_tb; | |
8540 | } | |
8541 | #endif | |
8542 | - | |
8543 | local_irq_save(flags); | |
8544 | ||
8545 | + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) | |
8546 | + pfm_ctxsw_out(prev, new); | |
8547 | + | |
8548 | + if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW)) | |
8549 | + pfm_ctxsw_in(prev, new); | |
8550 | + | |
8551 | account_system_vtime(current); | |
8552 | account_process_vtime(current); | |
8553 | calculate_steal_time(); | |
8554 | @@ -544,6 +550,7 @@ void show_regs(struct pt_regs * regs) | |
8555 | void exit_thread(void) | |
8556 | { | |
8557 | discard_lazy_cpu_state(); | |
8558 | + pfm_exit_thread(); | |
8559 | } | |
8560 | ||
8561 | void flush_thread(void) | |
8562 | @@ -669,6 +676,7 @@ int copy_thread(int nr, unsigned long cl | |
8563 | #else | |
8564 | kregs->nip = (unsigned long)ret_from_fork; | |
8565 | #endif | |
8566 | + pfm_copy_thread(p); | |
8567 | ||
8568 | return 0; | |
8569 | } | |
8570 | --- /dev/null | |
8571 | +++ b/arch/powerpc/perfmon/Kconfig | |
8572 | @@ -0,0 +1,67 @@ | |
8573 | +menu "Hardware Performance Monitoring support" | |
8574 | +config PERFMON | |
8575 | + bool "Perfmon2 performance monitoring interface" | |
8576 | + default n | |
8577 | + help | |
8578 | + Enables the perfmon2 interface to access the hardware | |
8579 | + performance counters. See <http://perfmon2.sf.net/> for | |
8580 | + more details. | |
8581 | + | |
8582 | +config PERFMON_DEBUG | |
8583 | + bool "Perfmon debugging" | |
8584 | + default n | |
8585 | + depends on PERFMON | |
8586 | + help | |
8587 | + Enables perfmon debugging support | |
8588 | + | |
8589 | +config PERFMON_DEBUG_FS | |
8590 | + bool "Enable perfmon statistics reporting via debugfs" | |
8591 | + default y | |
8592 | + depends on PERFMON && DEBUG_FS | |
8593 | + help | |
8594 | + Enable collection and reporting of perfmon timing statistics under | |
8595 | + debugfs. This is used for debugging and performance analysis of the | |
8596 | + subsystem. The debugfs filesystem must be mounted. | |
8597 | + | |
8598 | +config PERFMON_POWER4 | |
8599 | + tristate "Support for Power4 hardware performance counters" | |
8600 | + depends on PERFMON && PPC64 | |
8601 | + default n | |
8602 | + help | |
8603 | + Enables support for the Power 4 hardware performance counters | |
8604 | + If unsure, say M. | |
8605 | + | |
8606 | +config PERFMON_POWER5 | |
8607 | + tristate "Support for Power5 hardware performance counters" | |
8608 | + depends on PERFMON && PPC64 | |
8609 | + default n | |
8610 | + help | |
8611 | + Enables support for the Power 5 hardware performance counters | |
8612 | + If unsure, say M. | |
8613 | + | |
8614 | +config PERFMON_POWER6 | |
8615 | + tristate "Support for Power6 hardware performance counters" | |
8616 | + depends on PERFMON && PPC64 | |
8617 | + default n | |
8618 | + help | |
8619 | + Enables support for the Power 6 hardware performance counters | |
8620 | + If unsure, say M. | |
8621 | + | |
8622 | +config PERFMON_PPC32 | |
8623 | + tristate "Support for PPC32 hardware performance counters" | |
8624 | + depends on PERFMON && PPC32 | |
8625 | + default n | |
8626 | + help | |
8627 | + Enables support for the PPC32 hardware performance counters | |
8628 | + If unsure, say M. | |
8629 | + | |
8630 | +config PERFMON_CELL | |
8631 | + tristate "Support for Cell hardware performance counters" | |
8632 | + depends on PERFMON && PPC_CELL | |
8633 | + select PS3_LPM if PPC_PS3 | |
8634 | + default n | |
8635 | + help | |
8636 | + Enables support for the Cell hardware performance counters. | |
8637 | + If unsure, say M. | |
8638 | + | |
8639 | +endmenu | |
8640 | --- /dev/null | |
8641 | +++ b/arch/powerpc/perfmon/Makefile | |
8642 | @@ -0,0 +1,6 @@ | |
8643 | +obj-$(CONFIG_PERFMON) += perfmon.o | |
8644 | +obj-$(CONFIG_PERFMON_POWER4) += perfmon_power4.o | |
8645 | +obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o | |
8646 | +obj-$(CONFIG_PERFMON_POWER6) += perfmon_power6.o | |
8647 | +obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o | |
8648 | +obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o | |
8649 | --- /dev/null | |
8650 | +++ b/arch/powerpc/perfmon/perfmon.c | |
8651 | @@ -0,0 +1,334 @@ | |
8652 | +/* | |
8653 | + * This file implements the powerpc specific | |
8654 | + * support for the perfmon2 interface | |
8655 | + * | |
8656 | + * Copyright (c) 2005 David Gibson, IBM Corporation. | |
8657 | + * | |
8658 | + * based on versions for other architectures: | |
8659 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
8660 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
8661 | + * | |
8662 | + * This program is free software; you can redistribute it and/or | |
8663 | + * modify it under the terms of version 2 of the GNU General Public | |
8664 | + * License as published by the Free Software Foundation. | |
8665 | + * | |
8666 | + * This program is distributed in the hope that it will be useful, | |
8667 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8668 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
8669 | + * General Public License for more details. | |
8670 | + * | |
8671 | + * You should have received a copy of the GNU General Public License | |
8672 | + * along with this program; if not, write to the Free Software | |
8673 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
8674 | + * 02111-1307 USA | |
8675 | + */ | |
8676 | +#include <linux/interrupt.h> | |
8677 | +#include <linux/perfmon_kern.h> | |
8678 | + | |
8679 | +static void pfm_stop_active(struct task_struct *task, | |
8680 | + struct pfm_context *ctx, struct pfm_event_set *set) | |
8681 | +{ | |
8682 | + struct pfm_arch_pmu_info *arch_info; | |
8683 | + | |
8684 | + arch_info = pfm_pmu_info(); | |
8685 | + BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds); | |
8686 | + | |
8687 | + arch_info->disable_counters(ctx, set); | |
8688 | + | |
8689 | + if (set->npend_ovfls) | |
8690 | + return; | |
8691 | + | |
8692 | + arch_info->get_ovfl_pmds(ctx, set); | |
8693 | +} | |
8694 | + | |
8695 | +/* | |
8696 | + * Called from pfm_save_pmds(). Interrupts are masked. Registers are | |
8697 | + * already saved away. | |
8698 | + */ | |
8699 | +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, | |
8700 | + struct pfm_event_set *set) | |
8701 | +{ | |
8702 | + int i, num; | |
8703 | + u64 *used_pmds, *intr_pmds; | |
8704 | + | |
8705 | + num = set->nused_pmds; | |
8706 | + used_pmds = set->used_pmds; | |
8707 | + intr_pmds = ctx->regs.intr_pmds; | |
8708 | + | |
8709 | + for (i = 0; num; i++) | |
8710 | + if (likely(test_bit(i, used_pmds))) { | |
8711 | + if (likely(test_bit(i, intr_pmds))) | |
8712 | + pfm_write_pmd(ctx, i, 0); | |
8713 | + num--; | |
8714 | + } | |
8715 | +} | |
8716 | + | |
8717 | +/* | |
8718 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
8719 | + * Context is locked. Interrupts are masked. Monitoring is active. | |
8720 | + * PMU access is guaranteed. PMC and PMD registers are live in PMU. | |
8721 | + * | |
8722 | + * for per-thread: | |
8723 | + * must stop monitoring for the task | |
8724 | + * Return: | |
8725 | + * non-zero : did not save PMDs (as part of stopping the PMU) | |
8726 | + * 0 : saved PMDs (no need to save them in caller) | |
8727 | + */ | |
8728 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) | |
8729 | +{ | |
8730 | + struct pfm_arch_pmu_info *arch_info; | |
8731 | + | |
8732 | + arch_info = pfm_pmu_info(); | |
8733 | + /* | |
8734 | + * disable lazy restore of the PMC/PMD registers. | |
8735 | + */ | |
8736 | + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; | |
8737 | + | |
8738 | + if (ctx->state == PFM_CTX_MASKED) | |
8739 | + return 1; | |
8740 | + | |
8741 | + pfm_stop_active(task, ctx, ctx->active_set); | |
8742 | + | |
8743 | + if (arch_info->ctxswout_thread) | |
8744 | + arch_info->ctxswout_thread(task, ctx, ctx->active_set); | |
8745 | + | |
8746 | + return pfm_arch_is_active(ctx); | |
8747 | +} | |
8748 | + | |
8749 | +/* | |
8750 | + * Called from pfm_ctxsw | |
8751 | + */ | |
8752 | +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) | |
8753 | +{ | |
8754 | + struct pfm_arch_pmu_info *arch_info; | |
8755 | + | |
8756 | + arch_info = pfm_pmu_info(); | |
8757 | + if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) { | |
8758 | + BUG_ON(!arch_info->enable_counters); | |
8759 | + arch_info->enable_counters(ctx, ctx->active_set); | |
8760 | + } | |
8761 | + | |
8762 | + if (arch_info->ctxswin_thread) | |
8763 | + arch_info->ctxswin_thread(task, ctx, ctx->active_set); | |
8764 | +} | |
8765 | + | |
8766 | +/* | |
8767 | + * Called from pfm_stop() and idle notifier | |
8768 | + * | |
8769 | + * Interrupts are masked. Context is locked. Set is the active set. | |
8770 | + * | |
8771 | + * For per-thread: | |
8772 | + * task is not necessarily current. If not current task, then | |
8773 | + * task is guaranteed stopped and off any cpu. Access to PMU | |
8774 | + * is not guaranteed. Interrupts are masked. Context is locked. | |
8775 | + * Set is the active set. | |
8776 | + * | |
8777 | + * For system-wide: | |
8778 | + * task is current | |
8779 | + * | |
8780 | + * must disable active monitoring. ctx cannot be NULL | |
8781 | + */ | |
8782 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) | |
8783 | +{ | |
8784 | + /* | |
8785 | + * no need to go through stop_save() | |
8786 | + * if we are already stopped | |
8787 | + */ | |
8788 | + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) | |
8789 | + return; | |
8790 | + | |
8791 | + /* | |
8792 | + * stop live registers and collect pending overflow | |
8793 | + */ | |
8794 | + if (task == current) | |
8795 | + pfm_stop_active(task, ctx, ctx->active_set); | |
8796 | +} | |
8797 | + | |
8798 | +/* | |
8799 | + * Enable active monitoring. Called from pfm_start() and | |
8800 | + * pfm_arch_unmask_monitoring(). | |
8801 | + * | |
8802 | + * Interrupts are masked. Context is locked. Set is the active set. | |
8803 | + * | |
8804 | + * For per-thread: | |
8805 | + * Task is not necessarily current. If not current task, then task | |
8806 | + * is guaranteed stopped and off any cpu. No access to PMU if task | |
8807 | + * is not current. | |
8808 | + * | |
8809 | + * For system-wide: | |
8810 | + * Task is always current | |
8811 | + */ | |
8812 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) | |
8813 | +{ | |
8814 | + struct pfm_arch_pmu_info *arch_info; | |
8815 | + | |
8816 | + arch_info = pfm_pmu_info(); | |
8817 | + if (task != current) | |
8818 | + return; | |
8819 | + | |
8820 | + BUG_ON(!arch_info->enable_counters); | |
8821 | + | |
8822 | + arch_info->enable_counters(ctx, ctx->active_set); | |
8823 | +} | |
8824 | + | |
8825 | +/* | |
8826 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
8827 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
8828 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
8829 | + * Access to the PMU is guaranteed. | |
8830 | + * | |
8831 | + * function must restore all PMD registers from set. | |
8832 | + */ | |
8833 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
8834 | +{ | |
8835 | + struct pfm_arch_pmu_info *arch_info; | |
8836 | + u64 *used_pmds; | |
8837 | + u16 i, num; | |
8838 | + | |
8839 | + arch_info = pfm_pmu_info(); | |
8840 | + | |
8841 | + /* The model-specific module can override the default | |
8842 | + * restore-PMD method. | |
8843 | + */ | |
8844 | + if (arch_info->restore_pmds) | |
8845 | + return arch_info->restore_pmds(ctx, set); | |
8846 | + | |
8847 | + num = set->nused_pmds; | |
8848 | + used_pmds = set->used_pmds; | |
8849 | + | |
8850 | + for (i = 0; num; i++) { | |
8851 | + if (likely(test_bit(i, used_pmds))) { | |
8852 | + pfm_write_pmd(ctx, i, set->pmds[i].value); | |
8853 | + num--; | |
8854 | + } | |
8855 | + } | |
8856 | +} | |
8857 | + | |
8858 | +/* | |
8859 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
8860 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
8861 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
8862 | + * Access to the PMU is guaranteed. | |
8863 | + * | |
8864 | + * function must restore all PMC registers from set, if needed. | |
8865 | + */ | |
8866 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
8867 | +{ | |
8868 | + struct pfm_arch_pmu_info *arch_info; | |
8869 | + u64 *impl_pmcs; | |
8870 | + unsigned int i, max_pmc, reg; | |
8871 | + | |
8872 | + arch_info = pfm_pmu_info(); | |
8873 | + /* The model-specific module can override the default | |
8874 | + * restore-PMC method. | |
8875 | + */ | |
8876 | + if (arch_info->restore_pmcs) | |
8877 | + return arch_info->restore_pmcs(ctx, set); | |
8878 | + | |
8879 | + /* The "common" powerpc model's enable the counters simply by writing | |
8880 | + * all the control registers. Therefore, if we're masked or stopped we | |
8881 | + * don't need to bother restoring the PMCs now. | |
8882 | + */ | |
8883 | + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) | |
8884 | + return; | |
8885 | + | |
8886 | + max_pmc = ctx->regs.max_pmc; | |
8887 | + impl_pmcs = ctx->regs.pmcs; | |
8888 | + | |
8889 | + /* | |
8890 | + * Restore all pmcs in reverse order to ensure the counters aren't | |
8891 | + * enabled before their event selectors are set correctly. | |
8892 | + */ | |
8893 | + reg = max_pmc - 1; | |
8894 | + for (i = 0; i < max_pmc; i++) { | |
8895 | + if (test_bit(reg, impl_pmcs)) | |
8896 | + pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]); | |
8897 | + reg--; | |
8898 | + } | |
8899 | +} | |
8900 | + | |
8901 | +char *pfm_arch_get_pmu_module_name(void) | |
8902 | +{ | |
8903 | + unsigned int pvr = mfspr(SPRN_PVR); | |
8904 | + | |
8905 | + switch (PVR_VER(pvr)) { | |
8906 | + case 0x0004: /* 604 */ | |
8907 | + case 0x0009: /* 604e; */ | |
8908 | + case 0x000A: /* 604ev */ | |
8909 | + case 0x0008: /* 750/740 */ | |
8910 | + case 0x7000: /* 750FX */ | |
8911 | + case 0x7001: | |
8912 | + case 0x7002: /* 750GX */ | |
8913 | + case 0x000C: /* 7400 */ | |
8914 | + case 0x800C: /* 7410 */ | |
8915 | + case 0x8000: /* 7451/7441 */ | |
8916 | + case 0x8001: /* 7455/7445 */ | |
8917 | + case 0x8002: /* 7457/7447 */ | |
8918 | + case 0x8003: /* 7447A */ | |
8919 | + case 0x8004: /* 7448 */ | |
8920 | + return("perfmon_ppc32"); | |
8921 | + case PV_POWER4: | |
8922 | + case PV_POWER4p: | |
8923 | + return "perfmon_power4"; | |
8924 | + case PV_POWER5: | |
8925 | + return "perfmon_power5"; | |
8926 | + case PV_POWER5p: | |
8927 | + if (PVR_REV(pvr) < 0x300) | |
8928 | + /* PMU behaves like POWER5 */ | |
8929 | + return "perfmon_power5"; | |
8930 | + else | |
8931 | + /* PMU behaves like POWER6 */ | |
8932 | + return "perfmon_power6"; | |
8933 | + case PV_POWER6: | |
8934 | + return "perfmon_power6"; | |
8935 | + case PV_970: | |
8936 | + case PV_970FX: | |
8937 | + case PV_970MP: | |
8938 | + return "perfmon_ppc970"; | |
8939 | + case PV_BE: | |
8940 | + return "perfmon_cell"; | |
8941 | + } | |
8942 | + return NULL; | |
8943 | +} | |
8944 | + | |
8945 | +void pfm_arch_init_percpu(void) | |
8946 | +{ | |
8947 | +#ifdef CONFIG_PPC64 | |
8948 | + extern void ppc64_enable_pmcs(void); | |
8949 | + ppc64_enable_pmcs(); | |
8950 | +#endif | |
8951 | +} | |
8952 | + | |
8953 | +/** | |
8954 | + * powerpc_irq_handler | |
8955 | + * | |
8956 | + * Get the perfmon context that belongs to the current CPU, and call the | |
8957 | + * model-specific interrupt handler. | |
8958 | + **/ | |
8959 | +void powerpc_irq_handler(struct pt_regs *regs) | |
8960 | +{ | |
8961 | + struct pfm_arch_pmu_info *arch_info; | |
8962 | + struct pfm_context *ctx; | |
8963 | + | |
8964 | + if (! regs->softe) { | |
8965 | + /* | |
8966 | + * We got a PMU interrupt while interrupts were soft | |
8967 | + * disabled. Disable hardware interrupts by clearing | |
8968 | + * MSR_EE and also clear PMAO because we will need to set | |
8969 | + * that again later when interrupts are re-enabled and | |
8970 | + * raw_local_irq_restore() sees that the pmu_except_pending | |
8971 | + * flag is set. | |
8972 | + */ | |
8973 | + regs->msr &= ~MSR_EE; | |
8974 | + get_paca()->pmu_except_pending = 1; | |
8975 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); | |
8976 | + return; | |
8977 | + } | |
8978 | + | |
8979 | + arch_info = pfm_pmu_info(); | |
8980 | + if (arch_info->irq_handler) { | |
8981 | + ctx = __get_cpu_var(pmu_ctx); | |
8982 | + if (likely(ctx)) | |
8983 | + arch_info->irq_handler(regs, ctx); | |
8984 | + } | |
8985 | +} | |
8986 | --- /dev/null | |
8987 | +++ b/arch/powerpc/perfmon/perfmon_cell.c | |
8988 | @@ -0,0 +1,1449 @@ | |
8989 | +/* | |
8990 | + * This file contains the Cell PMU register description tables | |
8991 | + * and pmc checker used by perfmon.c. | |
8992 | + * | |
8993 | + * Copyright IBM Corporation 2007 | |
8994 | + * (C) Copyright 2007 TOSHIBA CORPORATION | |
8995 | + * | |
8996 | + * Based on other Perfmon2 PMU modules. | |
8997 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
8998 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
8999 | + * | |
9000 | + * This program is free software; you can redistribute it and/or | |
9001 | + * modify it under the terms of version 2 of the GNU General Public | |
9002 | + * License as published by the Free Software Foundation. | |
9003 | + * | |
9004 | + * This program is distributed in the hope that it will be useful, | |
9005 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9006 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
9007 | + * General Public License for more details. | |
9008 | + * | |
9009 | + * You should have received a copy of the GNU General Public License | |
9010 | + * along with this program; if not, write to the Free Software | |
9011 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
9012 | + * 02111-1307 USA | |
9013 | + */ | |
9014 | + | |
9015 | +#include <linux/module.h> | |
9016 | +#include <linux/perfmon_kern.h> | |
9017 | +#include <linux/io.h> | |
9018 | +#include <asm/cell-pmu.h> | |
9019 | +#include <asm/cell-regs.h> | |
9020 | +#include <asm/machdep.h> | |
9021 | +#include <asm/rtas.h> | |
9022 | +#include <asm/ps3.h> | |
9023 | +#include <asm/spu.h> | |
9024 | + | |
9025 | +MODULE_AUTHOR("Kevin Corry <kevcorry@us.ibm.com>, " | |
9026 | + "Carl Love <carll@us.ibm.com>"); | |
9027 | +MODULE_DESCRIPTION("Cell PMU description table"); | |
9028 | +MODULE_LICENSE("GPL"); | |
9029 | + | |
9030 | +struct pfm_cell_platform_pmu_info { | |
9031 | + u32 (*read_ctr)(u32 cpu, u32 ctr); | |
9032 | + void (*write_ctr)(u32 cpu, u32 ctr, u32 val); | |
9033 | + void (*write_pm07_control)(u32 cpu, u32 ctr, u32 val); | |
9034 | + void (*write_pm)(u32 cpu, enum pm_reg_name reg, u32 val); | |
9035 | + void (*enable_pm)(u32 cpu); | |
9036 | + void (*disable_pm)(u32 cpu); | |
9037 | + void (*enable_pm_interrupts)(u32 cpu, u32 thread, u32 mask); | |
9038 | + u32 (*get_and_clear_pm_interrupts)(u32 cpu); | |
9039 | + u32 (*get_hw_thread_id)(int cpu); | |
9040 | + struct cbe_ppe_priv_regs __iomem *(*get_cpu_ppe_priv_regs)(int cpu); | |
9041 | + struct cbe_pmd_regs __iomem *(*get_cpu_pmd_regs)(int cpu); | |
9042 | + struct cbe_mic_tm_regs __iomem *(*get_cpu_mic_tm_regs)(int cpu); | |
9043 | + int (*rtas_token)(const char *service); | |
9044 | + int (*rtas_call)(int token, int param1, int param2, int *param3, ...); | |
9045 | +}; | |
9046 | + | |
9047 | +/* | |
9048 | + * Mapping from Perfmon logical control registers to Cell hardware registers. | |
9049 | + */ | |
9050 | +static struct pfm_regmap_desc pfm_cell_pmc_desc[] = { | |
9051 | + /* Per-counter control registers. */ | |
9052 | + PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0), | |
9053 | + PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0), | |
9054 | + PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0), | |
9055 | + PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0), | |
9056 | + PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0), | |
9057 | + PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0), | |
9058 | + PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0), | |
9059 | + PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0), | |
9060 | + | |
9061 | + /* Per-counter RTAS arguments. Each of these registers has three fields. | |
9062 | + * bits 63-48: debug-bus word | |
9063 | + * bits 47-32: sub-unit | |
9064 | + * bits 31-0 : full signal number | |
9065 | + * (MSB = 63, LSB = 0) | |
9066 | + */ | |
9067 | + PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0), | |
9068 | + PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0), | |
9069 | + PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0), | |
9070 | + PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0), | |
9071 | + PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0), | |
9072 | + PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0), | |
9073 | + PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0), | |
9074 | + PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0), | |
9075 | + | |
9076 | + /* Global control registers. Same order as enum pm_reg_name. */ | |
9077 | + PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0), | |
9078 | + PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0), | |
9079 | + PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0), | |
9080 | + PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0), | |
9081 | + PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0), | |
9082 | + /* set the interrupt overflow bit for the four 32 bit counters | |
9083 | + * that is currently supported. Will need to fix when 32 and 16 | |
9084 | + * bit counters are supported. | |
9085 | + */ | |
9086 | + PMC_D(PFM_REG_I, "pm_control", 0xF0000000, 0xF0000000, 0, 0), | |
9087 | + PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */ | |
9088 | + PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0), | |
9089 | +}; | |
9090 | +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc) | |
9091 | + | |
9092 | +#define CELL_PMC_GROUP_CONTROL 16 | |
9093 | +#define CELL_PMC_PM_STATUS 20 | |
9094 | +#define CELL_PMC_PM_CONTROL 21 | |
9095 | +#define CELL_PMC_PM_CONTROL_CNTR_MASK 0x01E00000UL | |
9096 | +#define CELL_PMC_PM_CONTROL_CNTR_16 0x01E00000UL | |
9097 | + | |
9098 | +/* | |
9099 | + * Mapping from Perfmon logical data counters to Cell hardware counters. | |
9100 | + */ | |
9101 | +static struct pfm_regmap_desc pfm_cell_pmd_desc[] = { | |
9102 | + PMD_D(PFM_REG_C, "pm0", 0), | |
9103 | + PMD_D(PFM_REG_C, "pm1", 0), | |
9104 | + PMD_D(PFM_REG_C, "pm2", 0), | |
9105 | + PMD_D(PFM_REG_C, "pm3", 0), | |
9106 | + PMD_D(PFM_REG_C, "pm4", 0), | |
9107 | + PMD_D(PFM_REG_C, "pm5", 0), | |
9108 | + PMD_D(PFM_REG_C, "pm6", 0), | |
9109 | + PMD_D(PFM_REG_C, "pm7", 0), | |
9110 | +}; | |
9111 | +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc) | |
9112 | + | |
9113 | +#define PFM_EVENT_PMC_BUS_WORD(x) (((x) >> 48) & 0x00ff) | |
9114 | +#define PFM_EVENT_PMC_FULL_SIGNAL_NUMBER(x) ((x) & 0xffffffff) | |
9115 | +#define PFM_EVENT_PMC_SIGNAL_GROUP(x) (((x) & 0xffffffff) / 100) | |
9116 | +#define PFM_PM_CTR_INPUT_MUX_BIT(pm07_control) (((pm07_control) >> 26) & 0x1f) | |
9117 | +#define PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(pm07_control) ((pm07_control) >> 31) | |
9118 | +#define PFM_GROUP_CONTROL_GROUP0_WORD(grp_ctrl) ((grp_ctrl) >> 30) | |
9119 | +#define PFM_GROUP_CONTROL_GROUP1_WORD(grp_ctrl) (((grp_ctrl) >> 28) & 0x3) | |
9120 | +#define PFM_NUM_OF_GROUPS 2 | |
9121 | +#define PFM_PPU_IU1_THREAD1_BASE_BIT 19 | |
9122 | +#define PFM_PPU_XU_THREAD1_BASE_BIT 16 | |
9123 | +#define PFM_COUNTER_CTRL_PMC_PPU_TH0 0x100000000ULL | |
9124 | +#define PFM_COUNTER_CTRL_PMC_PPU_TH1 0x200000000ULL | |
9125 | + | |
9126 | +/* | |
9127 | + * Debug-bus signal handling. | |
9128 | + * | |
9129 | + * Some Cell systems have firmware that can handle the debug-bus signal | |
9130 | + * routing. For systems without this firmware, we have a minimal in-kernel | |
9131 | + * implementation as well. | |
9132 | + */ | |
9133 | + | |
9134 | +/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. */ | |
9135 | +#ifdef CONFIG_SCHED_SMT | |
9136 | +#define RTAS_CPU(cpu) ((cpu) / 2) | |
9137 | +#else | |
9138 | +#define RTAS_CPU(cpu) (cpu) | |
9139 | +#endif | |
9140 | +#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff) | |
9141 | +#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff) | |
9142 | +#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff) | |
9143 | +#define RTAS_SIGNAL_GROUP(x) (RTAS_SIGNAL_NUMBER(x) / 100) | |
9144 | + | |
9145 | +#define subfunc_RESET 1 | |
9146 | +#define subfunc_ACTIVATE 2 | |
9147 | + | |
9148 | +#define passthru_ENABLE 1 | |
9149 | +#define passthru_DISABLE 2 | |
9150 | + | |
9151 | +/** | |
9152 | + * struct cell_rtas_arg | |
9153 | + * | |
9154 | + * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the | |
9155 | + * firmware only sees the physical CPUs. So this value should be the | |
9156 | + * SMT ID (from smp_processor_id() or get_cpu()) divided by 2. | |
9157 | + * @sub_unit: Hardware subunit this applies to (if applicable). | |
9158 | + * @signal_group: Signal group to enable/disable on the trace bus. | |
9159 | + * @bus_word: For signal groups that propagate via the trace bus, this trace | |
9160 | + * bus word will be used. This is a mask of (1 << TraceBusWord). | |
9161 | + * For other signal groups, this specifies the trigger or event bus. | |
9162 | + * @bit: Trigger/Event bit, if applicable for the signal group. | |
9163 | + * | |
9164 | + * An array of these structures are passed to rtas_call() to set up the | |
9165 | + * signals on the debug bus. | |
9166 | + **/ | |
9167 | +struct cell_rtas_arg { | |
9168 | + u16 cpu; | |
9169 | + u16 sub_unit; | |
9170 | + s16 signal_group; | |
9171 | + u8 bus_word; | |
9172 | + u8 bit; | |
9173 | +}; | |
9174 | + | |
9175 | +/** | |
9176 | + * rtas_reset_signals | |
9177 | + * | |
9178 | + * Use the firmware RTAS call to disable signal pass-thru and to reset the | |
9179 | + * debug-bus signals. | |
9180 | + **/ | |
9181 | +static int rtas_reset_signals(u32 cpu) | |
9182 | +{ | |
9183 | + struct cell_rtas_arg signal; | |
9184 | + u64 real_addr = virt_to_phys(&signal); | |
9185 | + int rc; | |
9186 | + struct pfm_cell_platform_pmu_info *info = | |
9187 | + ((struct pfm_arch_pmu_info *) | |
9188 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9189 | + | |
9190 | + memset(&signal, 0, sizeof(signal)); | |
9191 | + signal.cpu = RTAS_CPU(cpu); | |
9192 | + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"), | |
9193 | + 5, 1, NULL, | |
9194 | + subfunc_RESET, | |
9195 | + passthru_DISABLE, | |
9196 | + real_addr >> 32, | |
9197 | + real_addr & 0xffffffff, | |
9198 | + sizeof(signal)); | |
9199 | + | |
9200 | + return rc; | |
9201 | +} | |
9202 | + | |
9203 | +/** | |
9204 | + * rtas_activate_signals | |
9205 | + * | |
9206 | + * Use the firmware RTAS call to enable signal pass-thru and to activate the | |
9207 | + * desired signal groups on the debug-bus. | |
9208 | + **/ | |
9209 | +static int rtas_activate_signals(struct cell_rtas_arg *signals, | |
9210 | + int num_signals) | |
9211 | +{ | |
9212 | + u64 real_addr = virt_to_phys(signals); | |
9213 | + int rc; | |
9214 | + struct pfm_cell_platform_pmu_info *info = | |
9215 | + ((struct pfm_arch_pmu_info *) | |
9216 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9217 | + | |
9218 | + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"), | |
9219 | + 5, 1, NULL, | |
9220 | + subfunc_ACTIVATE, | |
9221 | + passthru_ENABLE, | |
9222 | + real_addr >> 32, | |
9223 | + real_addr & 0xffffffff, | |
9224 | + num_signals * sizeof(*signals)); | |
9225 | + | |
9226 | + return rc; | |
9227 | +} | |
9228 | + | |
9229 | +#define HID1_RESET_MASK (~0x00000001ffffffffUL) | |
9230 | +#define PPU_IU1_WORD0_HID1_EN_MASK (~0x00000001f0c0802cUL) | |
9231 | +#define PPU_IU1_WORD0_HID1_EN_WORD ( 0x00000001f0400000UL) | |
9232 | +#define PPU_IU1_WORD1_HID1_EN_MASK (~0x000000010fc08023UL) | |
9233 | +#define PPU_IU1_WORD1_HID1_EN_WORD ( 0x000000010f400001UL) | |
9234 | +#define PPU_XU_WORD0_HID1_EN_MASK (~0x00000001f038402cUL) | |
9235 | +#define PPU_XU_WORD0_HID1_EN_WORD ( 0x00000001f0080008UL) | |
9236 | +#define PPU_XU_WORD1_HID1_EN_MASK (~0x000000010f074023UL) | |
9237 | +#define PPU_XU_WORD1_HID1_EN_WORD ( 0x000000010f030002UL) | |
9238 | + | |
9239 | +/* The bus_word field in the cell_rtas_arg structure is a bit-mask | |
9240 | + * indicating which debug-bus word(s) to use. | |
9241 | + */ | |
9242 | +enum { | |
9243 | + BUS_WORD_0 = 1, | |
9244 | + BUS_WORD_1 = 2, | |
9245 | + BUS_WORD_2 = 4, | |
9246 | + BUS_WORD_3 = 8, | |
9247 | +}; | |
9248 | + | |
9249 | +/* Definitions of the signal-groups that the built-in signal-activation | |
9250 | + * code can handle. | |
9251 | + */ | |
9252 | +enum { | |
9253 | + SIG_GROUP_NONE = 0, | |
9254 | + | |
9255 | + /* 2.x PowerPC Processor Unit (PPU) Signal Groups */ | |
9256 | + SIG_GROUP_PPU_BASE = 20, | |
9257 | + SIG_GROUP_PPU_IU1 = 21, | |
9258 | + SIG_GROUP_PPU_XU = 22, | |
9259 | + | |
9260 | + /* 3.x PowerPC Storage Subsystem (PPSS) Signal Groups */ | |
9261 | + SIG_GROUP_PPSS_BASE = 30, | |
9262 | + | |
9263 | + /* 4.x Synergistic Processor Unit (SPU) Signal Groups */ | |
9264 | + SIG_GROUP_SPU_BASE = 40, | |
9265 | + | |
9266 | + /* 5.x Memory Flow Controller (MFC) Signal Groups */ | |
9267 | + SIG_GROUP_MFC_BASE = 50, | |
9268 | + | |
9269 | + /* 6.x Element Interconnect Bus (EIB) Signal Groups */ | |
9270 | + SIG_GROUP_EIB_BASE = 60, | |
9271 | + | |
9272 | + /* 7.x Memory Interface Controller (MIC) Signal Groups */ | |
9273 | + SIG_GROUP_MIC_BASE = 70, | |
9274 | + | |
9275 | + /* 8.x Cell Broadband Engine Interface (BEI) Signal Groups */ | |
9276 | + SIG_GROUP_BEI_BASE = 80, | |
9277 | +}; | |
9278 | + | |
9279 | +/** | |
9280 | + * rmw_spr | |
9281 | + * | |
9282 | + * Read-modify-write for a special-purpose-register. | |
9283 | + **/ | |
9284 | +#define rmw_spr(spr_id, a_mask, o_mask) \ | |
9285 | + do { \ | |
9286 | + u64 value = mfspr(spr_id); \ | |
9287 | + value &= (u64)(a_mask); \ | |
9288 | + value |= (u64)(o_mask); \ | |
9289 | + mtspr((spr_id), value); \ | |
9290 | + } while (0) | |
9291 | + | |
9292 | +/** | |
9293 | + * rmw_mmio_reg64 | |
9294 | + * | |
9295 | + * Read-modify-write for a 64-bit MMIO register. | |
9296 | + **/ | |
9297 | +#define rmw_mmio_reg64(mem, a_mask, o_mask) \ | |
9298 | + do { \ | |
9299 | + u64 value = in_be64(&(mem)); \ | |
9300 | + value &= (u64)(a_mask); \ | |
9301 | + value |= (u64)(o_mask); \ | |
9302 | + out_be64(&(mem), value); \ | |
9303 | + } while (0) | |
9304 | + | |
9305 | +/** | |
9306 | + * rmwb_mmio_reg64 | |
9307 | + * | |
9308 | + * Set or unset a specified bit within a 64-bit MMIO register. | |
9309 | + **/ | |
9310 | +#define rmwb_mmio_reg64(mem, bit_num, set_bit) \ | |
9311 | + rmw_mmio_reg64((mem), ~(1UL << (63 - (bit_num))), \ | |
9312 | + ((set_bit) << (63 - (bit_num)))) | |
9313 | + | |
9314 | +/** | |
9315 | + * passthru | |
9316 | + * | |
9317 | + * Enable or disable passthru mode in all the Cell signal islands. | |
9318 | + **/ | |
9319 | +static int passthru(u32 cpu, u64 enable) | |
9320 | +{ | |
9321 | + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs; | |
9322 | + struct cbe_pmd_regs __iomem *pmd_regs; | |
9323 | + struct cbe_mic_tm_regs __iomem *mic_tm_regs; | |
9324 | + struct pfm_cell_platform_pmu_info *info = | |
9325 | + ((struct pfm_arch_pmu_info *) | |
9326 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9327 | + | |
9328 | + ppe_priv_regs = info->get_cpu_ppe_priv_regs(cpu); | |
9329 | + pmd_regs = info->get_cpu_pmd_regs(cpu); | |
9330 | + mic_tm_regs = info->get_cpu_mic_tm_regs(cpu); | |
9331 | + | |
9332 | + if (!ppe_priv_regs || !pmd_regs || !mic_tm_regs) { | |
9333 | + PFM_ERR("Error getting Cell PPE, PMD, and MIC " | |
9334 | + "register maps: 0x%p, 0x%p, 0x%p", | |
9335 | + ppe_priv_regs, pmd_regs, mic_tm_regs); | |
9336 | + return -EINVAL; | |
9337 | + } | |
9338 | + | |
9339 | + rmwb_mmio_reg64(ppe_priv_regs->L2_debug1, 61, enable); | |
9340 | + rmwb_mmio_reg64(ppe_priv_regs->ciu_dr1, 5, enable); | |
9341 | + rmwb_mmio_reg64(pmd_regs->on_ramp_trace, 39, enable); | |
9342 | + rmwb_mmio_reg64(mic_tm_regs->MBL_debug, 20, enable); | |
9343 | + | |
9344 | + return 0; | |
9345 | +} | |
9346 | + | |
9347 | +#define passthru_enable(cpu) passthru(cpu, 1) | |
9348 | +#define passthru_disable(cpu) passthru(cpu, 0) | |
9349 | + | |
9350 | +static inline void reset_signal_registers(u32 cpu) | |
9351 | +{ | |
9352 | + rmw_spr(SPRN_HID1, HID1_RESET_MASK, 0); | |
9353 | +} | |
9354 | + | |
9355 | +/** | |
9356 | + * celleb_reset_signals | |
9357 | + * | |
9358 | + * Non-rtas version of resetting the debug-bus signals. | |
9359 | + **/ | |
9360 | +static int celleb_reset_signals(u32 cpu) | |
9361 | +{ | |
9362 | + int rc; | |
9363 | + rc = passthru_disable(cpu); | |
9364 | + if (!rc) | |
9365 | + reset_signal_registers(cpu); | |
9366 | + return rc; | |
9367 | +} | |
9368 | + | |
9369 | +/** | |
9370 | + * ppu_selection | |
9371 | + * | |
9372 | + * Write the HID1 register to connect the specified PPU signal-group to the | |
9373 | + * debug-bus. | |
9374 | + **/ | |
9375 | +static int ppu_selection(struct cell_rtas_arg *signal) | |
9376 | +{ | |
9377 | + u64 hid1_enable_word = 0; | |
9378 | + u64 hid1_enable_mask = 0; | |
9379 | + | |
9380 | + switch (signal->signal_group) { | |
9381 | + | |
9382 | + case SIG_GROUP_PPU_IU1: /* 2.1 PPU Instruction Unit - Group 1 */ | |
9383 | + switch (signal->bus_word) { | |
9384 | + case BUS_WORD_0: | |
9385 | + hid1_enable_mask = PPU_IU1_WORD0_HID1_EN_MASK; | |
9386 | + hid1_enable_word = PPU_IU1_WORD0_HID1_EN_WORD; | |
9387 | + break; | |
9388 | + case BUS_WORD_1: | |
9389 | + hid1_enable_mask = PPU_IU1_WORD1_HID1_EN_MASK; | |
9390 | + hid1_enable_word = PPU_IU1_WORD1_HID1_EN_WORD; | |
9391 | + break; | |
9392 | + default: | |
9393 | + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.", | |
9394 | + signal->bus_word, signal->signal_group); | |
9395 | + return -EINVAL; | |
9396 | + } | |
9397 | + break; | |
9398 | + | |
9399 | + case SIG_GROUP_PPU_XU: /* 2.2 PPU Execution Unit */ | |
9400 | + switch (signal->bus_word) { | |
9401 | + case BUS_WORD_0: | |
9402 | + hid1_enable_mask = PPU_XU_WORD0_HID1_EN_MASK; | |
9403 | + hid1_enable_word = PPU_XU_WORD0_HID1_EN_WORD; | |
9404 | + break; | |
9405 | + case BUS_WORD_1: | |
9406 | + hid1_enable_mask = PPU_XU_WORD1_HID1_EN_MASK; | |
9407 | + hid1_enable_word = PPU_XU_WORD1_HID1_EN_WORD; | |
9408 | + break; | |
9409 | + default: | |
9410 | + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.", | |
9411 | + signal->bus_word, signal->signal_group); | |
9412 | + return -EINVAL; | |
9413 | + } | |
9414 | + break; | |
9415 | + | |
9416 | + default: | |
9417 | + PFM_ERR("Signal-group %d not implemented.", | |
9418 | + signal->signal_group); | |
9419 | + return -EINVAL; | |
9420 | + } | |
9421 | + | |
9422 | + rmw_spr(SPRN_HID1, hid1_enable_mask, hid1_enable_word); | |
9423 | + | |
9424 | + return 0; | |
9425 | +} | |
9426 | + | |
9427 | +/** | |
9428 | + * celleb_activate_signals | |
9429 | + * | |
9430 | + * Non-rtas version of activating the debug-bus signals. | |
9431 | + **/ | |
9432 | +static int celleb_activate_signals(struct cell_rtas_arg *signals, | |
9433 | + int num_signals) | |
9434 | +{ | |
9435 | + int i, rc = -EINVAL; | |
9436 | + | |
9437 | + for (i = 0; i < num_signals; i++) { | |
9438 | + switch (signals[i].signal_group) { | |
9439 | + | |
9440 | + /* 2.x PowerPC Processor Unit (PPU) Signal Selection */ | |
9441 | + case SIG_GROUP_PPU_IU1: | |
9442 | + case SIG_GROUP_PPU_XU: | |
9443 | + rc = ppu_selection(signals + i); | |
9444 | + if (rc) | |
9445 | + return rc; | |
9446 | + break; | |
9447 | + | |
9448 | + default: | |
9449 | + PFM_ERR("Signal-group %d not implemented.", | |
9450 | + signals[i].signal_group); | |
9451 | + return -EINVAL; | |
9452 | + } | |
9453 | + } | |
9454 | + | |
9455 | + if (0 < i) | |
9456 | + rc = passthru_enable(signals[0].cpu); | |
9457 | + | |
9458 | + return rc; | |
9459 | +} | |
9460 | + | |
9461 | +/** | |
9462 | + * ps3_reset_signals | |
9463 | + * | |
9464 | + * ps3 version of resetting the debug-bus signals. | |
9465 | + **/ | |
9466 | +static int ps3_reset_signals(u32 cpu) | |
9467 | +{ | |
9468 | +#ifdef CONFIG_PPC_PS3 | |
9469 | + return ps3_set_signal(0, 0, 0, 0); | |
9470 | +#else | |
9471 | + return 0; | |
9472 | +#endif | |
9473 | +} | |
9474 | + | |
9475 | +/** | |
9476 | + * ps3_activate_signals | |
9477 | + * | |
9478 | + * ps3 version of activating the debug-bus signals. | |
9479 | + **/ | |
9480 | +static int ps3_activate_signals(struct cell_rtas_arg *signals, | |
9481 | + int num_signals) | |
9482 | +{ | |
9483 | +#ifdef CONFIG_PPC_PS3 | |
9484 | + int i; | |
9485 | + | |
9486 | + for (i = 0; i < num_signals; i++) | |
9487 | + ps3_set_signal(signals[i].signal_group, signals[i].bit, | |
9488 | + signals[i].sub_unit, signals[i].bus_word); | |
9489 | +#endif | |
9490 | + return 0; | |
9491 | +} | |
9492 | + | |
9493 | + | |
9494 | +/** | |
9495 | + * reset_signals | |
9496 | + * | |
9497 | + * Call to the firmware (if available) to reset the debug-bus signals. | |
9498 | + * Otherwise call the built-in version. | |
9499 | + **/ | |
9500 | +int reset_signals(u32 cpu) | |
9501 | +{ | |
9502 | + int rc; | |
9503 | + | |
9504 | + if (machine_is(celleb)) | |
9505 | + rc = celleb_reset_signals(cpu); | |
9506 | + else if (machine_is(ps3)) | |
9507 | + rc = ps3_reset_signals(cpu); | |
9508 | + else | |
9509 | + rc = rtas_reset_signals(cpu); | |
9510 | + | |
9511 | + return rc; | |
9512 | +} | |
9513 | + | |
9514 | +/** | |
9515 | + * activate_signals | |
9516 | + * | |
9517 | + * Call to the firmware (if available) to activate the debug-bus signals. | |
9518 | + * Otherwise call the built-in version. | |
9519 | + **/ | |
9520 | +int activate_signals(struct cell_rtas_arg *signals, int num_signals) | |
9521 | +{ | |
9522 | + int rc; | |
9523 | + | |
9524 | + if (machine_is(celleb)) | |
9525 | + rc = celleb_activate_signals(signals, num_signals); | |
9526 | + else if (machine_is(ps3)) | |
9527 | + rc = ps3_activate_signals(signals, num_signals); | |
9528 | + else | |
9529 | + rc = rtas_activate_signals(signals, num_signals); | |
9530 | + | |
9531 | + return rc; | |
9532 | +} | |
9533 | + | |
9534 | +/** | |
9535 | + * pfm_cell_pmc_check | |
9536 | + * | |
9537 | + * Verify that we are going to write a valid value to the specified PMC. | |
9538 | + **/ | |
9539 | +int pfm_cell_pmc_check(struct pfm_context *ctx, | |
9540 | + struct pfm_event_set *set, | |
9541 | + struct pfarg_pmc *req) | |
9542 | +{ | |
9543 | + u16 cnum, reg_num = req->reg_num; | |
9544 | + s16 signal_group = RTAS_SIGNAL_GROUP(req->reg_value); | |
9545 | + u8 bus_word = RTAS_BUS_WORD(req->reg_value); | |
9546 | + | |
9547 | + if (reg_num < NR_CTRS || reg_num >= (NR_CTRS * 2)) | |
9548 | + return -EINVAL; | |
9549 | + | |
9550 | + switch (signal_group) { | |
9551 | + case SIG_GROUP_PPU_IU1: | |
9552 | + case SIG_GROUP_PPU_XU: | |
9553 | + if ((bus_word != 0) && (bus_word != 1)) { | |
9554 | + PFM_ERR("Invalid bus word (%d) for signal-group %d", | |
9555 | + bus_word, signal_group); | |
9556 | + return -EINVAL; | |
9557 | + } | |
9558 | + break; | |
9559 | + default: | |
9560 | + PFM_ERR("Signal-group %d not implemented.", signal_group); | |
9561 | + return -EINVAL; | |
9562 | + } | |
9563 | + | |
9564 | + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) { | |
9565 | + if (test_bit(cnum, cast_ulp(set->used_pmcs)) && | |
9566 | + bus_word == RTAS_BUS_WORD(set->pmcs[cnum]) && | |
9567 | + signal_group != RTAS_SIGNAL_GROUP(set->pmcs[cnum])) { | |
9568 | + PFM_ERR("Impossible signal-group combination: " | |
9569 | + "(%u,%u,%d) (%u,%u,%d)", | |
9570 | + reg_num, bus_word, signal_group, cnum, | |
9571 | + RTAS_BUS_WORD(set->pmcs[cnum]), | |
9572 | + RTAS_SIGNAL_GROUP(set->pmcs[cnum])); | |
9573 | + return -EBUSY; | |
9574 | + } | |
9575 | + } | |
9576 | + | |
9577 | + return 0; | |
9578 | +} | |
9579 | + | |
9580 | +/** | |
9581 | + * write_pm07_event | |
9582 | + * | |
9583 | + * Pull out the RTAS arguments from the 64-bit register value and make the | |
9584 | + * RTAS activate-signals call. | |
9585 | + **/ | |
9586 | +static void write_pm07_event(int cpu, unsigned int ctr, u64 value) | |
9587 | +{ | |
9588 | + struct cell_rtas_arg signal; | |
9589 | + s32 signal_number; | |
9590 | + int rc; | |
9591 | + | |
9592 | + signal_number = RTAS_SIGNAL_NUMBER(value); | |
9593 | + if (!signal_number) { | |
9594 | + /* Don't include counters that are counting cycles. */ | |
9595 | + return; | |
9596 | + } | |
9597 | + | |
9598 | + signal.cpu = RTAS_CPU(cpu); | |
9599 | + signal.bus_word = 1 << RTAS_BUS_WORD(value); | |
9600 | + signal.sub_unit = RTAS_SUB_UNIT(value); | |
9601 | + signal.signal_group = signal_number / 100; | |
9602 | + signal.bit = abs(signal_number) % 100; | |
9603 | + | |
9604 | + rc = activate_signals(&signal, 1); | |
9605 | + if (rc) { | |
9606 | + PFM_WARN("%s(%d, %u, %lu): Error calling " | |
9607 | + "activate_signals(): %d\n", __func__, | |
9608 | + cpu, ctr, (unsigned long)value, rc); | |
9609 | + /* FIX: Could we change this routine to return an error? */ | |
9610 | + } | |
9611 | +} | |
9612 | + | |
9613 | +/** | |
9614 | + * pfm_cell_probe_pmu | |
9615 | + * | |
9616 | + * Simply check the processor version register to see if we're currently | |
9617 | + * on a Cell system. | |
9618 | + **/ | |
9619 | +static int pfm_cell_probe_pmu(void) | |
9620 | +{ | |
9621 | + unsigned long pvr = mfspr(SPRN_PVR); | |
9622 | + | |
9623 | + if (PVR_VER(pvr) != PV_BE) | |
9624 | + return -1; | |
9625 | + | |
9626 | + return 0; | |
9627 | +} | |
9628 | + | |
9629 | +/** | |
9630 | + * pfm_cell_write_pmc | |
9631 | + **/ | |
9632 | +static void pfm_cell_write_pmc(unsigned int cnum, u64 value) | |
9633 | +{ | |
9634 | + int cpu = smp_processor_id(); | |
9635 | + struct pfm_cell_platform_pmu_info *info = | |
9636 | + ((struct pfm_arch_pmu_info *) | |
9637 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9638 | + | |
9639 | + if (cnum < NR_CTRS) { | |
9640 | + info->write_pm07_control(cpu, cnum, value); | |
9641 | + | |
9642 | + } else if (cnum < NR_CTRS * 2) { | |
9643 | + write_pm07_event(cpu, cnum - NR_CTRS, value); | |
9644 | + | |
9645 | + } else if (cnum == CELL_PMC_PM_STATUS) { | |
9646 | + /* The pm_status register must be treated separately from | |
9647 | + * the other "global" PMCs. This call will ensure that | |
9648 | + * the interrupts are routed to the correct CPU, as well | |
9649 | + * as writing the desired value to the pm_status register. | |
9650 | + */ | |
9651 | + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu), | |
9652 | + value); | |
9653 | + | |
9654 | + } else if (cnum < PFM_PM_NUM_PMCS) { | |
9655 | + info->write_pm(cpu, cnum - (NR_CTRS * 2), value); | |
9656 | + } | |
9657 | +} | |
9658 | + | |
9659 | +/** | |
9660 | + * pfm_cell_write_pmd | |
9661 | + **/ | |
9662 | +static void pfm_cell_write_pmd(unsigned int cnum, u64 value) | |
9663 | +{ | |
9664 | + int cpu = smp_processor_id(); | |
9665 | + struct pfm_cell_platform_pmu_info *info = | |
9666 | + ((struct pfm_arch_pmu_info *) | |
9667 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9668 | + | |
9669 | + if (cnum < NR_CTRS) | |
9670 | + info->write_ctr(cpu, cnum, value); | |
9671 | +} | |
9672 | + | |
9673 | +/** | |
9674 | + * pfm_cell_read_pmd | |
9675 | + **/ | |
9676 | +static u64 pfm_cell_read_pmd(unsigned int cnum) | |
9677 | +{ | |
9678 | + int cpu = smp_processor_id(); | |
9679 | + struct pfm_cell_platform_pmu_info *info = | |
9680 | + ((struct pfm_arch_pmu_info *) | |
9681 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9682 | + | |
9683 | + if (cnum < NR_CTRS) | |
9684 | + return info->read_ctr(cpu, cnum); | |
9685 | + | |
9686 | + return -EINVAL; | |
9687 | +} | |
9688 | + | |
9689 | +/** | |
9690 | + * pfm_cell_enable_counters | |
9691 | + * | |
9692 | + * Just need to turn on the global disable bit in pm_control. | |
9693 | + **/ | |
9694 | +static void pfm_cell_enable_counters(struct pfm_context *ctx, | |
9695 | + struct pfm_event_set *set) | |
9696 | +{ | |
9697 | + struct pfm_cell_platform_pmu_info *info = | |
9698 | + ((struct pfm_arch_pmu_info *) | |
9699 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9700 | + | |
9701 | + info->enable_pm(smp_processor_id()); | |
9702 | +} | |
9703 | + | |
9704 | +/** | |
9705 | + * pfm_cell_disable_counters | |
9706 | + * | |
9707 | + * Just need to turn off the global disable bit in pm_control. | |
9708 | + **/ | |
9709 | +static void pfm_cell_disable_counters(struct pfm_context *ctx, | |
9710 | + struct pfm_event_set *set) | |
9711 | +{ | |
9712 | + struct pfm_cell_platform_pmu_info *info = | |
9713 | + ((struct pfm_arch_pmu_info *) | |
9714 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9715 | + | |
9716 | + info->disable_pm(smp_processor_id()); | |
9717 | + if (machine_is(ps3)) | |
9718 | + reset_signals(smp_processor_id()); | |
9719 | +} | |
9720 | + | |
9721 | +/* | |
9722 | + * Return the thread id of the specified ppu signal. | |
9723 | + */ | |
9724 | +static inline u32 get_target_ppu_thread_id(u32 group, u32 bit) | |
9725 | +{ | |
9726 | + if ((group == SIG_GROUP_PPU_IU1 && | |
9727 | + bit < PFM_PPU_IU1_THREAD1_BASE_BIT) || | |
9728 | + (group == SIG_GROUP_PPU_XU && | |
9729 | + bit < PFM_PPU_XU_THREAD1_BASE_BIT)) | |
9730 | + return 0; | |
9731 | + else | |
9732 | + return 1; | |
9733 | +} | |
9734 | + | |
9735 | +/* | |
9736 | + * Return whether the specified counter is for PPU signal group. | |
9737 | + */ | |
9738 | +static inline int is_counter_for_ppu_sig_grp(u32 counter_control, u32 sig_grp) | |
9739 | +{ | |
9740 | + if (!(counter_control & CBE_PM_CTR_INPUT_CONTROL) && | |
9741 | + (counter_control & CBE_PM_CTR_ENABLE) && | |
9742 | + ((sig_grp == SIG_GROUP_PPU_IU1) || (sig_grp == SIG_GROUP_PPU_XU))) | |
9743 | + return 1; | |
9744 | + else | |
9745 | + return 0; | |
9746 | +} | |
9747 | + | |
9748 | +/* | |
9749 | + * Search ppu signal groups. | |
9750 | + */ | |
9751 | +static int get_ppu_signal_groups(struct pfm_event_set *set, | |
9752 | + u32 *ppu_sig_grp0, u32 *ppu_sig_grp1) | |
9753 | +{ | |
9754 | + u64 pm_event, *used_pmcs = set->used_pmcs; | |
9755 | + int i, j; | |
9756 | + u32 grp0_wd, grp1_wd, wd, sig_grp; | |
9757 | + | |
9758 | + *ppu_sig_grp0 = 0; | |
9759 | + *ppu_sig_grp1 = 0; | |
9760 | + grp0_wd = PFM_GROUP_CONTROL_GROUP0_WORD( | |
9761 | + set->pmcs[CELL_PMC_GROUP_CONTROL]); | |
9762 | + grp1_wd = PFM_GROUP_CONTROL_GROUP1_WORD( | |
9763 | + set->pmcs[CELL_PMC_GROUP_CONTROL]); | |
9764 | + | |
9765 | + for (i = 0, j = 0; (i < NR_CTRS) && (j < PFM_NUM_OF_GROUPS); i++) { | |
9766 | + if (test_bit(i + NR_CTRS, used_pmcs)) { | |
9767 | + pm_event = set->pmcs[i + NR_CTRS]; | |
9768 | + wd = PFM_EVENT_PMC_BUS_WORD(pm_event); | |
9769 | + sig_grp = PFM_EVENT_PMC_SIGNAL_GROUP(pm_event); | |
9770 | + if ((sig_grp == SIG_GROUP_PPU_IU1) || | |
9771 | + (sig_grp == SIG_GROUP_PPU_XU)) { | |
9772 | + | |
9773 | + if (wd == grp0_wd && *ppu_sig_grp0 == 0) { | |
9774 | + *ppu_sig_grp0 = sig_grp; | |
9775 | + j++; | |
9776 | + } else if (wd == grp1_wd && | |
9777 | + *ppu_sig_grp1 == 0) { | |
9778 | + *ppu_sig_grp1 = sig_grp; | |
9779 | + j++; | |
9780 | + } | |
9781 | + } | |
9782 | + } | |
9783 | + } | |
9784 | + return j; | |
9785 | +} | |
9786 | + | |
9787 | +/** | |
9788 | + * pfm_cell_restore_pmcs | |
9789 | + * | |
9790 | + * Write all control register values that are saved in the specified event | |
9791 | + * set. We could use the pfm_arch_write_pmc() function to restore each PMC | |
9792 | + * individually (as is done in other architectures), but that results in | |
9793 | + * multiple RTAS calls. As an optimization, we will setup the RTAS argument | |
9794 | + * array so we can do all event-control registers in one RTAS call. | |
9795 | + * | |
9796 | + * In per-thread mode, | |
9797 | + * The counter enable bit of the pmX_control PMC is enabled while the target | |
9798 | + * task runs on the target HW thread. | |
9799 | + **/ | |
9800 | +void pfm_cell_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
9801 | +{ | |
9802 | + u64 ctr_ctrl; | |
9803 | + u64 *used_pmcs = set->used_pmcs; | |
9804 | + int i; | |
9805 | + int cpu = smp_processor_id(); | |
9806 | + u32 current_th_id; | |
9807 | + struct pfm_cell_platform_pmu_info *info = | |
9808 | + ((struct pfm_arch_pmu_info *) | |
9809 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9810 | + | |
9811 | + for (i = 0; i < NR_CTRS; i++) { | |
9812 | + ctr_ctrl = set->pmcs[i]; | |
9813 | + | |
9814 | + if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH0) { | |
9815 | + current_th_id = info->get_hw_thread_id(cpu); | |
9816 | + | |
9817 | + /* | |
9818 | + * Set the counter enable bit down if the current | |
9819 | + * HW thread is NOT 0 | |
9820 | + **/ | |
9821 | + if (current_th_id) | |
9822 | + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE; | |
9823 | + | |
9824 | + } else if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH1) { | |
9825 | + current_th_id = info->get_hw_thread_id(cpu); | |
9826 | + | |
9827 | + /* | |
9828 | + * Set the counter enable bit down if the current | |
9829 | + * HW thread is 0 | |
9830 | + **/ | |
9831 | + if (!current_th_id) | |
9832 | + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE; | |
9833 | + } | |
9834 | + | |
9835 | + /* Write the per-counter control register. If the PMC is not | |
9836 | + * in use, then it will simply clear the register, which will | |
9837 | + * disable the associated counter. | |
9838 | + */ | |
9839 | + info->write_pm07_control(cpu, i, ctr_ctrl); | |
9840 | + | |
9841 | + if (test_bit(i + NR_CTRS, used_pmcs)) | |
9842 | + write_pm07_event(cpu, 0, set->pmcs[i + NR_CTRS]); | |
9843 | + } | |
9844 | + | |
9845 | + /* Write all the global PMCs. Need to call pfm_cell_write_pmc() | |
9846 | + * instead of cbe_write_pm() due to special handling for the | |
9847 | + * pm_status register. | |
9848 | + */ | |
9849 | + for (i *= 2; i < PFM_PM_NUM_PMCS; i++) | |
9850 | + pfm_cell_write_pmc(i, set->pmcs[i]); | |
9851 | +} | |
9852 | + | |
9853 | +/** | |
9854 | + * pfm_cell_restore_pmds | |
9855 | + * | |
9856 | + * Write to pm_control register before writing to counter registers | |
9857 | + * so that we can decide the counter width before writing to the counters. | |
9858 | + **/ | |
9859 | +void pfm_cell_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
9860 | +{ | |
9861 | + u64 *used_pmds; | |
9862 | + unsigned int i, max_pmd; | |
9863 | + int cpu = smp_processor_id(); | |
9864 | + struct pfm_cell_platform_pmu_info *info = | |
9865 | + ((struct pfm_arch_pmu_info *) | |
9866 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
9867 | + | |
9868 | + /* | |
9869 | + * Write pm_control register value | |
9870 | + */ | |
9871 | + info->write_pm(cpu, pm_control, | |
9872 | + set->pmcs[CELL_PMC_PM_CONTROL] & | |
9873 | + ~CBE_PM_ENABLE_PERF_MON); | |
9874 | + PFM_DBG("restore pm_control(0x%lx) before restoring pmds", | |
9875 | + set->pmcs[CELL_PMC_PM_CONTROL]); | |
9876 | + | |
9877 | + max_pmd = ctx->regs.max_pmd; | |
9878 | + used_pmds = set->used_pmds; | |
9879 | + | |
9880 | + for (i = 0; i < max_pmd; i++) | |
9881 | + if (test_bit(i, used_pmds) && | |
9882 | + !(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_RO)) | |
9883 | + pfm_cell_write_pmd(i, set->pmds[i].value); | |
9884 | +} | |
9885 | + | |
9886 | +/** | |
9887 | + * pfm_cell_get_cntr_width | |
9888 | + * | |
9889 | + * This function checks the 16-bit counter field in the pm_control pmc. | |
9890 | + * | |
9891 | + * Return value | |
9892 | + * 16 : all counters are 16bit width. | |
9893 | + * 32 : all counters are 32bit width. | |
9894 | + * 0 : several counter width exists. | |
9895 | + **/ | |
9896 | +static int pfm_cell_get_cntr_width(struct pfm_context *ctx, | |
9897 | + struct pfm_event_set *s) | |
9898 | +{ | |
9899 | + int width = 0; | |
9900 | + int tmp = 0; | |
9901 | + u64 cntr_field; | |
9902 | + | |
9903 | + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) { | |
9904 | + list_for_each_entry(s, &ctx->set_list, list) { | |
9905 | + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] & | |
9906 | + CELL_PMC_PM_CONTROL_CNTR_MASK; | |
9907 | + | |
9908 | + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16) | |
9909 | + tmp = 16; | |
9910 | + else if (cntr_field == 0x0) | |
9911 | + tmp = 32; | |
9912 | + else | |
9913 | + return 0; | |
9914 | + | |
9915 | + if (tmp != width && width != 0) | |
9916 | + return 0; | |
9917 | + | |
9918 | + width = tmp; | |
9919 | + } | |
9920 | + } else { | |
9921 | + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] & | |
9922 | + CELL_PMC_PM_CONTROL_CNTR_MASK; | |
9923 | + | |
9924 | + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16) | |
9925 | + width = 16; | |
9926 | + else if (cntr_field == 0x0) | |
9927 | + width = 32; | |
9928 | + else | |
9929 | + width = 0; | |
9930 | + } | |
9931 | + return width; | |
9932 | +} | |
9933 | + | |
9934 | +/** | |
9935 | + * pfm_cell_check_cntr_ovfl_mask | |
9936 | + * | |
9937 | + * Return value | |
9938 | + * 1 : cntr_ovfl interrupt is used. | |
9939 | + * 0 : cntr_ovfl interrupt is not used. | |
9940 | + **/ | |
9941 | +static int pfm_cell_check_cntr_ovfl(struct pfm_context *ctx, | |
9942 | + struct pfm_event_set *s) | |
9943 | +{ | |
9944 | + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) { | |
9945 | + list_for_each_entry(s, &ctx->set_list, list) { | |
9946 | + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS])) | |
9947 | + return 1; | |
9948 | + } | |
9949 | + } else { | |
9950 | + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS])) | |
9951 | + return 1; | |
9952 | + } | |
9953 | + return 0; | |
9954 | +} | |
9955 | + | |
9956 | +#ifdef CONFIG_PPC_PS3 | |
9957 | +/** | |
9958 | + * update_sub_unit_field | |
9959 | + * | |
9960 | + **/ | |
9961 | +static inline u64 update_sub_unit_field(u64 pm_event, u64 spe_id) | |
9962 | +{ | |
9963 | + return ((pm_event & 0xFFFF0000FFFFFFFF) | (spe_id << 32)); | |
9964 | +} | |
9965 | + | |
9966 | +/** | |
9967 | + * pfm_get_spe_id | |
9968 | + * | |
9969 | + **/ | |
9970 | +static u64 pfm_get_spe_id(void *arg) | |
9971 | +{ | |
9972 | + struct spu *spu = arg; | |
9973 | + u64 spe_id; | |
9974 | + | |
9975 | + if (machine_is(ps3)) | |
9976 | + spe_id = ps3_get_spe_id(arg); | |
9977 | + else | |
9978 | + spe_id = spu->spe_id; | |
9979 | + | |
9980 | + return spe_id; | |
9981 | +} | |
9982 | + | |
9983 | +/** | |
9984 | + * pfm_spu_number_to_id | |
9985 | + * | |
9986 | + **/ | |
9987 | +static int pfm_spu_number_to_id(int number, u64 *spe_id) | |
9988 | +{ | |
9989 | + struct spu *spu; | |
9990 | + int i; | |
9991 | + | |
9992 | + for (i = 0; i < MAX_NUMNODES; i++) { | |
9993 | + if (cbe_spu_info[i].n_spus == 0) | |
9994 | + continue; | |
9995 | + | |
9996 | + list_for_each_entry(spu, &cbe_spu_info[i].spus, cbe_list) | |
9997 | + if (spu->number == number) { | |
9998 | + *spe_id = pfm_get_spe_id(spu); | |
9999 | + return 0; | |
10000 | + } | |
10001 | + } | |
10002 | + return -ENODEV; | |
10003 | +} | |
10004 | + | |
10005 | +/** | |
10006 | + * pfm_update_pmX_event_subunit_field | |
10007 | + * | |
10008 | + * In system wide mode, | |
10009 | + * This function updates the subunit field of SPE pmX_event. | |
10010 | + **/ | |
10011 | +static int pfm_update_pmX_event_subunit_field(struct pfm_context *ctx) | |
10012 | +{ | |
10013 | + struct pfm_event_set *set; | |
10014 | + int i, last_pmc, ret; | |
10015 | + u64 signal_group, spe_id; | |
10016 | + int sub_unit; | |
10017 | + u64 *used_pmcs; | |
10018 | + | |
10019 | + last_pmc = NR_CTRS + 8; | |
10020 | + ret = 0; | |
10021 | + list_for_each_entry(set, &ctx->set_list, list) { | |
10022 | + | |
10023 | + used_pmcs = set->used_pmcs; | |
10024 | + for (i = NR_CTRS; i < last_pmc; i++) { | |
10025 | + if (!test_bit(i, used_pmcs)) | |
10026 | + continue; | |
10027 | + | |
10028 | + signal_group = PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[i]); | |
10029 | + | |
10030 | + /* | |
10031 | + * If the target event is a SPE signal group event, | |
10032 | + * The sub_unit field in pmX_event pmc is changed to the | |
10033 | + * specified spe_id. | |
10034 | + */ | |
10035 | + if (SIG_GROUP_SPU_BASE < signal_group && | |
10036 | + signal_group < SIG_GROUP_EIB_BASE) { | |
10037 | + sub_unit = RTAS_SUB_UNIT(set->pmcs[i]); | |
10038 | + | |
10039 | + ret = pfm_spu_number_to_id(sub_unit, &spe_id); | |
10040 | + if (ret) | |
10041 | + return ret; | |
10042 | + | |
10043 | + set->pmcs[i] = update_sub_unit_field( | |
10044 | + set->pmcs[i], spe_id); | |
10045 | + } | |
10046 | + } | |
10047 | + } | |
10048 | + return 0; | |
10049 | +} | |
10050 | +#endif | |
10051 | + | |
10052 | +/** | |
10053 | + * pfm_cell_load_context | |
10054 | + * | |
10055 | + * In per-thread mode, | |
10056 | + * The pmX_control PMCs which are used for PPU IU/XU event are marked with | |
10057 | + * the thread id(PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1). | |
10058 | + **/ | |
10059 | +static int pfm_cell_load_context(struct pfm_context *ctx) | |
10060 | +{ | |
10061 | + int i; | |
10062 | + u32 ppu_sig_grp[PFM_NUM_OF_GROUPS] = {SIG_GROUP_NONE, SIG_GROUP_NONE}; | |
10063 | + u32 bit; | |
10064 | + int index; | |
10065 | + u32 target_th_id; | |
10066 | + int ppu_sig_num = 0; | |
10067 | + struct pfm_event_set *s; | |
10068 | + int cntr_width = 32; | |
10069 | + int ret = 0; | |
10070 | + | |
10071 | + if (pfm_cell_check_cntr_ovfl(ctx, ctx->active_set)) { | |
10072 | + cntr_width = pfm_cell_get_cntr_width(ctx, ctx->active_set); | |
10073 | + | |
10074 | + /* | |
10075 | + * Counter overflow interrupt works with only 32bit counter, | |
10076 | + * because perfmon core uses pfm_cell_pmu_conf.counter_width | |
10077 | + * to deal with the counter overflow. we can't change the | |
10078 | + * counter width here. | |
10079 | + */ | |
10080 | + if (cntr_width != 32) | |
10081 | + return -EINVAL; | |
10082 | + } | |
10083 | + | |
10084 | + if (ctx->flags.system) { | |
10085 | +#ifdef CONFIG_PPC_PS3 | |
10086 | + if (machine_is(ps3)) | |
10087 | + ret = pfm_update_pmX_event_subunit_field(ctx); | |
10088 | +#endif | |
10089 | + return ret; | |
10090 | + } | |
10091 | + | |
10092 | + list_for_each_entry(s, &ctx->set_list, list) { | |
10093 | + ppu_sig_num = get_ppu_signal_groups(s, &ppu_sig_grp[0], | |
10094 | + &ppu_sig_grp[1]); | |
10095 | + | |
10096 | + for (i = 0; i < NR_CTRS; i++) { | |
10097 | + index = PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(s->pmcs[i]); | |
10098 | + if (ppu_sig_num && | |
10099 | + (ppu_sig_grp[index] != SIG_GROUP_NONE) && | |
10100 | + is_counter_for_ppu_sig_grp(s->pmcs[i], | |
10101 | + ppu_sig_grp[index])) { | |
10102 | + | |
10103 | + bit = PFM_PM_CTR_INPUT_MUX_BIT(s->pmcs[i]); | |
10104 | + target_th_id = get_target_ppu_thread_id( | |
10105 | + ppu_sig_grp[index], bit); | |
10106 | + if (!target_th_id) | |
10107 | + s->pmcs[i] |= | |
10108 | + PFM_COUNTER_CTRL_PMC_PPU_TH0; | |
10109 | + else | |
10110 | + s->pmcs[i] |= | |
10111 | + PFM_COUNTER_CTRL_PMC_PPU_TH1; | |
10112 | + PFM_DBG("set:%d mark ctr:%d target_thread:%d", | |
10113 | + s->id, i, target_th_id); | |
10114 | + } | |
10115 | + } | |
10116 | + } | |
10117 | + | |
10118 | + return ret; | |
10119 | +} | |
10120 | + | |
10121 | +/** | |
10122 | + * pfm_cell_unload_context | |
10123 | + * | |
10124 | + * For system-wide contexts and self-monitored contexts, make the RTAS call | |
10125 | + * to reset the debug-bus signals. | |
10126 | + * | |
10127 | + * For non-self-monitored contexts, the monitored thread will already have | |
10128 | + * been taken off the CPU and we don't need to do anything additional. | |
10129 | + **/ | |
10130 | +static void pfm_cell_unload_context(struct pfm_context *ctx) | |
10131 | +{ | |
10132 | + if (ctx->task == current || ctx->flags.system) | |
10133 | + reset_signals(smp_processor_id()); | |
10134 | +} | |
10135 | + | |
10136 | +/** | |
10137 | + * pfm_cell_ctxswout_thread | |
10138 | + * | |
10139 | + * When a monitored thread is switched out (self-monitored or externally | |
10140 | + * monitored) we need to reset the debug-bus signals so the next context that | |
10141 | + * gets switched in can start from a clean set of signals. | |
10142 | + **/ | |
10143 | +int pfm_cell_ctxswout_thread(struct task_struct *task, | |
10144 | + struct pfm_context *ctx, struct pfm_event_set *set) | |
10145 | +{ | |
10146 | + reset_signals(smp_processor_id()); | |
10147 | + return 0; | |
10148 | +} | |
10149 | + | |
10150 | +/** | |
10151 | + * pfm_cell_get_ovfl_pmds | |
10152 | + * | |
10153 | + * Determine which counters in this set have overflowed and fill in the | |
10154 | + * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status | |
10155 | + * register contains a bit for each counter to indicate overflow. However, | |
10156 | + * those 8 bits are in the reverse order than what Perfmon2 is expecting, | |
10157 | + * so we need to reverse the order of the overflow bits. | |
10158 | + **/ | |
10159 | +static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx, | |
10160 | + struct pfm_event_set *set) | |
10161 | +{ | |
10162 | + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); | |
10163 | + u32 pm_status, ovfl_ctrs; | |
10164 | + u64 povfl_pmds = 0; | |
10165 | + int i; | |
10166 | + struct pfm_cell_platform_pmu_info *info = | |
10167 | + ((struct pfm_arch_pmu_info *) | |
10168 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
10169 | + | |
10170 | + if (!ctx_arch->last_read_updated) | |
10171 | + /* This routine was not called via the interrupt handler. | |
10172 | + * Need to start by getting interrupts and updating | |
10173 | + * last_read_pm_status. | |
10174 | + */ | |
10175 | + ctx_arch->last_read_pm_status = | |
10176 | + info->get_and_clear_pm_interrupts(smp_processor_id()); | |
10177 | + | |
10178 | + /* Reset the flag that the interrupt handler last read pm_status. */ | |
10179 | + ctx_arch->last_read_updated = 0; | |
10180 | + | |
10181 | + pm_status = ctx_arch->last_read_pm_status & | |
10182 | + set->pmcs[CELL_PMC_PM_STATUS]; | |
10183 | + ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status); | |
10184 | + | |
10185 | + /* Reverse the order of the bits in ovfl_ctrs | |
10186 | + * and store the result in povfl_pmds. | |
10187 | + */ | |
10188 | + for (i = 0; i < PFM_PM_NUM_PMDS; i++) { | |
10189 | + povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1); | |
10190 | + ovfl_ctrs >>= 1; | |
10191 | + } | |
10192 | + | |
10193 | + /* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds. | |
10194 | + * Count the bits set in set->povfl_pmds to get set->npend_ovfls. | |
10195 | + */ | |
10196 | + bitmap_and(set->povfl_pmds, &povfl_pmds, | |
10197 | + set->used_pmds, PFM_PM_NUM_PMDS); | |
10198 | + set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS); | |
10199 | +} | |
10200 | + | |
10201 | +/** | |
10202 | + * pfm_cell_acquire_pmu | |
10203 | + * | |
10204 | + * acquire PMU resource. | |
10205 | + * This acquisition is done when the first context is created. | |
10206 | + **/ | |
10207 | +int pfm_cell_acquire_pmu(u64 *unavail_pmcs, u64 *unavail_pmds) | |
10208 | +{ | |
10209 | +#ifdef CONFIG_PPC_PS3 | |
10210 | + int ret; | |
10211 | + | |
10212 | + if (machine_is(ps3)) { | |
10213 | + PFM_DBG(""); | |
10214 | + ret = ps3_lpm_open(PS3_LPM_TB_TYPE_INTERNAL, NULL, 0); | |
10215 | + if (ret) { | |
10216 | + PFM_ERR("Can't create PS3 lpm. error:%d", ret); | |
10217 | + return -EFAULT; | |
10218 | + } | |
10219 | + } | |
10220 | +#endif | |
10221 | + return 0; | |
10222 | +} | |
10223 | + | |
10224 | +/** | |
10225 | + * pfm_cell_release_pmu | |
10226 | + * | |
10227 | + * release PMU resource. | |
10228 | + * actual release happens when last context is destroyed | |
10229 | + **/ | |
10230 | +void pfm_cell_release_pmu(void) | |
10231 | +{ | |
10232 | +#ifdef CONFIG_PPC_PS3 | |
10233 | + if (machine_is(ps3)) { | |
10234 | + if (ps3_lpm_close()) | |
10235 | + PFM_ERR("Can't delete PS3 lpm."); | |
10236 | + } | |
10237 | +#endif | |
10238 | +} | |
10239 | + | |
10240 | +/** | |
10241 | + * handle_trace_buffer_interrupts | |
10242 | + * | |
10243 | + * This routine is for processing just the interval timer and trace buffer | |
10244 | + * overflow interrupts. Performance counter interrupts are handled by the | |
10245 | + * perf_irq_handler() routine, which reads and saves the pm_status register. | |
10246 | + * This routine should not read the actual pm_status register, but rather | |
10247 | + * the value passed in. | |
10248 | + **/ | |
10249 | +static void handle_trace_buffer_interrupts(unsigned long iip, | |
10250 | + struct pt_regs *regs, | |
10251 | + struct pfm_context *ctx, | |
10252 | + u32 pm_status) | |
10253 | +{ | |
10254 | + /* FIX: Currently ignoring trace-buffer interrupts. */ | |
10255 | + return; | |
10256 | +} | |
10257 | + | |
10258 | +/** | |
10259 | + * pfm_cell_irq_handler | |
10260 | + * | |
10261 | + * Handler for all Cell performance-monitor interrupts. | |
10262 | + **/ | |
10263 | +static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx) | |
10264 | +{ | |
10265 | + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); | |
10266 | + u32 last_read_pm_status; | |
10267 | + int cpu = smp_processor_id(); | |
10268 | + struct pfm_cell_platform_pmu_info *info = | |
10269 | + ((struct pfm_arch_pmu_info *) | |
10270 | + (pfm_pmu_conf->pmu_info))->platform_info; | |
10271 | + | |
10272 | + /* Need to disable and reenable the performance counters to get the | |
10273 | + * desired behavior from the hardware. This is specific to the Cell | |
10274 | + * PMU hardware. | |
10275 | + */ | |
10276 | + info->disable_pm(cpu); | |
10277 | + | |
10278 | + /* Read the pm_status register to get the interrupt bits. If a | |
10279 | + * performance counter overflow interrupt occurred, call the core | |
10280 | + * perfmon interrupt handler to service the counter overflow. If the | |
10281 | + * interrupt was for the interval timer or the trace_buffer, | |
10282 | + * call the interval timer and trace buffer interrupt handler. | |
10283 | + * | |
10284 | + * The value read from the pm_status register is stored in the | |
10285 | + * pmf_arch_context structure for use by other routines. Note that | |
10286 | + * reading the pm_status register resets the interrupt flags to zero. | |
10287 | + * Hence, it is important that the register is only read in one place. | |
10288 | + * | |
10289 | + * The pm_status reg interrupt reg format is: | |
10290 | + * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:] | |
10291 | + * - pmd0 to pmd7 are the perf counter overflow interrupts. | |
10292 | + * - intt is the interval timer overflowed interrupt. | |
10293 | + * - tbf is the trace buffer full interrupt. | |
10294 | + * - tbu is the trace buffer underflow interrupt. | |
10295 | + * - The pmd0 bit is the MSB of the 32 bit register. | |
10296 | + */ | |
10297 | + ctx_arch->last_read_pm_status = last_read_pm_status = | |
10298 | + info->get_and_clear_pm_interrupts(cpu); | |
10299 | + | |
10300 | + /* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows | |
10301 | + * last_read_pm_status was updated by the interrupt handler. | |
10302 | + */ | |
10303 | + ctx_arch->last_read_updated = 1; | |
10304 | + | |
10305 | + if (last_read_pm_status & CBE_PM_ALL_OVERFLOW_INTR) | |
10306 | + /* At least one counter overflowed. */ | |
10307 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
10308 | + | |
10309 | + if (last_read_pm_status & (CBE_PM_INTERVAL_INTR | | |
10310 | + CBE_PM_TRACE_BUFFER_FULL_INTR | | |
10311 | + CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR)) | |
10312 | + /* Trace buffer or interval timer overflow. */ | |
10313 | + handle_trace_buffer_interrupts(instruction_pointer(regs), | |
10314 | + regs, ctx, last_read_pm_status); | |
10315 | + | |
10316 | + /* The interrupt setting is the value written to the pm_status | |
10317 | + * register. It is saved in the context when the register is | |
10318 | + * written. | |
10319 | + */ | |
10320 | + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu), | |
10321 | + ctx->active_set->pmcs[CELL_PMC_PM_STATUS]); | |
10322 | + | |
10323 | + /* The writes to the various performance counters only writes to a | |
10324 | + * latch. The new values (interrupt setting bits, reset counter value | |
10325 | + * etc.) are not copied to the actual registers until the performance | |
10326 | + * monitor is enabled. In order to get this to work as desired, the | |
10327 | + * performance monitor needs to be disabled while writing to the | |
10328 | + * latches. This is a HW design issue. | |
10329 | + */ | |
10330 | + info->enable_pm(cpu); | |
10331 | +} | |
10332 | + | |
10333 | + | |
10334 | +static struct pfm_cell_platform_pmu_info ps3_platform_pmu_info = { | |
10335 | +#ifdef CONFIG_PPC_PS3 | |
10336 | + .read_ctr = ps3_read_ctr, | |
10337 | + .write_ctr = ps3_write_ctr, | |
10338 | + .write_pm07_control = ps3_write_pm07_control, | |
10339 | + .write_pm = ps3_write_pm, | |
10340 | + .enable_pm = ps3_enable_pm, | |
10341 | + .disable_pm = ps3_disable_pm, | |
10342 | + .enable_pm_interrupts = ps3_enable_pm_interrupts, | |
10343 | + .get_and_clear_pm_interrupts = ps3_get_and_clear_pm_interrupts, | |
10344 | + .get_hw_thread_id = ps3_get_hw_thread_id, | |
10345 | + .get_cpu_ppe_priv_regs = NULL, | |
10346 | + .get_cpu_pmd_regs = NULL, | |
10347 | + .get_cpu_mic_tm_regs = NULL, | |
10348 | + .rtas_token = NULL, | |
10349 | + .rtas_call = NULL, | |
10350 | +#endif | |
10351 | +}; | |
10352 | + | |
10353 | +static struct pfm_cell_platform_pmu_info native_platform_pmu_info = { | |
10354 | +#ifdef CONFIG_PPC_CELL_NATIVE | |
10355 | + .read_ctr = cbe_read_ctr, | |
10356 | + .write_ctr = cbe_write_ctr, | |
10357 | + .write_pm07_control = cbe_write_pm07_control, | |
10358 | + .write_pm = cbe_write_pm, | |
10359 | + .enable_pm = cbe_enable_pm, | |
10360 | + .disable_pm = cbe_disable_pm, | |
10361 | + .enable_pm_interrupts = cbe_enable_pm_interrupts, | |
10362 | + .get_and_clear_pm_interrupts = cbe_get_and_clear_pm_interrupts, | |
10363 | + .get_hw_thread_id = cbe_get_hw_thread_id, | |
10364 | + .get_cpu_ppe_priv_regs = cbe_get_cpu_ppe_priv_regs, | |
10365 | + .get_cpu_pmd_regs = cbe_get_cpu_pmd_regs, | |
10366 | + .get_cpu_mic_tm_regs = cbe_get_cpu_mic_tm_regs, | |
10367 | + .rtas_token = rtas_token, | |
10368 | + .rtas_call = rtas_call, | |
10369 | +#endif | |
10370 | +}; | |
10371 | + | |
10372 | +static struct pfm_arch_pmu_info pfm_cell_pmu_info = { | |
10373 | + .pmu_style = PFM_POWERPC_PMU_CELL, | |
10374 | + .acquire_pmu = pfm_cell_acquire_pmu, | |
10375 | + .release_pmu = pfm_cell_release_pmu, | |
10376 | + .write_pmc = pfm_cell_write_pmc, | |
10377 | + .write_pmd = pfm_cell_write_pmd, | |
10378 | + .read_pmd = pfm_cell_read_pmd, | |
10379 | + .enable_counters = pfm_cell_enable_counters, | |
10380 | + .disable_counters = pfm_cell_disable_counters, | |
10381 | + .irq_handler = pfm_cell_irq_handler, | |
10382 | + .get_ovfl_pmds = pfm_cell_get_ovfl_pmds, | |
10383 | + .restore_pmcs = pfm_cell_restore_pmcs, | |
10384 | + .restore_pmds = pfm_cell_restore_pmds, | |
10385 | + .ctxswout_thread = pfm_cell_ctxswout_thread, | |
10386 | + .load_context = pfm_cell_load_context, | |
10387 | + .unload_context = pfm_cell_unload_context, | |
10388 | +}; | |
10389 | + | |
10390 | +static struct pfm_pmu_config pfm_cell_pmu_conf = { | |
10391 | + .pmu_name = "Cell", | |
10392 | + .version = "0.1", | |
10393 | + .counter_width = 32, | |
10394 | + .pmd_desc = pfm_cell_pmd_desc, | |
10395 | + .pmc_desc = pfm_cell_pmc_desc, | |
10396 | + .num_pmc_entries = PFM_PM_NUM_PMCS, | |
10397 | + .num_pmd_entries = PFM_PM_NUM_PMDS, | |
10398 | + .probe_pmu = pfm_cell_probe_pmu, | |
10399 | + .pmu_info = &pfm_cell_pmu_info, | |
10400 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
10401 | + .owner = THIS_MODULE, | |
10402 | +}; | |
10403 | + | |
10404 | +/** | |
10405 | + * pfm_cell_platform_probe | |
10406 | + * | |
10407 | + * If we're on a system without the firmware rtas call available, set up the | |
10408 | + * PMC write-checker for all the pmX_event control registers. | |
10409 | + **/ | |
10410 | +static void pfm_cell_platform_probe(void) | |
10411 | +{ | |
10412 | + if (machine_is(celleb)) { | |
10413 | + int cnum; | |
10414 | + pfm_cell_pmu_conf.pmc_write_check = pfm_cell_pmc_check; | |
10415 | + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) | |
10416 | + pfm_cell_pmc_desc[cnum].type |= PFM_REG_WC; | |
10417 | + } | |
10418 | + | |
10419 | + if (machine_is(ps3)) | |
10420 | + pfm_cell_pmu_info.platform_info = &ps3_platform_pmu_info; | |
10421 | + else | |
10422 | + pfm_cell_pmu_info.platform_info = &native_platform_pmu_info; | |
10423 | +} | |
10424 | + | |
10425 | +static int __init pfm_cell_pmu_init_module(void) | |
10426 | +{ | |
10427 | + pfm_cell_platform_probe(); | |
10428 | + return pfm_pmu_register(&pfm_cell_pmu_conf); | |
10429 | +} | |
10430 | + | |
10431 | +static void __exit pfm_cell_pmu_cleanup_module(void) | |
10432 | +{ | |
10433 | + pfm_pmu_unregister(&pfm_cell_pmu_conf); | |
10434 | +} | |
10435 | + | |
10436 | +module_init(pfm_cell_pmu_init_module); | |
10437 | +module_exit(pfm_cell_pmu_cleanup_module); | |
10438 | --- /dev/null | |
10439 | +++ b/arch/powerpc/perfmon/perfmon_power4.c | |
10440 | @@ -0,0 +1,309 @@ | |
10441 | +/* | |
10442 | + * This file contains the POWER4 PMU register description tables | |
10443 | + * and pmc checker used by perfmon.c. | |
10444 | + * | |
10445 | + * Copyright (c) 2007, IBM Corporation. | |
10446 | + * | |
10447 | + * Based on a simple modification of perfmon_power5.c for POWER4 by | |
10448 | + * Corey Ashford <cjashfor@us.ibm.com>. | |
10449 | + * | |
10450 | + * This program is free software; you can redistribute it and/or | |
10451 | + * modify it under the terms of version 2 of the GNU General Public | |
10452 | + * License as published by the Free Software Foundation. | |
10453 | + * | |
10454 | + * This program is distributed in the hope that it will be useful, | |
10455 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10456 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10457 | + * General Public License for more details. | |
10458 | + * | |
10459 | + * You should have received a copy of the GNU General Public License | |
10460 | + * along with this program; if not, write to the Free Software | |
10461 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
10462 | + * 02111-1307 USA | |
10463 | + */ | |
10464 | +#include <linux/module.h> | |
10465 | +#include <linux/perfmon_kern.h> | |
10466 | + | |
10467 | +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>"); | |
10468 | +MODULE_DESCRIPTION("POWER4 PMU description table"); | |
10469 | +MODULE_LICENSE("GPL"); | |
10470 | + | |
10471 | +static struct pfm_regmap_desc pfm_power4_pmc_desc[] = { | |
10472 | +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), | |
10473 | +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), | |
10474 | +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) | |
10475 | +}; | |
10476 | +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power4_pmc_desc) | |
10477 | + | |
10478 | +/* The TB and PURR registers are read-only. Also, note that the TB register | |
10479 | + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. | |
10480 | + * For Perfmon2's purposes, we'll treat it as a single 64-bit register. | |
10481 | + */ | |
10482 | +static struct pfm_regmap_desc pfm_power4_pmd_desc[] = { | |
10483 | +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), | |
10484 | +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), | |
10485 | +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), | |
10486 | +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), | |
10487 | +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), | |
10488 | +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), | |
10489 | +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), | |
10490 | +/* pmd7 */ PMD_D(PFM_REG_C, "PMC7", SPRN_PMC7), | |
10491 | +/* pmd8 */ PMD_D(PFM_REG_C, "PMC8", SPRN_PMC8) | |
10492 | +}; | |
10493 | +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power4_pmd_desc) | |
10494 | + | |
10495 | +static int pfm_power4_probe_pmu(void) | |
10496 | +{ | |
10497 | + unsigned long pvr = mfspr(SPRN_PVR); | |
10498 | + int ver = PVR_VER(pvr); | |
10499 | + | |
10500 | + if ((ver == PV_POWER4) || (ver == PV_POWER4p)) | |
10501 | + return 0; | |
10502 | + | |
10503 | + return -1; | |
10504 | +} | |
10505 | + | |
10506 | +static void pfm_power4_write_pmc(unsigned int cnum, u64 value) | |
10507 | +{ | |
10508 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
10509 | + case SPRN_MMCR0: | |
10510 | + mtspr(SPRN_MMCR0, value); | |
10511 | + break; | |
10512 | + case SPRN_MMCR1: | |
10513 | + mtspr(SPRN_MMCR1, value); | |
10514 | + break; | |
10515 | + case SPRN_MMCRA: | |
10516 | + mtspr(SPRN_MMCRA, value); | |
10517 | + break; | |
10518 | + default: | |
10519 | + BUG(); | |
10520 | + } | |
10521 | +} | |
10522 | + | |
10523 | +static void pfm_power4_write_pmd(unsigned int cnum, u64 value) | |
10524 | +{ | |
10525 | + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
10526 | + | |
10527 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
10528 | + case SPRN_PMC1: | |
10529 | + mtspr(SPRN_PMC1, value & ovfl_mask); | |
10530 | + break; | |
10531 | + case SPRN_PMC2: | |
10532 | + mtspr(SPRN_PMC2, value & ovfl_mask); | |
10533 | + break; | |
10534 | + case SPRN_PMC3: | |
10535 | + mtspr(SPRN_PMC3, value & ovfl_mask); | |
10536 | + break; | |
10537 | + case SPRN_PMC4: | |
10538 | + mtspr(SPRN_PMC4, value & ovfl_mask); | |
10539 | + break; | |
10540 | + case SPRN_PMC5: | |
10541 | + mtspr(SPRN_PMC5, value & ovfl_mask); | |
10542 | + break; | |
10543 | + case SPRN_PMC6: | |
10544 | + mtspr(SPRN_PMC6, value & ovfl_mask); | |
10545 | + break; | |
10546 | + case SPRN_PMC7: | |
10547 | + mtspr(SPRN_PMC7, value & ovfl_mask); | |
10548 | + break; | |
10549 | + case SPRN_PMC8: | |
10550 | + mtspr(SPRN_PMC8, value & ovfl_mask); | |
10551 | + break; | |
10552 | + case SPRN_TBRL: | |
10553 | + case SPRN_PURR: | |
10554 | + /* Ignore writes to read-only registers. */ | |
10555 | + break; | |
10556 | + default: | |
10557 | + BUG(); | |
10558 | + } | |
10559 | +} | |
10560 | + | |
10561 | +static u64 pfm_power4_read_pmd(unsigned int cnum) | |
10562 | +{ | |
10563 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
10564 | + case SPRN_PMC1: | |
10565 | + return mfspr(SPRN_PMC1); | |
10566 | + case SPRN_PMC2: | |
10567 | + return mfspr(SPRN_PMC2); | |
10568 | + case SPRN_PMC3: | |
10569 | + return mfspr(SPRN_PMC3); | |
10570 | + case SPRN_PMC4: | |
10571 | + return mfspr(SPRN_PMC4); | |
10572 | + case SPRN_PMC5: | |
10573 | + return mfspr(SPRN_PMC5); | |
10574 | + case SPRN_PMC6: | |
10575 | + return mfspr(SPRN_PMC6); | |
10576 | + case SPRN_PMC7: | |
10577 | + return mfspr(SPRN_PMC7); | |
10578 | + case SPRN_PMC8: | |
10579 | + return mfspr(SPRN_PMC8); | |
10580 | + case SPRN_TBRL: | |
10581 | + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); | |
10582 | + case SPRN_PURR: | |
10583 | + if (cpu_has_feature(CPU_FTR_PURR)) | |
10584 | + return mfspr(SPRN_PURR); | |
10585 | + else | |
10586 | + return 0; | |
10587 | + default: | |
10588 | + BUG(); | |
10589 | + } | |
10590 | +} | |
10591 | + | |
10592 | +/* forward decl */ | |
10593 | +static void pfm_power4_disable_counters(struct pfm_context *ctx, | |
10594 | + struct pfm_event_set *set); | |
10595 | + | |
10596 | +/** | |
10597 | + * pfm_power4_enable_counters | |
10598 | + * | |
10599 | + **/ | |
10600 | +static void pfm_power4_enable_counters(struct pfm_context *ctx, | |
10601 | + struct pfm_event_set *set) | |
10602 | +{ | |
10603 | + unsigned int i, max_pmc; | |
10604 | + | |
10605 | + /* Make sure the counters are disabled before touching the other | |
10606 | + control registers */ | |
10607 | + pfm_power4_disable_counters(ctx, set); | |
10608 | + | |
10609 | + max_pmc = ctx->regs.max_pmc; | |
10610 | + | |
10611 | + /* Write MMCR0 last, and a fairly easy way to do this is to write | |
10612 | + the registers in the reverse order */ | |
10613 | + for (i = max_pmc; i != 0; i--) | |
10614 | + if (test_bit(i - 1, set->used_pmcs)) | |
10615 | + pfm_power4_write_pmc(i - 1, set->pmcs[i - 1]); | |
10616 | +} | |
10617 | + | |
10618 | +/** | |
10619 | + * pfm_power4_disable_counters | |
10620 | + * | |
10621 | + **/ | |
10622 | +static void pfm_power4_disable_counters(struct pfm_context *ctx, | |
10623 | + struct pfm_event_set *set) | |
10624 | +{ | |
10625 | + /* Set the Freeze Counters bit */ | |
10626 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); | |
10627 | + asm volatile ("sync"); | |
10628 | +} | |
10629 | + | |
10630 | +/** | |
10631 | + * pfm_power4_get_ovfl_pmds | |
10632 | + * | |
10633 | + * Determine which counters in this set have overflowed and fill in the | |
10634 | + * set->povfl_pmds mask and set->npend_ovfls count. | |
10635 | + **/ | |
10636 | +static void pfm_power4_get_ovfl_pmds(struct pfm_context *ctx, | |
10637 | + struct pfm_event_set *set) | |
10638 | +{ | |
10639 | + unsigned int i; | |
10640 | + unsigned int max_pmd = ctx->regs.max_intr_pmd; | |
10641 | + u64 *used_pmds = set->used_pmds; | |
10642 | + u64 *cntr_pmds = ctx->regs.cnt_pmds; | |
10643 | + u64 width_mask = 1 << pfm_pmu_conf->counter_width; | |
10644 | + u64 new_val, mask[PFM_PMD_BV]; | |
10645 | + | |
10646 | + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), | |
10647 | + cast_ulp(used_pmds), max_pmd); | |
10648 | + | |
10649 | + for (i = 0; i < max_pmd; i++) { | |
10650 | + if (test_bit(i, mask)) { | |
10651 | + new_val = pfm_power4_read_pmd(i); | |
10652 | + if (new_val & width_mask) { | |
10653 | + set_bit(i, set->povfl_pmds); | |
10654 | + set->npend_ovfls++; | |
10655 | + } | |
10656 | + } | |
10657 | + } | |
10658 | +} | |
10659 | + | |
10660 | +static void pfm_power4_irq_handler(struct pt_regs *regs, | |
10661 | + struct pfm_context *ctx) | |
10662 | +{ | |
10663 | + u32 mmcr0; | |
10664 | + | |
10665 | + /* Disable the counters (set the freeze bit) to not pollute | |
10666 | + * the counts. | |
10667 | + */ | |
10668 | + mmcr0 = mfspr(SPRN_MMCR0); | |
10669 | + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); | |
10670 | + | |
10671 | + /* Set the PMM bit (see comment below). */ | |
10672 | + mtmsrd(mfmsr() | MSR_PMM); | |
10673 | + | |
10674 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
10675 | + | |
10676 | + mmcr0 = mfspr(SPRN_MMCR0); | |
10677 | + | |
10678 | + /* | |
10679 | + * Reset the perfmon trigger if | |
10680 | + * not in masking mode. | |
10681 | + */ | |
10682 | + if (ctx->state != PFM_CTX_MASKED) | |
10683 | + mmcr0 |= MMCR0_PMXE; | |
10684 | + | |
10685 | + /* | |
10686 | + * We must clear the PMAO bit on some (GQ) chips. Just do it | |
10687 | + * all the time. | |
10688 | + */ | |
10689 | + mmcr0 &= ~MMCR0_PMAO; | |
10690 | + | |
10691 | + /* | |
10692 | + * Now clear the freeze bit, counting will not start until we | |
10693 | + * rfid from this exception, because only at that point will | |
10694 | + * the PMM bit be cleared. | |
10695 | + */ | |
10696 | + mmcr0 &= ~MMCR0_FC; | |
10697 | + mtspr(SPRN_MMCR0, mmcr0); | |
10698 | +} | |
10699 | + | |
10700 | +static void pfm_power4_resend_irq(struct pfm_context *ctx) | |
10701 | +{ | |
10702 | + /* | |
10703 | + * Assert the PMAO bit to cause a PMU interrupt. Make sure we | |
10704 | + * trigger the edge detection circuitry for PMAO | |
10705 | + */ | |
10706 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); | |
10707 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); | |
10708 | +} | |
10709 | + | |
10710 | +struct pfm_arch_pmu_info pfm_power4_pmu_info = { | |
10711 | + .pmu_style = PFM_POWERPC_PMU_POWER4, | |
10712 | + .write_pmc = pfm_power4_write_pmc, | |
10713 | + .write_pmd = pfm_power4_write_pmd, | |
10714 | + .read_pmd = pfm_power4_read_pmd, | |
10715 | + .irq_handler = pfm_power4_irq_handler, | |
10716 | + .get_ovfl_pmds = pfm_power4_get_ovfl_pmds, | |
10717 | + .enable_counters = pfm_power4_enable_counters, | |
10718 | + .disable_counters = pfm_power4_disable_counters, | |
10719 | + .resend_irq = pfm_power4_resend_irq | |
10720 | +}; | |
10721 | + | |
10722 | +/* | |
10723 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
10724 | + */ | |
10725 | +static struct pfm_pmu_config pfm_power4_pmu_conf = { | |
10726 | + .pmu_name = "POWER4", | |
10727 | + .counter_width = 31, | |
10728 | + .pmd_desc = pfm_power4_pmd_desc, | |
10729 | + .pmc_desc = pfm_power4_pmc_desc, | |
10730 | + .num_pmc_entries = PFM_PM_NUM_PMCS, | |
10731 | + .num_pmd_entries = PFM_PM_NUM_PMDS, | |
10732 | + .probe_pmu = pfm_power4_probe_pmu, | |
10733 | + .pmu_info = &pfm_power4_pmu_info, | |
10734 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
10735 | + .owner = THIS_MODULE | |
10736 | +}; | |
10737 | + | |
10738 | +static int __init pfm_power4_pmu_init_module(void) | |
10739 | +{ | |
10740 | + return pfm_pmu_register(&pfm_power4_pmu_conf); | |
10741 | +} | |
10742 | + | |
10743 | +static void __exit pfm_power4_pmu_cleanup_module(void) | |
10744 | +{ | |
10745 | + pfm_pmu_unregister(&pfm_power4_pmu_conf); | |
10746 | +} | |
10747 | + | |
10748 | +module_init(pfm_power4_pmu_init_module); | |
10749 | +module_exit(pfm_power4_pmu_cleanup_module); | |
10750 | --- /dev/null | |
10751 | +++ b/arch/powerpc/perfmon/perfmon_power5.c | |
10752 | @@ -0,0 +1,326 @@ | |
10753 | +/* | |
10754 | + * This file contains the POWER5 PMU register description tables | |
10755 | + * and pmc checker used by perfmon.c. | |
10756 | + * | |
10757 | + * Copyright (c) 2005 David Gibson, IBM Corporation. | |
10758 | + * | |
10759 | + * Based on perfmon_p6.c: | |
10760 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
10761 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
10762 | + * | |
10763 | + * This program is free software; you can redistribute it and/or | |
10764 | + * modify it under the terms of version 2 of the GNU General Public | |
10765 | + * License as published by the Free Software Foundation. | |
10766 | + * | |
10767 | + * This program is distributed in the hope that it will be useful, | |
10768 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10769 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10770 | + * General Public License for more details. | |
10771 | + * | |
10772 | + * You should have received a copy of the GNU General Public License | |
10773 | + * along with this program; if not, write to the Free Software | |
10774 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
10775 | + * 02111-1307 USA | |
10776 | + */ | |
10777 | +#include <linux/module.h> | |
10778 | +#include <linux/perfmon_kern.h> | |
10779 | + | |
10780 | +MODULE_AUTHOR("David Gibson <dwg@au1.ibm.com>"); | |
10781 | +MODULE_DESCRIPTION("POWER5 PMU description table"); | |
10782 | +MODULE_LICENSE("GPL"); | |
10783 | + | |
10784 | +static struct pfm_regmap_desc pfm_power5_pmc_desc[] = { | |
10785 | +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), | |
10786 | +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), | |
10787 | +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) | |
10788 | +}; | |
10789 | +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc) | |
10790 | + | |
10791 | +/* The TB and PURR registers are read-only. Also, note that the TB register | |
10792 | + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. | |
10793 | + * For Perfmon2's purposes, we'll treat it as a single 64-bit register. | |
10794 | + */ | |
10795 | +static struct pfm_regmap_desc pfm_power5_pmd_desc[] = { | |
10796 | +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), | |
10797 | +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), | |
10798 | +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), | |
10799 | +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), | |
10800 | +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), | |
10801 | +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), | |
10802 | +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), | |
10803 | +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR), | |
10804 | +}; | |
10805 | +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc) | |
10806 | + | |
10807 | +/* forward decl */ | |
10808 | +static void pfm_power5_disable_counters(struct pfm_context *ctx, | |
10809 | + struct pfm_event_set *set); | |
10810 | + | |
10811 | +static int pfm_power5_probe_pmu(void) | |
10812 | +{ | |
10813 | + unsigned long pvr = mfspr(SPRN_PVR); | |
10814 | + | |
10815 | + switch (PVR_VER(pvr)) { | |
10816 | + case PV_POWER5: | |
10817 | + return 0; | |
10818 | + case PV_POWER5p: | |
10819 | + return (PVR_REV(pvr) < 0x300) ? 0 : -1; | |
10820 | + default: | |
10821 | + return -1; | |
10822 | + } | |
10823 | +} | |
10824 | + | |
10825 | +static void pfm_power5_write_pmc(unsigned int cnum, u64 value) | |
10826 | +{ | |
10827 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
10828 | + case SPRN_MMCR0: | |
10829 | + mtspr(SPRN_MMCR0, value); | |
10830 | + break; | |
10831 | + case SPRN_MMCR1: | |
10832 | + mtspr(SPRN_MMCR1, value); | |
10833 | + break; | |
10834 | + case SPRN_MMCRA: | |
10835 | + mtspr(SPRN_MMCRA, value); | |
10836 | + break; | |
10837 | + default: | |
10838 | + BUG(); | |
10839 | + } | |
10840 | +} | |
10841 | + | |
10842 | +static void pfm_power5_write_pmd(unsigned int cnum, u64 value) | |
10843 | +{ | |
10844 | + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
10845 | + | |
10846 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
10847 | + case SPRN_PMC1: | |
10848 | + mtspr(SPRN_PMC1, value & ovfl_mask); | |
10849 | + break; | |
10850 | + case SPRN_PMC2: | |
10851 | + mtspr(SPRN_PMC2, value & ovfl_mask); | |
10852 | + break; | |
10853 | + case SPRN_PMC3: | |
10854 | + mtspr(SPRN_PMC3, value & ovfl_mask); | |
10855 | + break; | |
10856 | + case SPRN_PMC4: | |
10857 | + mtspr(SPRN_PMC4, value & ovfl_mask); | |
10858 | + break; | |
10859 | + case SPRN_PMC5: | |
10860 | + mtspr(SPRN_PMC5, value & ovfl_mask); | |
10861 | + break; | |
10862 | + case SPRN_PMC6: | |
10863 | + mtspr(SPRN_PMC6, value & ovfl_mask); | |
10864 | + break; | |
10865 | + case SPRN_TBRL: | |
10866 | + case SPRN_PURR: | |
10867 | + /* Ignore writes to read-only registers. */ | |
10868 | + break; | |
10869 | + default: | |
10870 | + BUG(); | |
10871 | + } | |
10872 | +} | |
10873 | + | |
10874 | +static u64 pfm_power5_read_pmd(unsigned int cnum) | |
10875 | +{ | |
10876 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
10877 | + case SPRN_PMC1: | |
10878 | + return mfspr(SPRN_PMC1); | |
10879 | + case SPRN_PMC2: | |
10880 | + return mfspr(SPRN_PMC2); | |
10881 | + case SPRN_PMC3: | |
10882 | + return mfspr(SPRN_PMC3); | |
10883 | + case SPRN_PMC4: | |
10884 | + return mfspr(SPRN_PMC4); | |
10885 | + case SPRN_PMC5: | |
10886 | + return mfspr(SPRN_PMC5); | |
10887 | + case SPRN_PMC6: | |
10888 | + return mfspr(SPRN_PMC6); | |
10889 | + case SPRN_TBRL: | |
10890 | + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); | |
10891 | + case SPRN_PURR: | |
10892 | + if (cpu_has_feature(CPU_FTR_PURR)) | |
10893 | + return mfspr(SPRN_PURR); | |
10894 | + else | |
10895 | + return 0; | |
10896 | + default: | |
10897 | + BUG(); | |
10898 | + } | |
10899 | +} | |
10900 | + | |
10901 | +/** | |
10902 | + * pfm_power5_enable_counters | |
10903 | + * | |
10904 | + **/ | |
10905 | +static void pfm_power5_enable_counters(struct pfm_context *ctx, | |
10906 | + struct pfm_event_set *set) | |
10907 | +{ | |
10908 | + unsigned int i, max_pmc; | |
10909 | + | |
10910 | + /* | |
10911 | + * Make sure the counters are disabled before touching the | |
10912 | + * other control registers | |
10913 | + */ | |
10914 | + pfm_power5_disable_counters(ctx, set); | |
10915 | + | |
10916 | + max_pmc = ctx->regs.max_pmc; | |
10917 | + | |
10918 | + /* | |
10919 | + * Write MMCR0 last, and a fairly easy way to do | |
10920 | + * this is to write the registers in the reverse | |
10921 | + * order | |
10922 | + */ | |
10923 | + for (i = max_pmc; i != 0; i--) | |
10924 | + if (test_bit(i - 1, set->used_pmcs)) | |
10925 | + pfm_power5_write_pmc(i - 1, set->pmcs[i - 1]); | |
10926 | +} | |
10927 | + | |
10928 | +/** | |
10929 | + * pfm_power5_disable_counters | |
10930 | + * | |
10931 | + * Just need to zero all the control registers. | |
10932 | + **/ | |
10933 | +static void pfm_power5_disable_counters(struct pfm_context *ctx, | |
10934 | + struct pfm_event_set *set) | |
10935 | +{ | |
10936 | + /* Set the Freeze Counters bit */ | |
10937 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); | |
10938 | + asm volatile ("sync"); | |
10939 | +} | |
10940 | + | |
10941 | +/** | |
10942 | + * pfm_power5_get_ovfl_pmds | |
10943 | + * | |
10944 | + * Determine which counters in this set have overflowed and fill in the | |
10945 | + * set->povfl_pmds mask and set->npend_ovfls count. | |
10946 | + **/ | |
10947 | +static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx, | |
10948 | + struct pfm_event_set *set) | |
10949 | +{ | |
10950 | + unsigned int i; | |
10951 | + unsigned int max = ctx->regs.max_intr_pmd; | |
10952 | + u64 *used_pmds = set->used_pmds; | |
10953 | + u64 *intr_pmds = ctx->regs.intr_pmds; | |
10954 | + u64 width_mask = 1 << pfm_pmu_conf->counter_width; | |
10955 | + u64 new_val, mask[PFM_PMD_BV]; | |
10956 | + | |
10957 | + bitmap_and(cast_ulp(mask), cast_ulp(intr_pmds), | |
10958 | + cast_ulp(used_pmds), max); | |
10959 | + /* | |
10960 | + * If either PMC5 or PMC6 are not being used, just zero out the unused | |
10961 | + * ones so that they won't interrupt again for another 2^31 counts. | |
10962 | + * Note that if no other counters overflowed, set->npend_ovfls will | |
10963 | + * be zero upon returning from this call (i.e. a spurious | |
10964 | + * interrupt), but that should be ok. | |
10965 | + * | |
10966 | + * If neither PMC5 nor PMC6 are used, the counters should be frozen | |
10967 | + * via MMCR0_FC5_6 and zeroed out. | |
10968 | + * | |
10969 | + * If both PMC5 and PMC6 are used, they can be handled correctly by | |
10970 | + * the loop that follows. | |
10971 | + */ | |
10972 | + | |
10973 | + if (!test_bit(5, cast_ulp(used_pmds))) | |
10974 | + mtspr(SPRN_PMC5, 0); | |
10975 | + if (!test_bit(6, cast_ulp(used_pmds))) | |
10976 | + mtspr(SPRN_PMC6, 0); | |
10977 | + | |
10978 | + for (i = 0; i < max; i++) { | |
10979 | + if (test_bit(i, mask)) { | |
10980 | + new_val = pfm_power5_read_pmd(i); | |
10981 | + if (new_val & width_mask) { | |
10982 | + set_bit(i, set->povfl_pmds); | |
10983 | + set->npend_ovfls++; | |
10984 | + } | |
10985 | + } | |
10986 | + } | |
10987 | +} | |
10988 | + | |
10989 | +static void pfm_power5_irq_handler(struct pt_regs *regs, | |
10990 | + struct pfm_context *ctx) | |
10991 | +{ | |
10992 | + u32 mmcr0; | |
10993 | + | |
10994 | + /* Disable the counters (set the freeze bit) to not pollute | |
10995 | + * the counts. | |
10996 | + */ | |
10997 | + mmcr0 = mfspr(SPRN_MMCR0); | |
10998 | + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); | |
10999 | + | |
11000 | + /* Set the PMM bit (see comment below). */ | |
11001 | + mtmsrd(mfmsr() | MSR_PMM); | |
11002 | + | |
11003 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
11004 | + | |
11005 | + mmcr0 = mfspr(SPRN_MMCR0); | |
11006 | + | |
11007 | + /* | |
11008 | + * Reset the perfmon trigger if | |
11009 | + * not in masking mode. | |
11010 | + */ | |
11011 | + if (ctx->state != PFM_CTX_MASKED) | |
11012 | + mmcr0 |= MMCR0_PMXE; | |
11013 | + | |
11014 | + /* | |
11015 | + * We must clear the PMAO bit on some (GQ) chips. Just do it | |
11016 | + * all the time. | |
11017 | + */ | |
11018 | + mmcr0 &= ~MMCR0_PMAO; | |
11019 | + | |
11020 | + /* | |
11021 | + * Now clear the freeze bit, counting will not start until we | |
11022 | + * rfid from this exception, because only at that point will | |
11023 | + * the PMM bit be cleared. | |
11024 | + */ | |
11025 | + mmcr0 &= ~MMCR0_FC; | |
11026 | + mtspr(SPRN_MMCR0, mmcr0); | |
11027 | +} | |
11028 | + | |
11029 | +static void pfm_power5_resend_irq(struct pfm_context *ctx) | |
11030 | +{ | |
11031 | + /* | |
11032 | + * Assert the PMAO bit to cause a PMU interrupt. Make sure we | |
11033 | + * trigger the edge detection circuitry for PMAO | |
11034 | + */ | |
11035 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); | |
11036 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); | |
11037 | +} | |
11038 | + | |
11039 | +struct pfm_arch_pmu_info pfm_power5_pmu_info = { | |
11040 | + .pmu_style = PFM_POWERPC_PMU_POWER5, | |
11041 | + .write_pmc = pfm_power5_write_pmc, | |
11042 | + .write_pmd = pfm_power5_write_pmd, | |
11043 | + .read_pmd = pfm_power5_read_pmd, | |
11044 | + .irq_handler = pfm_power5_irq_handler, | |
11045 | + .get_ovfl_pmds = pfm_power5_get_ovfl_pmds, | |
11046 | + .enable_counters = pfm_power5_enable_counters, | |
11047 | + .disable_counters = pfm_power5_disable_counters, | |
11048 | + .resend_irq = pfm_power5_resend_irq | |
11049 | +}; | |
11050 | + | |
11051 | +/* | |
11052 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
11053 | + */ | |
11054 | +static struct pfm_pmu_config pfm_power5_pmu_conf = { | |
11055 | + .pmu_name = "POWER5", | |
11056 | + .counter_width = 31, | |
11057 | + .pmd_desc = pfm_power5_pmd_desc, | |
11058 | + .pmc_desc = pfm_power5_pmc_desc, | |
11059 | + .num_pmc_entries = PFM_PM_NUM_PMCS, | |
11060 | + .num_pmd_entries = PFM_PM_NUM_PMDS, | |
11061 | + .probe_pmu = pfm_power5_probe_pmu, | |
11062 | + .pmu_info = &pfm_power5_pmu_info, | |
11063 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
11064 | + .owner = THIS_MODULE | |
11065 | +}; | |
11066 | + | |
11067 | +static int __init pfm_power5_pmu_init_module(void) | |
11068 | +{ | |
11069 | + return pfm_pmu_register(&pfm_power5_pmu_conf); | |
11070 | +} | |
11071 | + | |
11072 | +static void __exit pfm_power5_pmu_cleanup_module(void) | |
11073 | +{ | |
11074 | + pfm_pmu_unregister(&pfm_power5_pmu_conf); | |
11075 | +} | |
11076 | + | |
11077 | +module_init(pfm_power5_pmu_init_module); | |
11078 | +module_exit(pfm_power5_pmu_cleanup_module); | |
11079 | --- /dev/null | |
11080 | +++ b/arch/powerpc/perfmon/perfmon_power6.c | |
11081 | @@ -0,0 +1,520 @@ | |
11082 | +/* | |
11083 | + * This file contains the POWER6 PMU register description tables | |
11084 | + * and pmc checker used by perfmon.c. | |
11085 | + * | |
11086 | + * Copyright (c) 2007, IBM Corporation | |
11087 | + * | |
11088 | + * Based on perfmon_power5.c, and written by Carl Love <carll@us.ibm.com> | |
11089 | + * and Kevin Corry <kevcorry@us.ibm.com>. Some fixes and refinement by | |
11090 | + * Corey Ashford <cjashfor@us.ibm.com> | |
11091 | + * | |
11092 | + * This program is free software; you can redistribute it and/or | |
11093 | + * modify it under the terms of version 2 of the GNU General Public | |
11094 | + * License as published by the Free Software Foundation. | |
11095 | + * | |
11096 | + * This program is distributed in the hope that it will be useful, | |
11097 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11098 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11099 | + * General Public License for more details. | |
11100 | + * | |
11101 | + * You should have received a copy of the GNU General Public License | |
11102 | + * along with this program; if not, write to the Free Software | |
11103 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
11104 | + * 02111-1307 USA | |
11105 | + */ | |
11106 | +#include <linux/module.h> | |
11107 | +#include <linux/perfmon_kern.h> | |
11108 | + | |
11109 | +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>"); | |
11110 | +MODULE_DESCRIPTION("POWER6 PMU description table"); | |
11111 | +MODULE_LICENSE("GPL"); | |
11112 | + | |
11113 | +static struct pfm_regmap_desc pfm_power6_pmc_desc[] = { | |
11114 | +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), | |
11115 | +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), | |
11116 | +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) | |
11117 | +}; | |
11118 | +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc) | |
11119 | +#define PFM_DELTA_TB 10000 /* Not real registers */ | |
11120 | +#define PFM_DELTA_PURR 10001 | |
11121 | + | |
11122 | +/* | |
11123 | + * counters wrap to zero at transition from 2^32-1 to 2^32. Note: | |
11124 | + * interrupt generated at transition from 2^31-1 to 2^31 | |
11125 | + */ | |
11126 | +#define OVERFLOW_VALUE 0x100000000UL | |
11127 | + | |
11128 | +/* The TB and PURR registers are read-only. Also, note that the TB register | |
11129 | + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. | |
11130 | + * For Perfmon2's purposes, we'll treat it as a single 64-bit register. | |
11131 | + */ | |
11132 | +static struct pfm_regmap_desc pfm_power6_pmd_desc[] = { | |
11133 | + /* On POWER 6 PMC5 and PMC6 are not writable, they do not | |
11134 | + * generate interrupts, and do not qualify their counts | |
11135 | + * based on problem mode, supervisor mode or hypervisor mode. | |
11136 | + * These two counters are implemented as virtual counters | |
11137 | + * to make them appear to work like the other counters. A | |
11138 | + * kernel timer is used to sample the real PMC5 and PMC6 and | |
11139 | + * update the virtual counters. | |
11140 | + */ | |
11141 | +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), | |
11142 | +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), | |
11143 | +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), | |
11144 | +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), | |
11145 | +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), | |
11146 | +/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5), | |
11147 | +/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6), | |
11148 | +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR), | |
11149 | +/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB), | |
11150 | +/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR), | |
11151 | +}; | |
11152 | + | |
11153 | +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc) | |
11154 | + | |
11155 | +u32 pmc5_start_save[NR_CPUS]; | |
11156 | +u32 pmc6_start_save[NR_CPUS]; | |
11157 | + | |
11158 | +static struct timer_list pmc5_6_update[NR_CPUS]; | |
11159 | +u64 enable_cntrs_cnt; | |
11160 | +u64 disable_cntrs_cnt; | |
11161 | +u64 call_delta; | |
11162 | +u64 pm5_6_interrupt; | |
11163 | +u64 pm1_4_interrupt; | |
11164 | +/* need ctx_arch for kernel timer. Can't get it in context of the kernel | |
11165 | + * timer. | |
11166 | + */ | |
11167 | +struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS]; | |
11168 | +long int update_time; | |
11169 | + | |
11170 | +static void delta(int cpu_num, struct pfm_arch_context *ctx_arch) | |
11171 | +{ | |
11172 | + u32 tmp5, tmp6; | |
11173 | + | |
11174 | + call_delta++; | |
11175 | + | |
11176 | + tmp5 = (u32) mfspr(SPRN_PMC5); | |
11177 | + tmp6 = (u32) mfspr(SPRN_PMC6); | |
11178 | + | |
11179 | + /* | |
11180 | + * The following difference calculation relies on 32-bit modular | |
11181 | + * arithmetic for the deltas to come out correct (especially in the | |
11182 | + * presence of a 32-bit counter wrap). | |
11183 | + */ | |
11184 | + ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]); | |
11185 | + ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]); | |
11186 | + | |
11187 | + pmc5_start_save[cpu_num] = tmp5; | |
11188 | + pmc6_start_save[cpu_num] = tmp6; | |
11189 | + | |
11190 | + return; | |
11191 | +} | |
11192 | + | |
11193 | + | |
11194 | +static void pmc5_6_updater(unsigned long cpu_num) | |
11195 | +{ | |
11196 | + /* update the virtual pmd 5 and pmd 6 counters */ | |
11197 | + | |
11198 | + delta(cpu_num, pmc5_6_ctx_arch[cpu_num]); | |
11199 | + mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time); | |
11200 | +} | |
11201 | + | |
11202 | + | |
11203 | +static int pfm_power6_probe_pmu(void) | |
11204 | +{ | |
11205 | + unsigned long pvr = mfspr(SPRN_PVR); | |
11206 | + | |
11207 | + switch (PVR_VER(pvr)) { | |
11208 | + case PV_POWER6: | |
11209 | + return 0; | |
11210 | + case PV_POWER5p: | |
11211 | + /* If this is a POWER5+ and the revision is less than 0x300, | |
11212 | + don't treat it as a POWER6. */ | |
11213 | + return (PVR_REV(pvr) < 0x300) ? -1 : 0; | |
11214 | + default: | |
11215 | + return -1; | |
11216 | + } | |
11217 | +} | |
11218 | + | |
11219 | +static void pfm_power6_write_pmc(unsigned int cnum, u64 value) | |
11220 | +{ | |
11221 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
11222 | + case SPRN_MMCR0: | |
11223 | + mtspr(SPRN_MMCR0, value); | |
11224 | + break; | |
11225 | + case SPRN_MMCR1: | |
11226 | + mtspr(SPRN_MMCR1, value); | |
11227 | + break; | |
11228 | + case SPRN_MMCRA: | |
11229 | + mtspr(SPRN_MMCRA, value); | |
11230 | + break; | |
11231 | + default: | |
11232 | + BUG(); | |
11233 | + } | |
11234 | +} | |
11235 | + | |
11236 | +static void pfm_power6_write_pmd(unsigned int cnum, u64 value) | |
11237 | +{ | |
11238 | + /* On POWER 6 PMC5 and PMC6 are implemented as | |
11239 | + * virtual counters. See comment in pfm_power6_pmd_desc | |
11240 | + * definition. | |
11241 | + */ | |
11242 | + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
11243 | + | |
11244 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11245 | + case SPRN_PMC1: | |
11246 | + mtspr(SPRN_PMC1, value & ovfl_mask); | |
11247 | + break; | |
11248 | + case SPRN_PMC2: | |
11249 | + mtspr(SPRN_PMC2, value & ovfl_mask); | |
11250 | + break; | |
11251 | + case SPRN_PMC3: | |
11252 | + mtspr(SPRN_PMC3, value & ovfl_mask); | |
11253 | + break; | |
11254 | + case SPRN_PMC4: | |
11255 | + mtspr(SPRN_PMC4, value & ovfl_mask); | |
11256 | + break; | |
11257 | + case SPRN_TBRL: | |
11258 | + case SPRN_PURR: | |
11259 | + /* Ignore writes to read-only registers. */ | |
11260 | + break; | |
11261 | + default: | |
11262 | + BUG(); | |
11263 | + } | |
11264 | +} | |
11265 | + | |
11266 | +static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum) | |
11267 | +{ | |
11268 | + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); | |
11269 | + int cpu_num = smp_processor_id(); | |
11270 | + | |
11271 | + /* On POWER 6 PMC5 and PMC6 are implemented as | |
11272 | + * virtual counters. See comment in pfm_power6_pmd_desc | |
11273 | + * definition. | |
11274 | + */ | |
11275 | + | |
11276 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11277 | + case SPRN_PMC5: | |
11278 | + return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]); | |
11279 | + break; | |
11280 | + | |
11281 | + case SPRN_PMC6: | |
11282 | + return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]); | |
11283 | + break; | |
11284 | + | |
11285 | + case PFM_DELTA_TB: | |
11286 | + return ctx_arch->delta_tb | |
11287 | + + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)) | |
11288 | + - ctx_arch->delta_tb_start; | |
11289 | + break; | |
11290 | + | |
11291 | + case PFM_DELTA_PURR: | |
11292 | + return ctx_arch->delta_purr | |
11293 | + + mfspr(SPRN_PURR) | |
11294 | + - ctx_arch->delta_purr_start; | |
11295 | + break; | |
11296 | + | |
11297 | + default: | |
11298 | + BUG(); | |
11299 | + } | |
11300 | +} | |
11301 | + | |
11302 | +void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum, | |
11303 | + u64 val) | |
11304 | +{ | |
11305 | + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); | |
11306 | + int cpu_num = smp_processor_id(); | |
11307 | + | |
11308 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11309 | + case SPRN_PMC5: | |
11310 | + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5); | |
11311 | + ctx_arch->powergs_pmc5 = val; | |
11312 | + break; | |
11313 | + | |
11314 | + case SPRN_PMC6: | |
11315 | + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6); | |
11316 | + ctx_arch->powergs_pmc6 = val; | |
11317 | + break; | |
11318 | + | |
11319 | + case PFM_DELTA_TB: | |
11320 | + ctx_arch->delta_tb_start = | |
11321 | + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)); | |
11322 | + ctx_arch->delta_tb = val; | |
11323 | + break; | |
11324 | + | |
11325 | + case PFM_DELTA_PURR: | |
11326 | + ctx_arch->delta_purr_start = mfspr(SPRN_PURR); | |
11327 | + ctx_arch->delta_purr = val; | |
11328 | + break; | |
11329 | + | |
11330 | + default: | |
11331 | + BUG(); | |
11332 | + } | |
11333 | +} | |
11334 | + | |
11335 | +static u64 pfm_power6_read_pmd(unsigned int cnum) | |
11336 | +{ | |
11337 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11338 | + case SPRN_PMC1: | |
11339 | + return mfspr(SPRN_PMC1); | |
11340 | + case SPRN_PMC2: | |
11341 | + return mfspr(SPRN_PMC2); | |
11342 | + case SPRN_PMC3: | |
11343 | + return mfspr(SPRN_PMC3); | |
11344 | + case SPRN_PMC4: | |
11345 | + return mfspr(SPRN_PMC4); | |
11346 | + case SPRN_TBRL: | |
11347 | + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); | |
11348 | + case SPRN_PURR: | |
11349 | + if (cpu_has_feature(CPU_FTR_PURR)) | |
11350 | + return mfspr(SPRN_PURR); | |
11351 | + else | |
11352 | + return 0; | |
11353 | + default: | |
11354 | + BUG(); | |
11355 | + } | |
11356 | +} | |
11357 | + | |
11358 | + | |
11359 | +/** | |
11360 | + * pfm_power6_enable_counters | |
11361 | + * | |
11362 | + **/ | |
11363 | +static void pfm_power6_enable_counters(struct pfm_context *ctx, | |
11364 | + struct pfm_event_set *set) | |
11365 | +{ | |
11366 | + | |
11367 | + unsigned int i, max_pmc; | |
11368 | + int cpu_num = smp_processor_id(); | |
11369 | + struct pfm_arch_context *ctx_arch; | |
11370 | + | |
11371 | + enable_cntrs_cnt++; | |
11372 | + | |
11373 | + /* need the ctx passed down to the routine */ | |
11374 | + ctx_arch = pfm_ctx_arch(ctx); | |
11375 | + max_pmc = ctx->regs.max_pmc; | |
11376 | + | |
11377 | + /* Write MMCR0 last, and a fairly easy way to do this is to write | |
11378 | + the registers in the reverse order */ | |
11379 | + for (i = max_pmc; i != 0; i--) | |
11380 | + if (test_bit(i - 1, set->used_pmcs)) | |
11381 | + pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]); | |
11382 | + | |
11383 | + /* save current free running HW event count */ | |
11384 | + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5); | |
11385 | + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6); | |
11386 | + | |
11387 | + ctx_arch->delta_purr_start = mfspr(SPRN_PURR); | |
11388 | + | |
11389 | + if (cpu_has_feature(CPU_FTR_PURR)) | |
11390 | + ctx_arch->delta_tb_start = | |
11391 | + ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); | |
11392 | + else | |
11393 | + ctx_arch->delta_tb_start = 0; | |
11394 | + | |
11395 | + /* Start kernel timer for this cpu to periodically update | |
11396 | + * the virtual counters. | |
11397 | + */ | |
11398 | + init_timer(&pmc5_6_update[cpu_num]); | |
11399 | + pmc5_6_update[cpu_num].function = pmc5_6_updater; | |
11400 | + pmc5_6_update[cpu_num].data = (unsigned long) cpu_num; | |
11401 | + pmc5_6_update[cpu_num].expires = jiffies + update_time; | |
11402 | + /* context for this timer, timer will be removed if context | |
11403 | + * is switched because the counters will be stopped first. | |
11404 | + * NEEDS WORK, I think this is all ok, a little concerned about a | |
11405 | + * race between the kernel timer going off right as the counters | |
11406 | + * are being stopped and the context switching. Need to think | |
11407 | + * about this. | |
11408 | + */ | |
11409 | + pmc5_6_ctx_arch[cpu_num] = ctx_arch; | |
11410 | + add_timer(&pmc5_6_update[cpu_num]); | |
11411 | +} | |
11412 | + | |
11413 | +/** | |
11414 | + * pfm_power6_disable_counters | |
11415 | + * | |
11416 | + **/ | |
11417 | +static void pfm_power6_disable_counters(struct pfm_context *ctx, | |
11418 | + struct pfm_event_set *set) | |
11419 | +{ | |
11420 | + struct pfm_arch_context *ctx_arch; | |
11421 | + int cpu_num = smp_processor_id(); | |
11422 | + | |
11423 | + disable_cntrs_cnt++; | |
11424 | + | |
11425 | + /* Set the Freeze Counters bit */ | |
11426 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); | |
11427 | + asm volatile ("sync"); | |
11428 | + | |
11429 | + /* delete kernel update timer */ | |
11430 | + del_timer_sync(&pmc5_6_update[cpu_num]); | |
11431 | + | |
11432 | + /* Update the virtual pmd 5 and 6 counters from the free running | |
11433 | + * HW counters | |
11434 | + */ | |
11435 | + ctx_arch = pfm_ctx_arch(ctx); | |
11436 | + delta(cpu_num, ctx_arch); | |
11437 | + | |
11438 | + ctx_arch->delta_tb += | |
11439 | + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)) | |
11440 | + - ctx_arch->delta_tb_start; | |
11441 | + | |
11442 | + ctx_arch->delta_purr += mfspr(SPRN_PURR) | |
11443 | + - ctx_arch->delta_purr_start; | |
11444 | +} | |
11445 | + | |
11446 | +/** | |
11447 | + * pfm_power6_get_ovfl_pmds | |
11448 | + * | |
11449 | + * Determine which counters in this set have overflowed and fill in the | |
11450 | + * set->povfl_pmds mask and set->npend_ovfls count. | |
11451 | + **/ | |
11452 | +static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx, | |
11453 | + struct pfm_event_set *set) | |
11454 | +{ | |
11455 | + unsigned int i; | |
11456 | + unsigned int first_intr_pmd = ctx->regs.first_intr_pmd; | |
11457 | + unsigned int max_intr_pmd = ctx->regs.max_intr_pmd; | |
11458 | + u64 *used_pmds = set->used_pmds; | |
11459 | + u64 *cntr_pmds = ctx->regs.cnt_pmds; | |
11460 | + u64 width_mask = 1 << pfm_pmu_conf->counter_width; | |
11461 | + u64 new_val, mask[PFM_PMD_BV]; | |
11462 | + | |
11463 | + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd); | |
11464 | + | |
11465 | + /* max_intr_pmd is actually the last interrupting pmd register + 1 */ | |
11466 | + for (i = first_intr_pmd; i < max_intr_pmd; i++) { | |
11467 | + if (test_bit(i, mask)) { | |
11468 | + new_val = pfm_power6_read_pmd(i); | |
11469 | + if (new_val & width_mask) { | |
11470 | + set_bit(i, set->povfl_pmds); | |
11471 | + set->npend_ovfls++; | |
11472 | + } | |
11473 | + } | |
11474 | + } | |
11475 | +} | |
11476 | + | |
11477 | +static void pfm_power6_irq_handler(struct pt_regs *regs, | |
11478 | + struct pfm_context *ctx) | |
11479 | +{ | |
11480 | + u32 mmcr0; | |
11481 | + u64 mmcra; | |
11482 | + | |
11483 | + /* Disable the counters (set the freeze bit) to not pollute | |
11484 | + * the counts. | |
11485 | + */ | |
11486 | + mmcr0 = mfspr(SPRN_MMCR0); | |
11487 | + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); | |
11488 | + mmcra = mfspr(SPRN_MMCRA); | |
11489 | + | |
11490 | + /* Set the PMM bit (see comment below). */ | |
11491 | + mtmsrd(mfmsr() | MSR_PMM); | |
11492 | + | |
11493 | + pm1_4_interrupt++; | |
11494 | + | |
11495 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
11496 | + | |
11497 | + mmcr0 = mfspr(SPRN_MMCR0); | |
11498 | + | |
11499 | + /* | |
11500 | + * Reset the perfmon trigger if | |
11501 | + * not in masking mode. | |
11502 | + */ | |
11503 | + if (ctx->state != PFM_CTX_MASKED) | |
11504 | + mmcr0 |= MMCR0_PMXE; | |
11505 | + | |
11506 | + /* | |
11507 | + * Clear the PMU Alert Occurred bit | |
11508 | + */ | |
11509 | + mmcr0 &= ~MMCR0_PMAO; | |
11510 | + | |
11511 | + /* Clear the appropriate bits in the MMCRA. */ | |
11512 | + mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER); | |
11513 | + mtspr(SPRN_MMCRA, mmcra); | |
11514 | + | |
11515 | + /* | |
11516 | + * Now clear the freeze bit, counting will not start until we | |
11517 | + * rfid from this exception, because only at that point will | |
11518 | + * the PMM bit be cleared. | |
11519 | + */ | |
11520 | + mmcr0 &= ~MMCR0_FC; | |
11521 | + mtspr(SPRN_MMCR0, mmcr0); | |
11522 | +} | |
11523 | + | |
11524 | +static void pfm_power6_resend_irq(struct pfm_context *ctx) | |
11525 | +{ | |
11526 | + /* | |
11527 | + * Assert the PMAO bit to cause a PMU interrupt. Make sure we | |
11528 | + * trigger the edge detection circuitry for PMAO | |
11529 | + */ | |
11530 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); | |
11531 | + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); | |
11532 | +} | |
11533 | + | |
11534 | +struct pfm_arch_pmu_info pfm_power6_pmu_info = { | |
11535 | + .pmu_style = PFM_POWERPC_PMU_POWER6, | |
11536 | + .write_pmc = pfm_power6_write_pmc, | |
11537 | + .write_pmd = pfm_power6_write_pmd, | |
11538 | + .read_pmd = pfm_power6_read_pmd, | |
11539 | + .irq_handler = pfm_power6_irq_handler, | |
11540 | + .get_ovfl_pmds = pfm_power6_get_ovfl_pmds, | |
11541 | + .enable_counters = pfm_power6_enable_counters, | |
11542 | + .disable_counters = pfm_power6_disable_counters, | |
11543 | + .resend_irq = pfm_power6_resend_irq | |
11544 | +}; | |
11545 | + | |
11546 | +/* | |
11547 | + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! | |
11548 | + */ | |
11549 | +static struct pfm_pmu_config pfm_power6_pmu_conf = { | |
11550 | + .pmu_name = "POWER6", | |
11551 | + .counter_width = 31, | |
11552 | + .pmd_desc = pfm_power6_pmd_desc, | |
11553 | + .pmc_desc = pfm_power6_pmc_desc, | |
11554 | + .num_pmc_entries = PFM_PM_NUM_PMCS, | |
11555 | + .num_pmd_entries = PFM_PM_NUM_PMDS, | |
11556 | + .probe_pmu = pfm_power6_probe_pmu, | |
11557 | + .pmu_info = &pfm_power6_pmu_info, | |
11558 | + .pmd_sread = pfm_power6_sread, | |
11559 | + .pmd_swrite = pfm_power6_swrite, | |
11560 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
11561 | + .owner = THIS_MODULE | |
11562 | +}; | |
11563 | + | |
11564 | +static int __init pfm_power6_pmu_init_module(void) | |
11565 | +{ | |
11566 | + int ret; | |
11567 | + disable_cntrs_cnt = 0; | |
11568 | + enable_cntrs_cnt = 0; | |
11569 | + call_delta = 0; | |
11570 | + pm5_6_interrupt = 0; | |
11571 | + pm1_4_interrupt = 0; | |
11572 | + | |
11573 | + /* calculate the time for updating counters 5 and 6 */ | |
11574 | + | |
11575 | + /* | |
11576 | + * MAX_EVENT_RATE assumes a max instruction issue rate of 2 | |
11577 | + * instructions per clock cycle. Experience shows that this factor | |
11578 | + * of 2 is more than adequate. | |
11579 | + */ | |
11580 | + | |
11581 | +# define MAX_EVENT_RATE (ppc_proc_freq * 2) | |
11582 | + | |
11583 | + /* | |
11584 | + * Calculate the time, in jiffies, it takes for event counter 5 or | |
11585 | + * 6 to completely wrap when counting at the max event rate, and | |
11586 | + * then figure on sampling at twice that rate. | |
11587 | + */ | |
11588 | + update_time = (((unsigned long)HZ * OVERFLOW_VALUE) | |
11589 | + / ((unsigned long)MAX_EVENT_RATE)) / 2; | |
11590 | + | |
11591 | + ret = pfm_pmu_register(&pfm_power6_pmu_conf); | |
11592 | + return ret; | |
11593 | +} | |
11594 | + | |
11595 | +static void __exit pfm_power6_pmu_cleanup_module(void) | |
11596 | +{ | |
11597 | + pfm_pmu_unregister(&pfm_power6_pmu_conf); | |
11598 | +} | |
11599 | + | |
11600 | +module_init(pfm_power6_pmu_init_module); | |
11601 | +module_exit(pfm_power6_pmu_cleanup_module); | |
11602 | --- /dev/null | |
11603 | +++ b/arch/powerpc/perfmon/perfmon_ppc32.c | |
11604 | @@ -0,0 +1,340 @@ | |
11605 | +/* | |
11606 | + * This file contains the PPC32 PMU register description tables | |
11607 | + * and pmc checker used by perfmon.c. | |
11608 | + * | |
11609 | + * Philip Mucci, mucci@cs.utk.edu | |
11610 | + * | |
11611 | + * Based on code from: | |
11612 | + * Copyright (c) 2005 David Gibson, IBM Corporation. | |
11613 | + * | |
11614 | + * Based on perfmon_p6.c: | |
11615 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
11616 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
11617 | + * | |
11618 | + * This program is free software; you can redistribute it and/or | |
11619 | + * modify it under the terms of version 2 of the GNU General Public | |
11620 | + * License as published by the Free Software Foundation. | |
11621 | + * | |
11622 | + * This program is distributed in the hope that it will be useful, | |
11623 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11624 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11625 | + * General Public License for more details. | |
11626 | + * | |
11627 | + * You should have received a copy of the GNU General Public License | |
11628 | + * along with this program; if not, write to the Free Software | |
11629 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
11630 | + * 02111-1307 USA | |
11631 | + */ | |
11632 | +#include <linux/module.h> | |
11633 | +#include <linux/perfmon_kern.h> | |
11634 | +#include <asm/reg.h> | |
11635 | + | |
11636 | +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>"); | |
11637 | +MODULE_DESCRIPTION("PPC32 PMU description table"); | |
11638 | +MODULE_LICENSE("GPL"); | |
11639 | + | |
11640 | +static struct pfm_pmu_config pfm_ppc32_pmu_conf; | |
11641 | + | |
11642 | +static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = { | |
11643 | +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0), | |
11644 | +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1), | |
11645 | +/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2), | |
11646 | +}; | |
11647 | +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc) | |
11648 | + | |
11649 | +static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = { | |
11650 | +/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), | |
11651 | +/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), | |
11652 | +/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), | |
11653 | +/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), | |
11654 | +/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), | |
11655 | +/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), | |
11656 | +}; | |
11657 | +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc) | |
11658 | + | |
11659 | +static void perfmon_perf_irq(struct pt_regs *regs) | |
11660 | +{ | |
11661 | + u32 mmcr0; | |
11662 | + | |
11663 | + /* BLATANTLY STOLEN FROM OPROFILE, then modified */ | |
11664 | + | |
11665 | + /* set the PMM bit (see comment below) */ | |
11666 | + mtmsr(mfmsr() | MSR_PMM); | |
11667 | + | |
11668 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
11669 | + | |
11670 | + /* The freeze bit was set by the interrupt. | |
11671 | + * Clear the freeze bit, and reenable the interrupt. | |
11672 | + * The counters won't actually start until the rfi clears | |
11673 | + * the PMM bit. | |
11674 | + */ | |
11675 | + | |
11676 | + /* Unfreezes the counters on this CPU, enables the interrupt, | |
11677 | + * enables the counters to trigger the interrupt, and sets the | |
11678 | + * counters to only count when the mark bit is not set. | |
11679 | + */ | |
11680 | + mmcr0 = mfspr(SPRN_MMCR0); | |
11681 | + | |
11682 | + mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0); | |
11683 | + mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE); | |
11684 | + | |
11685 | + mtspr(SPRN_MMCR0, mmcr0); | |
11686 | +} | |
11687 | + | |
11688 | +static int pfm_ppc32_probe_pmu(void) | |
11689 | +{ | |
11690 | + enum ppc32_pmu_type pm_type; | |
11691 | + int nmmcr = 0, npmds = 0, intsok = 0, i; | |
11692 | + unsigned int pvr; | |
11693 | + char *str; | |
11694 | + | |
11695 | + pvr = mfspr(SPRN_PVR); | |
11696 | + | |
11697 | + switch (PVR_VER(pvr)) { | |
11698 | + case 0x0004: /* 604 */ | |
11699 | + str = "PPC604"; | |
11700 | + pm_type = PFM_POWERPC_PMU_604; | |
11701 | + nmmcr = 1; | |
11702 | + npmds = 2; | |
11703 | + break; | |
11704 | + case 0x0009: /* 604e; */ | |
11705 | + case 0x000A: /* 604ev */ | |
11706 | + str = "PPC604e"; | |
11707 | + pm_type = PFM_POWERPC_PMU_604e; | |
11708 | + nmmcr = 2; | |
11709 | + npmds = 4; | |
11710 | + break; | |
11711 | + case 0x0008: /* 750/740 */ | |
11712 | + str = "PPC750"; | |
11713 | + pm_type = PFM_POWERPC_PMU_750; | |
11714 | + nmmcr = 2; | |
11715 | + npmds = 4; | |
11716 | + break; | |
11717 | + case 0x7000: /* 750FX */ | |
11718 | + case 0x7001: | |
11719 | + str = "PPC750"; | |
11720 | + pm_type = PFM_POWERPC_PMU_750; | |
11721 | + nmmcr = 2; | |
11722 | + npmds = 4; | |
11723 | + if ((pvr & 0xFF0F) >= 0x0203) | |
11724 | + intsok = 1; | |
11725 | + break; | |
11726 | + case 0x7002: /* 750GX */ | |
11727 | + str = "PPC750"; | |
11728 | + pm_type = PFM_POWERPC_PMU_750; | |
11729 | + nmmcr = 2; | |
11730 | + npmds = 4; | |
11731 | + intsok = 1; | |
11732 | + case 0x000C: /* 7400 */ | |
11733 | + str = "PPC7400"; | |
11734 | + pm_type = PFM_POWERPC_PMU_7400; | |
11735 | + nmmcr = 3; | |
11736 | + npmds = 4; | |
11737 | + break; | |
11738 | + case 0x800C: /* 7410 */ | |
11739 | + str = "PPC7410"; | |
11740 | + pm_type = PFM_POWERPC_PMU_7400; | |
11741 | + nmmcr = 3; | |
11742 | + npmds = 4; | |
11743 | + if ((pvr & 0xFFFF) >= 0x01103) | |
11744 | + intsok = 1; | |
11745 | + break; | |
11746 | + case 0x8000: /* 7451/7441 */ | |
11747 | + case 0x8001: /* 7455/7445 */ | |
11748 | + case 0x8002: /* 7457/7447 */ | |
11749 | + case 0x8003: /* 7447A */ | |
11750 | + case 0x8004: /* 7448 */ | |
11751 | + str = "PPC7450"; | |
11752 | + pm_type = PFM_POWERPC_PMU_7450; | |
11753 | + nmmcr = 3; npmds = 6; | |
11754 | + intsok = 1; | |
11755 | + break; | |
11756 | + default: | |
11757 | + PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr)); | |
11758 | + return -1; | |
11759 | + } | |
11760 | + | |
11761 | + /* | |
11762 | + * deconfigure unimplemented registers | |
11763 | + */ | |
11764 | + for (i = npmds; i < PFM_PM_NUM_PMDS; i++) | |
11765 | + pfm_ppc32_pmd_desc[i].type = PFM_REG_NA; | |
11766 | + | |
11767 | + for (i = nmmcr; i < PFM_PM_NUM_PMCS; i++) | |
11768 | + pfm_ppc32_pmc_desc[i].type = PFM_REG_NA; | |
11769 | + | |
11770 | + /* | |
11771 | + * update PMU description structure | |
11772 | + */ | |
11773 | + pfm_ppc32_pmu_conf.pmu_name = str; | |
11774 | + pfm_ppc32_pmu_info.pmu_style = pm_type; | |
11775 | + pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr; | |
11776 | + pfm_ppc32_pmu_conf.num_pmd_entries = npmds; | |
11777 | + | |
11778 | + if (intsok == 0) | |
11779 | + PFM_INFO("Interrupts unlikely to work\n"); | |
11780 | + | |
11781 | + return reserve_pmc_hardware(perfmon_perf_irq); | |
11782 | +} | |
11783 | + | |
11784 | +static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value) | |
11785 | +{ | |
11786 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
11787 | + case SPRN_MMCR0: | |
11788 | + mtspr(SPRN_MMCR0, value); | |
11789 | + break; | |
11790 | + case SPRN_MMCR1: | |
11791 | + mtspr(SPRN_MMCR1, value); | |
11792 | + break; | |
11793 | + case SPRN_MMCR2: | |
11794 | + mtspr(SPRN_MMCR2, value); | |
11795 | + break; | |
11796 | + default: | |
11797 | + BUG(); | |
11798 | + } | |
11799 | +} | |
11800 | + | |
11801 | +static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value) | |
11802 | +{ | |
11803 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11804 | + case SPRN_PMC1: | |
11805 | + mtspr(SPRN_PMC1, value); | |
11806 | + break; | |
11807 | + case SPRN_PMC2: | |
11808 | + mtspr(SPRN_PMC2, value); | |
11809 | + break; | |
11810 | + case SPRN_PMC3: | |
11811 | + mtspr(SPRN_PMC3, value); | |
11812 | + break; | |
11813 | + case SPRN_PMC4: | |
11814 | + mtspr(SPRN_PMC4, value); | |
11815 | + break; | |
11816 | + case SPRN_PMC5: | |
11817 | + mtspr(SPRN_PMC5, value); | |
11818 | + break; | |
11819 | + case SPRN_PMC6: | |
11820 | + mtspr(SPRN_PMC6, value); | |
11821 | + break; | |
11822 | + default: | |
11823 | + BUG(); | |
11824 | + } | |
11825 | +} | |
11826 | + | |
11827 | +static u64 pfm_ppc32_read_pmd(unsigned int cnum) | |
11828 | +{ | |
11829 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
11830 | + case SPRN_PMC1: | |
11831 | + return mfspr(SPRN_PMC1); | |
11832 | + case SPRN_PMC2: | |
11833 | + return mfspr(SPRN_PMC2); | |
11834 | + case SPRN_PMC3: | |
11835 | + return mfspr(SPRN_PMC3); | |
11836 | + case SPRN_PMC4: | |
11837 | + return mfspr(SPRN_PMC4); | |
11838 | + case SPRN_PMC5: | |
11839 | + return mfspr(SPRN_PMC5); | |
11840 | + case SPRN_PMC6: | |
11841 | + return mfspr(SPRN_PMC6); | |
11842 | + default: | |
11843 | + BUG(); | |
11844 | + } | |
11845 | +} | |
11846 | + | |
11847 | +/** | |
11848 | + * pfm_ppc32_enable_counters | |
11849 | + * | |
11850 | + * Just need to load the current values into the control registers. | |
11851 | + **/ | |
11852 | +static void pfm_ppc32_enable_counters(struct pfm_context *ctx, | |
11853 | + struct pfm_event_set *set) | |
11854 | +{ | |
11855 | + unsigned int i, max_pmc; | |
11856 | + | |
11857 | + max_pmc = pfm_pmu_conf->regs.max_pmc; | |
11858 | + | |
11859 | + for (i = 0; i < max_pmc; i++) | |
11860 | + if (test_bit(i, set->used_pmcs)) | |
11861 | + pfm_ppc32_write_pmc(i, set->pmcs[i]); | |
11862 | +} | |
11863 | + | |
11864 | +/** | |
11865 | + * pfm_ppc32_disable_counters | |
11866 | + * | |
11867 | + * Just need to zero all the control registers. | |
11868 | + **/ | |
11869 | +static void pfm_ppc32_disable_counters(struct pfm_context *ctx, | |
11870 | + struct pfm_event_set *set) | |
11871 | +{ | |
11872 | + unsigned int i, max; | |
11873 | + | |
11874 | + max = pfm_pmu_conf->regs.max_pmc; | |
11875 | + | |
11876 | + for (i = 0; i < max; i++) | |
11877 | + if (test_bit(i, set->used_pmcs)) | |
11878 | + pfm_ppc32_write_pmc(ctx, 0); | |
11879 | +} | |
11880 | + | |
11881 | +/** | |
11882 | + * pfm_ppc32_get_ovfl_pmds | |
11883 | + * | |
11884 | + * Determine which counters in this set have overflowed and fill in the | |
11885 | + * set->povfl_pmds mask and set->npend_ovfls count. | |
11886 | + **/ | |
11887 | +static void pfm_ppc32_get_ovfl_pmds(struct pfm_context *ctx, | |
11888 | + struct pfm_event_set *set) | |
11889 | +{ | |
11890 | + unsigned int i; | |
11891 | + unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd; | |
11892 | + u64 *used_pmds = set->used_pmds; | |
11893 | + u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds; | |
11894 | + u64 width_mask = 1 << pfm_pmu_conf->counter_width; | |
11895 | + u64 new_val, mask[PFM_PMD_BV]; | |
11896 | + | |
11897 | + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), | |
11898 | + cast_ulp(used_pmds), max_pmd); | |
11899 | + | |
11900 | + for (i = 0; i < max_pmd; i++) { | |
11901 | + if (test_bit(i, mask)) { | |
11902 | + new_val = pfm_ppc32_read_pmd(i); | |
11903 | + if (new_val & width_mask) { | |
11904 | + set_bit(i, set->povfl_pmds); | |
11905 | + set->npend_ovfls++; | |
11906 | + } | |
11907 | + } | |
11908 | + } | |
11909 | +} | |
11910 | + | |
11911 | +struct pfm_arch_pmu_info pfm_ppc32_pmu_info = { | |
11912 | + .pmu_style = PFM_POWERPC_PMU_NONE, | |
11913 | + .write_pmc = pfm_ppc32_write_pmc, | |
11914 | + .write_pmd = pfm_ppc32_write_pmd, | |
11915 | + .read_pmd = pfm_ppc32_read_pmd, | |
11916 | + .get_ovfl_pmds = pfm_ppc32_get_ovfl_pmds, | |
11917 | + .enable_counters = pfm_ppc32_enable_counters, | |
11918 | + .disable_counters = pfm_ppc32_disable_counters, | |
11919 | +}; | |
11920 | + | |
11921 | +static struct pfm_pmu_config pfm_ppc32_pmu_conf = { | |
11922 | + .counter_width = 31, | |
11923 | + .pmd_desc = pfm_ppc32_pmd_desc, | |
11924 | + .pmc_desc = pfm_ppc32_pmc_desc, | |
11925 | + .probe_pmu = pfm_ppc32_probe_pmu, | |
11926 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
11927 | + .owner = THIS_MODULE, | |
11928 | + .version = "0.1", | |
11929 | + .arch_info = &pfm_ppc32_pmu_info, | |
11930 | +}; | |
11931 | + | |
11932 | +static int __init pfm_ppc32_pmu_init_module(void) | |
11933 | +{ | |
11934 | + return pfm_pmu_register(&pfm_ppc32_pmu_conf); | |
11935 | +} | |
11936 | + | |
11937 | +static void __exit pfm_ppc32_pmu_cleanup_module(void) | |
11938 | +{ | |
11939 | + release_pmc_hardware(); | |
11940 | + pfm_pmu_unregister(&pfm_ppc32_pmu_conf); | |
11941 | +} | |
11942 | + | |
11943 | +module_init(pfm_ppc32_pmu_init_module); | |
11944 | +module_exit(pfm_ppc32_pmu_cleanup_module); | |
11945 | --- a/arch/powerpc/platforms/cell/cbe_regs.c | |
11946 | +++ b/arch/powerpc/platforms/cell/cbe_regs.c | |
11947 | @@ -33,6 +33,7 @@ static struct cbe_regs_map | |
11948 | struct cbe_iic_regs __iomem *iic_regs; | |
11949 | struct cbe_mic_tm_regs __iomem *mic_tm_regs; | |
11950 | struct cbe_pmd_shadow_regs pmd_shadow_regs; | |
11951 | + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs; | |
11952 | } cbe_regs_maps[MAX_CBE]; | |
11953 | static int cbe_regs_map_count; | |
11954 | ||
11955 | @@ -145,6 +146,23 @@ struct cbe_mic_tm_regs __iomem *cbe_get_ | |
11956 | } | |
11957 | EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); | |
11958 | ||
11959 | +struct cbe_ppe_priv_regs __iomem *cbe_get_ppe_priv_regs(struct device_node *np) | |
11960 | +{ | |
11961 | + struct cbe_regs_map *map = cbe_find_map(np); | |
11962 | + if (map == NULL) | |
11963 | + return NULL; | |
11964 | + return map->ppe_priv_regs; | |
11965 | +} | |
11966 | + | |
11967 | +struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu) | |
11968 | +{ | |
11969 | + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; | |
11970 | + if (map == NULL) | |
11971 | + return NULL; | |
11972 | + return map->ppe_priv_regs; | |
11973 | +} | |
11974 | +EXPORT_SYMBOL_GPL(cbe_get_cpu_ppe_priv_regs); | |
11975 | + | |
11976 | u32 cbe_get_hw_thread_id(int cpu) | |
11977 | { | |
11978 | return cbe_thread_map[cpu].thread_id; | |
11979 | @@ -206,6 +224,11 @@ void __init cbe_fill_regs_map(struct cbe | |
11980 | for_each_node_by_type(np, "mic-tm") | |
11981 | if (of_get_parent(np) == be) | |
11982 | map->mic_tm_regs = of_iomap(np, 0); | |
11983 | + | |
11984 | + for_each_node_by_type(np, "ppe-mmio") | |
11985 | + if (of_get_parent(np) == be) | |
11986 | + map->ppe_priv_regs = of_iomap(np, 0); | |
11987 | + | |
11988 | } else { | |
11989 | struct device_node *cpu; | |
11990 | /* That hack must die die die ! */ | |
11991 | @@ -227,6 +250,10 @@ void __init cbe_fill_regs_map(struct cbe | |
11992 | prop = of_get_property(cpu, "mic-tm", NULL); | |
11993 | if (prop != NULL) | |
11994 | map->mic_tm_regs = ioremap(prop->address, prop->len); | |
11995 | + | |
11996 | + prop = of_get_property(cpu, "ppe-mmio", NULL); | |
11997 | + if (prop != NULL) | |
11998 | + map->ppe_priv_regs = ioremap(prop->address, prop->len); | |
11999 | } | |
12000 | } | |
12001 | ||
12002 | --- a/arch/sparc/include/asm/hypervisor.h | |
12003 | +++ b/arch/sparc/include/asm/hypervisor.h | |
12004 | @@ -2713,6 +2713,30 @@ extern unsigned long sun4v_ldc_revoke(un | |
12005 | */ | |
12006 | #define HV_FAST_SET_PERFREG 0x101 | |
12007 | ||
12008 | +#define HV_N2_PERF_SPARC_CTL 0x0 | |
12009 | +#define HV_N2_PERF_DRAM_CTL0 0x1 | |
12010 | +#define HV_N2_PERF_DRAM_CNT0 0x2 | |
12011 | +#define HV_N2_PERF_DRAM_CTL1 0x3 | |
12012 | +#define HV_N2_PERF_DRAM_CNT1 0x4 | |
12013 | +#define HV_N2_PERF_DRAM_CTL2 0x5 | |
12014 | +#define HV_N2_PERF_DRAM_CNT2 0x6 | |
12015 | +#define HV_N2_PERF_DRAM_CTL3 0x7 | |
12016 | +#define HV_N2_PERF_DRAM_CNT3 0x8 | |
12017 | + | |
12018 | +#define HV_FAST_N2_GET_PERFREG 0x104 | |
12019 | +#define HV_FAST_N2_SET_PERFREG 0x105 | |
12020 | + | |
12021 | +#ifndef __ASSEMBLY__ | |
12022 | +extern unsigned long sun4v_niagara_getperf(unsigned long reg, | |
12023 | + unsigned long *val); | |
12024 | +extern unsigned long sun4v_niagara_setperf(unsigned long reg, | |
12025 | + unsigned long val); | |
12026 | +extern unsigned long sun4v_niagara2_getperf(unsigned long reg, | |
12027 | + unsigned long *val); | |
12028 | +extern unsigned long sun4v_niagara2_setperf(unsigned long reg, | |
12029 | + unsigned long val); | |
12030 | +#endif | |
12031 | + | |
12032 | /* MMU statistics services. | |
12033 | * | |
12034 | * The hypervisor maintains MMU statistics and privileged code provides | |
12035 | --- a/arch/sparc/include/asm/irq_64.h | |
12036 | +++ b/arch/sparc/include/asm/irq_64.h | |
12037 | @@ -67,6 +67,9 @@ extern void virt_irq_free(unsigned int v | |
12038 | extern void __init init_IRQ(void); | |
12039 | extern void fixup_irqs(void); | |
12040 | ||
12041 | +extern int register_perfctr_intr(void (*handler)(struct pt_regs *)); | |
12042 | +extern void release_perfctr_intr(void (*handler)(struct pt_regs *)); | |
12043 | + | |
12044 | static inline void set_softint(unsigned long bits) | |
12045 | { | |
12046 | __asm__ __volatile__("wr %0, 0x0, %%set_softint" | |
12047 | --- /dev/null | |
12048 | +++ b/arch/sparc/include/asm/perfmon.h | |
12049 | @@ -0,0 +1,11 @@ | |
12050 | +#ifndef _SPARC64_PERFMON_H_ | |
12051 | +#define _SPARC64_PERFMON_H_ | |
12052 | + | |
12053 | +/* | |
12054 | + * arch-specific user visible interface definitions | |
12055 | + */ | |
12056 | + | |
12057 | +#define PFM_ARCH_MAX_PMCS 2 | |
12058 | +#define PFM_ARCH_MAX_PMDS 3 | |
12059 | + | |
12060 | +#endif /* _SPARC64_PERFMON_H_ */ | |
12061 | --- /dev/null | |
12062 | +++ b/arch/sparc/include/asm/perfmon_kern.h | |
12063 | @@ -0,0 +1,286 @@ | |
12064 | +#ifndef _SPARC64_PERFMON_KERN_H_ | |
12065 | +#define _SPARC64_PERFMON_KERN_H_ | |
12066 | + | |
12067 | +#ifdef __KERNEL__ | |
12068 | + | |
12069 | +#ifdef CONFIG_PERFMON | |
12070 | + | |
12071 | +#include <linux/irq.h> | |
12072 | +#include <asm/system.h> | |
12073 | + | |
12074 | +#define PFM_ARCH_PMD_STK_ARG 2 | |
12075 | +#define PFM_ARCH_PMC_STK_ARG 1 | |
12076 | + | |
12077 | +struct pfm_arch_pmu_info { | |
12078 | + u32 pmu_style; | |
12079 | +}; | |
12080 | + | |
12081 | +static inline void pfm_arch_resend_irq(struct pfm_context *ctx) | |
12082 | +{ | |
12083 | +} | |
12084 | + | |
12085 | +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, | |
12086 | + struct pfm_event_set *set) | |
12087 | +{} | |
12088 | + | |
12089 | +static inline void pfm_arch_serialize(void) | |
12090 | +{ | |
12091 | +} | |
12092 | + | |
12093 | +/* | |
12094 | + * SPARC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus | |
12095 | + * this routine needs to do it when switching sets on overflow | |
12096 | + */ | |
12097 | +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, | |
12098 | + struct pfm_event_set *set) | |
12099 | +{ | |
12100 | + pfm_save_pmds(ctx, set); | |
12101 | +} | |
12102 | + | |
12103 | +extern void pfm_arch_write_pmc(struct pfm_context *ctx, | |
12104 | + unsigned int cnum, u64 value); | |
12105 | +extern u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum); | |
12106 | + | |
12107 | +static inline void pfm_arch_write_pmd(struct pfm_context *ctx, | |
12108 | + unsigned int cnum, u64 value) | |
12109 | +{ | |
12110 | + u64 pic; | |
12111 | + | |
12112 | + value &= pfm_pmu_conf->ovfl_mask; | |
12113 | + | |
12114 | + read_pic(pic); | |
12115 | + | |
12116 | + switch (cnum) { | |
12117 | + case 0: | |
12118 | + pic = (pic & 0xffffffff00000000UL) | | |
12119 | + (value & 0xffffffffUL); | |
12120 | + break; | |
12121 | + case 1: | |
12122 | + pic = (pic & 0xffffffffUL) | | |
12123 | + (value << 32UL); | |
12124 | + break; | |
12125 | + default: | |
12126 | + BUG(); | |
12127 | + } | |
12128 | + | |
12129 | + write_pic(pic); | |
12130 | +} | |
12131 | + | |
12132 | +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, | |
12133 | + unsigned int cnum) | |
12134 | +{ | |
12135 | + u64 pic; | |
12136 | + | |
12137 | + read_pic(pic); | |
12138 | + | |
12139 | + switch (cnum) { | |
12140 | + case 0: | |
12141 | + return pic & 0xffffffffUL; | |
12142 | + case 1: | |
12143 | + return pic >> 32UL; | |
12144 | + default: | |
12145 | + BUG(); | |
12146 | + return 0; | |
12147 | + } | |
12148 | +} | |
12149 | + | |
12150 | +/* | |
12151 | + * For some CPUs, the upper bits of a counter must be set in order for the | |
12152 | + * overflow interrupt to happen. On overflow, the counter has wrapped around, | |
12153 | + * and the upper bits are cleared. This function may be used to set them back. | |
12154 | + */ | |
12155 | +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, | |
12156 | + unsigned int cnum) | |
12157 | +{ | |
12158 | + u64 val = pfm_arch_read_pmd(ctx, cnum); | |
12159 | + | |
12160 | + /* This masks out overflow bit 31 */ | |
12161 | + pfm_arch_write_pmd(ctx, cnum, val); | |
12162 | +} | |
12163 | + | |
12164 | +/* | |
12165 | + * At certain points, perfmon needs to know if monitoring has been | |
12166 | + * explicitely started/stopped by user via pfm_start/pfm_stop. The | |
12167 | + * information is tracked in ctx.flags.started. However on certain | |
12168 | + * architectures, it may be possible to start/stop directly from | |
12169 | + * user level with a single assembly instruction bypassing | |
12170 | + * the kernel. This function must be used to determine by | |
12171 | + * an arch-specific mean if monitoring is actually started/stopped. | |
12172 | + */ | |
12173 | +static inline int pfm_arch_is_active(struct pfm_context *ctx) | |
12174 | +{ | |
12175 | + return ctx->flags.started; | |
12176 | +} | |
12177 | + | |
12178 | +static inline void pfm_arch_ctxswout_sys(struct task_struct *task, | |
12179 | + struct pfm_context *ctx) | |
12180 | +{ | |
12181 | +} | |
12182 | + | |
12183 | +static inline void pfm_arch_ctxswin_sys(struct task_struct *task, | |
12184 | + struct pfm_context *ctx) | |
12185 | +{ | |
12186 | +} | |
12187 | + | |
12188 | +static inline void pfm_arch_ctxswin_thread(struct task_struct *task, | |
12189 | + struct pfm_context *ctx) | |
12190 | +{ | |
12191 | +} | |
12192 | + | |
12193 | +int pfm_arch_is_monitoring_active(struct pfm_context *ctx); | |
12194 | +int pfm_arch_ctxswout_thread(struct task_struct *task, | |
12195 | + struct pfm_context *ctx); | |
12196 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); | |
12197 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); | |
12198 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
12199 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
12200 | +char *pfm_arch_get_pmu_module_name(void); | |
12201 | + | |
12202 | +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, | |
12203 | + struct pfm_event_set *set) | |
12204 | +{ | |
12205 | + pfm_arch_stop(current, ctx); | |
12206 | + /* | |
12207 | + * we mark monitoring as stopped to avoid | |
12208 | + * certain side effects especially in | |
12209 | + * pfm_switch_sets_from_intr() on | |
12210 | + * pfm_arch_restore_pmcs() | |
12211 | + */ | |
12212 | + ctx->flags.started = 0; | |
12213 | +} | |
12214 | + | |
12215 | +/* | |
12216 | + * unfreeze PMU from pfm_do_interrupt_handler() | |
12217 | + * ctx may be NULL for spurious | |
12218 | + */ | |
12219 | +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) | |
12220 | +{ | |
12221 | + if (!ctx) | |
12222 | + return; | |
12223 | + | |
12224 | + PFM_DBG_ovfl("state=%d", ctx->state); | |
12225 | + | |
12226 | + ctx->flags.started = 1; | |
12227 | + | |
12228 | + if (ctx->state == PFM_CTX_MASKED) | |
12229 | + return; | |
12230 | + | |
12231 | + pfm_arch_restore_pmcs(ctx, ctx->active_set); | |
12232 | +} | |
12233 | + | |
12234 | +/* | |
12235 | + * this function is called from the PMU interrupt handler ONLY. | |
12236 | + * On SPARC, the PMU is frozen via arch_stop, masking would be implemented | |
12237 | + * via arch-stop as well. Given that the PMU is already stopped when | |
12238 | + * entering the interrupt handler, we do not need to stop it again, so | |
12239 | + * this function is a nop. | |
12240 | + */ | |
12241 | +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, | |
12242 | + struct pfm_event_set *set) | |
12243 | +{ | |
12244 | +} | |
12245 | + | |
12246 | +/* | |
12247 | + * on MIPS masking/unmasking uses the start/stop mechanism, so we simply | |
12248 | + * need to start here. | |
12249 | + */ | |
12250 | +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, | |
12251 | + struct pfm_event_set *set) | |
12252 | +{ | |
12253 | + pfm_arch_start(current, ctx); | |
12254 | +} | |
12255 | + | |
12256 | +static inline void pfm_arch_pmu_config_remove(void) | |
12257 | +{ | |
12258 | +} | |
12259 | + | |
12260 | +static inline int pfm_arch_context_create(struct pfm_context *ctx, | |
12261 | + u32 ctx_flags) | |
12262 | +{ | |
12263 | + return 0; | |
12264 | +} | |
12265 | + | |
12266 | +static inline void pfm_arch_context_free(struct pfm_context *ctx) | |
12267 | +{ | |
12268 | +} | |
12269 | + | |
12270 | +/* | |
12271 | + * function called from pfm_setfl_sane(). Context is locked | |
12272 | + * and interrupts are masked. | |
12273 | + * The value of flags is the value of ctx_flags as passed by | |
12274 | + * user. | |
12275 | + * | |
12276 | + * function must check arch-specific set flags. | |
12277 | + * Return: | |
12278 | + * 1 when flags are valid | |
12279 | + * 0 on error | |
12280 | + */ | |
12281 | +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) | |
12282 | +{ | |
12283 | + return 0; | |
12284 | +} | |
12285 | + | |
12286 | +static inline int pfm_arch_init(void) | |
12287 | +{ | |
12288 | + return 0; | |
12289 | +} | |
12290 | + | |
12291 | +static inline void pfm_arch_init_percpu(void) | |
12292 | +{ | |
12293 | +} | |
12294 | + | |
12295 | +static inline int pfm_arch_load_context(struct pfm_context *ctx) | |
12296 | +{ | |
12297 | + return 0; | |
12298 | +} | |
12299 | + | |
12300 | +static inline void pfm_arch_unload_context(struct pfm_context *ctx) | |
12301 | +{} | |
12302 | + | |
12303 | +extern void perfmon_interrupt(struct pt_regs *); | |
12304 | + | |
12305 | +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) | |
12306 | +{ | |
12307 | + return register_perfctr_intr(perfmon_interrupt); | |
12308 | +} | |
12309 | + | |
12310 | +static inline void pfm_arch_pmu_release(void) | |
12311 | +{ | |
12312 | + release_perfctr_intr(perfmon_interrupt); | |
12313 | +} | |
12314 | + | |
12315 | +static inline void pfm_arch_arm_handle_work(struct task_struct *task) | |
12316 | +{} | |
12317 | + | |
12318 | +static inline void pfm_arch_disarm_handle_work(struct task_struct *task) | |
12319 | +{} | |
12320 | + | |
12321 | +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) | |
12322 | +{ | |
12323 | + return 0; | |
12324 | +} | |
12325 | + | |
12326 | +static inline int pfm_arch_get_base_syscall(void) | |
12327 | +{ | |
12328 | + return __NR_pfm_create_context; | |
12329 | +} | |
12330 | + | |
12331 | +struct pfm_arch_context { | |
12332 | + /* empty */ | |
12333 | +}; | |
12334 | + | |
12335 | +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) | |
12336 | +/* | |
12337 | + * SPARC needs extra alignment for the sampling buffer | |
12338 | + */ | |
12339 | +#define PFM_ARCH_SMPL_ALIGN_SIZE (16 * 1024) | |
12340 | + | |
12341 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
12342 | +{ | |
12343 | +} | |
12344 | + | |
12345 | +#endif /* CONFIG_PERFMON */ | |
12346 | + | |
12347 | +#endif /* __KERNEL__ */ | |
12348 | + | |
12349 | +#endif /* _SPARC64_PERFMON_KERN_H_ */ | |
12350 | --- a/arch/sparc/include/asm/system_64.h | |
12351 | +++ b/arch/sparc/include/asm/system_64.h | |
12352 | @@ -30,6 +30,9 @@ enum sparc_cpu { | |
12353 | #define ARCH_SUN4C_SUN4 0 | |
12354 | #define ARCH_SUN4 0 | |
12355 | ||
12356 | +extern char *sparc_cpu_type; | |
12357 | +extern char *sparc_fpu_type; | |
12358 | +extern char *sparc_pmu_type; | |
12359 | extern char reboot_command[]; | |
12360 | ||
12361 | /* These are here in an effort to more fully work around Spitfire Errata | |
12362 | @@ -104,15 +107,13 @@ do { __asm__ __volatile__("ba,pt %%xcc, | |
12363 | #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) | |
12364 | #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) | |
12365 | ||
12366 | -/* Blackbird errata workaround. See commentary in | |
12367 | - * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() | |
12368 | - * for more information. | |
12369 | - */ | |
12370 | -#define reset_pic() \ | |
12371 | - __asm__ __volatile__("ba,pt %xcc, 99f\n\t" \ | |
12372 | +/* Blackbird errata workaround. */ | |
12373 | +#define write_pic(val) \ | |
12374 | + __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ | |
12375 | ".align 64\n" \ | |
12376 | - "99:wr %g0, 0x0, %pic\n\t" \ | |
12377 | - "rd %pic, %g0") | |
12378 | + "99:wr %0, 0x0, %%pic\n\t" \ | |
12379 | + "rd %%pic, %%g0" : : "r" (val)) | |
12380 | +#define reset_pic() write_pic(0) | |
12381 | ||
12382 | #ifndef __ASSEMBLY__ | |
12383 | ||
12384 | @@ -145,14 +146,10 @@ do { \ | |
12385 | * and 2 stores in this critical code path. -DaveM | |
12386 | */ | |
12387 | #define switch_to(prev, next, last) \ | |
12388 | -do { if (test_thread_flag(TIF_PERFCTR)) { \ | |
12389 | - unsigned long __tmp; \ | |
12390 | - read_pcr(__tmp); \ | |
12391 | - current_thread_info()->pcr_reg = __tmp; \ | |
12392 | - read_pic(__tmp); \ | |
12393 | - current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\ | |
12394 | - current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \ | |
12395 | - } \ | |
12396 | +do { if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ | |
12397 | + pfm_ctxsw_out(prev, next); \ | |
12398 | + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ | |
12399 | + pfm_ctxsw_in(prev, next); \ | |
12400 | flush_tlb_pending(); \ | |
12401 | save_and_clear_fpu(); \ | |
12402 | /* If you are tempted to conditionalize the following */ \ | |
12403 | @@ -197,11 +194,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) | |
12404 | "l1", "l2", "l3", "l4", "l5", "l6", "l7", \ | |
12405 | "i0", "i1", "i2", "i3", "i4", "i5", \ | |
12406 | "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \ | |
12407 | - /* If you fuck with this, update ret_from_syscall code too. */ \ | |
12408 | - if (test_thread_flag(TIF_PERFCTR)) { \ | |
12409 | - write_pcr(current_thread_info()->pcr_reg); \ | |
12410 | - reset_pic(); \ | |
12411 | - } \ | |
12412 | } while(0) | |
12413 | ||
12414 | static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) | |
12415 | --- a/arch/sparc/include/asm/thread_info_64.h | |
12416 | +++ b/arch/sparc/include/asm/thread_info_64.h | |
12417 | @@ -58,11 +58,6 @@ struct thread_info { | |
12418 | unsigned long gsr[7]; | |
12419 | unsigned long xfsr[7]; | |
12420 | ||
12421 | - __u64 __user *user_cntd0; | |
12422 | - __u64 __user *user_cntd1; | |
12423 | - __u64 kernel_cntd0, kernel_cntd1; | |
12424 | - __u64 pcr_reg; | |
12425 | - | |
12426 | struct restart_block restart_block; | |
12427 | ||
12428 | struct pt_regs *kern_una_regs; | |
12429 | @@ -96,15 +91,10 @@ struct thread_info { | |
12430 | #define TI_RWIN_SPTRS 0x000003c8 | |
12431 | #define TI_GSR 0x00000400 | |
12432 | #define TI_XFSR 0x00000438 | |
12433 | -#define TI_USER_CNTD0 0x00000470 | |
12434 | -#define TI_USER_CNTD1 0x00000478 | |
12435 | -#define TI_KERN_CNTD0 0x00000480 | |
12436 | -#define TI_KERN_CNTD1 0x00000488 | |
12437 | -#define TI_PCR 0x00000490 | |
12438 | -#define TI_RESTART_BLOCK 0x00000498 | |
12439 | -#define TI_KUNA_REGS 0x000004c0 | |
12440 | -#define TI_KUNA_INSN 0x000004c8 | |
12441 | -#define TI_FPREGS 0x00000500 | |
12442 | +#define TI_RESTART_BLOCK 0x00000470 | |
12443 | +#define TI_KUNA_REGS 0x00000498 | |
12444 | +#define TI_KUNA_INSN 0x000004a0 | |
12445 | +#define TI_FPREGS 0x000004c0 | |
12446 | ||
12447 | /* We embed this in the uppermost byte of thread_info->flags */ | |
12448 | #define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */ | |
12449 | @@ -222,11 +212,11 @@ register struct thread_info *current_thr | |
12450 | #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ | |
12451 | #define TIF_SIGPENDING 2 /* signal pending */ | |
12452 | #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ | |
12453 | -#define TIF_PERFCTR 4 /* performance counters active */ | |
12454 | +/* Bit 4 is available */ | |
12455 | #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ | |
12456 | /* flag bit 6 is available */ | |
12457 | #define TIF_32BIT 7 /* 32-bit binary */ | |
12458 | -/* flag bit 8 is available */ | |
12459 | +#define TIF_PERFMON_WORK 8 /* work for pfm_handle_work() */ | |
12460 | #define TIF_SECCOMP 9 /* secure computing */ | |
12461 | #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ | |
12462 | /* flag bit 11 is available */ | |
12463 | @@ -237,22 +227,24 @@ register struct thread_info *current_thr | |
12464 | #define TIF_ABI_PENDING 12 | |
12465 | #define TIF_MEMDIE 13 | |
12466 | #define TIF_POLLING_NRFLAG 14 | |
12467 | +#define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */ | |
12468 | ||
12469 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
12470 | #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) | |
12471 | #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) | |
12472 | #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) | |
12473 | -#define _TIF_PERFCTR (1<<TIF_PERFCTR) | |
12474 | #define _TIF_UNALIGNED (1<<TIF_UNALIGNED) | |
12475 | #define _TIF_32BIT (1<<TIF_32BIT) | |
12476 | +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK) | |
12477 | #define _TIF_SECCOMP (1<<TIF_SECCOMP) | |
12478 | #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) | |
12479 | #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) | |
12480 | #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) | |
12481 | +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW) | |
12482 | ||
12483 | #define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \ | |
12484 | _TIF_DO_NOTIFY_RESUME_MASK | \ | |
12485 | - _TIF_NEED_RESCHED | _TIF_PERFCTR) | |
12486 | + _TIF_NEED_RESCHED) | |
12487 | #define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING) | |
12488 | ||
12489 | /* | |
12490 | --- a/arch/sparc/include/asm/unistd_32.h | |
12491 | +++ b/arch/sparc/include/asm/unistd_32.h | |
12492 | @@ -338,8 +338,20 @@ | |
12493 | #define __NR_dup3 320 | |
12494 | #define __NR_pipe2 321 | |
12495 | #define __NR_inotify_init1 322 | |
12496 | +#define __NR_pfm_create_context 323 | |
12497 | +#define __NR_pfm_write_pmcs 324 | |
12498 | +#define __NR_pfm_write_pmds 325 | |
12499 | +#define __NR_pfm_read_pmds 326 | |
12500 | +#define __NR_pfm_load_context 327 | |
12501 | +#define __NR_pfm_start 328 | |
12502 | +#define __NR_pfm_stop 329 | |
12503 | +#define __NR_pfm_restart 330 | |
12504 | +#define __NR_pfm_create_evtsets 331 | |
12505 | +#define __NR_pfm_getinfo_evtsets 332 | |
12506 | +#define __NR_pfm_delete_evtsets 333 | |
12507 | +#define __NR_pfm_unload_context 334 | |
12508 | ||
12509 | -#define NR_SYSCALLS 323 | |
12510 | +#define NR_SYSCALLS 325 | |
12511 | ||
12512 | /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, | |
12513 | * it never had the plain ones and there is no value to adding those | |
12514 | --- a/arch/sparc/include/asm/unistd_64.h | |
12515 | +++ b/arch/sparc/include/asm/unistd_64.h | |
12516 | @@ -340,8 +340,20 @@ | |
12517 | #define __NR_dup3 320 | |
12518 | #define __NR_pipe2 321 | |
12519 | #define __NR_inotify_init1 322 | |
12520 | +#define __NR_pfm_create_context 323 | |
12521 | +#define __NR_pfm_write_pmcs 324 | |
12522 | +#define __NR_pfm_write_pmds 325 | |
12523 | +#define __NR_pfm_read_pmds 326 | |
12524 | +#define __NR_pfm_load_context 327 | |
12525 | +#define __NR_pfm_start 328 | |
12526 | +#define __NR_pfm_stop 329 | |
12527 | +#define __NR_pfm_restart 330 | |
12528 | +#define __NR_pfm_create_evtsets 331 | |
12529 | +#define __NR_pfm_getinfo_evtsets 332 | |
12530 | +#define __NR_pfm_delete_evtsets 333 | |
12531 | +#define __NR_pfm_unload_context 334 | |
12532 | ||
12533 | -#define NR_SYSCALLS 323 | |
12534 | +#define NR_SYSCALLS 335 | |
12535 | ||
12536 | #ifdef __KERNEL__ | |
12537 | #define __ARCH_WANT_IPC_PARSE_VERSION | |
12538 | --- a/arch/sparc/kernel/systbls.S | |
12539 | +++ b/arch/sparc/kernel/systbls.S | |
12540 | @@ -81,4 +81,6 @@ sys_call_table: | |
12541 | /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait | |
12542 | /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate | |
12543 | /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 | |
12544 | -/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1 | |
12545 | +/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs | |
12546 | +/*325*/ .long sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop | |
12547 | +/*330*/ .long sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context | |
12548 | --- a/arch/sparc64/Kconfig | |
12549 | +++ b/arch/sparc64/Kconfig | |
12550 | @@ -402,6 +402,8 @@ source "drivers/sbus/char/Kconfig" | |
12551 | ||
12552 | source "fs/Kconfig" | |
12553 | ||
12554 | +source "arch/sparc64/perfmon/Kconfig" | |
12555 | + | |
12556 | source "arch/sparc64/Kconfig.debug" | |
12557 | ||
12558 | source "security/Kconfig" | |
12559 | --- a/arch/sparc64/Makefile | |
12560 | +++ b/arch/sparc64/Makefile | |
12561 | @@ -32,6 +32,8 @@ core-y += arch/sparc64/math-emu/ | |
12562 | libs-y += arch/sparc64/prom/ arch/sparc64/lib/ | |
12563 | drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/ | |
12564 | ||
12565 | +core-$(CONFIG_PERFMON) += arch/sparc64/perfmon/ | |
12566 | + | |
12567 | boot := arch/sparc64/boot | |
12568 | ||
12569 | image tftpboot.img vmlinux.aout: vmlinux | |
12570 | --- a/arch/sparc64/kernel/cpu.c | |
12571 | +++ b/arch/sparc64/kernel/cpu.c | |
12572 | @@ -20,16 +20,17 @@ | |
12573 | DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; | |
12574 | ||
12575 | struct cpu_iu_info { | |
12576 | - short manuf; | |
12577 | - short impl; | |
12578 | - char* cpu_name; /* should be enough I hope... */ | |
12579 | + short manuf; | |
12580 | + short impl; | |
12581 | + char *cpu_name; | |
12582 | + char *pmu_name; | |
12583 | }; | |
12584 | ||
12585 | struct cpu_fp_info { | |
12586 | - short manuf; | |
12587 | - short impl; | |
12588 | - char fpu_vers; | |
12589 | - char* fp_name; | |
12590 | + short manuf; | |
12591 | + short impl; | |
12592 | + char fpu_vers; | |
12593 | + char* fp_name; | |
12594 | }; | |
12595 | ||
12596 | static struct cpu_fp_info linux_sparc_fpu[] = { | |
12597 | @@ -49,23 +50,24 @@ static struct cpu_fp_info linux_sparc_fp | |
12598 | #define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu) | |
12599 | ||
12600 | static struct cpu_iu_info linux_sparc_chips[] = { | |
12601 | - { 0x17, 0x10, "TI UltraSparc I (SpitFire)"}, | |
12602 | - { 0x22, 0x10, "TI UltraSparc I (SpitFire)"}, | |
12603 | - { 0x17, 0x11, "TI UltraSparc II (BlackBird)"}, | |
12604 | - { 0x17, 0x12, "TI UltraSparc IIi (Sabre)"}, | |
12605 | - { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)"}, | |
12606 | - { 0x3e, 0x14, "TI UltraSparc III (Cheetah)"}, | |
12607 | - { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)"}, | |
12608 | - { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)"}, | |
12609 | - { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)"}, | |
12610 | - { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)"}, | |
12611 | - { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)"}, | |
12612 | -}; | |
12613 | + { 0x17, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"}, | |
12614 | + { 0x22, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"}, | |
12615 | + { 0x17, 0x11, "TI UltraSparc II (BlackBird)", "ultra12"}, | |
12616 | + { 0x17, 0x12, "TI UltraSparc IIi (Sabre)", "ultra12"}, | |
12617 | + { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"}, | |
12618 | + { 0x3e, 0x14, "TI UltraSparc III (Cheetah)", "ultra3"}, | |
12619 | + { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"}, | |
12620 | + { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"}, | |
12621 | + { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)", "ultra4"}, | |
12622 | + { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"}, | |
12623 | + { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3+"}, | |
12624 | +}; | |
12625 | ||
12626 | #define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips) | |
12627 | ||
12628 | char *sparc_cpu_type; | |
12629 | char *sparc_fpu_type; | |
12630 | +char *sparc_pmu_type; | |
12631 | ||
12632 | static void __init sun4v_cpu_probe(void) | |
12633 | { | |
12634 | @@ -73,11 +75,13 @@ static void __init sun4v_cpu_probe(void) | |
12635 | case SUN4V_CHIP_NIAGARA1: | |
12636 | sparc_cpu_type = "UltraSparc T1 (Niagara)"; | |
12637 | sparc_fpu_type = "UltraSparc T1 integrated FPU"; | |
12638 | + sparc_pmu_type = "niagara"; | |
12639 | break; | |
12640 | ||
12641 | case SUN4V_CHIP_NIAGARA2: | |
12642 | sparc_cpu_type = "UltraSparc T2 (Niagara2)"; | |
12643 | sparc_fpu_type = "UltraSparc T2 integrated FPU"; | |
12644 | + sparc_pmu_type = "niagara2"; | |
12645 | break; | |
12646 | ||
12647 | default: | |
12648 | @@ -85,6 +89,7 @@ static void __init sun4v_cpu_probe(void) | |
12649 | prom_cpu_compatible); | |
12650 | sparc_cpu_type = "Unknown SUN4V CPU"; | |
12651 | sparc_fpu_type = "Unknown SUN4V FPU"; | |
12652 | + sparc_pmu_type = "Unknown SUN4V PMU"; | |
12653 | break; | |
12654 | } | |
12655 | } | |
12656 | @@ -117,6 +122,8 @@ retry: | |
12657 | if (linux_sparc_chips[i].impl == impl) { | |
12658 | sparc_cpu_type = | |
12659 | linux_sparc_chips[i].cpu_name; | |
12660 | + sparc_pmu_type = | |
12661 | + linux_sparc_chips[i].pmu_name; | |
12662 | break; | |
12663 | } | |
12664 | } | |
12665 | @@ -134,7 +141,8 @@ retry: | |
12666 | printk("DEBUG: manuf[%lx] impl[%lx]\n", | |
12667 | manuf, impl); | |
12668 | } | |
12669 | sparc_cpu_type = "Unknown CPU"; | |
12670 | + sparc_pmu_type = "Unknown PMU"; | |
12671 | } | |
12672 | ||
12673 | for (i = 0; i < NSPARCFPU; i++) { | |
12674 | --- a/arch/sparc64/kernel/hvcalls.S | |
12675 | +++ b/arch/sparc64/kernel/hvcalls.S | |
12676 | @@ -884,3 +884,44 @@ sun4v_mmu_demap_all: | |
12677 | retl | |
12678 | nop | |
12679 | .size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all | |
12680 | + | |
12681 | + .globl sun4v_niagara_getperf | |
12682 | + .type sun4v_niagara_getperf,#function | |
12683 | +sun4v_niagara_getperf: | |
12684 | + mov %o0, %o4 | |
12685 | + mov HV_FAST_GET_PERFREG, %o5 | |
12686 | + ta HV_FAST_TRAP | |
12687 | + stx %o1, [%o4] | |
12688 | + retl | |
12689 | + nop | |
12690 | + .size sun4v_niagara_getperf, .-sun4v_niagara_getperf | |
12691 | + | |
12692 | + .globl sun4v_niagara_setperf | |
12693 | + .type sun4v_niagara_setperf,#function | |
12694 | +sun4v_niagara_setperf: | |
12695 | + mov HV_FAST_SET_PERFREG, %o5 | |
12696 | + ta HV_FAST_TRAP | |
12697 | + retl | |
12698 | + nop | |
12699 | + .size sun4v_niagara_setperf, .-sun4v_niagara_setperf | |
12700 | + | |
12701 | + .globl sun4v_niagara2_getperf | |
12702 | + .type sun4v_niagara2_getperf,#function | |
12703 | +sun4v_niagara2_getperf: | |
12704 | + mov %o0, %o4 | |
12705 | + mov HV_FAST_N2_GET_PERFREG, %o5 | |
12706 | + ta HV_FAST_TRAP | |
12707 | + stx %o1, [%o4] | |
12708 | + retl | |
12709 | + nop | |
12710 | + .size sun4v_niagara2_getperf, .-sun4v_niagara2_getperf | |
12711 | + | |
12712 | + .globl sun4v_niagara2_setperf | |
12713 | + .type sun4v_niagara2_setperf,#function | |
12714 | +sun4v_niagara2_setperf: | |
12715 | + mov HV_FAST_N2_SET_PERFREG, %o5 | |
12716 | + ta HV_FAST_TRAP | |
12717 | + retl | |
12718 | + nop | |
12719 | + .size sun4v_niagara2_setperf, .-sun4v_niagara2_setperf | |
12720 | + | |
12721 | --- a/arch/sparc64/kernel/irq.c | |
12722 | +++ b/arch/sparc64/kernel/irq.c | |
12723 | @@ -758,6 +758,20 @@ void handler_irq(int irq, struct pt_regs | |
12724 | irq_exit(); | |
12725 | set_irq_regs(old_regs); | |
12726 | } | |
12727 | +static void unhandled_perf_irq(struct pt_regs *regs) | |
12728 | +{ | |
12729 | + unsigned long pcr, pic; | |
12730 | + | |
12731 | + read_pcr(pcr); | |
12732 | + read_pic(pic); | |
12733 | + | |
12734 | + write_pcr(0); | |
12735 | + | |
12736 | + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", | |
12737 | + smp_processor_id()); | |
12738 | + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", | |
12739 | + smp_processor_id(), pcr, pic); | |
12740 | +} | |
12741 | ||
12742 | void do_softirq(void) | |
12743 | { | |
12744 | @@ -785,6 +799,55 @@ void do_softirq(void) | |
12745 | local_irq_restore(flags); | |
12746 | } | |
12747 | ||
12748 | +/* Almost a direct copy of the powerpc PMC code. */ | |
12749 | +static DEFINE_SPINLOCK(perf_irq_lock); | |
12750 | +static void *perf_irq_owner_caller; /* mostly for debugging */ | |
12751 | +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; | |
12752 | + | |
12753 | +/* Invoked from level 15 PIL handler in trap table. */ | |
12754 | +void perfctr_irq(int irq, struct pt_regs *regs) | |
12755 | +{ | |
12756 | + clear_softint(1 << irq); | |
12757 | + perf_irq(regs); | |
12758 | +} | |
12759 | + | |
12760 | +int register_perfctr_intr(void (*handler)(struct pt_regs *)) | |
12761 | +{ | |
12762 | + int ret; | |
12763 | + | |
12764 | + if (!handler) | |
12765 | + return -EINVAL; | |
12766 | + | |
12767 | + spin_lock(&perf_irq_lock); | |
12768 | + if (perf_irq != unhandled_perf_irq) { | |
12769 | + printk(KERN_WARNING "register_perfctr_intr: " | |
12770 | + "perf IRQ busy (reserved by caller %p)\n", | |
12771 | + perf_irq_owner_caller); | |
12772 | + ret = -EBUSY; | |
12773 | + goto out; | |
12774 | + } | |
12775 | + | |
12776 | + perf_irq_owner_caller = __builtin_return_address(0); | |
12777 | + perf_irq = handler; | |
12778 | + | |
12779 | + ret = 0; | |
12780 | +out: | |
12781 | + spin_unlock(&perf_irq_lock); | |
12782 | + | |
12783 | + return ret; | |
12784 | +} | |
12785 | +EXPORT_SYMBOL_GPL(register_perfctr_intr); | |
12786 | + | |
12787 | +void release_perfctr_intr(void (*handler)(struct pt_regs *)) | |
12788 | +{ | |
12789 | + spin_lock(&perf_irq_lock); | |
12790 | + perf_irq_owner_caller = NULL; | |
12791 | + perf_irq = unhandled_perf_irq; | |
12792 | + spin_unlock(&perf_irq_lock); | |
12793 | +} | |
12794 | +EXPORT_SYMBOL_GPL(release_perfctr_intr); | |
12795 | + | |
12796 | + | |
12797 | #ifdef CONFIG_HOTPLUG_CPU | |
12798 | void fixup_irqs(void) | |
12799 | { | |
12800 | --- a/arch/sparc64/kernel/process.c | |
12801 | +++ b/arch/sparc64/kernel/process.c | |
12802 | @@ -30,6 +30,7 @@ | |
12803 | #include <linux/cpu.h> | |
12804 | #include <linux/elfcore.h> | |
12805 | #include <linux/sysrq.h> | |
12806 | +#include <linux/perfmon_kern.h> | |
12807 | ||
12808 | #include <asm/oplib.h> | |
12809 | #include <asm/uaccess.h> | |
12810 | @@ -385,11 +386,7 @@ void exit_thread(void) | |
12811 | t->utraps[0]--; | |
12812 | } | |
12813 | ||
12814 | - if (test_and_clear_thread_flag(TIF_PERFCTR)) { | |
12815 | - t->user_cntd0 = t->user_cntd1 = NULL; | |
12816 | - t->pcr_reg = 0; | |
12817 | - write_pcr(0); | |
12818 | - } | |
12819 | + pfm_exit_thread(); | |
12820 | } | |
12821 | ||
12822 | void flush_thread(void) | |
12823 | @@ -411,13 +408,6 @@ void flush_thread(void) | |
12824 | ||
12825 | set_thread_wsaved(0); | |
12826 | ||
12827 | - /* Turn off performance counters if on. */ | |
12828 | - if (test_and_clear_thread_flag(TIF_PERFCTR)) { | |
12829 | - t->user_cntd0 = t->user_cntd1 = NULL; | |
12830 | - t->pcr_reg = 0; | |
12831 | - write_pcr(0); | |
12832 | - } | |
12833 | - | |
12834 | /* Clear FPU register state. */ | |
12835 | t->fpsaved[0] = 0; | |
12836 | ||
12837 | @@ -631,16 +621,6 @@ int copy_thread(int nr, unsigned long cl | |
12838 | t->kregs->u_regs[UREG_FP] = | |
12839 | ((unsigned long) child_sf) - STACK_BIAS; | |
12840 | ||
12841 | - /* Special case, if we are spawning a kernel thread from | |
12842 | - * a userspace task (usermode helper, NFS or similar), we | |
12843 | - * must disable performance counters in the child because | |
12844 | - * the address space and protection realm are changing. | |
12845 | - */ | |
12846 | - if (t->flags & _TIF_PERFCTR) { | |
12847 | - t->user_cntd0 = t->user_cntd1 = NULL; | |
12848 | - t->pcr_reg = 0; | |
12849 | - t->flags &= ~_TIF_PERFCTR; | |
12850 | - } | |
12851 | t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT); | |
12852 | t->kregs->u_regs[UREG_G6] = (unsigned long) t; | |
12853 | t->kregs->u_regs[UREG_G4] = (unsigned long) t->task; | |
12854 | @@ -673,6 +653,8 @@ int copy_thread(int nr, unsigned long cl | |
12855 | if (clone_flags & CLONE_SETTLS) | |
12856 | t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3]; | |
12857 | ||
12858 | + pfm_copy_thread(p); | |
12859 | + | |
12860 | return 0; | |
12861 | } | |
12862 | ||
12863 | --- a/arch/sparc64/kernel/rtrap.S | |
12864 | +++ b/arch/sparc64/kernel/rtrap.S | |
12865 | @@ -65,55 +65,14 @@ __handle_user_windows: | |
12866 | ba,pt %xcc, __handle_user_windows_continue | |
12867 | ||
12868 | andn %l1, %l4, %l1 | |
12869 | -__handle_perfctrs: | |
12870 | - call update_perfctrs | |
12871 | - wrpr %g0, RTRAP_PSTATE, %pstate | |
12872 | - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate | |
12873 | - ldub [%g6 + TI_WSAVED], %o2 | |
12874 | - brz,pt %o2, 1f | |
12875 | - nop | |
12876 | - /* Redo userwin+sched+sig checks */ | |
12877 | - call fault_in_user_windows | |
12878 | - | |
12879 | - wrpr %g0, RTRAP_PSTATE, %pstate | |
12880 | - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate | |
12881 | - ldx [%g6 + TI_FLAGS], %l0 | |
12882 | - andcc %l0, _TIF_NEED_RESCHED, %g0 | |
12883 | - be,pt %xcc, 1f | |
12884 | - | |
12885 | - nop | |
12886 | - call schedule | |
12887 | - wrpr %g0, RTRAP_PSTATE, %pstate | |
12888 | - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate | |
12889 | - ldx [%g6 + TI_FLAGS], %l0 | |
12890 | -1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0 | |
12891 | - | |
12892 | - be,pt %xcc, __handle_perfctrs_continue | |
12893 | - sethi %hi(TSTATE_PEF), %o0 | |
12894 | - mov %l5, %o1 | |
12895 | - add %sp, PTREGS_OFF, %o0 | |
12896 | - mov %l0, %o2 | |
12897 | - call do_notify_resume | |
12898 | - | |
12899 | - wrpr %g0, RTRAP_PSTATE, %pstate | |
12900 | - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate | |
12901 | - /* Signal delivery can modify pt_regs tstate, so we must | |
12902 | - * reload it. | |
12903 | - */ | |
12904 | - ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 | |
12905 | - sethi %hi(0xf << 20), %l4 | |
12906 | - and %l1, %l4, %l4 | |
12907 | - andn %l1, %l4, %l1 | |
12908 | - ba,pt %xcc, __handle_perfctrs_continue | |
12909 | - | |
12910 | - sethi %hi(TSTATE_PEF), %o0 | |
12911 | __handle_userfpu: | |
12912 | rd %fprs, %l5 | |
12913 | andcc %l5, FPRS_FEF, %g0 | |
12914 | sethi %hi(TSTATE_PEF), %o0 | |
12915 | be,a,pn %icc, __handle_userfpu_continue | |
12916 | andn %l1, %o0, %l1 | |
12917 | - ba,a,pt %xcc, __handle_userfpu_continue | |
12918 | + ba,pt %xcc, __handle_userfpu_continue | |
12919 | + nop | |
12920 | ||
12921 | __handle_signal: | |
12922 | mov %l5, %o1 | |
12923 | @@ -202,12 +161,8 @@ __handle_signal_continue: | |
12924 | brnz,pn %o2, __handle_user_windows | |
12925 | nop | |
12926 | __handle_user_windows_continue: | |
12927 | - ldx [%g6 + TI_FLAGS], %l5 | |
12928 | - andcc %l5, _TIF_PERFCTR, %g0 | |
12929 | sethi %hi(TSTATE_PEF), %o0 | |
12930 | - bne,pn %xcc, __handle_perfctrs | |
12931 | -__handle_perfctrs_continue: | |
12932 | - andcc %l1, %o0, %g0 | |
12933 | + andcc %l1, %o0, %g0 | |
12934 | ||
12935 | /* This fpdepth clear is necessary for non-syscall rtraps only */ | |
12936 | user_nowork: | |
12937 | --- a/arch/sparc64/kernel/setup.c | |
12938 | +++ b/arch/sparc64/kernel/setup.c | |
12939 | @@ -352,6 +352,7 @@ static int show_cpuinfo(struct seq_file | |
12940 | seq_printf(m, | |
12941 | "cpu\t\t: %s\n" | |
12942 | "fpu\t\t: %s\n" | |
12943 | + "pmu\t\t: %s\n" | |
12944 | "prom\t\t: %s\n" | |
12945 | "type\t\t: %s\n" | |
12946 | "ncpus probed\t: %d\n" | |
12947 | @@ -364,6 +365,7 @@ static int show_cpuinfo(struct seq_file | |
12948 | , | |
12949 | sparc_cpu_type, | |
12950 | sparc_fpu_type, | |
12951 | + sparc_pmu_type, | |
12952 | prom_version, | |
12953 | ((tlb_type == hypervisor) ? | |
12954 | "sun4v" : | |
12955 | --- a/arch/sparc64/kernel/signal.c | |
12956 | +++ b/arch/sparc64/kernel/signal.c | |
12957 | @@ -23,6 +23,7 @@ | |
12958 | #include <linux/tty.h> | |
12959 | #include <linux/binfmts.h> | |
12960 | #include <linux/bitops.h> | |
12961 | +#include <linux/perfmon_kern.h> | |
12962 | ||
12963 | #include <asm/uaccess.h> | |
12964 | #include <asm/ptrace.h> | |
12965 | @@ -608,6 +609,9 @@ static void do_signal(struct pt_regs *re | |
12966 | ||
12967 | void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags) | |
12968 | { | |
12969 | + if (thread_info_flags & _TIF_PERFMON_WORK) | |
12970 | + pfm_handle_work(regs); | |
12971 | + | |
12972 | if (thread_info_flags & _TIF_SIGPENDING) | |
12973 | do_signal(regs, orig_i0); | |
12974 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | |
12975 | --- a/arch/sparc64/kernel/sys_sparc.c | |
12976 | +++ b/arch/sparc64/kernel/sys_sparc.c | |
12977 | @@ -26,7 +26,6 @@ | |
12978 | ||
12979 | #include <asm/uaccess.h> | |
12980 | #include <asm/utrap.h> | |
12981 | -#include <asm/perfctr.h> | |
12982 | #include <asm/unistd.h> | |
12983 | ||
12984 | #include "entry.h" | |
12985 | @@ -788,107 +787,11 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, | |
12986 | return ret; | |
12987 | } | |
12988 | ||
12989 | -/* Invoked by rtrap code to update performance counters in | |
12990 | - * user space. | |
12991 | - */ | |
12992 | -asmlinkage void update_perfctrs(void) | |
12993 | -{ | |
12994 | - unsigned long pic, tmp; | |
12995 | - | |
12996 | - read_pic(pic); | |
12997 | - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); | |
12998 | - __put_user(tmp, current_thread_info()->user_cntd0); | |
12999 | - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); | |
13000 | - __put_user(tmp, current_thread_info()->user_cntd1); | |
13001 | - reset_pic(); | |
13002 | -} | |
13003 | - | |
13004 | SYSCALL_DEFINE4(perfctr, int, opcode, unsigned long, arg0, | |
13005 | unsigned long, arg1, unsigned long, arg2) | |
13006 | { | |
13007 | - int err = 0; | |
13008 | - | |
13009 | - switch(opcode) { | |
13010 | - case PERFCTR_ON: | |
13011 | - current_thread_info()->pcr_reg = arg2; | |
13012 | - current_thread_info()->user_cntd0 = (u64 __user *) arg0; | |
13013 | - current_thread_info()->user_cntd1 = (u64 __user *) arg1; | |
13014 | - current_thread_info()->kernel_cntd0 = | |
13015 | - current_thread_info()->kernel_cntd1 = 0; | |
13016 | - write_pcr(arg2); | |
13017 | - reset_pic(); | |
13018 | - set_thread_flag(TIF_PERFCTR); | |
13019 | - break; | |
13020 | - | |
13021 | - case PERFCTR_OFF: | |
13022 | - err = -EINVAL; | |
13023 | - if (test_thread_flag(TIF_PERFCTR)) { | |
13024 | - current_thread_info()->user_cntd0 = | |
13025 | - current_thread_info()->user_cntd1 = NULL; | |
13026 | - current_thread_info()->pcr_reg = 0; | |
13027 | - write_pcr(0); | |
13028 | - clear_thread_flag(TIF_PERFCTR); | |
13029 | - err = 0; | |
13030 | - } | |
13031 | - break; | |
13032 | - | |
13033 | - case PERFCTR_READ: { | |
13034 | - unsigned long pic, tmp; | |
13035 | - | |
13036 | - if (!test_thread_flag(TIF_PERFCTR)) { | |
13037 | - err = -EINVAL; | |
13038 | - break; | |
13039 | - } | |
13040 | - read_pic(pic); | |
13041 | - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); | |
13042 | - err |= __put_user(tmp, current_thread_info()->user_cntd0); | |
13043 | - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); | |
13044 | - err |= __put_user(tmp, current_thread_info()->user_cntd1); | |
13045 | - reset_pic(); | |
13046 | - break; | |
13047 | - } | |
13048 | - | |
13049 | - case PERFCTR_CLRPIC: | |
13050 | - if (!test_thread_flag(TIF_PERFCTR)) { | |
13051 | - err = -EINVAL; | |
13052 | - break; | |
13053 | - } | |
13054 | - current_thread_info()->kernel_cntd0 = | |
13055 | - current_thread_info()->kernel_cntd1 = 0; | |
13056 | - reset_pic(); | |
13057 | - break; | |
13058 | - | |
13059 | - case PERFCTR_SETPCR: { | |
13060 | - u64 __user *user_pcr = (u64 __user *)arg0; | |
13061 | - | |
13062 | - if (!test_thread_flag(TIF_PERFCTR)) { | |
13063 | - err = -EINVAL; | |
13064 | - break; | |
13065 | - } | |
13066 | - err |= __get_user(current_thread_info()->pcr_reg, user_pcr); | |
13067 | - write_pcr(current_thread_info()->pcr_reg); | |
13068 | - current_thread_info()->kernel_cntd0 = | |
13069 | - current_thread_info()->kernel_cntd1 = 0; | |
13070 | - reset_pic(); | |
13071 | - break; | |
13072 | - } | |
13073 | - | |
13074 | - case PERFCTR_GETPCR: { | |
13075 | - u64 __user *user_pcr = (u64 __user *)arg0; | |
13076 | - | |
13077 | - if (!test_thread_flag(TIF_PERFCTR)) { | |
13078 | - err = -EINVAL; | |
13079 | - break; | |
13080 | - } | |
13081 | - err |= __put_user(current_thread_info()->pcr_reg, user_pcr); | |
13082 | - break; | |
13083 | - } | |
13084 | - | |
13085 | - default: | |
13086 | - err = -EINVAL; | |
13087 | - break; | |
13088 | - }; | |
13089 | - return err; | |
13090 | + /* Superseded by perfmon2 */ | |
13091 | + return -ENOSYS; | |
13092 | } | |
13093 | ||
13094 | /* | |
13095 | --- a/arch/sparc64/kernel/syscalls.S | |
13096 | +++ b/arch/sparc64/kernel/syscalls.S | |
13097 | @@ -117,26 +117,9 @@ ret_from_syscall: | |
13098 | stb %g0, [%g6 + TI_NEW_CHILD] | |
13099 | ldx [%g6 + TI_FLAGS], %l0 | |
13100 | call schedule_tail | |
13101 | - mov %g7, %o0 | |
13102 | - andcc %l0, _TIF_PERFCTR, %g0 | |
13103 | - be,pt %icc, 1f | |
13104 | - nop | |
13105 | - ldx [%g6 + TI_PCR], %o7 | |
13106 | - wr %g0, %o7, %pcr | |
13107 | - | |
13108 | - /* Blackbird errata workaround. See commentary in | |
13109 | - * smp.c:smp_percpu_timer_interrupt() for more | |
13110 | - * information. | |
13111 | - */ | |
13112 | - ba,pt %xcc, 99f | |
13113 | - nop | |
13114 | - | |
13115 | - .align 64 | |
13116 | -99: wr %g0, %g0, %pic | |
13117 | - rd %pic, %g0 | |
13118 | - | |
13119 | -1: ba,pt %xcc, ret_sys_call | |
13120 | - ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 | |
13121 | + mov %g7, %o0 | |
13122 | + ba,pt %xcc, ret_sys_call | |
13123 | + ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 | |
13124 | ||
13125 | .globl sparc_exit | |
13126 | .type sparc_exit,#function | |
13127 | --- a/arch/sparc64/kernel/systbls.S | |
13128 | +++ b/arch/sparc64/kernel/systbls.S | |
13129 | @@ -82,7 +82,9 @@ sys_call_table32: | |
13130 | .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait | |
13131 | /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate | |
13132 | .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 | |
13133 | -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1 | |
13134 | +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs | |
13135 | + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop | |
13136 | +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context | |
13137 | ||
13138 | #endif /* CONFIG_COMPAT */ | |
13139 | ||
13140 | @@ -156,4 +158,6 @@ sys_call_table: | |
13141 | .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait | |
13142 | /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate | |
13143 | .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 | |
13144 | -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1 | |
13145 | +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs | |
13146 | + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop | |
13147 | +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context | |
13148 | --- a/arch/sparc64/kernel/traps.c | |
13149 | +++ b/arch/sparc64/kernel/traps.c | |
13150 | @@ -2485,85 +2485,89 @@ extern void tsb_config_offsets_are_bolix | |
13151 | /* Only invoked on boot processor. */ | |
13152 | void __init trap_init(void) | |
13153 | { | |
13154 | - /* Compile time sanity check. */ | |
13155 | - if (TI_TASK != offsetof(struct thread_info, task) || | |
13156 | - TI_FLAGS != offsetof(struct thread_info, flags) || | |
13157 | - TI_CPU != offsetof(struct thread_info, cpu) || | |
13158 | - TI_FPSAVED != offsetof(struct thread_info, fpsaved) || | |
13159 | - TI_KSP != offsetof(struct thread_info, ksp) || | |
13160 | - TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) || | |
13161 | - TI_KREGS != offsetof(struct thread_info, kregs) || | |
13162 | - TI_UTRAPS != offsetof(struct thread_info, utraps) || | |
13163 | - TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) || | |
13164 | - TI_REG_WINDOW != offsetof(struct thread_info, reg_window) || | |
13165 | - TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) || | |
13166 | - TI_GSR != offsetof(struct thread_info, gsr) || | |
13167 | - TI_XFSR != offsetof(struct thread_info, xfsr) || | |
13168 | - TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) || | |
13169 | - TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) || | |
13170 | - TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || | |
13171 | - TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || | |
13172 | - TI_PCR != offsetof(struct thread_info, pcr_reg) || | |
13173 | - TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || | |
13174 | - TI_NEW_CHILD != offsetof(struct thread_info, new_child) || | |
13175 | - TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || | |
13176 | - TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) || | |
13177 | - TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) || | |
13178 | - TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) || | |
13179 | - TI_FPREGS != offsetof(struct thread_info, fpregs) || | |
13180 | - (TI_FPREGS & (64 - 1))) | |
13181 | - thread_info_offsets_are_bolixed_dave(); | |
13182 | + BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task)); | |
13183 | + BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags)); | |
13184 | + BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu)); | |
13185 | + BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved)); | |
13186 | + BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp)); | |
13187 | + BUILD_BUG_ON(TI_FAULT_ADDR != | |
13188 | + offsetof(struct thread_info, fault_address)); | |
13189 | + BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs)); | |
13190 | + BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps)); | |
13191 | + BUILD_BUG_ON(TI_EXEC_DOMAIN != | |
13192 | + offsetof(struct thread_info, exec_domain)); | |
13193 | + BUILD_BUG_ON(TI_REG_WINDOW != | |
13194 | + offsetof(struct thread_info, reg_window)); | |
13195 | + BUILD_BUG_ON(TI_RWIN_SPTRS != | |
13196 | + offsetof(struct thread_info, rwbuf_stkptrs)); | |
13197 | + BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr)); | |
13198 | + BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr)); | |
13199 | + BUILD_BUG_ON(TI_PRE_COUNT != | |
13200 | + offsetof(struct thread_info, preempt_count)); | |
13201 | + BUILD_BUG_ON(TI_NEW_CHILD != | |
13202 | + offsetof(struct thread_info, new_child)); | |
13203 | + BUILD_BUG_ON(TI_SYS_NOERROR != | |
13204 | + offsetof(struct thread_info, syscall_noerror)); | |
13205 | + BUILD_BUG_ON(TI_RESTART_BLOCK != | |
13206 | + offsetof(struct thread_info, restart_block)); | |
13207 | + BUILD_BUG_ON(TI_KUNA_REGS != | |
13208 | + offsetof(struct thread_info, kern_una_regs)); | |
13209 | + BUILD_BUG_ON(TI_KUNA_INSN != | |
13210 | + offsetof(struct thread_info, kern_una_insn)); | |
13211 | + BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs)); | |
13212 | + BUILD_BUG_ON((TI_FPREGS & (64 - 1))); | |
13213 | ||
13214 | - if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || | |
13215 | - (TRAP_PER_CPU_PGD_PADDR != | |
13216 | - offsetof(struct trap_per_cpu, pgd_paddr)) || | |
13217 | - (TRAP_PER_CPU_CPU_MONDO_PA != | |
13218 | - offsetof(struct trap_per_cpu, cpu_mondo_pa)) || | |
13219 | - (TRAP_PER_CPU_DEV_MONDO_PA != | |
13220 | - offsetof(struct trap_per_cpu, dev_mondo_pa)) || | |
13221 | - (TRAP_PER_CPU_RESUM_MONDO_PA != | |
13222 | - offsetof(struct trap_per_cpu, resum_mondo_pa)) || | |
13223 | - (TRAP_PER_CPU_RESUM_KBUF_PA != | |
13224 | - offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || | |
13225 | - (TRAP_PER_CPU_NONRESUM_MONDO_PA != | |
13226 | - offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || | |
13227 | - (TRAP_PER_CPU_NONRESUM_KBUF_PA != | |
13228 | - offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || | |
13229 | - (TRAP_PER_CPU_FAULT_INFO != | |
13230 | - offsetof(struct trap_per_cpu, fault_info)) || | |
13231 | - (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != | |
13232 | - offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || | |
13233 | - (TRAP_PER_CPU_CPU_LIST_PA != | |
13234 | - offsetof(struct trap_per_cpu, cpu_list_pa)) || | |
13235 | - (TRAP_PER_CPU_TSB_HUGE != | |
13236 | - offsetof(struct trap_per_cpu, tsb_huge)) || | |
13237 | - (TRAP_PER_CPU_TSB_HUGE_TEMP != | |
13238 | - offsetof(struct trap_per_cpu, tsb_huge_temp)) || | |
13239 | - (TRAP_PER_CPU_IRQ_WORKLIST_PA != | |
13240 | - offsetof(struct trap_per_cpu, irq_worklist_pa)) || | |
13241 | - (TRAP_PER_CPU_CPU_MONDO_QMASK != | |
13242 | - offsetof(struct trap_per_cpu, cpu_mondo_qmask)) || | |
13243 | - (TRAP_PER_CPU_DEV_MONDO_QMASK != | |
13244 | - offsetof(struct trap_per_cpu, dev_mondo_qmask)) || | |
13245 | - (TRAP_PER_CPU_RESUM_QMASK != | |
13246 | - offsetof(struct trap_per_cpu, resum_qmask)) || | |
13247 | - (TRAP_PER_CPU_NONRESUM_QMASK != | |
13248 | - offsetof(struct trap_per_cpu, nonresum_qmask))) | |
13249 | - trap_per_cpu_offsets_are_bolixed_dave(); | |
13250 | + BUILD_BUG_ON(TRAP_PER_CPU_THREAD != | |
13251 | + offsetof(struct trap_per_cpu, thread)); | |
13252 | + BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR != | |
13253 | + offsetof(struct trap_per_cpu, pgd_paddr)); | |
13254 | + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA != | |
13255 | + offsetof(struct trap_per_cpu, cpu_mondo_pa)); | |
13256 | + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA != | |
13257 | + offsetof(struct trap_per_cpu, dev_mondo_pa)); | |
13258 | + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA != | |
13259 | + offsetof(struct trap_per_cpu, resum_mondo_pa)); | |
13260 | + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA != | |
13261 | + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)); | |
13262 | + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA != | |
13263 | + offsetof(struct trap_per_cpu, nonresum_mondo_pa)); | |
13264 | + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA != | |
13265 | + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)); | |
13266 | + BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO != | |
13267 | + offsetof(struct trap_per_cpu, fault_info)); | |
13268 | + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != | |
13269 | + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)); | |
13270 | + BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA != | |
13271 | + offsetof(struct trap_per_cpu, cpu_list_pa)); | |
13272 | + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE != | |
13273 | + offsetof(struct trap_per_cpu, tsb_huge)); | |
13274 | + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP != | |
13275 | + offsetof(struct trap_per_cpu, tsb_huge_temp)); | |
13276 | +#if 0 | |
13277 | + BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST != | |
13278 | + offsetof(struct trap_per_cpu, irq_worklist)); | |
13279 | +#endif | |
13280 | + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK != | |
13281 | + offsetof(struct trap_per_cpu, cpu_mondo_qmask)); | |
13282 | + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK != | |
13283 | + offsetof(struct trap_per_cpu, dev_mondo_qmask)); | |
13284 | + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK != | |
13285 | + offsetof(struct trap_per_cpu, resum_qmask)); | |
13286 | + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK != | |
13287 | + offsetof(struct trap_per_cpu, nonresum_qmask)); | |
13288 | ||
13289 | - if ((TSB_CONFIG_TSB != | |
13290 | - offsetof(struct tsb_config, tsb)) || | |
13291 | - (TSB_CONFIG_RSS_LIMIT != | |
13292 | - offsetof(struct tsb_config, tsb_rss_limit)) || | |
13293 | - (TSB_CONFIG_NENTRIES != | |
13294 | - offsetof(struct tsb_config, tsb_nentries)) || | |
13295 | - (TSB_CONFIG_REG_VAL != | |
13296 | - offsetof(struct tsb_config, tsb_reg_val)) || | |
13297 | - (TSB_CONFIG_MAP_VADDR != | |
13298 | - offsetof(struct tsb_config, tsb_map_vaddr)) || | |
13299 | - (TSB_CONFIG_MAP_PTE != | |
13300 | - offsetof(struct tsb_config, tsb_map_pte))) | |
13301 | - tsb_config_offsets_are_bolixed_dave(); | |
13302 | + BUILD_BUG_ON(TSB_CONFIG_TSB != | |
13303 | + offsetof(struct tsb_config, tsb)); | |
13304 | + BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT != | |
13305 | + offsetof(struct tsb_config, tsb_rss_limit)); | |
13306 | + BUILD_BUG_ON(TSB_CONFIG_NENTRIES != | |
13307 | + offsetof(struct tsb_config, tsb_nentries)); | |
13308 | + BUILD_BUG_ON(TSB_CONFIG_REG_VAL != | |
13309 | + offsetof(struct tsb_config, tsb_reg_val)); | |
13310 | + BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR != | |
13311 | + offsetof(struct tsb_config, tsb_map_vaddr)); | |
13312 | + BUILD_BUG_ON(TSB_CONFIG_MAP_PTE != | |
13313 | + offsetof(struct tsb_config, tsb_map_pte)); | |
13314 | ||
13315 | /* Attach to the address space of init_task. On SMP we | |
13316 | * do this in smp.c:smp_callin for other cpus. | |
13317 | --- a/arch/sparc64/kernel/ttable.S | |
13318 | +++ b/arch/sparc64/kernel/ttable.S | |
13319 | @@ -72,7 +72,7 @@ tl0_irq8: BTRAP(0x48) | |
13320 | tl0_irq9: BTRAP(0x49) | |
13321 | tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) | |
13322 | tl0_irq14: TRAP_IRQ(timer_interrupt, 14) | |
13323 | -tl0_irq15: TRAP_IRQ(handler_irq, 15) | |
13324 | +tl0_irq15: TRAP_IRQ(perfctr_irq, 15) | |
13325 | tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55) | |
13326 | tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b) | |
13327 | tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f) | |
13328 | --- /dev/null | |
13329 | +++ b/arch/sparc64/perfmon/Kconfig | |
13330 | @@ -0,0 +1,26 @@ | |
13331 | +menu "Hardware Performance Monitoring support" | |
13332 | +config PERFMON | |
13333 | + bool "Perfmon2 performance monitoring interface" | |
13334 | + default n | |
13335 | + help | |
13336 | + Enables the perfmon2 interface to access the hardware | |
13337 | + performance counters. See <http://perfmon2.sf.net/> for | |
13338 | + more details. | |
13339 | + | |
13340 | +config PERFMON_DEBUG | |
13341 | + bool "Perfmon debugging" | |
13342 | + depends on PERFMON | |
13343 | + default n | |
13344 | + help | |
13345 | + Enables perfmon debugging support | |
13346 | + | |
13347 | +config PERFMON_DEBUG_FS | |
13348 | + bool "Enable perfmon statistics reporting via debugfs" | |
13349 | + default y | |
13350 | + depends on PERFMON && DEBUG_FS | |
13351 | + help | |
13352 | + Enable collection and reporting of perfmon timing statistics under | |
13353 | + debugfs. This is used for debugging and performance analysis of the | |
13354 | + subsystem. The debugfs filesystem must be mounted. | |
13355 | + | |
13356 | +endmenu | |
13357 | --- /dev/null | |
13358 | +++ b/arch/sparc64/perfmon/Makefile | |
13359 | @@ -0,0 +1 @@ | |
13360 | +obj-$(CONFIG_PERFMON) += perfmon.o | |
13361 | --- /dev/null | |
13362 | +++ b/arch/sparc64/perfmon/perfmon.c | |
13363 | @@ -0,0 +1,422 @@ | |
13364 | +/* perfmon.c: sparc64 perfmon support | |
13365 | + * | |
13366 | + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) | |
13367 | + */ | |
13368 | + | |
13369 | +#include <linux/kernel.h> | |
13370 | +#include <linux/module.h> | |
13371 | +#include <linux/irq.h> | |
13372 | +#include <linux/perfmon_kern.h> | |
13373 | + | |
13374 | +#include <asm/system.h> | |
13375 | +#include <asm/spitfire.h> | |
13376 | +#include <asm/hypervisor.h> | |
13377 | + | |
13378 | +struct pcr_ops { | |
13379 | + void (*write)(u64); | |
13380 | + u64 (*read)(void); | |
13381 | +}; | |
13382 | + | |
13383 | +static void direct_write_pcr(u64 val) | |
13384 | +{ | |
13385 | + write_pcr(val); | |
13386 | +} | |
13387 | + | |
13388 | +static u64 direct_read_pcr(void) | |
13389 | +{ | |
13390 | + u64 pcr; | |
13391 | + | |
13392 | + read_pcr(pcr); | |
13393 | + | |
13394 | + return pcr; | |
13395 | +} | |
13396 | + | |
13397 | +static struct pcr_ops direct_pcr_ops = { | |
13398 | + .write = direct_write_pcr, | |
13399 | + .read = direct_read_pcr, | |
13400 | +}; | |
13401 | + | |
13402 | +/* Using the hypervisor call is needed so that we can set the | |
13403 | + * hypervisor trace bit correctly, which is hyperprivileged. | |
13404 | + */ | |
13405 | +static void n2_write_pcr(u64 val) | |
13406 | +{ | |
13407 | + unsigned long ret; | |
13408 | + | |
13409 | + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); | |
13410 | +	if (ret != HV_EOK) | |
13411 | + write_pcr(val); | |
13412 | +} | |
13413 | + | |
13414 | +static u64 n2_read_pcr(void) | |
13415 | +{ | |
13416 | + u64 pcr; | |
13417 | + | |
13418 | + read_pcr(pcr); | |
13419 | + | |
13420 | + return pcr; | |
13421 | +} | |
13422 | + | |
13423 | +static struct pcr_ops n2_pcr_ops = { | |
13424 | + .write = n2_write_pcr, | |
13425 | + .read = n2_read_pcr, | |
13426 | +}; | |
13427 | + | |
13428 | +static struct pcr_ops *pcr_ops; | |
13429 | + | |
13430 | +void pfm_arch_write_pmc(struct pfm_context *ctx, | |
13431 | + unsigned int cnum, u64 value) | |
13432 | +{ | |
13433 | + /* | |
13434 | + * we only write to the actual register when monitoring is | |
13435 | + * active (pfm_start was issued) | |
13436 | + */ | |
13437 | + if (ctx && ctx->flags.started == 0) | |
13438 | + return; | |
13439 | + | |
13440 | + pcr_ops->write(value); | |
13441 | +} | |
13442 | + | |
13443 | +u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) | |
13444 | +{ | |
13445 | + return pcr_ops->read(); | |
13446 | +} | |
13447 | + | |
13448 | +/* | |
13449 | + * collect pending overflowed PMDs. Called from pfm_ctxsw() | |
13450 | + * and from PMU interrupt handler. Must fill in set->povfl_pmds[] | |
13451 | + * and set->npend_ovfls. Interrupts are masked | |
13452 | + */ | |
13453 | +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
13454 | +{ | |
13455 | + unsigned int max = ctx->regs.max_intr_pmd; | |
13456 | + u64 wmask = 1ULL << pfm_pmu_conf->counter_width; | |
13457 | + u64 *intr_pmds = ctx->regs.intr_pmds; | |
13458 | + u64 *used_mask = set->used_pmds; | |
13459 | + u64 mask[PFM_PMD_BV]; | |
13460 | + unsigned int i; | |
13461 | + | |
13462 | + bitmap_and(cast_ulp(mask), | |
13463 | + cast_ulp(intr_pmds), | |
13464 | + cast_ulp(used_mask), | |
13465 | + max); | |
13466 | + | |
13467 | + /* | |
13468 | + * check all PMD that can generate interrupts | |
13469 | + * (that includes counters) | |
13470 | + */ | |
13471 | + for (i = 0; i < max; i++) { | |
13472 | + if (test_bit(i, mask)) { | |
13473 | + u64 new_val = pfm_arch_read_pmd(ctx, i); | |
13474 | + | |
13475 | + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n", | |
13476 | + i, (unsigned long long)new_val, | |
13477 | + (new_val&wmask) ? 1 : 0); | |
13478 | + | |
13479 | + if (new_val & wmask) { | |
13480 | + __set_bit(i, set->povfl_pmds); | |
13481 | + set->npend_ovfls++; | |
13482 | + } | |
13483 | + } | |
13484 | + } | |
13485 | +} | |
13486 | + | |
13487 | +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx, | |
13488 | + struct pfm_event_set *set) | |
13489 | +{ | |
13490 | + unsigned int i, max = ctx->regs.max_pmc; | |
13491 | + | |
13492 | + /* | |
13493 | + * clear enable bits, assume all pmcs are enable pmcs | |
13494 | + */ | |
13495 | + for (i = 0; i < max; i++) { | |
13496 | + if (test_bit(i, set->used_pmcs)) | |
13497 | + pfm_arch_write_pmc(ctx, i, 0); | |
13498 | + } | |
13499 | + | |
13500 | + if (set->npend_ovfls) | |
13501 | + return; | |
13502 | + | |
13503 | + __pfm_get_ovfl_pmds(ctx, set); | |
13504 | +} | |
13505 | + | |
13506 | +/* | |
13507 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
13508 | + * Context is locked. Interrupts are masked. Monitoring is active. | |
13509 | + * PMU access is guaranteed. PMC and PMD registers are live in PMU. | |
13510 | + * | |
13511 | + * for per-thread: | |
13512 | + * must stop monitoring for the task | |
13513 | + * | |
13514 | + * Return: | |
13515 | + * non-zero : did not save PMDs (as part of stopping the PMU) | |
13516 | + * 0 : saved PMDs (no need to save them in caller) | |
13517 | + */ | |
13518 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) | |
13519 | +{ | |
13520 | + /* | |
13521 | + * disable lazy restore of PMC registers. | |
13522 | + */ | |
13523 | + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; | |
13524 | + | |
13525 | + pfm_stop_active(task, ctx, ctx->active_set); | |
13526 | + | |
13527 | + return 1; | |
13528 | +} | |
13529 | + | |
13530 | +/* | |
13531 | + * Called from pfm_stop() and idle notifier | |
13532 | + * | |
13533 | + * Interrupts are masked. Context is locked. Set is the active set. | |
13534 | + * | |
13535 | + * For per-thread: | |
13536 | + * task is not necessarily current. If not current task, then | |
13537 | + * task is guaranteed stopped and off any cpu. Access to PMU | |
13538 | + * is not guaranteed. Interrupts are masked. Context is locked. | |
13539 | + * Set is the active set. | |
13540 | + * | |
13541 | + * For system-wide: | |
13542 | + * task is current | |
13543 | + * | |
13544 | + * must disable active monitoring. ctx cannot be NULL | |
13545 | + */ | |
13546 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) | |
13547 | +{ | |
13548 | + /* | |
13549 | + * no need to go through stop_save() | |
13550 | + * if we are already stopped | |
13551 | + */ | |
13552 | + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) | |
13553 | + return; | |
13554 | + | |
13555 | + /* | |
13556 | + * stop live registers and collect pending overflow | |
13557 | + */ | |
13558 | + if (task == current) | |
13559 | + pfm_stop_active(task, ctx, ctx->active_set); | |
13560 | +} | |
13561 | + | |
13562 | +/* | |
13563 | + * Enable active monitoring. Called from pfm_start() and | |
13564 | + * pfm_arch_unmask_monitoring(). | |
13565 | + * | |
13566 | + * Interrupts are masked. Context is locked. Set is the active set. | |
13567 | + * | |
13568 | + * For per-thread: | |
13569 | + * Task is not necessarily current. If not current task, then task | |
13570 | + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed. | |
13571 | + * | |
13572 | + * For system-wide: | |
13573 | + * task is always current | |
13574 | + * | |
13575 | + * must enable active monitoring. | |
13576 | + */ | |
13577 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) | |
13578 | +{ | |
13579 | + struct pfm_event_set *set; | |
13580 | + unsigned int max_pmc = ctx->regs.max_pmc; | |
13581 | + unsigned int i; | |
13582 | + | |
13583 | + if (task != current) | |
13584 | + return; | |
13585 | + | |
13586 | + set = ctx->active_set; | |
13587 | + for (i = 0; i < max_pmc; i++) { | |
13588 | + if (test_bit(i, set->used_pmcs)) | |
13589 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
13590 | + } | |
13591 | +} | |
13592 | + | |
13593 | +/* | |
13594 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
13595 | + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() | |
13596 | + * context is locked. Interrupts are masked. set cannot be NULL. | |
13597 | + * Access to the PMU is guaranteed. | |
13598 | + * | |
13599 | + * function must restore all PMD registers from set. | |
13600 | + */ | |
13601 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
13602 | +{ | |
13603 | + unsigned int max_pmd = ctx->regs.max_pmd; | |
13604 | + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
13605 | + u64 *impl_pmds = ctx->regs.pmds; | |
13606 | + unsigned int i; | |
13607 | + | |
13608 | + /* | |
13609 | + * must restore all pmds to avoid leaking | |
13610 | + * information to user. | |
13611 | + */ | |
13612 | + for (i = 0; i < max_pmd; i++) { | |
13613 | + u64 val; | |
13614 | + | |
13615 | + if (test_bit(i, impl_pmds) == 0) | |
13616 | + continue; | |
13617 | + | |
13618 | + val = set->pmds[i].value; | |
13619 | + | |
13620 | + /* | |
13621 | + * set upper bits for counter to ensure | |
13622 | + * overflow will trigger | |
13623 | + */ | |
13624 | + val &= ovfl_mask; | |
13625 | + | |
13626 | + pfm_arch_write_pmd(ctx, i, val); | |
13627 | + } | |
13628 | +} | |
13629 | + | |
13630 | +/* | |
13631 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
13632 | + * pfm_context_load_sys(), pfm_ctxsw(). | |
13633 | + * Context is locked. Interrupts are masked. set cannot be NULL. | |
13634 | + * Access to the PMU is guaranteed. | |
13635 | + * | |
13636 | + * function must restore all PMC registers from set, if needed. | |
13637 | + */ | |
13638 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
13639 | +{ | |
13640 | + unsigned int max_pmc = ctx->regs.max_pmc; | |
13641 | + u64 *impl_pmcs = ctx->regs.pmcs; | |
13642 | + unsigned int i; | |
13643 | + | |
13644 | + /* If we're masked or stopped we don't need to bother restoring | |
13645 | + * the PMCs now. | |
13646 | + */ | |
13647 | + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) | |
13648 | + return; | |
13649 | + | |
13650 | + /* | |
13651 | + * restore all pmcs | |
13652 | + */ | |
13653 | + for (i = 0; i < max_pmc; i++) | |
13654 | + if (test_bit(i, impl_pmcs)) | |
13655 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
13656 | +} | |
13657 | + | |
13658 | +char *pfm_arch_get_pmu_module_name(void) | |
13659 | +{ | |
13660 | + return NULL; | |
13661 | +} | |
13662 | + | |
13663 | +void perfmon_interrupt(struct pt_regs *regs) | |
13664 | +{ | |
13665 | + pfm_interrupt_handler(instruction_pointer(regs), regs); | |
13666 | +} | |
13667 | + | |
13668 | +static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = { | |
13669 | + PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0), | |
13670 | +}; | |
13671 | + | |
13672 | +static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = { | |
13673 | + PMD_D(PFM_REG_C, "PIC0", 0), | |
13674 | + PMD_D(PFM_REG_C, "PIC1", 0), | |
13675 | +}; | |
13676 | + | |
13677 | +static int pfm_sparc64_probe(void) | |
13678 | +{ | |
13679 | + return 0; | |
13680 | +} | |
13681 | + | |
13682 | +static struct pfm_pmu_config pmu_sparc64_pmu_conf = { | |
13683 | + .counter_width = 31, | |
13684 | + .pmd_desc = pfm_sparc64_pmd_desc, | |
13685 | + .num_pmd_entries = 2, | |
13686 | + .pmc_desc = pfm_sparc64_pmc_desc, | |
13687 | + .num_pmc_entries = 1, | |
13688 | + .probe_pmu = pfm_sparc64_probe, | |
13689 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
13690 | + .owner = THIS_MODULE, | |
13691 | +}; | |
13692 | + | |
13693 | +static unsigned long perf_hsvc_group; | |
13694 | +static unsigned long perf_hsvc_major; | |
13695 | +static unsigned long perf_hsvc_minor; | |
13696 | + | |
13697 | +static int __init register_perf_hsvc(void) | |
13698 | +{ | |
13699 | + if (tlb_type == hypervisor) { | |
13700 | + switch (sun4v_chip_type) { | |
13701 | + case SUN4V_CHIP_NIAGARA1: | |
13702 | +		perf_hsvc_group = HV_GRP_NIAG_PERF; | |
13703 | + break; | |
13704 | + | |
13705 | + case SUN4V_CHIP_NIAGARA2: | |
13706 | + perf_hsvc_group = HV_GRP_N2_CPU; | |
13707 | + break; | |
13708 | + | |
13709 | + default: | |
13710 | + return -ENODEV; | |
13711 | + } | |
13712 | + | |
13713 | + | |
13714 | + perf_hsvc_major = 1; | |
13715 | + perf_hsvc_minor = 0; | |
13716 | + if (sun4v_hvapi_register(perf_hsvc_group, | |
13717 | + perf_hsvc_major, | |
13718 | + &perf_hsvc_minor)) { | |
13719 | + printk("perfmon: Could not register N2 hvapi.\n"); | |
13720 | + return -ENODEV; | |
13721 | + } | |
13722 | + } | |
13723 | + return 0; | |
13724 | +} | |
13725 | + | |
13726 | +static void unregister_perf_hsvc(void) | |
13727 | +{ | |
13728 | + if (tlb_type != hypervisor) | |
13729 | + return; | |
13730 | + sun4v_hvapi_unregister(perf_hsvc_group); | |
13731 | +} | |
13732 | + | |
13733 | +static int __init pfm_sparc64_pmu_init(void) | |
13734 | +{ | |
13735 | + u64 mask; | |
13736 | + int err; | |
13737 | + | |
13738 | + err = register_perf_hsvc(); | |
13739 | + if (err) | |
13740 | + return err; | |
13741 | + | |
13742 | + if (tlb_type == hypervisor && | |
13743 | + sun4v_chip_type == SUN4V_CHIP_NIAGARA2) | |
13744 | + pcr_ops = &n2_pcr_ops; | |
13745 | + else | |
13746 | + pcr_ops = &direct_pcr_ops; | |
13747 | + | |
13748 | + if (!strcmp(sparc_pmu_type, "ultra12")) | |
13749 | + mask = (0xf << 11) | (0xf << 4) | 0x7; | |
13750 | + else if (!strcmp(sparc_pmu_type, "ultra3") || | |
13751 | + !strcmp(sparc_pmu_type, "ultra3i") || | |
13752 | + !strcmp(sparc_pmu_type, "ultra3+") || | |
13753 | + !strcmp(sparc_pmu_type, "ultra4+")) | |
13754 | + mask = (0x3f << 11) | (0x3f << 4) | 0x7; | |
13755 | + else if (!strcmp(sparc_pmu_type, "niagara2")) | |
13756 | + mask = ((1UL << 63) | (1UL << 62) | | |
13757 | + (1UL << 31) | (0xfUL << 27) | (0xffUL << 19) | | |
13758 | + (1UL << 18) | (0xfUL << 14) | (0xff << 6) | | |
13759 | + (0x3UL << 4) | 0x7UL); | |
13760 | + else if (!strcmp(sparc_pmu_type, "niagara")) | |
13761 | + mask = ((1UL << 9) | (1UL << 8) | | |
13762 | + (0x7UL << 4) | 0x7UL); | |
13763 | + else { | |
13764 | + err = -ENODEV; | |
13765 | + goto out_err; | |
13766 | + } | |
13767 | + | |
13768 | + pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type; | |
13769 | + pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask; | |
13770 | + | |
13771 | + return pfm_pmu_register(&pmu_sparc64_pmu_conf); | |
13772 | + | |
13773 | +out_err: | |
13774 | + unregister_perf_hsvc(); | |
13775 | + return err; | |
13776 | +} | |
13777 | + | |
13778 | +static void __exit pfm_sparc64_pmu_exit(void) | |
13779 | +{ | |
13780 | + unregister_perf_hsvc(); | |
13781 | + return pfm_pmu_unregister(&pmu_sparc64_pmu_conf); | |
13782 | +} | |
13783 | + | |
13784 | +module_init(pfm_sparc64_pmu_init); | |
13785 | +module_exit(pfm_sparc64_pmu_exit); | |
13786 | --- a/arch/x86/Kconfig | |
13787 | +++ b/arch/x86/Kconfig | |
13788 | @@ -1448,6 +1448,8 @@ config COMPAT_VDSO | |
13789 | ||
13790 | If unsure, say Y. | |
13791 | ||
13792 | +source "arch/x86/perfmon/Kconfig" | |
13793 | + | |
13794 | endmenu | |
13795 | ||
13796 | config ARCH_ENABLE_MEMORY_HOTPLUG | |
13797 | --- a/arch/x86/Makefile | |
13798 | +++ b/arch/x86/Makefile | |
13799 | @@ -152,6 +152,8 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/ | |
13800 | core-y += arch/x86/kernel/ | |
13801 | core-y += arch/x86/mm/ | |
13802 | ||
13803 | +core-$(CONFIG_PERFMON) += arch/x86/perfmon/ | |
13804 | + | |
13805 | # Remaining sub architecture files | |
13806 | core-y += $(mcore-y) | |
13807 | ||
13808 | --- a/arch/x86/ia32/ia32entry.S | |
13809 | +++ b/arch/x86/ia32/ia32entry.S | |
13810 | @@ -834,4 +834,16 @@ ia32_sys_call_table: | |
13811 | .quad sys_dup3 /* 330 */ | |
13812 | .quad sys_pipe2 | |
13813 | .quad sys_inotify_init1 | |
13814 | + .quad sys_pfm_create_context | |
13815 | + .quad sys_pfm_write_pmcs | |
13816 | + .quad sys_pfm_write_pmds /* 335 */ | |
13817 | + .quad sys_pfm_read_pmds | |
13818 | + .quad sys_pfm_load_context | |
13819 | + .quad sys_pfm_start | |
13820 | + .quad sys_pfm_stop | |
13821 | + .quad sys_pfm_restart /* 340 */ | |
13822 | + .quad sys_pfm_create_evtsets | |
13823 | + .quad sys_pfm_getinfo_evtsets | |
13824 | + .quad sys_pfm_delete_evtsets | |
13825 | + .quad sys_pfm_unload_context | |
13826 | ia32_syscall_end: | |
13827 | --- a/arch/x86/kernel/apic_32.c | |
13828 | +++ b/arch/x86/kernel/apic_32.c | |
13829 | @@ -28,6 +28,7 @@ | |
13830 | #include <linux/acpi_pmtmr.h> | |
13831 | #include <linux/module.h> | |
13832 | #include <linux/dmi.h> | |
13833 | +#include <linux/perfmon_kern.h> | |
13834 | ||
13835 | #include <asm/atomic.h> | |
13836 | #include <asm/smp.h> | |
13837 | @@ -697,6 +698,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 ms | |
13838 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | |
13839 | return APIC_EILVT_LVTOFF_IBS; | |
13840 | } | |
13841 | +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); | |
13842 | ||
13843 | /* | |
13844 | * Local APIC start and shutdown | |
13845 | @@ -1397,6 +1399,9 @@ void __init apic_intr_init(void) | |
13846 | #ifdef CONFIG_X86_MCE_P4THERMAL | |
13847 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | |
13848 | #endif | |
13849 | +#ifdef CONFIG_PERFMON | |
13850 | + set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt); | |
13851 | +#endif | |
13852 | } | |
13853 | ||
13854 | /** | |
13855 | --- a/arch/x86/kernel/apic_64.c | |
13856 | +++ b/arch/x86/kernel/apic_64.c | |
13857 | @@ -299,6 +299,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 ms | |
13858 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | |
13859 | return APIC_EILVT_LVTOFF_IBS; | |
13860 | } | |
13861 | +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); | |
13862 | ||
13863 | /* | |
13864 | * Program the next event, relative to now | |
13865 | --- a/arch/x86/kernel/cpu/common.c | |
13866 | +++ b/arch/x86/kernel/cpu/common.c | |
13867 | @@ -5,6 +5,7 @@ | |
13868 | #include <linux/module.h> | |
13869 | #include <linux/percpu.h> | |
13870 | #include <linux/bootmem.h> | |
13871 | +#include <linux/perfmon_kern.h> | |
13872 | #include <asm/processor.h> | |
13873 | #include <asm/i387.h> | |
13874 | #include <asm/msr.h> | |
13875 | @@ -728,6 +729,8 @@ void __cpuinit cpu_init(void) | |
13876 | current_thread_info()->status = 0; | |
13877 | clear_used_math(); | |
13878 | mxcsr_feature_mask_init(); | |
13879 | + | |
13880 | + pfm_init_percpu(); | |
13881 | } | |
13882 | ||
13883 | #ifdef CONFIG_HOTPLUG_CPU | |
13884 | --- a/arch/x86/kernel/entry_32.S | |
13885 | +++ b/arch/x86/kernel/entry_32.S | |
13886 | @@ -513,7 +513,7 @@ ENDPROC(system_call) | |
13887 | ALIGN | |
13888 | RING0_PTREGS_FRAME # can't unwind into user space anyway | |
13889 | work_pending: | |
13890 | - testb $_TIF_NEED_RESCHED, %cl | |
13891 | + testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx | |
13892 | jz work_notifysig | |
13893 | work_resched: | |
13894 | call schedule | |
13895 | --- a/arch/x86/kernel/entry_64.S | |
13896 | +++ b/arch/x86/kernel/entry_64.S | |
13897 | @@ -890,7 +890,13 @@ END(error_interrupt) | |
13898 | ENTRY(spurious_interrupt) | |
13899 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | |
13900 | END(spurious_interrupt) | |
13901 | - | |
13902 | + | |
13903 | +#ifdef CONFIG_PERFMON | |
13904 | +ENTRY(pmu_interrupt) | |
13905 | + apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt | |
13906 | +END(pmu_interrupt) | |
13907 | +#endif | |
13908 | + | |
13909 | /* | |
13910 | * Exception entry points. | |
13911 | */ | |
13912 | --- a/arch/x86/kernel/irqinit_64.c | |
13913 | +++ b/arch/x86/kernel/irqinit_64.c | |
13914 | @@ -11,6 +11,7 @@ | |
13915 | #include <linux/kernel_stat.h> | |
13916 | #include <linux/sysdev.h> | |
13917 | #include <linux/bitops.h> | |
13918 | +#include <linux/perfmon_kern.h> | |
13919 | ||
13920 | #include <asm/acpi.h> | |
13921 | #include <asm/atomic.h> | |
13922 | @@ -217,6 +218,10 @@ void __init native_init_IRQ(void) | |
13923 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | |
13924 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | |
13925 | ||
13926 | +#ifdef CONFIG_PERFMON | |
13927 | + alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt); | |
13928 | +#endif | |
13929 | + | |
13930 | if (!acpi_ioapic) | |
13931 | setup_irq(2, &irq2); | |
13932 | } | |
13933 | --- a/arch/x86/kernel/process_32.c | |
13934 | +++ b/arch/x86/kernel/process_32.c | |
13935 | @@ -36,6 +36,7 @@ | |
13936 | #include <linux/personality.h> | |
13937 | #include <linux/tick.h> | |
13938 | #include <linux/percpu.h> | |
13939 | +#include <linux/perfmon_kern.h> | |
13940 | #include <linux/prctl.h> | |
13941 | ||
13942 | #include <asm/uaccess.h> | |
13943 | @@ -277,6 +278,7 @@ void exit_thread(void) | |
13944 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | |
13945 | put_cpu(); | |
13946 | } | |
13947 | + pfm_exit_thread(); | |
13948 | } | |
13949 | ||
13950 | void flush_thread(void) | |
13951 | @@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long cl | |
13952 | ||
13953 | savesegment(gs, p->thread.gs); | |
13954 | ||
13955 | + pfm_copy_thread(p); | |
13956 | + | |
13957 | tsk = current; | |
13958 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | |
13959 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | |
13960 | @@ -450,6 +454,9 @@ __switch_to_xtra(struct task_struct *pre | |
13961 | prev = &prev_p->thread; | |
13962 | next = &next_p->thread; | |
13963 | ||
13964 | + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW)) | |
13965 | + pfm_ctxsw_out(prev_p, next_p); | |
13966 | + | |
13967 | debugctl = prev->debugctlmsr; | |
13968 | if (next->ds_area_msr != prev->ds_area_msr) { | |
13969 | /* we clear debugctl to make sure DS | |
13970 | @@ -462,6 +469,9 @@ __switch_to_xtra(struct task_struct *pre | |
13971 | if (next->debugctlmsr != debugctl) | |
13972 | update_debugctlmsr(next->debugctlmsr); | |
13973 | ||
13974 | + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW)) | |
13975 | + pfm_ctxsw_in(prev_p, next_p); | |
13976 | + | |
13977 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | |
13978 | set_debugreg(next->debugreg0, 0); | |
13979 | set_debugreg(next->debugreg1, 1); | |
13980 | --- a/arch/x86/kernel/process_64.c | |
13981 | +++ b/arch/x86/kernel/process_64.c | |
13982 | @@ -36,6 +36,7 @@ | |
13983 | #include <linux/kprobes.h> | |
13984 | #include <linux/kdebug.h> | |
13985 | #include <linux/tick.h> | |
13986 | +#include <linux/perfmon_kern.h> | |
13987 | #include <linux/prctl.h> | |
13988 | ||
13989 | #include <asm/uaccess.h> | |
13990 | @@ -240,6 +241,7 @@ void exit_thread(void) | |
13991 | t->io_bitmap_max = 0; | |
13992 | put_cpu(); | |
13993 | } | |
13994 | + pfm_exit_thread(); | |
13995 | } | |
13996 | ||
13997 | void flush_thread(void) | |
13998 | @@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long cl | |
13999 | savesegment(es, p->thread.es); | |
14000 | savesegment(ds, p->thread.ds); | |
14001 | ||
14002 | + pfm_copy_thread(p); | |
14003 | + | |
14004 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | |
14005 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | |
14006 | if (!p->thread.io_bitmap_ptr) { | |
14007 | @@ -474,6 +478,9 @@ static inline void __switch_to_xtra(stru | |
14008 | prev = &prev_p->thread, | |
14009 | next = &next_p->thread; | |
14010 | ||
14011 | + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW)) | |
14012 | + pfm_ctxsw_out(prev_p, next_p); | |
14013 | + | |
14014 | debugctl = prev->debugctlmsr; | |
14015 | if (next->ds_area_msr != prev->ds_area_msr) { | |
14016 | /* we clear debugctl to make sure DS | |
14017 | @@ -486,6 +493,9 @@ static inline void __switch_to_xtra(stru | |
14018 | if (next->debugctlmsr != debugctl) | |
14019 | update_debugctlmsr(next->debugctlmsr); | |
14020 | ||
14021 | + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW)) | |
14022 | + pfm_ctxsw_in(prev_p, next_p); | |
14023 | + | |
14024 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | |
14025 | loaddebug(next, 0); | |
14026 | loaddebug(next, 1); | |
14027 | --- a/arch/x86/kernel/signal_32.c | |
14028 | +++ b/arch/x86/kernel/signal_32.c | |
14029 | @@ -19,6 +19,7 @@ | |
14030 | #include <linux/wait.h> | |
14031 | #include <linux/tracehook.h> | |
14032 | #include <linux/elf.h> | |
14033 | +#include <linux/perfmon_kern.h> | |
14034 | #include <linux/smp.h> | |
14035 | #include <linux/mm.h> | |
14036 | ||
14037 | @@ -664,6 +665,10 @@ static void do_signal(struct pt_regs *re | |
14038 | void | |
14039 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |
14040 | { | |
14041 | + /* process perfmon asynchronous work (e.g. block thread or reset) */ | |
14042 | + if (thread_info_flags & _TIF_PERFMON_WORK) | |
14043 | + pfm_handle_work(regs); | |
14044 | + | |
14045 | /* deal with pending signal delivery */ | |
14046 | if (thread_info_flags & _TIF_SIGPENDING) | |
14047 | do_signal(regs); | |
14048 | --- a/arch/x86/kernel/signal_64.c | |
14049 | +++ b/arch/x86/kernel/signal_64.c | |
14050 | @@ -20,6 +20,7 @@ | |
14051 | #include <linux/stddef.h> | |
14052 | #include <linux/personality.h> | |
14053 | #include <linux/compiler.h> | |
14054 | +#include <linux/perfmon_kern.h> | |
14055 | #include <asm/processor.h> | |
14056 | #include <asm/ucontext.h> | |
14057 | #include <asm/uaccess.h> | |
14058 | @@ -528,12 +529,17 @@ static void do_signal(struct pt_regs *re | |
14059 | void do_notify_resume(struct pt_regs *regs, void *unused, | |
14060 | __u32 thread_info_flags) | |
14061 | { | |
14062 | + | |
14063 | #ifdef CONFIG_X86_MCE | |
14064 | /* notify userspace of pending MCEs */ | |
14065 | if (thread_info_flags & _TIF_MCE_NOTIFY) | |
14066 | mce_notify_user(); | |
14067 | #endif /* CONFIG_X86_MCE */ | |
14068 | ||
14069 | + /* process perfmon asynchronous work (e.g. block thread or reset) */ | |
14070 | + if (thread_info_flags & _TIF_PERFMON_WORK) | |
14071 | + pfm_handle_work(regs); | |
14072 | + | |
14073 | /* deal with pending signal delivery */ | |
14074 | if (thread_info_flags & _TIF_SIGPENDING) | |
14075 | do_signal(regs); | |
14076 | --- a/arch/x86/kernel/smpboot.c | |
14077 | +++ b/arch/x86/kernel/smpboot.c | |
14078 | @@ -42,6 +42,7 @@ | |
14079 | #include <linux/init.h> | |
14080 | #include <linux/smp.h> | |
14081 | #include <linux/module.h> | |
14082 | +#include <linux/perfmon_kern.h> | |
14083 | #include <linux/sched.h> | |
14084 | #include <linux/percpu.h> | |
14085 | #include <linux/bootmem.h> | |
14086 | @@ -1377,6 +1378,7 @@ int __cpu_disable(void) | |
14087 | remove_cpu_from_maps(cpu); | |
14088 | unlock_vector_lock(); | |
14089 | fixup_irqs(cpu_online_map); | |
14090 | + pfm_cpu_disable(); | |
14091 | return 0; | |
14092 | } | |
14093 | ||
14094 | --- a/arch/x86/kernel/syscall_table_32.S | |
14095 | +++ b/arch/x86/kernel/syscall_table_32.S | |
14096 | @@ -332,3 +332,15 @@ ENTRY(sys_call_table) | |
14097 | .long sys_dup3 /* 330 */ | |
14098 | .long sys_pipe2 | |
14099 | .long sys_inotify_init1 | |
14100 | + .long sys_pfm_create_context | |
14101 | + .long sys_pfm_write_pmcs | |
14102 | + .long sys_pfm_write_pmds /* 335 */ | |
14103 | + .long sys_pfm_read_pmds | |
14104 | + .long sys_pfm_load_context | |
14105 | + .long sys_pfm_start | |
14106 | + .long sys_pfm_stop | |
14107 | + .long sys_pfm_restart /* 340 */ | |
14108 | + .long sys_pfm_create_evtsets | |
14109 | + .long sys_pfm_getinfo_evtsets | |
14110 | + .long sys_pfm_delete_evtsets | |
14111 | + .long sys_pfm_unload_context | |
14112 | --- a/arch/x86/oprofile/nmi_int.c | |
14113 | +++ b/arch/x86/oprofile/nmi_int.c | |
14114 | @@ -16,6 +16,7 @@ | |
14115 | #include <linux/moduleparam.h> | |
14116 | #include <linux/kdebug.h> | |
14117 | #include <linux/cpu.h> | |
14118 | +#include <linux/perfmon_kern.h> | |
14119 | #include <asm/nmi.h> | |
14120 | #include <asm/msr.h> | |
14121 | #include <asm/apic.h> | |
14122 | @@ -217,12 +218,18 @@ static int nmi_setup(void) | |
14123 | int err = 0; | |
14124 | int cpu; | |
14125 | ||
14126 | - if (!allocate_msrs()) | |
14127 | + if (pfm_session_allcpus_acquire()) | |
14128 | + return -EBUSY; | |
14129 | + | |
14130 | + if (!allocate_msrs()) { | |
14131 | + pfm_session_allcpus_release(); | |
14132 | return -ENOMEM; | |
14133 | + } | |
14134 | ||
14135 | err = register_die_notifier(&profile_exceptions_nb); | |
14136 | if (err) { | |
14137 | free_msrs(); | |
14138 | + pfm_session_allcpus_release(); | |
14139 | return err; | |
14140 | } | |
14141 | ||
14142 | @@ -304,6 +311,7 @@ static void nmi_shutdown(void) | |
14143 | model->shutdown(msrs); | |
14144 | free_msrs(); | |
14145 | put_cpu_var(cpu_msrs); | |
14146 | + pfm_session_allcpus_release(); | |
14147 | } | |
14148 | ||
14149 | static void nmi_cpu_start(void *dummy) | |
14150 | --- /dev/null | |
14151 | +++ b/arch/x86/perfmon/Kconfig | |
14152 | @@ -0,0 +1,89 @@ | |
14153 | +menu "Hardware Performance Monitoring support" | |
14154 | +config PERFMON | |
14155 | + bool "Perfmon2 performance monitoring interface" | |
14156 | + select X86_LOCAL_APIC | |
14157 | + default n | |
14158 | + help | |
14159 | + Enables the perfmon2 interface to access the hardware | |
14160 | + performance counters. See <http://perfmon2.sf.net/> for | |
14161 | + more details. | |
14162 | + | |
14163 | +config PERFMON_DEBUG | |
14164 | + bool "Perfmon debugging" | |
14165 | + default n | |
14166 | + depends on PERFMON | |
14167 | + help | |
14168 | + Enables perfmon debugging support | |
14169 | + | |
14170 | +config PERFMON_DEBUG_FS | |
14171 | + bool "Enable perfmon statistics reporting via debugfs" | |
14172 | + default y | |
14173 | + depends on PERFMON && DEBUG_FS | |
14174 | + help | |
14175 | + Enable collection and reporting of perfmon timing statistics under | |
14176 | + debugfs. This is used for debugging and performance analysis of the | |
14177 | + subsystem.The debugfs filesystem must be mounted. | |
14178 | + | |
14179 | +config X86_PERFMON_P6 | |
14180 | + tristate "Support for Intel P6/Pentium M processor hardware performance counters" | |
14181 | + depends on PERFMON && X86_32 | |
14182 | + default n | |
14183 | + help | |
14184 | + Enables support for Intel P6-style hardware performance counters. | |
14185 | + To be used for with Intel Pentium III, PentiumPro, Pentium M processors. | |
14186 | + | |
14187 | +config X86_PERFMON_P4 | |
14188 | + tristate "Support for Intel Pentium 4/Xeon hardware performance counters" | |
14189 | + depends on PERFMON | |
14190 | + default n | |
14191 | + help | |
14192 | + Enables support for Intel Pentium 4/Xeon (Netburst) hardware performance | |
14193 | + counters. | |
14194 | + | |
14195 | +config X86_PERFMON_PEBS_P4 | |
14196 | + tristate "Support for Intel Netburst Precise Event-Based Sampling (PEBS)" | |
14197 | + depends on PERFMON && X86_PERFMON_P4 | |
14198 | + default n | |
14199 | + help | |
14200 | + Enables support for Precise Event-Based Sampling (PEBS) on the Intel | |
14201 | + Netburst processors such as Pentium 4, Xeon which support it. | |
14202 | + | |
14203 | +config X86_PERFMON_CORE | |
14204 | + tristate "Support for Intel Core-based performance counters" | |
14205 | + depends on PERFMON | |
14206 | + default n | |
14207 | + help | |
14208 | + Enables support for Intel Core-based performance counters. Enable | |
14209 | + this option to support Intel Core 2 processors. | |
14210 | + | |
14211 | +config X86_PERFMON_PEBS_CORE | |
14212 | + tristate "Support for Intel Core Precise Event-Based Sampling (PEBS)" | |
14213 | + depends on PERFMON && X86_PERFMON_CORE | |
14214 | + default n | |
14215 | + help | |
14216 | + Enables support for Precise Event-Based Sampling (PEBS) on the Intel | |
14217 | + Core processors. | |
14218 | + | |
14219 | +config X86_PERFMON_INTEL_ATOM | |
14220 | + tristate "Support for Intel Atom processor" | |
14221 | + depends on PERFMON | |
14222 | + default n | |
14223 | + help | |
14224 | + Enables support for Intel Atom processors. | |
14225 | + | |
14226 | +config X86_PERFMON_INTEL_ARCH | |
14227 | + tristate "Support for Intel architectural perfmon v1/v2" | |
14228 | + depends on PERFMON | |
14229 | + default n | |
14230 | + help | |
14231 | + Enables support for Intel architectural performance counters. | |
14232 | + This feature was introduced with Intel Core Solo/Core Duo processors. | |
14233 | + | |
14234 | +config X86_PERFMON_AMD64 | |
14235 | + tristate "Support AMD Athlon64/Opteron64 hardware performance counters" | |
14236 | + depends on PERFMON | |
14237 | + default n | |
14238 | + help | |
14239 | + Enables support for Athlon64/Opterton64 hardware performance counters. | |
14240 | + Support for family 6, 15 and 16(10H) processors. | |
14241 | +endmenu | |
14242 | --- /dev/null | |
14243 | +++ b/arch/x86/perfmon/Makefile | |
14244 | @@ -0,0 +1,13 @@ | |
14245 | +# | |
14246 | +# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
14247 | +# Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
14248 | +# | |
14249 | +obj-$(CONFIG_PERFMON) += perfmon.o | |
14250 | +obj-$(CONFIG_X86_PERFMON_P6) += perfmon_p6.o | |
14251 | +obj-$(CONFIG_X86_PERFMON_P4) += perfmon_p4.o | |
14252 | +obj-$(CONFIG_X86_PERFMON_CORE) += perfmon_intel_core.o | |
14253 | +obj-$(CONFIG_X86_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o | |
14254 | +obj-$(CONFIG_X86_PERFMON_PEBS_P4) += perfmon_pebs_p4_smpl.o | |
14255 | +obj-$(CONFIG_X86_PERFMON_PEBS_CORE) += perfmon_pebs_core_smpl.o | |
14256 | +obj-$(CONFIG_X86_PERFMON_AMD64) += perfmon_amd64.o | |
14257 | +obj-$(CONFIG_X86_PERFMON_INTEL_ATOM) += perfmon_intel_atom.o | |
14258 | --- /dev/null | |
14259 | +++ b/arch/x86/perfmon/perfmon.c | |
14260 | @@ -0,0 +1,761 @@ | |
14261 | +/* | |
14262 | + * This file implements the X86 specific support for the perfmon2 interface | |
14263 | + * | |
14264 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
14265 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
14266 | + * | |
14267 | + * Copyright (c) 2007 Advanced Micro Devices, Inc. | |
14268 | + * Contributed by Robert Richter <robert.richter@amd.com> | |
14269 | + * | |
14270 | + * This program is free software; you can redistribute it and/or | |
14271 | + * modify it under the terms of version 2 of the GNU General Public | |
14272 | + * License as published by the Free Software Foundation. | |
14273 | + * | |
14274 | + * This program is distributed in the hope that it will be useful, | |
14275 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14276 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14277 | + * General Public License for more details. | |
14278 | + * | |
14279 | + * You should have received a copy of the GNU General Public License | |
14280 | + * along with this program; if not, write to the Free Software | |
14281 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
14282 | + * 02111-1307 USA | |
14283 | + */ | |
14284 | +#include <linux/interrupt.h> | |
14285 | +#include <linux/perfmon_kern.h> | |
14286 | +#include <linux/kprobes.h> | |
14287 | +#include <linux/kdebug.h> | |
14288 | +#include <linux/nmi.h> | |
14289 | + | |
14290 | +#include <asm/apic.h> | |
14291 | + | |
14292 | +DEFINE_PER_CPU(unsigned long, real_iip); | |
14293 | +DEFINE_PER_CPU(int, pfm_using_nmi); | |
14294 | +DEFINE_PER_CPU(unsigned long, saved_lvtpc); | |
14295 | + | |
14296 | +/** | |
14297 | + * pfm_arch_ctxswin_thread - thread context switch in | |
14298 | + * @task: task switched in | |
14299 | + * @ctx: context for the task | |
14300 | + * | |
14301 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
14302 | + * set cannot be NULL. Context is locked. Interrupts are masked. | |
14303 | + * | |
14304 | + * Caller has already restored all PMD and PMC registers, if | |
14305 | + * necessary (i.e., lazy restore scheme). | |
14306 | + * | |
14307 | + * On x86, the only common code just needs to unsecure RDPMC if necessary | |
14308 | + * | |
14309 | + * On model-specific features, e.g., PEBS, IBS, are taken care of in the | |
14310 | + * corresponding PMU description module | |
14311 | + */ | |
14312 | +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) | |
14313 | +{ | |
14314 | + struct pfm_arch_context *ctx_arch; | |
14315 | + | |
14316 | + ctx_arch = pfm_ctx_arch(ctx); | |
14317 | + | |
14318 | + /* | |
14319 | + * restore saved real iip | |
14320 | + */ | |
14321 | + if (ctx->active_set->npend_ovfls) | |
14322 | + __get_cpu_var(real_iip) = ctx_arch->saved_real_iip; | |
14323 | + | |
14324 | + /* | |
14325 | + * enable RDPMC on this CPU | |
14326 | + */ | |
14327 | + if (ctx_arch->flags.insecure) | |
14328 | + set_in_cr4(X86_CR4_PCE); | |
14329 | +} | |
14330 | + | |
14331 | +/** | |
14332 | + * pfm_arch_ctxswout_thread - context switch out thread | |
14333 | + * @task: task switched out | |
14334 | + * @ctx : context switched out | |
14335 | + * | |
14336 | + * Called from pfm_ctxsw(). Task is guaranteed to be current. | |
14337 | + * Context is locked. Interrupts are masked. Monitoring may be active. | |
14338 | + * PMU access is guaranteed. PMC and PMD registers are live in PMU. | |
14339 | + * | |
14340 | + * Return: | |
14341 | + * non-zero : did not save PMDs (as part of stopping the PMU) | |
14342 | + * 0 : saved PMDs (no need to save them in caller) | |
14343 | + */ | |
14344 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) | |
14345 | +{ | |
14346 | + struct pfm_arch_context *ctx_arch; | |
14347 | + struct pfm_arch_pmu_info *pmu_info; | |
14348 | + | |
14349 | + ctx_arch = pfm_ctx_arch(ctx); | |
14350 | + pmu_info = pfm_pmu_info(); | |
14351 | + | |
14352 | + /* | |
14353 | + * disable lazy restore of PMCS on ctxswin because | |
14354 | + * we modify some of them. | |
14355 | + */ | |
14356 | + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; | |
14357 | + | |
14358 | + if (ctx->active_set->npend_ovfls) | |
14359 | + ctx_arch->saved_real_iip = __get_cpu_var(real_iip); | |
14360 | + | |
14361 | + /* | |
14362 | + * disable RDPMC on this CPU | |
14363 | + */ | |
14364 | + if (ctx_arch->flags.insecure) | |
14365 | + clear_in_cr4(X86_CR4_PCE); | |
14366 | + | |
14367 | + if (ctx->state == PFM_CTX_MASKED) | |
14368 | + return 1; | |
14369 | + | |
14370 | + return pmu_info->stop_save(ctx, ctx->active_set); | |
14371 | +} | |
14372 | + | |
14373 | +/** | |
14374 | + * pfm_arch_stop - deactivate monitoring | |
14375 | + * @task: task to stop | |
14376 | + * @ctx: context to stop | |
14377 | + * | |
14378 | + * Called from pfm_stop() | |
14379 | + * Interrupts are masked. Context is locked. Set is the active set. | |
14380 | + * | |
14381 | + * For per-thread: | |
14382 | + * task is not necessarily current. If not current task, then | |
14383 | + * task is guaranteed stopped and off any cpu. Access to PMU | |
14384 | + * is not guaranteed. | |
14385 | + * | |
14386 | + * For system-wide: | |
14387 | + * task is current | |
14388 | + * | |
14389 | + * must disable active monitoring. ctx cannot be NULL | |
14390 | + */ | |
14391 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) | |
14392 | +{ | |
14393 | + struct pfm_arch_pmu_info *pmu_info; | |
14394 | + | |
14395 | + pmu_info = pfm_pmu_info(); | |
14396 | + | |
14397 | + /* | |
14398 | + * no need to go through stop_save() | |
14399 | + * if we are already stopped | |
14400 | + */ | |
14401 | + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) | |
14402 | + return; | |
14403 | + | |
14404 | + if (task != current) | |
14405 | + return; | |
14406 | + | |
14407 | + pmu_info->stop_save(ctx, ctx->active_set); | |
14408 | +} | |
14409 | + | |
14410 | + | |
14411 | +/** | |
14412 | + * pfm_arch_start - activate monitoring | |
14413 | + * @task: task to start | |
14414 | + * @ctx: context to stop | |
14415 | + * | |
14416 | + * Interrupts are masked. Context is locked. | |
14417 | + * | |
14418 | + * For per-thread: | |
14419 | + * Task is not necessarily current. If not current task, then task | |
14420 | + * is guaranteed stopped and off any cpu. No access to PMU is task | |
14421 | + * is not current. | |
14422 | + * | |
14423 | + * For system-wide: | |
14424 | + * task is always current | |
14425 | + */ | |
14426 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) | |
14427 | +{ | |
14428 | + struct pfm_event_set *set; | |
14429 | + | |
14430 | + set = ctx->active_set; | |
14431 | + | |
14432 | + if (task != current) | |
14433 | + return; | |
14434 | + | |
14435 | + /* | |
14436 | + * cannot restore PMC if no access to PMU. Will be done | |
14437 | + * when the thread is switched back in | |
14438 | + */ | |
14439 | + | |
14440 | + pfm_arch_restore_pmcs(ctx, set); | |
14441 | +} | |
14442 | + | |
14443 | +/** | |
14444 | + * pfm_arch_restore_pmds - reload PMD registers | |
14445 | + * @ctx: context to restore from | |
14446 | + * @set: current event set | |
14447 | + * | |
14448 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
14449 | + * pfm_context_load_sys(), pfm_ctxsw() | |
14450 | + * | |
14451 | + * Context is locked. Interrupts are masked. Set cannot be NULL. | |
14452 | + * Access to the PMU is guaranteed. | |
14453 | + */ | |
14454 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
14455 | +{ | |
14456 | + struct pfm_arch_pmu_info *pmu_info; | |
14457 | + u16 i, num; | |
14458 | + | |
14459 | + pmu_info = pfm_pmu_info(); | |
14460 | + | |
14461 | + num = set->nused_pmds; | |
14462 | + | |
14463 | + /* | |
14464 | + * model-specific override | |
14465 | + */ | |
14466 | + if (pmu_info->restore_pmds) { | |
14467 | + pmu_info->restore_pmds(ctx, set); | |
14468 | + return; | |
14469 | + } | |
14470 | + | |
14471 | + /* | |
14472 | + * we can restore only the PMD we use because: | |
14473 | + * | |
14474 | + * - can only read with pfm_read_pmds() the registers | |
14475 | + * declared used via pfm_write_pmds(), smpl_pmds, reset_pmds | |
14476 | + * | |
14477 | + * - if cr4.pce=1, only counters are exposed to user. RDPMC | |
14478 | + * does not work with other types of PMU registers.Thus, no | |
14479 | + * address is ever exposed by counters | |
14480 | + * | |
14481 | + * - there is never a dependency between one pmd register and | |
14482 | + * another | |
14483 | + */ | |
14484 | + for (i = 0; num; i++) { | |
14485 | + if (likely(test_bit(i, cast_ulp(set->used_pmds)))) { | |
14486 | + pfm_write_pmd(ctx, i, set->pmds[i].value); | |
14487 | + num--; | |
14488 | + } | |
14489 | + } | |
14490 | +} | |
14491 | + | |
14492 | +/** | |
14493 | + * pfm_arch_restore_pmcs - reload PMC registers | |
14494 | + * @ctx: context to restore from | |
14495 | + * @set: current event set | |
14496 | + * | |
14497 | + * function called from pfm_switch_sets(), pfm_context_load_thread(), | |
14498 | + * pfm_context_load_sys(), pfm_ctxsw(). | |
14499 | + * | |
14500 | + * Context is locked. Interrupts are masked. set cannot be NULL. | |
14501 | + * Access to the PMU is guaranteed. | |
14502 | + * | |
14503 | + * function must restore all PMC registers from set | |
14504 | + */ | |
14505 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
14506 | +{ | |
14507 | + struct pfm_arch_pmu_info *pmu_info; | |
14508 | + u64 *mask; | |
14509 | + u16 i, num; | |
14510 | + | |
14511 | + pmu_info = pfm_pmu_info(); | |
14512 | + | |
14513 | + /* | |
14514 | + * we need to restore PMCs only when: | |
14515 | + * - context is not masked | |
14516 | + * - monitoring activated | |
14517 | + * | |
14518 | + * Masking monitoring after an overflow does not change the | |
14519 | + * value of flags.started | |
14520 | + */ | |
14521 | + if (ctx->state == PFM_CTX_MASKED || !ctx->flags.started) | |
14522 | + return; | |
14523 | + | |
14524 | + /* | |
14525 | + * model-specific override | |
14526 | + */ | |
14527 | + if (pmu_info->restore_pmcs) { | |
14528 | + pmu_info->restore_pmcs(ctx, set); | |
14529 | + return; | |
14530 | + } | |
14531 | + /* | |
14532 | + * restore all pmcs | |
14533 | + * | |
14534 | + * It is not possible to restore only the pmcs we used because | |
14535 | + * certain PMU models (e.g. Pentium 4) have dependencies. Thus | |
14536 | + * we do not want one application using stale PMC coming from | |
14537 | + * another one. | |
14538 | + * | |
14539 | + * On PMU models where there is no dependencies between pmc, then | |
14540 | + * it is possible to optimize by only restoring the registers that | |
14541 | + * are used, and this can be done with the models-specific override | |
14542 | + * for this function. | |
14543 | + * | |
14544 | + * The default code takes the safest approach, i.e., assume the worse | |
14545 | + */ | |
14546 | + mask = ctx->regs.pmcs; | |
14547 | + num = ctx->regs.num_pmcs; | |
14548 | + for (i = 0; num; i++) { | |
14549 | + if (test_bit(i, cast_ulp(mask))) { | |
14550 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
14551 | + num--; | |
14552 | + } | |
14553 | + } | |
14554 | +} | |
14555 | + | |
14556 | +/** | |
14557 | + * smp_pmu_interrupt - lowest level PMU interrupt handler for X86 | |
14558 | + * @regs: machine state | |
14559 | + * | |
14560 | + * The PMU interrupt is handled through an interrupt gate, therefore | |
14561 | + * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts. | |
14562 | + * | |
14563 | + * The perfmon interrupt handler MUST run with interrupts disabled due | |
14564 | + * to possible race with other, higher priority interrupts, such as timer | |
14565 | + * or IPI function calls. | |
14566 | + * | |
14567 | + * See description in IA-32 architecture manual, Vol 3 section 5.8.1 | |
14568 | + */ | |
14569 | +void smp_pmu_interrupt(struct pt_regs *regs) | |
14570 | +{ | |
14571 | + struct pfm_arch_pmu_info *pmu_info; | |
14572 | + struct pfm_context *ctx; | |
14573 | + unsigned long iip; | |
14574 | + int using_nmi; | |
14575 | + | |
14576 | + using_nmi = __get_cpu_var(pfm_using_nmi); | |
14577 | + | |
14578 | + ack_APIC_irq(); | |
14579 | + | |
14580 | + irq_enter(); | |
14581 | + | |
14582 | + /* | |
14583 | + * when using NMI, pfm_handle_nmi() gets called | |
14584 | + * first. It stops monitoring and record the | |
14585 | + * iip into real_iip, then it repost the interrupt | |
14586 | + * using the lower priority vector LOCAL_PERFMON_VECTOR | |
14587 | + * | |
14588 | + * On some processors, e.g., P4, it may be that some | |
14589 | + * state is already recorded from pfm_handle_nmi() | |
14590 | + * and it only needs to be copied back into the normal | |
14591 | + * fields so it can be used transparently by higher level | |
14592 | + * code. | |
14593 | + */ | |
14594 | + if (using_nmi) { | |
14595 | + ctx = __get_cpu_var(pmu_ctx); | |
14596 | + pmu_info = pfm_pmu_info(); | |
14597 | + iip = __get_cpu_var(real_iip); | |
14598 | + if (ctx && pmu_info->nmi_copy_state) | |
14599 | + pmu_info->nmi_copy_state(ctx); | |
14600 | + } else | |
14601 | + iip = instruction_pointer(regs); | |
14602 | + | |
14603 | + pfm_interrupt_handler(iip, regs); | |
14604 | + | |
14605 | + /* | |
14606 | + * On Intel P6, Pentium M, P4, Intel Core: | |
14607 | + * - it is necessary to clear the MASK field for the LVTPC | |
14608 | + * vector. Otherwise interrupts remain masked. See | |
14609 | + * section 8.5.1 | |
14610 | + * AMD X86-64: | |
14611 | + * - the documentation does not stipulate the behavior. | |
14612 | + * To be safe, we also rewrite the vector to clear the | |
14613 | + * mask field | |
14614 | + */ | |
14615 | + if (!using_nmi && current_cpu_data.x86_vendor == X86_VENDOR_INTEL) | |
14616 | + apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR); | |
14617 | + | |
14618 | + irq_exit(); | |
14619 | +} | |
14620 | + | |
14621 | +/** | |
14622 | + * pfm_handle_nmi - PMU NMI handler notifier callback | |
14623 | + * @nb ; notifier block | |
14624 | + * @val: type of die notifier | |
14625 | + * @data: die notifier-specific data | |
14626 | + * | |
14627 | + * called from notify_die() notifier from an trap handler path. We only | |
14628 | + * care about NMI related callbacks, and ignore everything else. | |
14629 | + * | |
14630 | + * Cannot grab any locks, include the perfmon context lock | |
14631 | + * | |
14632 | + * Must detect if NMI interrupt comes from perfmon, and if so it must | |
14633 | + * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt | |
14634 | + * handler needs to grab the context lock, thus is cannot be run directly | |
14635 | + * from the NMI interrupt call path. | |
14636 | + */ | |
14637 | +static int __kprobes pfm_handle_nmi(struct notifier_block *nb, | |
14638 | + unsigned long val, | |
14639 | + void *data) | |
14640 | +{ | |
14641 | + struct die_args *args = data; | |
14642 | + struct pfm_context *ctx; | |
14643 | + struct pfm_arch_pmu_info *pmu_info; | |
14644 | + | |
14645 | + /* | |
14646 | + * only NMI related calls | |
14647 | + */ | |
14648 | + if (val != DIE_NMI_IPI) | |
14649 | + return NOTIFY_DONE; | |
14650 | + | |
14651 | + /* | |
14652 | + * perfmon not using NMI | |
14653 | + */ | |
14654 | + if (!__get_cpu_var(pfm_using_nmi)) | |
14655 | + return NOTIFY_DONE; | |
14656 | + | |
14657 | + /* | |
14658 | + * No context | |
14659 | + */ | |
14660 | + ctx = __get_cpu_var(pmu_ctx); | |
14661 | + if (!ctx) { | |
14662 | + PFM_DBG_ovfl("no ctx"); | |
14663 | + return NOTIFY_DONE; | |
14664 | + } | |
14665 | + | |
14666 | + /* | |
14667 | + * Detect if we have overflows, i.e., NMI interrupt | |
14668 | + * caused by PMU | |
14669 | + */ | |
14670 | + pmu_info = pfm_pmu_conf->pmu_info; | |
14671 | + if (!pmu_info->has_ovfls(ctx)) { | |
14672 | + PFM_DBG_ovfl("no ovfl"); | |
14673 | + return NOTIFY_DONE; | |
14674 | + } | |
14675 | + | |
14676 | + /* | |
14677 | + * we stop the PMU to avoid further overflow before this | |
14678 | + * one is treated by lower priority interrupt handler | |
14679 | + */ | |
14680 | + pmu_info->quiesce(); | |
14681 | + | |
14682 | + /* | |
14683 | + * record actual instruction pointer | |
14684 | + */ | |
14685 | + __get_cpu_var(real_iip) = instruction_pointer(args->regs); | |
14686 | + | |
14687 | + /* | |
14688 | + * post lower priority interrupt (LOCAL_PERFMON_VECTOR) | |
14689 | + */ | |
14690 | + pfm_arch_resend_irq(ctx); | |
14691 | + | |
14692 | + pfm_stats_inc(ovfl_intr_nmi_count); | |
14693 | + | |
14694 | + /* | |
14695 | + * we need to rewrite the APIC vector on Intel | |
14696 | + */ | |
14697 | + if (current_cpu_data.x86_vendor == X86_VENDOR_INTEL) | |
14698 | + apic_write(APIC_LVTPC, APIC_DM_NMI); | |
14699 | + | |
14700 | + /* | |
14701 | + * the notification was for us | |
14702 | + */ | |
14703 | + return NOTIFY_STOP; | |
14704 | +} | |
14705 | + | |
14706 | +static struct notifier_block pfm_nmi_nb = { | |
14707 | + .notifier_call = pfm_handle_nmi | |
14708 | +}; | |
14709 | + | |
14710 | +/** | |
14711 | + * pfm_arch_get_pmu_module_name - get PMU description module name for autoload | |
14712 | + * | |
14713 | + * called from pfm_pmu_request_module | |
14714 | + */ | |
14715 | +char *pfm_arch_get_pmu_module_name(void) | |
14716 | +{ | |
14717 | + switch (current_cpu_data.x86) { | |
14718 | + case 6: | |
14719 | + switch (current_cpu_data.x86_model) { | |
14720 | + case 3: /* Pentium II */ | |
14721 | + case 7 ... 11: | |
14722 | + case 13: | |
14723 | + return "perfmon_p6"; | |
14724 | + case 15: /* Merom */ | |
14725 | + case 23: /* Penryn */ | |
14726 | + return "perfmon_intel_core"; | |
14727 | + case 28: /* Atom/Silverthorne */ | |
14728 | + return "perfmon_intel_atom"; | |
14729 | + case 29: /* Dunnington */ | |
14730 | + return "perfmon_intel_core"; | |
14731 | + default: | |
14732 | + goto try_arch; | |
14733 | + } | |
14734 | + case 15: | |
14735 | + case 16: | |
14736 | + /* All Opteron processors */ | |
14737 | + if (current_cpu_data.x86_vendor == X86_VENDOR_AMD) | |
14738 | + return "perfmon_amd64"; | |
14739 | + | |
14740 | + switch (current_cpu_data.x86_model) { | |
14741 | + case 0 ... 6: | |
14742 | + return "perfmon_p4"; | |
14743 | + } | |
14744 | + /* FALL THROUGH */ | |
14745 | + default: | |
14746 | +try_arch: | |
14747 | + if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) | |
14748 | + return "perfmon_intel_arch"; | |
14749 | + return NULL; | |
14750 | + } | |
14751 | + return NULL; | |
14752 | +} | |
14753 | + | |
14754 | +/** | |
14755 | + * pfm_arch_resend_irq - post perfmon interrupt on regular vector | |
14756 | + * | |
14757 | + * called from pfm_ctxswin_thread() and pfm_handle_nmi() | |
14758 | + */ | |
14759 | +void pfm_arch_resend_irq(struct pfm_context *ctx) | |
14760 | +{ | |
14761 | + unsigned long val, dest; | |
14762 | + /* | |
14763 | + * we cannot use hw_resend_irq() because it goes to | |
14764 | + * the I/O APIC. We need to go to the Local APIC. | |
14765 | + * | |
14766 | + * The "int vec" is not the right solution either | |
14767 | + * because it triggers a software intr. We need | |
14768 | + * to regenerate the interrupt and have it pended | |
14769 | + * until we unmask interrupts. | |
14770 | + * | |
14771 | + * Instead we send ourself an IPI on the perfmon | |
14772 | + * vector. | |
14773 | + */ | |
14774 | + val = APIC_DEST_SELF|APIC_INT_ASSERT| | |
14775 | + APIC_DM_FIXED|LOCAL_PERFMON_VECTOR; | |
14776 | + | |
14777 | + dest = apic_read(APIC_ID); | |
14778 | + apic_write(APIC_ICR2, dest); | |
14779 | + apic_write(APIC_ICR, val); | |
14780 | +} | |
14781 | + | |
14782 | +/** | |
14783 | + * pfm_arch_pmu_acquire_percpu - setup APIC per CPU | |
14784 | + * @data: contains pmu flags | |
14785 | + */ | |
14786 | +static void pfm_arch_pmu_acquire_percpu(void *data) | |
14787 | +{ | |
14788 | + | |
14789 | + struct pfm_arch_pmu_info *pmu_info; | |
14790 | + unsigned int tmp, vec; | |
14791 | + unsigned long flags = (unsigned long)data; | |
14792 | + unsigned long lvtpc; | |
14793 | + | |
14794 | + pmu_info = pfm_pmu_conf->pmu_info; | |
14795 | + | |
14796 | + /* | |
14797 | + * we only reprogram the LVTPC vector if we have detected | |
14798 | + * no sharing, otherwise it means the APIC is already programmed | |
14799 | + * and we use whatever vector (likely NMI) is there | |
14800 | + */ | |
14801 | + if (!(flags & PFM_X86_FL_SHARING)) { | |
14802 | + if (flags & PFM_X86_FL_USE_NMI) | |
14803 | + vec = APIC_DM_NMI; | |
14804 | + else | |
14805 | + vec = LOCAL_PERFMON_VECTOR; | |
14806 | + | |
14807 | + tmp = apic_read(APIC_LVTERR); | |
14808 | + apic_write(APIC_LVTERR, tmp | APIC_LVT_MASKED); | |
14809 | + apic_write(APIC_LVTPC, vec); | |
14810 | + apic_write(APIC_LVTERR, tmp); | |
14811 | + } | |
14812 | + lvtpc = (unsigned long)apic_read(APIC_LVTPC); | |
14813 | + | |
14814 | + __get_cpu_var(pfm_using_nmi) = lvtpc == APIC_DM_NMI; | |
14815 | + | |
14816 | + PFM_DBG("LTVPC=0x%lx using_nmi=%d", lvtpc, __get_cpu_var(pfm_using_nmi)); | |
14817 | + | |
14818 | + /* | |
14819 | + * invoke model specific acquire routine. May be used for | |
14820 | + * model-specific initializations | |
14821 | + */ | |
14822 | + if (pmu_info->acquire_pmu_percpu) | |
14823 | + pmu_info->acquire_pmu_percpu(); | |
14824 | +} | |
14825 | + | |
14826 | +/** | |
14827 | + * pfm_arch_pmu_acquire - acquire PMU resource from system | |
14828 | + * @unavail_pmcs : bitmask to use to set unavailable pmcs | |
14829 | + * @unavail_pmds : bitmask to use to set unavailable pmds | |
14830 | + * | |
14831 | + * interrupts are not masked | |
14832 | + * | |
14833 | + * Grab PMU registers from lower level MSR allocator | |
14834 | + * | |
14835 | + * Program the APIC according the possible interrupt vector | |
14836 | + * either LOCAL_PERFMON_VECTOR or NMI | |
14837 | + */ | |
14838 | +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) | |
14839 | +{ | |
14840 | + struct pfm_arch_pmu_info *pmu_info; | |
14841 | + struct pfm_regmap_desc *d; | |
14842 | + u16 i, nlost; | |
14843 | + | |
14844 | + pmu_info = pfm_pmu_conf->pmu_info; | |
14845 | + pmu_info->flags &= ~PFM_X86_FL_SHARING; | |
14846 | + | |
14847 | + nlost = 0; | |
14848 | + | |
14849 | + d = pfm_pmu_conf->pmc_desc; | |
14850 | + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { | |
14851 | + if (!(d->type & PFM_REG_I)) | |
14852 | + continue; | |
14853 | + | |
14854 | + if (d->type & PFM_REG_V) | |
14855 | + continue; | |
14856 | + /* | |
14857 | + * reserve register with lower-level allocator | |
14858 | + */ | |
14859 | + if (!reserve_evntsel_nmi(d->hw_addr)) { | |
14860 | + PFM_DBG("pmc%d(%s) already used", i, d->desc); | |
14861 | + __set_bit(i, cast_ulp(unavail_pmcs)); | |
14862 | + nlost++; | |
14863 | + continue; | |
14864 | + } | |
14865 | + } | |
14866 | + PFM_DBG("nlost=%d info_flags=0x%x\n", nlost, pmu_info->flags); | |
14867 | + /* | |
14868 | + * some PMU models (e.g., P6) do not support sharing | |
14869 | + * so check if we found less than the expected number of PMC registers | |
14870 | + */ | |
14871 | + if (nlost) { | |
14872 | + if (pmu_info->flags & PFM_X86_FL_NO_SHARING) { | |
14873 | + PFM_INFO("PMU already used by another subsystem, " | |
14874 | + "PMU does not support sharing, " | |
14875 | + "try disabling Oprofile or " | |
14876 | + "reboot with nmi_watchdog=0"); | |
14877 | + goto undo; | |
14878 | + } | |
14879 | + pmu_info->flags |= PFM_X86_FL_SHARING; | |
14880 | + } | |
14881 | + | |
14882 | + d = pfm_pmu_conf->pmd_desc; | |
14883 | + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) { | |
14884 | + if (!(d->type & PFM_REG_I)) | |
14885 | + continue; | |
14886 | + | |
14887 | + if (d->type & PFM_REG_V) | |
14888 | + continue; | |
14889 | + | |
14890 | + if (!reserve_perfctr_nmi(d->hw_addr)) { | |
14891 | + PFM_DBG("pmd%d(%s) already used", i, d->desc); | |
14892 | + __set_bit(i, cast_ulp(unavail_pmds)); | |
14893 | + } | |
14894 | + } | |
14895 | + /* | |
14896 | + * program APIC on each CPU | |
14897 | + */ | |
14898 | + on_each_cpu(pfm_arch_pmu_acquire_percpu, | |
14899 | + (void *)(unsigned long)pmu_info->flags , 1); | |
14900 | + | |
14901 | + return 0; | |
14902 | +undo: | |
14903 | + /* | |
14904 | + * must undo reservation of pmcs in case of error | |
14905 | + */ | |
14906 | + d = pfm_pmu_conf->pmc_desc; | |
14907 | + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { | |
14908 | + if (!(d->type & (PFM_REG_I|PFM_REG_V))) | |
14909 | + continue; | |
14910 | + if (!test_bit(i, cast_ulp(unavail_pmcs))) | |
14911 | + release_evntsel_nmi(d->hw_addr); | |
14912 | + } | |
14913 | + return -EBUSY; | |
14914 | +} | |
14915 | +/** | |
14916 | + * pfm-arch_pmu_release_percpu - clear NMI state for one CPU | |
14917 | + * | |
14918 | + */ | |
14919 | +static void pfm_arch_pmu_release_percpu(void *data) | |
14920 | +{ | |
14921 | + struct pfm_arch_pmu_info *pmu_info; | |
14922 | + | |
14923 | + pmu_info = pfm_pmu_conf->pmu_info; | |
14924 | + | |
14925 | + __get_cpu_var(pfm_using_nmi) = 0; | |
14926 | + | |
14927 | + /* | |
14928 | + * invoke model specific release routine. | |
14929 | + * May be used to undo certain initializations | |
14930 | + * or free some model-specific ressources. | |
14931 | + */ | |
14932 | + if (pmu_info->release_pmu_percpu) | |
14933 | + pmu_info->release_pmu_percpu(); | |
14934 | +} | |
14935 | + | |
14936 | +/** | |
14937 | + * pfm_arch_pmu_release - release PMU resource to system | |
14938 | + * | |
14939 | + * called from pfm_pmu_release() | |
14940 | + * interrupts are not masked | |
14941 | + * | |
14942 | + * On x86, we return the PMU registers to the MSR allocator | |
14943 | + */ | |
14944 | +void pfm_arch_pmu_release(void) | |
14945 | +{ | |
14946 | + struct pfm_regmap_desc *d; | |
14947 | + u16 i, n; | |
14948 | + | |
14949 | + d = pfm_pmu_conf->pmc_desc; | |
14950 | + n = pfm_pmu_conf->regs_all.num_pmcs; | |
14951 | + for (i = 0; n; i++, d++) { | |
14952 | + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
14953 | + continue; | |
14954 | + release_evntsel_nmi(d->hw_addr); | |
14955 | + n--; | |
14956 | + PFM_DBG("pmc%u released", i); | |
14957 | + } | |
14958 | + d = pfm_pmu_conf->pmd_desc; | |
14959 | + n = pfm_pmu_conf->regs_all.num_pmds; | |
14960 | + for (i = 0; n; i++, d++) { | |
14961 | + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmds))) | |
14962 | + continue; | |
14963 | + release_perfctr_nmi(d->hw_addr); | |
14964 | + n--; | |
14965 | + PFM_DBG("pmd%u released", i); | |
14966 | + } | |
14967 | + | |
14968 | + /* clear NMI variable if used */ | |
14969 | + if (__get_cpu_var(pfm_using_nmi)) | |
14970 | + on_each_cpu(pfm_arch_pmu_release_percpu, NULL , 1); | |
14971 | +} | |
14972 | + | |
14973 | +/** | |
14974 | + * pfm_arch_pmu_config_init - validate PMU description structure | |
14975 | + * @cfg: PMU description structure | |
14976 | + * | |
14977 | + * return: | |
14978 | + * 0 if valid | |
14979 | + * errno otherwise | |
14980 | + * | |
14981 | + * called from pfm_pmu_register() | |
14982 | + */ | |
14983 | +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) | |
14984 | +{ | |
14985 | + struct pfm_arch_pmu_info *pmu_info; | |
14986 | + | |
14987 | + pmu_info = pfm_pmu_info(); | |
14988 | + if (!pmu_info) { | |
14989 | + PFM_DBG("%s missing pmu_info", cfg->pmu_name); | |
14990 | + return -EINVAL; | |
14991 | + } | |
14992 | + if (!pmu_info->has_ovfls) { | |
14993 | + PFM_DBG("%s missing has_ovfls callback", cfg->pmu_name); | |
14994 | + return -EINVAL; | |
14995 | + } | |
14996 | + if (!pmu_info->quiesce) { | |
14997 | + PFM_DBG("%s missing quiesce callback", cfg->pmu_name); | |
14998 | + return -EINVAL; | |
14999 | + } | |
15000 | + if (!pmu_info->stop_save) { | |
15001 | + PFM_DBG("%s missing stop_save callback", cfg->pmu_name); | |
15002 | + return -EINVAL; | |
15003 | + } | |
15004 | + return 0; | |
15005 | +} | |
15006 | + | |
15007 | +/** | |
15008 | + * pfm_arch_init - one time global arch-specific initialization | |
15009 | + * | |
15010 | + * called from pfm_init() | |
15011 | + */ | |
15012 | +int __init pfm_arch_init(void) | |
15013 | +{ | |
15014 | + /* | |
15015 | + * we need to register our NMI handler when the kernel boots | |
15016 | + * to avoid a deadlock condition with the NMI watchdog or Oprofile | |
15017 | + * if we were to try and register/unregister on-demand. | |
15018 | + */ | |
15019 | + register_die_notifier(&pfm_nmi_nb); | |
15020 | + return 0; | |
15021 | +} | |
15022 | --- /dev/null | |
15023 | +++ b/arch/x86/perfmon/perfmon_amd64.c | |
15024 | @@ -0,0 +1,754 @@ | |
15025 | +/* | |
15026 | + * This file contains the PMU description for the Athlon64 and Opteron64 | |
15027 | + * processors. It supports 32 and 64-bit modes. | |
15028 | + * | |
15029 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
15030 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
15031 | + * | |
15032 | + * Copyright (c) 2007 Advanced Micro Devices, Inc. | |
15033 | + * Contributed by Robert Richter <robert.richter@amd.com> | |
15034 | + * | |
15035 | + * This program is free software; you can redistribute it and/or | |
15036 | + * modify it under the terms of version 2 of the GNU General Public | |
15037 | + * License as published by the Free Software Foundation. | |
15038 | + * | |
15039 | + * This program is distributed in the hope that it will be useful, | |
15040 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15041 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15042 | + * General Public License for more details. | |
15043 | + * | |
15044 | + * You should have received a copy of the GNU General Public License | |
15045 | + * along with this program; if not, write to the Free Software | |
15046 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
15047 | + * 02111-1307 USA | |
15048 | + */ | |
15049 | +#include <linux/module.h> | |
15050 | +#include <linux/vmalloc.h> | |
15051 | +#include <linux/topology.h> | |
15052 | +#include <linux/kprobes.h> | |
15053 | +#include <linux/pci.h> | |
15054 | +#include <linux/perfmon_kern.h> | |
15055 | +#include <asm/hw_irq.h> | |
15056 | +#include <asm/apic.h> | |
15057 | + | |
15058 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
15059 | +MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>"); | |
15060 | +MODULE_DESCRIPTION("AMD64 PMU description table"); | |
15061 | +MODULE_LICENSE("GPL"); | |
15062 | + | |
15063 | +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 | |
15064 | + | |
15065 | +static int force_nmi; | |
15066 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
15067 | +module_param(force_nmi, bool, 0600); | |
15068 | + | |
15069 | +#define HAS_IBS 0x01 /* has IBS support */ | |
15070 | + | |
15071 | +static u8 ibs_eilvt_off, ibs_status; /* AMD: extended interrupt LVT offset */ | |
15072 | + | |
15073 | +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx, | |
15074 | + struct pfm_event_set *set); | |
15075 | +static void __kprobes pfm_amd64_quiesce(void); | |
15076 | +static int pfm_amd64_has_ovfls(struct pfm_context *ctx); | |
15077 | +static int pfm_amd64_stop_save(struct pfm_context *ctx, | |
15078 | + struct pfm_event_set *set); | |
15079 | + | |
15080 | +#define IBSFETCHCTL_PMC 4 /* pmc4 */ | |
15081 | +#define IBSFETCHCTL_PMD 4 /* pmd4 */ | |
15082 | +#define IBSOPSCTL_PMC 5 /* pmc5 */ | |
15083 | +#define IBSOPSCTL_PMD 7 /* pmd7 */ | |
15084 | + | |
15085 | +static u64 enable_mask[PFM_MAX_PMCS]; | |
15086 | +static u16 max_enable; | |
15087 | + | |
15088 | +static struct pfm_arch_pmu_info pfm_amd64_pmu_info = { | |
15089 | + .stop_save = pfm_amd64_stop_save, | |
15090 | + .has_ovfls = pfm_amd64_has_ovfls, | |
15091 | + .quiesce = pfm_amd64_quiesce, | |
15092 | + .restore_pmcs = pfm_amd64_restore_pmcs | |
15093 | +}; | |
15094 | + | |
15095 | +#define PFM_AMD64_IBSFETCHVAL (1ULL<<49) /* valid fetch sample */ | |
15096 | +#define PFM_AMD64_IBSFETCHEN (1ULL<<48) /* fetch sampling enabled */ | |
15097 | +#define PFM_AMD64_IBSOPVAL (1ULL<<18) /* valid execution sample */ | |
15098 | +#define PFM_AMD64_IBSOPEN (1ULL<<17) /* execution sampling enabled */ | |
15099 | + | |
15100 | +/* | |
15101 | + * force Local APIC interrupt on overflow | |
15102 | + */ | |
15103 | +#define PFM_K8_VAL (1ULL<<20) | |
15104 | +#define PFM_K8_NO64 (1ULL<<20) | |
15105 | + | |
15106 | +/* | |
15107 | + * reserved bits must be 1 | |
15108 | + * | |
15109 | + * for family 15: | |
15110 | + * - upper 32 bits are reserved | |
15111 | + * - bit 20, bit 21 | |
15112 | + * | |
15113 | + * for family 16: | |
15114 | + * - bits 36-39 are reserved | |
15115 | + * - bits 42-63 are reserved | |
15116 | + * - bit 20, bit 21 | |
15117 | + * | |
15118 | + * for IBS registers: | |
15119 | + * IBSFETCHCTL: all bits are reserved except bits 57, 48, 15:0 | |
15120 | + * IBSOPSCTL : all bits are reserved except bits 17, 15:0 | |
15121 | + */ | |
15122 | +#define PFM_K8_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21)) | |
15123 | +#define PFM_16_RSVD ((0x3fffffULL<<42) | (0xfULL<<36) | (1ULL<<20) | (1ULL<<21)) | |
15124 | +#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<48)|(1ULL<<57)|0xffffULL)) | |
15125 | +#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<17)|0xffffULL)) | |
15126 | + | |
15127 | +static struct pfm_regmap_desc pfm_amd64_pmc_desc[] = { | |
15128 | +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0), | |
15129 | +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1), | |
15130 | +/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2), | |
15131 | +/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3), | |
15132 | +/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", 0, PFM_AMD64_IBSFETCHCTL_RSVD, 0, MSR_AMD64_IBSFETCHCTL), | |
15133 | +/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", 0, PFM_AMD64_IBSOPCTL_RSVD, 0, MSR_AMD64_IBSOPCTL), | |
15134 | +}; | |
15135 | +#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc) | |
15136 | + | |
15137 | +#define PFM_REG_IBS (PFM_REG_I|PFM_REG_INTR) | |
15138 | + | |
15139 | +/* | |
15140 | + * AMD64 counters are 48 bits, upper bits are reserved | |
15141 | + */ | |
15142 | +#define PFM_AMD64_CTR_RSVD (~((1ULL<<48)-1)) | |
15143 | + | |
15144 | +#define PFM_AMD_D(n) \ | |
15145 | + { .type = PFM_REG_C, \ | |
15146 | + .desc = "PERFCTR"#n, \ | |
15147 | + .hw_addr = MSR_K7_PERFCTR0+n, \ | |
15148 | + .rsvd_msk = PFM_AMD64_CTR_RSVD, \ | |
15149 | + .dep_pmcs[0] = 1ULL << n \ | |
15150 | + } | |
15151 | + | |
15152 | +#define PFM_AMD_IBSO(t, s, a) \ | |
15153 | + { .type = t, \ | |
15154 | + .desc = s, \ | |
15155 | + .hw_addr = a, \ | |
15156 | + .rsvd_msk = 0, \ | |
15157 | + .dep_pmcs[0] = 1ULL << 5 \ | |
15158 | + } | |
15159 | + | |
15160 | +#define PFM_AMD_IBSF(t, s, a) \ | |
15161 | + { .type = t, \ | |
15162 | + .desc = s, \ | |
15163 | + .hw_addr = a, \ | |
15164 | + .rsvd_msk = 0, \ | |
15165 | + .dep_pmcs[0] = 1ULL << 6 \ | |
15166 | + } | |
15167 | + | |
15168 | +static struct pfm_regmap_desc pfm_amd64_pmd_desc[] = { | |
15169 | +/* pmd0 */ PFM_AMD_D(0), | |
15170 | +/* pmd1 */ PFM_AMD_D(1), | |
15171 | +/* pmd2 */ PFM_AMD_D(2), | |
15172 | +/* pmd3 */ PFM_AMD_D(3), | |
15173 | +/* pmd4 */ PFM_AMD_IBSF(PFM_REG_IBS, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL), | |
15174 | +/* pmd5 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD), | |
15175 | +/* pmd6 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD), | |
15176 | +/* pmd7 */ PFM_AMD_IBSO(PFM_REG_IBS, "IBSOPCTL", MSR_AMD64_IBSOPCTL), | |
15177 | +/* pmd8 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP), | |
15178 | +/* pmd9 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA), | |
15179 | +/* pmd10 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2), | |
15180 | +/* pmd11 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3), | |
15181 | +/* pmd12 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD), | |
15182 | +/* pmd13 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD), | |
15183 | +}; | |
15184 | +#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc) | |
15185 | + | |
15186 | +static struct pfm_context **pfm_nb_sys_owners; | |
15187 | +static struct pfm_context *pfm_nb_task_owner; | |
15188 | + | |
15189 | +static struct pfm_pmu_config pfm_amd64_pmu_conf; | |
15190 | + | |
15191 | +#define is_ibs_pmc(x) (x == 4 || x == 5) | |
15192 | + | |
15193 | +static void pfm_amd64_setup_eilvt_per_cpu(void *info) | |
15194 | +{ | |
15195 | + u8 lvt_off; | |
15196 | + | |
15197 | + /* program the IBS vector to the perfmon vector */ | |
15198 | + lvt_off = setup_APIC_eilvt_ibs(LOCAL_PERFMON_VECTOR, | |
15199 | + APIC_EILVT_MSG_FIX, 0); | |
15200 | + PFM_DBG("APIC_EILVT%d set to 0x%x", lvt_off, LOCAL_PERFMON_VECTOR); | |
15201 | + ibs_eilvt_off = lvt_off; | |
15202 | +} | |
15203 | + | |
15204 | +static int pfm_amd64_setup_eilvt(void) | |
15205 | +{ | |
15206 | +#define IBSCTL_LVTOFFSETVAL (1 << 8) | |
15207 | +#define IBSCTL 0x1cc | |
15208 | + struct pci_dev *cpu_cfg; | |
15209 | + int nodes; | |
15210 | + u32 value = 0; | |
15211 | + | |
15212 | + /* per CPU setup */ | |
15213 | + on_each_cpu(pfm_amd64_setup_eilvt_per_cpu, NULL, 1); | |
15214 | + | |
15215 | + nodes = 0; | |
15216 | + cpu_cfg = NULL; | |
15217 | + do { | |
15218 | + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | |
15219 | + PCI_DEVICE_ID_AMD_10H_NB_MISC, | |
15220 | + cpu_cfg); | |
15221 | + if (!cpu_cfg) | |
15222 | + break; | |
15223 | + ++nodes; | |
15224 | + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | |
15225 | + | IBSCTL_LVTOFFSETVAL); | |
15226 | + pci_read_config_dword(cpu_cfg, IBSCTL, &value); | |
15227 | + if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { | |
15228 | + PFM_DBG("Failed to setup IBS LVT offset, " | |
15229 | + "IBSCTL = 0x%08x", value); | |
15230 | + return 1; | |
15231 | + } | |
15232 | + } while (1); | |
15233 | + | |
15234 | + if (!nodes) { | |
15235 | + PFM_DBG("No CPU node configured for IBS"); | |
15236 | + return 1; | |
15237 | + } | |
15238 | + | |
15239 | +#ifdef CONFIG_NUMA | |
15240 | + /* Sanity check */ | |
15241 | + /* Works only for 64bit with proper numa implementation. */ | |
15242 | + if (nodes != num_possible_nodes()) { | |
15243 | + PFM_DBG("Failed to setup CPU node(s) for IBS, " | |
15244 | + "found: %d, expected %d", | |
15245 | + nodes, num_possible_nodes()); | |
15246 | + return 1; | |
15247 | + } | |
15248 | +#endif | |
15249 | + return 0; | |
15250 | +} | |
15251 | + | |
15252 | +/* | |
15253 | + * There can only be one user per socket for the Northbridge (NB) events, | |
15254 | + * so we enforce mutual exclusion as follows: | |
15255 | + * - per-thread : only one context machine-wide can use NB events | |
15256 | + * - system-wide: only one context per processor socket | |
15257 | + * | |
15258 | + * Exclusion is enforced at: | |
15259 | + * - pfm_load_context() | |
15260 | + * - pfm_write_pmcs() for attached contexts | |
15261 | + * | |
15262 | + * Exclusion is released at: | |
15263 | + * - pfm_unload_context() or any calls that implicitly use it | |
15264 | + * | |
15265 | + * return: | |
15266 | + * 0 : successfully acquire NB access | |
15267 | + * < 0: errno, failed to acquire NB access | |
15268 | + */ | |
15269 | +static int pfm_amd64_acquire_nb(struct pfm_context *ctx) | |
15270 | +{ | |
15271 | + struct pfm_context **entry, *old; | |
15272 | + int proc_id; | |
15273 | + | |
15274 | +#ifdef CONFIG_SMP | |
15275 | + proc_id = cpu_data(smp_processor_id()).phys_proc_id; | |
15276 | +#else | |
15277 | + proc_id = 0; | |
15278 | +#endif | |
15279 | + | |
15280 | + if (ctx->flags.system) | |
15281 | + entry = &pfm_nb_sys_owners[proc_id]; | |
15282 | + else | |
15283 | + entry = &pfm_nb_task_owner; | |
15284 | + | |
15285 | + old = cmpxchg(entry, NULL, ctx); | |
15286 | + if (!old) { | |
15287 | + if (ctx->flags.system) | |
15288 | + PFM_DBG("acquired Northbridge event access on socket %u", proc_id); | |
15289 | + else | |
15290 | + PFM_DBG("acquired Northbridge event access globally"); | |
15291 | + } else if (old != ctx) { | |
15292 | + if (ctx->flags.system) | |
15293 | + PFM_DBG("NorthBridge event conflict on socket %u", proc_id); | |
15294 | + else | |
15295 | + PFM_DBG("global NorthBridge event conflict"); | |
15296 | + return -EBUSY; | |
15297 | + } | |
15298 | + return 0; | |
15299 | +} | |
15300 | + | |
15301 | +/* | |
15302 | + * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL,i.e., | |
15303 | + * when we have detected a multi-core processor. | |
15304 | + * | |
15305 | + * context is locked, interrupts are masked | |
15306 | + */ | |
15307 | +static int pfm_amd64_pmc_write_check(struct pfm_context *ctx, | |
15308 | + struct pfm_event_set *set, | |
15309 | + struct pfarg_pmc *req) | |
15310 | +{ | |
15311 | + unsigned int event; | |
15312 | + | |
15313 | + /* | |
15314 | + * delay checking NB event until we load the context | |
15315 | + */ | |
15316 | + if (ctx->state == PFM_CTX_UNLOADED) | |
15317 | + return 0; | |
15318 | + | |
15319 | + /* | |
15320 | + * check event is NB event | |
15321 | + */ | |
15322 | + event = (unsigned int)(req->reg_value & 0xff); | |
15323 | + if (event < 0xee) | |
15324 | + return 0; | |
15325 | + | |
15326 | + return pfm_amd64_acquire_nb(ctx); | |
15327 | +} | |
15328 | + | |
15329 | +/* | |
15330 | + * invoked on pfm_load_context(). | |
15331 | + * context is locked, interrupts are masked | |
15332 | + */ | |
15333 | +static int pfm_amd64_load_context(struct pfm_context *ctx) | |
15334 | +{ | |
15335 | + struct pfm_event_set *set; | |
15336 | + unsigned int i, n; | |
15337 | + | |
15338 | + /* | |
15339 | + * scan all sets for NB events | |
15340 | + */ | |
15341 | + list_for_each_entry(set, &ctx->set_list, list) { | |
15342 | + n = set->nused_pmcs; | |
15343 | + for (i = 0; n; i++) { | |
15344 | + if (!test_bit(i, cast_ulp(set->used_pmcs))) | |
15345 | + continue; | |
15346 | + | |
15347 | + if (!is_ibs_pmc(i) && (set->pmcs[i] & 0xff) >= 0xee) | |
15348 | + goto found; | |
15349 | + n--; | |
15350 | + } | |
15351 | + } | |
15352 | + return 0; | |
15353 | +found: | |
15354 | + return pfm_amd64_acquire_nb(ctx); | |
15355 | +} | |
15356 | + | |
15357 | +/* | |
15358 | + * invoked on pfm_unload_context() | |
15359 | + */ | |
15360 | +static void pfm_amd64_unload_context(struct pfm_context *ctx) | |
15361 | +{ | |
15362 | + struct pfm_context **entry, *old; | |
15363 | + int proc_id; | |
15364 | + | |
15365 | +#ifdef CONFIG_SMP | |
15366 | + proc_id = cpu_data(smp_processor_id()).phys_proc_id; | |
15367 | +#else | |
15368 | + proc_id = 0; | |
15369 | +#endif | |
15370 | + | |
15371 | + /* | |
15372 | + * unload always happens on the monitored CPU in system-wide | |
15373 | + */ | |
15374 | + if (ctx->flags.system) | |
15375 | + entry = &pfm_nb_sys_owners[proc_id]; | |
15376 | + else | |
15377 | + entry = &pfm_nb_task_owner; | |
15378 | + | |
15379 | + old = cmpxchg(entry, ctx, NULL); | |
15380 | + if (old == ctx) { | |
15381 | + if (ctx->flags.system) | |
15382 | + PFM_DBG("released NorthBridge on socket %u", proc_id); | |
15383 | + else | |
15384 | + PFM_DBG("released NorthBridge events globally"); | |
15385 | + } | |
15386 | +} | |
15387 | + | |
15388 | +/* | |
15389 | + * detect if we need to activate NorthBridge event access control | |
15390 | + */ | |
15391 | +static int pfm_amd64_setup_nb_event_control(void) | |
15392 | +{ | |
15393 | + unsigned int c, n = 0; | |
15394 | + unsigned int max_phys = 0; | |
15395 | + | |
15396 | +#ifdef CONFIG_SMP | |
15397 | + for_each_possible_cpu(c) { | |
15398 | + if (cpu_data(c).phys_proc_id > max_phys) | |
15399 | + max_phys = cpu_data(c).phys_proc_id; | |
15400 | + } | |
15401 | +#else | |
15402 | + max_phys = 0; | |
15403 | +#endif | |
15404 | + if (max_phys > 255) { | |
15405 | + PFM_INFO("socket id %d is too big to handle", max_phys); | |
15406 | + return -ENOMEM; | |
15407 | + } | |
15408 | + | |
15409 | + n = max_phys + 1; | |
15410 | + if (n < 2) | |
15411 | + return 0; | |
15412 | + | |
15413 | + pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners)); | |
15414 | + if (!pfm_nb_sys_owners) | |
15415 | + return -ENOMEM; | |
15416 | + | |
15417 | + memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners)); | |
15418 | + pfm_nb_task_owner = NULL; | |
15419 | + | |
15420 | + /* | |
15421 | + * activate write-checker for PMC registers | |
15422 | + */ | |
15423 | + for (c = 0; c < PFM_AMD_NUM_PMCS; c++) { | |
15424 | + if (!is_ibs_pmc(c)) | |
15425 | + pfm_amd64_pmc_desc[c].type |= PFM_REG_WC; | |
15426 | + } | |
15427 | + | |
15428 | + pfm_amd64_pmu_info.load_context = pfm_amd64_load_context; | |
15429 | + pfm_amd64_pmu_info.unload_context = pfm_amd64_unload_context; | |
15430 | + | |
15431 | + pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check; | |
15432 | + | |
15433 | + PFM_INFO("NorthBridge event access control enabled"); | |
15434 | + | |
15435 | + return 0; | |
15436 | +} | |
15437 | + | |
15438 | +/* | |
15439 | + * disable registers which are not available on | |
15440 | + * the host (applies to IBS registers) | |
15441 | + */ | |
15442 | +static void pfm_amd64_check_registers(void) | |
15443 | +{ | |
15444 | + u16 i; | |
15445 | + | |
15446 | + PFM_DBG("has_ibs=%d", !!(ibs_status & HAS_IBS)); | |
15447 | + | |
15448 | + __set_bit(0, cast_ulp(enable_mask)); | |
15449 | + __set_bit(1, cast_ulp(enable_mask)); | |
15450 | + __set_bit(2, cast_ulp(enable_mask)); | |
15451 | + __set_bit(3, cast_ulp(enable_mask)); | |
15452 | + max_enable = 3+1; | |
15453 | + | |
15454 | + | |
15455 | + /* | |
15456 | + * remove IBS registers if feature not present | |
15457 | + */ | |
15458 | + if (!(ibs_status & HAS_IBS)) { | |
15459 | + pfm_amd64_pmc_desc[4].type = PFM_REG_NA; | |
15460 | + pfm_amd64_pmc_desc[5].type = PFM_REG_NA; | |
15461 | + for (i = 4; i < 14; i++) | |
15462 | + pfm_amd64_pmd_desc[i].type = PFM_REG_NA; | |
15463 | + } else { | |
15464 | + __set_bit(16, cast_ulp(enable_mask)); | |
15465 | + __set_bit(17, cast_ulp(enable_mask)); | |
15466 | + max_enable = 17 + 1; | |
15467 | + } | |
15468 | + | |
15469 | + /* | |
15470 | + * adjust reserved bit fields for family 16 | |
15471 | + */ | |
15472 | + if (current_cpu_data.x86 == 16) { | |
15473 | + for (i = 0; i < PFM_AMD_NUM_PMCS; i++) | |
15474 | + if (pfm_amd64_pmc_desc[i].rsvd_msk == PFM_K8_RSVD) | |
15475 | + pfm_amd64_pmc_desc[i].rsvd_msk = PFM_16_RSVD; | |
15476 | + } | |
15477 | +} | |
15478 | + | |
15479 | +static int pfm_amd64_probe_pmu(void) | |
15480 | +{ | |
15481 | + u64 val = 0; | |
15482 | + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) { | |
15483 | + PFM_INFO("not an AMD processor"); | |
15484 | + return -1; | |
15485 | + } | |
15486 | + | |
15487 | + switch (current_cpu_data.x86) { | |
15488 | + case 16: | |
15489 | + case 15: | |
15490 | + case 6: | |
15491 | + break; | |
15492 | + default: | |
15493 | + PFM_INFO("unsupported family=%d", current_cpu_data.x86); | |
15494 | + return -1; | |
15495 | + } | |
15496 | + | |
15497 | + /* check for IBS */ | |
15498 | + if (cpu_has(&current_cpu_data, X86_FEATURE_IBS)) { | |
15499 | + ibs_status |= HAS_IBS; | |
15500 | + rdmsrl(MSR_AMD64_IBSCTL, val); | |
15501 | + } | |
15502 | + | |
15503 | + PFM_INFO("found family=%d IBSCTL=0x%llx", current_cpu_data.x86, (unsigned long long)val); | |
15504 | + | |
15505 | + /* | |
15506 | + * check for local APIC (required) | |
15507 | + */ | |
15508 | + if (!cpu_has_apic) { | |
15509 | + PFM_INFO("no local APIC, unsupported"); | |
15510 | + return -1; | |
15511 | + } | |
15512 | + | |
15513 | + if (current_cpu_data.x86_max_cores > 1 | |
15514 | + && pfm_amd64_setup_nb_event_control()) | |
15515 | + return -1; | |
15516 | + | |
15517 | + if (force_nmi) | |
15518 | + pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
15519 | + | |
15520 | + if (ibs_status & HAS_IBS) { | |
15521 | + /* Setup extended interrupt */ | |
15522 | + if (pfm_amd64_setup_eilvt()) { | |
15523 | + PFM_INFO("Failed to initialize extended interrupts " | |
15524 | + "for IBS"); | |
15525 | + ibs_status &= ~HAS_IBS; | |
15526 | + PFM_INFO("Unable to use IBS"); | |
15527 | + } else { | |
15528 | + PFM_INFO("IBS supported"); | |
15529 | + } | |
15530 | + } | |
15531 | + | |
15532 | + pfm_amd64_check_registers(); | |
15533 | + | |
15534 | + return 0; | |
15535 | +} | |
15536 | + | |
15537 | +/* | |
15538 | + * detect if counters have overflowed. | |
15539 | + * return: | |
15540 | + * 0 : no overflow | |
15541 | + * 1 : at least one overflow | |
15542 | + */ | |
15543 | +static int __kprobes pfm_amd64_has_ovfls(struct pfm_context *ctx) | |
15544 | +{ | |
15545 | + struct pfm_regmap_desc *xrd; | |
15546 | + u64 *cnt_mask; | |
15547 | + u64 wmask, val; | |
15548 | + u16 i, num; | |
15549 | + | |
15550 | + /* | |
15551 | + * Check for IBS events | |
15552 | + */ | |
15553 | + if (ibs_status & HAS_IBS) { | |
15554 | + rdmsrl(MSR_AMD64_IBSFETCHCTL, val); | |
15555 | + if (val & PFM_AMD64_IBSFETCHVAL) | |
15556 | + return 1; | |
15557 | + rdmsrl(MSR_AMD64_IBSOPCTL, val); | |
15558 | + if (val & PFM_AMD64_IBSOPVAL) | |
15559 | + return 1; | |
15560 | + } | |
15561 | + /* | |
15562 | + * Check regular counters | |
15563 | + */ | |
15564 | + cnt_mask = ctx->regs.cnt_pmds; | |
15565 | + num = ctx->regs.num_counters; | |
15566 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
15567 | + xrd = pfm_amd64_pmd_desc; | |
15568 | + | |
15569 | + for (i = 0; num; i++) { | |
15570 | + if (test_bit(i, cast_ulp(cnt_mask))) { | |
15571 | + rdmsrl(xrd[i].hw_addr, val); | |
15572 | + if (!(val & wmask)) | |
15573 | + return 1; | |
15574 | + num--; | |
15575 | + } | |
15576 | + } | |
15577 | + return 0; | |
15578 | +} | |
15579 | + | |
15580 | +/* | |
15581 | + * Must check for IBS event BEFORE stop_save_p6 because | |
15582 | + * stopping monitoring does destroy IBS state information | |
15583 | + * in IBSFETCHCTL/IBSOPCTL because they are tagged as enable | |
15584 | + * registers. | |
15585 | + */ | |
15586 | +static int pfm_amd64_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) | |
15587 | +{ | |
15588 | + struct pfm_arch_pmu_info *pmu_info; | |
15589 | + u64 used_mask[PFM_PMC_BV]; | |
15590 | + u64 *cnt_pmds; | |
15591 | + u64 val, wmask, ovfl_mask; | |
15592 | + u32 i, count, use_ibs; | |
15593 | + | |
15594 | + pmu_info = pfm_pmu_info(); | |
15595 | + | |
15596 | + /* | |
15597 | + * IBS used if: | |
15598 | + * - on family 10h processor with IBS | |
15599 | + * - at least one of the IBS PMD registers is used | |
15600 | + */ | |
15601 | + use_ibs = (ibs_status & HAS_IBS) | |
15602 | + && (test_bit(IBSFETCHCTL_PMD, cast_ulp(set->used_pmds)) | |
15603 | + || test_bit(IBSOPSCTL_PMD, cast_ulp(set->used_pmds))); | |
15604 | + | |
15605 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
15606 | + | |
15607 | + bitmap_and(cast_ulp(used_mask), | |
15608 | + cast_ulp(set->used_pmcs), | |
15609 | + cast_ulp(enable_mask), | |
15610 | + max_enable); | |
15611 | + | |
15612 | + count = bitmap_weight(cast_ulp(used_mask), max_enable); | |
15613 | + | |
15614 | + /* | |
15615 | + * stop monitoring | |
15616 | + * Unfortunately, this is very expensive! | |
15617 | + * wrmsrl() is serializing. | |
15618 | + * | |
15619 | + * With IBS, we need to do read-modify-write to preserve the content | |
15620 | + * for OpsCTL and FetchCTL because they are also used as PMDs and saved | |
15621 | + * below | |
15622 | + */ | |
15623 | + if (use_ibs) { | |
15624 | + for (i = 0; count; i++) { | |
15625 | + if (test_bit(i, cast_ulp(used_mask))) { | |
15626 | + if (i == IBSFETCHCTL_PMC) { | |
15627 | + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); | |
15628 | + val &= ~PFM_AMD64_IBSFETCHEN; | |
15629 | + } else if (i == IBSOPSCTL_PMC) { | |
15630 | + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); | |
15631 | + val &= ~PFM_AMD64_IBSOPEN; | |
15632 | + } else | |
15633 | + val = 0; | |
15634 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); | |
15635 | + count--; | |
15636 | + } | |
15637 | + } | |
15638 | + } else { | |
15639 | + for (i = 0; count; i++) { | |
15640 | + if (test_bit(i, cast_ulp(used_mask))) { | |
15641 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); | |
15642 | + count--; | |
15643 | + } | |
15644 | + } | |
15645 | + } | |
15646 | + | |
15647 | + /* | |
15648 | + * if we already have a pending overflow condition, we simply | |
15649 | + * return to take care of this first. | |
15650 | + */ | |
15651 | + if (set->npend_ovfls) | |
15652 | + return 1; | |
15653 | + | |
15654 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
15655 | + cnt_pmds = ctx->regs.cnt_pmds; | |
15656 | + | |
15657 | + /* | |
15658 | + * check for pending overflows and save PMDs (combo) | |
15659 | + * we employ used_pmds because we also need to save | |
15660 | + * and not just check for pending interrupts. | |
15661 | + * | |
15662 | + * Must check for counting PMDs because of virtual PMDs and IBS | |
15663 | + */ | |
15664 | + count = set->nused_pmds; | |
15665 | + for (i = 0; count; i++) { | |
15666 | + if (test_bit(i, cast_ulp(set->used_pmds))) { | |
15667 | + val = pfm_arch_read_pmd(ctx, i); | |
15668 | + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { | |
15669 | + if (!(val & wmask)) { | |
15670 | + __set_bit(i, cast_ulp(set->povfl_pmds)); | |
15671 | + set->npend_ovfls++; | |
15672 | + } | |
15673 | + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); | |
15674 | + } | |
15675 | + set->pmds[i].value = val; | |
15676 | + count--; | |
15677 | + } | |
15678 | + } | |
15679 | + | |
15680 | + /* | |
15681 | + * check if IBS contains valid data, and mark the corresponding | |
15682 | + * PMD as overflowed | |
15683 | + */ | |
15684 | + if (use_ibs) { | |
15685 | + if (set->pmds[IBSFETCHCTL_PMD].value & PFM_AMD64_IBSFETCHVAL) { | |
15686 | + __set_bit(IBSFETCHCTL_PMD, cast_ulp(set->povfl_pmds)); | |
15687 | + set->npend_ovfls++; | |
15688 | + } | |
15689 | + if (set->pmds[IBSOPSCTL_PMD].value & PFM_AMD64_IBSOPVAL) { | |
15690 | + __set_bit(IBSOPSCTL_PMD, cast_ulp(set->povfl_pmds)); | |
15691 | + set->npend_ovfls++; | |
15692 | + } | |
15693 | + } | |
15694 | + /* 0 means: no need to save PMDs at upper level */ | |
15695 | + return 0; | |
15696 | +} | |
15697 | + | |
15698 | +/** | |
15699 | + * pfm_amd64_quiesce_pmu -- stop monitoring without grabbing any lock | |
15700 | + * | |
15701 | + * called from NMI interrupt handler to immediately stop monitoring | |
15702 | + * cannot grab any lock, including perfmon related locks | |
15703 | + */ | |
15704 | +static void __kprobes pfm_amd64_quiesce(void) | |
15705 | +{ | |
15706 | + /* | |
15707 | + * quiesce PMU by clearing available registers that have | |
15708 | + * the start/stop capability | |
15709 | + */ | |
15710 | + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15711 | + wrmsrl(MSR_K7_EVNTSEL0, 0); | |
15712 | + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15713 | + wrmsrl(MSR_K7_EVNTSEL0+1, 0); | |
15714 | + if (test_bit(2, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15715 | + wrmsrl(MSR_K7_EVNTSEL0+2, 0); | |
15716 | + if (test_bit(3, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15717 | + wrmsrl(MSR_K7_EVNTSEL0+3, 0); | |
15718 | + | |
15719 | + if (test_bit(4, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15720 | + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | |
15721 | + if (test_bit(5, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
15722 | + wrmsrl(MSR_AMD64_IBSOPCTL, 0); | |
15723 | +} | |
15724 | + | |
15725 | +/** | |
15726 | + * pfm_amd64_restore_pmcs - reload PMC registers | |
15727 | + * @ctx: context to restore from | |
15728 | + * @set: current event set | |
15729 | + * | |
15730 | + * optimized version of pfm_arch_restore_pmcs(). On AMD64, we can | |
15731 | + * afford to only restore the pmcs registers we use, because they are | |
15732 | + * all independent from each other. | |
15733 | + */ | |
15734 | +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx, | |
15735 | + struct pfm_event_set *set) | |
15736 | +{ | |
15737 | + u64 *mask; | |
15738 | + u16 i, num; | |
15739 | + | |
15740 | + mask = set->used_pmcs; | |
15741 | + num = set->nused_pmcs; | |
15742 | + for (i = 0; num; i++) { | |
15743 | + if (test_bit(i, cast_ulp(mask))) { | |
15744 | + wrmsrl(pfm_amd64_pmc_desc[i].hw_addr, set->pmcs[i]); | |
15745 | + num--; | |
15746 | + } | |
15747 | + } | |
15748 | +} | |
15749 | + | |
15750 | +static struct pfm_pmu_config pfm_amd64_pmu_conf = { | |
15751 | + .pmu_name = "AMD64", | |
15752 | + .counter_width = 47, | |
15753 | + .pmd_desc = pfm_amd64_pmd_desc, | |
15754 | + .pmc_desc = pfm_amd64_pmc_desc, | |
15755 | + .num_pmc_entries = PFM_AMD_NUM_PMCS, | |
15756 | + .num_pmd_entries = PFM_AMD_NUM_PMDS, | |
15757 | + .probe_pmu = pfm_amd64_probe_pmu, | |
15758 | + .version = "1.2", | |
15759 | + .pmu_info = &pfm_amd64_pmu_info, | |
15760 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
15761 | + .owner = THIS_MODULE, | |
15762 | +}; | |
15763 | + | |
15764 | +static int __init pfm_amd64_pmu_init_module(void) | |
15765 | +{ | |
15766 | + return pfm_pmu_register(&pfm_amd64_pmu_conf); | |
15767 | +} | |
15768 | + | |
15769 | +static void __exit pfm_amd64_pmu_cleanup_module(void) | |
15770 | +{ | |
15771 | + if (pfm_nb_sys_owners) | |
15772 | + vfree(pfm_nb_sys_owners); | |
15773 | + | |
15774 | + pfm_pmu_unregister(&pfm_amd64_pmu_conf); | |
15775 | +} | |
15776 | + | |
15777 | +module_init(pfm_amd64_pmu_init_module); | |
15778 | +module_exit(pfm_amd64_pmu_cleanup_module); | |
15779 | --- /dev/null | |
15780 | +++ b/arch/x86/perfmon/perfmon_intel_arch.c | |
15781 | @@ -0,0 +1,610 @@ | |
15782 | +/* | |
15783 | + * This file contains the Intel architectural perfmon v1, v2, v3 | |
15784 | + * description tables. | |
15785 | + * | |
15786 | + * Architectural perfmon was introduced with Intel Core Solo/Duo | |
15787 | + * processors. | |
15788 | + * | |
15789 | + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. | |
15790 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
15791 | + * | |
15792 | + * This program is free software; you can redistribute it and/or | |
15793 | + * modify it under the terms of version 2 of the GNU General Public | |
15794 | + * License as published by the Free Software Foundation. | |
15795 | + * | |
15796 | + * This program is distributed in the hope that it will be useful, | |
15797 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15798 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15799 | + * General Public License for more details. | |
15800 | + * | |
15801 | + * You should have received a copy of the GNU General Public License | |
15802 | + * along with this program; if not, write to the Free Software | |
15803 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
15804 | + * 02111-1307 USA | |
15805 | + */ | |
15806 | +#include <linux/module.h> | |
15807 | +#include <linux/kprobes.h> | |
15808 | +#include <linux/perfmon_kern.h> | |
15809 | +#include <linux/nmi.h> | |
15810 | +#include <asm/msr.h> | |
15811 | +#include <asm/apic.h> | |
15812 | + | |
15813 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
15814 | +MODULE_DESCRIPTION("Intel architectural perfmon v1"); | |
15815 | +MODULE_LICENSE("GPL"); | |
15816 | + | |
15817 | +static int force, force_nmi; | |
15818 | +MODULE_PARM_DESC(force, "bool: force module to load succesfully"); | |
15819 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
15820 | +module_param(force, bool, 0600); | |
15821 | +module_param(force_nmi, bool, 0600); | |
15822 | + | |
15823 | +static u64 enable_mask[PFM_MAX_PMCS]; | |
15824 | +static u16 max_enable; | |
15825 | + | |
15826 | +/* | |
15827 | + * - upper 32 bits are reserved | |
15828 | + * - INT: APIC enable bit is reserved (forced to 1) | |
15829 | + * - bit 21 is reserved | |
15830 | + * | |
15831 | + * RSVD: reserved bits are 1 | |
15832 | + */ | |
15833 | +#define PFM_IA_PMC_RSVD ((~((1ULL<<32)-1)) \ | |
15834 | + | (1ULL<<20) \ | |
15835 | + | (1ULL<<21)) | |
15836 | + | |
15837 | +/* | |
15838 | + * force Local APIC interrupt on overflow | |
15839 | + * disable with NO_EMUL64 | |
15840 | + */ | |
15841 | +#define PFM_IA_PMC_VAL (1ULL<<20) | |
15842 | +#define PFM_IA_NO64 (1ULL<<20) | |
15843 | + | |
15844 | +/* | |
15845 | + * architecture specifies that: | |
15846 | + * IA32_PMCx MSR : starts at 0x0c1 & occupy a contiguous block of MSR | |
15847 | + * IA32_PERFEVTSELx MSR : starts at 0x186 & occupy a contiguous block of MSR | |
15848 | + * MSR_GEN_FIXED_CTR0 : starts at 0x309 & occupy a contiguous block of MSR | |
15849 | + */ | |
15850 | +#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0 | |
15851 | +#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0 | |
15852 | +#define MSR_GEN_FIXED_PMC_BASE MSR_CORE_PERF_FIXED_CTR0 | |
15853 | + | |
15854 | +/* | |
15855 | + * layout of EAX for CPUID.0xa leaf function | |
15856 | + */ | |
15857 | +struct pmu_eax { | |
15858 | + unsigned int version:8; /* architectural perfmon version */ | |
15859 | + unsigned int num_cnt:8; /* number of generic counters */ | |
15860 | + unsigned int cnt_width:8; /* width of generic counters */ | |
15861 | + unsigned int ebx_length:8; /* number of architected events */ | |
15862 | +}; | |
15863 | + | |
15864 | +/* | |
15865 | + * layout of EDX for CPUID.0xa leaf function when perfmon v2 is detected | |
15866 | + */ | |
15867 | +struct pmu_edx { | |
15868 | + unsigned int num_cnt:5; /* number of fixed counters */ | |
15869 | + unsigned int cnt_width:8; /* width of fixed counters */ | |
15870 | + unsigned int reserved:19; | |
15871 | +}; | |
15872 | + | |
15873 | +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx, | |
15874 | + struct pfm_event_set *set); | |
15875 | +static int pfm_intel_arch_stop_save(struct pfm_context *ctx, | |
15876 | + struct pfm_event_set *set); | |
15877 | +static int pfm_intel_arch_has_ovfls(struct pfm_context *ctx); | |
15878 | +static void __kprobes pfm_intel_arch_quiesce(void); | |
15879 | + | |
15880 | +/* | |
15881 | + * physical addresses of MSR controlling the perfevtsel and counter registers | |
15882 | + */ | |
15883 | +struct pfm_arch_pmu_info pfm_intel_arch_pmu_info = { | |
15884 | + .stop_save = pfm_intel_arch_stop_save, | |
15885 | + .has_ovfls = pfm_intel_arch_has_ovfls, | |
15886 | + .quiesce = pfm_intel_arch_quiesce, | |
15887 | + .restore_pmcs = pfm_intel_arch_restore_pmcs | |
15888 | +}; | |
15889 | + | |
15890 | +#define PFM_IA_C(n) { \ | |
15891 | + .type = PFM_REG_I64, \ | |
15892 | + .desc = "PERFEVTSEL"#n, \ | |
15893 | + .dfl_val = PFM_IA_PMC_VAL, \ | |
15894 | + .rsvd_msk = PFM_IA_PMC_RSVD, \ | |
15895 | + .no_emul64_msk = PFM_IA_NO64, \ | |
15896 | + .hw_addr = MSR_GEN_SEL_BASE+(n) \ | |
15897 | + } | |
15898 | + | |
15899 | +#define PFM_IA_D(n) \ | |
15900 | + { .type = PFM_REG_C, \ | |
15901 | + .desc = "PMC"#n, \ | |
15902 | + .hw_addr = MSR_P6_PERFCTR0+n, \ | |
15903 | + .dep_pmcs[0] = 1ULL << n \ | |
15904 | + } | |
15905 | + | |
15906 | +#define PFM_IA_FD(n) \ | |
15907 | + { .type = PFM_REG_C, \ | |
15908 | + .desc = "FIXED_CTR"#n, \ | |
15909 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ | |
15910 | + .dep_pmcs[0] = 1ULL << 16 \ | |
15911 | + } | |
15912 | + | |
15913 | +static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[] = { | |
15914 | +/* pmc0 */ PFM_IA_C(0), PFM_IA_C(1), PFM_IA_C(2), PFM_IA_C(3), | |
15915 | +/* pmc4 */ PFM_IA_C(4), PFM_IA_C(5), PFM_IA_C(6), PFM_IA_C(7), | |
15916 | +/* pmc8 */ PFM_IA_C(8), PFM_IA_C(9), PFM_IA_C(10), PFM_IA_C(11), | |
15917 | +/* pmc12 */ PFM_IA_C(12), PFM_IA_C(13), PFM_IA_C(14), PFM_IA_C(15), | |
15918 | + | |
15919 | +/* pmc16 */ { .type = PFM_REG_I, | |
15920 | + .desc = "FIXED_CTRL", | |
15921 | + .dfl_val = 0x8888888888888888ULL, /* force PMI */ | |
15922 | + .rsvd_msk = 0, /* set dynamically */ | |
15923 | + .no_emul64_msk = 0, | |
15924 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL | |
15925 | + }, | |
15926 | +}; | |
15927 | +#define PFM_IA_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc) | |
15928 | + | |
15929 | +static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[] = { | |
15930 | +/* pmd0 */ PFM_IA_D(0), PFM_IA_D(1), PFM_IA_D(2), PFM_IA_D(3), | |
15931 | +/* pmd4 */ PFM_IA_D(4), PFM_IA_D(5), PFM_IA_D(6), PFM_IA_D(7), | |
15932 | +/* pmd8 */ PFM_IA_D(8), PFM_IA_D(9), PFM_IA_D(10), PFM_IA_D(11), | |
15933 | +/* pmd12 */ PFM_IA_D(12), PFM_IA_D(13), PFM_IA_D(14), PFM_IA_D(15), | |
15934 | + | |
15935 | +/* pmd16 */ PFM_IA_FD(0), PFM_IA_FD(1), PFM_IA_FD(2), PFM_IA_FD(3), | |
15936 | +/* pmd20 */ PFM_IA_FD(4), PFM_IA_FD(5), PFM_IA_FD(6), PFM_IA_FD(7), | |
15937 | +/* pmd24 */ PFM_IA_FD(8), PFM_IA_FD(9), PFM_IA_FD(10), PFM_IA_FD(11), | |
15938 | +/* pmd28 */ PFM_IA_FD(16), PFM_IA_FD(17), PFM_IA_FD(18), PFM_IA_FD(19) | |
15939 | +}; | |
15940 | +#define PFM_IA_MAX_PMDS ARRAY_SIZE(pfm_intel_arch_pmd_desc) | |
15941 | + | |
15942 | +#define PFM_IA_MAX_CNT 16 /* # generic counters in mapping table */ | |
15943 | +#define PFM_IA_MAX_FCNT 16 /* # of fixed counters in mapping table */ | |
15944 | +#define PFM_IA_FCNT_BASE 16 /* base index of fixed counters PMD */ | |
15945 | + | |
15946 | +static struct pfm_pmu_config pfm_intel_arch_pmu_conf; | |
15947 | + | |
15948 | +static void pfm_intel_arch_check_errata(void) | |
15949 | +{ | |
15950 | + /* | |
15951 | + * Core Duo errata AE49 (no fix). Both counters share a single | |
15952 | + * enable bit in PERFEVTSEL0 | |
15953 | + */ | |
15954 | + if (current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 14) | |
15955 | + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_NO_SHARING; | |
15956 | +} | |
15957 | + | |
15958 | +static inline void set_enable_mask(unsigned int i) | |
15959 | +{ | |
15960 | + __set_bit(i, cast_ulp(enable_mask)); | |
15961 | + | |
15962 | + /* max_enable = highest + 1 */ | |
15963 | + if ((i+1) > max_enable) | |
15964 | + max_enable = i+ 1; | |
15965 | +} | |
15966 | + | |
15967 | +static void pfm_intel_arch_setup_generic(unsigned int version, | |
15968 | + unsigned int width, | |
15969 | + unsigned int count) | |
15970 | +{ | |
15971 | + u64 rsvd; | |
15972 | + unsigned int i; | |
15973 | + | |
15974 | + /* | |
15975 | + * first we handle the generic counters: | |
15976 | + * | |
15977 | + * - ensure HW does not have more registers than hardcoded in the tables | |
15978 | + * - adjust rsvd_msk to actual counter width | |
15979 | + * - initialize enable_mask (list of PMC with start/stop capability) | |
15980 | + * - mark unused hardcoded generic counters as unimplemented | |
15981 | + */ | |
15982 | + | |
15983 | + /* | |
15984 | + * min of number of Hw counters and hardcoded in the tables | |
15985 | + */ | |
15986 | + if (count >= PFM_IA_MAX_CNT) { | |
15987 | + printk(KERN_INFO "perfmon: Limiting number of generic counters" | |
15988 | + " to %u, HW supports %u", | |
15989 | + PFM_IA_MAX_CNT, count); | |
15990 | + count = PFM_IA_MAX_CNT; | |
15991 | + } | |
15992 | + | |
15993 | + /* | |
15994 | + * adjust rsvd_msk for generic counters based on actual width | |
15995 | + * initialize enable_mask (1 per pmd) | |
15996 | + */ | |
15997 | + rsvd = ~((1ULL << width)-1); | |
15998 | + for (i = 0; i < count; i++) { | |
15999 | + pfm_intel_arch_pmd_desc[i].rsvd_msk = rsvd; | |
16000 | + set_enable_mask(i); | |
16001 | + } | |
16002 | + | |
16003 | + /* | |
16004 | + * handle version 3 new anythread bit (21) | |
16005 | + */ | |
16006 | + if (version == 3) { | |
16007 | + for (i = 0; i < count; i++) | |
16008 | + pfm_intel_arch_pmc_desc[i].rsvd_msk &= ~(1ULL << 21); | |
16009 | + } | |
16010 | + | |
16011 | + | |
16012 | + /* | |
16013 | + * mark unused generic counters as not available | |
16014 | + */ | |
16015 | + for (i = count ; i < PFM_IA_MAX_CNT; i++) { | |
16016 | + pfm_intel_arch_pmd_desc[i].type = PFM_REG_NA; | |
16017 | + pfm_intel_arch_pmc_desc[i].type = PFM_REG_NA; | |
16018 | + } | |
16019 | +} | |
16020 | + | |
16021 | +static void pfm_intel_arch_setup_fixed(unsigned int version, | |
16022 | + unsigned int width, | |
16023 | + unsigned int count) | |
16024 | +{ | |
16025 | + u64 rsvd, dfl; | |
16026 | + unsigned int i; | |
16027 | + | |
16028 | + /* | |
16029 | + * handle the fixed counters (if any): | |
16030 | + * | |
16031 | + * - ensure HW does not have more registers than hardcoded in the tables | |
16032 | + * - adjust rsvd_msk to actual counter width | |
16033 | + * - initialize enable_mask (list of PMC with start/stop capability) | |
16034 | + * - mark unused hardcoded generic counters as unimplemented | |
16035 | + */ | |
16036 | + if (count >= PFM_IA_MAX_FCNT) { | |
16037 | + printk(KERN_INFO "perfmon: Limiting number of fixed counters" | |
16038 | + " to %u, HW supports %u", | |
16039 | + PFM_IA_MAX_FCNT, count); | |
16040 | + count = PFM_IA_MAX_FCNT; | |
16041 | + } | |
16042 | + /* | |
16043 | + * adjust rsvd_msk for fixed counters based on actual width | |
16044 | + */ | |
16045 | + rsvd = ~((1ULL << width)-1); | |
16046 | + for (i = 0; i < count; i++) | |
16047 | + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].rsvd_msk = rsvd; | |
16048 | + | |
16049 | + /* | |
16050 | + * handle version 3 new anythread bit (bit 2) | |
16051 | + */ | |
16052 | + if (version == 3) | |
16053 | + rsvd = 1ULL << 3; | |
16054 | + else | |
16055 | + rsvd = 3ULL << 2; | |
16056 | + | |
16057 | + pfm_intel_arch_pmc_desc[16].rsvd_msk = 0; | |
16058 | + for (i = 0; i < count; i++) | |
16059 | + pfm_intel_arch_pmc_desc[16].rsvd_msk |= rsvd << (i<<2); | |
16060 | + | |
16061 | + /* | |
16062 | + * mark unused fixed counters as unimplemented | |
16063 | + * | |
16064 | + * update the rsvd_msk, dfl_val in FIXED_CTRL: | |
16065 | + * - rsvd_msk: set all 4 bits | |
16066 | + * - dfl_val : clear all 4 bits | |
16067 | + */ | |
16068 | + dfl = pfm_intel_arch_pmc_desc[16].dfl_val; | |
16069 | + rsvd = pfm_intel_arch_pmc_desc[16].rsvd_msk; | |
16070 | + | |
16071 | + for (i = count ; i < PFM_IA_MAX_FCNT; i++) { | |
16072 | + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].type = PFM_REG_NA; | |
16073 | + rsvd |= 0xfULL << (i<<2); | |
16074 | + dfl &= ~(0xfULL << (i<<2)); | |
16075 | + } | |
16076 | + | |
16077 | + /* | |
16078 | + * FIXED_CTR_CTRL unavailable when no fixed counters are defined | |
16079 | + */ | |
16080 | + if (!count) { | |
16081 | + pfm_intel_arch_pmc_desc[16].type = PFM_REG_NA; | |
16082 | + } else { | |
16083 | + /* update rsvd_mask and dfl_val */ | |
16084 | + pfm_intel_arch_pmc_desc[16].rsvd_msk = rsvd; | |
16085 | + pfm_intel_arch_pmc_desc[16].dfl_val = dfl; | |
16086 | + set_enable_mask(16); | |
16087 | + } | |
16088 | +} | |
16089 | + | |
16090 | +static int pfm_intel_arch_probe_pmu(void) | |
16091 | +{ | |
16092 | + union { | |
16093 | + unsigned int val; | |
16094 | + struct pmu_eax eax; | |
16095 | + struct pmu_edx edx; | |
16096 | + } eax, edx; | |
16097 | + unsigned int ebx, ecx; | |
16098 | + unsigned int width = 0; | |
16099 | + | |
16100 | + edx.val = 0; | |
16101 | + | |
16102 | + if (!(cpu_has_arch_perfmon || force)) { | |
16103 | + PFM_INFO("no support for Intel architectural PMU"); | |
16104 | + return -1; | |
16105 | + } | |
16106 | + | |
16107 | + if (!cpu_has_apic) { | |
16108 | + PFM_INFO("no Local APIC, try rebooting with lapic option"); | |
16109 | + return -1; | |
16110 | + } | |
16111 | + | |
16112 | + /* cpuid() call protected by cpu_has_arch_perfmon */ | |
16113 | + cpuid(0xa, &eax.val, &ebx, &ecx, &edx.val); | |
16114 | + | |
16115 | + /* | |
16116 | + * reject processors supported by perfmon_intel_core | |
16117 | + * | |
16118 | + * We need to do this explicitly to avoid depending | |
16119 | + * on the link order in case the modules are compiled as | |
16120 | + * builtin. | |
16121 | + * | |
16122 | + * non Intel processors are rejected by cpu_has_arch_perfmon | |
16123 | + */ | |
16124 | + if (current_cpu_data.x86 == 6 && !force) { | |
16125 | + switch (current_cpu_data.x86_model) { | |
16126 | + case 15: /* Merom: use perfmon_intel_core */ | |
16127 | + case 23: /* Penryn: use perfmon_intel_core */ | |
16128 | + return -1; | |
16129 | + default: | |
16130 | + break; | |
16131 | + } | |
16132 | + } | |
16133 | + | |
16134 | + /* | |
16135 | + * some 6/15 models have buggy BIOS | |
16136 | + */ | |
16137 | + if (eax.eax.version == 0 | |
16138 | + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { | |
16139 | + PFM_INFO("buggy v2 BIOS, adjusting for 2 generic counters"); | |
16140 | + eax.eax.version = 2; | |
16141 | + eax.eax.num_cnt = 2; | |
16142 | + eax.eax.cnt_width = 40; | |
16143 | + } | |
16144 | + | |
16145 | + /* | |
16146 | + * Intel Atom processors have a buggy firmware which does not report | |
16147 | + * the correct number of fixed counters | |
16148 | + */ | |
16149 | + if (eax.eax.version == 3 && edx.edx.num_cnt < 3 | |
16150 | + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 28) { | |
16151 | + PFM_INFO("buggy v3 BIOS, adjusting for 3 fixed counters"); | |
16152 | + edx.edx.num_cnt = 3; | |
16153 | + } | |
16154 | + | |
16155 | + /* | |
16156 | + * some v2 BIOSes are incomplete | |
16157 | + */ | |
16158 | + if (eax.eax.version == 2 && !edx.edx.num_cnt) { | |
16159 | + PFM_INFO("buggy v2 BIOS, adjusting for 3 fixed counters"); | |
16160 | + edx.edx.num_cnt = 3; | |
16161 | + edx.edx.cnt_width = 40; | |
16162 | + } | |
16163 | + | |
16164 | + /* | |
16165 | + * no fixed counters on earlier versions | |
16166 | + */ | |
16167 | + if (eax.eax.version < 2) { | |
16168 | + edx.val = 0; | |
16169 | + } else { | |
16170 | + /* | |
16171 | + * use the min value of both widths until we support | |
16172 | + * variable width counters | |
16173 | + */ | |
16174 | + width = eax.eax.cnt_width < edx.edx.cnt_width ? | |
16175 | + eax.eax.cnt_width : edx.edx.cnt_width; | |
16176 | + } | |
16177 | + | |
16178 | + PFM_INFO("detected architecural perfmon v%d", eax.eax.version); | |
16179 | + PFM_INFO("num_gen=%d width=%d num_fixed=%d width=%d", | |
16180 | + eax.eax.num_cnt, | |
16181 | + eax.eax.cnt_width, | |
16182 | + edx.edx.num_cnt, | |
16183 | + edx.edx.cnt_width); | |
16184 | + | |
16185 | + | |
16186 | + pfm_intel_arch_setup_generic(eax.eax.version, | |
16187 | + width, | |
16188 | + eax.eax.num_cnt); | |
16189 | + | |
16190 | + pfm_intel_arch_setup_fixed(eax.eax.version, | |
16191 | + width, | |
16192 | + edx.edx.num_cnt); | |
16193 | + | |
16194 | + if (force_nmi) | |
16195 | + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
16196 | + | |
16197 | + pfm_intel_arch_check_errata(); | |
16198 | + | |
16199 | + return 0; | |
16200 | +} | |
16201 | + | |
16202 | +/** | |
16203 | + * pfm_intel_arch_has_ovfls - check for pending overflow condition | |
16204 | + * @ctx: context to work on | |
16205 | + * | |
16206 | + * detect if counters have overflowed. | |
16207 | + * return: | |
16208 | + * 0 : no overflow | |
16209 | + * 1 : at least one overflow | |
16210 | + */ | |
16211 | +static int __kprobes pfm_intel_arch_has_ovfls(struct pfm_context *ctx) | |
16212 | +{ | |
16213 | + u64 *cnt_mask; | |
16214 | + u64 wmask, val; | |
16215 | + u16 i, num; | |
16216 | + | |
16217 | + cnt_mask = ctx->regs.cnt_pmds; | |
16218 | + num = ctx->regs.num_counters; | |
16219 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
16220 | + | |
16221 | + /* | |
16222 | + * we can leverage the fact that we know the mapping | |
16223 | + * to hardcode the MSR address and avoid accessing | |
16224 | + * more cachelines | |
16225 | + * | |
16226 | + * We need to check cnt_mask because not all registers | |
16227 | + * may be available. | |
16228 | + */ | |
16229 | + for (i = 0; num; i++) { | |
16230 | + if (test_bit(i, cast_ulp(cnt_mask))) { | |
16231 | + rdmsrl(pfm_intel_arch_pmd_desc[i].hw_addr, val); | |
16232 | + if (!(val & wmask)) | |
16233 | + return 1; | |
16234 | + num--; | |
16235 | + } | |
16236 | + } | |
16237 | + return 0; | |
16238 | +} | |
16239 | + | |
16240 | +static int pfm_intel_arch_stop_save(struct pfm_context *ctx, | |
16241 | + struct pfm_event_set *set) | |
16242 | +{ | |
16243 | + u64 used_mask[PFM_PMC_BV]; | |
16244 | + u64 *cnt_pmds; | |
16245 | + u64 val, wmask, ovfl_mask; | |
16246 | + u32 i, count; | |
16247 | + | |
16248 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
16249 | + | |
16250 | + bitmap_and(cast_ulp(used_mask), | |
16251 | + cast_ulp(set->used_pmcs), | |
16252 | + cast_ulp(enable_mask), | |
16253 | + max_enable); | |
16254 | + | |
16255 | + count = bitmap_weight(cast_ulp(used_mask), max_enable); | |
16256 | + | |
16257 | + /* | |
16258 | + * stop monitoring | |
16259 | + * Unfortunately, this is very expensive! | |
16260 | + * wrmsrl() is serializing. | |
16261 | + */ | |
16262 | + for (i = 0; count; i++) { | |
16263 | + if (test_bit(i, cast_ulp(used_mask))) { | |
16264 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); | |
16265 | + count--; | |
16266 | + } | |
16267 | + } | |
16268 | + | |
16269 | + /* | |
16270 | + * if we already having a pending overflow condition, we simply | |
16271 | + * return to take care of this first. | |
16272 | + */ | |
16273 | + if (set->npend_ovfls) | |
16274 | + return 1; | |
16275 | + | |
16276 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
16277 | + cnt_pmds = ctx->regs.cnt_pmds; | |
16278 | + | |
16279 | + /* | |
16280 | + * check for pending overflows and save PMDs (combo) | |
16281 | + * we employ used_pmds because we also need to save | |
16282 | + * and not just check for pending interrupts. | |
16283 | + * | |
16284 | + * Must check for counting PMDs because of virtual PMDs | |
16285 | + */ | |
16286 | + count = set->nused_pmds; | |
16287 | + for (i = 0; count; i++) { | |
16288 | + if (test_bit(i, cast_ulp(set->used_pmds))) { | |
16289 | + val = pfm_arch_read_pmd(ctx, i); | |
16290 | + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { | |
16291 | + if (!(val & wmask)) { | |
16292 | + __set_bit(i, cast_ulp(set->povfl_pmds)); | |
16293 | + set->npend_ovfls++; | |
16294 | + } | |
16295 | + val = (set->pmds[i].value & ~ovfl_mask) | |
16296 | + | (val & ovfl_mask); | |
16297 | + } | |
16298 | + set->pmds[i].value = val; | |
16299 | + count--; | |
16300 | + } | |
16301 | + } | |
16302 | + /* 0 means: no need to save PMDs at upper level */ | |
16303 | + return 0; | |
16304 | +} | |
16305 | + | |
16306 | +/** | |
16307 | + * pfm_intel_arch_quiesce - stop monitoring without grabbing any lock | |
16308 | + * | |
16309 | + * called from NMI interrupt handler to immediately stop monitoring | |
16310 | + * cannot grab any lock, including perfmon related locks | |
16311 | + */ | |
16312 | +static void __kprobes pfm_intel_arch_quiesce(void) | |
16313 | +{ | |
16314 | + u16 i; | |
16315 | + | |
16316 | + /* | |
16317 | + * PMC16 is the fixed control control register so it has a | |
16318 | + * distinct MSR address | |
16319 | + * | |
16320 | + * We do not use the hw_addr field in the table to avoid touching | |
16321 | + * too many cachelines | |
16322 | + */ | |
16323 | + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) { | |
16324 | + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) { | |
16325 | + if (i == 16) | |
16326 | + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); | |
16327 | + else | |
16328 | + wrmsrl(MSR_P6_EVNTSEL0+i, 0); | |
16329 | + } | |
16330 | + } | |
16331 | +} | |
16332 | + | |
16333 | +/** | |
16334 | + * pfm_intel_arch_restore_pmcs - reload PMC registers | |
16335 | + * @ctx: context to restore from | |
16336 | + * @set: current event set | |
16337 | + * | |
16338 | + * optimized version of pfm_arch_restore_pmcs(). On architectural perfmon, | |
16339 | + * we can afford to only restore the pmcs registers we use, because they | |
16340 | + * are all independent from each other. | |
16341 | + */ | |
16342 | +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx, | |
16343 | + struct pfm_event_set *set) | |
16344 | +{ | |
16345 | + u64 *mask; | |
16346 | + u16 i, num; | |
16347 | + | |
16348 | + mask = set->used_pmcs; | |
16349 | + num = set->nused_pmcs; | |
16350 | + for (i = 0; num; i++) { | |
16351 | + if (test_bit(i, cast_ulp(mask))) { | |
16352 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]); | |
16353 | + num--; | |
16354 | + } | |
16355 | + } | |
16356 | +} | |
16357 | +/* | |
16358 | + * Counters may have model-specific width. Yet the documentation says | |
16359 | + * that only the lower 32 bits can be written to due to the specification | |
16360 | + * of wrmsr. bits [32-(w-1)] are sign extensions of bit 31. Bits [w-63] must | |
16361 | + * not be set (see rsvd_msk for PMDs). As such the effective width of a | |
16362 | + * counter is 31 bits only regardless of what CPUID.0xa returns. | |
16363 | + * | |
16364 | + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18 | |
16365 | + */ | |
16366 | +static struct pfm_pmu_config pfm_intel_arch_pmu_conf = { | |
16367 | + .pmu_name = "Intel architectural", | |
16368 | + .pmd_desc = pfm_intel_arch_pmd_desc, | |
16369 | + .counter_width = 31, | |
16370 | + .num_pmc_entries = PFM_IA_MAX_PMCS, | |
16371 | + .num_pmd_entries = PFM_IA_MAX_PMDS, | |
16372 | + .pmc_desc = pfm_intel_arch_pmc_desc, | |
16373 | + .probe_pmu = pfm_intel_arch_probe_pmu, | |
16374 | + .version = "1.0", | |
16375 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
16376 | + .owner = THIS_MODULE, | |
16377 | + .pmu_info = &pfm_intel_arch_pmu_info | |
16378 | +}; | |
16379 | + | |
16380 | +static int __init pfm_intel_arch_pmu_init_module(void) | |
16381 | +{ | |
16382 | + return pfm_pmu_register(&pfm_intel_arch_pmu_conf); | |
16383 | +} | |
16384 | + | |
16385 | +static void __exit pfm_intel_arch_pmu_cleanup_module(void) | |
16386 | +{ | |
16387 | + pfm_pmu_unregister(&pfm_intel_arch_pmu_conf); | |
16388 | +} | |
16389 | + | |
16390 | +module_init(pfm_intel_arch_pmu_init_module); | |
16391 | +module_exit(pfm_intel_arch_pmu_cleanup_module); | |
16392 | --- /dev/null | |
16393 | +++ b/arch/x86/perfmon/perfmon_intel_atom.c | |
16394 | @@ -0,0 +1,541 @@ | |
16395 | +/* | |
16396 | + * perfmon support for Intel Atom (architectural perfmon v3 + PEBS) | |
16397 | + * | |
16398 | + * Copyright (c) 2008 Google,Inc | |
16399 | + * Contributed by Stephane Eranian <eranian@gmail.com> | |
16400 | + * | |
16401 | + * This program is free software; you can redistribute it and/or | |
16402 | + * modify it under the terms of version 2 of the GNU General Public | |
16403 | + * License as published by the Free Software Foundation. | |
16404 | + * | |
16405 | + * This program is distributed in the hope that it will be useful, | |
16406 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16407 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16408 | + * General Public License for more details. | |
16409 | + * | |
16410 | + * You should have received a copy of the GNU General Public License | |
16411 | + * along with this program; if not, write to the Free Software | |
16412 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
16413 | + * 02111-1307 USA | |
16414 | + */ | |
16415 | +#include <linux/module.h> | |
16416 | +#include <linux/kprobes.h> | |
16417 | +#include <linux/perfmon_kern.h> | |
16418 | +#include <asm/msr.h> | |
16419 | + | |
16420 | +MODULE_AUTHOR("Stephane Eranian <eranian@gmail.com>"); | |
16421 | +MODULE_DESCRIPTION("Intel Atom"); | |
16422 | +MODULE_LICENSE("GPL"); | |
16423 | + | |
16424 | +static int force, force_nmi; | |
16425 | +MODULE_PARM_DESC(force, "bool: force module to load succesfully"); | |
16426 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
16427 | +module_param(force, bool, 0600); | |
16428 | +module_param(force_nmi, bool, 0600); | |
16429 | + | |
16430 | +/* | |
16431 | + * - upper 32 bits are reserved | |
16432 | + * - INT: APIC enable bit is reserved (forced to 1) | |
16433 | + * | |
16434 | + * RSVD: reserved bits are 1 | |
16435 | + */ | |
16436 | +#define PFM_ATOM_PMC_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20)) | |
16437 | + | |
16438 | +/* | |
16439 | + * force Local APIC interrupt on overflow | |
16440 | + * disable with NO_EMUL64 | |
16441 | + */ | |
16442 | +#define PFM_ATOM_PMC_VAL (1ULL<<20) | |
16443 | +#define PFM_ATOM_NO64 (1ULL<<20) | |
16444 | + | |
16445 | +/* | |
16446 | + * Atom counters are 40 bits. 40 bits can be read but only 31 can be written | |
16447 | + * to due to a limitation of wrmsr. Bits [63-32] are sign extensions of bit 31. | |
16448 | + * Bits [63-40] must not be set | |
16449 | + * | |
16450 | + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18 | |
16451 | + */ | |
16452 | +#define PFM_ATOM_PMD_WIDTH 31 | |
16453 | +#define PFM_ATOM_PMD_RSVD ~((1ULL << 40)-1) | |
16454 | + | |
16455 | +static void pfm_intel_atom_acquire_pmu_percpu(void); | |
16456 | +static void pfm_intel_atom_release_pmu_percpu(void); | |
16457 | +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx, | |
16458 | + struct pfm_event_set *set); | |
16459 | +static int pfm_intel_atom_stop_save(struct pfm_context *ctx, | |
16460 | + struct pfm_event_set *set); | |
16461 | +static int pfm_intel_atom_has_ovfls(struct pfm_context *ctx); | |
16462 | +static void __kprobes pfm_intel_atom_quiesce(void); | |
16463 | + | |
16464 | +struct pfm_arch_pmu_info pfm_intel_atom_pmu_info = { | |
16465 | + .stop_save = pfm_intel_atom_stop_save, | |
16466 | + .has_ovfls = pfm_intel_atom_has_ovfls, | |
16467 | + .quiesce = pfm_intel_atom_quiesce, | |
16468 | + .restore_pmcs = pfm_intel_atom_restore_pmcs, | |
16469 | + .acquire_pmu_percpu = pfm_intel_atom_acquire_pmu_percpu, | |
16470 | + .release_pmu_percpu = pfm_intel_atom_release_pmu_percpu | |
16471 | + | |
16472 | +}; | |
16473 | + | |
16474 | +#define PFM_ATOM_C(n) { \ | |
16475 | + .type = PFM_REG_I64, \ | |
16476 | + .desc = "PERFEVTSEL"#n, \ | |
16477 | + .dfl_val = PFM_ATOM_PMC_VAL, \ | |
16478 | + .rsvd_msk = PFM_ATOM_PMC_RSVD, \ | |
16479 | + .no_emul64_msk = PFM_ATOM_NO64, \ | |
16480 | + .hw_addr = MSR_P6_EVNTSEL0 + (n) \ | |
16481 | + } | |
16482 | + | |
16483 | + | |
16484 | +static struct pfm_regmap_desc pfm_intel_atom_pmc_desc[] = { | |
16485 | +/* pmc0 */ PFM_ATOM_C(0), | |
16486 | +/* pmc1 */ PFM_ATOM_C(1), | |
16487 | +/* pmc2 */ PMX_NA, PMX_NA, | |
16488 | +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16489 | +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16490 | +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16491 | +/* pmc16 */ { .type = PFM_REG_I, | |
16492 | + .desc = "FIXED_CTRL", | |
16493 | + .dfl_val = 0x0000000000000888ULL, /* force PMI */ | |
16494 | + .rsvd_msk = 0xfffffffffffffcccULL, /* 3 fixed counters defined */ | |
16495 | + .no_emul64_msk = 0, | |
16496 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL | |
16497 | + }, | |
16498 | +/* pmc17 */{ .type = PFM_REG_W, | |
16499 | + .desc = "PEBS_ENABLE", | |
16500 | + .dfl_val = 0, | |
16501 | + .rsvd_msk = 0xfffffffffffffffeULL, | |
16502 | + .no_emul64_msk = 0, | |
16503 | + .hw_addr = MSR_IA32_PEBS_ENABLE | |
16504 | + } | |
16505 | +}; | |
16506 | +#define PFM_ATOM_MAX_PMCS ARRAY_SIZE(pfm_intel_atom_pmc_desc) | |
16507 | + | |
16508 | +#define PFM_ATOM_D(n) \ | |
16509 | + { .type = PFM_REG_C, \ | |
16510 | + .desc = "PMC"#n, \ | |
16511 | + .rsvd_msk = PFM_ATOM_PMD_RSVD, \ | |
16512 | + .hw_addr = MSR_P6_PERFCTR0+n, \ | |
16513 | + .dep_pmcs[0] = 1ULL << n \ | |
16514 | + } | |
16515 | + | |
16516 | +#define PFM_ATOM_FD(n) \ | |
16517 | + { .type = PFM_REG_C, \ | |
16518 | + .desc = "FIXED_CTR"#n, \ | |
16519 | + .rsvd_msk = PFM_ATOM_PMD_RSVD, \ | |
16520 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ | |
16521 | + .dep_pmcs[0] = 1ULL << 16 \ | |
16522 | + } | |
16523 | + | |
16524 | +static struct pfm_regmap_desc pfm_intel_atom_pmd_desc[] = { | |
16525 | +/* pmd0 */ PFM_ATOM_D(0), | |
16526 | +/* pmd1 */ PFM_ATOM_D(1), | |
16527 | +/* pmd2 */ PMX_NA, | |
16528 | +/* pmd3 */ PMX_NA, | |
16529 | +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16530 | +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16531 | +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
16532 | +/* pmd16 */ PFM_ATOM_FD(0), | |
16533 | +/* pmd17 */ PFM_ATOM_FD(1), | |
16534 | +/* pmd18 */ PFM_ATOM_FD(2) | |
16535 | +}; | |
16536 | +#define PFM_ATOM_MAX_PMDS ARRAY_SIZE(pfm_intel_atom_pmd_desc) | |
16537 | + | |
16538 | +static struct pfm_pmu_config pfm_intel_atom_pmu_conf; | |
16539 | + | |
16540 | +static int pfm_intel_atom_probe_pmu(void) | |
16541 | +{ | |
16542 | + if (force) | |
16543 | + goto doit; | |
16544 | + | |
16545 | + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) | |
16546 | + return -1; | |
16547 | + | |
16548 | + if (current_cpu_data.x86 != 6) | |
16549 | + return -1; | |
16550 | + | |
16551 | + if (current_cpu_data.x86_model != 28) | |
16552 | + return -1; | |
16553 | +doit: | |
16554 | + /* | |
16555 | + * having APIC is mandatory, so disregard force option | |
16556 | + */ | |
16557 | + if (!cpu_has_apic) { | |
16558 | + PFM_INFO("no Local APIC, try rebooting with lapic option"); | |
16559 | + return -1; | |
16560 | + } | |
16561 | + | |
16562 | + PFM_INFO("detected Intel Atom PMU"); | |
16563 | + | |
16564 | + if (force_nmi) | |
16565 | + pfm_intel_atom_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
16566 | + | |
16567 | + return 0; | |
16568 | +} | |
16569 | + | |
16570 | +/** | |
16571 | + * pfm_intel_atom_has_ovfls - check for pending overflow condition | |
16572 | + * @ctx: context to work on | |
16573 | + * | |
16574 | + * detect if counters have overflowed. | |
16575 | + * return: | |
16576 | + * 0 : no overflow | |
16577 | + * 1 : at least one overflow | |
16578 | + */ | |
16579 | +static int __kprobes pfm_intel_atom_has_ovfls(struct pfm_context *ctx) | |
16580 | +{ | |
16581 | + struct pfm_regmap_desc *d; | |
16582 | + u64 ovf; | |
16583 | + | |
16584 | + d = pfm_pmu_conf->pmd_desc; | |
16585 | + /* | |
16586 | + * read global overflow status register | |
16587 | + * if sharing PMU, then not all bit are ours so must | |
16588 | + * check only the ones we actually use | |
16589 | + */ | |
16590 | + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf); | |
16591 | + | |
16592 | + /* | |
16593 | + * for pmd0, we also check PEBS overflow on bit 62 | |
16594 | + */ | |
16595 | + if ((d[0].type & PFM_REG_I) && (ovf & ((1ull << 62) | 1ull))) | |
16596 | + return 1; | |
16597 | + | |
16598 | + if ((d[1].type & PFM_REG_I) && (ovf & 2ull)) | |
16599 | + return 1; | |
16600 | + | |
16601 | + if ((d[16].type & PFM_REG_I) && (ovf & (1ull << 32))) | |
16602 | + return 1; | |
16603 | + | |
16604 | + if ((d[17].type & PFM_REG_I) && (ovf & (2ull << 32))) | |
16605 | + return 1; | |
16606 | + | |
16607 | + if ((d[18].type & PFM_REG_I) && (ovf & (4ull << 32))) | |
16608 | + return 1; | |
16609 | + | |
16610 | + return 0; | |
16611 | +} | |
16612 | + | |
16613 | +/** | |
16614 | + * pfm_intel_atom_stop_save - stop monitoring, collect pending overflow, save pmds | |
16615 | + * @ctx: context to work on | |
16616 | + * @set: active set | |
16617 | + * | |
16618 | + * return: | |
16619 | + * 1: caller needs to save pmds | |
16620 | + * 0: caller does not need to save pmds, they have been saved by this call | |
16621 | + */ | |
16622 | +static int pfm_intel_atom_stop_save(struct pfm_context *ctx, | |
16623 | + struct pfm_event_set *set) | |
16624 | +{ | |
16625 | +#define PFM_ATOM_WMASK (1ULL << 31) | |
16626 | +#define PFM_ATOM_OMASK ((1ULL << 31)-1) | |
16627 | + u64 clear_ovf = 0; | |
16628 | + u64 ovf, ovf2, val; | |
16629 | + | |
16630 | + /* | |
16631 | + * read global overflow status register | |
16632 | + * if sharing PMU, then not all bits are ours so must | |
16633 | + * check only the ones we actually use. | |
16634 | + * | |
16635 | + * XXX: Atom seems to have a bug with the stickiness of | |
16636 | + * GLOBAL_STATUS. If we read GLOBAL_STATUS after we | |
16637 | + * clear the generic counters, then their bits in | |
16638 | + * GLOBAL_STATUS are cleared. This should not be the | |
16639 | + * case according to the architected PMU. To work around | |
16640 | + * the problem, we read GLOBAL_STATUS BEFORE we stop | |
16641 | + * all monitoring. | |
16642 | + */ | |
16643 | + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf); | |
16644 | + | |
16645 | + /* | |
16646 | + * stop monitoring | |
16647 | + */ | |
16648 | + if (test_bit(0, cast_ulp(set->used_pmcs))) | |
16649 | + wrmsrl(MSR_P6_EVNTSEL0, 0); | |
16650 | + | |
16651 | + if (test_bit(1, cast_ulp(set->used_pmcs))) | |
16652 | + wrmsrl(MSR_P6_EVNTSEL1, 0); | |
16653 | + | |
16654 | + if (test_bit(16, cast_ulp(set->used_pmcs))) | |
16655 | + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); | |
16656 | + | |
16657 | + if (test_bit(17, cast_ulp(set->used_pmcs))) | |
16658 | + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | |
16659 | + | |
16660 | + /* | |
16661 | + * XXX: related to bug mentioned above | |
16662 | + * | |
16663 | + * read GLOBAL_STATUS again to avoid race condition | |
16664 | + * with overflows happening after first read and | |
16665 | + * before stop. That avoids missing overflows on | |
16666 | + * the fixed counters and PEBS | |
16667 | + */ | |
16668 | + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf2); | |
16669 | + ovf |= ovf2; | |
16670 | + | |
16671 | + /* | |
16672 | + * if we already have a pending overflow condition, we simply | |
16673 | + * return to take care of it first. | |
16674 | + */ | |
16675 | + if (set->npend_ovfls) | |
16676 | + return 1; | |
16677 | + | |
16678 | + /* | |
16679 | + * check PMD 0,1,16,17,18 for overflow and save their value | |
16680 | + */ | |
16681 | + if (test_bit(0, cast_ulp(set->used_pmds))) { | |
16682 | + rdmsrl(MSR_P6_PERFCTR0, val); | |
16683 | + if (ovf & ((1ull<<62)|1ull)) { | |
16684 | + __set_bit(0, cast_ulp(set->povfl_pmds)); | |
16685 | + set->npend_ovfls++; | |
16686 | + clear_ovf = (1ull << 62) | 1ull; | |
16687 | + } | |
16688 | + val = (set->pmds[0].value & ~PFM_ATOM_OMASK) | |
16689 | + | (val & PFM_ATOM_OMASK); | |
16690 | + set->pmds[0].value = val; | |
16691 | + } | |
16692 | + | |
16693 | + if (test_bit(1, cast_ulp(set->used_pmds))) { | |
16694 | + rdmsrl(MSR_P6_PERFCTR1, val); | |
16695 | + if (ovf & 2ull) { | |
16696 | + __set_bit(1, cast_ulp(set->povfl_pmds)); | |
16697 | + set->npend_ovfls++; | |
16698 | + clear_ovf |= 2ull; | |
16699 | + } | |
16700 | + val = (set->pmds[1].value & ~PFM_ATOM_OMASK) | |
16701 | + | (val & PFM_ATOM_OMASK); | |
16702 | + set->pmds[1].value = val; | |
16703 | + } | |
16704 | + | |
16705 | + if (test_bit(16, cast_ulp(set->used_pmds))) { | |
16706 | + rdmsrl(MSR_CORE_PERF_FIXED_CTR0, val); | |
16707 | + if (ovf & (1ull << 32)) { | |
16708 | + __set_bit(16, cast_ulp(set->povfl_pmds)); | |
16709 | + set->npend_ovfls++; | |
16710 | + clear_ovf |= 1ull << 32; | |
16711 | + } | |
16712 | + val = (set->pmds[16].value & ~PFM_ATOM_OMASK) | |
16713 | + | (val & PFM_ATOM_OMASK); | |
16714 | + set->pmds[16].value = val; | |
16715 | + } | |
16716 | + | |
16717 | + if (test_bit(17, cast_ulp(set->used_pmds))) { | |
16718 | + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+1, val); | |
16719 | + if (ovf & (2ull << 32)) { | |
16720 | + __set_bit(17, cast_ulp(set->povfl_pmds)); | |
16721 | + set->npend_ovfls++; | |
16722 | + clear_ovf |= 2ull << 32; | |
16723 | + } | |
16724 | + val = (set->pmds[17].value & ~PFM_ATOM_OMASK) | |
16725 | + | (val & PFM_ATOM_OMASK); | |
16726 | + set->pmds[17].value = val; | |
16727 | + } | |
16728 | + | |
16729 | + if (test_bit(18, cast_ulp(set->used_pmds))) { | |
16730 | + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+2, val); | |
16731 | + if (ovf & (4ull << 32)) { | |
16732 | + __set_bit(18, cast_ulp(set->povfl_pmds)); | |
16733 | + set->npend_ovfls++; | |
16734 | + clear_ovf |= 4ull << 32; | |
16735 | + } | |
16736 | + val = (set->pmds[18].value & ~PFM_ATOM_OMASK) | |
16737 | + | (val & PFM_ATOM_OMASK); | |
16738 | + set->pmds[18].value = val; | |
16739 | + } | |
16740 | + | |
16741 | + if (clear_ovf) | |
16742 | + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf); | |
16743 | + | |
16744 | + /* 0 means: no need to save PMDs at upper level */ | |
16745 | + return 0; | |
16746 | +} | |
16747 | + | |
16748 | +/** | |
16749 | + * pfm_intel_atom_quiesce - stop monitoring without grabbing any lock | |
16750 | + * | |
16751 | + * called from NMI interrupt handler to immediately stop monitoring | |
16752 | + * cannot grab any lock, including perfmon related locks | |
16753 | + */ | |
16754 | +static void __kprobes pfm_intel_atom_quiesce(void) | |
16755 | +{ | |
16756 | + /* | |
16757 | + * quiesce PMU by clearing available registers that have | |
16758 | + * the start/stop capability | |
16759 | + */ | |
16760 | + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
16761 | + wrmsrl(MSR_P6_EVNTSEL0, 0); | |
16762 | + | |
16763 | + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
16764 | + wrmsrl(MSR_P6_EVNTSEL1, 0); | |
16765 | + | |
16766 | + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
16767 | + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); | |
16768 | + | |
16769 | + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
16770 | + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | |
16771 | +} | |
16772 | + | |
16773 | +/** | |
16774 | + * pfm_intel_atom_restore_pmcs - reload PMC registers | |
16775 | + * @ctx: context to restore from | |
16776 | + * @set: current event set | |
16777 | + * | |
16778 | + * restores pmcs and also PEBS Data Save area pointer | |
16779 | + */ | |
16780 | +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx, | |
16781 | + struct pfm_event_set *set) | |
16782 | +{ | |
16783 | + struct pfm_arch_context *ctx_arch; | |
16784 | + u64 clear_ovf = 0; | |
16785 | + | |
16786 | + ctx_arch = pfm_ctx_arch(ctx); | |
16787 | + /* | |
16788 | + * must restore DS pointer before restoring PMCs | |
16789 | + * as this can potentially reactivate monitoring | |
16790 | + */ | |
16791 | + if (ctx_arch->flags.use_ds) | |
16792 | + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); | |
16793 | + | |
16794 | + if (test_bit(0, cast_ulp(set->used_pmcs))) { | |
16795 | + wrmsrl(MSR_P6_EVNTSEL0, set->pmcs[0]); | |
16796 | + clear_ovf = 1ull; | |
16797 | + } | |
16798 | + | |
16799 | + if (test_bit(1, cast_ulp(set->used_pmcs))) { | |
16800 | + wrmsrl(MSR_P6_EVNTSEL1, set->pmcs[1]); | |
16801 | + clear_ovf |= 2ull; | |
16802 | + } | |
16803 | + | |
16804 | + if (test_bit(16, cast_ulp(set->used_pmcs))) { | |
16805 | + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, set->pmcs[16]); | |
16806 | + clear_ovf |= 7ull << 32; | |
16807 | + } | |
16808 | + | |
16809 | + if (test_bit(17, cast_ulp(set->used_pmcs))) { | |
16810 | + wrmsrl(MSR_IA32_PEBS_ENABLE, set->pmcs[17]); | |
16811 | + clear_ovf |= 1ull << 62; | |
16812 | + } | |
16813 | + | |
16814 | + if (clear_ovf) | |
16815 | + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf); | |
16816 | +} | |
16817 | + | |
16818 | +static int pfm_intel_atom_pmc17_check(struct pfm_context *ctx, | |
16819 | + struct pfm_event_set *set, | |
16820 | + struct pfarg_pmc *req) | |
16821 | +{ | |
16822 | + struct pfm_arch_context *ctx_arch; | |
16823 | + ctx_arch = pfm_ctx_arch(ctx); | |
16824 | + | |
16825 | + /* | |
16826 | + * if user activates PEBS_ENABLE, then we need to have a valid | |
16827 | + * DS Area setup. This only happens when the PEBS sampling format is | |
16828 | + * used in which case PFM_X86_USE_PEBS is set. We must reject all other | |
16829 | + * requests. | |
16830 | + * | |
16831 | + * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears | |
16832 | + * that a value of 0 for this MSR does crash the system with | |
16833 | + * PEBS_ENABLE=1. | |
16834 | + */ | |
16835 | + if (!ctx_arch->flags.use_pebs && req->reg_value) { | |
16836 | + PFM_DBG("pmc17 useable only with a PEBS sampling format"); | |
16837 | + return -EINVAL; | |
16838 | + } | |
16839 | + return 0; | |
16840 | +} | |
16841 | + | |
16842 | +DEFINE_PER_CPU(u64, saved_global_ctrl); | |
16843 | + | |
16844 | +/** | |
16845 | + * pfm_intel_atom_acquire_pmu_percpu - acquire PMU resource per CPU | |
16846 | + * | |
16847 | + * For Atom, it is necessary to enable all available | |
16848 | + * registers. The firmware rightfully has the fixed counters | |
16849 | + * disabled for backward compatibility with architectural perfmon | |
16850 | + * v1 | |
16851 | + * | |
16852 | + * This function is invoked on each online CPU | |
16853 | + */ | |
16854 | +static void pfm_intel_atom_acquire_pmu_percpu(void) | |
16855 | +{ | |
16856 | + struct pfm_regmap_desc *d; | |
16857 | + u64 mask = 0; | |
16858 | + unsigned int i; | |
16859 | + | |
16860 | + /* | |
16861 | + * build bitmask of registers that are available to | |
16862 | + * us. In some cases, there may be fewer registers than | |
16863 | + * what Atom supports due to sharing with other kernel | |
16864 | + * subsystems, such as NMI | |
16865 | + */ | |
16866 | + d = pfm_pmu_conf->pmd_desc; | |
16867 | + for (i=0; i < 16; i++) { | |
16868 | + if ((d[i].type & PFM_REG_I) == 0) | |
16869 | + continue; | |
16870 | + mask |= 1ull << i; | |
16871 | + } | |
16872 | + for (i=16; i < PFM_ATOM_MAX_PMDS; i++) { | |
16873 | + if ((d[i].type & PFM_REG_I) == 0) | |
16874 | + continue; | |
16875 | + mask |= 1ull << (32+i-16); | |
16876 | + } | |
16877 | + | |
16878 | + /* | |
16879 | + * keep a local copy of the current MSR_CORE_PERF_GLOBAL_CTRL | |
16880 | + */ | |
16881 | + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl)); | |
16882 | + | |
16883 | + PFM_DBG("global=0x%llx set to 0x%llx", | |
16884 | + __get_cpu_var(saved_global_ctrl), | |
16885 | + mask); | |
16886 | + | |
16887 | + /* | |
16888 | + * enable all registers | |
16889 | + * | |
16890 | + * No need to quiesce PMU. If there is a overflow, it will be | |
16891 | + * treated as spurious by the handler | |
16892 | + */ | |
16893 | + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, mask); | |
16894 | +} | |
16895 | + | |
16896 | +/** | |
16897 | + * pfm_intel_atom_release_pmu_percpu - release PMU resource per CPU | |
16898 | + * | |
16899 | + * For Atom, we restore MSR_CORE_PERF_GLOBAL_CTRL to its original value | |
16900 | + */ | |
16901 | +static void pfm_intel_atom_release_pmu_percpu(void) | |
16902 | +{ | |
16903 | + PFM_DBG("global_ctrl restored to 0x%llx\n", | |
16904 | + __get_cpu_var(saved_global_ctrl)); | |
16905 | + | |
16906 | + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl)); | |
16907 | +} | |
16908 | + | |
16909 | +static struct pfm_pmu_config pfm_intel_atom_pmu_conf = { | |
16910 | + .pmu_name = "Intel Atom", | |
16911 | + .pmd_desc = pfm_intel_atom_pmd_desc, | |
16912 | + .counter_width = PFM_ATOM_PMD_WIDTH, | |
16913 | + .num_pmc_entries = PFM_ATOM_MAX_PMCS, | |
16914 | + .num_pmd_entries = PFM_ATOM_MAX_PMDS, | |
16915 | + .pmc_desc = pfm_intel_atom_pmc_desc, | |
16916 | + .probe_pmu = pfm_intel_atom_probe_pmu, | |
16917 | + .version = "1.0", | |
16918 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
16919 | + .owner = THIS_MODULE, | |
16920 | + .pmc_write_check = pfm_intel_atom_pmc17_check, | |
16921 | + .pmu_info = &pfm_intel_atom_pmu_info | |
16922 | +}; | |
16923 | + | |
16924 | +static int __init pfm_intel_atom_pmu_init_module(void) | |
16925 | +{ | |
16926 | + return pfm_pmu_register(&pfm_intel_atom_pmu_conf); | |
16927 | +} | |
16928 | + | |
16929 | +static void __exit pfm_intel_atom_pmu_cleanup_module(void) | |
16930 | +{ | |
16931 | + pfm_pmu_unregister(&pfm_intel_atom_pmu_conf); | |
16932 | +} | |
16933 | + | |
16934 | +module_init(pfm_intel_atom_pmu_init_module); | |
16935 | +module_exit(pfm_intel_atom_pmu_cleanup_module); | |
16936 | --- /dev/null | |
16937 | +++ b/arch/x86/perfmon/perfmon_intel_core.c | |
16938 | @@ -0,0 +1,449 @@ | |
16939 | +/* | |
16940 | + * This file contains the Intel Core PMU registers description tables. | |
16941 | + * Intel Core-based processors support architectural perfmon v2 + PEBS | |
16942 | + * | |
16943 | + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. | |
16944 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
16945 | + */ | |
16946 | +#include <linux/module.h> | |
16947 | +#include <linux/kprobes.h> | |
16948 | +#include <linux/perfmon_kern.h> | |
16949 | +#include <linux/nmi.h> | |
16950 | + | |
16951 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
16952 | +MODULE_DESCRIPTION("Intel Core"); | |
16953 | +MODULE_LICENSE("GPL"); | |
16954 | + | |
16955 | +static int force_nmi; | |
16956 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
16957 | +module_param(force_nmi, bool, 0600); | |
16958 | + | |
16959 | +/* | |
16960 | + * - upper 32 bits are reserved | |
16961 | + * - INT: APIC enable bit is reserved (forced to 1) | |
16962 | + * - bit 21 is reserved | |
16963 | + * | |
16964 | + * RSVD: reserved bits must be 1 | |
16965 | + */ | |
16966 | +#define PFM_CORE_PMC_RSVD ((~((1ULL<<32)-1)) \ | |
16967 | + | (1ULL<<20) \ | |
16968 | + | (1ULL<<21)) | |
16969 | + | |
16970 | +/* | |
16971 | + * Core counters are 40-bits | |
16972 | + */ | |
16973 | +#define PFM_CORE_CTR_RSVD (~((1ULL<<40)-1)) | |
16974 | + | |
16975 | +/* | |
16976 | + * force Local APIC interrupt on overflow | |
16977 | + * disable with NO_EMUL64 | |
16978 | + */ | |
16979 | +#define PFM_CORE_PMC_VAL (1ULL<<20) | |
16980 | +#define PFM_CORE_NO64 (1ULL<<20) | |
16981 | + | |
16982 | +#define PFM_CORE_NA { .reg_type = PFM_REGT_NA} | |
16983 | + | |
16984 | +#define PFM_CORE_CA(m, c, t) \ | |
16985 | + { \ | |
16986 | + .addrs[0] = m, \ | |
16987 | + .ctr = c, \ | |
16988 | + .reg_type = t \ | |
16989 | + } | |
16990 | + | |
16991 | +struct pfm_ds_area_intel_core { | |
16992 | + u64 bts_buf_base; | |
16993 | + u64 bts_index; | |
16994 | + u64 bts_abs_max; | |
16995 | + u64 bts_intr_thres; | |
16996 | + u64 pebs_buf_base; | |
16997 | + u64 pebs_index; | |
16998 | + u64 pebs_abs_max; | |
16999 | + u64 pebs_intr_thres; | |
17000 | + u64 pebs_cnt_reset; | |
17001 | +}; | |
17002 | + | |
17003 | +static void pfm_core_restore_pmcs(struct pfm_context *ctx, | |
17004 | + struct pfm_event_set *set); | |
17005 | +static int pfm_core_has_ovfls(struct pfm_context *ctx); | |
17006 | +static int pfm_core_stop_save(struct pfm_context *ctx, | |
17007 | + struct pfm_event_set *set); | |
17008 | +static void __kprobes pfm_core_quiesce(void); | |
17009 | + | |
17010 | +static u64 enable_mask[PFM_MAX_PMCS]; | |
17011 | +static u16 max_enable; | |
17012 | + | |
17013 | +struct pfm_arch_pmu_info pfm_core_pmu_info = { | |
17014 | + .stop_save = pfm_core_stop_save, | |
17015 | + .has_ovfls = pfm_core_has_ovfls, | |
17016 | + .quiesce = pfm_core_quiesce, | |
17017 | + .restore_pmcs = pfm_core_restore_pmcs | |
17018 | +}; | |
17019 | + | |
17020 | +static struct pfm_regmap_desc pfm_core_pmc_desc[] = { | |
17021 | +/* pmc0 */ { | |
17022 | + .type = PFM_REG_I64, | |
17023 | + .desc = "PERFEVTSEL0", | |
17024 | + .dfl_val = PFM_CORE_PMC_VAL, | |
17025 | + .rsvd_msk = PFM_CORE_PMC_RSVD, | |
17026 | + .no_emul64_msk = PFM_CORE_NO64, | |
17027 | + .hw_addr = MSR_P6_EVNTSEL0 | |
17028 | + }, | |
17029 | +/* pmc1 */ { | |
17030 | + .type = PFM_REG_I64, | |
17031 | + .desc = "PERFEVTSEL1", | |
17032 | + .dfl_val = PFM_CORE_PMC_VAL, | |
17033 | + .rsvd_msk = PFM_CORE_PMC_RSVD, | |
17034 | + .no_emul64_msk = PFM_CORE_NO64, | |
17035 | + .hw_addr = MSR_P6_EVNTSEL1 | |
17036 | + }, | |
17037 | +/* pmc2 */ PMX_NA, PMX_NA, | |
17038 | +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17039 | +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17040 | +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17041 | +/* pmc16 */ { .type = PFM_REG_I, | |
17042 | + .desc = "FIXED_CTRL", | |
17043 | + .dfl_val = 0x888ULL, | |
17044 | + .rsvd_msk = 0xfffffffffffffcccULL, | |
17045 | + .no_emul64_msk = 0, | |
17046 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL | |
17047 | + }, | |
17048 | +/* pmc17 */ { .type = PFM_REG_W, | |
17049 | + .desc = "PEBS_ENABLE", | |
17050 | + .dfl_val = 0, | |
17051 | + .rsvd_msk = 0xfffffffffffffffeULL, | |
17052 | + .no_emul64_msk = 0, | |
17053 | + .hw_addr = MSR_IA32_PEBS_ENABLE | |
17054 | + } | |
17055 | +}; | |
17056 | + | |
17057 | +#define PFM_CORE_D(n) \ | |
17058 | + { .type = PFM_REG_C, \ | |
17059 | + .desc = "PMC"#n, \ | |
17060 | + .rsvd_msk = PFM_CORE_CTR_RSVD, \ | |
17061 | + .hw_addr = MSR_P6_PERFCTR0+n, \ | |
17062 | + .dep_pmcs[0] = 1ULL << n \ | |
17063 | + } | |
17064 | + | |
17065 | +#define PFM_CORE_FD(n) \ | |
17066 | + { .type = PFM_REG_C, \ | |
17067 | + .desc = "FIXED_CTR"#n, \ | |
17068 | + .rsvd_msk = PFM_CORE_CTR_RSVD, \ | |
17069 | + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ | |
17070 | + .dep_pmcs[0] = 1ULL << 16 \ | |
17071 | + } | |
17072 | + | |
17073 | +static struct pfm_regmap_desc pfm_core_pmd_desc[] = { | |
17074 | +/* pmd0 */ PFM_CORE_D(0), | |
17075 | +/* pmd1 */ PFM_CORE_D(1), | |
17076 | +/* pmd2 */ PMX_NA, PMX_NA, | |
17077 | +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17078 | +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17079 | +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, | |
17080 | +/* pmd16 */ PFM_CORE_FD(0), | |
17081 | +/* pmd17 */ PFM_CORE_FD(1), | |
17082 | +/* pmd18 */ PFM_CORE_FD(2) | |
17083 | +}; | |
17084 | +#define PFM_CORE_NUM_PMCS ARRAY_SIZE(pfm_core_pmc_desc) | |
17085 | +#define PFM_CORE_NUM_PMDS ARRAY_SIZE(pfm_core_pmd_desc) | |
17086 | + | |
17087 | +static struct pfm_pmu_config pfm_core_pmu_conf; | |
17088 | + | |
17089 | +static int pfm_core_probe_pmu(void) | |
17090 | +{ | |
17091 | + /* | |
17092 | + * Check for Intel Core processor explicitly | |
17093 | + * Checking for cpu_has_perfmon is not enough as this | |
17094 | + * matches intel Core Duo/Core Solo but none supports | |
17095 | + * PEBS. | |
17096 | + * | |
17097 | + * Intel Core = arch perfmon v2 + PEBS | |
17098 | + */ | |
17099 | + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | |
17100 | + PFM_INFO("not an Intel processor"); | |
17101 | + return -1; | |
17102 | + } | |
17103 | + | |
17104 | + if (current_cpu_data.x86 != 6) | |
17105 | + return -1; | |
17106 | + | |
17107 | + switch (current_cpu_data.x86_model) { | |
17108 | + case 15: /* Merom */ | |
17109 | + break; | |
17110 | + case 23: /* Penryn */ | |
17111 | + break; | |
17112 | + case 29: /* Dunnington */ | |
17113 | + break; | |
17114 | + default: | |
17115 | + return -1; | |
17116 | + } | |
17117 | + | |
17118 | + if (!cpu_has_apic) { | |
17119 | + PFM_INFO("no Local APIC, unsupported"); | |
17120 | + return -1; | |
17121 | + } | |
17122 | + | |
17123 | + PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d", | |
17124 | + nmi_watchdog, atomic_read(&nmi_active), force_nmi); | |
17125 | + | |
17126 | + /* | |
17127 | + * Intel Core processors implement DS and PEBS, no need to check | |
17128 | + */ | |
17129 | + if (cpu_has_pebs) | |
17130 | + PFM_INFO("PEBS supported, enabled"); | |
17131 | + | |
17132 | + /* | |
17133 | + * initialize bitmask of register with enable capability, i.e., | |
17134 | + * startstop. This is used to restrict the number of registers to | |
17135 | + * touch on start/stop | |
17136 | + * max_enable: number of bits to scan in enable_mask = highest + 1 | |
17137 | + * | |
17138 | + * may be adjusted in pfm_arch_pmu_acquire() | |
17139 | + */ | |
17140 | + __set_bit(0, cast_ulp(enable_mask)); | |
17141 | + __set_bit(1, cast_ulp(enable_mask)); | |
17142 | + __set_bit(16, cast_ulp(enable_mask)); | |
17143 | + __set_bit(17, cast_ulp(enable_mask)); | |
17144 | + max_enable = 17+1; | |
17145 | + | |
17146 | + if (force_nmi) | |
17147 | + pfm_core_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
17148 | + | |
17149 | + return 0; | |
17150 | +} | |
17151 | + | |
17152 | +static int pfm_core_pmc17_check(struct pfm_context *ctx, | |
17153 | + struct pfm_event_set *set, | |
17154 | + struct pfarg_pmc *req) | |
17155 | +{ | |
17156 | + struct pfm_arch_context *ctx_arch; | |
17157 | + ctx_arch = pfm_ctx_arch(ctx); | |
17158 | + | |
17159 | + /* | |
17160 | + * if user activates PEBS_ENABLE, then we need to have a valid | |
17161 | + * DS Area setup. This only happens when the PEBS sampling format is | |
17162 | + * used in which case PFM_X86_USE_PEBS is set. We must reject all other | |
17163 | + * requests. | |
17164 | + * | |
17165 | + * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears | |
17166 | + * that a value of 0 for this MSR does crash the system with | |
17167 | + * PEBS_ENABLE=1. | |
17168 | + */ | |
17169 | + if (!ctx_arch->flags.use_pebs && req->reg_value) { | |
17170 | + PFM_DBG("pmc17 useable only with a PEBS sampling format"); | |
17171 | + return -EINVAL; | |
17172 | + } | |
17173 | + return 0; | |
17174 | +} | |
17175 | + | |
17176 | +/* | |
17177 | + * detect if counters have overflowed. | |
17178 | + * return: | |
17179 | + * 0 : no overflow | |
17180 | + * 1 : at least one overflow | |
17181 | + * | |
17182 | + * used by Intel Core-based processors | |
17183 | + */ | |
17184 | +static int __kprobes pfm_core_has_ovfls(struct pfm_context *ctx) | |
17185 | +{ | |
17186 | + struct pfm_arch_pmu_info *pmu_info; | |
17187 | + u64 *cnt_mask; | |
17188 | + u64 wmask, val; | |
17189 | + u16 i, num; | |
17190 | + | |
17191 | + pmu_info = &pfm_core_pmu_info; | |
17192 | + cnt_mask = ctx->regs.cnt_pmds; | |
17193 | + num = ctx->regs.num_counters; | |
17194 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
17195 | + | |
17196 | + for (i = 0; num; i++) { | |
17197 | + if (test_bit(i, cast_ulp(cnt_mask))) { | |
17198 | + rdmsrl(pfm_core_pmd_desc[i].hw_addr, val); | |
17199 | + if (!(val & wmask)) | |
17200 | + return 1; | |
17201 | + num--; | |
17202 | + } | |
17203 | + } | |
17204 | + return 0; | |
17205 | +} | |
17206 | + | |
17207 | +static int pfm_core_stop_save(struct pfm_context *ctx, | |
17208 | + struct pfm_event_set *set) | |
17209 | +{ | |
17210 | + struct pfm_arch_context *ctx_arch; | |
17211 | + struct pfm_ds_area_intel_core *ds = NULL; | |
17212 | + u64 used_mask[PFM_PMC_BV]; | |
17213 | + u64 *cnt_mask; | |
17214 | + u64 val, wmask, ovfl_mask; | |
17215 | + u16 count, has_ovfl; | |
17216 | + u16 i, pebs_idx = ~0; | |
17217 | + | |
17218 | + ctx_arch = pfm_ctx_arch(ctx); | |
17219 | + | |
17220 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
17221 | + | |
17222 | + /* | |
17223 | + * used enable pmc bitmask | |
17224 | + */ | |
17225 | + bitmap_and(cast_ulp(used_mask), | |
17226 | + cast_ulp(set->used_pmcs), | |
17227 | + cast_ulp(enable_mask), | |
17228 | + max_enable); | |
17229 | + | |
17230 | + count = bitmap_weight(cast_ulp(used_mask), max_enable); | |
17231 | + /* | |
17232 | + * stop monitoring | |
17233 | + * Unfortunately, this is very expensive! | |
17234 | + * wrmsrl() is serializing. | |
17235 | + */ | |
17236 | + for (i = 0; count; i++) { | |
17237 | + if (test_bit(i, cast_ulp(used_mask))) { | |
17238 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); | |
17239 | + count--; | |
17240 | + } | |
17241 | + } | |
17242 | + /* | |
17243 | + * if we already have a pending overflow condition, we simply | |
17244 | + * return to take care of this first. | |
17245 | + */ | |
17246 | + if (set->npend_ovfls) | |
17247 | + return 1; | |
17248 | + | |
17249 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
17250 | + cnt_mask = ctx->regs.cnt_pmds; | |
17251 | + | |
17252 | + if (ctx_arch->flags.use_pebs) { | |
17253 | + ds = ctx_arch->ds_area; | |
17254 | + pebs_idx = 0; /* PMC0/PMD0 */ | |
17255 | + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx", | |
17256 | + ds, | |
17257 | + (unsigned long long)ds->pebs_index, | |
17258 | + (unsigned long long)ds->pebs_intr_thres); | |
17259 | + } | |
17260 | + | |
17261 | + /* | |
17262 | + * Check for pending overflows and save PMDs (combo) | |
17263 | + * We employ used_pmds and not intr_pmds because we must | |
17264 | + * also save the PMD registers. | |
17265 | + * Must check for counting PMDs because of virtual PMDs | |
17266 | + * | |
17267 | + * XXX: should use the ovf_status register instead, yet | |
17268 | + * we would have to check if NMI is used and fallback | |
17269 | + * to individual pmd inspection. | |
17270 | + */ | |
17271 | + count = set->nused_pmds; | |
17272 | + | |
17273 | + for (i = 0; count; i++) { | |
17274 | + if (test_bit(i, cast_ulp(set->used_pmds))) { | |
17275 | + val = pfm_arch_read_pmd(ctx, i); | |
17276 | + if (likely(test_bit(i, cast_ulp(cnt_mask)))) { | |
17277 | + if (i == pebs_idx) | |
17278 | + has_ovfl = (ds->pebs_index >= | |
17279 | + ds->pebs_intr_thres); | |
17280 | + else | |
17281 | + has_ovfl = !(val & wmask); | |
17282 | + if (has_ovfl) { | |
17283 | + __set_bit(i, cast_ulp(set->povfl_pmds)); | |
17284 | + set->npend_ovfls++; | |
17285 | + } | |
17286 | + val = (set->pmds[i].value & ~ovfl_mask) | |
17287 | + | (val & ovfl_mask); | |
17288 | + } | |
17289 | + set->pmds[i].value = val; | |
17290 | + count--; | |
17291 | + } | |
17292 | + } | |
17293 | + /* 0 means: no need to save PMDs at upper level */ | |
17294 | + return 0; | |
17295 | +} | |
17296 | + | |
17297 | +/** | |
17298 | + * pfm_core_quiesce - stop monitoring without grabbing any lock | |
17299 | + * | |
17300 | + * called from NMI interrupt handler to immediately stop monitoring | |
17301 | + * cannot grab any lock, including perfmon related locks | |
17302 | + */ | |
17303 | +static void __kprobes pfm_core_quiesce(void) | |
17304 | +{ | |
17305 | + /* | |
17306 | + * quiesce PMU by clearing available registers that have | |
17307 | + * the start/stop capability | |
17308 | + */ | |
17309 | + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
17310 | + wrmsrl(MSR_P6_EVNTSEL0, 0); | |
17311 | + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
17312 | + wrmsrl(MSR_P6_EVNTSEL1, 0); | |
17313 | + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
17314 | + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); | |
17315 | + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
17316 | + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | |
17317 | +} | |
17318 | +/** | |
17319 | + * pfm_core_restore_pmcs - reload PMC registers | |
17320 | + * @ctx: context to restore from | |
17321 | + * @set: current event set | |
17322 | + * | |
17323 | + * optimized version of pfm_arch_restore_pmcs(). On Core, we can | |
17324 | + * afford to only restore the pmcs registers we use, because they are | |
17325 | + * all independent from each other. | |
17326 | + */ | |
17327 | +static void pfm_core_restore_pmcs(struct pfm_context *ctx, | |
17328 | + struct pfm_event_set *set) | |
17329 | +{ | |
17330 | + struct pfm_arch_context *ctx_arch; | |
17331 | + u64 *mask; | |
17332 | + u16 i, num; | |
17333 | + | |
17334 | + ctx_arch = pfm_ctx_arch(ctx); | |
17335 | + | |
17336 | + /* | |
17337 | + * must restore DS pointer before restoring PMCs | |
17338 | + * as this can potentially reactivate monitoring | |
17339 | + */ | |
17340 | + if (ctx_arch->flags.use_ds) | |
17341 | + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); | |
17342 | + | |
17343 | + mask = set->used_pmcs; | |
17344 | + num = set->nused_pmcs; | |
17345 | + for (i = 0; num; i++) { | |
17346 | + if (test_bit(i, cast_ulp(mask))) { | |
17347 | + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]); | |
17348 | + num--; | |
17349 | + } | |
17350 | + } | |
17351 | +} | |
17352 | + | |
17353 | +/* | |
17354 | + * Counters may have model-specific width which can be probed using | |
17355 | + * the CPUID.0xa leaf. Yet, the documentation says: " | |
17356 | + * In the initial implementation, only the read bit width is reported | |
17357 | + * by CPUID, write operations are limited to the low 32 bits. | |
17358 | + * Bits [w-32] are sign extensions of bit 31. As such the effective width | |
17359 | + * of a counter is 31 bits only. | |
17360 | + */ | |
17361 | +static struct pfm_pmu_config pfm_core_pmu_conf = { | |
17362 | + .pmu_name = "Intel Core", | |
17363 | + .pmd_desc = pfm_core_pmd_desc, | |
17364 | + .counter_width = 31, | |
17365 | + .num_pmc_entries = PFM_CORE_NUM_PMCS, | |
17366 | + .num_pmd_entries = PFM_CORE_NUM_PMDS, | |
17367 | + .pmc_desc = pfm_core_pmc_desc, | |
17368 | + .probe_pmu = pfm_core_probe_pmu, | |
17369 | + .version = "1.2", | |
17370 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
17371 | + .owner = THIS_MODULE, | |
17372 | + .pmu_info = &pfm_core_pmu_info, | |
17373 | + .pmc_write_check = pfm_core_pmc17_check | |
17374 | +}; | |
17375 | + | |
17376 | +static int __init pfm_core_pmu_init_module(void) | |
17377 | +{ | |
17378 | + return pfm_pmu_register(&pfm_core_pmu_conf); | |
17379 | +} | |
17380 | + | |
17381 | +static void __exit pfm_core_pmu_cleanup_module(void) | |
17382 | +{ | |
17383 | + pfm_pmu_unregister(&pfm_core_pmu_conf); | |
17384 | +} | |
17385 | + | |
17386 | +module_init(pfm_core_pmu_init_module); | |
17387 | +module_exit(pfm_core_pmu_cleanup_module); | |
17388 | --- /dev/null | |
17389 | +++ b/arch/x86/perfmon/perfmon_p4.c | |
17390 | @@ -0,0 +1,913 @@ | |
17391 | +/* | |
17392 | + * This file contains the P4/Xeon PMU register description tables | |
17393 | + * for both 32 and 64 bit modes. | |
17394 | + * | |
17395 | + * Copyright (c) 2005 Intel Corporation | |
17396 | + * Contributed by Bryan Wilkerson <bryan.p.wilkerson@intel.com> | |
17397 | + * | |
17398 | + * This program is free software; you can redistribute it and/or | |
17399 | + * modify it under the terms of version 2 of the GNU General Public | |
17400 | + * License as published by the Free Software Foundation. | |
17401 | + * | |
17402 | + * This program is distributed in the hope that it will be useful, | |
17403 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17404 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17405 | + * General Public License for more details. | |
17406 | + * | |
17407 | + * You should have received a copy of the GNU General Public License | |
17408 | + * along with this program; if not, write to the Free Software | |
17409 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
17410 | + * 02111-1307 USA | |
17411 | + */ | |
17412 | +#include <linux/module.h> | |
17413 | +#include <linux/perfmon_kern.h> | |
17414 | +#include <linux/kprobes.h> | |
17415 | +#include <linux/nmi.h> | |
17416 | +#include <asm/msr.h> | |
17417 | +#include <asm/apic.h> | |
17418 | + | |
17419 | +MODULE_AUTHOR("Bryan Wilkerson <bryan.p.wilkerson@intel.com>"); | |
17420 | +MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table"); | |
17421 | +MODULE_LICENSE("GPL"); | |
17422 | + | |
17423 | +static int force; | |
17424 | +MODULE_PARM_DESC(force, "bool: force module to load successfully"); | |
17425 | +module_param(force, bool, 0600); | |
17426 | + | |
17427 | +static int force_nmi; | |
17428 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
17429 | +module_param(force_nmi, bool, 0600); | |
17430 | + | |
17431 | +/* | |
17432 | + * For extended register information in addition to address that is used | |
17433 | + * at runtime to figure out the mapping of reg addresses to logical procs | |
17434 | + * and association of registers to hardware specific features | |
17435 | + */ | |
17436 | +struct pfm_p4_regmap { | |
17437 | + /* | |
17438 | + * one each for the logical CPUs. Index 0 corresponds to T0 and | |
17439 | + * index 1 corresponds to T1. Index 1 can be zero if no T1 | |
17440 | + * complement reg exists. | |
17441 | + */ | |
17442 | + unsigned long addrs[2]; /* 2 = number of threads */ | |
17443 | + unsigned int ctr; /* for CCCR/PERFEVTSEL, associated counter */ | |
17444 | + unsigned int reg_type; | |
17445 | +}; | |
17446 | + | |
17447 | +/* | |
17448 | + * bitmask for pfm_p4_regmap.reg_type | |
17449 | + */ | |
17450 | +#define PFM_REGT_NA 0x0000 /* not available */ | |
17451 | +#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */ | |
17452 | +#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */ | |
17453 | +#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */ | |
17454 | +#define PFM_REGT_PEBS 0x0010 /* PEBS related */ | |
17455 | +#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */ | |
17456 | +#define PFM_REGT_CTR 0x0040 /* counter */ | |
17457 | + | |
17458 | +/* | |
17459 | + * architecture specific context extension. | |
17460 | + * located at: (struct pfm_arch_context *)(ctx+1) | |
17461 | + */ | |
17462 | +struct pfm_arch_p4_context { | |
17463 | + u32 npend_ovfls; /* P4 NMI #pending ovfls */ | |
17464 | + u32 reserved; | |
17465 | + u64 povfl_pmds[PFM_PMD_BV]; /* P4 NMI overflowed counters */ | |
17466 | + u64 saved_cccrs[PFM_MAX_PMCS]; | |
17467 | +}; | |
17468 | + | |
17469 | +/* | |
17470 | + * ESCR reserved bitmask: | |
17471 | + * - bits 31 - 63 reserved | |
17472 | + * - T1_OS and T1_USR bits are reserved - set depending on logical proc | |
17473 | + * user mode application should use T0_OS and T0_USR to indicate | |
17474 | + * RSVD: reserved bits must be 1 | |
17475 | + */ | |
17476 | +#define PFM_ESCR_RSVD ~0x000000007ffffffcULL | |
17477 | + | |
17478 | +/* | |
17479 | + * CCCR default value: | |
17480 | + * - OVF_PMI_T0=1 (bit 26) | |
17481 | + * - OVF_PMI_T1=0 (bit 27) (set if necessary in pfm_write_reg()) | |
17482 | + * - all other bits are zero | |
17483 | + * | |
17484 | + * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR | |
17485 | + */ | |
17486 | +#define PFM_CCCR_DFL (1ULL<<26) | (3ULL<<16) | |
17487 | + | |
17488 | +/* | |
17489 | + * CCCR reserved fields: | |
17490 | + * - bits 0-11, 25-29, 31-63 | |
17491 | + * - OVF_PMI (26-27), override with REGFL_NO_EMUL64 | |
17492 | + * | |
17493 | + * RSVD: reserved bits must be 1 | |
17494 | + */ | |
17495 | +#define PFM_CCCR_RSVD ~((0xfull<<12) \ | |
17496 | + | (0x7full<<18) \ | |
17497 | + | (0x1ull<<30)) | |
17498 | + | |
17499 | +#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */ | |
17500 | + | |
17501 | +#define PEBS_PMD 8 /* thread0: IQ_CTR4, thread1: IQ_CTR5 */ | |
17502 | + | |
17503 | +/* | |
17504 | + * With HyperThreading enabled: | |
17505 | + * | |
17506 | + * The ESCRs and CCCRs are divided in half with the top half | |
17507 | + * belonging to logical processor 0 and the bottom half going to | |
17508 | + * logical processor 1. Thus only half of the PMU resources are | |
17509 | + * accessible to applications. | |
17510 | + * | |
17511 | + * PEBS is not available due to the fact that: | |
17512 | + * - MSR_PEBS_MATRIX_VERT is shared between the threads | |
17513 | + * - IA32_PEBS_ENABLE is shared between the threads | |
17514 | + * | |
17515 | + * With HyperThreading disabled: | |
17516 | + * | |
17517 | + * The full set of PMU resources is exposed to applications. | |
17518 | + * | |
17519 | + * The mapping is chosen such that PMCxx -> MSR is the same | |
17520 | + * in HT and non HT mode, if register is present in HT mode. | |
17521 | + * | |
17522 | + */ | |
17523 | +#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT) | |
17524 | +#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN) | |
17525 | +#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN) | |
17526 | +#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT) | |
17527 | +#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN) | |
17528 | + | |
17529 | +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value); | |
17530 | +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value); | |
17531 | +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum); | |
17532 | +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum); | |
17533 | +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags); | |
17534 | +static void pfm_p4_free_context(struct pfm_context *ctx); | |
17535 | +static int pfm_p4_has_ovfls(struct pfm_context *ctx); | |
17536 | +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set); | |
17537 | +static void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
17538 | +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx); | |
17539 | +static void __kprobes pfm_p4_quiesce(void); | |
17540 | + | |
17541 | +static u64 enable_mask[PFM_MAX_PMCS]; | |
17542 | +static u16 max_enable; | |
17543 | + | |
17544 | +static struct pfm_p4_regmap pmc_addrs[PFM_MAX_PMCS] = { | |
17545 | + /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */ | |
17546 | + /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */ | |
17547 | + /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */ | |
17548 | + /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */ | |
17549 | + /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */ | |
17550 | + /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */ | |
17551 | + /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */ | |
17552 | + /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */ | |
17553 | + /*pmc 8 */ {{MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */ | |
17554 | + /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */ | |
17555 | + /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */ | |
17556 | + /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */ | |
17557 | + /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */ | |
17558 | + /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */ | |
17559 | + /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */ | |
17560 | + /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */ | |
17561 | + /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */ | |
17562 | + /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 */ | |
17563 | + /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */ | |
17564 | + /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */ | |
17565 | + /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */ | |
17566 | + /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */ | |
17567 | + /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */ | |
17568 | + | |
17569 | + /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */ | |
17570 | + /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */ | |
17571 | + /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */ | |
17572 | + /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */ | |
17573 | + /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */ | |
17574 | + /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */ | |
17575 | + /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */ | |
17576 | + /*pmc 30*/ {{MSR_P4_IQ_CCCR1, MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */ | |
17577 | + /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */ | |
17578 | + /* non HT extensions */ | |
17579 | + /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */ | |
17580 | + /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */ | |
17581 | + /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */ | |
17582 | + /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */ | |
17583 | + /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */ | |
17584 | + /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */ | |
17585 | + /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */ | |
17586 | + /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */ | |
17587 | + /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */ | |
17588 | + /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 */ | |
17589 | + /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */ | |
17590 | + /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */ | |
17591 | + /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */ | |
17592 | + /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */ | |
17593 | + /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */ | |
17594 | + /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */ | |
17595 | + /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */ | |
17596 | + /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */ | |
17597 | + /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */ | |
17598 | + /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */ | |
17599 | + /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */ | |
17600 | + /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */ | |
17601 | + /*pmc 54*/ {{MSR_P4_BPU_CCCR1, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR1 */ | |
17602 | + /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0}, 10, PFM_REGT_NHTCCCR}, /* BPU_CCCR3 */ | |
17603 | + /*pmc 56*/ {{MSR_P4_MS_CCCR1, 0}, 11, PFM_REGT_NHTCCCR}, /* MS_CCCR1 */ | |
17604 | + /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0}, 12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */ | |
17605 | + /*pmc 58*/ {{MSR_P4_FLAME_CCCR1, 0}, 13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR1 */ | |
17606 | + /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0}, 14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */ | |
17607 | + /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0}, 15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */ | |
17608 | + /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0}, 16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */ | |
17609 | + /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0}, 17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */ | |
17610 | + /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */ | |
17611 | + /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */ | |
17612 | +}; | |
17613 | + | |
17614 | +static struct pfm_p4_regmap pmd_addrs[PFM_MAX_PMDS] = { | |
17615 | + /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */ | |
17616 | + /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */ | |
17617 | + /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */ | |
17618 | + /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */ | |
17619 | + /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */ | |
17620 | + /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */ | |
17621 | + /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */ | |
17622 | + /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */ | |
17623 | + /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */ | |
17624 | + /* | |
17625 | + * non HT extensions | |
17626 | + */ | |
17627 | + /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */ | |
17628 | + /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */ | |
17629 | + /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */ | |
17630 | + /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */ | |
17631 | + /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR2 */ | |
17632 | + /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */ | |
17633 | + /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */ | |
17634 | + /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */ | |
17635 | + /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */ | |
17636 | +}; | |
17637 | + | |
17638 | +static struct pfm_arch_pmu_info pfm_p4_pmu_info = { | |
17639 | + .write_pmc = pfm_p4_write_pmc, | |
17640 | + .write_pmd = pfm_p4_write_pmd, | |
17641 | + .read_pmc = pfm_p4_read_pmc, | |
17642 | + .read_pmd = pfm_p4_read_pmd, | |
17643 | + .create_context = pfm_p4_create_context, | |
17644 | + .free_context = pfm_p4_free_context, | |
17645 | + .has_ovfls = pfm_p4_has_ovfls, | |
17646 | + .stop_save = pfm_p4_stop_save, | |
17647 | + .restore_pmcs = pfm_p4_restore_pmcs, | |
17648 | + .nmi_copy_state = pfm_p4_nmi_copy_state, | |
17649 | + .quiesce = pfm_p4_quiesce | |
17650 | +}; | |
17651 | + | |
17652 | +static struct pfm_regmap_desc pfm_p4_pmc_desc[] = { | |
17653 | +/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0), | |
17654 | +/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), | |
17655 | +/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0), | |
17656 | +/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0), | |
17657 | +/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0), | |
17658 | +/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0), | |
17659 | +/* pmc6 */ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0), | |
17660 | +/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0), | |
17661 | +/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0), | |
17662 | +/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0), | |
17663 | +/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0), | |
17664 | +/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0), | |
17665 | +/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0), | |
17666 | +/* pmc13 */ PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0), | |
17667 | +/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0), | |
17668 | +/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0), | |
17669 | +/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/ | |
17670 | +/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0), | |
17671 | +/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0), | |
17672 | +/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0), | |
17673 | +/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0), | |
17674 | +/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2), | |
17675 | +/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4), | |
17676 | +/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0), | |
17677 | +/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1), | |
17678 | +/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0), | |
17679 | +/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1), | |
17680 | +/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR0), | |
17681 | +/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1), | |
17682 | +/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0), | |
17683 | +/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1), | |
17684 | +/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4), | |
17685 | + /* No HT extension */ | |
17686 | +/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1), | |
17687 | +/* pmc33 */ PMC_D(PFM_REG_I, "IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1), | |
17688 | +/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1), | |
17689 | +/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1), | |
17690 | +/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1), | |
17691 | +/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1), | |
17692 | +/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1), | |
17693 | +/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1), | |
17694 | +/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1), | |
17695 | +/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1), | |
17696 | +/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1), | |
17697 | +/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1), | |
17698 | +/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1), | |
17699 | +/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1), | |
17700 | +/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1), | |
17701 | +/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1), | |
17702 | +/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */ | |
17703 | +/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1), | |
17704 | +/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1), | |
17705 | +/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1), | |
17706 | +/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3), | |
17707 | +/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5), | |
17708 | +/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2), | |
17709 | +/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3), | |
17710 | +/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2), | |
17711 | +/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3), | |
17712 | +/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2), | |
17713 | +/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3), | |
17714 | +/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2), | |
17715 | +/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3), | |
17716 | +/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5), | |
17717 | +/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2), | |
17718 | +/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1) | |
17719 | +}; | |
17720 | +#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc) | |
17721 | + | |
17722 | +/* | |
17723 | + * See section 15.10.6.6 for details about the IQ block | |
17724 | + */ | |
17725 | +static struct pfm_regmap_desc pfm_p4_pmd_desc[] = { | |
17726 | +/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0), | |
17727 | +/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1), | |
17728 | +/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0), | |
17729 | +/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1), | |
17730 | +/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", MSR_P4_FLAME_PERFCTR0), | |
17731 | +/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1), | |
17732 | +/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0), | |
17733 | +/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1), | |
17734 | +/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4), | |
17735 | + /* no HT extension */ | |
17736 | +/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2), | |
17737 | +/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3), | |
17738 | +/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2), | |
17739 | +/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", MSR_P4_MS_PERFCTR3), | |
17740 | +/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2), | |
17741 | +/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3), | |
17742 | +/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2), | |
17743 | +/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3), | |
17744 | +/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5) | |
17745 | +}; | |
17746 | +#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc) | |
17747 | + | |
17748 | +/* | |
17749 | + * Due to hotplug CPU support, threads may not necessarily | |
17750 | + * be activated at the time the module is inserted. We need | |
17751 | + * to check whether they could be activated by looking at | |
17752 | + * the present CPU (present != online). | |
17753 | + */ | |
17754 | +static int pfm_p4_probe_pmu(void) | |
17755 | +{ | |
17756 | + unsigned int i; | |
17757 | + int ht_enabled; | |
17758 | + | |
17759 | + /* | |
17760 | + * only works on Intel processors | |
17761 | + */ | |
17762 | + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | |
17763 | + PFM_INFO("not running on Intel processor"); | |
17764 | + return -1; | |
17765 | + } | |
17766 | + | |
17767 | + if (current_cpu_data.x86 != 15) { | |
17768 | + PFM_INFO("unsupported family=%d", current_cpu_data.x86); | |
17769 | + return -1; | |
17770 | + } | |
17771 | + | |
17772 | + switch (current_cpu_data.x86_model) { | |
17773 | + case 0 ... 2: | |
17774 | + break; | |
17775 | + case 3 ... 6: | |
17776 | + /* | |
17777 | + * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2 | |
17778 | + */ | |
17779 | + pfm_p4_pmc_desc[16].type = PFM_REG_NA; | |
17780 | + pfm_p4_pmc_desc[48].type = PFM_REG_NA; | |
17781 | + break; | |
17782 | + default: | |
17783 | + /* | |
17784 | + * do not know if they all work the same, so reject | |
17785 | + * for now | |
17786 | + */ | |
17787 | + if (!force) { | |
17788 | + PFM_INFO("unsupported model %d", | |
17789 | + current_cpu_data.x86_model); | |
17790 | + return -1; | |
17791 | + } | |
17792 | + } | |
17793 | + | |
17794 | + /* | |
17795 | + * check for local APIC (required) | |
17796 | + */ | |
17797 | + if (!cpu_has_apic) { | |
17798 | + PFM_INFO("no local APIC, unsupported"); | |
17799 | + return -1; | |
17800 | + } | |
17801 | +#ifdef CONFIG_SMP | |
17802 | + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map)) | |
17803 | + / current_cpu_data.x86_max_cores) > 1; | |
17804 | +#else | |
17805 | + ht_enabled = 0; | |
17806 | +#endif | |
17807 | + if (cpu_has_ht) { | |
17808 | + | |
17809 | + PFM_INFO("HyperThreading supported, status %s", | |
17810 | + ht_enabled ? "on": "off"); | |
17811 | + /* | |
17812 | + * disable registers not supporting HT | |
17813 | + */ | |
17814 | + if (ht_enabled) { | |
17815 | + PFM_INFO("disabling half the registers for HT"); | |
17816 | + for (i = 0; i < PFM_P4_NUM_PMCS; i++) { | |
17817 | + if (pmc_addrs[(i)].reg_type & PFM_REGT_NOHT) | |
17818 | + pfm_p4_pmc_desc[i].type = PFM_REG_NA; | |
17819 | + } | |
17820 | + for (i = 0; i < PFM_P4_NUM_PMDS; i++) { | |
17821 | + if (pmd_addrs[(i)].reg_type & PFM_REGT_NOHT) | |
17822 | + pfm_p4_pmd_desc[i].type = PFM_REG_NA; | |
17823 | + } | |
17824 | + } | |
17825 | + } | |
17826 | + | |
17827 | + if (cpu_has_ds) { | |
17828 | + PFM_INFO("Data Save Area (DS) supported"); | |
17829 | + | |
17830 | + if (cpu_has_pebs) { | |
17831 | + /* | |
17832 | + * PEBS does not work with HyperThreading enabled | |
17833 | + */ | |
17834 | + if (ht_enabled) | |
17835 | + PFM_INFO("PEBS supported, status off (because of HT)"); | |
17836 | + else | |
17837 | + PFM_INFO("PEBS supported, status on"); | |
17838 | + } | |
17839 | + } | |
17840 | + | |
17841 | + /* | |
17842 | + * build enable mask | |
17843 | + */ | |
17844 | + for (i = 0; i < PFM_P4_NUM_PMCS; i++) { | |
17845 | + if (pmc_addrs[(i)].reg_type & PFM_REGT_EN) { | |
17846 | + __set_bit(i, cast_ulp(enable_mask)); | |
17847 | + max_enable = i + 1; | |
17848 | + } | |
17849 | + } | |
17850 | + | |
17851 | + if (force_nmi) | |
17852 | + pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
17853 | + return 0; | |
17854 | +} | |
17855 | +static inline int get_smt_id(void) | |
17856 | +{ | |
17857 | +#ifdef CONFIG_SMP | |
17858 | + int cpu = smp_processor_id(); | |
17859 | + return (cpu != first_cpu(__get_cpu_var(cpu_sibling_map))); | |
17860 | +#else | |
17861 | + return 0; | |
17862 | +#endif | |
17863 | +} | |
17864 | + | |
17865 | +static void __pfm_write_reg_p4(const struct pfm_p4_regmap *xreg, u64 val) | |
17866 | +{ | |
17867 | + u64 pmi; | |
17868 | + int smt_id; | |
17869 | + | |
17870 | + smt_id = get_smt_id(); | |
17871 | + /* | |
17872 | + * HT is only supported by P4-style PMU | |
17873 | + * | |
17874 | + * Adjust for T1 if necessary: | |
17875 | + * | |
17876 | + * - move the T0_OS/T0_USR bits into T1 slots | |
17877 | + * - move the OVF_PMI_T0 bits into T1 slot | |
17878 | + * | |
17879 | + * The P4/EM64T T1 is cleared by description table. | |
17880 | + * User only works with T0. | |
17881 | + */ | |
17882 | + if (smt_id) { | |
17883 | + if (xreg->reg_type & PFM_REGT_ESCR) { | |
17884 | + | |
17885 | + /* copy T0_USR & T0_OS to T1 */ | |
17886 | + val |= ((val & 0xc) >> 2); | |
17887 | + | |
17888 | + /* clear bits T0_USR & T0_OS */ | |
17889 | + val &= ~0xc; | |
17890 | + | |
17891 | + } else if (xreg->reg_type & PFM_REGT_CCCR) { | |
17892 | + pmi = (val >> 26) & 0x1; | |
17893 | + if (pmi) { | |
17894 | + val &= ~(1UL<<26); | |
17895 | + val |= 1UL<<27; | |
17896 | + } | |
17897 | + } | |
17898 | + } | |
17899 | + if (xreg->addrs[smt_id]) | |
17900 | + wrmsrl(xreg->addrs[smt_id], val); | |
17901 | +} | |
17902 | + | |
17903 | +void __pfm_read_reg_p4(const struct pfm_p4_regmap *xreg, u64 *val) | |
17904 | +{ | |
17905 | + int smt_id; | |
17906 | + | |
17907 | + smt_id = get_smt_id(); | |
17908 | + | |
17909 | + if (likely(xreg->addrs[smt_id])) { | |
17910 | + rdmsrl(xreg->addrs[smt_id], *val); | |
17911 | + /* | |
17912 | + * HT is only supported by P4-style PMU | |
17913 | + * | |
17914 | + * move the Tx_OS and Tx_USR bits into | |
17915 | + * T0 slots setting the T1 slots to zero | |
17916 | + */ | |
17917 | + if (xreg->reg_type & PFM_REGT_ESCR) { | |
17918 | + if (smt_id) | |
17919 | + *val |= (((*val) & 0x3) << 2); | |
17920 | + | |
17921 | + /* | |
17922 | + * zero out bits that are reserved | |
17923 | + * (including T1_OS and T1_USR) | |
17924 | + */ | |
17925 | + *val &= PFM_ESCR_RSVD; | |
17926 | + } | |
17927 | + } else { | |
17928 | + *val = 0; | |
17929 | + } | |
17930 | +} | |
17931 | +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value) | |
17932 | +{ | |
17933 | + __pfm_write_reg_p4(&pmc_addrs[cnum], value); | |
17934 | +} | |
17935 | + | |
17936 | +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value) | |
17937 | +{ | |
17938 | + __pfm_write_reg_p4(&pmd_addrs[cnum], value); | |
17939 | +} | |
17940 | + | |
17941 | +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
17942 | +{ | |
17943 | + u64 tmp; | |
17944 | + __pfm_read_reg_p4(&pmd_addrs[cnum], &tmp); | |
17945 | + return tmp; | |
17946 | +} | |
17947 | + | |
17948 | +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum) | |
17949 | +{ | |
17950 | + u64 tmp; | |
17951 | + __pfm_read_reg_p4(&pmc_addrs[cnum], &tmp); | |
17952 | + return tmp; | |
17953 | +} | |
17954 | + | |
17955 | +struct pfm_ds_area_p4 { | |
17956 | + unsigned long bts_buf_base; | |
17957 | + unsigned long bts_index; | |
17958 | + unsigned long bts_abs_max; | |
17959 | + unsigned long bts_intr_thres; | |
17960 | + unsigned long pebs_buf_base; | |
17961 | + unsigned long pebs_index; | |
17962 | + unsigned long pebs_abs_max; | |
17963 | + unsigned long pebs_intr_thres; | |
17964 | + u64 pebs_cnt_reset; | |
17965 | +}; | |
17966 | + | |
17967 | + | |
17968 | +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) | |
17969 | +{ | |
17970 | + struct pfm_arch_pmu_info *pmu_info; | |
17971 | + struct pfm_arch_context *ctx_arch; | |
17972 | + struct pfm_ds_area_p4 *ds = NULL; | |
17973 | + u64 used_mask[PFM_PMC_BV]; | |
17974 | + u16 i, j, count, pebs_idx = ~0; | |
17975 | + u16 max_pmc; | |
17976 | + u64 cccr, ctr1, ctr2, ovfl_mask; | |
17977 | + | |
17978 | + pmu_info = &pfm_p4_pmu_info; | |
17979 | + ctx_arch = pfm_ctx_arch(ctx); | |
17980 | + max_pmc = ctx->regs.max_pmc; | |
17981 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
17982 | + | |
17983 | + /* | |
17984 | + * build used enable PMC bitmask | |
17985 | + * if user did not set any CCCR, then mask is | |
17986 | + * empty and there is nothing to do because nothing | |
17987 | + * was started | |
17988 | + */ | |
17989 | + bitmap_and(cast_ulp(used_mask), | |
17990 | + cast_ulp(set->used_pmcs), | |
17991 | + cast_ulp(enable_mask), | |
17992 | + max_enable); | |
17993 | + | |
17994 | + count = bitmap_weight(cast_ulp(used_mask), max_enable); | |
17995 | + | |
17996 | + PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u", | |
17997 | + set->npend_ovfls, | |
17998 | + (unsigned long long)enable_mask[0], | |
17999 | + (unsigned long long)set->used_pmcs[0], | |
18000 | + count, max_enable); | |
18001 | + | |
18002 | + /* | |
18003 | + * ensures we do not destroy pending overflow | |
18004 | + * information. If pending interrupts are already | |
18005 | + * known, then we just stop monitoring. | |
18006 | + */ | |
18007 | + if (set->npend_ovfls) { | |
18008 | + /* | |
18009 | + * clear enable bit | |
18010 | + * unfortunately, this is very expensive! | |
18011 | + */ | |
18012 | + for (i = 0; count; i++) { | |
18013 | + if (test_bit(i, cast_ulp(used_mask))) { | |
18014 | + __pfm_write_reg_p4(pmc_addrs+i, 0); | |
18015 | + count--; | |
18016 | + } | |
18017 | + } | |
18018 | + /* need save PMDs at upper level */ | |
18019 | + return 1; | |
18020 | + } | |
18021 | + | |
18022 | + if (ctx_arch->flags.use_pebs) { | |
18023 | + ds = ctx_arch->ds_area; | |
18024 | + pebs_idx = PEBS_PMD; | |
18025 | + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx", | |
18026 | + ds, | |
18027 | + (unsigned long long)ds->pebs_index, | |
18028 | + (unsigned long long)ds->pebs_intr_thres); | |
18029 | + } | |
18030 | + | |
18031 | + /* | |
18032 | + * stop monitoring AND collect pending overflow information AND | |
18033 | + * save pmds. | |
18034 | + * | |
18035 | + * We need to access the CCCR twice, once to get overflow info | |
18036 | + * and a second to stop monitoring (which destroys the OVF flag) | |
18037 | + * Similarly, we need to read the counter twice to check whether | |
18038 | + * it did overflow between the CCCR read and the CCCR write. | |
18039 | + */ | |
18040 | + for (i = 0; count; i++) { | |
18041 | + if (i != pebs_idx && test_bit(i, cast_ulp(used_mask))) { | |
18042 | + /* | |
18043 | + * controlled counter | |
18044 | + */ | |
18045 | + j = pmc_addrs[i].ctr; | |
18046 | + | |
18047 | + /* read CCCR (PMC) value */ | |
18048 | + __pfm_read_reg_p4(pmc_addrs+i, &cccr); | |
18049 | + | |
18050 | + /* read counter (PMD) controlled by PMC */ | |
18051 | + __pfm_read_reg_p4(pmd_addrs+j, &ctr1); | |
18052 | + | |
18053 | + /* clear CCCR value: stop counter but destroy OVF */ | |
18054 | + __pfm_write_reg_p4(pmc_addrs+i, 0); | |
18055 | + | |
18056 | + /* read counter controlled by CCCR again */ | |
18057 | + __pfm_read_reg_p4(pmd_addrs+j, &ctr2); | |
18058 | + | |
18059 | + /* | |
18060 | + * there is an overflow if either: | |
18061 | + * - CCCR.ovf is set (and we just cleared it) | |
18062 | + * - ctr2 < ctr1 | |
18063 | + * in that case we set the bit corresponding to the | |
18064 | + * overflowed PMD in povfl_pmds. | |
18065 | + */ | |
18066 | + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) { | |
18067 | + __set_bit(j, cast_ulp(set->povfl_pmds)); | |
18068 | + set->npend_ovfls++; | |
18069 | + } | |
18070 | + ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask); | |
18071 | + set->pmds[j].value = ctr2; | |
18072 | + count--; | |
18073 | + } | |
18074 | + } | |
18075 | + /* | |
18076 | + * check for PEBS buffer full and set the corresponding PMD overflow | |
18077 | + */ | |
18078 | + if (ctx_arch->flags.use_pebs) { | |
18079 | + PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres); | |
18080 | + if (ds->pebs_index >= ds->pebs_intr_thres | |
18081 | + && test_bit(PEBS_PMD, cast_ulp(set->used_pmds))) { | |
18082 | + __set_bit(PEBS_PMD, cast_ulp(set->povfl_pmds)); | |
18083 | + set->npend_ovfls++; | |
18084 | + } | |
18085 | + } | |
18086 | + /* 0 means: no need to save the PMD at higher level */ | |
18087 | + return 0; | |
18088 | +} | |
18089 | + | |
18090 | +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags) | |
18091 | +{ | |
18092 | + struct pfm_arch_context *ctx_arch; | |
18093 | + | |
18094 | + ctx_arch = pfm_ctx_arch(ctx); | |
18095 | + | |
18096 | + ctx_arch->data = kzalloc(sizeof(struct pfm_arch_p4_context), GFP_KERNEL); | |
18097 | + if (!ctx_arch->data) | |
18098 | + return -ENOMEM; | |
18099 | + | |
18100 | + return 0; | |
18101 | +} | |
18102 | + | |
18103 | +static void pfm_p4_free_context(struct pfm_context *ctx) | |
18104 | +{ | |
18105 | + struct pfm_arch_context *ctx_arch; | |
18106 | + | |
18107 | + ctx_arch = pfm_ctx_arch(ctx); | |
18108 | + /* | |
18109 | + * we do not check if P4, because it would be NULL and | |
18110 | + * kfree can deal with NULL | |
18111 | + */ | |
18112 | + kfree(ctx_arch->data); | |
18113 | +} | |
18114 | + | |
18115 | +/* | |
18116 | + * detect if counters have overflowed. | |
18117 | + * return: | |
18118 | + * 0 : no overflow | |
18119 | + * 1 : at least one overflow | |
18120 | + * | |
18121 | + * used by Intel P4 | |
18122 | + */ | |
18123 | +static int __kprobes pfm_p4_has_ovfls(struct pfm_context *ctx) | |
18124 | +{ | |
18125 | + struct pfm_arch_pmu_info *pmu_info; | |
18126 | + struct pfm_p4_regmap *xrc, *xrd; | |
18127 | + struct pfm_arch_context *ctx_arch; | |
18128 | + struct pfm_arch_p4_context *p4; | |
18129 | + u64 ena_mask[PFM_PMC_BV]; | |
18130 | + u64 cccr, ctr1, ctr2; | |
18131 | + int n, i, j; | |
18132 | + | |
18133 | + pmu_info = &pfm_p4_pmu_info; | |
18134 | + | |
18135 | + ctx_arch = pfm_ctx_arch(ctx); | |
18136 | + xrc = pmc_addrs; | |
18137 | + xrd = pmd_addrs; | |
18138 | + p4 = ctx_arch->data; | |
18139 | + | |
18140 | + bitmap_and(cast_ulp(ena_mask), | |
18141 | + cast_ulp(ctx->regs.pmcs), | |
18142 | + cast_ulp(enable_mask), | |
18143 | + max_enable); | |
18144 | + | |
18145 | + n = bitmap_weight(cast_ulp(ena_mask), max_enable); | |
18146 | + | |
18147 | + for (i = 0; n; i++) { | |
18148 | + if (!test_bit(i, cast_ulp(ena_mask))) | |
18149 | + continue; | |
18150 | + /* | |
18151 | + * controlled counter | |
18152 | + */ | |
18153 | + j = xrc[i].ctr; | |
18154 | + | |
18155 | + /* read CCCR (PMC) value */ | |
18156 | + __pfm_read_reg_p4(xrc+i, &cccr); | |
18157 | + | |
18158 | + /* read counter (PMD) controlled by PMC */ | |
18159 | + __pfm_read_reg_p4(xrd+j, &ctr1); | |
18160 | + | |
18161 | + /* clear CCCR value: stop counter but destroy OVF */ | |
18162 | + __pfm_write_reg_p4(xrc+i, 0); | |
18163 | + | |
18164 | + /* read counter controlled by CCCR again */ | |
18165 | + __pfm_read_reg_p4(xrd+j, &ctr2); | |
18166 | + | |
18167 | + /* | |
18168 | + * there is an overflow if either: | |
18169 | + * - CCCR.ovf is set (and we just cleared it) | |
18170 | + * - ctr2 < ctr1 | |
18171 | + * in that case we set the bit corresponding to the | |
18172 | + * overflowed PMD in povfl_pmds. | |
18173 | + */ | |
18174 | + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) { | |
18175 | + __set_bit(j, cast_ulp(p4->povfl_pmds)); | |
18176 | + p4->npend_ovfls++; | |
18177 | + } | |
18178 | + p4->saved_cccrs[i] = cccr; | |
18179 | + n--; | |
18180 | + } | |
18181 | + /* | |
18182 | + * if there was no overflow, then it means the NMI was not really | |
18183 | + * for us, so we have to resume monitoring | |
18184 | + */ | |
18185 | + if (unlikely(!p4->npend_ovfls)) { | |
18186 | + for (i = 0; n; i++) { | |
18187 | + if (!test_bit(i, cast_ulp(ena_mask))) | |
18188 | + continue; | |
18189 | + __pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]); | |
18190 | + } | |
18191 | + } | |
18192 | + return 0; | |
18193 | +} | |
18194 | + | |
18195 | +void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) | |
18196 | +{ | |
18197 | + struct pfm_arch_pmu_info *pmu_info; | |
18198 | + struct pfm_arch_context *ctx_arch; | |
18199 | + u64 *mask; | |
18200 | + u16 i, num; | |
18201 | + | |
18202 | + ctx_arch = pfm_ctx_arch(ctx); | |
18203 | + pmu_info = pfm_pmu_info(); | |
18204 | + | |
18205 | + /* | |
18206 | + * must restore DS pointer before restoring PMCs | |
18207 | + * as this can potentially reactivate monitoring | |
18208 | + */ | |
18209 | + if (ctx_arch->flags.use_ds) | |
18210 | + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); | |
18211 | + | |
18212 | + /* | |
18213 | + * must restore everything because there are some dependencies | |
18214 | + * (e.g., ESCR and CCCR) | |
18215 | + */ | |
18216 | + num = ctx->regs.num_pmcs; | |
18217 | + mask = ctx->regs.pmcs; | |
18218 | + for (i = 0; num; i++) { | |
18219 | + if (test_bit(i, cast_ulp(mask))) { | |
18220 | + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); | |
18221 | + num--; | |
18222 | + } | |
18223 | + } | |
18224 | +} | |
18225 | + | |
18226 | +/* | |
18227 | + * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR | |
18228 | + * handler to copy P4 overflow state captured when the NMI triggered. | |
18229 | + * Given that on P4, stopping monitoring destroys the overflow information | |
18230 | + * we save it in pfm_has_ovfl_p4() where monitoring is also stopped. | |
18231 | + * | |
18232 | + * Here we propagate the overflow state to current active set. The | |
18233 | + * freeze_pmu() call will not overwrite this state because npend_ovfls | |
18234 | + * is non-zero. | |
18235 | + */ | |
18236 | +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx) | |
18237 | +{ | |
18238 | + struct pfm_arch_context *ctx_arch; | |
18239 | + struct pfm_event_set *set; | |
18240 | + struct pfm_arch_p4_context *p4; | |
18241 | + | |
18242 | + ctx_arch = pfm_ctx_arch(ctx); | |
18243 | + p4 = ctx_arch->data; | |
18244 | + set = ctx->active_set; | |
18245 | + | |
18246 | + if (p4->npend_ovfls) { | |
18247 | + set->npend_ovfls = p4->npend_ovfls; | |
18248 | + | |
18249 | + bitmap_copy(cast_ulp(set->povfl_pmds), | |
18250 | + cast_ulp(p4->povfl_pmds), | |
18251 | + ctx->regs.max_pmd); | |
18252 | + | |
18253 | + p4->npend_ovfls = 0; | |
18254 | + } | |
18255 | +} | |
18256 | + | |
18257 | +/** | |
18258 | + * pfm_p4_quiesce - stop monitoring without grabbing any lock | |
18259 | + * | |
18260 | + * called from NMI interrupt handler to immediately stop monitoring | |
18261 | + * cannot grab any lock, including perfmon related locks | |
18262 | + */ | |
18263 | +static void __kprobes pfm_p4_quiesce(void) | |
18264 | +{ | |
18265 | + u16 i; | |
18266 | + /* | |
18267 | + * quiesce PMU by clearing available registers that have | |
18268 | + * the start/stop capability | |
18269 | + */ | |
18270 | + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) { | |
18271 | + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs)) | |
18272 | + && test_bit(i, cast_ulp(enable_mask))) | |
18273 | + __pfm_write_reg_p4(pmc_addrs+i, 0); | |
18274 | + } | |
18275 | +} | |
18276 | + | |
18277 | + | |
18278 | +static struct pfm_pmu_config pfm_p4_pmu_conf = { | |
18279 | + .pmu_name = "Intel P4", | |
18280 | + .counter_width = 40, | |
18281 | + .pmd_desc = pfm_p4_pmd_desc, | |
18282 | + .pmc_desc = pfm_p4_pmc_desc, | |
18283 | + .num_pmc_entries = PFM_P4_NUM_PMCS, | |
18284 | + .num_pmd_entries = PFM_P4_NUM_PMDS, | |
18285 | + .probe_pmu = pfm_p4_probe_pmu, | |
18286 | + .version = "1.0", | |
18287 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
18288 | + .owner = THIS_MODULE, | |
18289 | + .pmu_info = &pfm_p4_pmu_info | |
18290 | +}; | |
18291 | + | |
18292 | +static int __init pfm_p4_pmu_init_module(void) | |
18293 | +{ | |
18294 | + return pfm_pmu_register(&pfm_p4_pmu_conf); | |
18295 | +} | |
18296 | + | |
18297 | +static void __exit pfm_p4_pmu_cleanup_module(void) | |
18298 | +{ | |
18299 | + pfm_pmu_unregister(&pfm_p4_pmu_conf); | |
18300 | +} | |
18301 | + | |
18302 | +module_init(pfm_p4_pmu_init_module); | |
18303 | +module_exit(pfm_p4_pmu_cleanup_module); | |
18304 | --- /dev/null | |
18305 | +++ b/arch/x86/perfmon/perfmon_p6.c | |
18306 | @@ -0,0 +1,310 @@ | |
18307 | +/* | |
18308 | + * This file contains the P6 family processor PMU register description tables | |
18309 | + * | |
18310 | + * This module supports original P6 processors | |
18311 | + * (Pentium II, Pentium Pro, Pentium III) and Pentium M. | |
18312 | + * | |
18313 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
18314 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
18315 | + * | |
18316 | + * This program is free software; you can redistribute it and/or | |
18317 | + * modify it under the terms of version 2 of the GNU General Public | |
18318 | + * License as published by the Free Software Foundation. | |
18319 | + * | |
18320 | + * This program is distributed in the hope that it will be useful, | |
18321 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18322 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18323 | + * General Public License for more details. | |
18324 | + * | |
18325 | + * You should have received a copy of the GNU General Public License | |
18326 | + * along with this program; if not, write to the Free Software | |
18327 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
18328 | + * 02111-1307 USA | |
18329 | + */ | |
18330 | +#include <linux/module.h> | |
18331 | +#include <linux/kprobes.h> | |
18332 | +#include <linux/perfmon_kern.h> | |
18333 | +#include <linux/nmi.h> | |
18334 | +#include <asm/msr.h> | |
18335 | + | |
18336 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
18337 | +MODULE_DESCRIPTION("P6 PMU description table"); | |
18338 | +MODULE_LICENSE("GPL"); | |
18339 | + | |
18340 | +static int force_nmi; | |
18341 | +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); | |
18342 | +module_param(force_nmi, bool, 0600); | |
18343 | + | |
18344 | +/* | |
18345 | + * - upper 32 bits are reserved | |
18346 | + * - INT: APIC enable bit is reserved (forced to 1) | |
18347 | + * - bit 21 is reserved | |
18348 | + * - bit 22 is reserved on PEREVNTSEL1 | |
18349 | + * | |
18350 | + * RSVD: reserved bits are 1 | |
18351 | + */ | |
18352 | +#define PFM_P6_PMC0_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21)) | |
18353 | +#define PFM_P6_PMC1_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (3ULL<<21)) | |
18354 | + | |
18355 | +/* | |
18356 | + * force Local APIC interrupt on overflow | |
18357 | + * disable with NO_EMUL64 | |
18358 | + */ | |
18359 | +#define PFM_P6_PMC_VAL (1ULL<<20) | |
18360 | +#define PFM_P6_NO64 (1ULL<<20) | |
18361 | + | |
18362 | + | |
18363 | +static void __kprobes pfm_p6_quiesce(void); | |
18364 | +static int pfm_p6_has_ovfls(struct pfm_context *ctx); | |
18365 | +static int pfm_p6_stop_save(struct pfm_context *ctx, | |
18366 | + struct pfm_event_set *set); | |
18367 | + | |
18368 | +static u64 enable_mask[PFM_MAX_PMCS]; | |
18369 | +static u16 max_enable; | |
18370 | + | |
18371 | +/* | |
18372 | + * PFM_X86_FL_NO_SHARING: because of the single enable bit on MSR_P6_EVNTSEL0 | |
18373 | + * the PMU cannot be shared with NMI watchdog or Oprofile | |
18374 | + */ | |
18375 | +struct pfm_arch_pmu_info pfm_p6_pmu_info = { | |
18376 | + .stop_save = pfm_p6_stop_save, | |
18377 | + .has_ovfls = pfm_p6_has_ovfls, | |
18378 | + .quiesce = pfm_p6_quiesce, | |
18379 | + .flags = PFM_X86_FL_NO_SHARING, | |
18380 | +}; | |
18381 | + | |
18382 | +static struct pfm_regmap_desc pfm_p6_pmc_desc[] = { | |
18383 | +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC0_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0), | |
18384 | +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC1_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1) | |
18385 | +}; | |
18386 | +#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc) | |
18387 | + | |
18388 | +#define PFM_P6_D(n) \ | |
18389 | + { .type = PFM_REG_C, \ | |
18390 | + .desc = "PERFCTR"#n, \ | |
18391 | + .hw_addr = MSR_P6_PERFCTR0+n, \ | |
18392 | + .rsvd_msk = 0, \ | |
18393 | + .dep_pmcs[0] = 1ULL << n \ | |
18394 | + } | |
18395 | + | |
18396 | +static struct pfm_regmap_desc pfm_p6_pmd_desc[] = { | |
18397 | +/* pmd0 */ PFM_P6_D(0), | |
18398 | +/* pmd1 */ PFM_P6_D(1) | |
18399 | +}; | |
18400 | +#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc) | |
18401 | + | |
18402 | +static int pfm_p6_probe_pmu(void) | |
18403 | +{ | |
18404 | + int high, low; | |
18405 | + | |
18406 | + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | |
18407 | + PFM_INFO("not an Intel processor"); | |
18408 | + return -1; | |
18409 | + } | |
18410 | + | |
18411 | + /* | |
18412 | + * check for P6 processor family | |
18413 | + */ | |
18414 | + if (current_cpu_data.x86 != 6) { | |
18415 | + PFM_INFO("unsupported family=%d", current_cpu_data.x86); | |
18416 | + return -1; | |
18417 | + } | |
18418 | + | |
18419 | + switch (current_cpu_data.x86_model) { | |
18420 | + case 1: /* Pentium Pro */ | |
18421 | + case 3: | |
18422 | + case 5: /* Pentium II Deschutes */ | |
18423 | + case 7 ... 11: | |
18424 | + break; | |
18425 | + case 13: | |
18426 | + /* for Pentium M, we need to check if PMU exist */ | |
18427 | + rdmsr(MSR_IA32_MISC_ENABLE, low, high); | |
18428 | + if (low & (1U << 7)) | |
18429 | + break; | |
18430 | + default: | |
18431 | + PFM_INFO("unsupported CPU model %d", | |
18432 | + current_cpu_data.x86_model); | |
18433 | + return -1; | |
18434 | + | |
18435 | + } | |
18436 | + | |
18437 | + if (!cpu_has_apic) { | |
18438 | + PFM_INFO("no Local APIC, try rebooting with lapic"); | |
18439 | + return -1; | |
18440 | + } | |
18441 | + __set_bit(0, cast_ulp(enable_mask)); | |
18442 | + __set_bit(1, cast_ulp(enable_mask)); | |
18443 | + max_enable = 1 + 1; | |
18444 | + /* | |
18445 | + * force NMI interrupt? | |
18446 | + */ | |
18447 | + if (force_nmi) | |
18448 | + pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI; | |
18449 | + | |
18450 | + return 0; | |
18451 | +} | |
18452 | + | |
18453 | +/** | |
18454 | + * pfm_p6_has_ovfls - check for pending overflow condition | |
18455 | + * @ctx: context to work on | |
18456 | + * | |
18457 | + * detect if counters have overflowed. | |
18458 | + * return: | |
18459 | + * 0 : no overflow | |
18460 | + * 1 : at least one overflow | |
18461 | + */ | |
18462 | +static int __kprobes pfm_p6_has_ovfls(struct pfm_context *ctx) | |
18463 | +{ | |
18464 | + u64 *cnt_mask; | |
18465 | + u64 wmask, val; | |
18466 | + u16 i, num; | |
18467 | + | |
18468 | + cnt_mask = ctx->regs.cnt_pmds; | |
18469 | + num = ctx->regs.num_counters; | |
18470 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
18471 | + | |
18472 | + /* | |
18473 | + * we can leverage the fact that we know the mapping | |
18474 | + * to hardcode the MSR address and avoid accessing | |
18475 | + * more cachelines | |
18476 | + * | |
18477 | + * We need to check cnt_mask because not all registers | |
18478 | + * may be available. | |
18479 | + */ | |
18480 | + for (i = 0; num; i++) { | |
18481 | + if (test_bit(i, cast_ulp(cnt_mask))) { | |
18482 | + rdmsrl(MSR_P6_PERFCTR0+i, val); | |
18483 | + if (!(val & wmask)) | |
18484 | + return 1; | |
18485 | + num--; | |
18486 | + } | |
18487 | + } | |
18488 | + return 0; | |
18489 | +} | |
18490 | + | |
18491 | +/** | |
18492 | + * pfm_p6_stop_save -- stop monitoring and save PMD values | |
18493 | + * @ctx: context to work on | |
18494 | + * @set: current event set | |
18495 | + * | |
18496 | + * return value: | |
18497 | + * 0 - no need to save PMDs in caller | |
18498 | + * 1 - need to save PMDs in caller | |
18499 | + */ | |
18500 | +static int pfm_p6_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) | |
18501 | +{ | |
18502 | + struct pfm_arch_pmu_info *pmu_info; | |
18503 | + u64 used_mask[PFM_PMC_BV]; | |
18504 | + u64 *cnt_pmds; | |
18505 | + u64 val, wmask, ovfl_mask; | |
18506 | + u32 i, count; | |
18507 | + | |
18508 | + pmu_info = pfm_pmu_info(); | |
18509 | + | |
18510 | + wmask = 1ULL << pfm_pmu_conf->counter_width; | |
18511 | + bitmap_and(cast_ulp(used_mask), | |
18512 | + cast_ulp(set->used_pmcs), | |
18513 | + cast_ulp(enable_mask), | |
18514 | + max_enable); | |
18515 | + | |
18516 | + count = bitmap_weight(cast_ulp(used_mask), ctx->regs.max_pmc); | |
18517 | + | |
18518 | + /* | |
18519 | + * stop monitoring | |
18520 | + * Unfortunately, this is very expensive! | |
18521 | + * wrmsrl() is serializing. | |
18522 | + */ | |
18523 | + for (i = 0; count; i++) { | |
18524 | + if (test_bit(i, cast_ulp(used_mask))) { | |
18525 | + wrmsrl(MSR_P6_EVNTSEL0+i, 0); | |
18526 | + count--; | |
18527 | + } | |
18528 | + } | |
18529 | + | |
18530 | + /* | |
18531 | + * if we already have a pending overflow condition, we simply | |
18532 | + * return to take care of this first. | |
18533 | + */ | |
18534 | + if (set->npend_ovfls) | |
18535 | + return 1; | |
18536 | + | |
18537 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
18538 | + cnt_pmds = ctx->regs.cnt_pmds; | |
18539 | + | |
18540 | + /* | |
18541 | + * check for pending overflows and save PMDs (combo) | |
18542 | + * we employ used_pmds because we also need to save | |
18543 | + * and not just check for pending interrupts. | |
18544 | + * | |
18545 | + * Must check for counting PMDs because of virtual PMDs | |
18546 | + */ | |
18547 | + count = set->nused_pmds; | |
18548 | + for (i = 0; count; i++) { | |
18549 | + if (test_bit(i, cast_ulp(set->used_pmds))) { | |
18550 | + val = pfm_arch_read_pmd(ctx, i); | |
18551 | + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { | |
18552 | + if (!(val & wmask)) { | |
18553 | + __set_bit(i, cast_ulp(set->povfl_pmds)); | |
18554 | + set->npend_ovfls++; | |
18555 | + } | |
18556 | + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); | |
18557 | + } | |
18558 | + set->pmds[i].value = val; | |
18559 | + count--; | |
18560 | + } | |
18561 | + } | |
18562 | + /* 0 means: no need to save PMDs at upper level */ | |
18563 | + return 0; | |
18564 | +} | |
18565 | + | |
18566 | +/** | |
18567 | + * pfm_p6_quiesce_pmu -- stop monitoring without grabbing any lock | |
18568 | + * | |
18569 | + * called from NMI interrupt handler to immediately stop monitoring | |
18570 | + * cannot grab any lock, including perfmon related locks | |
18571 | + */ | |
18572 | +static void __kprobes pfm_p6_quiesce(void) | |
18573 | +{ | |
18574 | + /* | |
18575 | + * quiesce PMU by clearing available registers that have | |
18576 | + * the start/stop capability | |
18577 | + * | |
18578 | + * P6 processors only have enable bit on PERFEVTSEL0 | |
18579 | + */ | |
18580 | + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) | |
18581 | + wrmsrl(MSR_P6_EVNTSEL0, 0); | |
18582 | +} | |
18583 | + | |
18584 | +/* | |
18585 | + * Counters have 40 bits implemented. However they are designed such | |
18586 | + * that bits [32-39] are sign extensions of bit 31. As such the | |
18587 | + * effective width of a counter for P6-like PMU is 31 bits only. | |
18588 | + * | |
18589 | + * See IA-32 Intel Architecture Software developer manual Vol 3B | |
18590 | + */ | |
18591 | +static struct pfm_pmu_config pfm_p6_pmu_conf = { | |
18592 | + .pmu_name = "Intel P6 processor Family", | |
18593 | + .counter_width = 31, | |
18594 | + .pmd_desc = pfm_p6_pmd_desc, | |
18595 | + .pmc_desc = pfm_p6_pmc_desc, | |
18596 | + .num_pmc_entries = PFM_P6_NUM_PMCS, | |
18597 | + .num_pmd_entries = PFM_P6_NUM_PMDS, | |
18598 | + .probe_pmu = pfm_p6_probe_pmu, | |
18599 | + .version = "1.0", | |
18600 | + .flags = PFM_PMU_BUILTIN_FLAG, | |
18601 | + .owner = THIS_MODULE, | |
18602 | + .pmu_info = &pfm_p6_pmu_info | |
18603 | +}; | |
18604 | + | |
18605 | +static int __init pfm_p6_pmu_init_module(void) | |
18606 | +{ | |
18607 | + return pfm_pmu_register(&pfm_p6_pmu_conf); | |
18608 | +} | |
18609 | + | |
18610 | +static void __exit pfm_p6_pmu_cleanup_module(void) | |
18611 | +{ | |
18612 | + pfm_pmu_unregister(&pfm_p6_pmu_conf); | |
18613 | +} | |
18614 | + | |
18615 | +module_init(pfm_p6_pmu_init_module); | |
18616 | +module_exit(pfm_p6_pmu_cleanup_module); | |
18617 | --- /dev/null | |
18618 | +++ b/arch/x86/perfmon/perfmon_pebs_core_smpl.c | |
18619 | @@ -0,0 +1,256 @@ | |
18620 | +/* | |
18621 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
18622 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
18623 | + * | |
18624 | + * This file implements the Precise Event Based Sampling (PEBS) | |
18625 | + * sampling format for Intel Core and Atom processors. | |
18626 | + * | |
18627 | + * This program is free software; you can redistribute it and/or | |
18628 | + * modify it under the terms of version 2 of the GNU General Public | |
18629 | + * License as published by the Free Software Foundation. | |
18630 | + * | |
18631 | + * This program is distributed in the hope that it will be useful, | |
18632 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18633 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18634 | + * General Public License for more details. | |
18635 | + * | |
18636 | + * You should have received a copy of the GNU General Public License | |
18637 | + * along with this program; if not, write to the Free Software | |
18638 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
18639 | + * 02111-1307 USA | |
18640 | + */ | |
18641 | +#include <linux/kernel.h> | |
18642 | +#include <linux/types.h> | |
18643 | +#include <linux/module.h> | |
18644 | +#include <linux/init.h> | |
18645 | +#include <linux/smp.h> | |
18646 | +#include <linux/perfmon_kern.h> | |
18647 | + | |
18648 | +#include <asm/msr.h> | |
18649 | +#include <asm/perfmon_pebs_core_smpl.h> | |
18650 | + | |
18651 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
18652 | +MODULE_DESCRIPTION("Intel Core Precise Event-Based Sampling (PEBS)"); | |
18653 | +MODULE_LICENSE("GPL"); | |
18654 | + | |
18655 | +#define ALIGN_PEBS(a, order) \ | |
18656 | + ((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1) | |
18657 | + | |
18658 | +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */ | |
18659 | + | |
18660 | +static int pfm_pebs_core_fmt_validate(u32 flags, u16 npmds, void *data) | |
18661 | +{ | |
18662 | + struct pfm_pebs_core_smpl_arg *arg = data; | |
18663 | + size_t min_buf_size; | |
18664 | + | |
18665 | + /* | |
18666 | + * need to define at least the size of the buffer | |
18667 | + */ | |
18668 | + if (data == NULL) { | |
18669 | + PFM_DBG("no argument passed"); | |
18670 | + return -EINVAL; | |
18671 | + } | |
18672 | + | |
18673 | + /* | |
18674 | + * compute min buf size. npmds is the maximum number | |
18675 | + * of implemented PMD registers. | |
18676 | + */ | |
18677 | + min_buf_size = sizeof(struct pfm_pebs_core_smpl_hdr) | |
18678 | + + sizeof(struct pfm_pebs_core_smpl_entry) | |
18679 | + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */ | |
18680 | + | |
18681 | + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu", | |
18682 | + flags, | |
18683 | + min_buf_size, | |
18684 | + arg->buf_size); | |
18685 | + | |
18686 | + /* | |
18687 | + * must hold at least the buffer header + one minimally sized entry | |
18688 | + */ | |
18689 | + if (arg->buf_size < min_buf_size) | |
18690 | + return -EINVAL; | |
18691 | + | |
18692 | + return 0; | |
18693 | +} | |
18694 | + | |
18695 | +static int pfm_pebs_core_fmt_get_size(unsigned int flags, void *data, size_t *size) | |
18696 | +{ | |
18697 | + struct pfm_pebs_core_smpl_arg *arg = data; | |
18698 | + | |
18699 | + /* | |
18700 | + * size has been validated in pfm_pebs_core_fmt_validate() | |
18701 | + */ | |
18702 | + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER); | |
18703 | + | |
18704 | + return 0; | |
18705 | +} | |
18706 | + | |
18707 | +static int pfm_pebs_core_fmt_init(struct pfm_context *ctx, void *buf, | |
18708 | + u32 flags, u16 npmds, void *data) | |
18709 | +{ | |
18710 | + struct pfm_arch_context *ctx_arch; | |
18711 | + struct pfm_pebs_core_smpl_hdr *hdr; | |
18712 | + struct pfm_pebs_core_smpl_arg *arg = data; | |
18713 | + u64 pebs_start, pebs_end; | |
18714 | + struct pfm_ds_area_core *ds; | |
18715 | + | |
18716 | + ctx_arch = pfm_ctx_arch(ctx); | |
18717 | + | |
18718 | + hdr = buf; | |
18719 | + ds = &hdr->ds; | |
18720 | + | |
18721 | + /* | |
18722 | + * align PEBS buffer base | |
18723 | + */ | |
18724 | + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER); | |
18725 | + pebs_end = pebs_start + arg->buf_size + 1; | |
18726 | + | |
18727 | + hdr->version = PFM_PEBS_CORE_SMPL_VERSION; | |
18728 | + hdr->buf_size = arg->buf_size; | |
18729 | + hdr->overflows = 0; | |
18730 | + | |
18731 | + /* | |
18732 | + * express PEBS buffer base as offset from the end of the header | |
18733 | + */ | |
18734 | + hdr->start_offs = pebs_start - (unsigned long)(hdr+1); | |
18735 | + | |
18736 | + /* | |
18737 | + * PEBS buffer boundaries | |
18738 | + */ | |
18739 | + ds->pebs_buf_base = pebs_start; | |
18740 | + ds->pebs_abs_max = pebs_end; | |
18741 | + | |
18742 | + /* | |
18743 | + * PEBS starting position | |
18744 | + */ | |
18745 | + ds->pebs_index = pebs_start; | |
18746 | + | |
18747 | + /* | |
18748 | + * PEBS interrupt threshold | |
18749 | + */ | |
18750 | + ds->pebs_intr_thres = pebs_start | |
18751 | + + arg->intr_thres | |
18752 | + * sizeof(struct pfm_pebs_core_smpl_entry); | |
18753 | + | |
18754 | + /* | |
18755 | + * save counter reset value for PEBS counter | |
18756 | + */ | |
18757 | + ds->pebs_cnt_reset = arg->cnt_reset; | |
18758 | + | |
18759 | + /* | |
18760 | + * keep track of DS AREA | |
18761 | + */ | |
18762 | + ctx_arch->ds_area = ds; | |
18763 | + ctx_arch->flags.use_ds = 1; | |
18764 | + ctx_arch->flags.use_pebs = 1; | |
18765 | + | |
18766 | + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%llx " | |
18767 | + "pebs_end=0x%llx ds=%p pebs_thres=0x%llx cnt_reset=0x%llx", | |
18768 | + buf, | |
18769 | + (unsigned long long)hdr->buf_size, | |
18770 | + (unsigned long long)hdr->start_offs, | |
18771 | + (unsigned long long)pebs_start, | |
18772 | + (unsigned long long)pebs_end, | |
18773 | + ds, | |
18774 | + (unsigned long long)ds->pebs_intr_thres, | |
18775 | + (unsigned long long)ds->pebs_cnt_reset); | |
18776 | + | |
18777 | + return 0; | |
18778 | +} | |
18779 | + | |
18780 | +static int pfm_pebs_core_fmt_handler(struct pfm_context *ctx, | |
18781 | + unsigned long ip, u64 tstamp, void *data) | |
18782 | +{ | |
18783 | + struct pfm_pebs_core_smpl_hdr *hdr; | |
18784 | + struct pfm_ovfl_arg *arg; | |
18785 | + | |
18786 | + hdr = ctx->smpl_addr; | |
18787 | + arg = &ctx->ovfl_arg; | |
18788 | + | |
18789 | + PFM_DBG_ovfl("buffer full"); | |
18790 | + /* | |
18791 | + * increment number of buffer overflows. | |
18792 | + * important to detect duplicate set of samples. | |
18793 | + */ | |
18794 | + hdr->overflows++; | |
18795 | + | |
18796 | + /* | |
18797 | + * request notification and masking of monitoring. | |
18798 | + * Notification is still subject to the overflowed | |
18799 | + * register having the FL_NOTIFY flag set. | |
18800 | + */ | |
18801 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; | |
18802 | + | |
18803 | + return -ENOBUFS; /* we are full, sorry */ | |
18804 | +} | |
18805 | + | |
18806 | +static int pfm_pebs_core_fmt_restart(int is_active, u32 *ovfl_ctrl, | |
18807 | + void *buf) | |
18808 | +{ | |
18809 | + struct pfm_pebs_core_smpl_hdr *hdr = buf; | |
18810 | + | |
18811 | + /* | |
18812 | + * reset index to base of buffer | |
18813 | + */ | |
18814 | + hdr->ds.pebs_index = hdr->ds.pebs_buf_base; | |
18815 | + | |
18816 | + *ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
18817 | + | |
18818 | + return 0; | |
18819 | +} | |
18820 | + | |
18821 | +static int pfm_pebs_core_fmt_exit(void *buf) | |
18822 | +{ | |
18823 | + return 0; | |
18824 | +} | |
18825 | + | |
18826 | +static struct pfm_smpl_fmt pebs_core_fmt = { | |
18827 | + .fmt_name = PFM_PEBS_CORE_SMPL_NAME, | |
18828 | + .fmt_version = 0x1, | |
18829 | + .fmt_arg_size = sizeof(struct pfm_pebs_core_smpl_arg), | |
18830 | + .fmt_validate = pfm_pebs_core_fmt_validate, | |
18831 | + .fmt_getsize = pfm_pebs_core_fmt_get_size, | |
18832 | + .fmt_init = pfm_pebs_core_fmt_init, | |
18833 | + .fmt_handler = pfm_pebs_core_fmt_handler, | |
18834 | + .fmt_restart = pfm_pebs_core_fmt_restart, | |
18835 | + .fmt_exit = pfm_pebs_core_fmt_exit, | |
18836 | + .fmt_flags = PFM_FMT_BUILTIN_FLAG, | |
18837 | + .owner = THIS_MODULE, | |
18838 | +}; | |
18839 | + | |
18840 | +static int __init pfm_pebs_core_fmt_init_module(void) | |
18841 | +{ | |
18842 | + if (!cpu_has_pebs) { | |
18843 | + PFM_INFO("processor does not have PEBS support"); | |
18844 | + return -1; | |
18845 | + } | |
18846 | + /* | |
18847 | + * cpu_has_pebs is not enough to identify Intel Core PEBS | |
18848 | + * which is different from Pentium 4 PEBS. Therefore we do | |
18849 | + * a more detailed check here | |
18850 | + */ | |
18851 | + if (current_cpu_data.x86 != 6) { | |
18852 | + PFM_INFO("not a supported Intel processor"); | |
18853 | + return -1; | |
18854 | + } | |
18855 | + | |
18856 | + switch (current_cpu_data.x86_model) { | |
18857 | + case 15: /* Merom */ | |
18858 | + case 23: /* Penryn */ | |
18859 | + case 28: /* Atom (Silverthorne) */ | |
18860 | + case 29: /* Dunnington */ | |
18861 | + break; | |
18862 | + default: | |
18863 | + PFM_INFO("not a supported Intel processor"); | |
18864 | + return -1; | |
18865 | + } | |
18866 | + return pfm_fmt_register(&pebs_core_fmt); | |
18867 | +} | |
18868 | + | |
18869 | +static void __exit pfm_pebs_core_fmt_cleanup_module(void) | |
18870 | +{ | |
18871 | + pfm_fmt_unregister(&pebs_core_fmt); | |
18872 | +} | |
18873 | + | |
18874 | +module_init(pfm_pebs_core_fmt_init_module); | |
18875 | +module_exit(pfm_pebs_core_fmt_cleanup_module); | |
18876 | --- /dev/null | |
18877 | +++ b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c | |
18878 | @@ -0,0 +1,253 @@ | |
18879 | +/* | |
18880 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
18881 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
18882 | + * | |
18883 | + * This file implements the Precise Event Based Sampling (PEBS) | |
18884 | + * sampling format. It supports the following processors: | |
18885 | + * - 32-bit Pentium 4 or other Netburst-based processors | |
18886 | + * - 64-bit Pentium 4 or other Netburst-based processors | |
18887 | + * | |
18888 | + * This program is free software; you can redistribute it and/or | |
18889 | + * modify it under the terms of version 2 of the GNU General Public | |
18890 | + * License as published by the Free Software Foundation. | |
18891 | + * | |
18892 | + * This program is distributed in the hope that it will be useful, | |
18893 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18894 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18895 | + * General Public License for more details. | |
18896 | + * | |
18897 | + * You should have received a copy of the GNU General Public License | |
18898 | + * along with this program; if not, write to the Free Software | |
18899 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
18900 | + * 02111-1307 USA | |
18901 | + */ | |
18902 | +#include <linux/kernel.h> | |
18903 | +#include <linux/types.h> | |
18904 | +#include <linux/module.h> | |
18905 | +#include <linux/init.h> | |
18906 | +#include <linux/smp.h> | |
18907 | +#include <linux/perfmon_kern.h> | |
18908 | + | |
18909 | +#include <asm/msr.h> | |
18910 | +#include <asm/perfmon_pebs_p4_smpl.h> | |
18911 | + | |
18912 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
18913 | +MODULE_DESCRIPTION("Intel P4 Precise Event-Based Sampling (PEBS)"); | |
18914 | +MODULE_LICENSE("GPL"); | |
18915 | + | |
18916 | +#define ALIGN_PEBS(a, order) \ | |
18917 | + ((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1) | |
18918 | + | |
18919 | +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */ | |
18920 | + | |
18921 | +static int pfm_pebs_p4_fmt_validate(u32 flags, u16 npmds, void *data) | |
18922 | +{ | |
18923 | + struct pfm_pebs_p4_smpl_arg *arg = data; | |
18924 | + size_t min_buf_size; | |
18925 | + | |
18926 | + /* | |
18927 | + * need to define at least the size of the buffer | |
18928 | + */ | |
18929 | + if (data == NULL) { | |
18930 | + PFM_DBG("no argument passed"); | |
18931 | + return -EINVAL; | |
18932 | + } | |
18933 | + | |
18934 | + /* | |
18935 | + * compute min buf size. npmds is the maximum number | |
18936 | + * of implemented PMD registers. | |
18937 | + */ | |
18938 | + min_buf_size = sizeof(struct pfm_pebs_p4_smpl_hdr) | |
18939 | + + sizeof(struct pfm_pebs_p4_smpl_entry) | |
18940 | + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */ | |
18941 | + | |
18942 | + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu", | |
18943 | + flags, | |
18944 | + min_buf_size, | |
18945 | + arg->buf_size); | |
18946 | + | |
18947 | + /* | |
18948 | + * must hold at least the buffer header + one minimally sized entry | |
18949 | + */ | |
18950 | + if (arg->buf_size < min_buf_size) | |
18951 | + return -EINVAL; | |
18952 | + | |
18953 | + return 0; | |
18954 | +} | |
18955 | + | |
18956 | +static int pfm_pebs_p4_fmt_get_size(unsigned int flags, void *data, size_t *size) | |
18957 | +{ | |
18958 | + struct pfm_pebs_p4_smpl_arg *arg = data; | |
18959 | + | |
18960 | + /* | |
18961 | + * size has been validated in pfm_pebs_p4_fmt_validate() | |
18962 | + */ | |
18963 | + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER); | |
18964 | + | |
18965 | + return 0; | |
18966 | +} | |
18967 | + | |
18968 | +static int pfm_pebs_p4_fmt_init(struct pfm_context *ctx, void *buf, | |
18969 | + u32 flags, u16 npmds, void *data) | |
18970 | +{ | |
18971 | + struct pfm_arch_context *ctx_arch; | |
18972 | + struct pfm_pebs_p4_smpl_hdr *hdr; | |
18973 | + struct pfm_pebs_p4_smpl_arg *arg = data; | |
18974 | + unsigned long pebs_start, pebs_end; | |
18975 | + struct pfm_ds_area_p4 *ds; | |
18976 | + | |
18977 | + ctx_arch = pfm_ctx_arch(ctx); | |
18978 | + | |
18979 | + hdr = buf; | |
18980 | + ds = &hdr->ds; | |
18981 | + | |
18982 | + /* | |
18983 | + * align PEBS buffer base | |
18984 | + */ | |
18985 | + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER); | |
18986 | + pebs_end = pebs_start + arg->buf_size + 1; | |
18987 | + | |
18988 | + hdr->version = PFM_PEBS_P4_SMPL_VERSION; | |
18989 | + hdr->buf_size = arg->buf_size; | |
18990 | + hdr->overflows = 0; | |
18991 | + | |
18992 | + /* | |
18993 | + * express PEBS buffer base as offset from the end of the header | |
18994 | + */ | |
18995 | + hdr->start_offs = pebs_start - (unsigned long)(hdr+1); | |
18996 | + | |
18997 | + /* | |
18998 | + * PEBS buffer boundaries | |
18999 | + */ | |
19000 | + ds->pebs_buf_base = pebs_start; | |
19001 | + ds->pebs_abs_max = pebs_end; | |
19002 | + | |
19003 | + /* | |
19004 | + * PEBS starting position | |
19005 | + */ | |
19006 | + ds->pebs_index = pebs_start; | |
19007 | + | |
19008 | + /* | |
19009 | + * PEBS interrupt threshold | |
19010 | + */ | |
19011 | + ds->pebs_intr_thres = pebs_start | |
19012 | + + arg->intr_thres * sizeof(struct pfm_pebs_p4_smpl_entry); | |
19013 | + | |
19014 | + /* | |
19015 | + * save counter reset value for PEBS counter | |
19016 | + */ | |
19017 | + ds->pebs_cnt_reset = arg->cnt_reset; | |
19018 | + | |
19019 | + /* | |
19020 | + * keep track of DS AREA | |
19021 | + */ | |
19022 | + ctx_arch->ds_area = ds; | |
19023 | + ctx_arch->flags.use_pebs = 1; | |
19024 | + ctx_arch->flags.use_ds = 1; | |
19025 | + | |
19026 | + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx " | |
19027 | + "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx", | |
19028 | + buf, | |
19029 | + (unsigned long long)hdr->buf_size, | |
19030 | + (unsigned long long)hdr->start_offs, | |
19031 | + pebs_start, | |
19032 | + pebs_end, | |
19033 | + ds, | |
19034 | + ds->pebs_intr_thres, | |
19035 | + (unsigned long long)ds->pebs_cnt_reset); | |
19036 | + | |
19037 | + return 0; | |
19038 | +} | |
19039 | + | |
19040 | +static int pfm_pebs_p4_fmt_handler(struct pfm_context *ctx, | |
19041 | + unsigned long ip, u64 tstamp, void *data) | |
19042 | +{ | |
19043 | + struct pfm_pebs_p4_smpl_hdr *hdr; | |
19044 | + struct pfm_ovfl_arg *arg; | |
19045 | + | |
19046 | + hdr = ctx->smpl_addr; | |
19047 | + arg = &ctx->ovfl_arg; | |
19048 | + | |
19049 | + PFM_DBG_ovfl("buffer full"); | |
19050 | + /* | |
19051 | + * increment number of buffer overflows. | |
19052 | + * important to detect duplicate set of samples. | |
19053 | + */ | |
19054 | + hdr->overflows++; | |
19055 | + | |
19056 | + /* | |
19057 | + * request notification and masking of monitoring. | |
19058 | + * Notification is still subject to the overflowed | |
19059 | + * register having the FL_NOTIFY flag set. | |
19060 | + */ | |
19061 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; | |
19062 | + | |
19063 | + return -ENOBUFS; /* we are full, sorry */ | |
19064 | +} | |
19065 | + | |
19066 | +static int pfm_pebs_p4_fmt_restart(int is_active, u32 *ovfl_ctrl, | |
19067 | + void *buf) | |
19068 | +{ | |
19069 | + struct pfm_pebs_p4_smpl_hdr *hdr = buf; | |
19070 | + | |
19071 | + /* | |
19072 | + * reset index to base of buffer | |
19073 | + */ | |
19074 | + hdr->ds.pebs_index = hdr->ds.pebs_buf_base; | |
19075 | + | |
19076 | + *ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
19077 | + | |
19078 | + return 0; | |
19079 | +} | |
19080 | + | |
19081 | +static int pfm_pebs_p4_fmt_exit(void *buf) | |
19082 | +{ | |
19083 | + return 0; | |
19084 | +} | |
19085 | + | |
19086 | +static struct pfm_smpl_fmt pebs_p4_fmt = { | |
19087 | + .fmt_name = PFM_PEBS_P4_SMPL_NAME, | |
19088 | + .fmt_version = 0x1, | |
19089 | + .fmt_arg_size = sizeof(struct pfm_pebs_p4_smpl_arg), | |
19090 | + .fmt_validate = pfm_pebs_p4_fmt_validate, | |
19091 | + .fmt_getsize = pfm_pebs_p4_fmt_get_size, | |
19092 | + .fmt_init = pfm_pebs_p4_fmt_init, | |
19093 | + .fmt_handler = pfm_pebs_p4_fmt_handler, | |
19094 | + .fmt_restart = pfm_pebs_p4_fmt_restart, | |
19095 | + .fmt_exit = pfm_pebs_p4_fmt_exit, | |
19096 | + .fmt_flags = PFM_FMT_BUILTIN_FLAG, | |
19097 | + .owner = THIS_MODULE, | |
19098 | +}; | |
19099 | + | |
19100 | +static int __init pfm_pebs_p4_fmt_init_module(void) | |
19101 | +{ | |
19102 | + int ht_enabled; | |
19103 | + | |
19104 | + if (!cpu_has_pebs) { | |
19105 | + PFM_INFO("processor does not have PEBS support"); | |
19106 | + return -1; | |
19107 | + } | |
19108 | + if (current_cpu_data.x86 != 15) { | |
19109 | + PFM_INFO("not an Intel Pentium 4"); | |
19110 | + return -1; | |
19111 | + } | |
19112 | +#ifdef CONFIG_SMP | |
19113 | + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map)) | |
19114 | + / current_cpu_data.x86_max_cores) > 1; | |
19115 | +#else | |
19116 | + ht_enabled = 0; | |
19117 | +#endif | |
19118 | + if (ht_enabled) { | |
19119 | + PFM_INFO("PEBS not available because HyperThreading is on"); | |
19120 | + return -1; | |
19121 | + } | |
19122 | + return pfm_fmt_register(&pebs_p4_fmt); | |
19123 | +} | |
19124 | + | |
19125 | +static void __exit pfm_pebs_p4_fmt_cleanup_module(void) | |
19126 | +{ | |
19127 | + pfm_fmt_unregister(&pebs_p4_fmt); | |
19128 | +} | |
19129 | + | |
19130 | +module_init(pfm_pebs_p4_fmt_init_module); | |
19131 | +module_exit(pfm_pebs_p4_fmt_cleanup_module); | |
19132 | --- a/include/asm-mips/Kbuild | |
19133 | +++ b/include/asm-mips/Kbuild | |
19134 | @@ -1,3 +1,4 @@ | |
19135 | include include/asm-generic/Kbuild.asm | |
19136 | ||
19137 | header-y += cachectl.h sgidefs.h sysmips.h | |
19138 | +header-y += perfmon.h | |
19139 | --- /dev/null | |
19140 | +++ b/include/asm-mips/perfmon.h | |
19141 | @@ -0,0 +1,34 @@ | |
19142 | +/* | |
19143 | + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. | |
19144 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
19145 | + * | |
19146 | + * This file contains mips64 specific definitions for the perfmon | |
19147 | + * interface. | |
19148 | + * | |
19149 | + * This file MUST never be included directly. Use linux/perfmon.h. | |
19150 | + * | |
19151 | + * This program is free software; you can redistribute it and/or | |
19152 | + * modify it under the terms of version 2 of the GNU General Public | |
19153 | + * License as published by the Free Software Foundation. | |
19154 | + * | |
19155 | + * This program is distributed in the hope that it will be useful, | |
19156 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19157 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19158 | + * General Public License for more details. | |
19159 | + * | |
19160 | + * You should have received a copy of the GNU General Public License | |
19161 | + * along with this program; if not, write to the Free Software | |
19162 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19163 | + * 02111-1307 USA | |
19164 | + */ | |
19165 | +#ifndef _ASM_MIPS64_PERFMON_H_ | |
19166 | +#define _ASM_MIPS64_PERFMON_H_ | |
19167 | + | |
19168 | +/* | |
19169 | + * arch-specific user visible interface definitions | |
19170 | + */ | |
19171 | + | |
19172 | +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ | |
19173 | +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ | |
19174 | + | |
19175 | +#endif /* _ASM_MIPS64_PERFMON_H_ */ | |
19176 | --- /dev/null | |
19177 | +++ b/include/asm-mips/perfmon_kern.h | |
19178 | @@ -0,0 +1,412 @@ | |
19179 | +/* | |
19180 | + * Copyright (c) 2005 Philip Mucci. | |
19181 | + * | |
19182 | + * Based on other versions: | |
19183 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
19184 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
19185 | + * | |
19186 | + * This file contains mips64 specific definitions for the perfmon | |
19187 | + * interface. | |
19188 | + * | |
19189 | + * This program is free software; you can redistribute it and/or | |
19190 | + * modify it under the terms of version 2 of the GNU General Public | |
19191 | + * License as published by the Free Software Foundation. | |
19192 | + * | |
19193 | + * This program is distributed in the hope that it will be useful, | |
19194 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19195 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19196 | + * General Public License for more details. | |
19197 | + * | |
19198 | + * You should have received a copy of the GNU General Public License | |
19199 | + * along with this program; if not, write to the Free Software | |
19200 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19201 | + * 02111-1307 USA | |
19202 | + */ | |
19203 | +#ifndef _ASM_MIPS64_PERFMON_KERN_H_ | |
19204 | +#define _ASM_MIPS64_PERFMON_KERN_H_ | |
19205 | + | |
19206 | +#ifdef __KERNEL__ | |
19207 | + | |
19208 | +#ifdef CONFIG_PERFMON | |
19209 | +#include <linux/unistd.h> | |
19210 | +#include <asm/cacheflush.h> | |
19211 | + | |
19212 | +#define PFM_ARCH_PMD_STK_ARG 2 | |
19213 | +#define PFM_ARCH_PMC_STK_ARG 2 | |
19214 | + | |
19215 | +struct pfm_arch_pmu_info { | |
19216 | + u32 pmu_style; | |
19217 | +}; | |
19218 | + | |
19219 | +#define MIPS64_CONFIG_PMC_MASK (1 << 4) | |
19220 | +#define MIPS64_PMC_INT_ENABLE_MASK (1 << 4) | |
19221 | +#define MIPS64_PMC_CNT_ENABLE_MASK (0xf) | |
19222 | +#define MIPS64_PMC_EVT_MASK (0x7 << 6) | |
19223 | +#define MIPS64_PMC_CTR_MASK (1 << 31) | |
19224 | +#define MIPS64_PMD_INTERRUPT (1 << 31) | |
19225 | + | |
19226 | +/* Coprocessor register 25 contains the PMU interface. */ | |
19227 | +/* Sel 0 is control for counter 0 */ | |
19228 | +/* Sel 1 is count for counter 0. */ | |
19229 | +/* Sel 2 is control for counter 1. */ | |
19230 | +/* Sel 3 is count for counter 1. */ | |
19231 | + | |
19232 | +/* | |
19233 | + | |
19234 | +31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 | |
19235 | +M 0--------------------------------------------------------------0 Event-- IE U S K EXL | |
19236 | + | |
19237 | +M 31 If this bit is one, another pair of Performance Control | |
19238 | +and Counter registers is implemented at a MTC0 | |
19239 | + | |
19240 | +Event 8:5 Counter event enabled for this counter. Possible events | |
19241 | +are listed in Table 6-30. R/W Undefined | |
19242 | + | |
19243 | +IE 4 Counter Interrupt Enable. This bit masks bit 31 of the | |
19244 | +associated count register from the interrupt exception | |
19245 | +request output. R/W 0 | |
19246 | + | |
19247 | +U 3 Count in User Mode. When this bit is set, the specified | |
19248 | +event is counted in User Mode. R/W Undefined | |
19249 | + | |
19250 | +S 2 Count in Supervisor Mode. When this bit is set, the | |
19251 | +specified event is counted in Supervisor Mode. R/W Undefined | |
19252 | + | |
19253 | +K 1 Count in Kernel Mode. When this bit is set, count the | |
19254 | +event in Kernel Mode when EXL and ERL both are 0. R/W Undefined | |
19255 | + | |
19256 | +EXL 0 Count when EXL. When this bit is set, count the event | |
19257 | +when EXL = 1 and ERL = 0. R/W Undefined | |
19258 | +*/ | |
19259 | + | |
19260 | +static inline void pfm_arch_resend_irq(struct pfm_context *ctx) | |
19261 | +{} | |
19262 | + | |
19263 | +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, | |
19264 | + struct pfm_event_set *set) | |
19265 | +{} | |
19266 | + | |
19267 | +static inline void pfm_arch_serialize(void) | |
19268 | +{} | |
19269 | + | |
19270 | + | |
19271 | +/* | |
19272 | + * MIPS does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus | |
19273 | + * this routine needs to do it when switching sets on overflow | |
19274 | + */ | |
19275 | +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, | |
19276 | + struct pfm_event_set *set) | |
19277 | +{ | |
19278 | + pfm_save_pmds(ctx, set); | |
19279 | +} | |
19280 | + | |
19281 | +static inline void pfm_arch_write_pmc(struct pfm_context *ctx, | |
19282 | + unsigned int cnum, u64 value) | |
19283 | +{ | |
19284 | + /* | |
19285 | + * we only write to the actual register when monitoring is | |
19286 | + * active (pfm_start was issued) | |
19287 | + */ | |
19288 | + if (ctx && (ctx->flags.started == 0)) | |
19289 | + return; | |
19290 | + | |
19291 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
19292 | + case 0: | |
19293 | + write_c0_perfctrl0(value); | |
19294 | + break; | |
19295 | + case 1: | |
19296 | + write_c0_perfctrl1(value); | |
19297 | + break; | |
19298 | + case 2: | |
19299 | + write_c0_perfctrl2(value); | |
19300 | + break; | |
19301 | + case 3: | |
19302 | + write_c0_perfctrl3(value); | |
19303 | + break; | |
19304 | + default: | |
19305 | + BUG(); | |
19306 | + } | |
19307 | +} | |
19308 | + | |
19309 | +static inline void pfm_arch_write_pmd(struct pfm_context *ctx, | |
19310 | + unsigned int cnum, u64 value) | |
19311 | +{ | |
19312 | + value &= pfm_pmu_conf->ovfl_mask; | |
19313 | + | |
19314 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
19315 | + case 0: | |
19316 | + write_c0_perfcntr0(value); | |
19317 | + break; | |
19318 | + case 1: | |
19319 | + write_c0_perfcntr1(value); | |
19320 | + break; | |
19321 | + case 2: | |
19322 | + write_c0_perfcntr2(value); | |
19323 | + break; | |
19324 | + case 3: | |
19325 | + write_c0_perfcntr3(value); | |
19326 | + break; | |
19327 | + default: | |
19328 | + BUG(); | |
19329 | + } | |
19330 | +} | |
19331 | + | |
19332 | +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
19333 | +{ | |
19334 | + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { | |
19335 | + case 0: | |
19336 | + return read_c0_perfcntr0(); | |
19337 | + break; | |
19338 | + case 1: | |
19339 | + return read_c0_perfcntr1(); | |
19340 | + break; | |
19341 | + case 2: | |
19342 | + return read_c0_perfcntr2(); | |
19343 | + break; | |
19344 | + case 3: | |
19345 | + return read_c0_perfcntr3(); | |
19346 | + break; | |
19347 | + default: | |
19348 | + BUG(); | |
19349 | + return 0; | |
19350 | + } | |
19351 | +} | |
19352 | + | |
19353 | +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) | |
19354 | +{ | |
19355 | + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { | |
19356 | + case 0: | |
19357 | + return read_c0_perfctrl0(); | |
19358 | + break; | |
19359 | + case 1: | |
19360 | + return read_c0_perfctrl1(); | |
19361 | + break; | |
19362 | + case 2: | |
19363 | + return read_c0_perfctrl2(); | |
19364 | + break; | |
19365 | + case 3: | |
19366 | + return read_c0_perfctrl3(); | |
19367 | + break; | |
19368 | + default: | |
19369 | + BUG(); | |
19370 | + return 0; | |
19371 | + } | |
19372 | +} | |
19373 | + | |
19374 | +/* | |
19375 | + * For some CPUs, the upper bits of a counter must be set in order for the | |
19376 | + * overflow interrupt to happen. On overflow, the counter has wrapped around, | |
19377 | + * and the upper bits are cleared. This function may be used to set them back. | |
19378 | + */ | |
19379 | +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, | |
19380 | + unsigned int cnum) | |
19381 | +{ | |
19382 | + u64 val; | |
19383 | + val = pfm_arch_read_pmd(ctx, cnum); | |
19384 | + /* This masks out overflow bit 31 */ | |
19385 | + pfm_arch_write_pmd(ctx, cnum, val); | |
19386 | +} | |
19387 | + | |
19388 | +/* | |
19389 | + * At certain points, perfmon needs to know if monitoring has been | |
19390 | + * explicitly started/stopped by user via pfm_start/pfm_stop. The | |
19391 | + * information is tracked in ctx.flags.started. However on certain | |
19392 | + * architectures, it may be possible to start/stop directly from | |
19393 | + * user level with a single assembly instruction bypassing | |
19394 | + * the kernel. This function must be used to determine by | |
19395 | + * an arch-specific means if monitoring is actually started/stopped. | |
19396 | + */ | |
19397 | +static inline int pfm_arch_is_active(struct pfm_context *ctx) | |
19398 | +{ | |
19399 | + return ctx->flags.started; | |
19400 | +} | |
19401 | + | |
19402 | +static inline void pfm_arch_ctxswout_sys(struct task_struct *task, | |
19403 | + struct pfm_context *ctx) | |
19404 | +{} | |
19405 | + | |
19406 | +static inline void pfm_arch_ctxswin_sys(struct task_struct *task, | |
19407 | + struct pfm_context *ctx) | |
19408 | +{} | |
19409 | + | |
19410 | +static inline void pfm_arch_ctxswin_thread(struct task_struct *task, | |
19411 | + struct pfm_context *ctx) | |
19412 | +{} | |
19413 | +int pfm_arch_ctxswout_thread(struct task_struct *task, | |
19414 | + struct pfm_context *ctx); | |
19415 | + | |
19416 | +int pfm_arch_is_monitoring_active(struct pfm_context *ctx); | |
19417 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); | |
19418 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); | |
19419 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
19420 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
19421 | +char *pfm_arch_get_pmu_module_name(void); | |
19422 | + | |
19423 | +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, | |
19424 | + struct pfm_event_set *set) | |
19425 | +{ | |
19426 | + pfm_arch_stop(current, ctx); | |
19427 | + /* | |
19428 | + * we mark monitoring as stopped to avoid | |
19429 | + * certain side effects especially in | |
19430 | + * pfm_switch_sets_from_intr() on | |
19431 | + * pfm_arch_restore_pmcs() | |
19432 | + */ | |
19433 | + ctx->flags.started = 0; | |
19434 | +} | |
19435 | + | |
19436 | +/* | |
19437 | + * unfreeze PMU from pfm_do_interrupt_handler() | |
19438 | + * ctx may be NULL for spurious | |
19439 | + */ | |
19440 | +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) | |
19441 | +{ | |
19442 | + if (!ctx) | |
19443 | + return; | |
19444 | + | |
19445 | + PFM_DBG_ovfl("state=%d", ctx->state); | |
19446 | + | |
19447 | + ctx->flags.started = 1; | |
19448 | + | |
19449 | + if (ctx->state == PFM_CTX_MASKED) | |
19450 | + return; | |
19451 | + | |
19452 | + pfm_arch_restore_pmcs(ctx, ctx->active_set); | |
19453 | +} | |
19454 | + | |
19455 | +/* | |
19456 | + * this function is called from the PMU interrupt handler ONLY. | |
19457 | + * On MIPS, the PMU is frozen via arch_stop, masking would be implemented | |
19458 | + * via arch-stop as well. Given that the PMU is already stopped when | |
19459 | + * entering the interrupt handler, we do not need to stop it again, so | |
19460 | + * this function is a nop. | |
19461 | + */ | |
19462 | +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, | |
19463 | + struct pfm_event_set *set) | |
19464 | +{} | |
19465 | + | |
19466 | +/* | |
19467 | + * on MIPS masking/unmasking uses the start/stop mechanism, so we simply | |
19468 | + * need to start here. | |
19469 | + */ | |
19470 | +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, | |
19471 | + struct pfm_event_set *set) | |
19472 | +{ | |
19473 | + pfm_arch_start(current, ctx); | |
19474 | +} | |
19475 | + | |
19476 | +static inline int pfm_arch_context_create(struct pfm_context *ctx, | |
19477 | + u32 ctx_flags) | |
19478 | +{ | |
19479 | + return 0; | |
19480 | +} | |
19481 | + | |
19482 | +static inline void pfm_arch_context_free(struct pfm_context *ctx) | |
19483 | +{} | |
19484 | + | |
19485 | + | |
19486 | + | |
19487 | + | |
19488 | + | |
19489 | +/* | |
19490 | + * function called from pfm_setfl_sane(). Context is locked | |
19491 | + * and interrupts are masked. | |
19492 | + * The value of flags is the value of ctx_flags as passed by | |
19493 | + * user. | |
19494 | + * | |
19495 | + * function must check arch-specific set flags. | |
19496 | + * Return: | |
19497 | + * 1 when flags are valid | |
19498 | + * 0 on error | |
19499 | + */ | |
19500 | +static inline int | |
19501 | +pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) | |
19502 | +{ | |
19503 | + return 0; | |
19504 | +} | |
19505 | + | |
19506 | +static inline int pfm_arch_init(void) | |
19507 | +{ | |
19508 | + return 0; | |
19509 | +} | |
19510 | + | |
19511 | +static inline void pfm_arch_init_percpu(void) | |
19512 | +{} | |
19513 | + | |
19514 | +static inline int pfm_arch_load_context(struct pfm_context *ctx) | |
19515 | +{ | |
19516 | + return 0; | |
19517 | +} | |
19518 | + | |
19519 | +static inline void pfm_arch_unload_context(struct pfm_context *ctx) | |
19520 | +{} | |
19521 | + | |
19522 | +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) | |
19523 | +{ | |
19524 | + return 0; | |
19525 | +} | |
19526 | + | |
19527 | +static inline void pfm_arch_pmu_release(void) | |
19528 | +{} | |
19529 | + | |
19530 | +#ifdef CONFIG_PERFMON_FLUSH | |
19531 | +/* | |
19532 | + * due to cache aliasing problem on MIPS, it is necessary to flush | |
19533 | + * pages out of the cache when they are modified. | |
19534 | + */ | |
19535 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
19536 | +{ | |
19537 | + unsigned long start, end; | |
19538 | + | |
19539 | + start = (unsigned long)addr & PAGE_MASK; | |
19540 | + end = ((unsigned long)addr + len + PAGE_SIZE - 1) & PAGE_MASK; | |
19541 | + | |
19542 | + while (start < end) { | |
19543 | + flush_data_cache_page(start); | |
19544 | + start += PAGE_SIZE; | |
19545 | + } | |
19546 | +} | |
19547 | +#else | |
19548 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
19549 | +{} | |
19550 | +#endif | |
19551 | + | |
19552 | +static inline void pfm_arch_arm_handle_work(struct task_struct *task) | |
19553 | +{} | |
19554 | + | |
19555 | +static inline void pfm_arch_disarm_handle_work(struct task_struct *task) | |
19556 | +{} | |
19557 | + | |
19558 | +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) | |
19559 | +{ | |
19560 | + return 0; | |
19561 | +} | |
19562 | + | |
19563 | +static inline int pfm_arch_get_base_syscall(void) | |
19564 | +{ | |
19565 | + if (test_thread_flag(TIF_32BIT_ADDR)) { | |
19566 | + if (test_thread_flag(TIF_32BIT_REGS)) | |
19567 | + return __NR_O32_Linux+330; | |
19568 | + return __NR_N32_Linux+293; | |
19569 | + } | |
19570 | + return __NR_64_Linux+289; | |
19571 | +} | |
19572 | + | |
19573 | +struct pfm_arch_context { | |
19574 | + /* empty */ | |
19575 | +}; | |
19576 | + | |
19577 | +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) | |
19578 | +/* | |
19579 | + * MIPS may need extra alignment requirements for the sampling buffer | |
19580 | + */ | |
19581 | +#ifdef CONFIG_PERFMON_SMPL_ALIGN | |
19582 | +#define PFM_ARCH_SMPL_ALIGN_SIZE 0x4000 | |
19583 | +#else | |
19584 | +#define PFM_ARCH_SMPL_ALIGN_SIZE 0 | |
19585 | +#endif | |
19586 | + | |
19587 | +#endif /* CONFIG_PERFMON */ | |
19588 | + | |
19589 | +#endif /* __KERNEL__ */ | |
19590 | +#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */ | |
19591 | --- a/include/asm-mips/system.h | |
19592 | +++ b/include/asm-mips/system.h | |
19593 | @@ -67,6 +67,10 @@ do { \ | |
19594 | __mips_mt_fpaff_switch_to(prev); \ | |
19595 | if (cpu_has_dsp) \ | |
19596 | __save_dsp(prev); \ | |
19597 | + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ | |
19598 | + pfm_ctxsw_out(prev, next); \ | |
19599 | + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ | |
19600 | + pfm_ctxsw_in(prev, next); \ | |
19601 | (last) = resume(prev, next, task_thread_info(next)); \ | |
19602 | } while (0) | |
19603 | ||
19604 | --- a/include/asm-mips/thread_info.h | |
19605 | +++ b/include/asm-mips/thread_info.h | |
19606 | @@ -114,6 +114,7 @@ register struct thread_info *__current_t | |
19607 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
19608 | #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ | |
19609 | #define TIF_SECCOMP 4 /* secure computing */ | |
19610 | +#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ | |
19611 | #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ | |
19612 | #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ | |
19613 | #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ | |
19614 | @@ -124,6 +125,7 @@ register struct thread_info *__current_t | |
19615 | #define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */ | |
19616 | #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */ | |
19617 | #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */ | |
19618 | +#define TIF_PERFMON_CTXSW 25 /* perfmon needs ctxsw calls */ | |
19619 | #define TIF_SYSCALL_TRACE 31 /* syscall trace active */ | |
19620 | ||
19621 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
19622 | @@ -140,6 +142,8 @@ register struct thread_info *__current_t | |
19623 | #define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS) | |
19624 | #define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR) | |
19625 | #define _TIF_FPUBOUND (1<<TIF_FPUBOUND) | |
19626 | +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK) | |
19627 | +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW) | |
19628 | ||
19629 | /* work to do on interrupt/exception return */ | |
19630 | #define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP) | |
19631 | --- a/include/asm-mips/unistd.h | |
19632 | +++ b/include/asm-mips/unistd.h | |
19633 | @@ -350,11 +350,23 @@ | |
19634 | #define __NR_dup3 (__NR_Linux + 327) | |
19635 | #define __NR_pipe2 (__NR_Linux + 328) | |
19636 | #define __NR_inotify_init1 (__NR_Linux + 329) | |
19637 | +#define __NR_pfm_create_context (__NR_Linux + 330) | |
19638 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
19639 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
19640 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
19641 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
19642 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
19643 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
19644 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
19645 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
19646 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
19647 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
19648 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
19649 | ||
19650 | /* | |
19651 | * Offset of the last Linux o32 flavoured syscall | |
19652 | */ | |
19653 | -#define __NR_Linux_syscalls 329 | |
19654 | +#define __NR_Linux_syscalls 341 | |
19655 | ||
19656 | #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ | |
19657 | ||
19658 | @@ -656,16 +668,28 @@ | |
19659 | #define __NR_dup3 (__NR_Linux + 286) | |
19660 | #define __NR_pipe2 (__NR_Linux + 287) | |
19661 | #define __NR_inotify_init1 (__NR_Linux + 288) | |
19662 | +#define __NR_pfm_create_context (__NR_Linux + 289) | |
19663 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
19664 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
19665 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
19666 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
19667 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
19668 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
19669 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
19670 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
19671 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
19672 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
19673 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
19674 | ||
19675 | /* | |
19676 | * Offset of the last Linux 64-bit flavoured syscall | |
19677 | */ | |
19678 | -#define __NR_Linux_syscalls 288 | |
19679 | +#define __NR_Linux_syscalls 300 | |
19680 | ||
19681 | #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ | |
19682 | ||
19683 | #define __NR_64_Linux 5000 | |
19684 | -#define __NR_64_Linux_syscalls 288 | |
19685 | +#define __NR_64_Linux_syscalls 300 | |
19686 | ||
19687 | #if _MIPS_SIM == _MIPS_SIM_NABI32 | |
19688 | ||
19689 | @@ -966,16 +990,28 @@ | |
19690 | #define __NR_dup3 (__NR_Linux + 290) | |
19691 | #define __NR_pipe2 (__NR_Linux + 291) | |
19692 | #define __NR_inotify_init1 (__NR_Linux + 292) | |
19693 | +#define __NR_pfm_create_context (__NR_Linux + 293) | |
19694 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
19695 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
19696 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
19697 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
19698 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
19699 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
19700 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
19701 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
19702 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
19703 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
19704 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
19705 | ||
19706 | /* | |
19707 | * Offset of the last N32 flavoured syscall | |
19708 | */ | |
19709 | -#define __NR_Linux_syscalls 292 | |
19710 | +#define __NR_Linux_syscalls 304 | |
19711 | ||
19712 | #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ | |
19713 | ||
19714 | #define __NR_N32_Linux 6000 | |
19715 | -#define __NR_N32_Linux_syscalls 292 | |
19716 | +#define __NR_N32_Linux_syscalls 304 | |
19717 | ||
19718 | #ifdef __KERNEL__ | |
19719 | ||
19720 | --- a/include/asm-x86/Kbuild | |
19721 | +++ b/include/asm-x86/Kbuild | |
19722 | @@ -9,6 +9,7 @@ header-y += prctl.h | |
19723 | header-y += ptrace-abi.h | |
19724 | header-y += sigcontext32.h | |
19725 | header-y += ucontext.h | |
19726 | +header-y += perfmon.h | |
19727 | header-y += processor-flags.h | |
19728 | ||
19729 | unifdef-y += e820.h | |
19730 | --- a/include/asm-x86/ia32_unistd.h | |
19731 | +++ b/include/asm-x86/ia32_unistd.h | |
19732 | @@ -8,11 +8,12 @@ | |
19733 | * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK | |
19734 | */ | |
19735 | ||
19736 | -#define __NR_ia32_restart_syscall 0 | |
19737 | -#define __NR_ia32_exit 1 | |
19738 | -#define __NR_ia32_read 3 | |
19739 | -#define __NR_ia32_write 4 | |
19740 | -#define __NR_ia32_sigreturn 119 | |
19741 | -#define __NR_ia32_rt_sigreturn 173 | |
19742 | +#define __NR_ia32_restart_syscall 0 | |
19743 | +#define __NR_ia32_exit 1 | |
19744 | +#define __NR_ia32_read 3 | |
19745 | +#define __NR_ia32_write 4 | |
19746 | +#define __NR_ia32_sigreturn 119 | |
19747 | +#define __NR_ia32_rt_sigreturn 173 | |
19748 | +#define __NR_ia32_pfm_create_context 333 | |
19749 | ||
19750 | #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ | |
19751 | --- a/include/asm-x86/irq_vectors.h | |
19752 | +++ b/include/asm-x86/irq_vectors.h | |
19753 | @@ -99,6 +99,11 @@ | |
19754 | #define LOCAL_TIMER_VECTOR 0xef | |
19755 | ||
19756 | /* | |
19757 | + * Perfmon PMU interrupt vector | |
19758 | + */ | |
19759 | +#define LOCAL_PERFMON_VECTOR 0xee | |
19760 | + | |
19761 | +/* | |
19762 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | |
19763 | * start at 0x31(0x41) to spread out vectors evenly between priority | |
19764 | * levels. (0x80 is the syscall vector) | |
19765 | --- a/include/asm-x86/mach-default/entry_arch.h | |
19766 | +++ b/include/asm-x86/mach-default/entry_arch.h | |
19767 | @@ -32,4 +32,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURI | |
19768 | BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) | |
19769 | #endif | |
19770 | ||
19771 | +#ifdef CONFIG_PERFMON | |
19772 | +BUILD_INTERRUPT(pmu_interrupt,LOCAL_PERFMON_VECTOR) | |
19773 | +#endif | |
19774 | + | |
19775 | #endif | |
19776 | --- /dev/null | |
19777 | +++ b/include/asm-x86/perfmon.h | |
19778 | @@ -0,0 +1,34 @@ | |
19779 | +/* | |
19780 | + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. | |
19781 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
19782 | + * | |
19783 | + * This file contains i386/x86_64 specific definitions for the perfmon | |
19784 | + * interface. | |
19785 | + * | |
19786 | + * This file MUST never be included directly. Use linux/perfmon.h. | |
19787 | + * | |
19788 | + * This program is free software; you can redistribute it and/or | |
19789 | + * modify it under the terms of version 2 of the GNU General Public | |
19790 | + * License as published by the Free Software Foundation. | |
19791 | + * | |
19792 | + * This program is distributed in the hope that it will be useful, | |
19793 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19794 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19795 | + * General Public License for more details. | |
19796 | + * | |
19797 | + * You should have received a copy of the GNU General Public License | |
19798 | + * along with this program; if not, write to the Free Software | |
19799 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19800 | + * 02111-1307 USA | |
19801 | + */ | |
19802 | +#ifndef _ASM_X86_PERFMON__H_ | |
19803 | +#define _ASM_X86_PERFMON__H_ | |
19804 | + | |
19805 | +/* | |
19806 | + * arch-specific user visible interface definitions | |
19807 | + */ | |
19808 | + | |
19809 | +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ | |
19810 | +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ | |
19811 | + | |
19812 | +#endif /* _ASM_X86_PERFMON__H_ */ | |
19813 | --- /dev/null | |
19814 | +++ b/include/asm-x86/perfmon_kern.h | |
19815 | @@ -0,0 +1,548 @@ | |
19816 | +/* | |
19817 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
19818 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
19819 | + * | |
19820 | + * Copyright (c) 2007 Advanced Micro Devices, Inc. | |
19821 | + * Contributed by Robert Richter <robert.richter@amd.com> | |
19822 | + * | |
19823 | + * This file contains X86 Processor Family specific definitions | |
19824 | + * for the perfmon interface. This covers P6, Pentium M, P4/Xeon | |
19825 | + * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64. | |
19826 | + * | |
19827 | + * This program is free software; you can redistribute it and/or | |
19828 | + * modify it under the terms of version 2 of the GNU General Public | |
19829 | + * License as published by the Free Software Foundation. | |
19830 | + * | |
19831 | + * This program is distributed in the hope that it will be useful, | |
19832 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19833 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19834 | + * General Public License for more details. | |
19835 | + * | |
19836 | + * You should have received a copy of the GNU General Public License | |
19837 | + * along with this program; if not, write to the Free Software | |
19838 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19839 | + * 02111-1307 USA | |
19840 | + */ | |
19841 | +#ifndef _ASM_X86_PERFMON_KERN_H_ | |
19842 | +#define _ASM_X86_PERFMON_KERN_H_ | |
19843 | + | |
19844 | +#ifdef CONFIG_PERFMON | |
19845 | +#include <linux/unistd.h> | |
19846 | +#ifdef CONFIG_4KSTACKS | |
19847 | +#define PFM_ARCH_PMD_STK_ARG 2 | |
19848 | +#define PFM_ARCH_PMC_STK_ARG 2 | |
19849 | +#else | |
19850 | +#define PFM_ARCH_PMD_STK_ARG 4 /* about 700 bytes of stack space */ | |
19851 | +#define PFM_ARCH_PMC_STK_ARG 4 /* about 200 bytes of stack space */ | |
19852 | +#endif | |
19853 | + | |
19854 | +struct pfm_arch_pmu_info { | |
19855 | + u32 flags; /* PMU feature flags */ | |
19856 | + /* | |
19857 | + * mandatory model-specific callbacks | |
19858 | + */ | |
19859 | + int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set); | |
19860 | + int (*has_ovfls)(struct pfm_context *ctx); | |
19861 | + void (*quiesce)(void); | |
19862 | + | |
19863 | + /* | |
19864 | + * optional model-specific callbacks | |
19865 | + */ | |
19866 | + void (*acquire_pmu_percpu)(void); | |
19867 | + void (*release_pmu_percpu)(void); | |
19868 | + int (*create_context)(struct pfm_context *ctx, u32 ctx_flags); | |
19869 | + void (*free_context)(struct pfm_context *ctx); | |
19870 | + int (*load_context)(struct pfm_context *ctx); | |
19871 | + void (*unload_context)(struct pfm_context *ctx); | |
19872 | + void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value); | |
19873 | + void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value); | |
19874 | + u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum); | |
19875 | + u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum); | |
19876 | + void (*nmi_copy_state)(struct pfm_context *ctx); | |
19877 | + void (*restore_pmcs)(struct pfm_context *ctx, | |
19878 | + struct pfm_event_set *set); | |
19879 | + void (*restore_pmds)(struct pfm_context *ctx, | |
19880 | + struct pfm_event_set *set); | |
19881 | +}; | |
19882 | + | |
19883 | +/* | |
19884 | + * PMU feature flags | |
19885 | + */ | |
19886 | +#define PFM_X86_FL_USE_NMI 0x01 /* user asking for NMI */ | |
19887 | +#define PFM_X86_FL_NO_SHARING 0x02 /* no sharing with other subsystems */ | |
19888 | +#define PFM_X86_FL_SHARING 0x04 /* PMU is being shared */ | |
19889 | + | |
19890 | +struct pfm_x86_ctx_flags { | |
19891 | + unsigned int insecure:1; /* rdpmc per-thread self-monitoring */ | |
19892 | + unsigned int use_pebs:1; /* PEBS used */ | |
19893 | + unsigned int use_ds:1; /* DS used */ | |
19894 | + unsigned int reserved:29; /* for future use */ | |
19895 | +}; | |
19896 | + | |
19897 | +struct pfm_arch_context { | |
19898 | + u64 saved_real_iip; /* instr pointer of last NMI intr */ | |
19899 | + struct pfm_x86_ctx_flags flags; /* flags */ | |
19900 | + void *ds_area; /* address of DS area (to go away) */ | |
19901 | + void *data; /* model-specific data */ | |
19902 | +}; | |
19903 | + | |
19904 | +/* | |
19905 | + * functions implemented as inline on x86 | |
19906 | + */ | |
19907 | + | |
19908 | +/** | |
19909 | + * pfm_arch_write_pmc - write a single PMC register | |
19910 | + * @ctx: context to work on | |
19911 | + * @cnum: PMC index | |
19912 | + * @value: PMC 64-bit value | |
19913 | + * | |
19914 | + * in certain situations, ctx may be NULL | |
19915 | + */ | |
19916 | +static inline void pfm_arch_write_pmc(struct pfm_context *ctx, | |
19917 | + unsigned int cnum, u64 value) | |
19918 | +{ | |
19919 | + struct pfm_arch_pmu_info *pmu_info; | |
19920 | + | |
19921 | + pmu_info = pfm_pmu_info(); | |
19922 | + | |
19923 | + /* | |
19924 | + * we only write to the actual register when monitoring is | |
19925 | + * active (pfm_start was issued) | |
19926 | + */ | |
19927 | + if (ctx && ctx->flags.started == 0) | |
19928 | + return; | |
19929 | + | |
19930 | + /* | |
19931 | + * model-specific override, if any | |
19932 | + */ | |
19933 | + if (pmu_info->write_pmc) { | |
19934 | + pmu_info->write_pmc(ctx, cnum, value); | |
19935 | + return; | |
19936 | + } | |
19937 | + | |
19938 | + PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)", | |
19939 | + pfm_pmu_conf->pmc_desc[cnum].hw_addr, | |
19940 | + (unsigned long long) value); | |
19941 | + | |
19942 | + wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value); | |
19943 | +} | |
19944 | + | |
19945 | +/** | |
19946 | + * pfm_arch_write_pmd - write a single PMD register | |
19947 | + * @ctx: context to work on | |
19948 | + * @cnum: PMD index | |
19949 | + * @value: PMD 64-bit value | |
19950 | + */ | |
19951 | +static inline void pfm_arch_write_pmd(struct pfm_context *ctx, | |
19952 | + unsigned int cnum, u64 value) | |
19953 | +{ | |
19954 | + struct pfm_arch_pmu_info *pmu_info; | |
19955 | + | |
19956 | + pmu_info = pfm_pmu_info(); | |
19957 | + | |
19958 | + /* | |
19959 | + * to make sure the counter overflows, we set the | |
19960 | + * upper bits. we also clear any other unimplemented | |
19961 | + * bits as this may cause crash on some processors. | |
19962 | + */ | |
19963 | + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64) | |
19964 | + value = (value | ~pfm_pmu_conf->ovfl_mask) | |
19965 | + & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk; | |
19966 | + | |
19967 | + PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)", | |
19968 | + pfm_pmu_conf->pmd_desc[cnum].hw_addr, | |
19969 | + (unsigned long long) value); | |
19970 | + | |
19971 | + /* | |
19972 | + * model-specific override, if any | |
19973 | + */ | |
19974 | + if (pmu_info->write_pmd) { | |
19975 | + pmu_info->write_pmd(ctx, cnum, value); | |
19976 | + return; | |
19977 | + } | |
19978 | + | |
19979 | + wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value); | |
19980 | +} | |
19981 | + | |
19982 | +/** | |
19983 | + * pfm_arch_read_pmd - read a single PMD register | |
19984 | + * @ctx: context to work on | |
19985 | + * @cnum: PMD index | |
19986 | + * | |
19987 | + * return value is register 64-bit value | |
19988 | + */ | |
19989 | +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
19990 | +{ | |
19991 | + struct pfm_arch_pmu_info *pmu_info; | |
19992 | + u64 tmp; | |
19993 | + | |
19994 | + pmu_info = pfm_pmu_info(); | |
19995 | + | |
19996 | + /* | |
19997 | + * model-specific override, if any | |
19998 | + */ | |
19999 | + if (pmu_info->read_pmd) | |
20000 | + tmp = pmu_info->read_pmd(ctx, cnum); | |
20001 | + else | |
20002 | + rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp); | |
20003 | + | |
20004 | + PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx", | |
20005 | + pfm_pmu_conf->pmd_desc[cnum].hw_addr, | |
20006 | + (unsigned long long) tmp); | |
20007 | + return tmp; | |
20008 | +} | |
20009 | + | |
20010 | +/** | |
20011 | + * pfm_arch_read_pmc - read a single PMC register | |
20012 | + * @ctx: context to work on | |
20013 | + * @cnum: PMC index | |
20014 | + * | |
20015 | + * return value is register 64-bit value | |
20016 | + */ | |
20017 | +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) | |
20018 | +{ | |
20019 | + struct pfm_arch_pmu_info *pmu_info; | |
20020 | + u64 tmp; | |
20021 | + | |
20022 | + pmu_info = pfm_pmu_info(); | |
20023 | + | |
20024 | + /* | |
20025 | + * model-specific override, if any | |
20026 | + */ | |
20027 | + if (pmu_info->read_pmc) | |
20028 | + tmp = pmu_info->read_pmc(ctx, cnum); | |
20029 | + else | |
20030 | + rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp); | |
20031 | + | |
20032 | + PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx", | |
20033 | + pfm_pmu_conf->pmc_desc[cnum].hw_addr, | |
20034 | + (unsigned long long) tmp); | |
20035 | + return tmp; | |
20036 | +} | |
20037 | + | |
20038 | +/** | |
20039 | + * pfm_arch_is_active - return non-zero if monitoring has been started | |
20040 | + * @ctx: context to check | |
20041 | + * | |
20042 | + * At certain points, perfmon needs to know if monitoring has been | |
20043 | + * explicitly started. | |
20044 | + * | |
20045 | + * On x86, there is no other way but to use pfm_start/pfm_stop | |
20046 | + * to activate monitoring, thus we can simply check flags.started | |
20047 | + */ | |
20048 | +static inline int pfm_arch_is_active(struct pfm_context *ctx) | |
20049 | +{ | |
20050 | + return ctx->flags.started; | |
20051 | +} | |
20052 | + | |
20053 | + | |
20054 | +/** | |
20055 | + * pfm_arch_unload_context - detach context from thread or CPU | |
20056 | + * @ctx: context to detach | |
20057 | + * | |
20058 | + * in system-wide ctx->task is NULL, otherwise it points to the | |
20059 | + * attached thread | |
20060 | + */ | |
20061 | +static inline void pfm_arch_unload_context(struct pfm_context *ctx) | |
20062 | +{ | |
20063 | + struct pfm_arch_pmu_info *pmu_info; | |
20064 | + struct pfm_arch_context *ctx_arch; | |
20065 | + | |
20066 | + ctx_arch = pfm_ctx_arch(ctx); | |
20067 | + pmu_info = pfm_pmu_info(); | |
20068 | + | |
20069 | + if (ctx_arch->flags.insecure) { | |
20070 | + PFM_DBG("clear cr4.pce"); | |
20071 | + clear_in_cr4(X86_CR4_PCE); | |
20072 | + } | |
20073 | + | |
20074 | + if (pmu_info->unload_context) | |
20075 | + pmu_info->unload_context(ctx); | |
20076 | +} | |
20077 | + | |
20078 | +/** | |
20079 | + * pfm_arch_load_context - attach context to thread or CPU | |
20080 | + * @ctx: context to attach | |
20081 | + */ | |
20082 | +static inline int pfm_arch_load_context(struct pfm_context *ctx) | |
20083 | +{ | |
20084 | + struct pfm_arch_pmu_info *pmu_info; | |
20085 | + struct pfm_arch_context *ctx_arch; | |
20086 | + int ret = 0; | |
20087 | + | |
20088 | + ctx_arch = pfm_ctx_arch(ctx); | |
20089 | + pmu_info = pfm_pmu_info(); | |
20090 | + | |
20091 | + /* | |
20092 | + * RDPMC authorized in system-wide and | |
20093 | + * per-thread self-monitoring. | |
20094 | + * | |
20095 | + * RDPMC only gives access to counts. | |
20096 | + * | |
20097 | + * The context-switch routine code does not restore | |
20098 | + * all the PMD registers (optimization), thus there | |
20099 | + * is a possible leak of counts there in per-thread | |
20100 | + * mode. | |
20101 | + */ | |
20102 | + if (ctx->task == current || ctx->flags.system) { | |
20103 | + PFM_DBG("set cr4.pce"); | |
20104 | + set_in_cr4(X86_CR4_PCE); | |
20105 | + ctx_arch->flags.insecure = 1; | |
20106 | + } | |
20107 | + | |
20108 | + if (pmu_info->load_context) | |
20109 | + ret = pmu_info->load_context(ctx); | |
20110 | + | |
20111 | + return ret; | |
20112 | +} | |
20113 | + | |
20114 | +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); | |
20115 | +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); | |
20116 | +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); | |
20117 | + | |
20118 | +/** | |
20119 | + * pfm_arch_unmask_monitoring - unmask monitoring | |
20120 | + * @ctx: context to mask | |
20121 | + * @set: current event set | |
20122 | + * | |
20123 | + * masking is slightly different from stopping in that, it does not undo | |
20124 | + * the pfm_start() issued by user. This is used in conjunction with | |
20125 | + * sampling. Masking means stop monitoring, but do not authorize user | |
20126 | + * to issue pfm_start/stop during that time. Unmasking is achieved via | |
20127 | + * pfm_restart() and may also depend on the sampling format used. | |
20128 | + * | |
20129 | + * on x86 masking/unmasking use the start/stop mechanism, except | |
20130 | + * that flags.started is not modified. | |
20131 | + */ | |
20132 | +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, | |
20133 | + struct pfm_event_set *set) | |
20134 | +{ | |
20135 | + pfm_arch_start(current, ctx); | |
20136 | +} | |
20137 | + | |
20138 | +/** | |
20139 | + * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt | |
20140 | + * @ctx: current context | |
20141 | + * @set: current event set | |
20142 | + * | |
20143 | + * called from __pfm_interrupt_handler(). | |
20144 | + * ctx is not NULL. ctx is locked. interrupts are masked | |
20145 | + * | |
20146 | + * The following actions must take place: | |
20147 | + * - stop all monitoring to ensure handler has consistent view. | |
20148 | + * - collect overflowed PMDs bitmask into povfls_pmds and | |
20149 | + * npend_ovfls. If no interrupt detected then npend_ovfls | |
20150 | + * must be set to zero. | |
20151 | + */ | |
20152 | +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, | |
20153 | + struct pfm_event_set *set) | |
20154 | +{ | |
20155 | + /* | |
20156 | + * on X86, freezing is equivalent to stopping | |
20157 | + */ | |
20158 | + pfm_arch_stop(current, ctx); | |
20159 | + | |
20160 | + /* | |
20161 | + * we mark monitoring as stopped to avoid | |
20162 | + * certain side effects especially in | |
20163 | + * pfm_switch_sets_from_intr() and | |
20164 | + * pfm_arch_restore_pmcs() | |
20165 | + */ | |
20166 | + ctx->flags.started = 0; | |
20167 | +} | |
20168 | + | |
20169 | +/** | |
20170 | + * pfm_arch_intr_unfreeze_pmu - conditionally reactivate monitoring | |
20171 | + * @ctx: current context | |
20172 | + * | |
20173 | + * current context may be NULL when dealing with spurious interrupts | |
20174 | + * | |
20175 | + * Must re-activate monitoring if context is not MASKED. | |
20176 | + * interrupts are masked. | |
20177 | + */ | |
20178 | +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) | |
20179 | +{ | |
20180 | + if (ctx == NULL) | |
20181 | + return; | |
20182 | + | |
20183 | + PFM_DBG_ovfl("state=%d", ctx->state); | |
20184 | + | |
20185 | + /* | |
20186 | + * restore flags.started which is cleared in | |
20187 | + * pfm_arch_intr_freeze_pmu() | |
20188 | + */ | |
20189 | + ctx->flags.started = 1; | |
20190 | + | |
20191 | + if (ctx->state == PFM_CTX_MASKED) | |
20192 | + return; | |
20193 | + | |
20194 | + pfm_arch_restore_pmcs(ctx, ctx->active_set); | |
20195 | +} | |
20196 | + | |
20197 | +/** | |
20198 | + * pfm_arch_setfl_sane - check arch/model specific event set flags | |
20199 | + * @ctx: context to work on | |
20200 | + * @flags: event set flags as passed by user | |
20201 | + * | |
20202 | + * called from pfm_setfl_sane(). Context is locked. Interrupts are masked. | |
20203 | + * | |
20204 | + * Return: | |
20205 | + * 0 when flags are valid | |
20206 | + * 1 on error | |
20207 | + */ | |
20208 | +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) | |
20209 | +{ | |
20210 | + return 0; | |
20211 | +} | |
20212 | + | |
20213 | +/** | |
20214 | + * pfm_arch_ovfl_reset_pmd - reset pmd on overflow | |
20215 | + * @ctx: current context | |
20216 | + * @cnum: PMD index | |
20217 | + * | |
20218 | + * On some CPUs, the upper bits of a counter must be set in order for the | |
20219 | + * overflow interrupt to happen. On overflow, the counter has wrapped around, | |
20220 | + * and the upper bits are cleared. This function may be used to set them back. | |
20221 | + * | |
20222 | + * For x86, the current version loses whatever is remaining in the counter, | |
20223 | + * which usually has a small count. In order not to lose this count, | |
20224 | + * we do a read-modify-write to set the upper bits while preserving the | |
20225 | + * low-order bits. This is slow but works. | |
20226 | + */ | |
20227 | +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum) | |
20228 | +{ | |
20229 | + u64 val; | |
20230 | + val = pfm_arch_read_pmd(ctx, cnum); | |
20231 | + pfm_arch_write_pmd(ctx, cnum, val); | |
20232 | +} | |
20233 | + | |
20234 | +/** | |
20235 | + * pfm_arch_context_create - create context | |
20236 | + * @ctx: newly created context | |
20237 | + * @flags: context flags as passed by user | |
20238 | + * | |
20239 | + * called from __pfm_create_context() | |
20240 | + */ | |
20241 | +static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags) | |
20242 | +{ | |
20243 | + struct pfm_arch_pmu_info *pmu_info; | |
20244 | + | |
20245 | + pmu_info = pfm_pmu_info(); | |
20246 | + | |
20247 | + if (pmu_info->create_context) | |
20248 | + return pmu_info->create_context(ctx, ctx_flags); | |
20249 | + | |
20250 | + return 0; | |
20251 | +} | |
20252 | + | |
20253 | +/** | |
20254 | + * pfm_arch_context_free - free context | |
20255 | + * @ctx: context to free | |
20256 | + */ | |
20257 | +static inline void pfm_arch_context_free(struct pfm_context *ctx) | |
20258 | +{ | |
20259 | + struct pfm_arch_pmu_info *pmu_info; | |
20260 | + | |
20261 | + pmu_info = pfm_pmu_info(); | |
20262 | + | |
20263 | + if (pmu_info->free_context) | |
20264 | + pmu_info->free_context(ctx); | |
20265 | +} | |
20266 | + | |
20267 | +/* | |
20268 | + * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they | |
20269 | + * will not cause interrupts when unused. | |
20270 | + * | |
20271 | + * This is a nop on x86 | |
20272 | + */ | |
20273 | +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, | |
20274 | + struct pfm_event_set *set) | |
20275 | +{} | |
20276 | + | |
20277 | +/* | |
20278 | + * functions implemented in arch/x86/perfmon/perfmon.c | |
20279 | + */ | |
20280 | +int pfm_arch_init(void); | |
20281 | +void pfm_arch_resend_irq(struct pfm_context *ctx); | |
20282 | + | |
20283 | +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); | |
20284 | +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx); | |
20285 | + | |
20286 | +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
20287 | +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg); | |
20288 | +void pfm_arch_pmu_config_remove(void); | |
20289 | +char *pfm_arch_get_pmu_module_name(void); | |
20290 | +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds); | |
20291 | +void pfm_arch_pmu_release(void); | |
20292 | + | |
20293 | +/* | |
20294 | + * pfm_arch_serialize - make PMU modifications visible to subsequent instructions | |
20295 | + * | |
20296 | + * This is a nop on x86 | |
20297 | + */ | |
20298 | +static inline void pfm_arch_serialize(void) | |
20299 | +{} | |
20300 | + | |
20301 | +/* | |
20302 | + * on x86, the PMDs are already saved by pfm_arch_freeze_pmu() | |
20303 | + * when entering the PMU interrupt handler, thus, we do not need | |
20304 | + * to save them again in pfm_switch_sets_from_intr() | |
20305 | + */ | |
20306 | +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, | |
20307 | + struct pfm_event_set *set) | |
20308 | +{} | |
20309 | + | |
20310 | + | |
20311 | +static inline void pfm_arch_ctxswout_sys(struct task_struct *task, | |
20312 | + struct pfm_context *ctx) | |
20313 | +{} | |
20314 | + | |
20315 | +static inline void pfm_arch_ctxswin_sys(struct task_struct *task, | |
20316 | + struct pfm_context *ctx) | |
20317 | +{} | |
20318 | + | |
20319 | +static inline void pfm_arch_init_percpu(void) | |
20320 | +{} | |
20321 | + | |
20322 | +static inline void pfm_cacheflush(void *addr, unsigned int len) | |
20323 | +{} | |
20324 | + | |
20325 | +/* | |
20326 | + * this function is called from the PMU interrupt handler ONLY. | |
20327 | + * On x86, the PMU is frozen via arch_stop, masking would be implemented | |
20328 | + * via arch-stop as well. Given that the PMU is already stopped when | |
20329 | + * entering the interrupt handler, we do not need to stop it again, so | |
20330 | + * this function is a nop. | |
20331 | + */ | |
20332 | +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, | |
20333 | + struct pfm_event_set *set) | |
20334 | +{} | |
20335 | + | |
20336 | + | |
20337 | +static inline void pfm_arch_arm_handle_work(struct task_struct *task) | |
20338 | +{} | |
20339 | + | |
20340 | +static inline void pfm_arch_disarm_handle_work(struct task_struct *task) | |
20341 | +{} | |
20342 | + | |
20343 | +static inline int pfm_arch_get_base_syscall(void) | |
20344 | +{ | |
20345 | +#ifdef __x86_64__ | |
20346 | + /* 32-bit syscall definition coming from ia32_unistd.h */ | |
20347 | + if (test_thread_flag(TIF_IA32)) | |
20348 | + return __NR_ia32_pfm_create_context; | |
20349 | +#endif | |
20350 | + return __NR_pfm_create_context; | |
20351 | +} | |
20352 | + | |
20353 | +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context)) | |
20354 | +/* | |
20355 | + * x86 does not need extra alignment requirements for the sampling buffer | |
20356 | + */ | |
20357 | +#define PFM_ARCH_SMPL_ALIGN_SIZE 0 | |
20358 | + | |
20359 | +asmlinkage void pmu_interrupt(void); | |
20360 | + | |
20361 | +#endif /* CONFIG_PERFMON */ | |
20362 | + | |
20363 | +#endif /* _ASM_X86_PERFMON_KERN_H_ */ | |
20364 | --- /dev/null | |
20365 | +++ b/include/asm-x86/perfmon_pebs_core_smpl.h | |
20366 | @@ -0,0 +1,164 @@ | |
20367 | +/* | |
20368 | + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. | |
20369 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
20370 | + * | |
20371 | + * This program is free software; you can redistribute it and/or | |
20372 | + * modify it under the terms of version 2 of the GNU General Public | |
20373 | + * License as published by the Free Software Foundation. | |
20374 | + * | |
20375 | + * This program is distributed in the hope that it will be useful, | |
20376 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20377 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20378 | + * General Public License for more details. | |
20379 | + * | |
20380 | + * You should have received a copy of the GNU General Public License | |
20381 | + * along with this program; if not, write to the Free Software | |
20382 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
20383 | + * 02111-1307 USA | |
20384 | + * | |
20385 | + * This file implements the sampling format to support Intel | |
20386 | + * Precise Event Based Sampling (PEBS) feature of Intel Core | |
20387 | + * processors, such as Intel Core 2. | |
20388 | + * | |
20389 | + * What is PEBS? | |
20390 | + * ------------ | |
20391 | + * This is a hardware feature to enhance sampling by providing | |
20392 | + * better precision as to where a sample is taken. This avoids the | |
20393 | + * typical skew in the instruction one can observe with any | |
20394 | + * interrupt-based sampling technique. | |
20395 | + * | |
20396 | + * PEBS also lowers sampling overhead significantly by having the | |
20397 | + * processor store samples instead of the OS. PMU interrupts are only | |
20398 | + * generated after multiple samples are written. | |
20399 | + * | |
20400 | + * Another benefit of PEBS is that samples can be captured inside | |
20401 | + * critical sections where interrupts are masked. | |
20402 | + * | |
20403 | + * How does it work? | |
20404 | + * PEBS effectively implements a HW buffer. The OS must pass a region | |
20405 | + * of memory where samples are to be stored. The region can have any | |
20406 | + * size. The OS must also specify the sampling period to reload. The PMU | |
20407 | + * will interrupt when it reaches the end of the buffer or a specified | |
20408 | + * threshold location inside the memory region. | |
20409 | + * | |
20410 | + * The description of the buffer is stored in the Data Save Area (DS). | |
20411 | + * The samples are stored sequentially in the buffer. The format of the | |
20412 | + * buffer is fixed and specified in the PEBS documentation. The sample | |
20413 | + * format does not change between 32-bit and 64-bit modes unlike on the | |
20414 | + * Pentium 4 version of PEBS. | |
20415 | + * | |
20416 | + * PEBS does not work when HyperThreading is enabled due to certain MSR | |
20417 | + * being shared between the two threads. | |
20418 | + * | |
20419 | + * What does the format do? | |
20420 | + * It provides access to the PEBS feature for both 32-bit and 64-bit | |
20421 | + * processors that support it. | |
20422 | + * | |
20423 | + * The same code and data structures are used for both 32-bit and 64-bit | |
20424 | + * modes. A single format name is used for both modes. In 32-bit mode, | |
20425 | + * some of the extended registers are written to zero in each sample. | |
20426 | + * | |
20427 | + * It is important to realize that the format provides a zero-copy | |
20428 | + * environment for the samples, i.e., the OS never touches the | |
20429 | + * samples. Whatever the processor writes is directly accessible to | |
20430 | + * the user. | |
20431 | + * | |
20432 | + * Parameters to the buffer can be passed via pfm_create_context() in | |
20433 | + * the pfm_pebs_smpl_arg structure. | |
20434 | + */ | |
20435 | +#ifndef __PERFMON_PEBS_CORE_SMPL_H__ | |
20436 | +#define __PERFMON_PEBS_CORE_SMPL_H__ 1 | |
20437 | + | |
20438 | +/* | |
20439 | + * The 32-bit and 64-bit formats are identical, thus we use only | |
20440 | + * one name for the format. | |
20441 | + */ | |
20442 | +#define PFM_PEBS_CORE_SMPL_NAME "pebs_core" | |
20443 | + | |
20444 | +/* | |
20445 | + * format specific parameters (passed at context creation) | |
20446 | + * | |
20447 | + * intr_thres: index from start of buffer of entry where the | |
20448 | + * PMU interrupt must be triggered. It must be several samples | |
20449 | + * short of the end of the buffer. | |
20450 | + */ | |
20451 | +struct pfm_pebs_core_smpl_arg { | |
20452 | + u64 cnt_reset; /* counter reset value */ | |
20453 | + size_t buf_size; /* size of the PEBS buffer in bytes */ | |
20454 | + size_t intr_thres;/* index of PEBS interrupt threshold entry */ | |
20455 | + u64 reserved[6]; /* for future use */ | |
20456 | +}; | |
20457 | + | |
20458 | +/* | |
20459 | + * Data Save Area (32 and 64-bit mode) | |
20460 | + * | |
20461 | + * The DS area is exposed to the user. To determine the number | |
20462 | + * of samples available in PEBS, it is necessary to subtract | |
20463 | + * pebs_index from pebs_base. | |
20464 | + * | |
20465 | + * Layout of the structure is mandated by hardware and specified | |
20466 | + * in the Intel documentation. | |
20467 | + */ | |
20468 | +struct pfm_ds_area_core { | |
20469 | + u64 bts_buf_base; | |
20470 | + u64 bts_index; | |
20471 | + u64 bts_abs_max; | |
20472 | + u64 bts_intr_thres; | |
20473 | + u64 pebs_buf_base; | |
20474 | + u64 pebs_index; | |
20475 | + u64 pebs_abs_max; | |
20476 | + u64 pebs_intr_thres; | |
20477 | + u64 pebs_cnt_reset; | |
20478 | +}; | |
20479 | + | |
20480 | +/* | |
20481 | + * This header is at the beginning of the sampling buffer returned to the user. | |
20482 | + * | |
20483 | + * Because of PEBS alignment constraints, the actual PEBS buffer area does | |
20484 | + * not necessarily begin right after the header. The hdr_start_offs must be | |
20485 | + * used to compute the first byte of the buffer. The offset is defined as | |
20486 | + * the number of bytes between the end of the header and the beginning of | |
20487 | + * the buffer. As such the formula is: | |
20488 | + * actual_buffer = (unsigned long)(hdr+1)+hdr->hdr_start_offs | |
20489 | + */ | |
20490 | +struct pfm_pebs_core_smpl_hdr { | |
20491 | + u64 overflows; /* #overflows for buffer */ | |
20492 | + size_t buf_size; /* bytes in the buffer */ | |
20493 | + size_t start_offs; /* actual buffer start offset */ | |
20494 | + u32 version; /* smpl format version */ | |
20495 | + u32 reserved1; /* for future use */ | |
20496 | + u64 reserved2[5]; /* for future use */ | |
20497 | + struct pfm_ds_area_core ds; /* data save area */ | |
20498 | +}; | |
20499 | + | |
20500 | +/* | |
20501 | + * Sample format as mandated by Intel documentation. | |
20502 | + * The same format is used in both 32 and 64 bit modes. | |
20503 | + */ | |
20504 | +struct pfm_pebs_core_smpl_entry { | |
20505 | + u64 eflags; | |
20506 | + u64 ip; | |
20507 | + u64 eax; | |
20508 | + u64 ebx; | |
20509 | + u64 ecx; | |
20510 | + u64 edx; | |
20511 | + u64 esi; | |
20512 | + u64 edi; | |
20513 | + u64 ebp; | |
20514 | + u64 esp; | |
20515 | + u64 r8; /* 0 in 32-bit mode */ | |
20516 | + u64 r9; /* 0 in 32-bit mode */ | |
20517 | + u64 r10; /* 0 in 32-bit mode */ | |
20518 | + u64 r11; /* 0 in 32-bit mode */ | |
20519 | + u64 r12; /* 0 in 32-bit mode */ | |
20520 | + u64 r13; /* 0 in 32-bit mode */ | |
20521 | + u64 r14; /* 0 in 32-bit mode */ | |
20522 | + u64 r15; /* 0 in 32-bit mode */ | |
20523 | +}; | |
20524 | + | |
20525 | +#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U | |
20526 | +#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U | |
20527 | +#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\ | |
20528 | + (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff)) | |
20529 | + | |
20530 | +#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */ | |
20531 | --- /dev/null | |
20532 | +++ b/include/asm-x86/perfmon_pebs_p4_smpl.h | |
20533 | @@ -0,0 +1,193 @@ | |
20534 | +/* | |
20535 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
20536 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
20537 | + * | |
20538 | + * This program is free software; you can redistribute it and/or | |
20539 | + * modify it under the terms of version 2 of the GNU General Public | |
20540 | + * License as published by the Free Software Foundation. | |
20541 | + * | |
20542 | + * This program is distributed in the hope that it will be useful, | |
20543 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20544 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20545 | + * General Public License for more details. | |
20546 | + * | |
20547 | + * You should have received a copy of the GNU General Public License | |
20548 | + * along with this program; if not, write to the Free Software | |
20549 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
20550 | + * 02111-1307 USA | |
20551 | + * | |
20552 | + * This file implements the sampling format to support Intel | |
20553 | + * Precise Event Based Sampling (PEBS) feature of Pentium 4 | |
20554 | + * and other Netburst-based processors. Not to be used for | |
20555 | + * Intel Core-based processors. | |
20556 | + * | |
20557 | + * What is PEBS? | |
20558 | + * ------------ | |
20559 | + * This is a hardware feature to enhance sampling by providing | |
20560 | + * better precision as to where a sample is taken. This avoids the | |
20561 | + * typical skew in the instruction one can observe with any | |
20562 | + * interrupt-based sampling technique. | |
20563 | + * | |
20564 | + * PEBS also lowers sampling overhead significantly by having the | |
20565 | + * processor store samples instead of the OS. PMU interrupts are only | |
20566 | + * generated after multiple samples are written. | |
20567 | + * | |
20568 | + * Another benefit of PEBS is that samples can be captured inside | |
20569 | + * critical sections where interrupts are masked. | |
20570 | + * | |
20571 | + * How does it work? | |
20572 | + * PEBS effectively implements a HW buffer. The OS must pass a region | |
20573 | + * of memory where samples are to be stored. The region can have any | |
20574 | + * size. The OS must also specify the sampling period to reload. The PMU | |
20575 | + * will interrupt when it reaches the end of the buffer or a specified | |
20576 | + * threshold location inside the memory region. | |
20577 | + * | |
20578 | + * The description of the buffer is stored in the Data Save Area (DS). | |
20579 | + * The samples are stored sequentially in the buffer. The format of the | |
20580 | + * buffer is fixed and specified in the PEBS documentation. The sample | |
20581 | + * format changes between 32-bit and 64-bit modes due to extended register | |
20582 | + * file. | |
20583 | + * | |
20584 | + * PEBS does not work when HyperThreading is enabled due to certain MSR | |
20585 | + * being shared between the two threads. | |
20586 | + * | |
20587 | + * What does the format do? | |
20588 | + * It provides access to the PEBS feature for both 32-bit and 64-bit | |
20589 | + * processors that support it. | |
20590 | + * | |
20591 | + * The same code is used for both 32-bit and 64-bit modes, but different | |
20592 | + * format names are used because the two modes are not compatible due to | |
20593 | + * data model and register file differences. Similarly the public data | |
20594 | + * structures describing the samples are different. | |
20595 | + * | |
20596 | + * It is important to realize that the format provides a zero-copy environment | |
20597 | + * for the samples, i.e., the OS never touches the samples. Whatever the | |
20598 | + * processor writes is directly accessible to the user. | |
20599 | + * | |
20600 | + * Parameters to the buffer can be passed via pfm_create_context() in | |
20601 | + * the pfm_pebs_smpl_arg structure. | |
20602 | + * | |
20603 | + * It is not possible to mix a 32-bit PEBS application on top of a 64-bit | |
20604 | + * host kernel. | |
20605 | + */ | |
20606 | +#ifndef __PERFMON_PEBS_P4_SMPL_H__ | |
20607 | +#define __PERFMON_PEBS_P4_SMPL_H__ 1 | |
20608 | + | |
20609 | +#ifdef __i386__ | |
20610 | +/* | |
20611 | + * The 32-bit and 64-bit formats are not compatible, thus we have | |
20612 | + * two different identifications so that 32-bit programs running on | |
20613 | + * 64-bit OS will fail to use the 64-bit PEBS support. | |
20614 | + */ | |
20615 | +#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4" | |
20616 | +#else | |
20617 | +#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4" | |
20618 | +#endif | |
20619 | + | |
20620 | +/* | |
20621 | + * format specific parameters (passed at context creation) | |
20622 | + * | |
20623 | + * intr_thres: index from start of buffer of entry where the | |
20624 | + * PMU interrupt must be triggered. It must be several samples | |
20625 | + * short of the end of the buffer. | |
20626 | + */ | |
20627 | +struct pfm_pebs_p4_smpl_arg { | |
20628 | + u64 cnt_reset; /* counter reset value */ | |
20629 | + size_t buf_size; /* size of the PEBS buffer in bytes */ | |
20630 | + size_t intr_thres;/* index of PEBS interrupt threshold entry */ | |
20631 | + u64 reserved[6]; /* for future use */ | |
20632 | +}; | |
20633 | + | |
20634 | +/* | |
20635 | + * Data Save Area (32 and 64-bit mode) | |
20636 | + * | |
20637 | + * The DS area must be exposed to the user because this is the only | |
20638 | + * way to report on the number of valid entries recorded by the CPU. | |
20639 | + * This is required when the buffer is not full, i.e., there was no | |
20640 | + * PMU interrupt. | |
20641 | + * | |
20642 | + * Layout of the structure is mandated by hardware and specified in | |
20643 | + * the Intel documentation. | |
20644 | + */ | |
20645 | +struct pfm_ds_area_p4 { | |
20646 | + unsigned long bts_buf_base; | |
20647 | + unsigned long bts_index; | |
20648 | + unsigned long bts_abs_max; | |
20649 | + unsigned long bts_intr_thres; | |
20650 | + unsigned long pebs_buf_base; | |
20651 | + unsigned long pebs_index; | |
20652 | + unsigned long pebs_abs_max; | |
20653 | + unsigned long pebs_intr_thres; | |
20654 | + u64 pebs_cnt_reset; | |
20655 | +}; | |
20656 | + | |
20657 | +/* | |
20658 | + * This header is at the beginning of the sampling buffer returned to the user. | |
20659 | + * | |
20660 | + * Because of PEBS alignment constraints, the actual PEBS buffer area does | |
20661 | + * not necessarily begin right after the header. The hdr_start_offs must be | |
20662 | + * used to compute the first byte of the buffer. The offset is defined as | |
20663 | + * the number of bytes between the end of the header and the beginning of | |
20664 | + * the buffer. As such the formula is: | |
20665 | + * actual_buffer = (unsigned long)(hdr+1)+hdr->hdr_start_offs | |
20666 | + */ | |
20667 | +struct pfm_pebs_p4_smpl_hdr { | |
20668 | + u64 overflows; /* #overflows for buffer */ | |
20669 | + size_t buf_size; /* bytes in the buffer */ | |
20670 | + size_t start_offs; /* actual buffer start offset */ | |
20671 | + u32 version; /* smpl format version */ | |
20672 | + u32 reserved1; /* for future use */ | |
20673 | + u64 reserved2[5]; /* for future use */ | |
20674 | + struct pfm_ds_area_p4 ds; /* data save area */ | |
20675 | +}; | |
20676 | + | |
20677 | +/* | |
20678 | + * 64-bit PEBS record format is described in | |
20679 | + * http://www.intel.com/technology/64bitextensions/30083502.pdf | |
20680 | + * | |
20681 | + * The format does not peek at samples. The sample structure is only | |
20682 | + * used to ensure that the buffer is large enough to accommodate one | |
20683 | + * sample. | |
20684 | + */ | |
20685 | +#ifdef __i386__ | |
20686 | +struct pfm_pebs_p4_smpl_entry { | |
20687 | + u32 eflags; | |
20688 | + u32 ip; | |
20689 | + u32 eax; | |
20690 | + u32 ebx; | |
20691 | + u32 ecx; | |
20692 | + u32 edx; | |
20693 | + u32 esi; | |
20694 | + u32 edi; | |
20695 | + u32 ebp; | |
20696 | + u32 esp; | |
20697 | +}; | |
20698 | +#else | |
20699 | +struct pfm_pebs_p4_smpl_entry { | |
20700 | + u64 eflags; | |
20701 | + u64 ip; | |
20702 | + u64 eax; | |
20703 | + u64 ebx; | |
20704 | + u64 ecx; | |
20705 | + u64 edx; | |
20706 | + u64 esi; | |
20707 | + u64 edi; | |
20708 | + u64 ebp; | |
20709 | + u64 esp; | |
20710 | + u64 r8; | |
20711 | + u64 r9; | |
20712 | + u64 r10; | |
20713 | + u64 r11; | |
20714 | + u64 r12; | |
20715 | + u64 r13; | |
20716 | + u64 r14; | |
20717 | + u64 r15; | |
20718 | +}; | |
20719 | +#endif | |
20720 | + | |
20721 | +#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U | |
20722 | +#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U | |
20723 | +#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\ | |
20724 | + (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff)) | |
20725 | + | |
20726 | +#endif /* __PERFMON_PEBS_P4_SMPL_H__ */ | |
20727 | --- a/include/asm-x86/thread_info.h | |
20728 | +++ b/include/asm-x86/thread_info.h | |
20729 | @@ -79,6 +79,7 @@ struct thread_info { | |
20730 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | |
20731 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
20732 | #define TIF_SECCOMP 8 /* secure computing */ | |
20733 | +#define TIF_PERFMON_WORK 9 /* work for pfm_handle_work() */ | |
20734 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | |
20735 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
20736 | #define TIF_IA32 17 /* 32bit process */ | |
20737 | @@ -92,6 +93,7 @@ struct thread_info { | |
20738 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | |
20739 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | |
20740 | #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ | |
20741 | +#define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */ | |
20742 | ||
20743 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
20744 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
20745 | @@ -114,6 +116,8 @@ struct thread_info { | |
20746 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | |
20747 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | |
20748 | #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) | |
20749 | +#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK) | |
20750 | +#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW) | |
20751 | ||
20752 | /* work to do in syscall_trace_enter() */ | |
20753 | #define _TIF_WORK_SYSCALL_ENTRY \ | |
20754 | @@ -135,12 +139,12 @@ struct thread_info { | |
20755 | ||
20756 | /* Only used for 64 bit */ | |
20757 | #define _TIF_DO_NOTIFY_MASK \ | |
20758 | - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) | |
20759 | + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME|_TIF_PERFMON_WORK) | |
20760 | ||
20761 | /* flags to check in __switch_to() */ | |
20762 | #define _TIF_WORK_CTXSW \ | |
20763 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ | |
20764 | - _TIF_NOTSC) | |
20765 | + _TIF_NOTSC|_TIF_PERFMON_CTXSW) | |
20766 | ||
20767 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | |
20768 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | |
20769 | --- a/include/asm-x86/unistd_32.h | |
20770 | +++ b/include/asm-x86/unistd_32.h | |
20771 | @@ -338,9 +338,23 @@ | |
20772 | #define __NR_dup3 330 | |
20773 | #define __NR_pipe2 331 | |
20774 | #define __NR_inotify_init1 332 | |
20775 | +#define __NR_pfm_create_context 333 | |
20776 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
20777 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
20778 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
20779 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
20780 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
20781 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
20782 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
20783 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
20784 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
20785 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
20786 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
20787 | ||
20788 | #ifdef __KERNEL__ | |
20789 | ||
20790 | +#define NR_syscalls 345 | |
20791 | + | |
20792 | #define __ARCH_WANT_IPC_PARSE_VERSION | |
20793 | #define __ARCH_WANT_OLD_READDIR | |
20794 | #define __ARCH_WANT_OLD_STAT | |
20795 | --- a/include/asm-x86/unistd_64.h | |
20796 | +++ b/include/asm-x86/unistd_64.h | |
20797 | @@ -653,7 +653,30 @@ __SYSCALL(__NR_dup3, sys_dup3) | |
20798 | __SYSCALL(__NR_pipe2, sys_pipe2) | |
20799 | #define __NR_inotify_init1 294 | |
20800 | __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | |
20801 | - | |
20802 | +#define __NR_pfm_create_context 295 | |
20803 | +__SYSCALL(__NR_pfm_create_context, sys_pfm_create_context) | |
20804 | +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) | |
20805 | +__SYSCALL(__NR_pfm_write_pmcs, sys_pfm_write_pmcs) | |
20806 | +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) | |
20807 | +__SYSCALL(__NR_pfm_write_pmds, sys_pfm_write_pmds) | |
20808 | +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) | |
20809 | + __SYSCALL(__NR_pfm_read_pmds, sys_pfm_read_pmds) | |
20810 | +#define __NR_pfm_load_context (__NR_pfm_create_context+4) | |
20811 | +__SYSCALL(__NR_pfm_load_context, sys_pfm_load_context) | |
20812 | +#define __NR_pfm_start (__NR_pfm_create_context+5) | |
20813 | +__SYSCALL(__NR_pfm_start, sys_pfm_start) | |
20814 | +#define __NR_pfm_stop (__NR_pfm_create_context+6) | |
20815 | +__SYSCALL(__NR_pfm_stop, sys_pfm_stop) | |
20816 | +#define __NR_pfm_restart (__NR_pfm_create_context+7) | |
20817 | +__SYSCALL(__NR_pfm_restart, sys_pfm_restart) | |
20818 | +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) | |
20819 | +__SYSCALL(__NR_pfm_create_evtsets, sys_pfm_create_evtsets) | |
20820 | +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) | |
20821 | +__SYSCALL(__NR_pfm_getinfo_evtsets, sys_pfm_getinfo_evtsets) | |
20822 | +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) | |
20823 | +__SYSCALL(__NR_pfm_delete_evtsets, sys_pfm_delete_evtsets) | |
20824 | +#define __NR_pfm_unload_context (__NR_pfm_create_context+11) | |
20825 | +__SYSCALL(__NR_pfm_unload_context, sys_pfm_unload_context) | |
20826 | ||
20827 | #ifndef __NO_STUBS | |
20828 | #define __ARCH_WANT_OLD_READDIR | |
20829 | --- a/include/linux/Kbuild | |
20830 | +++ b/include/linux/Kbuild | |
20831 | @@ -163,6 +163,8 @@ header-y += video_decoder.h | |
20832 | header-y += video_encoder.h | |
20833 | header-y += videotext.h | |
20834 | header-y += x25.h | |
20835 | +header-y += perfmon.h | |
20836 | +header-y += perfmon_dfl_smpl.h | |
20837 | ||
20838 | unifdef-y += acct.h | |
20839 | unifdef-y += adb.h | |
20840 | --- /dev/null | |
20841 | +++ b/include/linux/perfmon.h | |
20842 | @@ -0,0 +1,213 @@ | |
20843 | +/* | |
20844 | + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. | |
20845 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
20846 | + * | |
20847 | + * This program is free software; you can redistribute it and/or | |
20848 | + * modify it under the terms of version 2 of the GNU General Public | |
20849 | + * License as published by the Free Software Foundation. | |
20850 | + * | |
20851 | + * This program is distributed in the hope that it will be useful, | |
20852 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20853 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20854 | + * General Public License for more details. | |
20855 | + * | |
20856 | + * You should have received a copy of the GNU General Public License | |
20857 | + * along with this program; if not, write to the Free Software | |
20858 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
20859 | + * 02111-1307 USA | |
20860 | + */ | |
20861 | + | |
20862 | +#ifndef __LINUX_PERFMON_H__ | |
20863 | +#define __LINUX_PERFMON_H__ | |
20864 | + | |
20865 | +/* | |
20866 | + * This file contains all the user visible generic definitions for the | |
20867 | + * interface. Model-specific user-visible definitions are located in | |
20868 | + * the asm/perfmon.h file. | |
20869 | + */ | |
20870 | + | |
20871 | +/* | |
20872 | + * include arch-specific user interface definitions | |
20873 | + */ | |
20874 | +#include <asm/perfmon.h> | |
20875 | + | |
20876 | +/* | |
20877 | + * defined by each arch | |
20878 | + */ | |
20879 | +#define PFM_MAX_PMCS PFM_ARCH_MAX_PMCS | |
20880 | +#define PFM_MAX_PMDS PFM_ARCH_MAX_PMDS | |
20881 | + | |
20882 | +/* | |
20883 | + * number of elements for each type of bitvector | |
20884 | + * all bitvectors use u64 fixed size type on all architectures. | |
20885 | + */ | |
20886 | +#define PFM_BVSIZE(x) (((x)+(sizeof(__u64)<<3)-1) / (sizeof(__u64)<<3)) | |
20887 | +#define PFM_PMD_BV PFM_BVSIZE(PFM_MAX_PMDS) | |
20888 | +#define PFM_PMC_BV PFM_BVSIZE(PFM_MAX_PMCS) | |
20889 | + | |
20890 | +/* | |
20891 | + * register flags layout: | |
20892 | + * bit[00-15] : generic flags | |
20893 | + * bit[16-31] : arch-specific flags | |
20894 | + * | |
20895 | + * PFM_REGFL_NO_EMUL64: must be set on the PMC controlling the PMD | |
20896 | + */ | |
20897 | +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* PMD: send notification on event */ | |
20898 | +#define PFM_REGFL_RANDOM 0x2 /* PMD: randomize value after event */ | |
20899 | +#define PFM_REGFL_NO_EMUL64 0x4 /* PMC: no 64-bit emulation */ | |
20900 | + | |
20901 | +/* | |
20902 | + * event set flags layout: | |
20903 | + * bits[00-15] : generic flags | |
20904 | + * bits[16-31] : arch-specific flags (see asm/perfmon.h) | |
20905 | + */ | |
20906 | +#define PFM_SETFL_OVFL_SWITCH 0x01 /* enable switch on overflow */ | |
20907 | +#define PFM_SETFL_TIME_SWITCH 0x02 /* enable switch on timeout */ | |
20908 | + | |
20909 | +/* | |
20910 | + * argument to pfm_create_context() system call | |
20911 | + * structure shared with user level | |
20912 | + */ | |
20913 | +struct pfarg_ctx { | |
20914 | + __u32 ctx_flags; /* noblock/block/syswide */ | |
20915 | + __u32 ctx_reserved1; /* for future use */ | |
20916 | + __u64 ctx_reserved2[7]; /* for future use */ | |
20917 | +}; | |
20918 | + | |
20919 | +/* | |
20920 | + * context flags layout: | |
20921 | + * bits[00-15]: generic flags | |
20922 | + * bits[16-31]: arch-specific flags (see perfmon_const.h) | |
20923 | + */ | |
20924 | +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user notifications */ | |
20925 | +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ | |
20926 | +#define PFM_FL_OVFL_NO_MSG 0x80 /* no overflow msgs */ | |
20927 | + | |
20928 | +/* | |
20929 | + * argument to pfm_write_pmcs() system call. | |
20930 | + * structure shared with user level | |
20931 | + */ | |
20932 | +struct pfarg_pmc { | |
20933 | + __u16 reg_num; /* which register */ | |
20934 | + __u16 reg_set; /* event set for this register */ | |
20935 | + __u32 reg_flags; /* REGFL flags */ | |
20936 | + __u64 reg_value; /* pmc value */ | |
20937 | + __u64 reg_reserved2[4]; /* for future use */ | |
20938 | +}; | |
20939 | + | |
20940 | +/* | |
20941 | + * argument to pfm_write_pmds() and pfm_read_pmds() system calls. | |
20942 | + * structure shared with user level | |
20943 | + */ | |
20944 | +struct pfarg_pmd { | |
20945 | + __u16 reg_num; /* which register */ | |
20946 | + __u16 reg_set; /* event set for this register */ | |
20947 | + __u32 reg_flags; /* REGFL flags */ | |
20948 | + __u64 reg_value; /* initial pmc/pmd value */ | |
20949 | + __u64 reg_long_reset; /* value to reload after notification */ | |
20950 | + __u64 reg_short_reset; /* reset after counter overflow */ | |
20951 | + __u64 reg_last_reset_val; /* return: PMD last reset value */ | |
20952 | + __u64 reg_ovfl_switch_cnt; /* #overflows before switch */ | |
20953 | + __u64 reg_reset_pmds[PFM_PMD_BV]; /* reset on overflow */ | |
20954 | + __u64 reg_smpl_pmds[PFM_PMD_BV]; /* record in sample */ | |
20955 | + __u64 reg_smpl_eventid; /* opaque event identifier */ | |
20956 | + __u64 reg_random_mask; /* bitmask used to limit random value */ | |
20957 | + __u32 reg_random_seed; /* seed for randomization (OBSOLETE) */ | |
20958 | + __u32 reg_reserved2[7]; /* for future use */ | |
20959 | +}; | |
20960 | + | |
20961 | +/* | |
20962 | + * optional argument to pfm_start() system call. Pass NULL if not needed. | |
20963 | + * structure shared with user level | |
20964 | + */ | |
20965 | +struct pfarg_start { | |
20966 | + __u16 start_set; /* event set to start with */ | |
20967 | + __u16 start_reserved1; /* for future use */ | |
20968 | + __u32 start_reserved2; /* for future use */ | |
20969 | + __u64 reserved3[3]; /* for future use */ | |
20970 | +}; | |
20971 | + | |
20972 | +/* | |
20973 | + * argument to pfm_load_context() system call. | |
20974 | + * structure shared with user level | |
20975 | + */ | |
20976 | +struct pfarg_load { | |
20977 | + __u32 load_pid; /* thread or CPU to attach to */ | |
20978 | + __u16 load_set; /* set to load first */ | |
20979 | + __u16 load_reserved1; /* for future use */ | |
20980 | + __u64 load_reserved2[3]; /* for future use */ | |
20981 | +}; | |
20982 | + | |
20983 | +/* | |
20984 | + * argument to pfm_create_evtsets() and pfm_delete_evtsets() system calls. | |
20985 | + * structure shared with user level. | |
20986 | + */ | |
20987 | +struct pfarg_setdesc { | |
20988 | + __u16 set_id; /* which set */ | |
20989 | + __u16 set_reserved1; /* for future use */ | |
20990 | + __u32 set_flags; /* SETFL flags */ | |
20991 | + __u64 set_timeout; /* switch timeout in nsecs */ | |
20992 | + __u64 reserved[6]; /* for future use */ | |
20993 | +}; | |
20994 | + | |
20995 | +/* | |
20996 | + * argument to pfm_getinfo_evtsets() system call. | |
20997 | + * structure shared with user level | |
20998 | + */ | |
20999 | +struct pfarg_setinfo { | |
21000 | + __u16 set_id; /* which set */ | |
21001 | + __u16 set_reserved1; /* for future use */ | |
21002 | + __u32 set_flags; /* out: SETFL flags */ | |
21003 | + __u64 set_ovfl_pmds[PFM_PMD_BV]; /* out: last ovfl PMDs */ | |
21004 | + __u64 set_runs; /* out: #times the set was active */ | |
21005 | + __u64 set_timeout; /* out: eff/leftover timeout (nsecs) */ | |
21006 | + __u64 set_act_duration; /* out: time set was active in nsecs */ | |
21007 | + __u64 set_avail_pmcs[PFM_PMC_BV];/* out: available PMCs */ | |
21008 | + __u64 set_avail_pmds[PFM_PMD_BV];/* out: available PMDs */ | |
21009 | + __u64 set_reserved3[6]; /* for future use */ | |
21010 | +}; | |
21011 | + | |
21012 | +/* | |
21013 | + * default value for the user and group security parameters in | |
21014 | + * /proc/sys/kernel/perfmon/sys_group | |
21015 | + * /proc/sys/kernel/perfmon/task_group | |
21016 | + */ | |
21017 | +#define PFM_GROUP_PERM_ANY -1 /* any user/group */ | |
21018 | + | |
21019 | +/* | |
21020 | + * overflow notification message. | |
21021 | + * structure shared with user level | |
21022 | + */ | |
21023 | +struct pfarg_ovfl_msg { | |
21024 | + __u32 msg_type; /* message type: PFM_MSG_OVFL */ | |
21025 | + __u32 msg_ovfl_pid; /* process id */ | |
21026 | + __u16 msg_active_set; /* active set at overflow */ | |
21027 | + __u16 msg_ovfl_cpu; /* cpu of PMU interrupt */ | |
21028 | + __u32 msg_ovfl_tid; /* thread id */ | |
21029 | + __u64 msg_ovfl_ip; /* IP on PMU intr */ | |
21030 | + __u64 msg_ovfl_pmds[PFM_PMD_BV];/* overflowed PMDs */ | |
21031 | +}; | |
21032 | + | |
21033 | +#define PFM_MSG_OVFL 1 /* an overflow happened */ | |
21034 | +#define PFM_MSG_END 2 /* task to which context was attached ended */ | |
21035 | + | |
21036 | +/* | |
21037 | + * generic notification message (union). | |
21038 | + * union shared with user level | |
21039 | + */ | |
21040 | +union pfarg_msg { | |
21041 | + __u32 type; | |
21042 | + struct pfarg_ovfl_msg pfm_ovfl_msg; | |
21043 | +}; | |
21044 | + | |
21045 | +/* | |
21046 | + * perfmon version number | |
21047 | + */ | |
21048 | +#define PFM_VERSION_MAJ 2U | |
21049 | +#define PFM_VERSION_MIN 82U | |
21050 | +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|\ | |
21051 | + (PFM_VERSION_MIN & 0xffff)) | |
21052 | +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) | |
21053 | +#define PFM_VERSION_MINOR(x) ((x) & 0xffff) | |
21054 | + | |
21055 | +#endif /* __LINUX_PERFMON_H__ */ | |
21056 | --- /dev/null | |
21057 | +++ b/include/linux/perfmon_dfl_smpl.h | |
21058 | @@ -0,0 +1,78 @@ | |
21059 | +/* | |
21060 | + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
21061 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
21062 | + * | |
21063 | + * This file implements the new dfl sampling buffer format | |
21064 | + * for perfmon2 subsystem. | |
21065 | + * | |
21066 | + * This program is free software; you can redistribute it and/or | |
21067 | + * modify it under the terms of version 2 of the GNU General Public | |
21068 | + * License as published by the Free Software Foundation. | |
21069 | + * | |
21070 | + * This program is distributed in the hope that it will be useful, | |
21071 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21072 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21073 | + * General Public License for more details. | |
21074 | + * | |
21075 | + * You should have received a copy of the GNU General Public License | |
21076 | + * along with this program; if not, write to the Free Software | |
21077 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
21078 | + * 02111-1307 USA | |
21079 | + */ | |
21080 | +#ifndef __PERFMON_DFL_SMPL_H__ | |
21081 | +#define __PERFMON_DFL_SMPL_H__ 1 | |
21082 | + | |
21083 | +/* | |
21084 | + * format specific parameters (passed at context creation) | |
21085 | + */ | |
21086 | +struct pfm_dfl_smpl_arg { | |
21087 | + __u64 buf_size; /* size of the buffer in bytes */ | |
21088 | + __u32 buf_flags; /* buffer specific flags */ | |
21089 | + __u32 reserved1; /* for future use */ | |
21090 | + __u64 reserved[6]; /* for future use */ | |
21091 | +}; | |
21092 | + | |
21093 | +/* | |
21094 | + * This header is at the beginning of the sampling buffer returned to the user. | |
21095 | + * It is directly followed by the first record. | |
21096 | + */ | |
21097 | +struct pfm_dfl_smpl_hdr { | |
21098 | + __u64 hdr_count; /* how many valid entries */ | |
21099 | + __u64 hdr_cur_offs; /* current offset from top of buffer */ | |
21100 | + __u64 hdr_overflows; /* #overflows for buffer */ | |
21101 | + __u64 hdr_buf_size; /* bytes in the buffer */ | |
21102 | + __u64 hdr_min_buf_space;/* minimal buffer size (internal use) */ | |
21103 | + __u32 hdr_version; /* smpl format version */ | |
21104 | + __u32 hdr_buf_flags; /* copy of buf_flags */ | |
21105 | + __u64 hdr_reserved[10]; /* for future use */ | |
21106 | +}; | |
21107 | + | |
21108 | +/* | |
21109 | + * Entry header in the sampling buffer. The header is directly followed | |
21110 | + * with the values of the PMD registers of interest saved in increasing | |
21111 | + * index order: PMD4, PMD5, and so on. How many PMDs are present depends | |
21112 | + * on how the session was programmed. | |
21113 | + * | |
21114 | + * In the case where multiple counters overflow at the same time, multiple | |
21115 | + * entries are written consecutively. | |
21116 | + * | |
21117 | + * last_reset_value member indicates the initial value of the overflowed PMD. | |
21118 | + */ | |
21119 | +struct pfm_dfl_smpl_entry { | |
21120 | + __u32 pid; /* thread id (for NPTL, this is gettid()) */ | |
21121 | + __u16 ovfl_pmd; /* index of overflowed PMD for this sample */ | |
21122 | + __u16 reserved; /* for future use */ | |
21123 | + __u64 last_reset_val; /* initial value of overflowed PMD */ | |
21124 | + __u64 ip; /* where did the overflow intr happened */ | |
21125 | + __u64 tstamp; /* overflow timestamp */ | |
21126 | + __u16 cpu; /* cpu on which the overflow occurred */ | |
21127 | + __u16 set; /* event set active when overflow occurred */ | |
21128 | + __u32 tgid; /* thread group id (getpid() for NPTL) */ | |
21129 | +}; | |
21130 | + | |
21131 | +#define PFM_DFL_SMPL_VERSION_MAJ 1U | |
21132 | +#define PFM_DFL_SMPL_VERSION_MIN 0U | |
21133 | +#define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|\ | |
21134 | + (PFM_DFL_SMPL_VERSION_MIN & 0xffff)) | |
21135 | + | |
21136 | +#endif /* __PERFMON_DFL_SMPL_H__ */ | |
21137 | --- /dev/null | |
21138 | +++ b/include/linux/perfmon_fmt.h | |
21139 | @@ -0,0 +1,74 @@ | |
21140 | +/* | |
21141 | + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. | |
21142 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
21143 | + * | |
21144 | + * Interface for custom sampling buffer format modules | |
21145 | + * | |
21146 | + * This program is free software; you can redistribute it and/or | |
21147 | + * modify it under the terms of version 2 of the GNU General Public | |
21148 | + * License as published by the Free Software Foundation. | |
21149 | + * | |
21150 | + * This program is distributed in the hope that it will be useful, | |
21151 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21152 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21153 | + * General Public License for more details. | |
21154 | + * | |
21155 | + * You should have received a copy of the GNU General Public License | |
21156 | + * along with this program; if not, write to the Free Software | |
21157 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
21158 | + * 02111-1307 USA | |
21159 | + */ | |
21160 | +#ifndef __PERFMON_FMT_H__ | |
21161 | +#define __PERFMON_FMT_H__ 1 | |
21162 | + | |
21163 | +#include <linux/kobject.h> | |
21164 | + | |
21165 | +typedef int (*fmt_validate_t)(u32 flags, u16 npmds, void *arg); | |
21166 | +typedef int (*fmt_getsize_t)(u32 flags, void *arg, size_t *size); | |
21167 | +typedef int (*fmt_init_t)(struct pfm_context *ctx, void *buf, u32 flags, | |
21168 | + u16 nmpds, void *arg); | |
21169 | +typedef int (*fmt_restart_t)(int is_active, u32 *ovfl_ctrl, void *buf); | |
21170 | +typedef int (*fmt_exit_t)(void *buf); | |
21171 | +typedef int (*fmt_handler_t)(struct pfm_context *ctx, | |
21172 | + unsigned long ip, u64 stamp, void *data); | |
21173 | + | |
21174 | +struct pfm_smpl_fmt { | |
21175 | + char *fmt_name; /* name of the format (required) */ | |
21176 | + size_t fmt_arg_size; /* size of fmt args for ctx create */ | |
21177 | + u32 fmt_flags; /* format specific flags */ | |
21178 | + u32 fmt_version; /* format version number */ | |
21179 | + | |
21180 | + fmt_validate_t fmt_validate; /* validate context flags */ | |
21181 | + fmt_getsize_t fmt_getsize; /* get size for sampling buffer */ | |
21182 | + fmt_init_t fmt_init; /* initialize buffer area */ | |
21183 | + fmt_handler_t fmt_handler; /* overflow handler (required) */ | |
21184 | + fmt_restart_t fmt_restart; /* restart after notification */ | |
21185 | + fmt_exit_t fmt_exit; /* context termination */ | |
21186 | + | |
21187 | + struct list_head fmt_list; /* internal use only */ | |
21188 | + | |
21189 | + struct kobject kobj; /* sysfs internal use only */ | |
21190 | + struct module *owner; /* pointer to module owner */ | |
21191 | + u32 fmt_qdepth; /* Max notify queue depth (required) */ | |
21192 | +}; | |
21193 | +#define to_smpl_fmt(n) container_of(n, struct pfm_smpl_fmt, kobj) | |
21194 | + | |
21195 | +#define PFM_FMTFL_IS_BUILTIN 0x1 /* fmt is compiled in */ | |
21196 | +/* | |
21197 | + * we need to know whether the format is builtin or compiled | |
21198 | + * as a module | |
21199 | + */ | |
21200 | +#ifdef MODULE | |
21201 | +#define PFM_FMT_BUILTIN_FLAG 0 /* built as a module */ | |
21202 | +#else | |
21203 | +#define PFM_FMT_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* not built as a module */ | |
21204 | +#endif | |
21205 | + | |
21206 | +int pfm_fmt_register(struct pfm_smpl_fmt *fmt); | |
21207 | +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt); | |
21208 | +void pfm_sysfs_builtin_fmt_add(void); | |
21209 | + | |
21210 | +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt); | |
21211 | +void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt); | |
21212 | + | |
21213 | +#endif /* __PERFMON_FMT_H__ */ | |
21214 | --- /dev/null | |
21215 | +++ b/include/linux/perfmon_kern.h | |
21216 | @@ -0,0 +1,551 @@ | |
21217 | +/* | |
21218 | + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. | |
21219 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
21220 | + * | |
21221 | + * This program is free software; you can redistribute it and/or | |
21222 | + * modify it under the terms of version 2 of the GNU General Public | |
21223 | + * License as published by the Free Software Foundation. | |
21224 | + * | |
21225 | + * This program is distributed in the hope that it will be useful, | |
21226 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21227 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21228 | + * General Public License for more details. | |
21229 | + * | |
21230 | + * You should have received a copy of the GNU General Public License | |
21231 | + * along with this program; if not, write to the Free Software | |
21232 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
21233 | + * 02111-1307 USA | |
21234 | + */ | |
21235 | + | |
21236 | +#ifndef __LINUX_PERFMON_KERN_H__ | |
21237 | +#define __LINUX_PERFMON_KERN_H__ | |
21238 | +/* | |
21239 | + * This file contains all the definitions of data structures, variables, macros | |
21240 | + * that are to be shared between generic code and arch-specific code | |
21241 | + * | |
21242 | + * For generic only definitions, use perfmon/perfmon_priv.h | |
21243 | + */ | |
21244 | +#ifdef CONFIG_PERFMON | |
21245 | + | |
21246 | +#include <linux/file.h> | |
21247 | +#include <linux/sched.h> | |
21248 | +#include <linux/perfmon.h> | |
21249 | + | |
21250 | +/* | |
21251 | + * system administrator configuration controls available via | |
21252 | + * the /sys/kernel/perfmon interface | |
21253 | + */ | |
21254 | +struct pfm_controls { | |
21255 | + u32 debug; /* debugging control bitmask */ | |
21256 | + gid_t sys_group; /* gid to create a syswide context */ | |
21257 | + gid_t task_group; /* gid to create a per-task context */ | |
21258 | + u32 flags; /* control flags (see below) */ | |
21259 | + size_t arg_mem_max; /* maximum vector argument size */ | |
21260 | + size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */ | |
21261 | +}; | |
21262 | +extern struct pfm_controls pfm_controls; | |
21263 | + | |
21264 | +/* | |
21265 | + * control flags | |
21266 | + */ | |
21267 | +#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */ | |
21268 | + | |
21269 | +/* | |
21270 | + * software PMD | |
21271 | + */ | |
21272 | +struct pfm_pmd { | |
21273 | + u64 value; /* 64-bit value */ | |
21274 | + u64 lval; /* last reset value */ | |
21275 | + u64 ovflsw_thres; /* #ovfls left before switch */ | |
21276 | + u64 long_reset; /* long reset value on overflow */ | |
21277 | + u64 short_reset; /* short reset value on overflow */ | |
21278 | + u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */ | |
21279 | + u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */ | |
21280 | + u64 mask; /* range mask for random value */ | |
21281 | + u64 ovflsw_ref_thres; /* #ovfls before next set */ | |
21282 | + u64 eventid; /* opaque event identifier */ | |
21283 | + u32 flags; /* notify/do not notify */ | |
21284 | +}; | |
21285 | + | |
21286 | +/* | |
21287 | + * event_set: encapsulates the full PMU state | |
21288 | + */ | |
21289 | +struct pfm_event_set { | |
21290 | + struct list_head list; /* ordered chain of sets */ | |
21291 | + u16 id; /* set identification */ | |
21292 | + u16 nused_pmds; /* max number of used PMDs */ | |
21293 | + u16 nused_pmcs; /* max number of used PMCs */ | |
21294 | + u16 pad1; /* padding */ | |
21295 | + u32 flags; /* public flags */ | |
21296 | + u32 priv_flags; /* private flags (see below) */ | |
21297 | + u64 runs; /* # of activations */ | |
21298 | + u32 npend_ovfls; /* number of pending PMD overflow */ | |
21299 | + u32 pad2; /* padding */ | |
21300 | + u64 used_pmds[PFM_PMD_BV]; /* used PMDs */ | |
21301 | + u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */ | |
21302 | + u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */ | |
21303 | + u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */ | |
21304 | + u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */ | |
21305 | + u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */ | |
21306 | + u64 pmcs[PFM_MAX_PMCS]; /* PMC values */ | |
21307 | + | |
21308 | + struct pfm_pmd pmds[PFM_MAX_PMDS]; | |
21309 | + | |
21310 | + ktime_t hrtimer_exp; /* switch timeout reference */ | |
21311 | + ktime_t hrtimer_rem; /* per-thread remainder timeout */ | |
21312 | + | |
21313 | + u64 duration_start; /* start time in ns */ | |
21314 | + u64 duration; /* total active ns */ | |
21315 | +}; | |
21316 | + | |
21317 | +/* | |
21318 | + * common private event set flags (priv_flags) | |
21319 | + * | |
21320 | + * upper 16 bits: for arch-specific use | |
21321 | + * lower 16 bits: for common use | |
21322 | + */ | |
21323 | +#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */ | |
21324 | +#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */ | |
21325 | +#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */ | |
21326 | +#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \ | |
21327 | + | PFM_SETFL_PRIV_MOD_PMCS) | |
21328 | + | |
21329 | +/* | |
21330 | + * context flags | |
21331 | + */ | |
21332 | +struct pfm_context_flags { | |
21333 | + unsigned int block:1; /* task blocks on user notifications */ | |
21334 | + unsigned int system:1; /* do system wide monitoring */ | |
21335 | + unsigned int no_msg:1; /* no message sent on overflow */ | |
21336 | + unsigned int switch_ovfl:1; /* switch set on counter ovfl */ | |
21337 | + unsigned int switch_time:1; /* switch set on timeout */ | |
21338 | + unsigned int started:1; /* pfm_start() issued */ | |
21339 | + unsigned int work_type:2; /* type of work for pfm_handle_work */ | |
21340 | + unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */ | |
21341 | + unsigned int ia64_v20_compat:1; /* context is IA-64 v2.0 mode */ | |
21342 | + unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */ | |
21343 | + unsigned int reset_count:8; /* number of pending resets */ | |
21344 | + unsigned int is_self:1; /* per-thread and self-monitoring */ | |
21345 | + unsigned int reserved:5; /* for future use */ | |
21346 | +}; | |
21347 | + | |
21348 | +/* | |
21349 | + * values for work_type (TIF_PERFMON_WORK must be set) | |
21350 | + */ | |
21351 | +#define PFM_WORK_NONE 0 /* nothing to do */ | |
21352 | +#define PFM_WORK_RESET 1 /* reset overflowed counters */ | |
21353 | +#define PFM_WORK_BLOCK 2 /* block current thread */ | |
21354 | +#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */ | |
21355 | + | |
21356 | +/* | |
21357 | + * overflow description argument passed to sampling format | |
21358 | + */ | |
21359 | +struct pfm_ovfl_arg { | |
21360 | + u16 ovfl_pmd; /* index of overflowed PMD */ | |
21361 | + u16 active_set; /* set active at the time of the overflow */ | |
21362 | + u32 ovfl_ctrl; /* control flags */ | |
21363 | + u64 pmd_last_reset; /* last reset value of overflowed PMD */ | |
21364 | + u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */ | |
21365 | + u64 pmd_eventid; /* eventid associated with PMD */ | |
21366 | + u16 num_smpl_pmds; /* number of PMDS in smpl_pmd_values */ | |
21367 | +}; | |
21368 | +/* | |
21369 | + * depth of message queue | |
21370 | + * | |
21371 | + * Depth cannot be bigger than 255 (see reset_count) | |
21372 | + */ | |
21373 | +#define PFM_MSGS_ORDER 3 /* log2(number of messages) */ | |
21374 | +#define PFM_MSGS_COUNT (1<<PFM_MSGS_ORDER) /* number of messages */ | |
21375 | +#define PFM_MSGQ_MASK (PFM_MSGS_COUNT-1) | |
21376 | + | |
21377 | +/* | |
21378 | + * perfmon context state | |
21379 | + */ | |
21380 | +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ | |
21381 | +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ | |
21382 | +#define PFM_CTX_MASKED 3 /* context is loaded, monitoring is masked */ | |
21383 | +#define PFM_CTX_ZOMBIE 4 /* context lost owner but still attached */ | |
21384 | + | |
21385 | +/* | |
21386 | + * registers description | |
21387 | + */ | |
21388 | +struct pfm_regdesc { | |
21389 | + u64 pmcs[PFM_PMC_BV]; /* available PMC */ | |
21390 | + u64 pmds[PFM_PMD_BV]; /* available PMD */ | |
21391 | + u64 rw_pmds[PFM_PMD_BV]; /* available RW PMD */ | |
21392 | + u64 intr_pmds[PFM_PMD_BV]; /* PMD generating intr */ | |
21393 | + u64 cnt_pmds[PFM_PMD_BV]; /* PMD counters */ | |
21394 | + u16 max_pmc; /* highest+1 avail PMC */ | |
21395 | + u16 max_pmd; /* highest+1 avail PMD */ | |
21396 | + u16 max_rw_pmd; /* highest+1 avail RW PMD */ | |
21397 | + u16 first_intr_pmd; /* first intr PMD */ | |
21398 | + u16 max_intr_pmd; /* highest+1 intr PMD */ | |
21399 | + u16 num_rw_pmd; /* number of avail RW PMD */ | |
21400 | + u16 num_pmcs; /* number of logical PMCS */ | |
21401 | + u16 num_pmds; /* number of logical PMDS */ | |
21402 | + u16 num_counters; /* number of counting PMD */ | |
21403 | +}; | |
21404 | + | |
21405 | +/* | |
21406 | + * context: contains all the state of a session | |
21407 | + */ | |
21408 | +struct pfm_context { | |
21409 | + spinlock_t lock; /* context protection */ | |
21410 | + | |
21411 | + struct pfm_context_flags flags; | |
21412 | + u32 state; /* current state */ | |
21413 | + struct task_struct *task; /* attached task */ | |
21414 | + | |
21415 | + struct completion restart_complete;/* block on notification */ | |
21416 | + u64 last_act; /* last activation */ | |
21417 | + u32 last_cpu; /* last CPU used (SMP only) */ | |
21418 | + u32 cpu; /* cpu bound to context */ | |
21419 | + | |
21420 | + struct pfm_smpl_fmt *smpl_fmt; /* sampling format callbacks */ | |
21421 | + void *smpl_addr; /* user smpl buffer base */ | |
21422 | + size_t smpl_size; /* user smpl buffer size */ | |
21423 | + void *smpl_real_addr;/* actual smpl buffer base */ | |
21424 | + size_t smpl_real_size; /* actual smpl buffer size */ | |
21425 | + | |
21426 | + wait_queue_head_t msgq_wait; /* pfm_read() wait queue */ | |
21427 | + | |
21428 | + union pfarg_msg msgq[PFM_MSGS_COUNT]; | |
21429 | + int msgq_head; | |
21430 | + int msgq_tail; | |
21431 | + | |
21432 | + struct fasync_struct *async_queue; /* async notification */ | |
21433 | + | |
21434 | + struct pfm_event_set *active_set; /* active set */ | |
21435 | + struct list_head set_list; /* ordered list of sets */ | |
21436 | + | |
21437 | + struct pfm_regdesc regs; /* registers available to context */ | |
21438 | + | |
21439 | + /* | |
21440 | + * save stack space by allocating temporary variables for | |
21441 | + * pfm_overflow_handler() in pfm_context | |
21442 | + */ | |
21443 | + struct pfm_ovfl_arg ovfl_arg; | |
21444 | + u64 tmp_ovfl_notify[PFM_PMD_BV]; | |
21445 | +}; | |
21446 | + | |
21447 | +/* | |
21448 | + * ovfl_ctrl bitmask (used by interrupt handler) | |
21449 | + */ | |
21450 | +#define PFM_OVFL_CTRL_NOTIFY 0x1 /* notify user */ | |
21451 | +#define PFM_OVFL_CTRL_RESET 0x2 /* reset overflowed pmds */ | |
21452 | +#define PFM_OVFL_CTRL_MASK 0x4 /* mask monitoring */ | |
21453 | +#define PFM_OVFL_CTRL_SWITCH 0x8 /* switch sets */ | |
21454 | + | |
21455 | +/* | |
21456 | + * logging | |
21457 | + */ | |
21458 | +#define PFM_ERR(f, x...) printk(KERN_ERR "perfmon: " f "\n", ## x) | |
21459 | +#define PFM_WARN(f, x...) printk(KERN_WARNING "perfmon: " f "\n", ## x) | |
21460 | +#define PFM_LOG(f, x...) printk(KERN_NOTICE "perfmon: " f "\n", ## x) | |
21461 | +#define PFM_INFO(f, x...) printk(KERN_INFO "perfmon: " f "\n", ## x) | |
21462 | + | |
21463 | +/* | |
21464 | + * debugging | |
21465 | + * | |
21466 | + * Printk rate limiting is enforced to avoid getting flooded with too many | |
21467 | + * error messages on the console (which could render the machine unresponsive). | |
21468 | + * To get full debug output (turn off ratelimit): | |
21469 | + * $ echo 0 >/proc/sys/kernel/printk_ratelimit | |
21470 | + * | |
21471 | + * debug is a bitmask where bits are defined as follows: | |
21472 | + * bit 0: enable non-interrupt code debug messages | |
21473 | + * bit 1: enable interrupt code debug messages | |
21474 | + */ | |
21475 | +#ifdef CONFIG_PERFMON_DEBUG | |
21476 | +#define _PFM_DBG(lm, f, x...) \ | |
21477 | + do { \ | |
21478 | + if (unlikely((pfm_controls.debug & lm) && printk_ratelimit())) { \ | |
21479 | + preempt_disable(); \ | |
21480 | + printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \ | |
21481 | + __func__, __LINE__, \ | |
21482 | + smp_processor_id(), current->pid , ## x); \ | |
21483 | + preempt_enable(); \ | |
21484 | + } \ | |
21485 | + } while (0) | |
21486 | + | |
21487 | +#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x) | |
21488 | +#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x) | |
21489 | +#else | |
21490 | +#define PFM_DBG(f, x...) do {} while (0) | |
21491 | +#define PFM_DBG_ovfl(f, x...) do {} while (0) | |
21492 | +#endif | |
21493 | + | |
21494 | +extern struct pfm_pmu_config *pfm_pmu_conf; | |
21495 | +extern int perfmon_disabled; | |
21496 | + | |
21497 | +static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c) | |
21498 | +{ | |
21499 | + return (struct pfm_arch_context *)(c+1); | |
21500 | +} | |
21501 | + | |
21502 | +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr, | |
21503 | + void **req, void **to_free); | |
21504 | + | |
21505 | +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize, | |
21506 | + void **arg, struct pfm_smpl_fmt **fmt); | |
21507 | + | |
21508 | +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, | |
21509 | + int count); | |
21510 | +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count, | |
21511 | + int compat); | |
21512 | +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count); | |
21513 | + | |
21514 | +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req, | |
21515 | + struct task_struct *task); | |
21516 | +int __pfm_unload_context(struct pfm_context *ctx, int *can_release); | |
21517 | + | |
21518 | +int __pfm_stop(struct pfm_context *ctx, int *release_info); | |
21519 | +int __pfm_restart(struct pfm_context *ctx, int *unblock); | |
21520 | +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start); | |
21521 | + | |
21522 | +void pfm_free_context(struct pfm_context *ctx); | |
21523 | + | |
21524 | +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size); | |
21525 | + | |
21526 | +int pfm_check_task_state(struct pfm_context *ctx, int check_mask, | |
21527 | + unsigned long *flags, void **resume); | |
21528 | +/* | |
21529 | + * check_mask bitmask values for pfm_check_task_state() | |
21530 | + */ | |
21531 | +#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */ | |
21532 | +#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */ | |
21533 | +#define PFM_CMD_UNLOAD 0x04 /* command is unload */ | |
21534 | + | |
21535 | +int __pfm_create_context(struct pfarg_ctx *req, | |
21536 | + struct pfm_smpl_fmt *fmt, | |
21537 | + void *fmt_arg, | |
21538 | + int mode, | |
21539 | + struct pfm_context **new_ctx); | |
21540 | + | |
21541 | +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, | |
21542 | + int alloc); | |
21543 | + | |
21544 | +int pfm_pmu_conf_get(int autoload); | |
21545 | +void pfm_pmu_conf_put(void); | |
21546 | + | |
21547 | +int pfm_session_allcpus_acquire(void); | |
21548 | +void pfm_session_allcpus_release(void); | |
21549 | + | |
21550 | +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize); | |
21551 | +void pfm_smpl_buf_free(struct pfm_context *ctx); | |
21552 | + | |
21553 | +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name); | |
21554 | +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt); | |
21555 | + | |
21556 | +void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs); | |
21557 | + | |
21558 | +void pfm_resume_task(struct task_struct *t, void *data); | |
21559 | + | |
21560 | +#include <linux/perfmon_pmu.h> | |
21561 | +#include <linux/perfmon_fmt.h> | |
21562 | + | |
21563 | +extern const struct file_operations pfm_file_ops; | |
21564 | +/* | |
21565 | + * upper limit for count in calls that take vector arguments. This is used | |
21566 | + * to prevent for multiplication overflow when we compute actual storage size | |
21567 | + */ | |
21568 | +#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m))) | |
21569 | + | |
21570 | +#define cast_ulp(_x) ((unsigned long *)_x) | |
21571 | + | |
21572 | +#define PFM_NORMAL 0 | |
21573 | +#define PFM_COMPAT 1 | |
21574 | + | |
21575 | +void __pfm_exit_thread(void); | |
21576 | +void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next); | |
21577 | +void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next); | |
21578 | +void pfm_handle_work(struct pt_regs *regs); | |
21579 | +void __pfm_init_percpu(void *dummy); | |
21580 | +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set); | |
21581 | + | |
21582 | +static inline void pfm_exit_thread(void) | |
21583 | +{ | |
21584 | + if (current->pfm_context) | |
21585 | + __pfm_exit_thread(); | |
21586 | +} | |
21587 | + | |
21588 | +/* | |
21589 | + * include arch-specific kernel level definitions | |
21590 | + */ | |
21591 | +#include <asm/perfmon_kern.h> | |
21592 | + | |
21593 | +static inline void pfm_copy_thread(struct task_struct *task) | |
21594 | +{ | |
21595 | + /* | |
21596 | + * context or perfmon TIF state is NEVER inherited | |
21597 | + * in child task. Holds for per-thread and system-wide | |
21598 | + */ | |
21599 | + task->pfm_context = NULL; | |
21600 | + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW); | |
21601 | + clear_tsk_thread_flag(task, TIF_PERFMON_WORK); | |
21602 | + pfm_arch_disarm_handle_work(task); | |
21603 | +} | |
21604 | + | |
21605 | + | |
21606 | +/* | |
21607 | + * read a single PMD register. | |
21608 | + * | |
21609 | + * virtual PMD registers have special handler. | |
21610 | + * Depends on definitions in asm/perfmon_kern.h | |
21611 | + */ | |
21612 | +static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum) | |
21613 | +{ | |
21614 | + if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V)) | |
21615 | + return pfm_pmu_conf->pmd_sread(ctx, cnum); | |
21616 | + | |
21617 | + return pfm_arch_read_pmd(ctx, cnum); | |
21618 | +} | |
21619 | +/* | |
21620 | + * write a single PMD register. | |
21621 | + * | |
21622 | + * virtual PMD registers have special handler. | |
21623 | + * Depends on definitions in asm/perfmon_kern.h | |
21624 | + */ | |
21625 | +static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum, | |
21626 | + u64 value) | |
21627 | +{ | |
21628 | + /* | |
21629 | + * PMD writes are ignored for read-only registers | |
21630 | + */ | |
21631 | + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO) | |
21632 | + return; | |
21633 | + | |
21634 | + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) { | |
21635 | + pfm_pmu_conf->pmd_swrite(ctx, cnum, value); | |
21636 | + return; | |
21637 | + } | |
21638 | + /* | |
21639 | + * clear unimplemented bits | |
21640 | + */ | |
21641 | + value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk; | |
21642 | + | |
21643 | + pfm_arch_write_pmd(ctx, cnum, value); | |
21644 | +} | |
21645 | + | |
21646 | +void __pfm_init_percpu(void *dummy); | |
21647 | + | |
21648 | +static inline void pfm_init_percpu(void) | |
21649 | +{ | |
21650 | + __pfm_init_percpu(NULL); | |
21651 | +} | |
21652 | + | |
21653 | +/* | |
21654 | + * pfm statistics are available via debugfs | |
21655 | + * and perfmon subdir. | |
21656 | + * | |
21657 | + * When adding/removing new stats, make sure you also | |
21658 | + * update the name table in perfmon_debugfs.c | |
21659 | + */ | |
21660 | +enum pfm_stats_names { | |
21661 | + PFM_ST_ovfl_intr_all_count = 0, | |
21662 | + PFM_ST_ovfl_intr_ns, | |
21663 | + PFM_ST_ovfl_intr_spurious_count, | |
21664 | + PFM_ST_ovfl_intr_replay_count, | |
21665 | + PFM_ST_ovfl_intr_regular_count, | |
21666 | + PFM_ST_handle_work_count, | |
21667 | + PFM_ST_ovfl_notify_count, | |
21668 | + PFM_ST_reset_pmds_count, | |
21669 | + PFM_ST_pfm_restart_count, | |
21670 | + PFM_ST_fmt_handler_calls, | |
21671 | + PFM_ST_fmt_handler_ns, | |
21672 | + PFM_ST_set_switch_count, | |
21673 | + PFM_ST_set_switch_ns, | |
21674 | + PFM_ST_set_switch_exp, | |
21675 | + PFM_ST_ctxswin_count, | |
21676 | + PFM_ST_ctxswin_ns, | |
21677 | + PFM_ST_handle_timeout_count, | |
21678 | + PFM_ST_ovfl_intr_nmi_count, | |
21679 | + PFM_ST_ctxswout_count, | |
21680 | + PFM_ST_ctxswout_ns, | |
21681 | + PFM_ST_LAST /* last entry marked */ | |
21682 | +}; | |
21683 | +#define PFM_NUM_STATS PFM_ST_LAST | |
21684 | + | |
21685 | +struct pfm_stats { | |
21686 | + u64 v[PFM_NUM_STATS]; | |
21687 | + struct dentry *dirs[PFM_NUM_STATS]; | |
21688 | + struct dentry *cpu_dir; | |
21689 | + char cpu_name[8]; | |
21690 | +}; | |
21691 | + | |
21692 | +#ifdef CONFIG_PERFMON_DEBUG_FS | |
21693 | +#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x] | |
21694 | +#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++ | |
21695 | +#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y) | |
21696 | +void pfm_reset_stats(int cpu); | |
21697 | +#else | |
21698 | +#define pfm_stats_get(x) | |
21699 | +#define pfm_stats_inc(x) | |
21700 | +#define pfm_stats_add(x, y) | |
21701 | +static inline void pfm_reset_stats(int cpu) | |
21702 | +{} | |
21703 | +#endif | |
21704 | + | |
21705 | + | |
21706 | + | |
21707 | +DECLARE_PER_CPU(struct pfm_context *, pmu_ctx); | |
21708 | +DECLARE_PER_CPU(struct pfm_stats, pfm_stats); | |
21709 | +DECLARE_PER_CPU(struct task_struct *, pmu_owner); | |
21710 | + | |
21711 | +void pfm_cpu_disable(void); | |
21712 | + | |
21713 | + | |
21714 | +/* | |
21715 | + * max vector argument elements for local storage (no kmalloc/kfree) | |
21716 | + * The PFM_ARCH_PM*_ARG should be defined in perfmon_kern.h. | |
21717 | + * If not, default (conservative) values are used | |
21718 | + */ | |
21719 | +#ifndef PFM_ARCH_PMC_STK_ARG | |
21720 | +#define PFM_ARCH_PMC_STK_ARG 1 | |
21721 | +#endif | |
21722 | + | |
21723 | +#ifndef PFM_ARCH_PMD_STK_ARG | |
21724 | +#define PFM_ARCH_PMD_STK_ARG 1 | |
21725 | +#endif | |
21726 | + | |
21727 | +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG | |
21728 | +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG | |
21729 | + | |
21730 | +#else /* !CONFIG_PERFMON */ | |
21731 | + | |
21732 | + | |
21733 | +/* | |
21734 | + * perfmon hooks are nops when CONFIG_PERFMON is undefined | |
21735 | + */ | |
21736 | +static inline void pfm_cpu_disable(void) | |
21737 | +{} | |
21738 | + | |
21739 | +static inline void pfm_exit_thread(void) | |
21740 | +{} | |
21741 | + | |
21742 | +static inline void pfm_handle_work(struct pt_regs *regs) | |
21743 | +{} | |
21744 | + | |
21745 | +static inline void pfm_copy_thread(struct task_struct *t) | |
21746 | +{} | |
21747 | + | |
21748 | +static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n) | |
21749 | +{} | |
21750 | + | |
21751 | +static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n) | |
21752 | +{} | |
21753 | + | |
21754 | +static inline void pfm_session_allcpus_release(void) | |
21755 | +{} | |
21756 | + | |
21757 | +static inline int pfm_session_allcpus_acquire(void) | |
21758 | +{ | |
21759 | + return 0; | |
21760 | +} | |
21761 | + | |
21762 | +static inline void pfm_init_percpu(void) | |
21763 | +{} | |
21764 | + | |
21765 | +#endif /* CONFIG_PERFMON */ | |
21766 | + | |
21767 | +#endif /* __LINUX_PERFMON_KERN_H__ */ | |
21768 | --- /dev/null | |
21769 | +++ b/include/linux/perfmon_pmu.h | |
21770 | @@ -0,0 +1,192 @@ | |
21771 | +/* | |
21772 | + * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. | |
21773 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
21774 | + * | |
21775 | + * Interface for PMU description modules | |
21776 | + * | |
21777 | + * This program is free software; you can redistribute it and/or | |
21778 | + * modify it under the terms of version 2 of the GNU General Public | |
21779 | + * License as published by the Free Software Foundation. | |
21780 | + * | |
21781 | + * This program is distributed in the hope that it will be useful, | |
21782 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21783 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21784 | + * General Public License for more details. | |
21785 | + * | |
21786 | + * You should have received a copy of the GNU General Public License | |
21787 | + * along with this program; if not, write to the Free Software | |
21788 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
21789 | + * 02111-1307 USA | |
21790 | + */ | |
21791 | +#ifndef __PERFMON_PMU_H__ | |
21792 | +#define __PERFMON_PMU_H__ 1 | |
21793 | + | |
21794 | +/* | |
21795 | + * generic information about a PMC or PMD register | |
21796 | + * | |
21797 | + * Dependency bitmasks: | |
21798 | + * They are used to allow lazy save/restore in the context switch | |
21799 | + * code. To avoid picking up stale configuration from a previous | |
21800 | + * thread. Using the bitmask, the generic read/write routines can | |
21801 | + * ensure that all registers needed to support the measurement are | |
21802 | + * restored properly on context switch in. | |
21803 | + */ | |
21804 | +struct pfm_regmap_desc { | |
21805 | + u16 type; /* role of the register */ | |
21806 | + u16 reserved1; /* for future use */ | |
21807 | + u32 reserved2; /* for future use */ | |
21808 | + u64 dfl_val; /* power-on default value (quiescent) */ | |
21809 | + u64 rsvd_msk; /* reserved bits: 1 means reserved */ | |
21810 | + u64 no_emul64_msk; /* bits to clear for PFM_REGFL_NO_EMUL64 */ | |
21811 | + unsigned long hw_addr; /* HW register address or index */ | |
21812 | + struct kobject kobj; /* for internal use only */ | |
21813 | + char *desc; /* HW register description string */ | |
21814 | + u64 dep_pmcs[PFM_PMC_BV];/* depending PMC registers */ | |
21815 | +}; | |
21816 | +#define to_reg(n) container_of(n, struct pfm_regmap_desc, kobj) | |
21817 | + | |
21818 | +/* | |
21819 | + * pfm_reg_desc helper macros | |
21820 | + */ | |
21821 | +#define PMC_D(t, d, v, r, n, h) \ | |
21822 | + { .type = t, \ | |
21823 | + .desc = d, \ | |
21824 | + .dfl_val = v, \ | |
21825 | + .rsvd_msk = r, \ | |
21826 | + .no_emul64_msk = n, \ | |
21827 | + .hw_addr = h \ | |
21828 | + } | |
21829 | + | |
21830 | +#define PMD_D(t, d, h) \ | |
21831 | + { .type = t, \ | |
21832 | + .desc = d, \ | |
21833 | + .rsvd_msk = 0, \ | |
21834 | + .no_emul64_msk = 0, \ | |
21835 | + .hw_addr = h \ | |
21836 | + } | |
21837 | + | |
21838 | +#define PMD_DR(t, d, h, r) \ | |
21839 | + { .type = t, \ | |
21840 | + .desc = d, \ | |
21841 | + .rsvd_msk = r, \ | |
21842 | + .no_emul64_msk = 0, \ | |
21843 | + .hw_addr = h \ | |
21844 | + } | |
21845 | + | |
21846 | +#define PMX_NA \ | |
21847 | + { .type = PFM_REG_NA } | |
21848 | + | |
21849 | +#define PMD_DP(t, d, h, p) \ | |
21850 | + { .type = t, \ | |
21851 | + .desc = d, \ | |
21852 | + .rsvd_msk = 0, \ | |
21853 | + .no_emul64_msk = 0, \ | |
21854 | + .dep_pmcs[0] = p, \ | |
21855 | + .hw_addr = h \ | |
21856 | + } | |
21857 | + | |
21858 | +/* | |
21859 | + * type of a PMU register (16-bit bitmask) for use with pfm_reg_desc.type | |
21860 | + */ | |
21861 | +#define PFM_REG_NA 0x00 /* not avail. (not impl.,no access) must be 0 */ | |
21862 | +#define PFM_REG_I 0x01 /* PMC/PMD: implemented */ | |
21863 | +#define PFM_REG_WC 0x02 /* PMC: has write_checker */ | |
21864 | +#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */ | |
21865 | +#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */ | |
21866 | +#define PFM_REG_V 0x10 /* PMD: virtual reg */ | |
21867 | +#define PFM_REG_INTR 0x20 /* PMD: register can generate interrupt */ | |
21868 | +#define PFM_REG_SYS 0x40 /* PMC/PMD: register is for system-wide only */ | |
21869 | +#define PFM_REG_THR 0x80 /* PMC/PMD: register is for per-thread only */ | |
21870 | +#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */ | |
21871 | + | |
21872 | +/* | |
21873 | + * define some shortcuts for common types | |
21874 | + */ | |
21875 | +#define PFM_REG_W (PFM_REG_WC|PFM_REG_I) | |
21876 | +#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I) | |
21877 | +#define PFM_REG_C (PFM_REG_C64|PFM_REG_INTR|PFM_REG_I) | |
21878 | +#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I) | |
21879 | +#define PFM_REG_IRO (PFM_REG_I|PFM_REG_RO) | |
21880 | + | |
21881 | +typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx, | |
21882 | + struct pfm_event_set *set, | |
21883 | + struct pfarg_pmc *req); | |
21884 | + | |
21885 | +typedef int (*pfm_pmd_check_t)(struct pfm_context *ctx, | |
21886 | + struct pfm_event_set *set, | |
21887 | + struct pfarg_pmd *req); | |
21888 | + | |
21889 | + | |
21890 | +typedef u64 (*pfm_sread_t)(struct pfm_context *ctx, unsigned int cnum); | |
21891 | +typedef void (*pfm_swrite_t)(struct pfm_context *ctx, unsigned int cnum, u64 val); | |
21892 | + | |
21893 | +/* | |
21894 | + * structure used by pmu description modules | |
21895 | + * | |
21896 | + * probe_pmu() routine return value: | |
21897 | + * - 1 means recognized PMU | |
21898 | + * - 0 means not recognized PMU | |
21899 | + */ | |
21900 | +struct pfm_pmu_config { | |
21901 | + char *pmu_name; /* PMU family name */ | |
21902 | + char *version; /* config module version */ | |
21903 | + | |
21904 | + int counter_width; /* width of hardware counter */ | |
21905 | + | |
21906 | + struct pfm_regmap_desc *pmc_desc; /* PMC register descriptions */ | |
21907 | + struct pfm_regmap_desc *pmd_desc; /* PMD register descriptions */ | |
21908 | + | |
21909 | + pfm_pmc_check_t pmc_write_check;/* write checker (optional) */ | |
21910 | + pfm_pmd_check_t pmd_write_check;/* write checker (optional) */ | |
21911 | + pfm_pmd_check_t pmd_read_check; /* read checker (optional) */ | |
21912 | + | |
21913 | + pfm_sread_t pmd_sread; /* virtual pmd read */ | |
21914 | + pfm_swrite_t pmd_swrite; /* virtual pmd write */ | |
21915 | + | |
21916 | + int (*probe_pmu)(void);/* probe PMU routine */ | |
21917 | + | |
21918 | + u16 num_pmc_entries;/* #entries in pmc_desc */ | |
21919 | + u16 num_pmd_entries;/* #entries in pmd_desc */ | |
21920 | + | |
21921 | + void *pmu_info; /* model-specific infos */ | |
21922 | + u32 flags; /* set of flags */ | |
21923 | + | |
21924 | + struct module *owner; /* pointer to module struct */ | |
21925 | + | |
21926 | + /* | |
21927 | + * fields computed internally, do not set in module | |
21928 | + */ | |
21929 | + struct pfm_regdesc regs_all; /* regs available to all */ | |
21930 | + struct pfm_regdesc regs_thr; /* regs avail per-thread */ | |
21931 | + struct pfm_regdesc regs_sys; /* regs avail system-wide */ | |
21932 | + | |
21933 | + u64 ovfl_mask; /* overflow mask */ | |
21934 | +}; | |
21935 | + | |
21936 | +static inline void *pfm_pmu_info(void) | |
21937 | +{ | |
21938 | + return pfm_pmu_conf->pmu_info; | |
21939 | +} | |
21940 | + | |
21941 | +/* | |
21942 | + * pfm_pmu_config flags | |
21943 | + */ | |
21944 | +#define PFM_PMUFL_IS_BUILTIN 0x1 /* pmu config is compiled in */ | |
21945 | + | |
21946 | +/* | |
21947 | + * we need to know whether the PMU description is builtin or compiled | |
21948 | + * as a module | |
21949 | + */ | |
21950 | +#ifdef MODULE | |
21951 | +#define PFM_PMU_BUILTIN_FLAG 0 /* not built as a module */ | |
21952 | +#else | |
21953 | +#define PFM_PMU_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* built as a module */ | |
21954 | +#endif | |
21955 | + | |
21956 | +int pfm_pmu_register(struct pfm_pmu_config *cfg); | |
21957 | +void pfm_pmu_unregister(struct pfm_pmu_config *cfg); | |
21958 | + | |
21959 | +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu); | |
21960 | +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu); | |
21961 | + | |
21962 | +#endif /* __PERFMON_PMU_H__ */ | |
21963 | --- a/include/linux/sched.h | |
21964 | +++ b/include/linux/sched.h | |
21965 | @@ -96,6 +96,7 @@ struct exec_domain; | |
21966 | struct futex_pi_state; | |
21967 | struct robust_list_head; | |
21968 | struct bio; | |
21969 | +struct pfm_context; | |
21970 | ||
21971 | /* | |
21972 | * List of flags we want to share for kernel threads, | |
21973 | @@ -1309,6 +1310,9 @@ struct task_struct { | |
21974 | struct latency_record latency_record[LT_SAVECOUNT]; | |
21975 | #endif | |
21976 | u64 instrumentation; | |
21977 | +#ifdef CONFIG_PERFMON | |
21978 | + struct pfm_context *pfm_context; | |
21979 | +#endif | |
21980 | }; | |
21981 | ||
21982 | /* | |
21983 | --- a/include/linux/syscalls.h | |
21984 | +++ b/include/linux/syscalls.h | |
21985 | @@ -29,6 +29,13 @@ struct msqid_ds; | |
21986 | struct new_utsname; | |
21987 | struct nfsctl_arg; | |
21988 | struct __old_kernel_stat; | |
21989 | +struct pfarg_ctx; | |
21990 | +struct pfarg_pmc; | |
21991 | +struct pfarg_pmd; | |
21992 | +struct pfarg_start; | |
21993 | +struct pfarg_load; | |
21994 | +struct pfarg_setinfo; | |
21995 | +struct pfarg_setdesc; | |
21996 | struct pollfd; | |
21997 | struct rlimit; | |
21998 | struct rusage; | |
21999 | @@ -690,4 +697,27 @@ asmlinkage long sys_pipe(int __user *); | |
22000 | ||
22001 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | |
22002 | ||
22003 | +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq, | |
22004 | + void __user *uarg, size_t smpl_size); | |
22005 | +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, | |
22006 | + int count); | |
22007 | +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, | |
22008 | + int count); | |
22009 | +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, | |
22010 | + int count); | |
22011 | +asmlinkage long sys_pfm_restart(int fd); | |
22012 | +asmlinkage long sys_pfm_stop(int fd); | |
22013 | +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq); | |
22014 | +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq); | |
22015 | +asmlinkage long sys_pfm_unload_context(int fd); | |
22016 | +asmlinkage long sys_pfm_delete_evtsets(int fd, | |
22017 | + struct pfarg_setinfo __user *ureq, | |
22018 | + int count); | |
22019 | +asmlinkage long sys_pfm_create_evtsets(int fd, | |
22020 | + struct pfarg_setdesc __user *ureq, | |
22021 | + int count); | |
22022 | +asmlinkage long sys_pfm_getinfo_evtsets(int fd, | |
22023 | + struct pfarg_setinfo __user *ureq, | |
22024 | + int count); | |
22025 | + | |
22026 | #endif | |
22027 | --- a/kernel/sched.c | |
22028 | +++ b/kernel/sched.c | |
22029 | @@ -71,6 +71,7 @@ | |
22030 | #include <linux/debugfs.h> | |
22031 | #include <linux/ctype.h> | |
22032 | #include <linux/ftrace.h> | |
22033 | +#include <linux/perfmon_kern.h> | |
22034 | ||
22035 | #include <asm/tlb.h> | |
22036 | #include <asm/irq_regs.h> | |
22037 | --- a/kernel/sys_ni.c | |
22038 | +++ b/kernel/sys_ni.c | |
22039 | @@ -127,6 +127,19 @@ cond_syscall(compat_sys_ipc); | |
22040 | cond_syscall(compat_sys_sysctl); | |
22041 | cond_syscall(sys_syslog); | |
22042 | ||
22043 | +cond_syscall(sys_pfm_create_context); | |
22044 | +cond_syscall(sys_pfm_write_pmcs); | |
22045 | +cond_syscall(sys_pfm_write_pmds); | |
22046 | +cond_syscall(sys_pfm_read_pmds); | |
22047 | +cond_syscall(sys_pfm_restart); | |
22048 | +cond_syscall(sys_pfm_start); | |
22049 | +cond_syscall(sys_pfm_stop); | |
22050 | +cond_syscall(sys_pfm_load_context); | |
22051 | +cond_syscall(sys_pfm_unload_context); | |
22052 | +cond_syscall(sys_pfm_create_evtsets); | |
22053 | +cond_syscall(sys_pfm_delete_evtsets); | |
22054 | +cond_syscall(sys_pfm_getinfo_evtsets); | |
22055 | + | |
22056 | /* arch-specific weak syscall entries */ | |
22057 | cond_syscall(sys_pciconfig_read); | |
22058 | cond_syscall(sys_pciconfig_write); | |
22059 | --- /dev/null | |
22060 | +++ b/perfmon/Makefile | |
22061 | @@ -0,0 +1,12 @@ | |
22062 | +# | |
22063 | +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. | |
22064 | +# Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
22065 | +# | |
22066 | +obj-y = perfmon_init.o perfmon_rw.o perfmon_res.o \ | |
22067 | + perfmon_pmu.o perfmon_sysfs.o perfmon_syscalls.o \ | |
22068 | + perfmon_file.o perfmon_ctxsw.o perfmon_intr.o \ | |
22069 | + perfmon_dfl_smpl.o perfmon_sets.o perfmon_hotplug.o \ | |
22070 | + perfmon_msg.o perfmon_smpl.o perfmon_attach.o \ | |
22071 | + perfmon_activate.o perfmon_ctx.o perfmon_fmt.o | |
22072 | + | |
22073 | +obj-$(CONFIG_PERFMON_DEBUG_FS) += perfmon_debugfs.o | |
22074 | --- /dev/null | |
22075 | +++ b/perfmon/perfmon_activate.c | |
22076 | @@ -0,0 +1,265 @@ | |
22077 | +/* | |
22078 | + * perfmon_activate.c: perfmon2 start/stop functions | |
22079 | + * | |
22080 | + * This file implements the perfmon2 interface which | |
22081 | + * provides access to the hardware performance counters | |
22082 | + * of the host processor. | |
22083 | + * | |
22084 | + * | |
22085 | + * The initial version of perfmon.c was written by | |
22086 | + * Ganesh Venkitachalam, IBM Corp. | |
22087 | + * | |
22088 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
22089 | + * David Mosberger, Hewlett Packard Co. | |
22090 | + * | |
22091 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
22092 | + * by Stephane Eranian, Hewlett Packard Co. | |
22093 | + * | |
22094 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
22095 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
22096 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
22097 | + * | |
22098 | + * More information about perfmon available at: | |
22099 | + * http://perfmon2.sf.net | |
22100 | + * | |
22101 | + * This program is free software; you can redistribute it and/or | |
22102 | + * modify it under the terms of version 2 of the GNU General Public | |
22103 | + * License as published by the Free Software Foundation. | |
22104 | + * | |
22105 | + * This program is distributed in the hope that it will be useful, | |
22106 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22107 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
22108 | + * General Public License for more details. | |
22109 | + * | |
22110 | + * You should have received a copy of the GNU General Public License | |
22111 | + * along with this program; if not, write to the Free Software | |
22112 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
22113 | + * 02111-1307 USA | |
22114 | + */ | |
22115 | +#include <linux/kernel.h> | |
22116 | +#include <linux/perfmon_kern.h> | |
22117 | +#include "perfmon_priv.h" | |
22118 | + | |
22119 | +/** | |
22120 | + * __pfm_start - activate monitoring | |
22121 | + * @ctx: context to operate on | |
22122 | + * @start: pfarg_start as passed by user | |
22123 | + * | |
22124 | + * When operating in per-thread mode and not self-monitoring, the monitored | |
22125 | + * thread must be stopped. Activation will be effective next time the thread | |
22126 | + * is context switched in. | |
22127 | + * | |
22128 | + * The pfarg_start argument is optional and may be used to designate | |
22129 | + * the initial event set to activate. When not provided, the last active | |
22130 | + * set is used. For the first activation, set0 is used when start is NULL. | |
22131 | + * | |
22132 | + * On some architectures, e.g., IA-64, it may be possible to start monitoring | |
22133 | + * without calling this function under certain conditions (per-thread and self | |
22134 | + * monitoring). In this case, either set0 or the last active set is used. | |
22135 | + * | |
22136 | + * the context is locked and interrupts are disabled. | |
22137 | + */ | |
22138 | +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start) | |
22139 | +{ | |
22140 | + struct task_struct *task, *owner_task; | |
22141 | + struct pfm_event_set *new_set, *old_set; | |
22142 | + int is_self; | |
22143 | + | |
22144 | + task = ctx->task; | |
22145 | + | |
22146 | + /* | |
22147 | + * UNLOADED: error | |
22148 | + * LOADED : normal start, nop if started unless set is different | |
22149 | + * MASKED : nop or change set when unmasking | |
22150 | + * ZOMBIE : cannot happen | |
22151 | + */ | |
22152 | + if (ctx->state == PFM_CTX_UNLOADED) | |
22153 | + return -EINVAL; | |
22154 | + | |
22155 | + old_set = new_set = ctx->active_set; | |
22156 | + | |
22157 | + /* | |
22158 | + * always the case for system-wide | |
22159 | + */ | |
22160 | + if (task == NULL) | |
22161 | + task = current; | |
22162 | + | |
22163 | + is_self = task == current; | |
22164 | + | |
22165 | + /* | |
22166 | + * argument is provided? | |
22167 | + */ | |
22168 | + if (start) { | |
22169 | + /* | |
22170 | + * find the set to load first | |
22171 | + */ | |
22172 | + new_set = pfm_find_set(ctx, start->start_set, 0); | |
22173 | + if (new_set == NULL) { | |
22174 | + PFM_DBG("event set%u does not exist", | |
22175 | + start->start_set); | |
22176 | + return -EINVAL; | |
22177 | + } | |
22178 | + } | |
22179 | + | |
22180 | + PFM_DBG("cur_set=%u req_set=%u", old_set->id, new_set->id); | |
22181 | + | |
22182 | + /* | |
22183 | + * if we need to change the active set we need | |
22184 | + * to check if we can access the PMU | |
22185 | + */ | |
22186 | + if (new_set != old_set) { | |
22187 | + | |
22188 | + owner_task = __get_cpu_var(pmu_owner); | |
22189 | + /* | |
22190 | + * system-wide: must run on the right CPU | |
22191 | + * per-thread : must be the owner of the PMU context | |
22192 | + * | |
22193 | + * pfm_switch_sets() returns with monitoring stopped | |
22194 | + */ | |
22195 | + if (is_self) { | |
22196 | + pfm_switch_sets(ctx, new_set, PFM_PMD_RESET_LONG, 1); | |
22197 | + } else { | |
22198 | + /* | |
22199 | + * In a UP kernel, the PMU may contain the state | |
22200 | + * of the task we want to operate on, yet the task | |
22201 | + * may be switched out (lazy save). We need to save | |
22202 | + * current state (old_set), switch active_set and | |
22203 | + * mark it for reload. | |
22204 | + */ | |
22205 | + if (owner_task == task) | |
22206 | + pfm_save_pmds(ctx, old_set); | |
22207 | + ctx->active_set = new_set; | |
22208 | + new_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; | |
22209 | + } | |
22210 | + } | |
22211 | + | |
22212 | + /* | |
22213 | + * mark as started | |
22214 | + * must be done before calling pfm_arch_start() | |
22215 | + */ | |
22216 | + ctx->flags.started = 1; | |
22217 | + | |
22218 | + pfm_arch_start(task, ctx); | |
22219 | + | |
22220 | + /* | |
22221 | + * we check whether we had a pending ovfl before restarting. | |
22222 | + * If so we need to regenerate the interrupt to make sure we | |
22223 | + * keep recorded samples. For non-self monitoring this check | |
22224 | + * is done in the pfm_ctxswin_thread() routine. | |
22225 | + * | |
22226 | + * we check new_set/old_set because pfm_switch_sets() already | |
22227 | + * takes care of replaying the pending interrupts | |
22228 | + */ | |
22229 | + if (is_self && new_set != old_set && new_set->npend_ovfls) { | |
22230 | + pfm_arch_resend_irq(ctx); | |
22231 | + pfm_stats_inc(ovfl_intr_replay_count); | |
22232 | + } | |
22233 | + | |
22234 | + /* | |
22235 | + * always start with full timeout | |
22236 | + */ | |
22237 | + new_set->hrtimer_rem = new_set->hrtimer_exp; | |
22238 | + | |
22239 | + /* | |
22240 | + * activate timeout for system-wide, self-montoring | |
22241 | + * Always start with full timeout | |
22242 | + * Timeout is at least one tick away, so no risk of | |
22243 | + * having hrtimer_start() trying to wakeup softirqd | |
22244 | + * and thus causing troubles. This cannot happen anmyway | |
22245 | + * because cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ | |
22246 | + */ | |
22247 | + if (is_self && new_set->flags & PFM_SETFL_TIME_SWITCH) { | |
22248 | + hrtimer_start(&__get_cpu_var(pfm_hrtimer), | |
22249 | + new_set->hrtimer_rem, | |
22250 | + HRTIMER_MODE_REL); | |
22251 | + | |
22252 | + PFM_DBG("set%u started timeout=%lld", | |
22253 | + new_set->id, | |
22254 | + (unsigned long long)new_set->hrtimer_rem.tv64); | |
22255 | + } | |
22256 | + | |
22257 | + /* | |
22258 | + * we restart total duration even if context was | |
22259 | + * already started. In that case, counts are simply | |
22260 | + * reset. | |
22261 | + * | |
22262 | + * For per-thread, if not self-monitoring, the statement | |
22263 | + * below will have no effect because thread is stopped. | |
22264 | + * The field is reset of ctxsw in. | |
22265 | + */ | |
22266 | + new_set->duration_start = sched_clock(); | |
22267 | + | |
22268 | + return 0; | |
22269 | +} | |
22270 | + | |
22271 | +/** | |
22272 | + * __pfm_stop - stop monitoring | |
22273 | + * @ctx: context to operate on | |
22274 | + * @release_info: infos for caller (see below) | |
22275 | + * | |
22276 | + * When operating in per-thread* mode and when not self-monitoring, | |
22277 | + * the monitored thread must be stopped. | |
22278 | + * | |
22279 | + * the context is locked and interrupts are disabled. | |
22280 | + * | |
22281 | + * release_info value upon return: | |
22282 | + * - bit 0 : unused | |
22283 | + * - bit 1 : when set, must cancel hrtimer | |
22284 | + */ | |
22285 | +int __pfm_stop(struct pfm_context *ctx, int *release_info) | |
22286 | +{ | |
22287 | + struct pfm_event_set *set; | |
22288 | + struct task_struct *task; | |
22289 | + u64 now; | |
22290 | + int state; | |
22291 | + | |
22292 | + *release_info = 0; | |
22293 | + | |
22294 | + now = sched_clock(); | |
22295 | + state = ctx->state; | |
22296 | + set = ctx->active_set; | |
22297 | + | |
22298 | + /* | |
22299 | + * context must be attached (zombie cannot happen) | |
22300 | + */ | |
22301 | + if (state == PFM_CTX_UNLOADED) | |
22302 | + return -EINVAL; | |
22303 | + | |
22304 | + task = ctx->task; | |
22305 | + | |
22306 | + PFM_DBG("ctx_task=[%d] ctx_state=%d is_system=%d", | |
22307 | + task ? task->pid : -1, | |
22308 | + state, | |
22309 | + !task); | |
22310 | + | |
22311 | + /* | |
22312 | + * this happens for system-wide context | |
22313 | + */ | |
22314 | + if (task == NULL) | |
22315 | + task = current; | |
22316 | + | |
22317 | + /* | |
22318 | + * compute elapsed time | |
22319 | + * | |
22320 | + * unless masked, compute elapsed duration, stop timeout | |
22321 | + */ | |
22322 | + if (task == current && state == PFM_CTX_LOADED) { | |
22323 | + /* | |
22324 | + * timeout cancel must be deferred until context is | |
22325 | + * unlocked to avoid race with pfm_handle_switch_timeout() | |
22326 | + */ | |
22327 | + if (set->flags & PFM_SETFL_TIME_SWITCH) | |
22328 | + *release_info |= 0x2; | |
22329 | + | |
22330 | + set->duration += now - set->duration_start; | |
22331 | + } | |
22332 | + | |
22333 | + pfm_arch_stop(task, ctx); | |
22334 | + | |
22335 | + ctx->flags.started = 0; | |
22336 | + /* | |
22337 | + * starting now, in-flight PMU interrupt for this context | |
22338 | + * are treated as spurious | |
22339 | + */ | |
22340 | + return 0; | |
22341 | +} | |
22342 | --- /dev/null | |
22343 | +++ b/perfmon/perfmon_attach.c | |
22344 | @@ -0,0 +1,474 @@ | |
22345 | +/* | |
22346 | + * perfmon_attach.c: perfmon2 load/unload functions | |
22347 | + * | |
22348 | + * This file implements the perfmon2 interface which | |
22349 | + * provides access to the hardware performance counters | |
22350 | + * of the host processor. | |
22351 | + * | |
22352 | + * | |
22353 | + * The initial version of perfmon.c was written by | |
22354 | + * Ganesh Venkitachalam, IBM Corp. | |
22355 | + * | |
22356 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
22357 | + * David Mosberger, Hewlett Packard Co. | |
22358 | + * | |
22359 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
22360 | + * by Stephane Eranian, Hewlett Packard Co. | |
22361 | + * | |
22362 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
22363 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
22364 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
22365 | + * | |
22366 | + * More information about perfmon available at: | |
22367 | + * http://perfmon2.sf.net | |
22368 | + * | |
22369 | + * This program is free software; you can redistribute it and/or | |
22370 | + * modify it under the terms of version 2 of the GNU General Public | |
22371 | + * License as published by the Free Software Foundation. | |
22372 | + * | |
22373 | + * This program is distributed in the hope that it will be useful, | |
22374 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22375 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
22376 | + * General Public License for more details. | |
22377 | + * | |
22378 | + * You should have received a copy of the GNU General Public License | |
22379 | + * along with this program; if not, write to the Free Software | |
22380 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
22381 | + * 02111-1307 USA | |
22382 | + */ | |
22383 | +#include <linux/kernel.h> | |
22384 | +#include <linux/fs.h> | |
22385 | +#include <linux/perfmon_kern.h> | |
22386 | +#include "perfmon_priv.h" | |
22387 | + | |
22388 | +/** | |
22389 | + * __pfm_load_context_sys - attach context to a CPU in system-wide mode | |
22390 | + * @ctx: context to operate on | |
22391 | + * @set_id: set to activate first | |
22392 | + * @cpu: CPU to monitor | |
22393 | + * | |
22394 | + * The cpu specified in the pfarg_load.load_pid argument must be the current | |
22395 | + * CPU. | |
22396 | + * | |
22397 | + * The function must be called with the context locked and interrupts disabled. | |
22398 | + */ | |
22399 | +static int pfm_load_ctx_sys(struct pfm_context *ctx, u16 set_id, u32 cpu) | |
22400 | +{ | |
22401 | + struct pfm_event_set *set; | |
22402 | + int mycpu; | |
22403 | + int ret; | |
22404 | + | |
22405 | + mycpu = smp_processor_id(); | |
22406 | + | |
22407 | + /* | |
22408 | + * system-wide: check we are running on the desired CPU | |
22409 | + */ | |
22410 | + if (cpu != mycpu) { | |
22411 | + PFM_DBG("wrong CPU: asking %u but on %u", cpu, mycpu); | |
22412 | + return -EINVAL; | |
22413 | + } | |
22414 | + | |
22415 | + /* | |
22416 | + * initialize sets | |
22417 | + */ | |
22418 | + set = pfm_prepare_sets(ctx, set_id); | |
22419 | + if (!set) { | |
22420 | + PFM_DBG("event set%u does not exist", set_id); | |
22421 | + return -EINVAL; | |
22422 | + } | |
22423 | + | |
22424 | + PFM_DBG("set=%u set_flags=0x%x", set->id, set->flags); | |
22425 | + | |
22426 | + ctx->cpu = mycpu; | |
22427 | + ctx->task = NULL; | |
22428 | + ctx->active_set = set; | |
22429 | + | |
22430 | + /* | |
22431 | + * perform any architecture specific actions | |
22432 | + */ | |
22433 | + ret = pfm_arch_load_context(ctx); | |
22434 | + if (ret) | |
22435 | + goto error_noload; | |
22436 | + | |
22437 | + /* | |
22438 | + * now reserve the session, before we can proceed with | |
22439 | + * actually accessing the PMU hardware | |
22440 | + */ | |
22441 | + ret = pfm_session_acquire(1, mycpu); | |
22442 | + if (ret) | |
22443 | + goto error; | |
22444 | + | |
22445 | + | |
22446 | + /* | |
22447 | + * caller must be on monitored CPU to access PMU, thus this is | |
22448 | + * a form of self-monitoring | |
22449 | + */ | |
22450 | + ctx->flags.is_self = 1; | |
22451 | + | |
22452 | + set->runs++; | |
22453 | + | |
22454 | + /* | |
22455 | + * load PMD from set | |
22456 | + * load PMC from set | |
22457 | + */ | |
22458 | + pfm_arch_restore_pmds(ctx, set); | |
22459 | + pfm_arch_restore_pmcs(ctx, set); | |
22460 | + | |
22461 | + /* | |
22462 | + * set new ownership | |
22463 | + */ | |
22464 | + pfm_set_pmu_owner(NULL, ctx); | |
22465 | + | |
22466 | + /* | |
22467 | + * reset pending work | |
22468 | + */ | |
22469 | + ctx->flags.work_type = PFM_WORK_NONE; | |
22470 | + ctx->flags.reset_count = 0; | |
22471 | + | |
22472 | + /* | |
22473 | + * reset message queue | |
22474 | + */ | |
22475 | + ctx->msgq_head = ctx->msgq_tail = 0; | |
22476 | + | |
22477 | + ctx->state = PFM_CTX_LOADED; | |
22478 | + | |
22479 | + return 0; | |
22480 | +error: | |
22481 | + pfm_arch_unload_context(ctx); | |
22482 | +error_noload: | |
22483 | + return ret; | |
22484 | +} | |
22485 | + | |
22486 | +/** | |
22487 | + * __pfm_load_context_thread - attach context to a thread | |
22488 | + * @ctx: context to operate on | |
22489 | + * @set_id: first set | |
22490 | + * @task: threadf to attach to | |
22491 | + * | |
22492 | + * The function must be called with the context locked and interrupts disabled. | |
22493 | + */ | |
22494 | +static int pfm_load_ctx_thread(struct pfm_context *ctx, u16 set_id, | |
22495 | + struct task_struct *task) | |
22496 | +{ | |
22497 | + struct pfm_event_set *set; | |
22498 | + struct pfm_context *old; | |
22499 | + int ret; | |
22500 | + | |
22501 | + PFM_DBG("load_pid=%d set=%u", task->pid, set_id); | |
22502 | + /* | |
22503 | + * per-thread: | |
22504 | + * - task to attach to is checked in sys_pfm_load_context() to avoid | |
22505 | + * locking issues. if found, and not self, task refcount was | |
22506 | + * incremented. | |
22507 | + */ | |
22508 | + old = cmpxchg(&task->pfm_context, NULL, ctx); | |
22509 | + if (old) { | |
22510 | + PFM_DBG("load_pid=%d has a context " | |
22511 | + "old=%p new=%p cur=%p", | |
22512 | + task->pid, | |
22513 | + old, | |
22514 | + ctx, | |
22515 | + task->pfm_context); | |
22516 | + return -EEXIST; | |
22517 | + } | |
22518 | + | |
22519 | + /* | |
22520 | + * initialize sets | |
22521 | + */ | |
22522 | + set = pfm_prepare_sets(ctx, set_id); | |
22523 | + if (!set) { | |
22524 | + PFM_DBG("event set%u does not exist", set_id); | |
22525 | + return -EINVAL; | |
22526 | + } | |
22527 | + | |
22528 | + | |
22529 | + ctx->task = task; | |
22530 | + ctx->cpu = -1; | |
22531 | + ctx->active_set = set; | |
22532 | + | |
22533 | + /* | |
22534 | + * perform any architecture specific actions | |
22535 | + */ | |
22536 | + ret = pfm_arch_load_context(ctx); | |
22537 | + if (ret) | |
22538 | + goto error_noload; | |
22539 | + | |
22540 | + /* | |
22541 | + * now reserve the session, before we can proceed with | |
22542 | + * actually accessing the PMU hardware | |
22543 | + */ | |
22544 | + ret = pfm_session_acquire(0, -1); | |
22545 | + if (ret) | |
22546 | + goto error; | |
22547 | + | |
22548 | + | |
22549 | + set->runs++; | |
22550 | + if (ctx->task != current) { | |
22551 | + | |
22552 | + ctx->flags.is_self = 0; | |
22553 | + | |
22554 | + /* force a full reload */ | |
22555 | + ctx->last_act = PFM_INVALID_ACTIVATION; | |
22556 | + ctx->last_cpu = -1; | |
22557 | + set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; | |
22558 | + | |
22559 | + } else { | |
22560 | + pfm_check_save_prev_ctx(); | |
22561 | + | |
22562 | + ctx->last_cpu = smp_processor_id(); | |
22563 | + __get_cpu_var(pmu_activation_number)++; | |
22564 | + ctx->last_act = __get_cpu_var(pmu_activation_number); | |
22565 | + | |
22566 | + ctx->flags.is_self = 1; | |
22567 | + | |
22568 | + /* | |
22569 | + * load PMD from set | |
22570 | + * load PMC from set | |
22571 | + */ | |
22572 | + pfm_arch_restore_pmds(ctx, set); | |
22573 | + pfm_arch_restore_pmcs(ctx, set); | |
22574 | + | |
22575 | + /* | |
22576 | + * set new ownership | |
22577 | + */ | |
22578 | + pfm_set_pmu_owner(ctx->task, ctx); | |
22579 | + } | |
22580 | + set_tsk_thread_flag(task, TIF_PERFMON_CTXSW); | |
22581 | + | |
22582 | + /* | |
22583 | + * reset pending work | |
22584 | + */ | |
22585 | + ctx->flags.work_type = PFM_WORK_NONE; | |
22586 | + ctx->flags.reset_count = 0; | |
22587 | + | |
22588 | + /* | |
22589 | + * reset message queue | |
22590 | + */ | |
22591 | + ctx->msgq_head = ctx->msgq_tail = 0; | |
22592 | + | |
22593 | + ctx->state = PFM_CTX_LOADED; | |
22594 | + | |
22595 | + return 0; | |
22596 | + | |
22597 | +error: | |
22598 | + pfm_arch_unload_context(ctx); | |
22599 | + ctx->task = NULL; | |
22600 | +error_noload: | |
22601 | + /* | |
22602 | + * detach context | |
22603 | + */ | |
22604 | + task->pfm_context = NULL; | |
22605 | + return ret; | |
22606 | +} | |
22607 | + | |
22608 | +/** | |
22609 | + * __pfm_load_context - attach context to a CPU or thread | |
22610 | + * @ctx: context to operate on | |
22611 | + * @load: pfarg_load as passed by user | |
22612 | + * @task: thread to attach to, NULL for system-wide | |
22613 | + */ | |
22614 | +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *load, | |
22615 | + struct task_struct *task) | |
22616 | +{ | |
22617 | + if (ctx->flags.system) | |
22618 | + return pfm_load_ctx_sys(ctx, load->load_set, load->load_pid); | |
22619 | + return pfm_load_ctx_thread(ctx, load->load_set, task); | |
22620 | +} | |
22621 | + | |
22622 | +/** | |
22623 | + * pfm_update_ovfl_pmds - account for pending ovfls on PMDs | |
22624 | + * @ctx: context to operate on | |
22625 | + * | |
22626 | + * This function is always called after pfm_stop has been issued | |
22627 | + */ | |
22628 | +static void pfm_update_ovfl_pmds(struct pfm_context *ctx) | |
22629 | +{ | |
22630 | + struct pfm_event_set *set; | |
22631 | + u64 *cnt_pmds; | |
22632 | + u64 ovfl_mask; | |
22633 | + u16 num_ovfls, i, first; | |
22634 | + | |
22635 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
22636 | + first = ctx->regs.first_intr_pmd; | |
22637 | + cnt_pmds = ctx->regs.cnt_pmds; | |
22638 | + | |
22639 | + /* | |
22640 | + * look for pending interrupts and adjust PMD values accordingly | |
22641 | + */ | |
22642 | + list_for_each_entry(set, &ctx->set_list, list) { | |
22643 | + | |
22644 | + if (!set->npend_ovfls) | |
22645 | + continue; | |
22646 | + | |
22647 | + num_ovfls = set->npend_ovfls; | |
22648 | + PFM_DBG("set%u nintrs=%u", set->id, num_ovfls); | |
22649 | + | |
22650 | + for (i = first; num_ovfls; i++) { | |
22651 | + if (test_bit(i, cast_ulp(set->povfl_pmds))) { | |
22652 | + /* only correct value for counters */ | |
22653 | + if (test_bit(i, cast_ulp(cnt_pmds))) | |
22654 | + set->pmds[i].value += 1 + ovfl_mask; | |
22655 | + num_ovfls--; | |
22656 | + } | |
22657 | + PFM_DBG("pmd%u set=%u val=0x%llx", | |
22658 | + i, | |
22659 | + set->id, | |
22660 | + (unsigned long long)set->pmds[i].value); | |
22661 | + } | |
22662 | + /* | |
22663 | + * we need to clear to prevent a pfm_getinfo_evtsets() from | |
22664 | + * returning stale data even after the context is unloaded | |
22665 | + */ | |
22666 | + set->npend_ovfls = 0; | |
22667 | + bitmap_zero(cast_ulp(set->povfl_pmds), ctx->regs.max_intr_pmd); | |
22668 | + } | |
22669 | +} | |
22670 | + | |
22671 | + | |
22672 | +/** | |
22673 | + * __pfm_unload_context - detach context from CPU or thread | |
22674 | + * @ctx: context to operate on | |
22675 | + * @release_info: pointer to return info (see below) | |
22676 | + * | |
22677 | + * The function must be called with the context locked and interrupts disabled. | |
22678 | + * | |
22679 | + * release_info value upon return: | |
22680 | + * - bit 0: when set, must free context | |
22681 | + * - bit 1: when set, must cancel hrtimer | |
22682 | + */ | |
22683 | +int __pfm_unload_context(struct pfm_context *ctx, int *release_info) | |
22684 | +{ | |
22685 | + struct task_struct *task; | |
22686 | + int ret; | |
22687 | + | |
22688 | + PFM_DBG("ctx_state=%d task [%d]", | |
22689 | + ctx->state, | |
22690 | + ctx->task ? ctx->task->pid : -1); | |
22691 | + | |
22692 | + *release_info = 0; | |
22693 | + | |
22694 | + /* | |
22695 | + * unload only when necessary | |
22696 | + */ | |
22697 | + if (ctx->state == PFM_CTX_UNLOADED) | |
22698 | + return 0; | |
22699 | + | |
22700 | + task = ctx->task; | |
22701 | + | |
22702 | + /* | |
22703 | + * stop monitoring | |
22704 | + */ | |
22705 | + ret = __pfm_stop(ctx, release_info); | |
22706 | + if (ret) | |
22707 | + return ret; | |
22708 | + | |
22709 | + ctx->state = PFM_CTX_UNLOADED; | |
22710 | + ctx->flags.can_restart = 0; | |
22711 | + | |
22712 | + /* | |
22713 | + * save active set | |
22714 | + * UP: | |
22715 | + * if not current task and due to lazy, state may | |
22716 | + * still be live | |
22717 | + * for system-wide, guaranteed to run on correct CPU | |
22718 | + */ | |
22719 | + if (__get_cpu_var(pmu_ctx) == ctx) { | |
22720 | + /* | |
22721 | + * pending overflows have been saved by pfm_stop() | |
22722 | + */ | |
22723 | + pfm_save_pmds(ctx, ctx->active_set); | |
22724 | + pfm_set_pmu_owner(NULL, NULL); | |
22725 | + PFM_DBG("released ownership"); | |
22726 | + } | |
22727 | + | |
22728 | + /* | |
22729 | + * account for pending overflows | |
22730 | + */ | |
22731 | + pfm_update_ovfl_pmds(ctx); | |
22732 | + | |
22733 | + /* | |
22734 | + * arch-specific unload operations | |
22735 | + */ | |
22736 | + pfm_arch_unload_context(ctx); | |
22737 | + | |
22738 | + /* | |
22739 | + * per-thread: disconnect from monitored task | |
22740 | + */ | |
22741 | + if (task) { | |
22742 | + task->pfm_context = NULL; | |
22743 | + ctx->task = NULL; | |
22744 | + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW); | |
22745 | + clear_tsk_thread_flag(task, TIF_PERFMON_WORK); | |
22746 | + pfm_arch_disarm_handle_work(task); | |
22747 | + } | |
22748 | + /* | |
22749 | + * session can be freed, must have interrupts enabled | |
22750 | + * thus we release in the caller. Bit 0 signals to the | |
22751 | + * caller that the session can be released. | |
22752 | + */ | |
22753 | + *release_info |= 0x1; | |
22754 | + | |
22755 | + return 0; | |
22756 | +} | |
22757 | + | |
22758 | +/** | |
22759 | + * __pfm_exit_thread - detach and free context on thread exit | |
22760 | + */ | |
22761 | +void __pfm_exit_thread(void) | |
22762 | +{ | |
22763 | + struct pfm_context *ctx; | |
22764 | + unsigned long flags; | |
22765 | + int free_ok = 0, release_info = 0; | |
22766 | + int ret; | |
22767 | + | |
22768 | + ctx = current->pfm_context; | |
22769 | + | |
22770 | + BUG_ON(ctx->flags.system); | |
22771 | + | |
22772 | + spin_lock_irqsave(&ctx->lock, flags); | |
22773 | + | |
22774 | + PFM_DBG("state=%d is_self=%d", ctx->state, ctx->flags.is_self); | |
22775 | + | |
22776 | + /* | |
22777 | + * __pfm_unload_context() cannot fail | |
22778 | + * in the context states we are interested in | |
22779 | + */ | |
22780 | + switch (ctx->state) { | |
22781 | + case PFM_CTX_LOADED: | |
22782 | + case PFM_CTX_MASKED: | |
22783 | + __pfm_unload_context(ctx, &release_info); | |
22784 | + /* | |
22785 | + * end notification only sent for non | |
22786 | + * self-monitoring context | |
22787 | + */ | |
22788 | + if (!ctx->flags.is_self) | |
22789 | + pfm_end_notify(ctx); | |
22790 | + break; | |
22791 | + case PFM_CTX_ZOMBIE: | |
22792 | + __pfm_unload_context(ctx, &release_info); | |
22793 | + free_ok = 1; | |
22794 | + break; | |
22795 | + default: | |
22796 | + BUG_ON(ctx->state != PFM_CTX_LOADED); | |
22797 | + break; | |
22798 | + } | |
22799 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
22800 | + | |
22801 | + /* | |
22802 | + * cancel timer now that context is unlocked | |
22803 | + */ | |
22804 | + if (release_info & 0x2) { | |
22805 | + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); | |
22806 | + PFM_DBG("timeout cancel=%d", ret); | |
22807 | + } | |
22808 | + | |
22809 | + if (release_info & 0x1) | |
22810 | + pfm_session_release(0, 0); | |
22811 | + | |
22812 | + /* | |
22813 | + * All memory free operations (especially for vmalloc'ed memory) | |
22814 | + * MUST be done with interrupts ENABLED. | |
22815 | + */ | |
22816 | + if (free_ok) | |
22817 | + pfm_free_context(ctx); | |
22818 | +} | |
22819 | --- /dev/null | |
22820 | +++ b/perfmon/perfmon_ctx.c | |
22821 | @@ -0,0 +1,314 @@ | |
22822 | +/* | |
22823 | + * perfmon_ctx.c: perfmon2 context functions | |
22824 | + * | |
22825 | + * This file implements the perfmon2 interface which | |
22826 | + * provides access to the hardware performance counters | |
22827 | + * of the host processor. | |
22828 | + * | |
22829 | + * | |
22830 | + * The initial version of perfmon.c was written by | |
22831 | + * Ganesh Venkitachalam, IBM Corp. | |
22832 | + * | |
22833 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
22834 | + * David Mosberger, Hewlett Packard Co. | |
22835 | + * | |
22836 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
22837 | + * by Stephane Eranian, Hewlett Packard Co. | |
22838 | + * | |
22839 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
22840 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
22841 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
22842 | + * | |
22843 | + * More information about perfmon available at: | |
22844 | + * http://perfmon2.sf.net | |
22845 | + * | |
22846 | + * This program is free software; you can redistribute it and/or | |
22847 | + * modify it under the terms of version 2 of the GNU General Public | |
22848 | + * License as published by the Free Software Foundation. | |
22849 | + * | |
22850 | + * This program is distributed in the hope that it will be useful, | |
22851 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22852 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
22853 | + * General Public License for more details. | |
22854 | + * | |
22855 | + * You should have received a copy of the GNU General Public License | |
22856 | + * along with this program; if not, write to the Free Software | |
22857 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
22858 | + * 02111-1307 USA | |
22859 | + */ | |
22860 | +#include <linux/kernel.h> | |
22861 | +#include <linux/fs.h> | |
22862 | +#include <linux/perfmon_kern.h> | |
22863 | +#include "perfmon_priv.h" | |
22864 | + | |
22865 | +/* | |
22866 | + * context memory pool pointer | |
22867 | + */ | |
22868 | +static struct kmem_cache *pfm_ctx_cachep; | |
22869 | + | |
22870 | +/** | |
22871 | + * pfm_free_context - de-allocate context and associated resources | |
22872 | + * @ctx: context to free | |
22873 | + */ | |
22874 | +void pfm_free_context(struct pfm_context *ctx) | |
22875 | +{ | |
22876 | + pfm_arch_context_free(ctx); | |
22877 | + | |
22878 | + pfm_free_sets(ctx); | |
22879 | + | |
22880 | + pfm_smpl_buf_free(ctx); | |
22881 | + | |
22882 | + PFM_DBG("free ctx @0x%p", ctx); | |
22883 | + kmem_cache_free(pfm_ctx_cachep, ctx); | |
22884 | + /* | |
22885 | + * decrease refcount on: | |
22886 | + * - PMU description table | |
22887 | + * - sampling format | |
22888 | + */ | |
22889 | + pfm_pmu_conf_put(); | |
22890 | + pfm_pmu_release(); | |
22891 | +} | |
22892 | + | |
22893 | +/** | |
22894 | + * pfm_ctx_flags_sane - check if context flags passed by user are okay | |
22895 | + * @ctx_flags: flags passed user on pfm_create_context | |
22896 | + * | |
22897 | + * return: | |
22898 | + * 0 if successful | |
22899 | + * <0 and error code otherwise | |
22900 | + */ | |
22901 | +static inline int pfm_ctx_flags_sane(u32 ctx_flags) | |
22902 | +{ | |
22903 | + if (ctx_flags & PFM_FL_SYSTEM_WIDE) { | |
22904 | + if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { | |
22905 | + PFM_DBG("cannot use blocking mode in syswide mode"); | |
22906 | + return -EINVAL; | |
22907 | + } | |
22908 | + } | |
22909 | + return 0; | |
22910 | +} | |
22911 | + | |
22912 | +/** | |
22913 | + * pfm_ctx_permissions - check authorization to create new context | |
22914 | + * @ctx_flags: context flags passed by user | |
22915 | + * | |
22916 | + * check for permissions to create a context. | |
22917 | + * | |
22918 | + * A sysadmin may decide to restrict creation of per-thread | |
22919 | + * and/or system-wide context to a group of users using the | |
22920 | + * group id via /sys/kernel/perfmon/task_group and | |
22921 | + * /sys/kernel/perfmon/sys_group. | |
22922 | + * | |
22923 | + * Once we identify a user level package which can be used | |
22924 | + * to grant/revoke Linux capabilities at login via PAM, we will | |
22925 | + * be able to use capabilities. We would also need to increase | |
22926 | + * the size of cap_t to support more than 32 capabilities (it | |
22927 | + * is currently defined as u32 and 32 capabilities are already | |
22928 | + * defined). | |
22929 | + */ | |
22930 | +static inline int pfm_ctx_permissions(u32 ctx_flags) | |
22931 | +{ | |
22932 | + if ((ctx_flags & PFM_FL_SYSTEM_WIDE) | |
22933 | + && pfm_controls.sys_group != PFM_GROUP_PERM_ANY | |
22934 | + && !in_group_p(pfm_controls.sys_group)) { | |
22935 | + PFM_DBG("user group not allowed to create a syswide ctx"); | |
22936 | + return -EPERM; | |
22937 | + } else if (pfm_controls.task_group != PFM_GROUP_PERM_ANY | |
22938 | + && !in_group_p(pfm_controls.task_group)) { | |
22939 | + PFM_DBG("user group not allowed to create a task context"); | |
22940 | + return -EPERM; | |
22941 | + } | |
22942 | + return 0; | |
22943 | +} | |
22944 | + | |
22945 | +/** | |
22946 | + * __pfm_create_context - allocate and initialize a perfmon context | |
22947 | + * @req : pfarg_ctx from user | |
22948 | + * @fmt : pointer sampling format, NULL if not used | |
22949 | + * @fmt_arg: pointer to argument to sampling format, NULL if not used | |
22950 | + * @mode: PFM_NORMAL or PFM_COMPAT(IA-64 v2.0 compatibility) | |
22951 | + * @ctx : address of new context upon successful return, undefined otherwise | |
22952 | + * | |
22953 | + * function used to allocate a new context. A context is allocated along | |
22954 | + * with the default event set. If a sampling format is used, the buffer | |
22955 | + * may be allocated and initialized. | |
22956 | + * | |
22957 | + * The file descriptor identifying the context is allocated and returned | |
22958 | + * to caller. | |
22959 | + * | |
22960 | + * This function operates with no locks and interrupts are enabled. | |
22961 | + * return: | |
22962 | + * >=0: the file descriptor to identify the context | |
22963 | + * <0 : the error code | |
22964 | + */ | |
22965 | +int __pfm_create_context(struct pfarg_ctx *req, | |
22966 | + struct pfm_smpl_fmt *fmt, | |
22967 | + void *fmt_arg, | |
22968 | + int mode, | |
22969 | + struct pfm_context **new_ctx) | |
22970 | +{ | |
22971 | + struct pfm_context *ctx; | |
22972 | + struct file *filp = NULL; | |
22973 | + u32 ctx_flags; | |
22974 | + int fd = 0, ret; | |
22975 | + | |
22976 | + ctx_flags = req->ctx_flags; | |
22977 | + | |
22978 | + /* Increase refcount on PMU description */ | |
22979 | + ret = pfm_pmu_conf_get(1); | |
22980 | + if (ret < 0) | |
22981 | + goto error_conf; | |
22982 | + | |
22983 | + ret = pfm_ctx_flags_sane(ctx_flags); | |
22984 | + if (ret < 0) | |
22985 | + goto error_alloc; | |
22986 | + | |
22987 | + ret = pfm_ctx_permissions(ctx_flags); | |
22988 | + if (ret < 0) | |
22989 | + goto error_alloc; | |
22990 | + | |
22991 | + /* | |
22992 | + * we can use GFP_KERNEL and potentially sleep because we do | |
22993 | + * not hold any lock at this point. | |
22994 | + */ | |
22995 | + might_sleep(); | |
22996 | + ret = -ENOMEM; | |
22997 | + ctx = kmem_cache_zalloc(pfm_ctx_cachep, GFP_KERNEL); | |
22998 | + if (!ctx) | |
22999 | + goto error_alloc; | |
23000 | + | |
23001 | + PFM_DBG("alloc ctx @0x%p", ctx); | |
23002 | + | |
23003 | + INIT_LIST_HEAD(&ctx->set_list); | |
23004 | + spin_lock_init(&ctx->lock); | |
23005 | + init_completion(&ctx->restart_complete); | |
23006 | + init_waitqueue_head(&ctx->msgq_wait); | |
23007 | + | |
23008 | + /* | |
23009 | + * context is unloaded | |
23010 | + */ | |
23011 | + ctx->state = PFM_CTX_UNLOADED; | |
23012 | + | |
23013 | + /* | |
23014 | + * initialization of context's flags | |
23015 | + * must be done before pfm_find_set() | |
23016 | + */ | |
23017 | + ctx->flags.block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; | |
23018 | + ctx->flags.system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; | |
23019 | + ctx->flags.no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; | |
23020 | + ctx->flags.ia64_v20_compat = mode == PFM_COMPAT ? 1 : 0; | |
23021 | + | |
23022 | + ret = pfm_pmu_acquire(ctx); | |
23023 | + if (ret) | |
23024 | + goto error_file; | |
23025 | + /* | |
23026 | + * check if PMU is usable: need both config and data registers | |
23027 | + */ | |
23028 | + if (!(ctx->regs.num_pmcs && ctx->regs.num_pmds)) { | |
23029 | + PFM_DBG("no usable PMU registers"); | |
23030 | + ret = -EBUSY; | |
23031 | + goto error_file; | |
23032 | + } | |
23033 | + | |
23034 | + /* | |
23035 | + * link to format, must be done first for correct | |
23036 | + * error handling in pfm_free_context() | |
23037 | + */ | |
23038 | + ctx->smpl_fmt = fmt; | |
23039 | + | |
23040 | + ret = -ENFILE; | |
23041 | + fd = pfm_alloc_fd(&filp); | |
23042 | + if (fd < 0) | |
23043 | + goto error_file; | |
23044 | + | |
23045 | + /* | |
23046 | + * initialize arch-specific section | |
23047 | + * must be done before fmt_init() | |
23048 | + */ | |
23049 | + ret = pfm_arch_context_create(ctx, ctx_flags); | |
23050 | + if (ret) | |
23051 | + goto error_set; | |
23052 | + | |
23053 | + ret = -ENOMEM; | |
23054 | + | |
23055 | + /* | |
23056 | + * add initial set | |
23057 | + */ | |
23058 | + if (pfm_create_initial_set(ctx)) | |
23059 | + goto error_set; | |
23060 | + | |
23061 | + /* | |
23062 | + * does the user want to sample? | |
23063 | + * must be done after pfm_pmu_acquire() because | |
23064 | + * needs ctx->regs | |
23065 | + */ | |
23066 | + if (fmt) { | |
23067 | + ret = pfm_setup_smpl_fmt(ctx, ctx_flags, fmt_arg, filp); | |
23068 | + if (ret) | |
23069 | + goto error_set; | |
23070 | + } | |
23071 | + | |
23072 | + filp->private_data = ctx; | |
23073 | + | |
23074 | + ctx->last_act = PFM_INVALID_ACTIVATION; | |
23075 | + ctx->last_cpu = -1; | |
23076 | + | |
23077 | + /* | |
23078 | + * initialize notification message queue | |
23079 | + */ | |
23080 | + ctx->msgq_head = ctx->msgq_tail = 0; | |
23081 | + | |
23082 | + PFM_DBG("flags=0x%x system=%d notify_block=%d no_msg=%d" | |
23083 | + " use_fmt=%d ctx_fd=%d mode=%d", | |
23084 | + ctx_flags, | |
23085 | + ctx->flags.system, | |
23086 | + ctx->flags.block, | |
23087 | + ctx->flags.no_msg, | |
23088 | + !!fmt, | |
23089 | + fd, mode); | |
23090 | + | |
23091 | + if (new_ctx) | |
23092 | + *new_ctx = ctx; | |
23093 | + | |
23094 | + /* | |
23095 | + * we defer the fd_install until we are certain the call succeeded | |
23096 | + * to ensure we do not have to undo its effect. Neither put_filp() | |
23097 | + * nor put_unused_fd() undoes the effect of fd_install(). | |
23098 | + */ | |
23099 | + fd_install(fd, filp); | |
23100 | + | |
23101 | + return fd; | |
23102 | + | |
23103 | +error_set: | |
23104 | + put_filp(filp); | |
23105 | + put_unused_fd(fd); | |
23106 | +error_file: | |
23107 | + /* | |
23108 | + * calls the right *_put() functions | |
23109 | + * calls pfm_release_pmu() | |
23110 | + */ | |
23111 | + pfm_free_context(ctx); | |
23112 | + return ret; | |
23113 | +error_alloc: | |
23114 | + pfm_pmu_conf_put(); | |
23115 | +error_conf: | |
23116 | + pfm_smpl_fmt_put(fmt); | |
23117 | + return ret; | |
23118 | +} | |
23119 | + | |
23120 | +/** | |
23121 | + * pfm_init_ctx -- initialize context SLAB | |
23122 | + * | |
23123 | + * called from pfm_init | |
23124 | + */ | |
23125 | +int __init pfm_init_ctx(void) | |
23126 | +{ | |
23127 | + pfm_ctx_cachep = kmem_cache_create("pfm_context", | |
23128 | + sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE, | |
23129 | + SLAB_HWCACHE_ALIGN, 0, NULL); | |
23130 | + if (!pfm_ctx_cachep) { | |
23131 | + PFM_ERR("cannot initialize context slab"); | |
23132 | + return -ENOMEM; | |
23133 | + } | |
23134 | + return 0; | |
23135 | +} | |
23136 | --- /dev/null | |
23137 | +++ b/perfmon/perfmon_ctxsw.c | |
23138 | @@ -0,0 +1,342 @@ | |
23139 | +/* | |
23140 | + * perfmon_ctxsw.c: perfmon2 context switch code | |
23141 | + * | |
23142 | + * This file implements the perfmon2 interface which | |
23143 | + * provides access to the hardware performance counters | |
23144 | + * of the host processor. | |
23145 | + * | |
23146 | + * The initial version of perfmon.c was written by | |
23147 | + * Ganesh Venkitachalam, IBM Corp. | |
23148 | + * | |
23149 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
23150 | + * David Mosberger, Hewlett Packard Co. | |
23151 | + * | |
23152 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
23153 | + * by Stephane Eranian, Hewlett Packard Co. | |
23154 | + * | |
23155 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
23156 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
23157 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
23158 | + * | |
23159 | + * More information about perfmon available at: | |
23160 | + * http://perfmon2.sf.net | |
23161 | + * | |
23162 | + * This program is free software; you can redistribute it and/or | |
23163 | + * modify it under the terms of version 2 of the GNU General Public | |
23164 | + * License as published by the Free Software Foundation. | |
23165 | + * | |
23166 | + * This program is distributed in the hope that it will be useful, | |
23167 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23168 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
23169 | + * General Public License for more details. | |
23170 | + * | |
23171 | + * You should have received a copy of the GNU General Public License | |
23172 | + * along with this program; if not, write to the Free Software | |
23173 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
23174 | + * 02111-1307 USA | |
23175 | + */ | |
23176 | +#include <linux/kernel.h> | |
23177 | +#include <linux/perfmon_kern.h> | |
23178 | +#include "perfmon_priv.h" | |
23179 | + | |
23180 | +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set) | |
23181 | +{ | |
23182 | + u64 val, ovfl_mask; | |
23183 | + u64 *used_pmds, *cnt_pmds; | |
23184 | + u16 i, num; | |
23185 | + | |
23186 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
23187 | + num = set->nused_pmds; | |
23188 | + cnt_pmds = ctx->regs.cnt_pmds; | |
23189 | + used_pmds = set->used_pmds; | |
23190 | + | |
23191 | + /* | |
23192 | + * save HW PMD, for counters, reconstruct 64-bit value | |
23193 | + */ | |
23194 | + for (i = 0; num; i++) { | |
23195 | + if (test_bit(i, cast_ulp(used_pmds))) { | |
23196 | + val = pfm_read_pmd(ctx, i); | |
23197 | + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) | |
23198 | + val = (set->pmds[i].value & ~ovfl_mask) | | |
23199 | + (val & ovfl_mask); | |
23200 | + set->pmds[i].value = val; | |
23201 | + num--; | |
23202 | + } | |
23203 | + } | |
23204 | + pfm_arch_clear_pmd_ovfl_cond(ctx, set); | |
23205 | +} | |
23206 | + | |
23207 | +/* | |
23208 | + * interrupts are disabled (no preemption) | |
23209 | + */ | |
23210 | +void __pfm_ctxswin_thread(struct task_struct *task, | |
23211 | + struct pfm_context *ctx, u64 now) | |
23212 | +{ | |
23213 | + u64 cur_act; | |
23214 | + struct pfm_event_set *set; | |
23215 | + int reload_pmcs, reload_pmds; | |
23216 | + int mycpu, is_active; | |
23217 | + | |
23218 | + mycpu = smp_processor_id(); | |
23219 | + | |
23220 | + cur_act = __get_cpu_var(pmu_activation_number); | |
23221 | + /* | |
23222 | + * we need to lock context because it could be accessed | |
23223 | + * from another CPU. Normally the schedule() functions | |
23224 | + * has masked interrupts which should be enough to | |
23225 | + * protect against PMU interrupts. | |
23226 | + */ | |
23227 | + spin_lock(&ctx->lock); | |
23228 | + | |
23229 | + is_active = pfm_arch_is_active(ctx); | |
23230 | + | |
23231 | + set = ctx->active_set; | |
23232 | + | |
23233 | + /* | |
23234 | + * in case of zombie, we do not complete ctxswin of the | |
23235 | + * PMU, and we force a call to pfm_handle_work() to finish | |
23236 | + * cleanup, i.e., free context + smpl_buff. The reason for | |
23237 | + * deferring to pfm_handle_work() is that it is not possible | |
23238 | + * to vfree() with interrupts disabled. | |
23239 | + */ | |
23240 | + if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) { | |
23241 | + pfm_post_work(task, ctx, PFM_WORK_ZOMBIE); | |
23242 | + goto done; | |
23243 | + } | |
23244 | + | |
23245 | + /* | |
23246 | + * if we were the last user of the PMU on that CPU, | |
23247 | + * then nothing to do except restore psr | |
23248 | + */ | |
23249 | + if (ctx->last_cpu == mycpu && ctx->last_act == cur_act) { | |
23250 | + /* | |
23251 | + * check for forced reload conditions | |
23252 | + */ | |
23253 | + reload_pmcs = set->priv_flags & PFM_SETFL_PRIV_MOD_PMCS; | |
23254 | + reload_pmds = set->priv_flags & PFM_SETFL_PRIV_MOD_PMDS; | |
23255 | + } else { | |
23256 | +#ifndef CONFIG_SMP | |
23257 | + pfm_check_save_prev_ctx(); | |
23258 | +#endif | |
23259 | + reload_pmcs = 1; | |
23260 | + reload_pmds = 1; | |
23261 | + } | |
23262 | + /* consumed */ | |
23263 | + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; | |
23264 | + | |
23265 | + if (reload_pmds) | |
23266 | + pfm_arch_restore_pmds(ctx, set); | |
23267 | + | |
23268 | + /* | |
23269 | + * need to check if had in-flight interrupt in | |
23270 | + * pfm_ctxswout_thread(). If at least one bit set, then we must replay | |
23271 | + * the interrupt to avoid losing some important performance data. | |
23272 | + * | |
23273 | + * npend_ovfls is cleared in interrupt handler | |
23274 | + */ | |
23275 | + if (set->npend_ovfls) { | |
23276 | + pfm_arch_resend_irq(ctx); | |
23277 | + pfm_stats_inc(ovfl_intr_replay_count); | |
23278 | + } | |
23279 | + | |
23280 | + if (reload_pmcs) | |
23281 | + pfm_arch_restore_pmcs(ctx, set); | |
23282 | + | |
23283 | + /* | |
23284 | + * record current activation for this context | |
23285 | + */ | |
23286 | + __get_cpu_var(pmu_activation_number)++; | |
23287 | + ctx->last_cpu = mycpu; | |
23288 | + ctx->last_act = __get_cpu_var(pmu_activation_number); | |
23289 | + | |
23290 | + /* | |
23291 | + * establish new ownership. | |
23292 | + */ | |
23293 | + pfm_set_pmu_owner(task, ctx); | |
23294 | + | |
23295 | + pfm_arch_ctxswin_thread(task, ctx); | |
23296 | + /* | |
23297 | + * set->duration does not count when context in MASKED state. | |
23298 | + * set->duration_start is reset in unmask_monitoring() | |
23299 | + */ | |
23300 | + set->duration_start = now; | |
23301 | + | |
23302 | + /* | |
23303 | + * re-arm switch timeout, if necessary | |
23304 | + * Timeout is active only if monitoring is active, | |
23305 | + * i.e., LOADED + started | |
23306 | + * | |
23307 | + * We reload the remainder timeout or the full timeout. | |
23308 | + * Remainder is recorded on context switch out or in | |
23309 | + * pfm_load_context() | |
23310 | + */ | |
23311 | + if (ctx->state == PFM_CTX_LOADED | |
23312 | + && (set->flags & PFM_SETFL_TIME_SWITCH) && is_active) { | |
23313 | + pfm_restart_timer(ctx, set); | |
23314 | + /* careful here as pfm_restart_timer may switch sets */ | |
23315 | + } | |
23316 | +done: | |
23317 | + spin_unlock(&ctx->lock); | |
23318 | +} | |
23319 | + | |
23320 | +/* | |
23321 | + * interrupts are masked, runqueue lock is held. | |
23322 | + * | |
23323 | + * In UP. we simply stop monitoring and leave the state | |
23324 | + * in place, i.e., lazy save | |
23325 | + */ | |
23326 | +void __pfm_ctxswout_thread(struct task_struct *task, | |
23327 | + struct pfm_context *ctx, u64 now) | |
23328 | +{ | |
23329 | + struct pfm_event_set *set; | |
23330 | + int need_save_pmds, is_active; | |
23331 | + | |
23332 | + /* | |
23333 | + * we need to lock context because it could be accessed | |
23334 | + * from another CPU. Normally the schedule() functions | |
23335 | + * has masked interrupts which should be enough to | |
23336 | + * protect against PMU interrupts. | |
23337 | + */ | |
23338 | + | |
23339 | + spin_lock(&ctx->lock); | |
23340 | + | |
23341 | + is_active = pfm_arch_is_active(ctx); | |
23342 | + set = ctx->active_set; | |
23343 | + | |
23344 | + /* | |
23345 | + * stop monitoring and | |
23346 | + * collect pending overflow information | |
23347 | + * needed on ctxswin. We cannot afford to lose | |
23348 | + * a PMU interrupt. | |
23349 | + */ | |
23350 | + need_save_pmds = pfm_arch_ctxswout_thread(task, ctx); | |
23351 | + | |
23352 | + if (ctx->state == PFM_CTX_LOADED) { | |
23353 | + /* | |
23354 | + * accumulate only when set is actively monitoring, | |
23355 | + */ | |
23356 | + set->duration += now - set->duration_start; | |
23357 | + | |
23358 | + /* | |
23359 | + * record remaining timeout | |
23360 | + * reload in pfm_ctxsw_in() | |
23361 | + */ | |
23362 | + if (is_active && (set->flags & PFM_SETFL_TIME_SWITCH)) { | |
23363 | + struct hrtimer *h = NULL; | |
23364 | + h = &__get_cpu_var(pfm_hrtimer); | |
23365 | + hrtimer_cancel(h); | |
23366 | + set->hrtimer_rem = hrtimer_get_remaining(h); | |
23367 | + PFM_DBG_ovfl("hrtimer=%lld", | |
23368 | + (long long)set->hrtimer_rem.tv64); | |
23369 | + } | |
23370 | + } | |
23371 | + | |
23372 | +#ifdef CONFIG_SMP | |
23373 | + /* | |
23374 | + * in SMP, release ownership of this PMU. | |
23375 | + * PMU interrupts are masked, so nothing | |
23376 | + * can happen. | |
23377 | + */ | |
23378 | + pfm_set_pmu_owner(NULL, NULL); | |
23379 | + | |
23380 | + /* | |
23381 | + * On some architectures, it is necessary to read the | |
23382 | + * PMD registers to check for pending overflow in | |
23383 | + * pfm_arch_ctxswout_thread(). In that case, saving of | |
23384 | + * the PMDs may be done there and not here. | |
23385 | + */ | |
23386 | + if (need_save_pmds) | |
23387 | + pfm_save_pmds(ctx, set); | |
23388 | +#endif | |
23389 | + spin_unlock(&ctx->lock); | |
23390 | +} | |
23391 | + | |
23392 | +/* | |
23393 | + * | |
23394 | + */ | |
23395 | +static void __pfm_ctxswout_sys(struct task_struct *prev, | |
23396 | + struct task_struct *next) | |
23397 | +{ | |
23398 | + struct pfm_context *ctx; | |
23399 | + | |
23400 | + ctx = __get_cpu_var(pmu_ctx); | |
23401 | + BUG_ON(!ctx); | |
23402 | + | |
23403 | + /* | |
23404 | + * propagate TIF_PERFMON_CTXSW to ensure that: | |
23405 | + * - previous task has TIF_PERFMON_CTXSW cleared, in case it is | |
23406 | + * scheduled onto another CPU where there is syswide monitoring | |
23407 | + * - next task has TIF_PERFMON_CTXSW set to ensure it will come back | |
23408 | + * here when context switched out | |
23409 | + */ | |
23410 | + clear_tsk_thread_flag(prev, TIF_PERFMON_CTXSW); | |
23411 | + set_tsk_thread_flag(next, TIF_PERFMON_CTXSW); | |
23412 | + | |
23413 | + /* | |
23414 | + * nothing to do until actually started | |
23415 | + * XXX: assumes no means to start from user level | |
23416 | + */ | |
23417 | + if (!ctx->flags.started) | |
23418 | + return; | |
23419 | + | |
23420 | + pfm_arch_ctxswout_sys(prev, ctx); | |
23421 | +} | |
23422 | + | |
23423 | +/* | |
23424 | + * | |
23425 | + */ | |
23426 | +static void __pfm_ctxswin_sys(struct task_struct *prev, | |
23427 | + struct task_struct *next) | |
23428 | +{ | |
23429 | + struct pfm_context *ctx; | |
23430 | + | |
23431 | + ctx = __get_cpu_var(pmu_ctx); | |
23432 | + BUG_ON(!ctx); | |
23433 | + | |
23434 | + /* | |
23435 | + * nothing to do until actually started | |
23436 | + * XXX: assumes no means to start from user level | |
23437 | + */ | |
23438 | + if (!ctx->flags.started) | |
23439 | + return; | |
23440 | + | |
23441 | + pfm_arch_ctxswin_sys(next, ctx); | |
23442 | +} | |
23443 | + | |
23444 | +void pfm_ctxsw_out(struct task_struct *prev, | |
23445 | + struct task_struct *next) | |
23446 | +{ | |
23447 | + struct pfm_context *ctxp; | |
23448 | + u64 now; | |
23449 | + | |
23450 | + now = sched_clock(); | |
23451 | + | |
23452 | + ctxp = prev->pfm_context; | |
23453 | + | |
23454 | + if (ctxp) | |
23455 | + __pfm_ctxswout_thread(prev, ctxp, now); | |
23456 | + else | |
23457 | + __pfm_ctxswout_sys(prev, next); | |
23458 | + | |
23459 | + pfm_stats_inc(ctxswout_count); | |
23460 | + pfm_stats_add(ctxswout_ns, sched_clock() - now); | |
23461 | +} | |
23462 | + | |
23463 | +void pfm_ctxsw_in(struct task_struct *prev, | |
23464 | + struct task_struct *next) | |
23465 | +{ | |
23466 | + struct pfm_context *ctxn; | |
23467 | + u64 now; | |
23468 | + | |
23469 | + now = sched_clock(); | |
23470 | + | |
23471 | + ctxn = next->pfm_context; | |
23472 | + | |
23473 | + if (ctxn) | |
23474 | + __pfm_ctxswin_thread(next, ctxn, now); | |
23475 | + else | |
23476 | + __pfm_ctxswin_sys(prev, next); | |
23477 | + | |
23478 | + pfm_stats_inc(ctxswin_count); | |
23479 | + pfm_stats_add(ctxswin_ns, sched_clock() - now); | |
23480 | +} | |
23481 | --- /dev/null | |
23482 | +++ b/perfmon/perfmon_debugfs.c | |
23483 | @@ -0,0 +1,168 @@ | |
23484 | +/* | |
23485 | + * perfmon_debugfs.c: perfmon2 statistics interface to debugfs | |
23486 | + * | |
23487 | + * This file implements the perfmon2 interface which | |
23488 | + * provides access to the hardware performance counters | |
23489 | + * of the host processor. | |
23490 | + * | |
23491 | + * The initial version of perfmon.c was written by | |
23492 | + * Ganesh Venkitachalam, IBM Corp. | |
23493 | + * | |
23494 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
23495 | + * David Mosberger, Hewlett Packard Co. | |
23496 | + * | |
23497 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
23498 | + * by Stephane Eranian, Hewlett Packard Co. | |
23499 | + * | |
23500 | + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. | |
23501 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
23502 | + * | |
23503 | + * More information about perfmon available at: | |
23504 | + * http://perfmon2.sf.net | |
23505 | + * | |
23506 | + * This program is free software; you can redistribute it and/or | |
23507 | + * modify it under the terms of version 2 of the GNU General Public | |
23508 | + * License as published by the Free Software Foundation. | |
23509 | + * | |
23510 | + * This program is distributed in the hope that it will be useful, | |
23511 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23512 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
23513 | + * General Public License for more details. | |
23514 | + * | |
23515 | + * You should have received a copy of the GNU General Public License | |
23516 | + * along with this program; if not, write to the Free Software | |
23517 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
23518 | + * 02111-1307 USA | |
23519 | + */ | |
23520 | +#include <linux/kernel.h> | |
23521 | +#include <linux/debugfs.h> | |
23522 | +#include <linux/perfmon_kern.h> | |
23523 | + | |
23524 | +/* | |
23525 | + * to make the statistics visible to user space: | |
23526 | + * $ mount -t debugfs none /mnt | |
23527 | + * $ cd /mnt/perfmon | |
23528 | + * then choose a CPU subdir | |
23529 | + */ | |
23530 | +DECLARE_PER_CPU(struct pfm_stats, pfm_stats); | |
23531 | + | |
23532 | +static struct dentry *pfm_debugfs_dir; | |
23533 | + | |
23534 | +void pfm_reset_stats(int cpu) | |
23535 | +{ | |
23536 | + struct pfm_stats *st; | |
23537 | + unsigned long flags; | |
23538 | + | |
23539 | + st = &per_cpu(pfm_stats, cpu); | |
23540 | + | |
23541 | + local_irq_save(flags); | |
23542 | + memset(st->v, 0, sizeof(st->v)); | |
23543 | + local_irq_restore(flags); | |
23544 | +} | |
23545 | + | |
23546 | +static const char *pfm_stats_strs[] = { | |
23547 | + "ovfl_intr_all_count", | |
23548 | + "ovfl_intr_ns", | |
23549 | + "ovfl_intr_spurious_count", | |
23550 | + "ovfl_intr_replay_count", | |
23551 | + "ovfl_intr_regular_count", | |
23552 | + "handle_work_count", | |
23553 | + "ovfl_notify_count", | |
23554 | + "reset_pmds_count", | |
23555 | + "pfm_restart_count", | |
23556 | + "fmt_handler_calls", | |
23557 | + "fmt_handler_ns", | |
23558 | + "set_switch_count", | |
23559 | + "set_switch_ns", | |
23560 | + "set_switch_exp", | |
23561 | + "ctxswin_count", | |
23562 | + "ctxswin_ns", | |
23563 | + "handle_timeout_count", | |
23564 | + "ovfl_intr_nmi_count", | |
23565 | + "ctxswout_count", | |
23566 | + "ctxswout_ns", | |
23567 | +}; | |
23568 | +#define PFM_NUM_STRS ARRAY_SIZE(pfm_stats_strs) | |
23569 | + | |
23570 | +void pfm_debugfs_del_cpu(int cpu) | |
23571 | +{ | |
23572 | + struct pfm_stats *st; | |
23573 | + int i; | |
23574 | + | |
23575 | + st = &per_cpu(pfm_stats, cpu); | |
23576 | + | |
23577 | + for (i = 0; i < PFM_NUM_STATS; i++) { | |
23578 | + if (st->dirs[i]) | |
23579 | + debugfs_remove(st->dirs[i]); | |
23580 | + st->dirs[i] = NULL; | |
23581 | + } | |
23582 | + if (st->cpu_dir) | |
23583 | + debugfs_remove(st->cpu_dir); | |
23584 | + st->cpu_dir = NULL; | |
23585 | +} | |
23586 | + | |
23587 | +int pfm_debugfs_add_cpu(int cpu) | |
23588 | +{ | |
23589 | + struct pfm_stats *st; | |
23590 | + int i; | |
23591 | + | |
23592 | + /* | |
23593 | + * sanity check between stats names and the number | |
23594 | + * of entries in the pfm_stats value array. | |
23595 | + */ | |
23596 | + if (PFM_NUM_STRS != PFM_NUM_STATS) { | |
23597 | + PFM_ERR("PFM_NUM_STRS != PFM_NUM_STATS error"); | |
23598 | + return -1; | |
23599 | + } | |
23600 | + | |
23601 | + st = &per_cpu(pfm_stats, cpu); | |
23602 | + sprintf(st->cpu_name, "cpu%d", cpu); | |
23603 | + | |
23604 | + st->cpu_dir = debugfs_create_dir(st->cpu_name, pfm_debugfs_dir); | |
23605 | + if (!st->cpu_dir) | |
23606 | + return -1; | |
23607 | + | |
23608 | + for (i = 0; i < PFM_NUM_STATS; i++) { | |
23609 | + st->dirs[i] = debugfs_create_u64(pfm_stats_strs[i], | |
23610 | + S_IRUGO, | |
23611 | + st->cpu_dir, | |
23612 | + &st->v[i]); | |
23613 | + if (!st->dirs[i]) | |
23614 | + goto error; | |
23615 | + } | |
23616 | + pfm_reset_stats(cpu); | |
23617 | + return 0; | |
23618 | +error: | |
23619 | + while (i >= 0) { | |
23620 | + debugfs_remove(st->dirs[i]); | |
23621 | + i--; | |
23622 | + } | |
23623 | + debugfs_remove(st->cpu_dir); | |
23624 | + return -1; | |
23625 | +} | |
23626 | + | |
23627 | +/* | |
23628 | + * called once from pfm_init() | |
23629 | + */ | |
23630 | +int __init pfm_init_debugfs(void) | |
23631 | +{ | |
23632 | + int cpu1, cpu2, ret; | |
23633 | + | |
23634 | + pfm_debugfs_dir = debugfs_create_dir("perfmon", NULL); | |
23635 | + if (!pfm_debugfs_dir) | |
23636 | + return -1; | |
23637 | + | |
23638 | + for_each_online_cpu(cpu1) { | |
23639 | + ret = pfm_debugfs_add_cpu(cpu1); | |
23640 | + if (ret) | |
23641 | + goto error; | |
23642 | + } | |
23643 | + return 0; | |
23644 | +error: | |
23645 | + for_each_online_cpu(cpu2) { | |
23646 | + if (cpu2 == cpu1) | |
23647 | + break; | |
23648 | + pfm_debugfs_del_cpu(cpu2); | |
23649 | + } | |
23650 | + return -1; | |
23651 | +} | |
23652 | --- /dev/null | |
23653 | +++ b/perfmon/perfmon_dfl_smpl.c | |
23654 | @@ -0,0 +1,298 @@ | |
23655 | +/* | |
23656 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
23657 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
23658 | + * | |
23659 | + * This file implements the new default sampling buffer format | |
23660 | + * for the perfmon2 subsystem. | |
23661 | + * | |
23662 | + * This program is free software; you can redistribute it and/or | |
23663 | + * modify it under the terms of version 2 of the GNU General Public | |
23664 | + * License as published by the Free Software Foundation. | |
23665 | + * | |
23666 | + * This program is distributed in the hope that it will be useful, | |
23667 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23668 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
23669 | + * General Public License for more details. | |
23670 | + * | |
23671 | + * You should have received a copy of the GNU General Public License | |
23672 | + * along with this program; if not, write to the Free Software | |
23673 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
23674 | + * 02111-1307 USA | |
23675 | + */ | |
23676 | +#include <linux/kernel.h> | |
23677 | +#include <linux/types.h> | |
23678 | +#include <linux/module.h> | |
23679 | +#include <linux/init.h> | |
23680 | +#include <linux/smp.h> | |
23681 | + | |
23682 | +#include <linux/perfmon_kern.h> | |
23683 | +#include <linux/perfmon_dfl_smpl.h> | |
23684 | + | |
23685 | +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | |
23686 | +MODULE_DESCRIPTION("new perfmon default sampling format"); | |
23687 | +MODULE_LICENSE("GPL"); | |
23688 | + | |
23689 | +static int pfm_dfl_fmt_validate(u32 ctx_flags, u16 npmds, void *data) | |
23690 | +{ | |
23691 | + struct pfm_dfl_smpl_arg *arg = data; | |
23692 | + u64 min_buf_size; | |
23693 | + | |
23694 | + if (data == NULL) { | |
23695 | + PFM_DBG("no argument passed"); | |
23696 | + return -EINVAL; | |
23697 | + } | |
23698 | + | |
23699 | + /* | |
23700 | + * sanity check in case size_t is smaller then u64 | |
23701 | + */ | |
23702 | +#if BITS_PER_LONG == 4 | |
23703 | +#define MAX_SIZE_T (1ULL<<(sizeof(size_t)<<3)) | |
23704 | + if (sizeof(size_t) < sizeof(arg->buf_size)) { | |
23705 | + if (arg->buf_size >= MAX_SIZE_T) | |
23706 | + return -ETOOBIG; | |
23707 | + } | |
23708 | +#endif | |
23709 | + | |
23710 | + /* | |
23711 | + * compute min buf size. npmds is the maximum number | |
23712 | + * of implemented PMD registers. | |
23713 | + */ | |
23714 | + min_buf_size = sizeof(struct pfm_dfl_smpl_hdr) | |
23715 | + + (sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64))); | |
23716 | + | |
23717 | + PFM_DBG("validate ctx_flags=0x%x flags=0x%x npmds=%u " | |
23718 | + "min_buf_size=%llu buf_size=%llu\n", | |
23719 | + ctx_flags, | |
23720 | + arg->buf_flags, | |
23721 | + npmds, | |
23722 | + (unsigned long long)min_buf_size, | |
23723 | + (unsigned long long)arg->buf_size); | |
23724 | + | |
23725 | + /* | |
23726 | + * must hold at least the buffer header + one minimally sized entry | |
23727 | + */ | |
23728 | + if (arg->buf_size < min_buf_size) | |
23729 | + return -EINVAL; | |
23730 | + | |
23731 | + return 0; | |
23732 | +} | |
23733 | + | |
23734 | +static int pfm_dfl_fmt_get_size(u32 flags, void *data, size_t *size) | |
23735 | +{ | |
23736 | + struct pfm_dfl_smpl_arg *arg = data; | |
23737 | + | |
23738 | + /* | |
23739 | + * size has been validated in default_validate | |
23740 | + * we can never loose bits from buf_size. | |
23741 | + */ | |
23742 | + *size = (size_t)arg->buf_size; | |
23743 | + | |
23744 | + return 0; | |
23745 | +} | |
23746 | + | |
23747 | +static int pfm_dfl_fmt_init(struct pfm_context *ctx, void *buf, u32 ctx_flags, | |
23748 | + u16 npmds, void *data) | |
23749 | +{ | |
23750 | + struct pfm_dfl_smpl_hdr *hdr; | |
23751 | + struct pfm_dfl_smpl_arg *arg = data; | |
23752 | + | |
23753 | + hdr = buf; | |
23754 | + | |
23755 | + hdr->hdr_version = PFM_DFL_SMPL_VERSION; | |
23756 | + hdr->hdr_buf_size = arg->buf_size; | |
23757 | + hdr->hdr_buf_flags = arg->buf_flags; | |
23758 | + hdr->hdr_cur_offs = sizeof(*hdr); | |
23759 | + hdr->hdr_overflows = 0; | |
23760 | + hdr->hdr_count = 0; | |
23761 | + hdr->hdr_min_buf_space = sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64)); | |
23762 | + /* | |
23763 | + * due to cache aliasing, it may be necessary to flush the cache | |
23764 | + * on certain architectures (e.g., MIPS) | |
23765 | + */ | |
23766 | + pfm_cacheflush(hdr, sizeof(*hdr)); | |
23767 | + | |
23768 | + PFM_DBG("buffer=%p buf_size=%llu hdr_size=%zu hdr_version=%u.%u " | |
23769 | + "min_space=%llu npmds=%u", | |
23770 | + buf, | |
23771 | + (unsigned long long)hdr->hdr_buf_size, | |
23772 | + sizeof(*hdr), | |
23773 | + PFM_VERSION_MAJOR(hdr->hdr_version), | |
23774 | + PFM_VERSION_MINOR(hdr->hdr_version), | |
23775 | + (unsigned long long)hdr->hdr_min_buf_space, | |
23776 | + npmds); | |
23777 | + | |
23778 | + return 0; | |
23779 | +} | |
23780 | + | |
23781 | +/* | |
23782 | + * called from pfm_overflow_handler() to record a new sample | |
23783 | + * | |
23784 | + * context is locked, interrupts are disabled (no preemption) | |
23785 | + */ | |
23786 | +static int pfm_dfl_fmt_handler(struct pfm_context *ctx, | |
23787 | + unsigned long ip, u64 tstamp, void *data) | |
23788 | +{ | |
23789 | + struct pfm_dfl_smpl_hdr *hdr; | |
23790 | + struct pfm_dfl_smpl_entry *ent; | |
23791 | + struct pfm_ovfl_arg *arg; | |
23792 | + void *cur, *last; | |
23793 | + u64 *e; | |
23794 | + size_t entry_size, min_size; | |
23795 | + u16 npmds, i; | |
23796 | + u16 ovfl_pmd; | |
23797 | + void *buf; | |
23798 | + | |
23799 | + hdr = ctx->smpl_addr; | |
23800 | + arg = &ctx->ovfl_arg; | |
23801 | + | |
23802 | + buf = hdr; | |
23803 | + cur = buf+hdr->hdr_cur_offs; | |
23804 | + last = buf+hdr->hdr_buf_size; | |
23805 | + ovfl_pmd = arg->ovfl_pmd; | |
23806 | + min_size = hdr->hdr_min_buf_space; | |
23807 | + | |
23808 | + /* | |
23809 | + * precheck for sanity | |
23810 | + */ | |
23811 | + if ((last - cur) < min_size) | |
23812 | + goto full; | |
23813 | + | |
23814 | + npmds = arg->num_smpl_pmds; | |
23815 | + | |
23816 | + ent = (struct pfm_dfl_smpl_entry *)cur; | |
23817 | + | |
23818 | + entry_size = sizeof(*ent) + (npmds << 3); | |
23819 | + | |
23820 | + /* position for first pmd */ | |
23821 | + e = (u64 *)(ent+1); | |
23822 | + | |
23823 | + hdr->hdr_count++; | |
23824 | + | |
23825 | + PFM_DBG_ovfl("count=%llu cur=%p last=%p free_bytes=%zu ovfl_pmd=%d " | |
23826 | + "npmds=%u", | |
23827 | + (unsigned long long)hdr->hdr_count, | |
23828 | + cur, last, | |
23829 | + (last-cur), | |
23830 | + ovfl_pmd, | |
23831 | + npmds); | |
23832 | + | |
23833 | + /* | |
23834 | + * current = task running at the time of the overflow. | |
23835 | + * | |
23836 | + * per-task mode: | |
23837 | + * - this is usually the task being monitored. | |
23838 | + * Under certain conditions, it might be a different task | |
23839 | + * | |
23840 | + * system-wide: | |
23841 | + * - this is not necessarily the task controlling the session | |
23842 | + */ | |
23843 | + ent->pid = current->pid; | |
23844 | + ent->ovfl_pmd = ovfl_pmd; | |
23845 | + ent->last_reset_val = arg->pmd_last_reset; | |
23846 | + | |
23847 | + /* | |
23848 | + * where did the fault happen (includes slot number) | |
23849 | + */ | |
23850 | + ent->ip = ip; | |
23851 | + | |
23852 | + ent->tstamp = tstamp; | |
23853 | + ent->cpu = smp_processor_id(); | |
23854 | + ent->set = arg->active_set; | |
23855 | + ent->tgid = current->tgid; | |
23856 | + | |
23857 | + /* | |
23858 | + * selectively store PMDs in increasing index number | |
23859 | + */ | |
23860 | + if (npmds) { | |
23861 | + u64 *val = arg->smpl_pmds_values; | |
23862 | + for (i = 0; i < npmds; i++) | |
23863 | + *e++ = *val++; | |
23864 | + } | |
23865 | + | |
23866 | + /* | |
23867 | + * update position for next entry | |
23868 | + */ | |
23869 | + hdr->hdr_cur_offs += entry_size; | |
23870 | + cur += entry_size; | |
23871 | + | |
23872 | + pfm_cacheflush(hdr, sizeof(*hdr)); | |
23873 | + pfm_cacheflush(ent, entry_size); | |
23874 | + | |
23875 | + /* | |
23876 | + * post check to avoid losing the last sample | |
23877 | + */ | |
23878 | + if ((last - cur) < min_size) | |
23879 | + goto full; | |
23880 | + | |
23881 | + /* reset before returning from interrupt handler */ | |
23882 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
23883 | + | |
23884 | + return 0; | |
23885 | +full: | |
23886 | + PFM_DBG_ovfl("sampling buffer full free=%zu, count=%llu", | |
23887 | + last-cur, | |
23888 | + (unsigned long long)hdr->hdr_count); | |
23889 | + | |
23890 | + /* | |
23891 | + * increment number of buffer overflows. | |
23892 | + * important to detect duplicate set of samples. | |
23893 | + */ | |
23894 | + hdr->hdr_overflows++; | |
23895 | + | |
23896 | + /* | |
23897 | + * request notification and masking of monitoring. | |
23898 | + * Notification is still subject to the overflowed | |
23899 | + * register having the FL_NOTIFY flag set. | |
23900 | + */ | |
23901 | + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; | |
23902 | + | |
23903 | + return -ENOBUFS; /* we are full, sorry */ | |
23904 | +} | |
23905 | + | |
23906 | +static int pfm_dfl_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf) | |
23907 | +{ | |
23908 | + struct pfm_dfl_smpl_hdr *hdr; | |
23909 | + | |
23910 | + hdr = buf; | |
23911 | + | |
23912 | + hdr->hdr_count = 0; | |
23913 | + hdr->hdr_cur_offs = sizeof(*hdr); | |
23914 | + | |
23915 | + pfm_cacheflush(hdr, sizeof(*hdr)); | |
23916 | + | |
23917 | + *ovfl_ctrl = PFM_OVFL_CTRL_RESET; | |
23918 | + | |
23919 | + return 0; | |
23920 | +} | |
23921 | + | |
23922 | +static int pfm_dfl_fmt_exit(void *buf) | |
23923 | +{ | |
23924 | + return 0; | |
23925 | +} | |
23926 | + | |
23927 | +static struct pfm_smpl_fmt dfl_fmt = { | |
23928 | + .fmt_name = "default", | |
23929 | + .fmt_version = 0x10000, | |
23930 | + .fmt_arg_size = sizeof(struct pfm_dfl_smpl_arg), | |
23931 | + .fmt_validate = pfm_dfl_fmt_validate, | |
23932 | + .fmt_getsize = pfm_dfl_fmt_get_size, | |
23933 | + .fmt_init = pfm_dfl_fmt_init, | |
23934 | + .fmt_handler = pfm_dfl_fmt_handler, | |
23935 | + .fmt_restart = pfm_dfl_fmt_restart, | |
23936 | + .fmt_exit = pfm_dfl_fmt_exit, | |
23937 | + .fmt_flags = PFM_FMT_BUILTIN_FLAG, | |
23938 | + .owner = THIS_MODULE | |
23939 | +}; | |
23940 | + | |
23941 | +static int pfm_dfl_fmt_init_module(void) | |
23942 | +{ | |
23943 | + return pfm_fmt_register(&dfl_fmt); | |
23944 | +} | |
23945 | + | |
23946 | +static void pfm_dfl_fmt_cleanup_module(void) | |
23947 | +{ | |
23948 | + pfm_fmt_unregister(&dfl_fmt); | |
23949 | +} | |
23950 | + | |
23951 | +module_init(pfm_dfl_fmt_init_module); | |
23952 | +module_exit(pfm_dfl_fmt_cleanup_module); | |
23953 | --- /dev/null | |
23954 | +++ b/perfmon/perfmon_file.c | |
23955 | @@ -0,0 +1,751 @@ | |
23956 | +/* | |
23957 | + * perfmon_file.c: perfmon2 file input/output functions | |
23958 | + * | |
23959 | + * This file implements the perfmon2 interface which | |
23960 | + * provides access to the hardware performance counters | |
23961 | + * of the host processor. | |
23962 | + * | |
23963 | + * The initial version of perfmon.c was written by | |
23964 | + * Ganesh Venkitachalam, IBM Corp. | |
23965 | + * | |
23966 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
23967 | + * David Mosberger, Hewlett Packard Co. | |
23968 | + * | |
23969 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
23970 | + * by Stephane Eranian, Hewlett Packard Co. | |
23971 | + * | |
23972 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
23973 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
23974 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
23975 | + * | |
23976 | + * More information about perfmon available at: | |
23977 | + * http://perfmon2.sf.net | |
23978 | + * | |
23979 | + * This program is free software; you can redistribute it and/or | |
23980 | + * modify it under the terms of version 2 of the GNU General Public | |
23981 | + * License as published by the Free Software Foundation. | |
23982 | + * | |
23983 | + * This program is distributed in the hope that it will be useful, | |
23984 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23985 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
23986 | + * General Public License for more details. | |
23987 | + * | |
23988 | + * You should have received a copy of the GNU General Public License | |
23989 | + * along with this program; if not, write to the Free Software | |
23990 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
23991 | + * 02111-1307 USA | |
23992 | + */ | |
23993 | +#include <linux/kernel.h> | |
23994 | +#include <linux/module.h> | |
23995 | +#include <linux/file.h> | |
23996 | +#include <linux/poll.h> | |
23997 | +#include <linux/vfs.h> | |
23998 | +#include <linux/pagemap.h> | |
23999 | +#include <linux/mount.h> | |
24000 | +#include <linux/perfmon_kern.h> | |
24001 | +#include "perfmon_priv.h" | |
24002 | + | |
24003 | +#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */ | |
24004 | + | |
24005 | +struct pfm_controls pfm_controls = { | |
24006 | + .sys_group = PFM_GROUP_PERM_ANY, | |
24007 | + .task_group = PFM_GROUP_PERM_ANY, | |
24008 | + .arg_mem_max = PAGE_SIZE, | |
24009 | + .smpl_buffer_mem_max = ~0, | |
24010 | +}; | |
24011 | +EXPORT_SYMBOL(pfm_controls); | |
24012 | + | |
24013 | +static int __init enable_debug(char *str) | |
24014 | +{ | |
24015 | + pfm_controls.debug = 1; | |
24016 | + PFM_INFO("debug output enabled\n"); | |
24017 | + return 1; | |
24018 | +} | |
24019 | +__setup("perfmon_debug", enable_debug); | |
24020 | + | |
24021 | +static int pfmfs_delete_dentry(struct dentry *dentry) | |
24022 | +{ | |
24023 | + return 1; | |
24024 | +} | |
24025 | + | |
24026 | +static struct dentry_operations pfmfs_dentry_operations = { | |
24027 | + .d_delete = pfmfs_delete_dentry, | |
24028 | +}; | |
24029 | + | |
24030 | +int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf) | |
24031 | +{ | |
24032 | + void *kaddr; | |
24033 | + unsigned long address; | |
24034 | + struct pfm_context *ctx; | |
24035 | + size_t size; | |
24036 | + | |
24037 | + address = (unsigned long)vmf->virtual_address; | |
24038 | + | |
24039 | + ctx = vma->vm_private_data; | |
24040 | + if (ctx == NULL) { | |
24041 | + PFM_DBG("no ctx"); | |
24042 | + return VM_FAULT_SIGBUS; | |
24043 | + } | |
24044 | + /* | |
24045 | + * size available to user (maybe different from real_smpl_size | |
24046 | + */ | |
24047 | + size = ctx->smpl_size; | |
24048 | + | |
24049 | + if ((address < vma->vm_start) || | |
24050 | + (address >= (vma->vm_start + size))) | |
24051 | + return VM_FAULT_SIGBUS; | |
24052 | + | |
24053 | + kaddr = ctx->smpl_addr + (address - vma->vm_start); | |
24054 | + | |
24055 | + vmf->page = vmalloc_to_page(kaddr); | |
24056 | + get_page(vmf->page); | |
24057 | + | |
24058 | + PFM_DBG("[%d] start=%p ref_count=%d", | |
24059 | + current->pid, | |
24060 | + kaddr, page_count(vmf->page)); | |
24061 | + | |
24062 | + return 0; | |
24063 | +} | |
24064 | + | |
24065 | +/* | |
24066 | + * we need to determine whther or not we are closing the last reference | |
24067 | + * to the file and thus are going to end up in pfm_close() which eventually | |
24068 | + * calls pfm_release_buf_space(). In that function, we update the accouting | |
24069 | + * for locked_vm given that we are actually freeing the sampling buffer. The | |
24070 | + * issue is that there are multiple paths leading to pfm_release_buf_space(), | |
24071 | + * from exit(), munmap(), close(). The path coming from munmap() is problematic | |
24072 | + * becuse do_munmap() grabs mmap_sem in write-mode which is also what | |
24073 | + * pfm_release_buf_space does. To avoid deadlock, we need to determine where | |
24074 | + * we are calling from and skip the locking. The vm_ops->close() callback | |
24075 | + * is invoked for each remove_vma() independently of the number of references | |
24076 | + * left on the file descriptor, therefore simple reference counter does not | |
24077 | + * work. We need to determine if this is the last call, and then set a flag | |
24078 | + * to skip the locking. | |
24079 | + */ | |
24080 | +static void pfm_buf_map_close(struct vm_area_struct *vma) | |
24081 | +{ | |
24082 | + struct file *file; | |
24083 | + struct pfm_context *ctx; | |
24084 | + | |
24085 | + file = vma->vm_file; | |
24086 | + ctx = vma->vm_private_data; | |
24087 | + | |
24088 | + /* | |
24089 | + * if file is going to close, then pfm_close() will | |
24090 | + * be called, do not lock in pfm_release_buf | |
24091 | + */ | |
24092 | + if (atomic_read(&file->f_count) == 1) | |
24093 | + ctx->flags.mmap_nlock = 1; | |
24094 | +} | |
24095 | + | |
24096 | +/* | |
24097 | + * we do not have a close callback because, the locked | |
24098 | + * memory accounting must be done when the actual buffer | |
24099 | + * is freed. Munmap does not free the page backing the vma | |
24100 | + * because they may still be in use by the PMU interrupt handler. | |
24101 | + */ | |
24102 | +struct vm_operations_struct pfm_buf_map_vm_ops = { | |
24103 | + .fault = pfm_buf_map_pagefault, | |
24104 | + .close = pfm_buf_map_close | |
24105 | +}; | |
24106 | + | |
24107 | +static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma, | |
24108 | + size_t size) | |
24109 | +{ | |
24110 | + if (ctx->smpl_addr == NULL) { | |
24111 | + PFM_DBG("no sampling buffer to map"); | |
24112 | + return -EINVAL; | |
24113 | + } | |
24114 | + | |
24115 | + if (size > ctx->smpl_size) { | |
24116 | + PFM_DBG("mmap size=%zu >= actual buf size=%zu", | |
24117 | + size, | |
24118 | + ctx->smpl_size); | |
24119 | + return -EINVAL; | |
24120 | + } | |
24121 | + | |
24122 | + vma->vm_ops = &pfm_buf_map_vm_ops; | |
24123 | + vma->vm_private_data = ctx; | |
24124 | + | |
24125 | + return 0; | |
24126 | +} | |
24127 | + | |
24128 | +static int pfm_mmap(struct file *file, struct vm_area_struct *vma) | |
24129 | +{ | |
24130 | + size_t size; | |
24131 | + struct pfm_context *ctx; | |
24132 | + unsigned long flags; | |
24133 | + int ret; | |
24134 | + | |
24135 | + PFM_DBG("pfm_file_ops"); | |
24136 | + | |
24137 | + ctx = file->private_data; | |
24138 | + size = (vma->vm_end - vma->vm_start); | |
24139 | + | |
24140 | + if (ctx == NULL) | |
24141 | + return -EINVAL; | |
24142 | + | |
24143 | + ret = -EINVAL; | |
24144 | + | |
24145 | + spin_lock_irqsave(&ctx->lock, flags); | |
24146 | + | |
24147 | + if (vma->vm_flags & VM_WRITE) { | |
24148 | + PFM_DBG("cannot map buffer for writing"); | |
24149 | + goto done; | |
24150 | + } | |
24151 | + | |
24152 | + PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx", | |
24153 | + vma->vm_pgoff, | |
24154 | + size, | |
24155 | + vma->vm_start); | |
24156 | + | |
24157 | + ret = pfm_mmap_buffer(ctx, vma, size); | |
24158 | + if (ret == 0) | |
24159 | + vma->vm_flags |= VM_RESERVED; | |
24160 | + | |
24161 | + PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu", | |
24162 | + ret, | |
24163 | + vma->vm_flags, | |
24164 | + vma->vm_start, | |
24165 | + vma->vm_end-vma->vm_start); | |
24166 | +done: | |
24167 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24168 | + | |
24169 | + return ret; | |
24170 | +} | |
24171 | + | |
24172 | +/* | |
24173 | + * Extract one message from queue. | |
24174 | + * | |
24175 | + * return: | |
24176 | + * -EAGAIN: when non-blocking and nothing is* in the queue. | |
24177 | + * -ERESTARTSYS: when blocking and signal is pending | |
24178 | + * Otherwise returns size of message (sizeof(pfarg_msg)) | |
24179 | + */ | |
24180 | +ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block) | |
24181 | +{ | |
24182 | + ssize_t ret = 0; | |
24183 | + unsigned long flags; | |
24184 | + DECLARE_WAITQUEUE(wait, current); | |
24185 | + | |
24186 | + /* | |
24187 | + * we must masks interrupts to avoid a race condition | |
24188 | + * with the PMU interrupt handler. | |
24189 | + */ | |
24190 | + spin_lock_irqsave(&ctx->lock, flags); | |
24191 | + | |
24192 | + while (pfm_msgq_is_empty(ctx)) { | |
24193 | + | |
24194 | + /* | |
24195 | + * handle non-blocking reads | |
24196 | + * return -EAGAIN | |
24197 | + */ | |
24198 | + ret = -EAGAIN; | |
24199 | + if (non_block) | |
24200 | + break; | |
24201 | + | |
24202 | + add_wait_queue(&ctx->msgq_wait, &wait); | |
24203 | + set_current_state(TASK_INTERRUPTIBLE); | |
24204 | + | |
24205 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24206 | + | |
24207 | + schedule(); | |
24208 | + | |
24209 | + /* | |
24210 | + * during this window, another thread may call | |
24211 | + * pfm_read() and steal our message | |
24212 | + */ | |
24213 | + | |
24214 | + spin_lock_irqsave(&ctx->lock, flags); | |
24215 | + | |
24216 | + remove_wait_queue(&ctx->msgq_wait, &wait); | |
24217 | + set_current_state(TASK_RUNNING); | |
24218 | + | |
24219 | + /* | |
24220 | + * check for pending signals | |
24221 | + * return -ERESTARTSYS | |
24222 | + */ | |
24223 | + ret = -ERESTARTSYS; | |
24224 | + if (signal_pending(current)) | |
24225 | + break; | |
24226 | + | |
24227 | + /* | |
24228 | + * we may have a message | |
24229 | + */ | |
24230 | + ret = 0; | |
24231 | + } | |
24232 | + | |
24233 | + /* | |
24234 | + * extract message | |
24235 | + */ | |
24236 | + if (ret == 0) { | |
24237 | + /* | |
24238 | + * copy the oldest message into msg_buf. | |
24239 | + * We cannot directly call copy_to_user() | |
24240 | + * because interrupts masked. This is done | |
24241 | + * in the caller | |
24242 | + */ | |
24243 | + pfm_get_next_msg(ctx, msg_buf); | |
24244 | + | |
24245 | + ret = sizeof(*msg_buf); | |
24246 | + | |
24247 | + PFM_DBG("extracted type=%d", msg_buf->type); | |
24248 | + } | |
24249 | + | |
24250 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24251 | + | |
24252 | + PFM_DBG("blocking=%d ret=%zd", non_block, ret); | |
24253 | + | |
24254 | + return ret; | |
24255 | +} | |
24256 | + | |
24257 | +static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size, | |
24258 | + loff_t *ppos) | |
24259 | +{ | |
24260 | + struct pfm_context *ctx; | |
24261 | + union pfarg_msg msg_buf; | |
24262 | + int non_block, ret; | |
24263 | + | |
24264 | + PFM_DBG_ovfl("buf=%p size=%zu", buf, size); | |
24265 | + | |
24266 | + ctx = filp->private_data; | |
24267 | + if (ctx == NULL) { | |
24268 | + PFM_ERR("no ctx for pfm_read"); | |
24269 | + return -EINVAL; | |
24270 | + } | |
24271 | + | |
24272 | + non_block = filp->f_flags & O_NONBLOCK; | |
24273 | + | |
24274 | +#ifdef CONFIG_IA64_PERFMON_COMPAT | |
24275 | + /* | |
24276 | + * detect IA-64 v2.0 context read (message size is different) | |
24277 | + * nops on all other architectures | |
24278 | + */ | |
24279 | + if (unlikely(ctx->flags.ia64_v20_compat)) | |
24280 | + return pfm_arch_compat_read(ctx, buf, non_block, size); | |
24281 | +#endif | |
24282 | + /* | |
24283 | + * cannot extract partial messages. | |
24284 | + * check even when there is no message | |
24285 | + * | |
24286 | + * cannot extract more than one message per call. Bytes | |
24287 | + * above sizeof(msg) are ignored. | |
24288 | + */ | |
24289 | + if (size < sizeof(msg_buf)) { | |
24290 | + PFM_DBG("message is too small size=%zu must be >=%zu)", | |
24291 | + size, | |
24292 | + sizeof(msg_buf)); | |
24293 | + return -EINVAL; | |
24294 | + } | |
24295 | + | |
24296 | + ret = __pfm_read(ctx, &msg_buf, non_block); | |
24297 | + if (ret > 0) { | |
24298 | + if (copy_to_user(buf, &msg_buf, sizeof(msg_buf))) | |
24299 | + ret = -EFAULT; | |
24300 | + } | |
24301 | + PFM_DBG_ovfl("ret=%d", ret); | |
24302 | + return ret; | |
24303 | +} | |
24304 | + | |
24305 | +static ssize_t pfm_write(struct file *file, const char __user *ubuf, | |
24306 | + size_t size, loff_t *ppos) | |
24307 | +{ | |
24308 | + PFM_DBG("pfm_write called"); | |
24309 | + return -EINVAL; | |
24310 | +} | |
24311 | + | |
24312 | +static unsigned int pfm_poll(struct file *filp, poll_table *wait) | |
24313 | +{ | |
24314 | + struct pfm_context *ctx; | |
24315 | + unsigned long flags; | |
24316 | + unsigned int mask = 0; | |
24317 | + | |
24318 | + PFM_DBG("pfm_file_ops"); | |
24319 | + | |
24320 | + if (filp->f_op != &pfm_file_ops) { | |
24321 | + PFM_ERR("pfm_poll bad magic"); | |
24322 | + return 0; | |
24323 | + } | |
24324 | + | |
24325 | + ctx = filp->private_data; | |
24326 | + if (ctx == NULL) { | |
24327 | + PFM_ERR("pfm_poll no ctx"); | |
24328 | + return 0; | |
24329 | + } | |
24330 | + | |
24331 | + PFM_DBG("before poll_wait"); | |
24332 | + | |
24333 | + poll_wait(filp, &ctx->msgq_wait, wait); | |
24334 | + | |
24335 | + /* | |
24336 | + * pfm_msgq_is_empty() is non-atomic | |
24337 | + * | |
24338 | + * filp is protected by fget() at upper level | |
24339 | + * context cannot be closed by another thread. | |
24340 | + * | |
24341 | + * There may be a race with a PMU interrupt adding | |
24342 | + * messages to the queue. But we are interested in | |
24343 | + * queue not empty, so adding more messages should | |
24344 | + * not really be a problem. | |
24345 | + * | |
24346 | + * There may be a race with another thread issuing | |
24347 | + * a read() and stealing messages from the queue thus | |
24348 | + * may return the wrong answer. This could potentially | |
24349 | + * lead to a blocking read, because nothing is | |
24350 | + * available in the queue | |
24351 | + */ | |
24352 | + spin_lock_irqsave(&ctx->lock, flags); | |
24353 | + | |
24354 | + if (!pfm_msgq_is_empty(ctx)) | |
24355 | + mask = POLLIN | POLLRDNORM; | |
24356 | + | |
24357 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24358 | + | |
24359 | + PFM_DBG("after poll_wait mask=0x%x", mask); | |
24360 | + | |
24361 | + return mask; | |
24362 | +} | |
24363 | + | |
24364 | +static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | |
24365 | + unsigned long arg) | |
24366 | +{ | |
24367 | + PFM_DBG("pfm_ioctl called"); | |
24368 | + return -EINVAL; | |
24369 | +} | |
24370 | + | |
24371 | +/* | |
24372 | + * interrupt cannot be masked when entering this function | |
24373 | + */ | |
24374 | +static inline int __pfm_fasync(int fd, struct file *filp, | |
24375 | + struct pfm_context *ctx, int on) | |
24376 | +{ | |
24377 | + int ret; | |
24378 | + | |
24379 | + PFM_DBG("in fd=%d on=%d async_q=%p", | |
24380 | + fd, | |
24381 | + on, | |
24382 | + ctx->async_queue); | |
24383 | + | |
24384 | + ret = fasync_helper(fd, filp, on, &ctx->async_queue); | |
24385 | + | |
24386 | + PFM_DBG("out fd=%d on=%d async_q=%p ret=%d", | |
24387 | + fd, | |
24388 | + on, | |
24389 | + ctx->async_queue, ret); | |
24390 | + | |
24391 | + return ret; | |
24392 | +} | |
24393 | + | |
24394 | +static int pfm_fasync(int fd, struct file *filp, int on) | |
24395 | +{ | |
24396 | + struct pfm_context *ctx; | |
24397 | + int ret; | |
24398 | + | |
24399 | + PFM_DBG("pfm_file_ops"); | |
24400 | + | |
24401 | + ctx = filp->private_data; | |
24402 | + if (ctx == NULL) { | |
24403 | + PFM_ERR("pfm_fasync no ctx"); | |
24404 | + return -EBADF; | |
24405 | + } | |
24406 | + | |
24407 | + /* | |
24408 | + * we cannot mask interrupts during this call because this may | |
24409 | + * may go to sleep if memory is not readily avalaible. | |
24410 | + * | |
24411 | + * We are protected from the context disappearing by the | |
24412 | + * get_fd()/put_fd() done in caller. Serialization of this function | |
24413 | + * is ensured by caller. | |
24414 | + */ | |
24415 | + ret = __pfm_fasync(fd, filp, ctx, on); | |
24416 | + | |
24417 | + PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d", | |
24418 | + fd, | |
24419 | + on, | |
24420 | + ctx->async_queue, ret); | |
24421 | + | |
24422 | + return ret; | |
24423 | +} | |
24424 | + | |
24425 | +#ifdef CONFIG_SMP | |
24426 | +static void __pfm_close_remote_cpu(void *info) | |
24427 | +{ | |
24428 | + struct pfm_context *ctx = info; | |
24429 | + int can_release; | |
24430 | + | |
24431 | + BUG_ON(ctx != __get_cpu_var(pmu_ctx)); | |
24432 | + | |
24433 | + /* | |
24434 | + * we are in IPI interrupt handler which has always higher | |
24435 | + * priority than PMU interrupt, therefore we do not need to | |
24436 | + * mask interrupts. context locking is not needed because we | |
24437 | + * are in close(), no more user references. | |
24438 | + * | |
24439 | + * can_release is ignored, release done on calling CPU | |
24440 | + */ | |
24441 | + __pfm_unload_context(ctx, &can_release); | |
24442 | + | |
24443 | + /* | |
24444 | + * we cannot free context here because we are in_interrupt(). | |
24445 | + * we free on the calling CPU | |
24446 | + */ | |
24447 | +} | |
24448 | + | |
24449 | +static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx) | |
24450 | +{ | |
24451 | + BUG_ON(irqs_disabled()); | |
24452 | + return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1); | |
24453 | +} | |
24454 | +#endif /* CONFIG_SMP */ | |
24455 | + | |
24456 | +/* | |
24457 | + * called either on explicit close() or from exit_files(). | |
24458 | + * Only the LAST user of the file gets to this point, i.e., it is | |
24459 | + * called only ONCE. | |
24460 | + * | |
24461 | + * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero | |
24462 | + * (fput()),i.e, last task to access the file. Nobody else can access the | |
24463 | + * file at this point. | |
24464 | + * | |
24465 | + * When called from exit_files(), the VMA has been freed because exit_mm() | |
24466 | + * is executed before exit_files(). | |
24467 | + * | |
24468 | + * When called from exit_files(), the current task is not yet ZOMBIE but we | |
24469 | + * flush the PMU state to the context. | |
24470 | + */ | |
24471 | +int __pfm_close(struct pfm_context *ctx, struct file *filp) | |
24472 | +{ | |
24473 | + unsigned long flags; | |
24474 | + int state; | |
24475 | + int can_free = 1, can_unload = 1; | |
24476 | + int is_system, can_release = 0; | |
24477 | + u32 cpu; | |
24478 | + | |
24479 | + /* | |
24480 | + * no risk of ctx of filp disappearing so we can operate outside | |
24481 | + * of spin_lock(). fasync_helper() runs with interrupts masked, | |
24482 | + * thus there is no risk with the PMU interrupt handler | |
24483 | + * | |
24484 | + * In case of zombie, we will not have the async struct anymore | |
24485 | + * thus kill_fasync() will not do anything | |
24486 | + * | |
24487 | + * fd is not used when removing the entry so we pass -1 | |
24488 | + */ | |
24489 | + if (filp->f_flags & FASYNC) | |
24490 | + __pfm_fasync (-1, filp, ctx, 0); | |
24491 | + | |
24492 | + spin_lock_irqsave(&ctx->lock, flags); | |
24493 | + | |
24494 | + state = ctx->state; | |
24495 | + is_system = ctx->flags.system; | |
24496 | + cpu = ctx->cpu; | |
24497 | + | |
24498 | + PFM_DBG("state=%d", state); | |
24499 | + | |
24500 | + /* | |
24501 | + * check if unload is needed | |
24502 | + */ | |
24503 | + if (state == PFM_CTX_UNLOADED) | |
24504 | + goto doit; | |
24505 | + | |
24506 | +#ifdef CONFIG_SMP | |
24507 | + /* | |
24508 | + * we need to release the resource on the ORIGINAL cpu. | |
24509 | + * we need to release the context lock to avoid deadlocks | |
24510 | + * on the original CPU, especially in the context switch | |
24511 | + * routines. It is safe to unlock because we are in close(), | |
24512 | + * in other words, there is no more access from user level. | |
24513 | + * we can also unmask interrupts on this CPU because the | |
24514 | + * context is running on the original CPU. Context will be | |
24515 | + * unloaded and the session will be released on the original | |
24516 | + * CPU. Upon return, the caller is guaranteed that the context | |
24517 | + * is gone from original CPU. | |
24518 | + */ | |
24519 | + if (is_system && cpu != smp_processor_id()) { | |
24520 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24521 | + pfm_close_remote_cpu(cpu, ctx); | |
24522 | + can_release = 1; | |
24523 | + goto free_it; | |
24524 | + } | |
24525 | + | |
24526 | + if (!is_system && ctx->task != current) { | |
24527 | + /* | |
24528 | + * switch context to zombie state | |
24529 | + */ | |
24530 | + ctx->state = PFM_CTX_ZOMBIE; | |
24531 | + | |
24532 | + PFM_DBG("zombie ctx for [%d]", ctx->task->pid); | |
24533 | + /* | |
24534 | + * must check if other thread is using block overflow | |
24535 | + * notification mode. If so make sure it will not block | |
24536 | + * because there will not be any pfm_restart() issued. | |
24537 | + * When the thread notices the ZOMBIE state, it will clean | |
24538 | + * up what is left of the context | |
24539 | + */ | |
24540 | + if (state == PFM_CTX_MASKED && ctx->flags.block) { | |
24541 | + /* | |
24542 | + * force task to wake up from MASKED state | |
24543 | + */ | |
24544 | + PFM_DBG("waking up [%d]", ctx->task->pid); | |
24545 | + | |
24546 | + complete(&ctx->restart_complete); | |
24547 | + } | |
24548 | + /* | |
24549 | + * PMU session will be released by monitored task when it notices | |
24550 | + * ZOMBIE state as part of pfm_unload_context() | |
24551 | + */ | |
24552 | + can_unload = can_free = 0; | |
24553 | + } | |
24554 | +#endif | |
24555 | + if (can_unload) | |
24556 | + __pfm_unload_context(ctx, &can_release); | |
24557 | +doit: | |
24558 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
24559 | + | |
24560 | +#ifdef CONFIG_SMP | |
24561 | +free_it: | |
24562 | +#endif | |
24563 | + if (can_release) | |
24564 | + pfm_session_release(is_system, cpu); | |
24565 | + | |
24566 | + if (can_free) | |
24567 | + pfm_free_context(ctx); | |
24568 | + | |
24569 | + return 0; | |
24570 | +} | |
24571 | + | |
24572 | +static int pfm_close(struct inode *inode, struct file *filp) | |
24573 | +{ | |
24574 | + struct pfm_context *ctx; | |
24575 | + | |
24576 | + PFM_DBG("called filp=%p", filp); | |
24577 | + | |
24578 | + ctx = filp->private_data; | |
24579 | + if (ctx == NULL) { | |
24580 | + PFM_ERR("no ctx"); | |
24581 | + return -EBADF; | |
24582 | + } | |
24583 | + return __pfm_close(ctx, filp); | |
24584 | +} | |
24585 | + | |
24586 | +static int pfm_no_open(struct inode *irrelevant, struct file *dontcare) | |
24587 | +{ | |
24588 | + PFM_DBG("pfm_file_ops"); | |
24589 | + | |
24590 | + return -ENXIO; | |
24591 | +} | |
24592 | + | |
24593 | + | |
24594 | +const struct file_operations pfm_file_ops = { | |
24595 | + .llseek = no_llseek, | |
24596 | + .read = pfm_read, | |
24597 | + .write = pfm_write, | |
24598 | + .poll = pfm_poll, | |
24599 | + .ioctl = pfm_ioctl, | |
24600 | + .open = pfm_no_open, /* special open to disallow open via /proc */ | |
24601 | + .fasync = pfm_fasync, | |
24602 | + .release = pfm_close, | |
24603 | + .mmap = pfm_mmap | |
24604 | +}; | |
24605 | + | |
24606 | +static int pfmfs_get_sb(struct file_system_type *fs_type, | |
24607 | + int flags, const char *dev_name, | |
24608 | + void *data, struct vfsmount *mnt) | |
24609 | +{ | |
24610 | + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); | |
24611 | +} | |
24612 | + | |
24613 | +static struct file_system_type pfm_fs_type = { | |
24614 | + .name = "pfmfs", | |
24615 | + .get_sb = pfmfs_get_sb, | |
24616 | + .kill_sb = kill_anon_super, | |
24617 | +}; | |
24618 | + | |
24619 | +/* | |
24620 | + * pfmfs should _never_ be mounted by userland - too much of security hassle, | |
24621 | + * no real gain from having the whole whorehouse mounted. So we don't need | |
24622 | + * any operations on the root directory. However, we need a non-trivial | |
24623 | + * d_name - pfm: will go nicely and kill the special-casing in procfs. | |
24624 | + */ | |
24625 | +static struct vfsmount *pfmfs_mnt; | |
24626 | + | |
24627 | +int __init pfm_init_fs(void) | |
24628 | +{ | |
24629 | + int err = register_filesystem(&pfm_fs_type); | |
24630 | + if (!err) { | |
24631 | + pfmfs_mnt = kern_mount(&pfm_fs_type); | |
24632 | + err = PTR_ERR(pfmfs_mnt); | |
24633 | + if (IS_ERR(pfmfs_mnt)) | |
24634 | + unregister_filesystem(&pfm_fs_type); | |
24635 | + else | |
24636 | + err = 0; | |
24637 | + } | |
24638 | + return err; | |
24639 | +} | |
24640 | + | |
24641 | +int pfm_alloc_fd(struct file **cfile) | |
24642 | +{ | |
24643 | + int fd, ret = 0; | |
24644 | + struct file *file = NULL; | |
24645 | + struct inode * inode; | |
24646 | + char name[32]; | |
24647 | + struct qstr this; | |
24648 | + | |
24649 | + fd = get_unused_fd(); | |
24650 | + if (fd < 0) | |
24651 | + return -ENFILE; | |
24652 | + | |
24653 | + ret = -ENFILE; | |
24654 | + | |
24655 | + file = get_empty_filp(); | |
24656 | + if (!file) | |
24657 | + goto out; | |
24658 | + | |
24659 | + /* | |
24660 | + * allocate a new inode | |
24661 | + */ | |
24662 | + inode = new_inode(pfmfs_mnt->mnt_sb); | |
24663 | + if (!inode) | |
24664 | + goto out; | |
24665 | + | |
24666 | + PFM_DBG("new inode ino=%ld @%p", inode->i_ino, inode); | |
24667 | + | |
24668 | + inode->i_sb = pfmfs_mnt->mnt_sb; | |
24669 | + inode->i_mode = S_IFCHR|S_IRUGO; | |
24670 | + inode->i_uid = current->fsuid; | |
24671 | + inode->i_gid = current->fsgid; | |
24672 | + | |
24673 | + sprintf(name, "[%lu]", inode->i_ino); | |
24674 | + this.name = name; | |
24675 | + this.hash = inode->i_ino; | |
24676 | + this.len = strlen(name); | |
24677 | + | |
24678 | + ret = -ENOMEM; | |
24679 | + | |
24680 | + /* | |
24681 | + * allocate a new dcache entry | |
24682 | + */ | |
24683 | + file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); | |
24684 | + if (!file->f_dentry) | |
24685 | + goto out; | |
24686 | + | |
24687 | + file->f_dentry->d_op = &pfmfs_dentry_operations; | |
24688 | + | |
24689 | + d_add(file->f_dentry, inode); | |
24690 | + file->f_vfsmnt = mntget(pfmfs_mnt); | |
24691 | + file->f_mapping = inode->i_mapping; | |
24692 | + | |
24693 | + file->f_op = &pfm_file_ops; | |
24694 | + file->f_mode = FMODE_READ; | |
24695 | + file->f_flags = O_RDONLY; | |
24696 | + file->f_pos = 0; | |
24697 | + | |
24698 | + *cfile = file; | |
24699 | + | |
24700 | + return fd; | |
24701 | +out: | |
24702 | + if (file) | |
24703 | + put_filp(file); | |
24704 | + put_unused_fd(fd); | |
24705 | + return ret; | |
24706 | +} | |
24707 | --- /dev/null | |
24708 | +++ b/perfmon/perfmon_fmt.c | |
24709 | @@ -0,0 +1,219 @@ | |
24710 | +/* | |
24711 | + * perfmon_fmt.c: perfmon2 sampling buffer format management | |
24712 | + * | |
24713 | + * This file implements the perfmon2 interface which | |
24714 | + * provides access to the hardware performance counters | |
24715 | + * of the host processor. | |
24716 | + * | |
24717 | + * The initial version of perfmon.c was written by | |
24718 | + * Ganesh Venkitachalam, IBM Corp. | |
24719 | + * | |
24720 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
24721 | + * David Mosberger, Hewlett Packard Co. | |
24722 | + * | |
24723 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
24724 | + * by Stephane Eranian, Hewlett Packard Co. | |
24725 | + * | |
24726 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
24727 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
24728 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
24729 | + * | |
24730 | + * More information about perfmon available at: | |
24731 | + * http://perfmon2.sf.net | |
24732 | + * | |
24733 | + * This program is free software; you can redistribute it and/or | |
24734 | + * modify it under the terms of version 2 of the GNU General Public | |
24735 | + * License as published by the Free Software Foundation. | |
24736 | + * | |
24737 | + * This program is distributed in the hope that it will be useful, | |
24738 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24739 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
24740 | + * General Public License for more details. | |
24741 | + * | |
24742 | + * You should have received a copy of the GNU General Public License | |
24743 | + * along with this program; if not, write to the Free Software | |
24744 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
24745 | + * 02111-1307 USA | |
24746 | + */ | |
24747 | +#include <linux/module.h> | |
24748 | +#include <linux/perfmon_kern.h> | |
24749 | +#include "perfmon_priv.h" | |
24750 | + | |
24751 | +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_smpl_fmt_lock); | |
24752 | +static LIST_HEAD(pfm_smpl_fmt_list); | |
24753 | + | |
24754 | +static inline int fmt_is_mod(struct pfm_smpl_fmt *f) | |
24755 | +{ | |
24756 | + return !(f->fmt_flags & PFM_FMTFL_IS_BUILTIN); | |
24757 | +} | |
24758 | + | |
24759 | +static struct pfm_smpl_fmt *pfm_find_fmt(char *name) | |
24760 | +{ | |
24761 | + struct pfm_smpl_fmt *entry; | |
24762 | + | |
24763 | + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) { | |
24764 | + if (!strcmp(entry->fmt_name, name)) | |
24765 | + return entry; | |
24766 | + } | |
24767 | + return NULL; | |
24768 | +} | |
24769 | +/* | |
24770 | + * find a buffer format based on its name | |
24771 | + */ | |
24772 | +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name) | |
24773 | +{ | |
24774 | + struct pfm_smpl_fmt *fmt; | |
24775 | + | |
24776 | + spin_lock(&pfm_smpl_fmt_lock); | |
24777 | + | |
24778 | + fmt = pfm_find_fmt(name); | |
24779 | + | |
24780 | + /* | |
24781 | + * increase module refcount | |
24782 | + */ | |
24783 | + if (fmt && fmt_is_mod(fmt) && !try_module_get(fmt->owner)) | |
24784 | + fmt = NULL; | |
24785 | + | |
24786 | + spin_unlock(&pfm_smpl_fmt_lock); | |
24787 | + | |
24788 | + return fmt; | |
24789 | +} | |
24790 | + | |
24791 | +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt) | |
24792 | +{ | |
24793 | + if (fmt == NULL || !fmt_is_mod(fmt)) | |
24794 | + return; | |
24795 | + BUG_ON(fmt->owner == NULL); | |
24796 | + | |
24797 | + spin_lock(&pfm_smpl_fmt_lock); | |
24798 | + module_put(fmt->owner); | |
24799 | + spin_unlock(&pfm_smpl_fmt_lock); | |
24800 | +} | |
24801 | + | |
24802 | +int pfm_fmt_register(struct pfm_smpl_fmt *fmt) | |
24803 | +{ | |
24804 | + int ret = 0; | |
24805 | + | |
24806 | + if (perfmon_disabled) { | |
24807 | + PFM_INFO("perfmon disabled, cannot add sampling format"); | |
24808 | + return -ENOSYS; | |
24809 | + } | |
24810 | + | |
24811 | + /* some sanity checks */ | |
24812 | + if (fmt == NULL) { | |
24813 | + PFM_INFO("perfmon: NULL format for register"); | |
24814 | + return -EINVAL; | |
24815 | + } | |
24816 | + | |
24817 | + if (fmt->fmt_name == NULL) { | |
24818 | + PFM_INFO("perfmon: format has no name"); | |
24819 | + return -EINVAL; | |
24820 | + } | |
24821 | + | |
24822 | + if (fmt->fmt_qdepth > PFM_MSGS_COUNT) { | |
24823 | + PFM_INFO("perfmon: format %s requires %u msg queue depth (max %d)", | |
24824 | + fmt->fmt_name, | |
24825 | + fmt->fmt_qdepth, | |
24826 | + PFM_MSGS_COUNT); | |
24827 | + return -EINVAL; | |
24828 | + } | |
24829 | + | |
24830 | + /* | |
24831 | + * fmt is missing the initialization of .owner = THIS_MODULE | |
24832 | + * this is only valid when format is compiled as a module | |
24833 | + */ | |
24834 | + if (fmt->owner == NULL && fmt_is_mod(fmt)) { | |
24835 | + PFM_INFO("format %s has no module owner", fmt->fmt_name); | |
24836 | + return -EINVAL; | |
24837 | + } | |
24838 | + /* | |
24839 | + * we need at least a handler | |
24840 | + */ | |
24841 | + if (fmt->fmt_handler == NULL) { | |
24842 | + PFM_INFO("format %s has no handler", fmt->fmt_name); | |
24843 | + return -EINVAL; | |
24844 | + } | |
24845 | + | |
24846 | + /* | |
24847 | + * format argument size cannot be bigger than PAGE_SIZE | |
24848 | + */ | |
24849 | + if (fmt->fmt_arg_size > PAGE_SIZE) { | |
24850 | + PFM_INFO("format %s arguments too big", fmt->fmt_name); | |
24851 | + return -EINVAL; | |
24852 | + } | |
24853 | + | |
24854 | + spin_lock(&pfm_smpl_fmt_lock); | |
24855 | + | |
24856 | + /* | |
24857 | + * because of sysfs, we cannot have two formats with the same name | |
24858 | + */ | |
24859 | + if (pfm_find_fmt(fmt->fmt_name)) { | |
24860 | + PFM_INFO("format %s already registered", fmt->fmt_name); | |
24861 | + ret = -EBUSY; | |
24862 | + goto out; | |
24863 | + } | |
24864 | + | |
24865 | + ret = pfm_sysfs_add_fmt(fmt); | |
24866 | + if (ret) { | |
24867 | + PFM_INFO("sysfs cannot add format entry for %s", fmt->fmt_name); | |
24868 | + goto out; | |
24869 | + } | |
24870 | + | |
24871 | + list_add(&fmt->fmt_list, &pfm_smpl_fmt_list); | |
24872 | + | |
24873 | + PFM_INFO("added sampling format %s", fmt->fmt_name); | |
24874 | +out: | |
24875 | + spin_unlock(&pfm_smpl_fmt_lock); | |
24876 | + | |
24877 | + return ret; | |
24878 | +} | |
24879 | +EXPORT_SYMBOL(pfm_fmt_register); | |
24880 | + | |
24881 | +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt) | |
24882 | +{ | |
24883 | + struct pfm_smpl_fmt *fmt2; | |
24884 | + int ret = 0; | |
24885 | + | |
24886 | + if (!fmt || !fmt->fmt_name) { | |
24887 | + PFM_DBG("invalid fmt"); | |
24888 | + return -EINVAL; | |
24889 | + } | |
24890 | + | |
24891 | + spin_lock(&pfm_smpl_fmt_lock); | |
24892 | + | |
24893 | + fmt2 = pfm_find_fmt(fmt->fmt_name); | |
24894 | + if (!fmt2) { | |
24895 | + PFM_INFO("unregister failed, format not registered"); | |
24896 | + ret = -EINVAL; | |
24897 | + goto out; | |
24898 | + } | |
24899 | + list_del_init(&fmt->fmt_list); | |
24900 | + | |
24901 | + pfm_sysfs_remove_fmt(fmt); | |
24902 | + | |
24903 | + PFM_INFO("removed sampling format: %s", fmt->fmt_name); | |
24904 | + | |
24905 | +out: | |
24906 | + spin_unlock(&pfm_smpl_fmt_lock); | |
24907 | + return ret; | |
24908 | + | |
24909 | +} | |
24910 | +EXPORT_SYMBOL(pfm_fmt_unregister); | |
24911 | + | |
24912 | +/* | |
24913 | + * we defer adding the builtin formats to /sys/kernel/perfmon/formats | |
24914 | + * until after the pfm sysfs subsystem is initialized. This function | |
24915 | + * is called from pfm_init_sysfs() | |
24916 | + */ | |
24917 | +void __init pfm_sysfs_builtin_fmt_add(void) | |
24918 | +{ | |
24919 | + struct pfm_smpl_fmt *entry; | |
24920 | + | |
24921 | + /* | |
24922 | + * locking not needed, kernel not fully booted | |
24923 | + * when called | |
24924 | + */ | |
24925 | + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) { | |
24926 | + pfm_sysfs_add_fmt(entry); | |
24927 | + } | |
24928 | +} | |
24929 | --- /dev/null | |
24930 | +++ b/perfmon/perfmon_hotplug.c | |
24931 | @@ -0,0 +1,151 @@ | |
24932 | +/* | |
24933 | + * perfmon_hotplug.c: handling of CPU hotplug | |
24934 | + * | |
24935 | + * The initial version of perfmon.c was written by | |
24936 | + * Ganesh Venkitachalam, IBM Corp. | |
24937 | + * | |
24938 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
24939 | + * David Mosberger, Hewlett Packard Co. | |
24940 | + * | |
24941 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
24942 | + * by Stephane Eranian, Hewlett Packard Co. | |
24943 | + * | |
24944 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
24945 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
24946 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
24947 | + * | |
24948 | + * More information about perfmon available at: | |
24949 | + * http://perfmon2.sf.net | |
24950 | + * | |
24951 | + * This program is free software; you can redistribute it and/or | |
24952 | + * modify it under the terms of version 2 of the GNU General Public | |
24953 | + * License as published by the Free Software Foundation. | |
24954 | + * | |
24955 | + * This program is distributed in the hope that it will be useful, | |
24956 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24957 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
24958 | + * General Public License for more details. | |
24959 | + * | |
24960 | + * You should have received a copy of the GNU General Public License | |
24961 | + * along with this program; if not, write to the Free Software | |
24962 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
24963 | + * 02111-1307 USA | |
24964 | + */ | |
24965 | +#include <linux/kernel.h> | |
24966 | +#include <linux/perfmon_kern.h> | |
24967 | +#include <linux/cpu.h> | |
24968 | +#include "perfmon_priv.h" | |
24969 | + | |
24970 | +#ifndef CONFIG_HOTPLUG_CPU | |
24971 | +void pfm_cpu_disable(void) | |
24972 | +{} | |
24973 | + | |
24974 | +int __init pfm_init_hotplug(void) | |
24975 | +{ | |
24976 | + return 0; | |
24977 | +} | |
24978 | +#else /* CONFIG_HOTPLUG_CPU */ | |
24979 | +/* | |
24980 | + * CPU hotplug event notification callback | |
24981 | + * | |
24982 | + * We use the callback to manage the sysfs interface. | |
24983 | + * Note that the actual shutdown of monitoring on the CPU | |
24984 | + * is done in pfm_cpu_disable(), see comments there for more | |
24985 | + * information. | |
24986 | + */ | |
24987 | +static int pfm_cpu_notify(struct notifier_block *nfb, | |
24988 | + unsigned long action, void *hcpu) | |
24989 | +{ | |
24990 | + unsigned int cpu = (unsigned long)hcpu; | |
24991 | + int ret = NOTIFY_OK; | |
24992 | + | |
24993 | + pfm_pmu_conf_get(0); | |
24994 | + | |
24995 | + switch (action) { | |
24996 | + case CPU_ONLINE: | |
24997 | + pfm_debugfs_add_cpu(cpu); | |
24998 | + PFM_INFO("CPU%d is online", cpu); | |
24999 | + break; | |
25000 | + case CPU_UP_PREPARE: | |
25001 | + PFM_INFO("CPU%d prepare online", cpu); | |
25002 | + break; | |
25003 | + case CPU_UP_CANCELED: | |
25004 | + pfm_debugfs_del_cpu(cpu); | |
25005 | + PFM_INFO("CPU%d is up canceled", cpu); | |
25006 | + break; | |
25007 | + case CPU_DOWN_PREPARE: | |
25008 | + PFM_INFO("CPU%d prepare offline", cpu); | |
25009 | + break; | |
25010 | + case CPU_DOWN_FAILED: | |
25011 | + PFM_INFO("CPU%d is down failed", cpu); | |
25012 | + break; | |
25013 | + case CPU_DEAD: | |
25014 | + pfm_debugfs_del_cpu(cpu); | |
25015 | + PFM_INFO("CPU%d is offline", cpu); | |
25016 | + break; | |
25017 | + } | |
25018 | + pfm_pmu_conf_put(); | |
25019 | + return ret; | |
25020 | +} | |
25021 | + | |
25022 | +/* | |
25023 | + * called from cpu_disable() to detach the perfmon context | |
25024 | + * from the CPU going down. | |
25025 | + * | |
25026 | + * We cannot use the cpu hotplug notifier because we MUST run | |
25027 | + * on the CPU that is going down to save the PMU state | |
25028 | + */ | |
25029 | +void pfm_cpu_disable(void) | |
25030 | +{ | |
25031 | + struct pfm_context *ctx; | |
25032 | + unsigned long flags; | |
25033 | + int is_system, release_info = 0; | |
25034 | + u32 cpu; | |
25035 | + int r; | |
25036 | + | |
25037 | + ctx = __get_cpu_var(pmu_ctx); | |
25038 | + if (ctx == NULL) | |
25039 | + return; | |
25040 | + | |
25041 | + is_system = ctx->flags.system; | |
25042 | + cpu = ctx->cpu; | |
25043 | + | |
25044 | + /* | |
25045 | + * context is LOADED or MASKED | |
25046 | + * | |
25047 | + * we unload from CPU. That stops monitoring and does | |
25048 | + * all the bookkeeping of saving values and updating duration | |
25049 | + */ | |
25050 | + spin_lock_irqsave(&ctx->lock, flags); | |
25051 | + if (is_system) | |
25052 | + __pfm_unload_context(ctx, &release_info); | |
25053 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
25054 | + | |
25055 | + /* | |
25056 | + * cancel timer | |
25057 | + */ | |
25058 | + if (release_info & 0x2) { | |
25059 | + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); | |
25060 | + PFM_DBG("timeout cancel=%d", r); | |
25061 | + } | |
25062 | + | |
25063 | + if (release_info & 0x1) | |
25064 | + pfm_session_release(is_system, cpu); | |
25065 | +} | |
25066 | + | |
25067 | +static struct notifier_block pfm_cpu_notifier = { | |
25068 | + .notifier_call = pfm_cpu_notify | |
25069 | +}; | |
25070 | + | |
25071 | +int __init pfm_init_hotplug(void) | |
25072 | +{ | |
25073 | + int ret = 0; | |
25074 | + /* | |
25075 | + * register CPU hotplug event notifier | |
25076 | + */ | |
25077 | + ret = register_cpu_notifier(&pfm_cpu_notifier); | |
25078 | + if (!ret) | |
25079 | + PFM_LOG("CPU hotplug support enabled"); | |
25080 | + return ret; | |
25081 | +} | |
25082 | +#endif /* CONFIG_HOTPLUG_CPU */ | |
25083 | --- /dev/null | |
25084 | +++ b/perfmon/perfmon_init.c | |
25085 | @@ -0,0 +1,131 @@ | |
25086 | +/* | |
25087 | + * perfmon.c: perfmon2 global initialization functions | |
25088 | + * | |
25089 | + * This file implements the perfmon2 interface which | |
25090 | + * provides access to the hardware performance counters | |
25091 | + * of the host processor. | |
25092 | + * | |
25093 | + * | |
25094 | + * The initial version of perfmon.c was written by | |
25095 | + * Ganesh Venkitachalam, IBM Corp. | |
25096 | + * | |
25097 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
25098 | + * David Mosberger, Hewlett Packard Co. | |
25099 | + * | |
25100 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
25101 | + * by Stephane Eranian, Hewlett Packard Co. | |
25102 | + * | |
25103 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
25104 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
25105 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
25106 | + * | |
25107 | + * More information about perfmon available at: | |
25108 | + * http://perfmon2.sf.net | |
25109 | + * | |
25110 | + * This program is free software; you can redistribute it and/or | |
25111 | + * modify it under the terms of version 2 of the GNU General Public | |
25112 | + * License as published by the Free Software Foundation. | |
25113 | + * | |
25114 | + * This program is distributed in the hope that it will be useful, | |
25115 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25116 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
25117 | + * General Public License for more details. | |
25118 | + * | |
25119 | + * You should have received a copy of the GNU General Public License | |
25120 | + * along with this program; if not, write to the Free Software | |
25121 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
25122 | + * 02111-1307 USA | |
25123 | + */ | |
25124 | +#include <linux/kernel.h> | |
25125 | +#include <linux/perfmon_kern.h> | |
25126 | +#include "perfmon_priv.h" | |
25127 | + | |
25128 | +/* | |
25129 | + * external variables | |
25130 | + */ | |
25131 | +DEFINE_PER_CPU(struct task_struct *, pmu_owner); | |
25132 | +DEFINE_PER_CPU(struct pfm_context *, pmu_ctx); | |
25133 | +DEFINE_PER_CPU(u64, pmu_activation_number); | |
25134 | +DEFINE_PER_CPU(struct pfm_stats, pfm_stats); | |
25135 | +DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer); | |
25136 | + | |
25137 | + | |
25138 | +int perfmon_disabled; /* >0 if perfmon is disabled */ | |
25139 | + | |
25140 | +/* | |
25141 | + * called from cpu_init() and pfm_pmu_register() | |
25142 | + */ | |
25143 | +void __pfm_init_percpu(void *dummy) | |
25144 | +{ | |
25145 | + struct hrtimer *h; | |
25146 | + | |
25147 | + h = &__get_cpu_var(pfm_hrtimer); | |
25148 | + | |
25149 | + pfm_arch_init_percpu(); | |
25150 | + | |
25151 | + /* | |
25152 | + * initialize per-cpu high res timer | |
25153 | + */ | |
25154 | + hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
25155 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
25156 | + /* | |
25157 | + * avoid potential deadlock on the runqueue lock | |
25158 | + * during context switch when multiplexing. Situation | |
25159 | + * arises on architectures which run switch_to() with | |
25160 | + * the runqueue lock held, e.g., x86. On others, e.g., | |
25161 | + * IA-64, the problem does not exist. | |
25162 | + * Setting the callback mode to HRTIMER_CB_IRQSAFE_UNLOCKED | |
25163 | + * such that the callback routine is only called on hardirq | |
25164 | + * context not on softirq, thus the context switch will not | |
25165 | + * end up trying to wakeup the softirqd | |
25166 | + */ | |
25167 | + h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; | |
25168 | +#endif | |
25169 | + h->function = pfm_handle_switch_timeout; | |
25170 | +} | |
25171 | + | |
25172 | +/* | |
25173 | + * global initialization routine, executed only once | |
25174 | + */ | |
25175 | +int __init pfm_init(void) | |
25176 | +{ | |
25177 | + PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN); | |
25178 | + | |
25179 | + if (pfm_init_ctx()) | |
25180 | + goto error_disable; | |
25181 | + | |
25182 | + | |
25183 | + if (pfm_init_sets()) | |
25184 | + goto error_disable; | |
25185 | + | |
25186 | + if (pfm_init_fs()) | |
25187 | + goto error_disable; | |
25188 | + | |
25189 | + if (pfm_init_sysfs()) | |
25190 | + goto error_disable; | |
25191 | + | |
25192 | + /* not critical, so no error checking */ | |
25193 | + pfm_init_debugfs(); | |
25194 | + | |
25195 | + /* | |
25196 | + * one time, arch-specific global initialization | |
25197 | + */ | |
25198 | + if (pfm_arch_init()) | |
25199 | + goto error_disable; | |
25200 | + | |
25201 | + if (pfm_init_hotplug()) | |
25202 | + goto error_disable; | |
25203 | + return 0; | |
25204 | + | |
25205 | +error_disable: | |
25206 | + PFM_ERR("perfmon is disabled due to initialization error"); | |
25207 | + perfmon_disabled = 1; | |
25208 | + return -1; | |
25209 | +} | |
25210 | + | |
25211 | +/* | |
25212 | + * must use subsys_initcall() to ensure that the perfmon2 core | |
25213 | + * is initialized before any PMU description module when they are | |
25214 | + * compiled in. | |
25215 | + */ | |
25216 | +subsys_initcall(pfm_init); | |
25217 | --- /dev/null | |
25218 | +++ b/perfmon/perfmon_intr.c | |
25219 | @@ -0,0 +1,648 @@ | |
25220 | +/* | |
25221 | + * perfmon_intr.c: perfmon2 interrupt handling | |
25222 | + * | |
25223 | + * This file implements the perfmon2 interface which | |
25224 | + * provides access to the hardware performance counters | |
25225 | + * of the host processor. | |
25226 | + * | |
25227 | + * The initial version of perfmon.c was written by | |
25228 | + * Ganesh Venkitachalam, IBM Corp. | |
25229 | + * | |
25230 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
25231 | + * David Mosberger, Hewlett Packard Co. | |
25232 | + * | |
25233 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
25234 | + * by Stephane Eranian, Hewlett Packard Co. | |
25235 | + * | |
25236 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
25237 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
25238 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
25239 | + * | |
25240 | + * More information about perfmon available at: | |
25241 | + * http://perfmon2.sf.net | |
25242 | + * | |
25243 | + * This program is free software; you can redistribute it and/or | |
25244 | + * modify it under the terms of version 2 of the GNU General Public | |
25245 | + * License as published by the Free Software Foundation. | |
25246 | + * | |
25247 | + * This program is distributed in the hope that it will be useful, | |
25248 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25249 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
25250 | + * General Public License for more details. | |
25251 | + * | |
25252 | + * You should have received a copy of the GNU General Public License | |
25253 | + * along with this program; if not, write to the Free Software | |
25254 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
25255 | + * 02111-1307 USA | |
25256 | + */ | |
25257 | +#include <linux/kernel.h> | |
25258 | +#include <linux/module.h> | |
25259 | +#include <linux/perfmon_kern.h> | |
25260 | +#include "perfmon_priv.h" | |
25261 | + | |
25262 | +/** | |
25263 | + * pfm_intr_process_64bit_ovfls - handle 64-bit counter emulation | |
25264 | + * @ctx: context to operate on | |
25265 | + * @set: set to operate on | |
25266 | + * | |
25267 | + * The function returns the number of 64-bit overflows detected. | |
25268 | + * | |
25269 | + * 64-bit software pmds are updated for overflowed pmd registers | |
25270 | + * the set->reset_pmds is updated to the list of pmds to reset | |
25271 | + * | |
25272 | + * In any case, set->npend_ovfls is cleared | |
25273 | + */ | |
25274 | +static u16 pfm_intr_process_64bit_ovfls(struct pfm_context *ctx, | |
25275 | + struct pfm_event_set *set, | |
25276 | + u32 *ovfl_ctrl) | |
25277 | +{ | |
25278 | + u16 i, num_ovfls, max_pmd, max_intr; | |
25279 | + u16 num_64b_ovfls, has_ovfl_sw, must_switch; | |
25280 | + u64 ovfl_thres, old_val, new_val, ovfl_mask; | |
25281 | + | |
25282 | + num_64b_ovfls = must_switch = 0; | |
25283 | + | |
25284 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
25285 | + max_pmd = ctx->regs.max_pmd; | |
25286 | + max_intr = ctx->regs.max_intr_pmd; | |
25287 | + | |
25288 | + num_ovfls = set->npend_ovfls; | |
25289 | + has_ovfl_sw = set->flags & PFM_SETFL_OVFL_SWITCH; | |
25290 | + | |
25291 | + bitmap_zero(cast_ulp(set->reset_pmds), max_pmd); | |
25292 | + | |
25293 | + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) { | |
25294 | + /* | |
25295 | + * skip pmd which did not overflow | |
25296 | + */ | |
25297 | + if (!test_bit(i, cast_ulp(set->povfl_pmds))) | |
25298 | + continue; | |
25299 | + | |
25300 | + num_ovfls--; | |
25301 | + | |
25302 | + /* | |
25303 | + * Update software value for counters ONLY | |
25304 | + * | |
25305 | + * Note that the pmd is not necessarily 0 at this point as | |
25306 | + * qualified events may have happened before the PMU was | |
25307 | + * frozen. The residual count is not taken into consideration | |
25308 | + * here but will be with any read of the pmd | |
25309 | + */ | |
25310 | + ovfl_thres = set->pmds[i].ovflsw_thres; | |
25311 | + | |
25312 | + if (likely(test_bit(i, cast_ulp(ctx->regs.cnt_pmds)))) { | |
25313 | + old_val = new_val = set->pmds[i].value; | |
25314 | + new_val += 1 + ovfl_mask; | |
25315 | + set->pmds[i].value = new_val; | |
25316 | + } else { | |
25317 | + /* | |
25318 | + * for non counters which interrupt, e.g., AMD IBS, | |
25319 | + * we consider this equivalent to a 64-bit counter | |
25320 | + * overflow. | |
25321 | + */ | |
25322 | + old_val = 1; new_val = 0; | |
25323 | + } | |
25324 | + | |
25325 | + /* | |
25326 | + * check for 64-bit overflow condition | |
25327 | + */ | |
25328 | + if (likely(old_val > new_val)) { | |
25329 | + num_64b_ovfls++; | |
25330 | + if (has_ovfl_sw && ovfl_thres > 0) { | |
25331 | + if (ovfl_thres == 1) | |
25332 | + must_switch = 1; | |
25333 | + set->pmds[i].ovflsw_thres = ovfl_thres - 1; | |
25334 | + } | |
25335 | + | |
25336 | + /* | |
25337 | + * what to reset because of this overflow | |
25338 | + * - the overflowed register | |
25339 | + * - its reset_smpls | |
25340 | + */ | |
25341 | + __set_bit(i, cast_ulp(set->reset_pmds)); | |
25342 | + | |
25343 | + bitmap_or(cast_ulp(set->reset_pmds), | |
25344 | + cast_ulp(set->reset_pmds), | |
25345 | + cast_ulp(set->pmds[i].reset_pmds), | |
25346 | + max_pmd); | |
25347 | + } else { | |
25348 | + /* | |
25349 | + * only keep track of 64-bit overflows or | |
25350 | + * assimilated | |
25351 | + */ | |
25352 | + __clear_bit(i, cast_ulp(set->povfl_pmds)); | |
25353 | + | |
25354 | + /* | |
25355 | + * on some PMU, it may be necessary to re-arm the PMD | |
25356 | + */ | |
25357 | + pfm_arch_ovfl_reset_pmd(ctx, i); | |
25358 | + } | |
25359 | + | |
25360 | + PFM_DBG_ovfl("ovfl=%s pmd%u new=0x%llx old=0x%llx " | |
25361 | + "hw_pmd=0x%llx o_pmds=0x%llx must_switch=%u " | |
25362 | + "o_thres=%llu o_thres_ref=%llu", | |
25363 | + old_val > new_val ? "64-bit" : "HW", | |
25364 | + i, | |
25365 | + (unsigned long long)new_val, | |
25366 | + (unsigned long long)old_val, | |
25367 | + (unsigned long long)pfm_read_pmd(ctx, i), | |
25368 | + (unsigned long long)set->povfl_pmds[0], | |
25369 | + must_switch, | |
25370 | + (unsigned long long)set->pmds[i].ovflsw_thres, | |
25371 | + (unsigned long long)set->pmds[i].ovflsw_ref_thres); | |
25372 | + } | |
25373 | + /* | |
25374 | + * update public bitmask of 64-bit overflowed pmds | |
25375 | + */ | |
25376 | + if (num_64b_ovfls) | |
25377 | + bitmap_copy(cast_ulp(set->ovfl_pmds), cast_ulp(set->povfl_pmds), | |
25378 | + max_intr); | |
25379 | + | |
25380 | + if (must_switch) | |
25381 | + *ovfl_ctrl |= PFM_OVFL_CTRL_SWITCH; | |
25382 | + | |
25383 | + /* | |
25384 | + * mark the overflows as consumed | |
25385 | + */ | |
25386 | + set->npend_ovfls = 0; | |
25387 | + bitmap_zero(cast_ulp(set->povfl_pmds), max_intr); | |
25388 | + | |
25389 | + return num_64b_ovfls; | |
25390 | +} | |
25391 | + | |
25392 | +/** | |
25393 | + * pfm_intr_get_smpl_pmds_values - copy 64-bit pmd values for sampling format | |
25394 | + * @ctx: context to work on | |
25395 | + * @set: current event set | |
25396 | + * @arg: overflow arg to be passed to format | |
25397 | + * @smpl_pmds: list of PMDs of interest for the overflowed register | |
25398 | + * | |
25399 | + * build an array of 46-bit PMD values based on smpl_pmds. Values are | |
25400 | + * stored in increasing order of the PMD indexes | |
25401 | + */ | |
25402 | +static void pfm_intr_get_smpl_pmds_values(struct pfm_context *ctx, | |
25403 | + struct pfm_event_set *set, | |
25404 | + struct pfm_ovfl_arg *arg, | |
25405 | + u64 *smpl_pmds) | |
25406 | +{ | |
25407 | + u16 j, k, max_pmd; | |
25408 | + u64 new_val, ovfl_mask; | |
25409 | + u64 *cnt_pmds; | |
25410 | + | |
25411 | + cnt_pmds = ctx->regs.cnt_pmds; | |
25412 | + max_pmd = ctx->regs.max_pmd; | |
25413 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
25414 | + | |
25415 | + for (j = k = 0; j < max_pmd; j++) { | |
25416 | + | |
25417 | + if (!test_bit(j, cast_ulp(smpl_pmds))) | |
25418 | + continue; | |
25419 | + | |
25420 | + new_val = pfm_read_pmd(ctx, j); | |
25421 | + | |
25422 | + /* for counters, build 64-bit value */ | |
25423 | + if (test_bit(j, cast_ulp(cnt_pmds))) | |
25424 | + new_val = (set->pmds[j].value & ~ovfl_mask) | |
25425 | + | (new_val & ovfl_mask); | |
25426 | + | |
25427 | + arg->smpl_pmds_values[k++] = new_val; | |
25428 | + | |
25429 | + PFM_DBG_ovfl("s_pmd_val[%u]=pmd%u=0x%llx", k, j, | |
25430 | + (unsigned long long)new_val); | |
25431 | + } | |
25432 | + arg->num_smpl_pmds = k; | |
25433 | +} | |
25434 | + | |
25435 | +/** | |
25436 | + * pfm_intr_process_smpl_fmt -- handle sampling format callback | |
25437 | + * @ctx: context to work on | |
25438 | + * @set: current event set | |
25439 | + * @ip: interrupted instruction pointer | |
25440 | + * @now: timestamp | |
25441 | + * @num_ovfls: number of 64-bit overflows | |
25442 | + * @ovfl_ctrl: set of controls for interrupt handler tail processing | |
25443 | + * @regs: register state | |
25444 | + * | |
25445 | + * Prepare argument (ovfl_arg) to be passed to sampling format callback, then | |
25446 | + * invoke the callback (fmt_handler) | |
25447 | + */ | |
25448 | +static int pfm_intr_process_smpl_fmt(struct pfm_context *ctx, | |
25449 | + struct pfm_event_set *set, | |
25450 | + unsigned long ip, | |
25451 | + u64 now, | |
25452 | + u64 num_ovfls, | |
25453 | + u32 *ovfl_ctrl, | |
25454 | + struct pt_regs *regs) | |
25455 | +{ | |
25456 | + struct pfm_ovfl_arg *ovfl_arg; | |
25457 | + u64 start_cycles, end_cycles; | |
25458 | + u16 i, max_pmd; | |
25459 | + int ret = 0; | |
25460 | + | |
25461 | + ovfl_arg = &ctx->ovfl_arg; | |
25462 | + | |
25463 | + ovfl_arg->active_set = set->id; | |
25464 | + max_pmd = ctx->regs.max_pmd; | |
25465 | + | |
25466 | + /* | |
25467 | + * first_intr_pmd: first PMD which can generate PMU interrupts | |
25468 | + */ | |
25469 | + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) { | |
25470 | + /* | |
25471 | + * skip pmd which did not have 64-bit overflows | |
25472 | + */ | |
25473 | + if (!test_bit(i, cast_ulp(set->ovfl_pmds))) | |
25474 | + continue; | |
25475 | + | |
25476 | + num_ovfls--; | |
25477 | + | |
25478 | + /* | |
25479 | + * prepare argument to fmt_handler | |
25480 | + */ | |
25481 | + ovfl_arg->ovfl_pmd = i; | |
25482 | + ovfl_arg->ovfl_ctrl = 0; | |
25483 | + | |
25484 | + ovfl_arg->pmd_last_reset = set->pmds[i].lval; | |
25485 | + ovfl_arg->pmd_eventid = set->pmds[i].eventid; | |
25486 | + ovfl_arg->num_smpl_pmds = 0; | |
25487 | + | |
25488 | + /* | |
25489 | + * copy values of pmds of interest, if any | |
25490 | + * Sampling format may use them | |
25491 | + * We do not initialize the unused smpl_pmds_values | |
25492 | + */ | |
25493 | + if (!bitmap_empty(cast_ulp(set->pmds[i].smpl_pmds), max_pmd)) | |
25494 | + pfm_intr_get_smpl_pmds_values(ctx, set, ovfl_arg, | |
25495 | + set->pmds[i].smpl_pmds); | |
25496 | + | |
25497 | + pfm_stats_inc(fmt_handler_calls); | |
25498 | + | |
25499 | + /* | |
25500 | + * call format record (handler) routine | |
25501 | + */ | |
25502 | + start_cycles = sched_clock(); | |
25503 | + ret = (*ctx->smpl_fmt->fmt_handler)(ctx, ip, now, regs); | |
25504 | + end_cycles = sched_clock(); | |
25505 | + | |
25506 | + /* | |
25507 | + * The reset_pmds mask is constructed automatically | |
25508 | + * on overflow. When the actual reset takes place | |
25509 | + * depends on the masking, switch and notification | |
25510 | + * status. It may be deferred until pfm_restart(). | |
25511 | + */ | |
25512 | + *ovfl_ctrl |= ovfl_arg->ovfl_ctrl; | |
25513 | + | |
25514 | + pfm_stats_add(fmt_handler_ns, end_cycles - start_cycles); | |
25515 | + } | |
25516 | + /* | |
25517 | + * when the format cannot handle the rest of the overflow, we abort | |
25518 | + */ | |
25519 | + if (ret) | |
25520 | + PFM_DBG_ovfl("handler aborted at PMD%u ret=%d", i, ret); | |
25521 | + return ret; | |
25522 | +} | |
25523 | +/** | |
25524 | + * pfm_overflow_handler - main overflow processing routine. | |
25525 | + * @ctx: context to work on (always current context) | |
25526 | + * @set: current event set | |
25527 | + * @ip: interrupt instruction pointer | |
25528 | + * @regs: machine state | |
25529 | + * | |
25530 | + * set->num_ovfl_pmds is 0 when returning from this function even though | |
25531 | + * set->ovfl_pmds[] may have bits set. When leaving set->num_ovfl_pmds | |
25532 | + * must never be used to determine if there was a pending overflow. | |
25533 | + */ | |
25534 | +static void pfm_overflow_handler(struct pfm_context *ctx, | |
25535 | + struct pfm_event_set *set, | |
25536 | + unsigned long ip, | |
25537 | + struct pt_regs *regs) | |
25538 | +{ | |
25539 | + struct pfm_event_set *set_orig; | |
25540 | + u64 now; | |
25541 | + u32 ovfl_ctrl; | |
25542 | + u16 max_intr, max_pmd; | |
25543 | + u16 num_ovfls; | |
25544 | + int ret, has_notify; | |
25545 | + | |
25546 | + /* | |
25547 | + * take timestamp | |
25548 | + */ | |
25549 | + now = sched_clock(); | |
25550 | + | |
25551 | + max_pmd = ctx->regs.max_pmd; | |
25552 | + max_intr = ctx->regs.max_intr_pmd; | |
25553 | + | |
25554 | + set_orig = set; | |
25555 | + ovfl_ctrl = 0; | |
25556 | + | |
25557 | + /* | |
25558 | + * skip ZOMBIE case | |
25559 | + */ | |
25560 | + if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) | |
25561 | + goto stop_monitoring; | |
25562 | + | |
25563 | + PFM_DBG_ovfl("intr_pmds=0x%llx npend=%u ip=%p, blocking=%d " | |
25564 | + "u_pmds=0x%llx use_fmt=%u", | |
25565 | + (unsigned long long)set->povfl_pmds[0], | |
25566 | + set->npend_ovfls, | |
25567 | + (void *)ip, | |
25568 | + ctx->flags.block, | |
25569 | + (unsigned long long)set->used_pmds[0], | |
25570 | + !!ctx->smpl_fmt); | |
25571 | + | |
25572 | + /* | |
25573 | + * return number of 64-bit overflows | |
25574 | + */ | |
25575 | + num_ovfls = pfm_intr_process_64bit_ovfls(ctx, set, &ovfl_ctrl); | |
25576 | + | |
25577 | + /* | |
25578 | + * there were no 64-bit overflows | |
25579 | + * nothing else to do | |
25580 | + */ | |
25581 | + if (!num_ovfls) | |
25582 | + return; | |
25583 | + | |
25584 | + /* | |
25585 | + * tmp_ovfl_notify = ovfl_pmds & ovfl_notify | |
25586 | + * with: | |
25587 | + * - ovfl_pmds: last 64-bit overflowed pmds | |
25588 | + * - ovfl_notify: notify on overflow registers | |
25589 | + */ | |
25590 | + bitmap_and(cast_ulp(ctx->tmp_ovfl_notify), | |
25591 | + cast_ulp(set->ovfl_pmds), | |
25592 | + cast_ulp(set->ovfl_notify), | |
25593 | + max_intr); | |
25594 | + | |
25595 | + has_notify = !bitmap_empty(cast_ulp(ctx->tmp_ovfl_notify), max_intr); | |
25596 | + | |
25597 | + /* | |
25598 | + * check for sampling format and invoke fmt_handler | |
25599 | + */ | |
25600 | + if (likely(ctx->smpl_fmt)) { | |
25601 | + pfm_intr_process_smpl_fmt(ctx, set, ip, now, num_ovfls, | |
25602 | + &ovfl_ctrl, regs); | |
25603 | + } else { | |
25604 | + /* | |
25605 | + * When no sampling format is used, the default | |
25606 | + * is: | |
25607 | + * - mask monitoring if not switching | |
25608 | + * - notify user if requested | |
25609 | + * | |
25610 | + * If notification is not requested, monitoring is masked | |
25611 | + * and overflowed registers are not reset (saturation). | |
25612 | + * This mimics the behavior of the default sampling format. | |
25613 | + */ | |
25614 | + ovfl_ctrl |= PFM_OVFL_CTRL_NOTIFY; | |
25615 | + if (has_notify || !(ovfl_ctrl & PFM_OVFL_CTRL_SWITCH)) | |
25616 | + ovfl_ctrl |= PFM_OVFL_CTRL_MASK; | |
25617 | + } | |
25618 | + | |
25619 | + PFM_DBG_ovfl("set%u o_notify=0x%llx o_pmds=0x%llx " | |
25620 | + "r_pmds=0x%llx ovfl_ctrl=0x%x", | |
25621 | + set->id, | |
25622 | + (unsigned long long)ctx->tmp_ovfl_notify[0], | |
25623 | + (unsigned long long)set->ovfl_pmds[0], | |
25624 | + (unsigned long long)set->reset_pmds[0], | |
25625 | + ovfl_ctrl); | |
25626 | + | |
25627 | + /* | |
25628 | + * execute the various controls | |
25629 | + * ORDER MATTERS | |
25630 | + */ | |
25631 | + | |
25632 | + | |
25633 | + /* | |
25634 | + * mask monitoring | |
25635 | + */ | |
25636 | + if (ovfl_ctrl & PFM_OVFL_CTRL_MASK) { | |
25637 | + pfm_mask_monitoring(ctx, set); | |
25638 | + /* | |
25639 | + * when masking, reset is deferred until | |
25640 | + * pfm_restart() | |
25641 | + */ | |
25642 | + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET; | |
25643 | + | |
25644 | + /* | |
25645 | + * when masking, switching is deferred until | |
25646 | + * pfm_restart and we need to remember it | |
25647 | + */ | |
25648 | + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) { | |
25649 | + set->priv_flags |= PFM_SETFL_PRIV_SWITCH; | |
25650 | + ovfl_ctrl &= ~PFM_OVFL_CTRL_SWITCH; | |
25651 | + } | |
25652 | + } | |
25653 | + | |
25654 | + /* | |
25655 | + * switch event set | |
25656 | + */ | |
25657 | + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) { | |
25658 | + pfm_switch_sets_from_intr(ctx); | |
25659 | + /* update view of active set */ | |
25660 | + set = ctx->active_set; | |
25661 | + } | |
25662 | + /* | |
25663 | + * send overflow notification | |
25664 | + * | |
25665 | + * only necessary if at least one overflowed | |
25666 | + * register had the notify flag set | |
25667 | + */ | |
25668 | + if (has_notify && (ovfl_ctrl & PFM_OVFL_CTRL_NOTIFY)) { | |
25669 | + /* | |
25670 | + * block on notify, not on masking | |
25671 | + */ | |
25672 | + if (ctx->flags.block) | |
25673 | + pfm_post_work(current, ctx, PFM_WORK_BLOCK); | |
25674 | + | |
25675 | + /* | |
25676 | + * send notification and passed original set id | |
25677 | + * if error, queue full, for instance, then default | |
25678 | + * to masking monitoring, i.e., saturate | |
25679 | + */ | |
25680 | + ret = pfm_ovfl_notify(ctx, set_orig, ip); | |
25681 | + if (unlikely(ret)) { | |
25682 | + if (ctx->state == PFM_CTX_LOADED) { | |
25683 | + pfm_mask_monitoring(ctx, set); | |
25684 | + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET; | |
25685 | + } | |
25686 | + } else { | |
25687 | + ctx->flags.can_restart++; | |
25688 | + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart); | |
25689 | + } | |
25690 | + } | |
25691 | + | |
25692 | + /* | |
25693 | + * reset overflowed registers | |
25694 | + */ | |
25695 | + if (ovfl_ctrl & PFM_OVFL_CTRL_RESET) { | |
25696 | + u16 nn; | |
25697 | + nn = bitmap_weight(cast_ulp(set->reset_pmds), max_pmd); | |
25698 | + if (nn) | |
25699 | + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_SHORT); | |
25700 | + } | |
25701 | + return; | |
25702 | + | |
25703 | +stop_monitoring: | |
25704 | + /* | |
25705 | + * Does not happen for a system-wide context nor for a | |
25706 | + * self-monitored context. We cannot attach to kernel-only | |
25707 | + * thread, thus it is safe to set TIF bits, i.e., the thread | |
25708 | + * will eventually leave the kernel or die and either we will | |
25709 | + * catch the context and clean it up in pfm_handler_work() or | |
25710 | + * pfm_exit_thread(). | |
25711 | + * | |
25712 | + * Mask until we get to pfm_handle_work() | |
25713 | + */ | |
25714 | + pfm_mask_monitoring(ctx, set); | |
25715 | + | |
25716 | + PFM_DBG_ovfl("ctx is zombie, converted to spurious"); | |
25717 | + pfm_post_work(current, ctx, PFM_WORK_ZOMBIE); | |
25718 | +} | |
25719 | + | |
25720 | +/** | |
25721 | + * __pfm_interrupt_handler - 1st level interrupt handler | |
25722 | + * @ip: interrupted instruction pointer | |
25723 | + * @regs: machine state | |
25724 | + * | |
25725 | + * Function is static because we use a wrapper to easily capture timing infos. | |
25726 | + * | |
25727 | + * | |
25728 | + * Context locking necessary to avoid concurrent accesses from other CPUs | |
25729 | + * - For per-thread, we must prevent pfm_restart() which works when | |
25730 | + * context is LOADED or MASKED | |
25731 | + */ | |
25732 | +static void __pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs) | |
25733 | +{ | |
25734 | + struct task_struct *task; | |
25735 | + struct pfm_context *ctx; | |
25736 | + struct pfm_event_set *set; | |
25737 | + | |
25738 | + | |
25739 | + task = __get_cpu_var(pmu_owner); | |
25740 | + ctx = __get_cpu_var(pmu_ctx); | |
25741 | + | |
25742 | + /* | |
25743 | + * verify if there is a context on this CPU | |
25744 | + */ | |
25745 | + if (unlikely(ctx == NULL)) { | |
25746 | + PFM_DBG_ovfl("no ctx"); | |
25747 | + goto spurious; | |
25748 | + } | |
25749 | + | |
25750 | + /* | |
25751 | + * we need to lock context because it could be accessed | |
25752 | + * from another CPU. Depending on the priority level of | |
25753 | + * the PMU interrupt or the arch, it may be necessary to | |
25754 | + * mask interrupts alltogether to avoid race condition with | |
25755 | + * the timer interrupt in case of time-based set switching, | |
25756 | + * for instance. | |
25757 | + */ | |
25758 | + spin_lock(&ctx->lock); | |
25759 | + | |
25760 | + set = ctx->active_set; | |
25761 | + | |
25762 | + /* | |
25763 | + * For SMP per-thread, it is not possible to have | |
25764 | + * owner != NULL && task != current. | |
25765 | + * | |
25766 | + * For UP per-thread, because of lazy save, it | |
25767 | + * is possible to receive an interrupt in another task | |
25768 | + * which is not using the PMU. This means | |
25769 | + * that the interrupt was in-flight at the | |
25770 | + * time of pfm_ctxswout_thread(). In that | |
25771 | + * case, it will be replayed when the task | |
25772 | + * is scheduled again. Hence we convert to spurious. | |
25773 | + * | |
25774 | + * The basic rule is that an overflow is always | |
25775 | + * processed in the context of the task that | |
25776 | + * generated it for all per-thread contexts. | |
25777 | + * | |
25778 | + * for system-wide, task is always NULL | |
25779 | + */ | |
25780 | +#ifndef CONFIG_SMP | |
25781 | + if (unlikely((task && current->pfm_context != ctx))) { | |
25782 | + PFM_DBG_ovfl("spurious: not owned by current task"); | |
25783 | + goto spurious; | |
25784 | + } | |
25785 | +#endif | |
25786 | + if (unlikely(ctx->state == PFM_CTX_MASKED)) { | |
25787 | + PFM_DBG_ovfl("spurious: monitoring masked"); | |
25788 | + goto spurious; | |
25789 | + } | |
25790 | + | |
25791 | + /* | |
25792 | + * check that monitoring is active, otherwise convert | |
25793 | + * to spurious | |
25794 | + */ | |
25795 | + if (unlikely(!pfm_arch_is_active(ctx))) { | |
25796 | + PFM_DBG_ovfl("spurious: monitoring non active"); | |
25797 | + goto spurious; | |
25798 | + } | |
25799 | + | |
25800 | + /* | |
25801 | + * freeze PMU and collect overflowed PMD registers | |
25802 | + * into set->povfl_pmds. Number of overflowed PMDs | |
25803 | + * reported in set->npend_ovfls | |
25804 | + */ | |
25805 | + pfm_arch_intr_freeze_pmu(ctx, set); | |
25806 | + | |
25807 | + /* | |
25808 | + * no overflow detected, interrupt may have come | |
25809 | + * from the previous thread running on this CPU | |
25810 | + */ | |
25811 | + if (unlikely(!set->npend_ovfls)) { | |
25812 | + PFM_DBG_ovfl("no npend_ovfls"); | |
25813 | + goto spurious; | |
25814 | + } | |
25815 | + | |
25816 | + pfm_stats_inc(ovfl_intr_regular_count); | |
25817 | + | |
25818 | + /* | |
25819 | + * invoke actual handler | |
25820 | + */ | |
25821 | + pfm_overflow_handler(ctx, set, ip, regs); | |
25822 | + | |
25823 | + /* | |
25824 | + * unfreeze PMU, monitoring may not actual be restarted | |
25825 | + * if context is MASKED | |
25826 | + */ | |
25827 | + pfm_arch_intr_unfreeze_pmu(ctx); | |
25828 | + | |
25829 | + spin_unlock(&ctx->lock); | |
25830 | + | |
25831 | + return; | |
25832 | + | |
25833 | +spurious: | |
25834 | + /* ctx may be NULL */ | |
25835 | + pfm_arch_intr_unfreeze_pmu(ctx); | |
25836 | + if (ctx) | |
25837 | + spin_unlock(&ctx->lock); | |
25838 | + | |
25839 | + pfm_stats_inc(ovfl_intr_spurious_count); | |
25840 | +} | |
25841 | + | |
25842 | + | |
25843 | +/** | |
25844 | + * pfm_interrupt_handler - 1st level interrupt handler | |
25845 | + * @ip: interrupt instruction pointer | |
25846 | + * @regs: machine state | |
25847 | + * | |
25848 | + * Function called from the low-level assembly code or arch-specific perfmon | |
25849 | + * code. Simple wrapper used for timing purpose. Actual work done in | |
25850 | + * __pfm_overflow_handler() | |
25851 | + */ | |
25852 | +void pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs) | |
25853 | +{ | |
25854 | + u64 start; | |
25855 | + | |
25856 | + pfm_stats_inc(ovfl_intr_all_count); | |
25857 | + | |
25858 | + BUG_ON(!irqs_disabled()); | |
25859 | + | |
25860 | + start = sched_clock(); | |
25861 | + | |
25862 | + __pfm_interrupt_handler(ip, regs); | |
25863 | + | |
25864 | + pfm_stats_add(ovfl_intr_ns, sched_clock() - start); | |
25865 | +} | |
25866 | +EXPORT_SYMBOL(pfm_interrupt_handler); | |
25867 | + | |
25868 | --- /dev/null | |
25869 | +++ b/perfmon/perfmon_msg.c | |
25870 | @@ -0,0 +1,229 @@ | |
25871 | +/* | |
25872 | + * perfmon_msg.c: perfmon2 notification message queue management | |
25873 | + * | |
25874 | + * This file implements the perfmon2 interface which | |
25875 | + * provides access to the hardware performance counters | |
25876 | + * of the host processor. | |
25877 | + * | |
25878 | + * The initial version of perfmon.c was written by | |
25879 | + * Ganesh Venkitachalam, IBM Corp. | |
25880 | + * | |
25881 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
25882 | + * David Mosberger, Hewlett Packard Co. | |
25883 | + * | |
25884 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
25885 | + * by Stephane Eranian, Hewlett Packard Co. | |
25886 | + * | |
25887 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
25888 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
25889 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
25890 | + * | |
25891 | + * More information about perfmon available at: | |
25892 | + * http://perfmon2.sf.net | |
25893 | + * | |
25894 | + * This program is free software; you can redistribute it and/or | |
25895 | + * modify it under the terms of version 2 of the GNU General Public | |
25896 | + * License as published by the Free Software Foundation. | |
25897 | + * | |
25898 | + * This program is distributed in the hope that it will be useful, | |
25899 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25900 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
25901 | + * General Public License for more details. | |
25902 | + * | |
25903 | + * You should have received a copy of the GNU General Public License | |
25904 | + * along with this program; if not, write to the Free Software | |
25905 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
25906 | + * 02111-1307 USA | |
25907 | + */ | |
25908 | +#include <linux/kernel.h> | |
25909 | +#include <linux/poll.h> | |
25910 | +#include <linux/perfmon_kern.h> | |
25911 | + | |
25912 | +/** | |
25913 | + * pfm_get_new_msg - get a new message slot from the queue | |
25914 | + * @ctx: context to operate on | |
25915 | + * | |
25916 | + * if queue if full NULL is returned | |
25917 | + */ | |
25918 | +static union pfarg_msg *pfm_get_new_msg(struct pfm_context *ctx) | |
25919 | +{ | |
25920 | + int next; | |
25921 | + | |
25922 | + next = ctx->msgq_head & PFM_MSGQ_MASK; | |
25923 | + | |
25924 | + if ((ctx->msgq_head - ctx->msgq_tail) == PFM_MSGS_COUNT) | |
25925 | + return NULL; | |
25926 | + | |
25927 | + /* | |
25928 | + * move to next possible slot | |
25929 | + */ | |
25930 | + ctx->msgq_head++; | |
25931 | + | |
25932 | + PFM_DBG_ovfl("head=%d tail=%d msg=%d", | |
25933 | + ctx->msgq_head & PFM_MSGQ_MASK, | |
25934 | + ctx->msgq_tail & PFM_MSGQ_MASK, | |
25935 | + next); | |
25936 | + | |
25937 | + return ctx->msgq+next; | |
25938 | +} | |
25939 | + | |
25940 | +/** | |
25941 | + * pfm_notify_user - wakeup any thread wiating on msg queue, post SIGIO | |
25942 | + * @ctx: context to operate on | |
25943 | + * | |
25944 | + * message is already enqueued | |
25945 | + */ | |
25946 | +static void pfm_notify_user(struct pfm_context *ctx) | |
25947 | +{ | |
25948 | + if (ctx->state == PFM_CTX_ZOMBIE) { | |
25949 | + PFM_DBG("no notification, context is zombie"); | |
25950 | + return; | |
25951 | + } | |
25952 | + | |
25953 | + PFM_DBG_ovfl("waking up"); | |
25954 | + | |
25955 | + wake_up_interruptible(&ctx->msgq_wait); | |
25956 | + | |
25957 | + /* | |
25958 | + * it is safe to call kill_fasync() from an interrupt | |
25959 | + * handler. kill_fasync() grabs two RW locks (fasync_lock, | |
25960 | + * tasklist_lock) in read mode. There is conflict only in | |
25961 | + * case the PMU interrupt occurs during a write mode critical | |
25962 | + * section. This cannot happen because for both locks, the | |
25963 | + * write mode is always using interrupt masking (write_lock_irq). | |
25964 | + */ | |
25965 | + kill_fasync(&ctx->async_queue, SIGIO, POLL_IN); | |
25966 | +} | |
25967 | + | |
25968 | +/** | |
25969 | + * pfm_ovfl_notify - send overflow notification | |
25970 | + * @ctx: context to operate on | |
25971 | + * @set: which set the overflow comes from | |
25972 | + * @ip: overflow interrupt instruction address (IIP) | |
25973 | + * | |
25974 | + * Appends an overflow notification message to context queue. | |
25975 | + * call pfm_notify() to wakeup any threads and/or send a signal | |
25976 | + * | |
25977 | + * Context is locked and interrupts are disabled (no preemption). | |
25978 | + */ | |
25979 | +int pfm_ovfl_notify(struct pfm_context *ctx, | |
25980 | + struct pfm_event_set *set, | |
25981 | + unsigned long ip) | |
25982 | +{ | |
25983 | + union pfarg_msg *msg = NULL; | |
25984 | + u64 *ovfl_pmds; | |
25985 | + | |
25986 | + if (!ctx->flags.no_msg) { | |
25987 | + msg = pfm_get_new_msg(ctx); | |
25988 | + if (msg == NULL) { | |
25989 | + /* | |
25990 | + * when message queue fills up it is because the user | |
25991 | + * did not extract the message, yet issued | |
25992 | + * pfm_restart(). At this point, we stop sending | |
25993 | + * notification, thus the user will not be able to get | |
25994 | + * new samples when using the default format. | |
25995 | + */ | |
25996 | + PFM_DBG_ovfl("no more notification msgs"); | |
25997 | + return -1; | |
25998 | + } | |
25999 | + | |
26000 | + msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; | |
26001 | + msg->pfm_ovfl_msg.msg_ovfl_pid = current->pid; | |
26002 | + msg->pfm_ovfl_msg.msg_active_set = set->id; | |
26003 | + | |
26004 | + ovfl_pmds = msg->pfm_ovfl_msg.msg_ovfl_pmds; | |
26005 | + | |
26006 | + /* | |
26007 | + * copy bitmask of all pmd that interrupted last | |
26008 | + */ | |
26009 | + bitmap_copy(cast_ulp(ovfl_pmds), cast_ulp(set->ovfl_pmds), | |
26010 | + ctx->regs.max_intr_pmd); | |
26011 | + | |
26012 | + msg->pfm_ovfl_msg.msg_ovfl_cpu = smp_processor_id(); | |
26013 | + msg->pfm_ovfl_msg.msg_ovfl_tid = current->tgid; | |
26014 | + msg->pfm_ovfl_msg.msg_ovfl_ip = ip; | |
26015 | + | |
26016 | + pfm_stats_inc(ovfl_notify_count); | |
26017 | + } | |
26018 | + | |
26019 | + PFM_DBG_ovfl("ip=0x%lx o_pmds=0x%llx", | |
26020 | + ip, | |
26021 | + (unsigned long long)set->ovfl_pmds[0]); | |
26022 | + | |
26023 | + pfm_notify_user(ctx); | |
26024 | + return 0; | |
26025 | +} | |
26026 | + | |
26027 | +/** | |
26028 | + * pfm_end_notify_user - notify of thread termination | |
26029 | + * @ctx: context to operate on | |
26030 | + * | |
26031 | + * In per-thread mode, when not self-monitoring, perfmon | |
26032 | + * sends a 'end' notification message when the monitored | |
26033 | + * thread where the context is attached is exiting. | |
26034 | + * | |
26035 | + * This helper message alleviates the need to track the activity | |
26036 | + * of the thread/process when it is not directly related, i.e., | |
26037 | + * was attached. In other words, no needto keep the thread | |
26038 | + * ptraced. | |
26039 | + * | |
26040 | + * The context must be locked and interrupts disabled. | |
26041 | + */ | |
26042 | +int pfm_end_notify(struct pfm_context *ctx) | |
26043 | +{ | |
26044 | + union pfarg_msg *msg; | |
26045 | + | |
26046 | + msg = pfm_get_new_msg(ctx); | |
26047 | + if (msg == NULL) { | |
26048 | + PFM_ERR("%s no more msgs", __func__); | |
26049 | + return -1; | |
26050 | + } | |
26051 | + /* no leak */ | |
26052 | + memset(msg, 0, sizeof(*msg)); | |
26053 | + | |
26054 | + msg->type = PFM_MSG_END; | |
26055 | + | |
26056 | + PFM_DBG("end msg: msg=%p no_msg=%d", | |
26057 | + msg, | |
26058 | + ctx->flags.no_msg); | |
26059 | + | |
26060 | + pfm_notify_user(ctx); | |
26061 | + return 0; | |
26062 | +} | |
26063 | + | |
26064 | +/** | |
26065 | + * pfm_get_next_msg - copy the oldest message from the queue and move tail | |
26066 | + * @ctx: context to use | |
26067 | + * @m: where to copy the message into | |
26068 | + * | |
26069 | + * The tail of the queue is moved as a consequence of this call | |
26070 | + */ | |
26071 | +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m) | |
26072 | +{ | |
26073 | + union pfarg_msg *next; | |
26074 | + | |
26075 | + PFM_DBG_ovfl("in head=%d tail=%d", | |
26076 | + ctx->msgq_head & PFM_MSGQ_MASK, | |
26077 | + ctx->msgq_tail & PFM_MSGQ_MASK); | |
26078 | + | |
26079 | + /* | |
26080 | + * get oldest message | |
26081 | + */ | |
26082 | + next = ctx->msgq + (ctx->msgq_tail & PFM_MSGQ_MASK); | |
26083 | + | |
26084 | + /* | |
26085 | + * move tail forward | |
26086 | + */ | |
26087 | + ctx->msgq_tail++; | |
26088 | + | |
26089 | + /* | |
26090 | + * copy message, we cannot simply point to it | |
26091 | + * as it may be re-used before we copy it out | |
26092 | + */ | |
26093 | + *m = *next; | |
26094 | + | |
26095 | + PFM_DBG_ovfl("out head=%d tail=%d type=%d", | |
26096 | + ctx->msgq_head & PFM_MSGQ_MASK, | |
26097 | + ctx->msgq_tail & PFM_MSGQ_MASK, | |
26098 | + m->type); | |
26099 | +} | |
26100 | --- /dev/null | |
26101 | +++ b/perfmon/perfmon_pmu.c | |
26102 | @@ -0,0 +1,590 @@ | |
26103 | +/* | |
26104 | + * perfmon_pmu.c: perfmon2 PMU configuration management | |
26105 | + * | |
26106 | + * This file implements the perfmon2 interface which | |
26107 | + * provides access to the hardware performance counters | |
26108 | + * of the host processor. | |
26109 | + * | |
26110 | + * The initial version of perfmon.c was written by | |
26111 | + * Ganesh Venkitachalam, IBM Corp. | |
26112 | + * | |
26113 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
26114 | + * David Mosberger, Hewlett Packard Co. | |
26115 | + * | |
26116 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
26117 | + * by Stephane Eranian, Hewlett Packard Co. | |
26118 | + * | |
26119 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
26120 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
26121 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
26122 | + * | |
26123 | + * More information about perfmon available at: | |
26124 | + * http://perfmon2.sf.net | |
26125 | + * | |
26126 | + * This program is free software; you can redistribute it and/or | |
26127 | + * modify it under the terms of version 2 of the GNU General Public | |
26128 | + * License as published by the Free Software Foundation. | |
26129 | + * | |
26130 | + * This program is distributed in the hope that it will be useful, | |
26131 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26132 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
26133 | + * General Public License for more details. | |
26134 | + * | |
26135 | + * You should have received a copy of the GNU General Public License | |
26136 | + * along with this program; if not, write to the Free Software | |
26137 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
26138 | + * 02111-1307 USA | |
26139 | + */ | |
26140 | +#include <linux/module.h> | |
26141 | +#include <linux/perfmon_kern.h> | |
26142 | +#include "perfmon_priv.h" | |
26143 | + | |
26144 | +#ifndef CONFIG_MODULE_UNLOAD | |
26145 | +#define module_refcount(n) 1 | |
26146 | +#endif | |
26147 | + | |
26148 | +static __cacheline_aligned_in_smp int request_mod_in_progress; | |
26149 | +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_conf_lock); | |
26150 | + | |
26151 | +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_acq_lock); | |
26152 | +static u32 pfm_pmu_acquired; | |
26153 | + | |
26154 | +/* | |
26155 | + * perfmon core must acces PMU information ONLY through pfm_pmu_conf | |
26156 | + * if pfm_pmu_conf is NULL, then no description is registered | |
26157 | + */ | |
26158 | +struct pfm_pmu_config *pfm_pmu_conf; | |
26159 | +EXPORT_SYMBOL(pfm_pmu_conf); | |
26160 | + | |
26161 | +static inline int pmu_is_module(struct pfm_pmu_config *c) | |
26162 | +{ | |
26163 | + return !(c->flags & PFM_PMUFL_IS_BUILTIN); | |
26164 | +} | |
26165 | +/** | |
26166 | + * pfm_pmu_regdesc_init -- initialize regdesc structure from PMU table | |
26167 | + * @regs: the regdesc structure to initialize | |
26168 | + * @excl_type: the register type(s) to exclude from this regdesc | |
26169 | + * @unvail_pmcs: unavailable PMC registers | |
26170 | + * @unavail_pmds: unavailable PMD registers | |
26171 | + * | |
26172 | + * Return: | |
26173 | + * 0 success | |
26174 | + * errno in case of error | |
26175 | + */ | |
26176 | +static int pfm_pmu_regdesc_init(struct pfm_regdesc *regs, int excl_type, | |
26177 | + u64 *unavail_pmcs, u64 *unavail_pmds) | |
26178 | +{ | |
26179 | + struct pfm_regmap_desc *d; | |
26180 | + u16 n, n2, n_counters, i; | |
26181 | + int first_intr_pmd = -1, max1, max2, max3; | |
26182 | + | |
26183 | + /* | |
26184 | + * compute the number of implemented PMC from the | |
26185 | + * description table | |
26186 | + */ | |
26187 | + n = 0; | |
26188 | + max1 = max2 = -1; | |
26189 | + d = pfm_pmu_conf->pmc_desc; | |
26190 | + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { | |
26191 | + if (!(d->type & PFM_REG_I)) | |
26192 | + continue; | |
26193 | + | |
26194 | + if (test_bit(i, cast_ulp(unavail_pmcs))) | |
26195 | + continue; | |
26196 | + | |
26197 | + if (d->type & excl_type) | |
26198 | + continue; | |
26199 | + | |
26200 | + __set_bit(i, cast_ulp(regs->pmcs)); | |
26201 | + | |
26202 | + max1 = i; | |
26203 | + n++; | |
26204 | + } | |
26205 | + | |
26206 | + if (!n) { | |
26207 | + PFM_INFO("%s PMU description has no PMC registers", | |
26208 | + pfm_pmu_conf->pmu_name); | |
26209 | + return -EINVAL; | |
26210 | + } | |
26211 | + | |
26212 | + regs->max_pmc = max1 + 1; | |
26213 | + regs->num_pmcs = n; | |
26214 | + | |
26215 | + n = n_counters = n2 = 0; | |
26216 | + max1 = max2 = max3 = -1; | |
26217 | + d = pfm_pmu_conf->pmd_desc; | |
26218 | + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) { | |
26219 | + if (!(d->type & PFM_REG_I)) | |
26220 | + continue; | |
26221 | + | |
26222 | + if (test_bit(i, cast_ulp(unavail_pmds))) | |
26223 | + continue; | |
26224 | + | |
26225 | + if (d->type & excl_type) | |
26226 | + continue; | |
26227 | + | |
26228 | + __set_bit(i, cast_ulp(regs->pmds)); | |
26229 | + max1 = i; | |
26230 | + n++; | |
26231 | + | |
26232 | + /* | |
26233 | + * read-write registers | |
26234 | + */ | |
26235 | + if (!(d->type & PFM_REG_RO)) { | |
26236 | + __set_bit(i, cast_ulp(regs->rw_pmds)); | |
26237 | + max3 = i; | |
26238 | + n2++; | |
26239 | + } | |
26240 | + | |
26241 | + /* | |
26242 | + * counter registers | |
26243 | + */ | |
26244 | + if (d->type & PFM_REG_C64) { | |
26245 | + __set_bit(i, cast_ulp(regs->cnt_pmds)); | |
26246 | + n_counters++; | |
26247 | + } | |
26248 | + | |
26249 | + /* | |
26250 | + * PMD with intr capabilities | |
26251 | + */ | |
26252 | + if (d->type & PFM_REG_INTR) { | |
26253 | + __set_bit(i, cast_ulp(regs->intr_pmds)); | |
26254 | + if (first_intr_pmd == -1) | |
26255 | + first_intr_pmd = i; | |
26256 | + max2 = i; | |
26257 | + } | |
26258 | + } | |
26259 | + | |
26260 | + if (!n) { | |
26261 | + PFM_INFO("%s PMU description has no PMD registers", | |
26262 | + pfm_pmu_conf->pmu_name); | |
26263 | + return -EINVAL; | |
26264 | + } | |
26265 | + | |
26266 | + regs->max_pmd = max1 + 1; | |
26267 | + regs->first_intr_pmd = first_intr_pmd; | |
26268 | + regs->max_intr_pmd = max2 + 1; | |
26269 | + | |
26270 | + regs->num_counters = n_counters; | |
26271 | + regs->num_pmds = n; | |
26272 | + regs->max_rw_pmd = max3 + 1; | |
26273 | + regs->num_rw_pmd = n2; | |
26274 | + | |
26275 | + return 0; | |
26276 | +} | |
26277 | + | |
26278 | +/** | |
26279 | + * pfm_pmu_regdesc_init_all -- initialize all regdesc structures | |
26280 | + * @una_pmcs : unavailable PMC registers | |
26281 | + * @una_pmds : unavailable PMD registers | |
26282 | + * | |
26283 | + * Return: | |
26284 | + * 0 sucess | |
26285 | + * errno if error | |
26286 | + * | |
26287 | + * We maintain 3 regdesc: | |
26288 | + * regs_all: all available registers | |
26289 | + * regs_sys: registers available to system-wide contexts only | |
26290 | + * regs_thr: registers available to per-thread contexts only | |
26291 | + */ | |
26292 | +static int pfm_pmu_regdesc_init_all(u64 *una_pmcs, u64 *una_pmds) | |
26293 | +{ | |
26294 | + int ret; | |
26295 | + | |
26296 | + memset(&pfm_pmu_conf->regs_all, 0, sizeof(struct pfm_regdesc)); | |
26297 | + memset(&pfm_pmu_conf->regs_thr, 0, sizeof(struct pfm_regdesc)); | |
26298 | + memset(&pfm_pmu_conf->regs_sys, 0, sizeof(struct pfm_regdesc)); | |
26299 | + | |
26300 | + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_all, | |
26301 | + 0, | |
26302 | + una_pmcs, una_pmds); | |
26303 | + if (ret) | |
26304 | + return ret; | |
26305 | + | |
26306 | + PFM_DBG("regs_all.pmcs=0x%llx", | |
26307 | + (unsigned long long)pfm_pmu_conf->regs_all.pmcs[0]); | |
26308 | + | |
26309 | + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_thr, | |
26310 | + PFM_REG_SYS, | |
26311 | + una_pmcs, una_pmds); | |
26312 | + if (ret) | |
26313 | + return ret; | |
26314 | + PFM_DBG("regs.thr.pmcs=0x%llx", | |
26315 | + (unsigned long long)pfm_pmu_conf->regs_thr.pmcs[0]); | |
26316 | + | |
26317 | + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_sys, | |
26318 | + PFM_REG_THR, | |
26319 | + una_pmcs, una_pmds); | |
26320 | + | |
26321 | + PFM_DBG("regs_sys.pmcs=0x%llx", | |
26322 | + (unsigned long long)pfm_pmu_conf->regs_sys.pmcs[0]); | |
26323 | + | |
26324 | + return ret; | |
26325 | +} | |
26326 | + | |
26327 | +int pfm_pmu_register(struct pfm_pmu_config *cfg) | |
26328 | +{ | |
26329 | + u16 i, nspec, nspec_ro, num_pmcs, num_pmds, num_wc = 0; | |
26330 | + int type, ret = -EBUSY; | |
26331 | + | |
26332 | + if (perfmon_disabled) { | |
26333 | + PFM_INFO("perfmon disabled, cannot add PMU description"); | |
26334 | + return -ENOSYS; | |
26335 | + } | |
26336 | + | |
26337 | + nspec = nspec_ro = num_pmds = num_pmcs = 0; | |
26338 | + | |
26339 | + /* some sanity checks */ | |
26340 | + if (cfg == NULL || cfg->pmu_name == NULL) { | |
26341 | + PFM_INFO("PMU config descriptor is invalid"); | |
26342 | + return -EINVAL; | |
26343 | + } | |
26344 | + | |
26345 | + /* must have a probe */ | |
26346 | + if (cfg->probe_pmu == NULL) { | |
26347 | + PFM_INFO("PMU config has no probe routine"); | |
26348 | + return -EINVAL; | |
26349 | + } | |
26350 | + | |
26351 | + /* | |
26352 | + * execute probe routine before anything else as it | |
26353 | + * may update configuration tables | |
26354 | + */ | |
26355 | + if ((*cfg->probe_pmu)() == -1) { | |
26356 | + PFM_INFO("%s PMU detection failed", cfg->pmu_name); | |
26357 | + return -EINVAL; | |
26358 | + } | |
26359 | + | |
26360 | + if (!(cfg->flags & PFM_PMUFL_IS_BUILTIN) && cfg->owner == NULL) { | |
26361 | + PFM_INFO("PMU config %s is missing owner", cfg->pmu_name); | |
26362 | + return -EINVAL; | |
26363 | + } | |
26364 | + | |
26365 | + if (!cfg->num_pmd_entries) { | |
26366 | + PFM_INFO("%s needs to define num_pmd_entries", cfg->pmu_name); | |
26367 | + return -EINVAL; | |
26368 | + } | |
26369 | + | |
26370 | + if (!cfg->num_pmc_entries) { | |
26371 | + PFM_INFO("%s needs to define num_pmc_entries", cfg->pmu_name); | |
26372 | + return -EINVAL; | |
26373 | + } | |
26374 | + | |
26375 | + if (!cfg->counter_width) { | |
26376 | + PFM_INFO("PMU config %s, zero width counters", cfg->pmu_name); | |
26377 | + return -EINVAL; | |
26378 | + } | |
26379 | + | |
26380 | + /* | |
26381 | + * REG_RO, REG_V not supported on PMC registers | |
26382 | + */ | |
26383 | + for (i = 0; i < cfg->num_pmc_entries; i++) { | |
26384 | + | |
26385 | + type = cfg->pmc_desc[i].type; | |
26386 | + | |
26387 | + if (type & PFM_REG_I) | |
26388 | + num_pmcs++; | |
26389 | + | |
26390 | + if (type & PFM_REG_WC) | |
26391 | + num_wc++; | |
26392 | + | |
26393 | + if (type & PFM_REG_V) { | |
26394 | + PFM_INFO("PFM_REG_V is not supported on " | |
26395 | + "PMCs (PMC%d)", i); | |
26396 | + return -EINVAL; | |
26397 | + } | |
26398 | + if (type & PFM_REG_RO) { | |
26399 | + PFM_INFO("PFM_REG_RO meaningless on " | |
26400 | + "PMCs (PMC%u)", i); | |
26401 | + return -EINVAL; | |
26402 | + } | |
26403 | + } | |
26404 | + | |
26405 | + if (num_wc && cfg->pmc_write_check == NULL) { | |
26406 | + PFM_INFO("some PMCs have write-checker but no callback provided\n"); | |
26407 | + return -EINVAL; | |
26408 | + } | |
26409 | + | |
26410 | + /* | |
26411 | + * check virtual PMD registers | |
26412 | + */ | |
26413 | + num_wc = 0; | |
26414 | + for (i = 0; i < cfg->num_pmd_entries; i++) { | |
26415 | + | |
26416 | + type = cfg->pmd_desc[i].type; | |
26417 | + | |
26418 | + if (type & PFM_REG_I) | |
26419 | + num_pmds++; | |
26420 | + | |
26421 | + if (type & PFM_REG_V) { | |
26422 | + nspec++; | |
26423 | + if (type & PFM_REG_RO) | |
26424 | + nspec_ro++; | |
26425 | + } | |
26426 | + | |
26427 | + if (type & PFM_REG_WC) | |
26428 | + num_wc++; | |
26429 | + } | |
26430 | + | |
26431 | + if (num_wc && cfg->pmd_write_check == NULL) { | |
26432 | + PFM_INFO("PMD have write-checker but no callback provided\n"); | |
26433 | + return -EINVAL; | |
26434 | + } | |
26435 | + | |
26436 | + if (nspec && cfg->pmd_sread == NULL) { | |
26437 | + PFM_INFO("PMU config is missing pmd_sread()"); | |
26438 | + return -EINVAL; | |
26439 | + } | |
26440 | + | |
26441 | + nspec = nspec - nspec_ro; | |
26442 | + if (nspec && cfg->pmd_swrite == NULL) { | |
26443 | + PFM_INFO("PMU config is missing pmd_swrite()"); | |
26444 | + return -EINVAL; | |
26445 | + } | |
26446 | + | |
26447 | + if (num_pmcs >= PFM_MAX_PMCS) { | |
26448 | + PFM_INFO("%s PMCS registers exceed name space [0-%u]", | |
26449 | + cfg->pmu_name, | |
26450 | + PFM_MAX_PMCS); | |
26451 | + return -EINVAL; | |
26452 | + } | |
26453 | + if (num_pmds >= PFM_MAX_PMDS) { | |
26454 | + PFM_INFO("%s PMDS registers exceed name space [0-%u]", | |
26455 | + cfg->pmu_name, | |
26456 | + PFM_MAX_PMDS); | |
26457 | + return -EINVAL; | |
26458 | + } | |
26459 | + spin_lock(&pfm_pmu_conf_lock); | |
26460 | + | |
26461 | + if (pfm_pmu_conf) | |
26462 | + goto unlock; | |
26463 | + | |
26464 | + if (!cfg->version) | |
26465 | + cfg->version = "0.0"; | |
26466 | + | |
26467 | + pfm_pmu_conf = cfg; | |
26468 | + pfm_pmu_conf->ovfl_mask = (1ULL << cfg->counter_width) - 1; | |
26469 | + | |
26470 | + ret = pfm_arch_pmu_config_init(cfg); | |
26471 | + if (ret) | |
26472 | + goto unlock; | |
26473 | + | |
26474 | + ret = pfm_sysfs_add_pmu(pfm_pmu_conf); | |
26475 | + if (ret) | |
26476 | + pfm_pmu_conf = NULL; | |
26477 | + | |
26478 | +unlock: | |
26479 | + spin_unlock(&pfm_pmu_conf_lock); | |
26480 | + | |
26481 | + if (ret) { | |
26482 | + PFM_INFO("register %s PMU error %d", cfg->pmu_name, ret); | |
26483 | + } else { | |
26484 | + PFM_INFO("%s PMU installed", cfg->pmu_name); | |
26485 | + /* | |
26486 | + * (re)initialize PMU on each PMU now that we have a description | |
26487 | + */ | |
26488 | + on_each_cpu(__pfm_init_percpu, cfg, 0); | |
26489 | + } | |
26490 | + return ret; | |
26491 | +} | |
26492 | +EXPORT_SYMBOL(pfm_pmu_register); | |
26493 | + | |
26494 | +/* | |
26495 | + * remove PMU description. Caller must pass address of current | |
26496 | + * configuration. This is mostly for sanity checking as only | |
26497 | + * one config can exist at any time. | |
26498 | + * | |
26499 | + * We are using the module refcount mechanism to protect against | |
26500 | + * removal while the configuration is being used. As long as there is | |
26501 | + * one context, a PMU configuration cannot be removed. The protection is | |
26502 | + * managed in module logic. | |
26503 | + */ | |
26504 | +void pfm_pmu_unregister(struct pfm_pmu_config *cfg) | |
26505 | +{ | |
26506 | + if (!(cfg || pfm_pmu_conf)) | |
26507 | + return; | |
26508 | + | |
26509 | + spin_lock(&pfm_pmu_conf_lock); | |
26510 | + | |
26511 | + BUG_ON(module_refcount(pfm_pmu_conf->owner)); | |
26512 | + | |
26513 | + if (cfg->owner == pfm_pmu_conf->owner) { | |
26514 | + pfm_sysfs_remove_pmu(pfm_pmu_conf); | |
26515 | + pfm_pmu_conf = NULL; | |
26516 | + } | |
26517 | + | |
26518 | + spin_unlock(&pfm_pmu_conf_lock); | |
26519 | +} | |
26520 | +EXPORT_SYMBOL(pfm_pmu_unregister); | |
26521 | + | |
26522 | +static int pfm_pmu_request_module(void) | |
26523 | +{ | |
26524 | + char *mod_name; | |
26525 | + int ret; | |
26526 | + | |
26527 | + mod_name = pfm_arch_get_pmu_module_name(); | |
26528 | + if (mod_name == NULL) | |
26529 | + return -ENOSYS; | |
26530 | + | |
26531 | + ret = request_module(mod_name); | |
26532 | + | |
26533 | + PFM_DBG("mod=%s ret=%d\n", mod_name, ret); | |
26534 | + return ret; | |
26535 | +} | |
26536 | + | |
26537 | +/* | |
26538 | + * autoload: | |
26539 | + * 0 : do not try to autoload the PMU description module | |
26540 | + * not 0 : try to autoload the PMU description module | |
26541 | + */ | |
26542 | +int pfm_pmu_conf_get(int autoload) | |
26543 | +{ | |
26544 | + int ret; | |
26545 | + | |
26546 | + spin_lock(&pfm_pmu_conf_lock); | |
26547 | + | |
26548 | + if (request_mod_in_progress) { | |
26549 | + ret = -ENOSYS; | |
26550 | + goto skip; | |
26551 | + } | |
26552 | + | |
26553 | + if (autoload && pfm_pmu_conf == NULL) { | |
26554 | + | |
26555 | + request_mod_in_progress = 1; | |
26556 | + | |
26557 | + spin_unlock(&pfm_pmu_conf_lock); | |
26558 | + | |
26559 | + pfm_pmu_request_module(); | |
26560 | + | |
26561 | + spin_lock(&pfm_pmu_conf_lock); | |
26562 | + | |
26563 | + request_mod_in_progress = 0; | |
26564 | + | |
26565 | + /* | |
26566 | + * request_module() may succeed but the module | |
26567 | + * may not have registered properly so we need | |
26568 | + * to check | |
26569 | + */ | |
26570 | + } | |
26571 | + | |
26572 | + ret = pfm_pmu_conf == NULL ? -ENOSYS : 0; | |
26573 | + if (!ret && pmu_is_module(pfm_pmu_conf) | |
26574 | + && !try_module_get(pfm_pmu_conf->owner)) | |
26575 | + ret = -ENOSYS; | |
26576 | + | |
26577 | +skip: | |
26578 | + spin_unlock(&pfm_pmu_conf_lock); | |
26579 | + | |
26580 | + return ret; | |
26581 | +} | |
26582 | + | |
26583 | +void pfm_pmu_conf_put(void) | |
26584 | +{ | |
26585 | + if (pfm_pmu_conf == NULL || !pmu_is_module(pfm_pmu_conf)) | |
26586 | + return; | |
26587 | + | |
26588 | + spin_lock(&pfm_pmu_conf_lock); | |
26589 | + module_put(pfm_pmu_conf->owner); | |
26590 | + spin_unlock(&pfm_pmu_conf_lock); | |
26591 | +} | |
26592 | + | |
26593 | + | |
26594 | +/* | |
26595 | + * acquire PMU resource from lower-level PMU register allocator | |
26596 | + * (currently perfctr-watchdog.c) | |
26597 | + * | |
26598 | + * acquisition is done when the first context is created (and not | |
26599 | + * when it is loaded). We grab all that is defined in the description | |
26600 | + * module and then we make adjustments at the arch-specific level. | |
26601 | + * | |
26602 | + * The PMU resource is released when the last perfmon context is | |
26603 | + * destroyed. | |
26604 | + * | |
26605 | + * interrupts are not masked | |
26606 | + */ | |
26607 | +int pfm_pmu_acquire(struct pfm_context *ctx) | |
26608 | +{ | |
26609 | + u64 unavail_pmcs[PFM_PMC_BV]; | |
26610 | + u64 unavail_pmds[PFM_PMD_BV]; | |
26611 | + int ret = 0; | |
26612 | + | |
26613 | + spin_lock(&pfm_pmu_acq_lock); | |
26614 | + | |
26615 | + PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired); | |
26616 | + | |
26617 | + pfm_pmu_acquired++; | |
26618 | + | |
26619 | + /* | |
26620 | + * we need to initialize regdesc each time we re-acquire | |
26621 | + * the PMU for the first time as there may have been changes | |
26622 | + * in the list of available registers, e.g., NMI may have | |
26623 | + * been disabled. Checking on PMU module insert is not | |
26624 | + * enough | |
26625 | + */ | |
26626 | + if (pfm_pmu_acquired == 1) { | |
26627 | + memset(unavail_pmcs, 0, sizeof(unavail_pmcs)); | |
26628 | + memset(unavail_pmds, 0, sizeof(unavail_pmds)); | |
26629 | + | |
26630 | + ret = pfm_arch_pmu_acquire(unavail_pmcs, unavail_pmds); | |
26631 | + if (ret) { | |
26632 | + pfm_pmu_acquired--; | |
26633 | + } else { | |
26634 | + pfm_pmu_regdesc_init_all(unavail_pmcs, unavail_pmds); | |
26635 | + | |
26636 | + /* available PMU ressources */ | |
26637 | + PFM_DBG("PMU acquired: %u PMCs, %u PMDs, %u counters", | |
26638 | + pfm_pmu_conf->regs_all.num_pmcs, | |
26639 | + pfm_pmu_conf->regs_all.num_pmds, | |
26640 | + pfm_pmu_conf->regs_all.num_counters); | |
26641 | + } | |
26642 | + } | |
26643 | + spin_unlock(&pfm_pmu_acq_lock); | |
26644 | + | |
26645 | + /* | |
26646 | + * copy the regdesc that corresponds to the context | |
26647 | + * we copy and not just point because it helps with | |
26648 | + * memory locality. the regdesc structure is accessed | |
26649 | + * very frequently in performance critical code such | |
26650 | + * as context switch and interrupt handling. By using | |
26651 | + * a local copy, we increase memory footprint, but | |
26652 | + * increase chance to have local memory access, | |
26653 | + * especially for system-wide contexts. | |
26654 | + */ | |
26655 | + if (ctx->flags.system) | |
26656 | + ctx->regs = pfm_pmu_conf->regs_sys; | |
26657 | + else | |
26658 | + ctx->regs = pfm_pmu_conf->regs_thr; | |
26659 | + | |
26660 | + return ret; | |
26661 | +} | |
26662 | + | |
26663 | +/* | |
26664 | + * release the PMU resource | |
26665 | + * | |
26666 | + * actual release happens when last context is destroyed | |
26667 | + * | |
26668 | + * interrupts are not masked | |
26669 | + */ | |
26670 | +void pfm_pmu_release(void) | |
26671 | +{ | |
26672 | + BUG_ON(irqs_disabled()); | |
26673 | + | |
26674 | + /* | |
26675 | + * we need to use a spinlock because release takes some time | |
26676 | + * and we may have a race with pfm_pmu_acquire() | |
26677 | + */ | |
26678 | + spin_lock(&pfm_pmu_acq_lock); | |
26679 | + | |
26680 | + PFM_DBG("pmu_acquired=%d", pfm_pmu_acquired); | |
26681 | + | |
26682 | + /* | |
26683 | + * we decouple test and decrement because if we had errors | |
26684 | + * in pfm_pmu_acquire(), we still come here on pfm_context_free() | |
26685 | + * but with pfm_pmu_acquire=0 | |
26686 | + */ | |
26687 | + if (pfm_pmu_acquired > 0 && --pfm_pmu_acquired == 0) { | |
26688 | + pfm_arch_pmu_release(); | |
26689 | + PFM_DBG("PMU released"); | |
26690 | + } | |
26691 | + spin_unlock(&pfm_pmu_acq_lock); | |
26692 | +} | |
26693 | --- /dev/null | |
26694 | +++ b/perfmon/perfmon_priv.h | |
26695 | @@ -0,0 +1,182 @@ | |
26696 | +/* | |
26697 | + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. | |
26698 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
26699 | + * | |
26700 | + * This program is free software; you can redistribute it and/or | |
26701 | + * modify it under the terms of version 2 of the GNU General Public | |
26702 | + * License as published by the Free Software Foundation. | |
26703 | + * | |
26704 | + * This program is distributed in the hope that it will be useful, | |
26705 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26706 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
26707 | + * General Public License for more details. | |
26708 | + * | |
26709 | + * You should have received a copy of the GNU General Public License | |
26710 | + * along with this program; if not, write to the Free Software | |
26711 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
26712 | + * 02111-1307 USA | |
26713 | + */ | |
26714 | + | |
26715 | +#ifndef __PERFMON_PRIV_H__ | |
26716 | +#define __PERFMON_PRIV_H__ | |
26717 | +/* | |
26718 | + * This file contains all the definitions of data structures, variables, macros | |
26719 | + * that are to private to the generic code, i.e., not shared with any code that | |
26720 | + * lives under arch/ or include/asm-XX | |
26721 | + * | |
26722 | + * For shared definitions, use include/linux/perfmon_kern.h | |
26723 | + */ | |
26724 | + | |
26725 | +#ifdef CONFIG_PERFMON | |
26726 | + | |
26727 | +/* | |
26728 | + * type of PMD reset for pfm_reset_pmds() or pfm_switch_sets*() | |
26729 | + */ | |
26730 | +#define PFM_PMD_RESET_SHORT 1 /* use short reset value */ | |
26731 | +#define PFM_PMD_RESET_LONG 2 /* use long reset value */ | |
26732 | + | |
26733 | +/* | |
26734 | + * context lazy save/restore activation count | |
26735 | + */ | |
26736 | +#define PFM_INVALID_ACTIVATION ((u64)~0) | |
26737 | + | |
26738 | +DECLARE_PER_CPU(u64, pmu_activation_number); | |
26739 | +DECLARE_PER_CPU(struct hrtimer, pfm_hrtimer); | |
26740 | + | |
26741 | +static inline void pfm_set_pmu_owner(struct task_struct *task, | |
26742 | + struct pfm_context *ctx) | |
26743 | +{ | |
26744 | + __get_cpu_var(pmu_owner) = task; | |
26745 | + __get_cpu_var(pmu_ctx) = ctx; | |
26746 | +} | |
26747 | + | |
26748 | +static inline int pfm_msgq_is_empty(struct pfm_context *ctx) | |
26749 | +{ | |
26750 | + return ctx->msgq_head == ctx->msgq_tail; | |
26751 | +} | |
26752 | + | |
26753 | +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m); | |
26754 | +int pfm_end_notify(struct pfm_context *ctx); | |
26755 | +int pfm_ovfl_notify(struct pfm_context *ctx, struct pfm_event_set *set, | |
26756 | + unsigned long ip); | |
26757 | + | |
26758 | +int pfm_alloc_fd(struct file **cfile); | |
26759 | + | |
26760 | +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count); | |
26761 | +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req, | |
26762 | + int count); | |
26763 | +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req, | |
26764 | + int count); | |
26765 | + | |
26766 | + | |
26767 | +int pfm_init_ctx(void); | |
26768 | + | |
26769 | +int pfm_pmu_acquire(struct pfm_context *ctx); | |
26770 | +void pfm_pmu_release(void); | |
26771 | + | |
26772 | +int pfm_session_acquire(int is_system, u32 cpu); | |
26773 | +void pfm_session_release(int is_system, u32 cpu); | |
26774 | + | |
26775 | +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size); | |
26776 | +int pfm_smpl_buf_load_context(struct pfm_context *ctx); | |
26777 | +void pfm_smpl_buf_unload_context(struct pfm_context *ctx); | |
26778 | + | |
26779 | +int pfm_init_sysfs(void); | |
26780 | + | |
26781 | +#ifdef CONFIG_PERFMON_DEBUG_FS | |
26782 | +int pfm_init_debugfs(void); | |
26783 | +int pfm_debugfs_add_cpu(int mycpu); | |
26784 | +void pfm_debugfs_del_cpu(int mycpu); | |
26785 | +#else | |
26786 | +static inline int pfm_init_debugfs(void) | |
26787 | +{ | |
26788 | + return 0; | |
26789 | +} | |
26790 | +static inline int pfm_debugfs_add_cpu(int mycpu) | |
26791 | +{ | |
26792 | + return 0; | |
26793 | +} | |
26794 | + | |
26795 | +static inline void pfm_debugfs_del_cpu(int mycpu) | |
26796 | +{} | |
26797 | +#endif | |
26798 | + | |
26799 | + | |
26800 | +void pfm_reset_pmds(struct pfm_context *ctx, struct pfm_event_set *set, | |
26801 | + int num_pmds, | |
26802 | + int reset_mode); | |
26803 | + | |
26804 | +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set); | |
26805 | +int pfm_init_sets(void); | |
26806 | + | |
26807 | +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what); | |
26808 | + | |
26809 | +void pfm_free_sets(struct pfm_context *ctx); | |
26810 | +int pfm_create_initial_set(struct pfm_context *ctx); | |
26811 | +void pfm_switch_sets_from_intr(struct pfm_context *ctx); | |
26812 | +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set); | |
26813 | +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t); | |
26814 | + | |
26815 | +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx, | |
26816 | + struct pfm_event_set *new_set, | |
26817 | + int reset_mode, | |
26818 | + int no_restart); | |
26819 | + | |
26820 | +/** | |
26821 | + * pfm_save_prev_ctx - check if previous context exists and save state | |
26822 | + * | |
26823 | + * called from pfm_load_ctx_thread() and __pfm_ctxsin_thread() to | |
26824 | + * check if previous context exists. If so saved its PMU state. This is used | |
26825 | + * only for UP kernels. | |
26826 | + * | |
26827 | + * PMU ownership is not cleared because the function is always called while | |
26828 | + * trying to install a new owner. | |
26829 | + */ | |
26830 | +static inline void pfm_check_save_prev_ctx(void) | |
26831 | +{ | |
26832 | +#ifdef CONFIG_SMP | |
26833 | + struct pfm_event_set *set; | |
26834 | + struct pfm_context *ctxp; | |
26835 | + | |
26836 | + ctxp = __get_cpu_var(pmu_ctx); | |
26837 | + if (!ctxp) | |
26838 | + return; | |
26839 | + /* | |
26840 | + * in UP per-thread, due to lazy save | |
26841 | + * there could be a context from another | |
26842 | + * task. We need to push it first before | |
26843 | + * installing our new state | |
26844 | + */ | |
26845 | + set = ctxp->active_set; | |
26846 | + pfm_save_pmds(ctxp, set); | |
26847 | + /* | |
26848 | + * do not clear ownership because we rewrite | |
26849 | + * right away | |
26850 | + */ | |
26851 | +#endif | |
26852 | +} | |
26853 | + | |
26854 | + | |
26855 | +int pfm_init_fs(void); | |
26856 | + | |
26857 | +int pfm_init_hotplug(void); | |
26858 | + | |
26859 | +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set); | |
26860 | +void pfm_resume_after_ovfl(struct pfm_context *ctx); | |
26861 | +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg, | |
26862 | + struct file *filp); | |
26863 | + | |
26864 | +static inline void pfm_post_work(struct task_struct *task, | |
26865 | + struct pfm_context *ctx, int type) | |
26866 | +{ | |
26867 | + ctx->flags.work_type = type; | |
26868 | + set_tsk_thread_flag(task, TIF_PERFMON_WORK); | |
26869 | + pfm_arch_arm_handle_work(task); | |
26870 | +} | |
26871 | + | |
26872 | +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG | |
26873 | +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG | |
26874 | + | |
26875 | +#endif /* CONFIG_PERFMON */ | |
26876 | + | |
26877 | +#endif /* __PERFMON_PRIV_H__ */ | |
26878 | --- /dev/null | |
26879 | +++ b/perfmon/perfmon_res.c | |
26880 | @@ -0,0 +1,450 @@ | |
26881 | +/* | |
26882 | + * perfmon_res.c: perfmon2 resource allocations | |
26883 | + * | |
26884 | + * This file implements the perfmon2 interface which | |
26885 | + * provides access to the hardware performance counters | |
26886 | + * of the host processor. | |
26887 | + * | |
26888 | + * The initial version of perfmon.c was written by | |
26889 | + * Ganesh Venkitachalam, IBM Corp. | |
26890 | + * | |
26891 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
26892 | + * David Mosberger, Hewlett Packard Co. | |
26893 | + * | |
26894 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
26895 | + * by Stephane Eranian, Hewlett Packard Co. | |
26896 | + * | |
26897 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
26898 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
26899 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
26900 | + * | |
26901 | + * More information about perfmon available at: | |
26902 | + * http://perfmon2.sf.net | |
26903 | + * | |
26904 | + * This program is free software; you can redistribute it and/or | |
26905 | + * modify it under the terms of version 2 of the GNU General Public | |
26906 | + * License as published by the Free Software Foundation. | |
26907 | + * | |
26908 | + * This program is distributed in the hope that it will be useful, | |
26909 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26910 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
26911 | + * General Public License for more details. | |
26912 | + * | |
26913 | + * You should have received a copy of the GNU General Public License | |
26914 | + * along with this program; if not, write to the Free Software | |
26915 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
26916 | + * 02111-1307 USA | |
26917 | + */ | |
26918 | +#include <linux/kernel.h> | |
26919 | +#include <linux/module.h> | |
26920 | +#include <linux/perfmon_kern.h> | |
26921 | +#include "perfmon_priv.h" | |
26922 | + | |
26923 | +/* | |
26924 | + * global information about all sessions | |
26925 | + * mostly used to synchronize between system wide and per-process | |
26926 | + */ | |
26927 | +struct pfm_resources { | |
26928 | + size_t smpl_buf_mem_cur;/* current smpl buf mem usage */ | |
26929 | + cpumask_t sys_cpumask; /* bitmask of used cpus */ | |
26930 | + u32 thread_sessions; /* #num loaded per-thread sessions */ | |
26931 | +}; | |
26932 | + | |
26933 | +static struct pfm_resources pfm_res; | |
26934 | + | |
26935 | +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_res_lock); | |
26936 | + | |
26937 | +/** | |
26938 | + * pfm_smpl_buf_space_acquire - check memory resource usage for sampling buffer | |
26939 | + * @ctx: context of interest | |
26940 | + * @size: size fo requested buffer | |
26941 | + * | |
26942 | + * sampling buffer allocated by perfmon must be | |
26943 | + * checked against max locked memory usage thresholds | |
26944 | + * for security reasons. | |
26945 | + * | |
26946 | + * The first level check is against the system wide limit | |
26947 | + * as indicated by the system administrator in /sys/kernel/perfmon | |
26948 | + * | |
26949 | + * The second level check is on a per-process basis using | |
26950 | + * RLIMIT_MEMLOCK limit. | |
26951 | + * | |
26952 | + * Operating on the current task only. | |
26953 | + */ | |
26954 | +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size) | |
26955 | +{ | |
26956 | + struct mm_struct *mm; | |
26957 | + unsigned long locked; | |
26958 | + unsigned long buf_mem, buf_mem_max; | |
26959 | + unsigned long flags; | |
26960 | + | |
26961 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
26962 | + | |
26963 | + /* | |
26964 | + * check against global buffer limit | |
26965 | + */ | |
26966 | + buf_mem_max = pfm_controls.smpl_buffer_mem_max; | |
26967 | + buf_mem = pfm_res.smpl_buf_mem_cur + size; | |
26968 | + | |
26969 | + if (buf_mem <= buf_mem_max) { | |
26970 | + pfm_res.smpl_buf_mem_cur = buf_mem; | |
26971 | + | |
26972 | + PFM_DBG("buf_mem_max=%lu current_buf_mem=%lu", | |
26973 | + buf_mem_max, | |
26974 | + buf_mem); | |
26975 | + } | |
26976 | + | |
26977 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
26978 | + | |
26979 | + if (buf_mem > buf_mem_max) { | |
26980 | + PFM_DBG("smpl buffer memory threshold reached"); | |
26981 | + return -ENOMEM; | |
26982 | + } | |
26983 | + | |
26984 | + /* | |
26985 | + * check against per-process RLIMIT_MEMLOCK | |
26986 | + */ | |
26987 | + mm = get_task_mm(current); | |
26988 | + | |
26989 | + down_write(&mm->mmap_sem); | |
26990 | + | |
26991 | + locked = mm->locked_vm << PAGE_SHIFT; | |
26992 | + locked += size; | |
26993 | + | |
26994 | + if (locked > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) { | |
26995 | + | |
26996 | + PFM_DBG("RLIMIT_MEMLOCK reached ask_locked=%lu rlim_cur=%lu", | |
26997 | + locked, | |
26998 | + current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur); | |
26999 | + | |
27000 | + up_write(&mm->mmap_sem); | |
27001 | + mmput(mm); | |
27002 | + goto unres; | |
27003 | + } | |
27004 | + | |
27005 | + mm->locked_vm = locked >> PAGE_SHIFT; | |
27006 | + | |
27007 | + up_write(&mm->mmap_sem); | |
27008 | + | |
27009 | + mmput(mm); | |
27010 | + | |
27011 | + return 0; | |
27012 | + | |
27013 | +unres: | |
27014 | + /* | |
27015 | + * remove global buffer memory allocation | |
27016 | + */ | |
27017 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27018 | + | |
27019 | + pfm_res.smpl_buf_mem_cur -= size; | |
27020 | + | |
27021 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27022 | + | |
27023 | + return -ENOMEM; | |
27024 | +} | |
27025 | +/** | |
27026 | + * pfm_smpl_buf_space_release - release resource usage for sampling buffer | |
27027 | + * @ctx: perfmon context of interest | |
27028 | + * | |
27029 | + * There exist multiple paths leading to this function. We need to | |
27030 | + * be very careful with locking on the mmap_sem as it may already be | |
27031 | + * held by the time we come here. | |
27032 | + * The following paths exist: | |
27033 | + * | |
27034 | + * exit path: | |
27035 | + * sys_exit_group | |
27036 | + * do_group_exit | |
27037 | + * do_exit | |
27038 | + * exit_mm | |
27039 | + * mmput | |
27040 | + * exit_mmap | |
27041 | + * remove_vma | |
27042 | + * fput | |
27043 | + * __fput | |
27044 | + * pfm_close | |
27045 | + * __pfm_close | |
27046 | + * pfm_context_free | |
27047 | + * pfm_release_buf_space | |
27048 | + * munmap path: | |
27049 | + * sys_munmap | |
27050 | + * do_munmap | |
27051 | + * remove_vma | |
27052 | + * fput | |
27053 | + * __fput | |
27054 | + * pfm_close | |
27055 | + * __pfm_close | |
27056 | + * pfm_context_free | |
27057 | + * pfm_release_buf_space | |
27058 | + * | |
27059 | + * close path: | |
27060 | + * sys_close | |
27061 | + * filp_close | |
27062 | + * fput | |
27063 | + * __fput | |
27064 | + * pfm_close | |
27065 | + * __pfm_close | |
27066 | + * pfm_context_free | |
27067 | + * pfm_release_buf_space | |
27068 | + * | |
27069 | + * The issue is that on the munmap() path, the mmap_sem is already held | |
27070 | + * in write-mode by the time we come here. To avoid the deadlock, we need | |
27071 | + * to know where we are coming from and skip down_write(). It is fairly | |
27072 | + * difficult to know this because of the lack of good hooks and | |
27073 | + * the fact that, there may not have been any mmap() of the sampling buffer | |
27074 | + * (i.e. create_context() followed by close() or exit()). | |
27075 | + * | |
27076 | + * We use a set flag ctx->flags.mmap_nlock which is toggled in the vm_ops | |
27077 | + * callback in remove_vma() which is called systematically for the call, so | |
27078 | + * on all but the pure close() path. The exit path does not already hold | |
27079 | + * the lock but this is exit so there is no task->mm by the time we come here. | |
27080 | + * | |
27081 | + * The mmap_nlock is set only when unmapping and this is the LAST reference | |
27082 | + * to the file (i.e., close() followed by munmap()). | |
27083 | + */ | |
27084 | +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size) | |
27085 | +{ | |
27086 | + unsigned long flags; | |
27087 | + struct mm_struct *mm; | |
27088 | + | |
27089 | + mm = get_task_mm(current); | |
27090 | + if (mm) { | |
27091 | + if (ctx->flags.mmap_nlock == 0) { | |
27092 | + PFM_DBG("doing down_write"); | |
27093 | + down_write(&mm->mmap_sem); | |
27094 | + } | |
27095 | + | |
27096 | + mm->locked_vm -= size >> PAGE_SHIFT; | |
27097 | + | |
27098 | + PFM_DBG("size=%zu locked_vm=%lu", size, mm->locked_vm); | |
27099 | + | |
27100 | + if (ctx->flags.mmap_nlock == 0) | |
27101 | + up_write(&mm->mmap_sem); | |
27102 | + | |
27103 | + mmput(mm); | |
27104 | + } | |
27105 | + | |
27106 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27107 | + | |
27108 | + pfm_res.smpl_buf_mem_cur -= size; | |
27109 | + | |
27110 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27111 | +} | |
27112 | + | |
27113 | +/** | |
27114 | + * pfm_session_acquire - reserve a per-thread or per-cpu session | |
27115 | + * @is_system: true if per-cpu session | |
27116 | + * @cpu: cpu number for per-cpu session | |
27117 | + * | |
27118 | + * return: | |
27119 | + * 0 : success | |
27120 | + * -EBUSY: if conflicting session exist | |
27121 | + */ | |
27122 | +int pfm_session_acquire(int is_system, u32 cpu) | |
27123 | +{ | |
27124 | + unsigned long flags; | |
27125 | + u32 nsys_cpus; | |
27126 | + int ret = 0; | |
27127 | + | |
27128 | + /* | |
27129 | + * validity checks on cpu_mask have been done upstream | |
27130 | + */ | |
27131 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27132 | + | |
27133 | + nsys_cpus = cpus_weight(pfm_res.sys_cpumask); | |
27134 | + | |
27135 | + PFM_DBG("in sys=%u task=%u is_sys=%d cpu=%u", | |
27136 | + nsys_cpus, | |
27137 | + pfm_res.thread_sessions, | |
27138 | + is_system, | |
27139 | + cpu); | |
27140 | + | |
27141 | + if (is_system) { | |
27142 | + /* | |
27143 | + * cannot mix system wide and per-task sessions | |
27144 | + */ | |
27145 | + if (pfm_res.thread_sessions > 0) { | |
27146 | + PFM_DBG("%u conflicting thread_sessions", | |
27147 | + pfm_res.thread_sessions); | |
27148 | + ret = -EBUSY; | |
27149 | + goto abort; | |
27150 | + } | |
27151 | + | |
27152 | + if (cpu_isset(cpu, pfm_res.sys_cpumask)) { | |
27153 | + PFM_DBG("conflicting session on CPU%u", cpu); | |
27154 | + ret = -EBUSY; | |
27155 | + goto abort; | |
27156 | + } | |
27157 | + | |
27158 | + PFM_DBG("reserved session on CPU%u", cpu); | |
27159 | + | |
27160 | + cpu_set(cpu, pfm_res.sys_cpumask); | |
27161 | + nsys_cpus++; | |
27162 | + } else { | |
27163 | + if (nsys_cpus) { | |
27164 | + ret = -EBUSY; | |
27165 | + goto abort; | |
27166 | + } | |
27167 | + pfm_res.thread_sessions++; | |
27168 | + } | |
27169 | + | |
27170 | + PFM_DBG("out sys=%u task=%u is_sys=%d cpu=%u", | |
27171 | + nsys_cpus, | |
27172 | + pfm_res.thread_sessions, | |
27173 | + is_system, | |
27174 | + cpu); | |
27175 | + | |
27176 | +abort: | |
27177 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27178 | + | |
27179 | + return ret; | |
27180 | +} | |
27181 | + | |
27182 | +/** | |
27183 | + * pfm_session_release - release a per-cpu or per-thread session | |
27184 | + * @is_system: true if per-cpu session | |
27185 | + * @cpu: cpu number for per-cpu session | |
27186 | + * | |
27187 | + * called from __pfm_unload_context() | |
27188 | + */ | |
27189 | +void pfm_session_release(int is_system, u32 cpu) | |
27190 | +{ | |
27191 | + unsigned long flags; | |
27192 | + | |
27193 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27194 | + | |
27195 | + PFM_DBG("in sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u", | |
27196 | + cpus_weight(pfm_res.sys_cpumask), | |
27197 | + pfm_res.thread_sessions, | |
27198 | + is_system, cpu); | |
27199 | + | |
27200 | + if (is_system) | |
27201 | + cpu_clear(cpu, pfm_res.sys_cpumask); | |
27202 | + else | |
27203 | + pfm_res.thread_sessions--; | |
27204 | + | |
27205 | + PFM_DBG("out sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u", | |
27206 | + cpus_weight(pfm_res.sys_cpumask), | |
27207 | + pfm_res.thread_sessions, | |
27208 | + is_system, cpu); | |
27209 | + | |
27210 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27211 | +} | |
27212 | + | |
27213 | +/** | |
27214 | + * pfm_session_allcpus_acquire - acquire per-cpu sessions on all available cpus | |
27215 | + * | |
27216 | + * currently used by Oprofile on X86 | |
27217 | + */ | |
27218 | +int pfm_session_allcpus_acquire(void) | |
27219 | +{ | |
27220 | + unsigned long flags; | |
27221 | + u32 nsys_cpus, cpu; | |
27222 | + int ret = -EBUSY; | |
27223 | + | |
27224 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27225 | + | |
27226 | + nsys_cpus = cpus_weight(pfm_res.sys_cpumask); | |
27227 | + | |
27228 | + PFM_DBG("in sys=%u task=%u", | |
27229 | + nsys_cpus, | |
27230 | + pfm_res.thread_sessions); | |
27231 | + | |
27232 | + if (nsys_cpus) { | |
27233 | + PFM_DBG("already some system-wide sessions"); | |
27234 | + goto abort; | |
27235 | + } | |
27236 | + | |
27237 | + /* | |
27238 | + * cannot mix system wide and per-task sessions | |
27239 | + */ | |
27240 | + if (pfm_res.thread_sessions) { | |
27241 | + PFM_DBG("%u conflicting thread_sessions", | |
27242 | + pfm_res.thread_sessions); | |
27243 | + goto abort; | |
27244 | + } | |
27245 | + | |
27246 | + for_each_online_cpu(cpu) { | |
27247 | + cpu_set(cpu, pfm_res.sys_cpumask); | |
27248 | + nsys_cpus++; | |
27249 | + } | |
27250 | + | |
27251 | + PFM_DBG("out sys=%u task=%u", | |
27252 | + nsys_cpus, | |
27253 | + pfm_res.thread_sessions); | |
27254 | + | |
27255 | + ret = 0; | |
27256 | +abort: | |
27257 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27258 | + | |
27259 | + return ret; | |
27260 | +} | |
27261 | +EXPORT_SYMBOL(pfm_session_allcpus_acquire); | |
27262 | + | |
27263 | +/** | |
27264 | + * pfm_session_allcpus_release - release per-cpu sessions on all cpus | |
27265 | + * | |
27266 | + * currently used by Oprofile code | |
27267 | + */ | |
27268 | +void pfm_session_allcpus_release(void) | |
27269 | +{ | |
27270 | + unsigned long flags; | |
27271 | + u32 nsys_cpus, cpu; | |
27272 | + | |
27273 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27274 | + | |
27275 | + nsys_cpus = cpus_weight(pfm_res.sys_cpumask); | |
27276 | + | |
27277 | + PFM_DBG("in sys=%u task=%u", | |
27278 | + nsys_cpus, | |
27279 | + pfm_res.thread_sessions); | |
27280 | + | |
27281 | + /* | |
27282 | + * XXX: could use __cpus_clear() with nbits | |
27283 | + */ | |
27284 | + for_each_online_cpu(cpu) { | |
27285 | + cpu_clear(cpu, pfm_res.sys_cpumask); | |
27286 | + nsys_cpus--; | |
27287 | + } | |
27288 | + | |
27289 | + PFM_DBG("out sys=%u task=%u", | |
27290 | + nsys_cpus, | |
27291 | + pfm_res.thread_sessions); | |
27292 | + | |
27293 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27294 | +} | |
27295 | +EXPORT_SYMBOL(pfm_session_allcpus_release); | |
27296 | + | |
27297 | +/** | |
27298 | + * pfm_sysfs_res_show - return current resource usage for sysfs | |
27299 | + * @buf: buffer to hold string in return | |
27300 | + * @sz: size of buf | |
27301 | + * @what: what to produce | |
27302 | + * what=0 : thread_sessions | |
27303 | + * what=1 : cpus_weight(sys_cpumask) | |
27304 | + * what=2 : smpl_buf_mem_cur | |
27305 | + * what=3 : pmu model name | |
27306 | + * | |
27307 | + * called from perfmon_sysfs.c | |
27308 | + * return number of bytes written into buf (up to sz) | |
27309 | + */ | |
27310 | +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what) | |
27311 | +{ | |
27312 | + unsigned long flags; | |
27313 | + | |
27314 | + spin_lock_irqsave(&pfm_res_lock, flags); | |
27315 | + | |
27316 | + switch (what) { | |
27317 | + case 0: snprintf(buf, sz, "%u\n", pfm_res.thread_sessions); | |
27318 | + break; | |
27319 | + case 1: snprintf(buf, sz, "%d\n", cpus_weight(pfm_res.sys_cpumask)); | |
27320 | + break; | |
27321 | + case 2: snprintf(buf, sz, "%zu\n", pfm_res.smpl_buf_mem_cur); | |
27322 | + break; | |
27323 | + case 3: | |
27324 | + snprintf(buf, sz, "%s\n", | |
27325 | + pfm_pmu_conf ? pfm_pmu_conf->pmu_name | |
27326 | + : "unknown\n"); | |
27327 | + } | |
27328 | + spin_unlock_irqrestore(&pfm_res_lock, flags); | |
27329 | + return strlen(buf); | |
27330 | +} | |
27331 | --- /dev/null | |
27332 | +++ b/perfmon/perfmon_rw.c | |
27333 | @@ -0,0 +1,733 @@ | |
27334 | +/* | |
27335 | + * perfmon_rw.c: perfmon2 PMC/PMD read/write system calls | |
27336 | + * | |
27337 | + * This file implements the perfmon2 interface which | |
27338 | + * provides access to the hardware performance counters | |
27339 | + * of the host processor. | |
27340 | + * | |
27341 | + * The initial version of perfmon.c was written by | |
27342 | + * Ganesh Venkitachalam, IBM Corp. | |
27343 | + * | |
27344 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
27345 | + * David Mosberger, Hewlett Packard Co. | |
27346 | + * | |
27347 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
27348 | + * by Stephane Eranian, Hewlett Packard Co. | |
27349 | + * | |
27350 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
27351 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
27352 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
27353 | + * | |
27354 | + * More information about perfmon available at: | |
27355 | + * http://perfmon2.sf.net/ | |
27356 | + * | |
27357 | + * This program is free software; you can redistribute it and/or | |
27358 | + * modify it under the terms of version 2 of the GNU General Public | |
27359 | + * License as published by the Free Software Foundation. | |
27360 | + * | |
27361 | + * This program is distributed in the hope that it will be useful, | |
27362 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27363 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
27364 | + * General Public License for more details. | |
27365 | + * | |
27366 | + * You should have received a copy of the GNU General Public License | |
27367 | + * along with this program; if not, write to the Free Software | |
27368 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
27369 | + * 02111-1307 USA | |
27370 | + */ | |
27371 | +#include <linux/module.h> | |
27372 | +#include <linux/kernel.h> | |
27373 | +#include <linux/perfmon_kern.h> | |
27374 | +#include "perfmon_priv.h" | |
27375 | + | |
27376 | +#define PFM_REGFL_PMC_ALL (PFM_REGFL_NO_EMUL64) | |
27377 | +#define PFM_REGFL_PMD_ALL (PFM_REGFL_RANDOM|PFM_REGFL_OVFL_NOTIFY) | |
27378 | + | |
27379 | +/** | |
27380 | + * update_used_reg -- update used_pmcs for a single PMD | |
27381 | + * @set: set to update | |
27382 | + * @cnum: new PMD to add | |
27383 | + * | |
27384 | + * This function adds the pmds and pmcs depending on PMD cnum | |
27385 | + */ | |
27386 | +static inline void update_used_reg(struct pfm_context *ctx, | |
27387 | + struct pfm_event_set *set, u16 cnum) | |
27388 | +{ | |
27389 | + bitmap_or(cast_ulp(set->used_pmcs), | |
27390 | + cast_ulp(set->used_pmcs), | |
27391 | + cast_ulp(pfm_pmu_conf->pmd_desc[cnum].dep_pmcs), | |
27392 | + ctx->regs.max_pmc); | |
27393 | +} | |
27394 | + | |
27395 | +/** | |
27396 | + * update_used_pmcs -- update used_pmcs bitmask | |
27397 | + * @set: event set to update | |
27398 | + * @bv: bitmask to inspect for new PMD registers | |
27399 | + * | |
27400 | + * This function updates the used_pmcs bitmask for | |
27401 | + * the set using bv, a bitmask of pmds. For each pmd in bv, | |
27402 | + * its depending pmcs are added to used_pmcs. | |
27403 | + */ | |
27404 | +static void update_used_pmcs(struct pfm_context *ctx, | |
27405 | + struct pfm_event_set *set, unsigned long *bv) | |
27406 | +{ | |
27407 | + u16 max_pmd; | |
27408 | + int n, p, q; | |
27409 | + | |
27410 | + max_pmd = ctx->regs.max_pmd; | |
27411 | + | |
27412 | + n = bitmap_weight(bv, max_pmd); | |
27413 | + for(p = 0; n; n--, p = q+1) { | |
27414 | + q = find_next_bit(bv, max_pmd, p); | |
27415 | + update_used_reg(ctx, set, q); | |
27416 | + } | |
27417 | +} | |
27418 | + | |
27419 | +/** | |
27420 | + * update_changes -- update nused_pmcs, nused_pmds, write newly touched pmcs | |
27421 | + * @ctx: context to use | |
27422 | + * @set: event set to use | |
27423 | + * @old_used_pmcs: former used_pmc bitmask | |
27424 | + * @can_access: non-zero if PMU is accessible, i.e., can be written to | |
27425 | + * | |
27426 | + * This function updates nused_pmcs and nused_pmds after the last modification | |
27427 | + * to an event set. When new pmcs are used, then they must be initialized such | |
27428 | + * that we do not pick up stale values from another session. | |
27429 | + */ | |
27430 | +static inline int update_changes(struct pfm_context *ctx, struct pfm_event_set *set, | |
27431 | + unsigned long *old_used_pmcs) | |
27432 | +{ | |
27433 | + struct pfarg_pmc req; | |
27434 | + u16 max_pmc, max_pmd; | |
27435 | + int n, p, q, ret = 0; | |
27436 | + | |
27437 | + max_pmd = ctx->regs.max_pmd; | |
27438 | + max_pmc = ctx->regs.max_pmc; | |
27439 | + | |
27440 | + /* | |
27441 | + * update used counts | |
27442 | + */ | |
27443 | + set->nused_pmds = bitmap_weight(cast_ulp(set->used_pmds), max_pmd); | |
27444 | + set->nused_pmcs = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc); | |
27445 | + | |
27446 | + PFM_DBG("set%u u_pmds=0x%llx nu_pmds=%u u_pmcs=0x%llx nu_pmcs=%u", | |
27447 | + set->id, | |
27448 | + (unsigned long long)set->used_pmds[0], | |
27449 | + set->nused_pmds, | |
27450 | + (unsigned long long)set->used_pmcs[0], | |
27451 | + set->nused_pmcs); | |
27452 | + | |
27453 | + memset(&req, 0, sizeof(req)); | |
27454 | + | |
27455 | + n = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc); | |
27456 | + for(p = 0; n; n--, p = q+1) { | |
27457 | + q = find_next_bit(cast_ulp(set->used_pmcs), max_pmc, p); | |
27458 | + | |
27459 | + if (test_bit(q, cast_ulp(old_used_pmcs))) | |
27460 | + continue; | |
27461 | + | |
27462 | + req.reg_num = q; | |
27463 | + req.reg_value = set->pmcs[q]; | |
27464 | + | |
27465 | + ret = __pfm_write_pmcs(ctx, &req, 1); | |
27466 | + if (ret) | |
27467 | + break; | |
27468 | + } | |
27469 | + return ret; | |
27470 | +} | |
27471 | + | |
27472 | +/** | |
27473 | + * handle_smpl_bv - checks sampling bitmasks for new PMDs | |
27474 | + * @ctx: context to use | |
27475 | + * @set: set to use | |
27476 | + * @bv: sampling bitmask | |
27477 | + * | |
27478 | + * scans the smpl bitmask looking for new PMDs (not yet used), if found | |
27479 | + * invoke pfm_write_pmds() on them to get them initialized and marked used | |
27480 | + */ | |
27481 | +static int handle_smpl_bv(struct pfm_context *ctx, struct pfm_event_set *set, | |
27482 | + unsigned long *bv) | |
27483 | +{ | |
27484 | + struct pfarg_pmd req; | |
27485 | + int p, q, n, ret = 0; | |
27486 | + u16 max_pmd; | |
27487 | + | |
27488 | + memset(&req, 0, sizeof(req)); | |
27489 | + | |
27490 | + max_pmd = ctx->regs.max_pmd; | |
27491 | + | |
27492 | + n = bitmap_weight(cast_ulp(bv), max_pmd); | |
27493 | + | |
27494 | + for(p = 0; n; n--, p = q+1) { | |
27495 | + q = find_next_bit(cast_ulp(bv), max_pmd, p); | |
27496 | + | |
27497 | + if (test_bit(q, cast_ulp(set->used_pmds))) | |
27498 | + continue; | |
27499 | + | |
27500 | + req.reg_num = q; | |
27501 | + req.reg_value = 0; | |
27502 | + | |
27503 | + ret = __pfm_write_pmds(ctx, &req, 1, 0); | |
27504 | + if (ret) | |
27505 | + break; | |
27506 | + } | |
27507 | + return ret; | |
27508 | +} | |
27509 | + | |
27510 | +/** | |
27511 | + * is_invalid -- check if register index is within limits | |
27512 | + * @cnum: register index | |
27513 | + * @impl: bitmask of implemented registers | |
27514 | + * @max: highest implemented registers + 1 | |
27515 | + * | |
27516 | + * return: | |
27517 | + * 0 if register index is valid | |
27518 | + * 1 if invalid | |
27519 | + */ | |
27520 | +static inline int is_invalid(u16 cnum, unsigned long *impl, u16 max) | |
27521 | +{ | |
27522 | + return cnum >= max || !test_bit(cnum, impl); | |
27523 | +} | |
27524 | + | |
27525 | +/** | |
27526 | + * __pfm_write_pmds - modify data registers | |
27527 | + * @ctx: context to operate on | |
27528 | + * @req: pfarg_pmd_t request from user | |
27529 | + * @count: number of element in the pfarg_pmd_t vector | |
27530 | + * @compat: used only on IA-64 to maintain backward compatibility with v2.0 | |
27531 | + * | |
27532 | + * The function succeeds whether the context is attached or not. | |
27533 | + * When attached to another thread, that thread must be stopped. | |
27534 | + * | |
27535 | + * The context is locked and interrupts are disabled. | |
27536 | + */ | |
27537 | +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count, | |
27538 | + int compat) | |
27539 | +{ | |
27540 | + struct pfm_event_set *set, *active_set; | |
27541 | + u64 old_used_pmcs[PFM_PMC_BV]; | |
27542 | + unsigned long *smpl_pmds, *reset_pmds, *impl_pmds, *impl_rw_pmds; | |
27543 | + u32 req_flags, flags; | |
27544 | + u16 cnum, pmd_type, max_pmd; | |
27545 | + u16 set_id; | |
27546 | + int i, can_access_pmu; | |
27547 | + int ret; | |
27548 | + pfm_pmd_check_t wr_func; | |
27549 | + | |
27550 | + active_set = ctx->active_set; | |
27551 | + max_pmd = ctx->regs.max_pmd; | |
27552 | + impl_pmds = cast_ulp(ctx->regs.pmds); | |
27553 | + impl_rw_pmds = cast_ulp(ctx->regs.rw_pmds); | |
27554 | + wr_func = pfm_pmu_conf->pmd_write_check; | |
27555 | + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
27556 | + | |
27557 | + can_access_pmu = 0; | |
27558 | + | |
27559 | + /* | |
27560 | + * we cannot access the actual PMD registers when monitoring is masked | |
27561 | + */ | |
27562 | + if (unlikely(ctx->state == PFM_CTX_LOADED)) | |
27563 | + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task | |
27564 | + || ctx->flags.system; | |
27565 | + | |
27566 | + bitmap_copy(cast_ulp(old_used_pmcs), | |
27567 | + cast_ulp(set->used_pmcs), | |
27568 | + ctx->regs.max_pmc); | |
27569 | + | |
27570 | + ret = -EINVAL; | |
27571 | + for (i = 0; i < count; i++, req++) { | |
27572 | + | |
27573 | + cnum = req->reg_num; | |
27574 | + set_id = req->reg_set; | |
27575 | + req_flags = req->reg_flags; | |
27576 | + smpl_pmds = cast_ulp(req->reg_smpl_pmds); | |
27577 | + reset_pmds = cast_ulp(req->reg_reset_pmds); | |
27578 | + flags = 0; | |
27579 | + | |
27580 | + /* | |
27581 | + * cannot write to a nonexistent register | |
27582 | + * writes to read-only register are ignored | |
27583 | + */ | |
27584 | + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) { | |
27585 | + PFM_DBG("pmd%u is not available", cnum); | |
27586 | + goto error; | |
27587 | + } | |
27588 | + | |
27589 | + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type; | |
27590 | + | |
27591 | + /* | |
27592 | + * ensure only valid flags are set | |
27593 | + */ | |
27594 | + if (req_flags & ~(PFM_REGFL_PMD_ALL)) { | |
27595 | + PFM_DBG("pmd%u: invalid flags=0x%x", | |
27596 | + cnum, req_flags); | |
27597 | + goto error; | |
27598 | + } | |
27599 | + | |
27600 | + /* | |
27601 | + * OVFL_NOTIFY is valid for all types of PMD. | |
27602 | + * non counting PMD may trigger PMU interrupt | |
27603 | + * and thus may trigger recording of a sample. | |
27604 | + * This is true with IBS on AMD family 16. | |
27605 | + */ | |
27606 | + if (req_flags & PFM_REGFL_OVFL_NOTIFY) | |
27607 | + flags |= PFM_REGFL_OVFL_NOTIFY; | |
27608 | + | |
27609 | + /* | |
27610 | + * We allow randomization to non counting PMD | |
27611 | + */ | |
27612 | + if (req_flags & PFM_REGFL_RANDOM) | |
27613 | + flags |= PFM_REGFL_RANDOM; | |
27614 | + | |
27615 | + /* | |
27616 | + * verify validity of smpl_pmds | |
27617 | + */ | |
27618 | + if (unlikely(!bitmap_subset(smpl_pmds, impl_pmds, PFM_MAX_PMDS))) { | |
27619 | + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u", | |
27620 | + (unsigned long long)req->reg_smpl_pmds[0], | |
27621 | + cnum); | |
27622 | + goto error; | |
27623 | + } | |
27624 | + | |
27625 | + /* | |
27626 | + * verify validity of reset_pmds | |
27627 | + * check against impl_rw_pmds because it is not | |
27628 | + * possible to reset read-only PMDs | |
27629 | + */ | |
27630 | + if (unlikely(!bitmap_subset(reset_pmds, impl_rw_pmds, PFM_MAX_PMDS))) { | |
27631 | + PFM_DBG("invalid reset_pmds=0x%llx for pmd%u", | |
27632 | + (unsigned long long)req->reg_reset_pmds[0], | |
27633 | + cnum); | |
27634 | + goto error; | |
27635 | + } | |
27636 | + | |
27637 | + /* | |
27638 | + * locate event set | |
27639 | + */ | |
27640 | + if (set_id != set->id) { | |
27641 | + /* update number of used register for previous set */ | |
27642 | + if (i) { | |
27643 | + ret = update_changes(ctx, set, cast_ulp(old_used_pmcs)); | |
27644 | + if (ret) | |
27645 | + goto error; | |
27646 | + } | |
27647 | + | |
27648 | + set = pfm_find_set(ctx, set_id, 0); | |
27649 | + if (set == NULL) { | |
27650 | + PFM_DBG("event set%u does not exist", | |
27651 | + set_id); | |
27652 | + goto error; | |
27653 | + } | |
27654 | + bitmap_copy(cast_ulp(old_used_pmcs), | |
27655 | + cast_ulp(set->used_pmcs), | |
27656 | + ctx->regs.max_pmc); | |
27657 | + } | |
27658 | + | |
27659 | + /* | |
27660 | + * execute write checker, if any | |
27661 | + */ | |
27662 | + if (unlikely(wr_func && (pmd_type & PFM_REG_WC))) { | |
27663 | + ret = (*wr_func)(ctx, set, req); | |
27664 | + if (ret) | |
27665 | + goto error; | |
27666 | + | |
27667 | + } | |
27668 | + | |
27669 | + | |
27670 | + /* | |
27671 | + * now commit changes to software state | |
27672 | + */ | |
27673 | + | |
27674 | + if (unlikely(compat)) | |
27675 | + goto skip_set; | |
27676 | + | |
27677 | + if (bitmap_weight(smpl_pmds, max_pmd)) { | |
27678 | + ret = handle_smpl_bv(ctx, set, smpl_pmds); | |
27679 | + if (ret) | |
27680 | + goto error; | |
27681 | + update_used_pmcs(ctx, set, cast_ulp(smpl_pmds)); | |
27682 | + } | |
27683 | + | |
27684 | + bitmap_copy(cast_ulp(set->pmds[cnum].smpl_pmds), | |
27685 | + smpl_pmds, | |
27686 | + max_pmd); | |
27687 | + | |
27688 | + | |
27689 | + if (bitmap_weight(reset_pmds, max_pmd)) { | |
27690 | + ret = handle_smpl_bv(ctx, set, reset_pmds); | |
27691 | + if (ret) | |
27692 | + goto error; | |
27693 | + update_used_pmcs(ctx, set, cast_ulp(reset_pmds)); | |
27694 | + } | |
27695 | + | |
27696 | + bitmap_copy(cast_ulp(set->pmds[cnum].reset_pmds), | |
27697 | + reset_pmds, | |
27698 | + max_pmd); | |
27699 | + | |
27700 | + set->pmds[cnum].flags = flags; | |
27701 | + | |
27702 | + __set_bit(cnum, cast_ulp(set->used_pmds)); | |
27703 | + update_used_reg(ctx, set, cnum); | |
27704 | + | |
27705 | + /* | |
27706 | + * we reprogram the PMD hence, we clear any pending | |
27707 | + * ovfl. Does affect ovfl switch on restart but new | |
27708 | + * value has already been established here | |
27709 | + */ | |
27710 | + if (test_bit(cnum, cast_ulp(set->povfl_pmds))) { | |
27711 | + set->npend_ovfls--; | |
27712 | + __clear_bit(cnum, cast_ulp(set->povfl_pmds)); | |
27713 | + } | |
27714 | + __clear_bit(cnum, cast_ulp(set->ovfl_pmds)); | |
27715 | + | |
27716 | + /* | |
27717 | + * update ovfl_notify | |
27718 | + */ | |
27719 | + if (flags & PFM_REGFL_OVFL_NOTIFY) | |
27720 | + __set_bit(cnum, cast_ulp(set->ovfl_notify)); | |
27721 | + else | |
27722 | + __clear_bit(cnum, cast_ulp(set->ovfl_notify)); | |
27723 | + | |
27724 | + /* | |
27725 | + * establish new switch count | |
27726 | + */ | |
27727 | + set->pmds[cnum].ovflsw_thres = req->reg_ovfl_switch_cnt; | |
27728 | + set->pmds[cnum].ovflsw_ref_thres = req->reg_ovfl_switch_cnt; | |
27729 | +skip_set: | |
27730 | + | |
27731 | + /* | |
27732 | + * set last value to new value for all types of PMD | |
27733 | + */ | |
27734 | + set->pmds[cnum].lval = req->reg_value; | |
27735 | + set->pmds[cnum].value = req->reg_value; | |
27736 | + | |
27737 | + /* | |
27738 | + * update reset values (not just for counters) | |
27739 | + */ | |
27740 | + set->pmds[cnum].long_reset = req->reg_long_reset; | |
27741 | + set->pmds[cnum].short_reset = req->reg_short_reset; | |
27742 | + | |
27743 | + /* | |
27744 | + * update randomization mask | |
27745 | + */ | |
27746 | + set->pmds[cnum].mask = req->reg_random_mask; | |
27747 | + | |
27748 | + set->pmds[cnum].eventid = req->reg_smpl_eventid; | |
27749 | + | |
27750 | + if (set == active_set) { | |
27751 | + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMDS; | |
27752 | + if (can_access_pmu) | |
27753 | + pfm_write_pmd(ctx, cnum, req->reg_value); | |
27754 | + } | |
27755 | + | |
27756 | + | |
27757 | + PFM_DBG("set%u pmd%u=0x%llx flags=0x%x a_pmu=%d " | |
27758 | + "ctx_pmd=0x%llx s_reset=0x%llx " | |
27759 | + "l_reset=0x%llx s_pmds=0x%llx " | |
27760 | + "r_pmds=0x%llx o_pmds=0x%llx " | |
27761 | + "o_thres=%llu compat=%d eventid=%llx", | |
27762 | + set->id, | |
27763 | + cnum, | |
27764 | + (unsigned long long)req->reg_value, | |
27765 | + set->pmds[cnum].flags, | |
27766 | + can_access_pmu, | |
27767 | + (unsigned long long)set->pmds[cnum].value, | |
27768 | + (unsigned long long)set->pmds[cnum].short_reset, | |
27769 | + (unsigned long long)set->pmds[cnum].long_reset, | |
27770 | + (unsigned long long)set->pmds[cnum].smpl_pmds[0], | |
27771 | + (unsigned long long)set->pmds[cnum].reset_pmds[0], | |
27772 | + (unsigned long long)set->ovfl_pmds[0], | |
27773 | + (unsigned long long)set->pmds[cnum].ovflsw_thres, | |
27774 | + compat, | |
27775 | + (unsigned long long)set->pmds[cnum].eventid); | |
27776 | + } | |
27777 | + ret = 0; | |
27778 | + | |
27779 | +error: | |
27780 | + update_changes(ctx, set, cast_ulp(old_used_pmcs)); | |
27781 | + | |
27782 | + /* | |
27783 | + * make changes visible | |
27784 | + */ | |
27785 | + if (can_access_pmu) | |
27786 | + pfm_arch_serialize(); | |
27787 | + | |
27788 | + return ret; | |
27789 | +} | |
27790 | + | |
27791 | +/** | |
27792 | + * __pfm_write_pmcs - modify config registers | |
27793 | + * @ctx: context to operate on | |
27794 | + * @req: pfarg_pmc_t request from user | |
27795 | + * @count: number of element in the pfarg_pmc_t vector | |
27796 | + * | |
27797 | + * | |
27798 | + * The function succeeds whether the context is attached or not. | |
27799 | + * When attached to another thread, that thread must be stopped. | |
27800 | + * | |
27801 | + * The context is locked and interrupts are disabled. | |
27802 | + */ | |
27803 | +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, int count) | |
27804 | +{ | |
27805 | + struct pfm_event_set *set, *active_set; | |
27806 | + u64 value, dfl_val, rsvd_msk; | |
27807 | + unsigned long *impl_pmcs; | |
27808 | + int i, can_access_pmu; | |
27809 | + int ret; | |
27810 | + u16 set_id; | |
27811 | + u16 cnum, pmc_type, max_pmc; | |
27812 | + u32 flags, expert; | |
27813 | + pfm_pmc_check_t wr_func; | |
27814 | + | |
27815 | + active_set = ctx->active_set; | |
27816 | + | |
27817 | + wr_func = pfm_pmu_conf->pmc_write_check; | |
27818 | + max_pmc = ctx->regs.max_pmc; | |
27819 | + impl_pmcs = cast_ulp(ctx->regs.pmcs); | |
27820 | + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
27821 | + | |
27822 | + expert = pfm_controls.flags & PFM_CTRL_FL_RW_EXPERT; | |
27823 | + | |
27824 | + can_access_pmu = 0; | |
27825 | + | |
27826 | + /* | |
27827 | + * we cannot access the actual PMC registers when monitoring is masked | |
27828 | + */ | |
27829 | + if (unlikely(ctx->state == PFM_CTX_LOADED)) | |
27830 | + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task | |
27831 | + || ctx->flags.system; | |
27832 | + | |
27833 | + ret = -EINVAL; | |
27834 | + | |
27835 | + for (i = 0; i < count; i++, req++) { | |
27836 | + | |
27837 | + cnum = req->reg_num; | |
27838 | + set_id = req->reg_set; | |
27839 | + value = req->reg_value; | |
27840 | + flags = req->reg_flags; | |
27841 | + | |
27842 | + /* | |
27843 | + * no access to unavailable PMC register | |
27844 | + */ | |
27845 | + if (unlikely(is_invalid(cnum, impl_pmcs, max_pmc))) { | |
27846 | + PFM_DBG("pmc%u is not available", cnum); | |
27847 | + goto error; | |
27848 | + } | |
27849 | + | |
27850 | + pmc_type = pfm_pmu_conf->pmc_desc[cnum].type; | |
27851 | + dfl_val = pfm_pmu_conf->pmc_desc[cnum].dfl_val; | |
27852 | + rsvd_msk = pfm_pmu_conf->pmc_desc[cnum].rsvd_msk; | |
27853 | + | |
27854 | + /* | |
27855 | + * ensure only valid flags are set | |
27856 | + */ | |
27857 | + if (flags & ~PFM_REGFL_PMC_ALL) { | |
27858 | + PFM_DBG("pmc%u: invalid flags=0x%x", cnum, flags); | |
27859 | + goto error; | |
27860 | + } | |
27861 | + | |
27862 | + /* | |
27863 | + * locate event set | |
27864 | + */ | |
27865 | + if (set_id != set->id) { | |
27866 | + set = pfm_find_set(ctx, set_id, 0); | |
27867 | + if (set == NULL) { | |
27868 | + PFM_DBG("event set%u does not exist", | |
27869 | + set_id); | |
27870 | + goto error; | |
27871 | + } | |
27872 | + } | |
27873 | + | |
27874 | + /* | |
27875 | + * set reserved bits to default values | |
27876 | + * (reserved bits must be 1 in rsvd_msk) | |
27877 | + * | |
27878 | + * bypass via /sys/kernel/perfmon/mode = 1 | |
27879 | + */ | |
27880 | + if (likely(!expert)) | |
27881 | + value = (value & ~rsvd_msk) | (dfl_val & rsvd_msk); | |
27882 | + | |
27883 | + if (flags & PFM_REGFL_NO_EMUL64) { | |
27884 | + if (!(pmc_type & PFM_REG_NO64)) { | |
27885 | + PFM_DBG("pmc%u no support for " | |
27886 | + "PFM_REGFL_NO_EMUL64", cnum); | |
27887 | + goto error; | |
27888 | + } | |
27889 | + value &= ~pfm_pmu_conf->pmc_desc[cnum].no_emul64_msk; | |
27890 | + } | |
27891 | + | |
27892 | + /* | |
27893 | + * execute write checker, if any | |
27894 | + */ | |
27895 | + if (likely(wr_func && (pmc_type & PFM_REG_WC))) { | |
27896 | + req->reg_value = value; | |
27897 | + ret = (*wr_func)(ctx, set, req); | |
27898 | + if (ret) | |
27899 | + goto error; | |
27900 | + value = req->reg_value; | |
27901 | + } | |
27902 | + | |
27903 | + /* | |
27904 | + * Now we commit the changes | |
27905 | + */ | |
27906 | + | |
27907 | + /* | |
27908 | + * mark PMC register as used | |
27909 | + * We do not track associated PMC register based on | |
27910 | + * the fact that they will likely need to be written | |
27911 | + * in order to become useful at which point the statement | |
27912 | + * below will catch that. | |
27913 | + * | |
27914 | + * The used_pmcs bitmask is only useful on architectures where | |
27915 | + * the PMC needs to be modified for particular bits, especially | |
27916 | + * on overflow or to stop/start. | |
27917 | + */ | |
27918 | + if (!test_bit(cnum, cast_ulp(set->used_pmcs))) { | |
27919 | + __set_bit(cnum, cast_ulp(set->used_pmcs)); | |
27920 | + set->nused_pmcs++; | |
27921 | + } | |
27922 | + | |
27923 | + set->pmcs[cnum] = value; | |
27924 | + | |
27925 | + if (set == active_set) { | |
27926 | + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; | |
27927 | + if (can_access_pmu) | |
27928 | + pfm_arch_write_pmc(ctx, cnum, value); | |
27929 | + } | |
27930 | + | |
27931 | + PFM_DBG("set%u pmc%u=0x%llx a_pmu=%d " | |
27932 | + "u_pmcs=0x%llx nu_pmcs=%u", | |
27933 | + set->id, | |
27934 | + cnum, | |
27935 | + (unsigned long long)value, | |
27936 | + can_access_pmu, | |
27937 | + (unsigned long long)set->used_pmcs[0], | |
27938 | + set->nused_pmcs); | |
27939 | + } | |
27940 | + ret = 0; | |
27941 | +error: | |
27942 | + /* | |
27943 | + * make sure the changes are visible | |
27944 | + */ | |
27945 | + if (can_access_pmu) | |
27946 | + pfm_arch_serialize(); | |
27947 | + | |
27948 | + return ret; | |
27949 | +} | |
27950 | + | |
27951 | +/** | |
27952 | + * __pfm_read_pmds - read data registers | |
27953 | + * @ctx: context to operate on | |
27954 | + * @req: pfarg_pmd_t request from user | |
27955 | + * @count: number of element in the pfarg_pmd_t vector | |
27956 | + * | |
27957 | + * | |
27958 | + * The function succeeds whether the context is attached or not. | |
27959 | + * When attached to another thread, that thread must be stopped. | |
27960 | + * | |
27961 | + * The context is locked and interrupts are disabled. | |
27962 | + */ | |
27963 | +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count) | |
27964 | +{ | |
27965 | + u64 val = 0, lval, ovfl_mask, hw_val; | |
27966 | + u64 sw_cnt; | |
27967 | + unsigned long *impl_pmds; | |
27968 | + struct pfm_event_set *set, *active_set; | |
27969 | + int i, ret, can_access_pmu = 0; | |
27970 | + u16 cnum, pmd_type, set_id, max_pmd; | |
27971 | + | |
27972 | + ovfl_mask = pfm_pmu_conf->ovfl_mask; | |
27973 | + impl_pmds = cast_ulp(ctx->regs.pmds); | |
27974 | + max_pmd = ctx->regs.max_pmd; | |
27975 | + active_set = ctx->active_set; | |
27976 | + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
27977 | + | |
27978 | + if (likely(ctx->state == PFM_CTX_LOADED)) { | |
27979 | + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task | |
27980 | + || ctx->flags.system; | |
27981 | + | |
27982 | + if (can_access_pmu) | |
27983 | + pfm_arch_serialize(); | |
27984 | + } | |
27985 | + | |
27986 | + /* | |
27987 | + * on both UP and SMP, we can only read the PMD from the hardware | |
27988 | + * register when the task is the owner of the local PMU. | |
27989 | + */ | |
27990 | + ret = -EINVAL; | |
27991 | + for (i = 0; i < count; i++, req++) { | |
27992 | + | |
27993 | + cnum = req->reg_num; | |
27994 | + set_id = req->reg_set; | |
27995 | + | |
27996 | + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) { | |
27997 | + PFM_DBG("pmd%u is not implemented/unaccessible", cnum); | |
27998 | + goto error; | |
27999 | + } | |
28000 | + | |
28001 | + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type; | |
28002 | + | |
28003 | + /* | |
28004 | + * locate event set | |
28005 | + */ | |
28006 | + if (set_id != set->id) { | |
28007 | + set = pfm_find_set(ctx, set_id, 0); | |
28008 | + if (set == NULL) { | |
28009 | + PFM_DBG("event set%u does not exist", | |
28010 | + set_id); | |
28011 | + goto error; | |
28012 | + } | |
28013 | + } | |
28014 | + /* | |
28015 | + * it is not possible to read a PMD which was not requested: | |
28016 | + * - explicitly written via pfm_write_pmds() | |
28017 | + * - provided as a reg_smpl_pmds[] to another PMD during | |
28018 | + * pfm_write_pmds() | |
28019 | + * | |
28020 | + * This is motivated by security and for optimization purposes: | |
28021 | + * - on context switch restore, we can restore only what | |
28022 | + * we use (except when regs directly readable at user | |
28023 | + * level, e.g., IA-64 self-monitoring, I386 RDPMC). | |
28024 | + * - do not need to maintain PMC -> PMD dependencies | |
28025 | + */ | |
28026 | + if (unlikely(!test_bit(cnum, cast_ulp(set->used_pmds)))) { | |
28027 | + PFM_DBG("pmd%u cannot read, because not used", cnum); | |
28028 | + goto error; | |
28029 | + } | |
28030 | + | |
28031 | + val = set->pmds[cnum].value; | |
28032 | + lval = set->pmds[cnum].lval; | |
28033 | + | |
28034 | + /* | |
28035 | + * extract remaining ovfl to switch | |
28036 | + */ | |
28037 | + sw_cnt = set->pmds[cnum].ovflsw_thres; | |
28038 | + | |
28039 | + /* | |
28040 | + * If the task is not the current one, then we check if the | |
28041 | + * PMU state is still in the local live register due to lazy | |
28042 | + * ctxsw. If true, then we read directly from the registers. | |
28043 | + */ | |
28044 | + if (set == active_set && can_access_pmu) { | |
28045 | + hw_val = pfm_read_pmd(ctx, cnum); | |
28046 | + if (pmd_type & PFM_REG_C64) | |
28047 | + val = (val & ~ovfl_mask) | (hw_val & ovfl_mask); | |
28048 | + else | |
28049 | + val = hw_val; | |
28050 | + } | |
28051 | + | |
28052 | + PFM_DBG("set%u pmd%u=0x%llx sw_thr=%llu lval=0x%llx", | |
28053 | + set->id, | |
28054 | + cnum, | |
28055 | + (unsigned long long)val, | |
28056 | + (unsigned long long)sw_cnt, | |
28057 | + (unsigned long long)lval); | |
28058 | + | |
28059 | + req->reg_value = val; | |
28060 | + req->reg_last_reset_val = lval; | |
28061 | + req->reg_ovfl_switch_cnt = sw_cnt; | |
28062 | + } | |
28063 | + ret = 0; | |
28064 | +error: | |
28065 | + return ret; | |
28066 | +} | |
28067 | --- /dev/null | |
28068 | +++ b/perfmon/perfmon_sets.c | |
28069 | @@ -0,0 +1,873 @@ | |
28070 | +/* | |
28071 | + * perfmon_sets.c: perfmon2 event sets and multiplexing functions | |
28072 | + * | |
28073 | + * This file implements the perfmon2 interface which | |
28074 | + * provides access to the hardware performance counters | |
28075 | + * of the host processor. | |
28076 | + * | |
28077 | + * The initial version of perfmon.c was written by | |
28078 | + * Ganesh Venkitachalam, IBM Corp. | |
28079 | + * | |
28080 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
28081 | + * David Mosberger, Hewlett Packard Co. | |
28082 | + * | |
28083 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
28084 | + * by Stephane Eranian, Hewlett Packard Co. | |
28085 | + * | |
28086 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
28087 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
28088 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
28089 | + * | |
28090 | + * More information about perfmon available at: | |
28091 | + * http://perfmon2.sf.net | |
28092 | + * | |
28093 | + * This program is free software; you can redistribute it and/or | |
28094 | + * modify it under the terms of version 2 of the GNU General Public | |
28095 | + * License as published by the Free Software Foundation. | |
28096 | + * | |
28097 | + * This program is distributed in the hope that it will be useful, | |
28098 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
28099 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
28100 | + * General Public License for more details. | |
28101 | + * | |
28102 | + * You should have received a copy of the GNU General Public License | |
28103 | + * along with this program; if not, write to the Free Software | |
28104 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
28105 | + * 02111-1307 USA | |
28106 | + */ | |
28107 | +#include <linux/kernel.h> | |
28108 | +#include <linux/perfmon_kern.h> | |
28109 | +#include "perfmon_priv.h" | |
28110 | + | |
28111 | +static struct kmem_cache *pfm_set_cachep; | |
28112 | + | |
28113 | +/** | |
28114 | + * pfm_reload_switch_thresholds - reload overflow-based switch thresholds per set | |
28115 | + * @set: the set for which to reload thresholds | |
28116 | + * | |
28117 | + */ | |
28118 | +static void pfm_reload_switch_thresholds(struct pfm_context *ctx, | |
28119 | + struct pfm_event_set *set) | |
28120 | +{ | |
28121 | + u64 *used_pmds; | |
28122 | + u16 i, max, first; | |
28123 | + | |
28124 | + used_pmds = set->used_pmds; | |
28125 | + first = ctx->regs.first_intr_pmd; | |
28126 | + max = ctx->regs.max_intr_pmd; | |
28127 | + | |
28128 | + for (i = first; i < max; i++) { | |
28129 | + if (test_bit(i, cast_ulp(used_pmds))) { | |
28130 | + set->pmds[i].ovflsw_thres = set->pmds[i].ovflsw_ref_thres; | |
28131 | + | |
28132 | + PFM_DBG("set%u pmd%u ovflsw_thres=%llu", | |
28133 | + set->id, | |
28134 | + i, | |
28135 | + (unsigned long long)set->pmds[i].ovflsw_thres); | |
28136 | + } | |
28137 | + } | |
28138 | +} | |
28139 | + | |
28140 | +/** | |
28141 | + * pfm_prepare_sets - initialize sets on pfm_load_context | |
28142 | + * @ctx : context to operate on | |
28143 | + * @load_set: set to activate first | |
28144 | + * | |
28145 | + * connect all sets, reset internal fields | |
28146 | + */ | |
28147 | +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set) | |
28148 | +{ | |
28149 | + struct pfm_event_set *set, *p; | |
28150 | + u16 max; | |
28151 | + | |
28152 | + /* | |
28153 | + * locate first set to activate | |
28154 | + */ | |
28155 | + set = pfm_find_set(ctx, load_set, 0); | |
28156 | + if (!set) | |
28157 | + return NULL; | |
28158 | + | |
28159 | + if (set->flags & PFM_SETFL_OVFL_SWITCH) | |
28160 | + pfm_reload_switch_thresholds(ctx, set); | |
28161 | + | |
28162 | + max = ctx->regs.max_intr_pmd; | |
28163 | + | |
28164 | + list_for_each_entry(p, &ctx->set_list, list) { | |
28165 | + /* | |
28166 | + * cleanup bitvectors | |
28167 | + */ | |
28168 | + bitmap_zero(cast_ulp(p->ovfl_pmds), max); | |
28169 | + bitmap_zero(cast_ulp(p->povfl_pmds), max); | |
28170 | + | |
28171 | + p->npend_ovfls = 0; | |
28172 | + | |
28173 | + /* | |
28174 | + * we cannot just use plain clear because of arch-specific flags | |
28175 | + */ | |
28176 | + p->priv_flags &= ~(PFM_SETFL_PRIV_MOD_BOTH|PFM_SETFL_PRIV_SWITCH); | |
28177 | + /* | |
28178 | + * neither duration nor runs are reset because typically loading/unloading | |
28179 | + * does not mean counts are reset. To reset, the set must be modified | |
28180 | + */ | |
28181 | + } | |
28182 | + return set; | |
28183 | +} | |
28184 | + | |
28185 | +/* | |
28186 | + * called by hrtimer_interrupt() | |
28187 | + * | |
28188 | + * This is the only function where we come with | |
28189 | + * cpu_base->lock held before ctx->lock | |
28190 | + * | |
28191 | + * interrupts are disabled | |
28192 | + */ | |
28193 | +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t) | |
28194 | +{ | |
28195 | + struct pfm_event_set *set; | |
28196 | + struct pfm_context *ctx; | |
28197 | + unsigned long flags; | |
28198 | + enum hrtimer_restart ret = HRTIMER_NORESTART; | |
28199 | + | |
28200 | + /* | |
28201 | + * prevent against race with unload | |
28202 | + */ | |
28203 | + ctx = __get_cpu_var(pmu_ctx); | |
28204 | + if (!ctx) | |
28205 | + return HRTIMER_NORESTART; | |
28206 | + | |
28207 | + spin_lock_irqsave(&ctx->lock, flags); | |
28208 | + | |
28209 | + set = ctx->active_set; | |
28210 | + | |
28211 | + /* | |
28212 | + * switching occurs only when context is attached | |
28213 | + */ | |
28214 | + if (ctx->state != PFM_CTX_LOADED) | |
28215 | + goto done; | |
28216 | + /* | |
28217 | + * timer does not run while monitoring is inactive (not started) | |
28218 | + */ | |
28219 | + if (!pfm_arch_is_active(ctx)) | |
28220 | + goto done; | |
28221 | + | |
28222 | + pfm_stats_inc(handle_timeout_count); | |
28223 | + | |
28224 | + ret = pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_SHORT, 0); | |
28225 | +done: | |
28226 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
28227 | + return ret; | |
28228 | +} | |
28229 | + | |
28230 | +/* | |
28231 | + * | |
28232 | + * always operating on the current task | |
28233 | + * interrupts are masked | |
28234 | + * | |
28235 | + * input: | |
28236 | + * - new_set: new set to switch to, if NULL follow normal chain | |
28237 | + */ | |
28238 | +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx, | |
28239 | + struct pfm_event_set *new_set, | |
28240 | + int reset_mode, | |
28241 | + int no_restart) | |
28242 | +{ | |
28243 | + struct pfm_event_set *set; | |
28244 | + u64 now, end; | |
28245 | + u32 new_flags; | |
28246 | + int is_system, is_active, nn; | |
28247 | + enum hrtimer_restart ret = HRTIMER_NORESTART; | |
28248 | + | |
28249 | + now = sched_clock(); | |
28250 | + set = ctx->active_set; | |
28251 | + is_active = pfm_arch_is_active(ctx); | |
28252 | + | |
28253 | + /* | |
28254 | + * if no set is explicitly requested, | |
28255 | + * use the set_switch_next field | |
28256 | + */ | |
28257 | + if (!new_set) { | |
28258 | + /* | |
28259 | + * we use round-robin unless the user specified | |
28260 | + * a particular set to go to. | |
28261 | + */ | |
28262 | + new_set = list_first_entry(&set->list, struct pfm_event_set, list); | |
28263 | + if (&new_set->list == &ctx->set_list) | |
28264 | + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
28265 | + } | |
28266 | + | |
28267 | + PFM_DBG_ovfl("state=%d act=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u " | |
28268 | + "next_runs=%llu new_npend=%d reset_mode=%d reset_pmds=%llx", | |
28269 | + ctx->state, | |
28270 | + is_active, | |
28271 | + set->id, | |
28272 | + (unsigned long long)set->runs, | |
28273 | + set->npend_ovfls, | |
28274 | + new_set->id, | |
28275 | + (unsigned long long)new_set->runs, | |
28276 | + new_set->npend_ovfls, | |
28277 | + reset_mode, | |
28278 | + (unsigned long long)new_set->reset_pmds[0]); | |
28279 | + | |
28280 | + is_system = ctx->flags.system; | |
28281 | + new_flags = new_set->flags; | |
28282 | + | |
28283 | + /* | |
28284 | + * nothing more to do | |
28285 | + */ | |
28286 | + if (new_set == set) | |
28287 | + goto skip_same_set; | |
28288 | + | |
28289 | + if (is_active) { | |
28290 | + pfm_arch_stop(current, ctx); | |
28291 | + pfm_save_pmds(ctx, set); | |
28292 | + /* | |
28293 | + * compute elapsed ns for active set | |
28294 | + */ | |
28295 | + set->duration += now - set->duration_start; | |
28296 | + } | |
28297 | + | |
28298 | + pfm_arch_restore_pmds(ctx, new_set); | |
28299 | + /* | |
28300 | + * if masked, we must restore the pmcs such that they | |
28301 | + * do not capture anything. | |
28302 | + */ | |
28303 | + pfm_arch_restore_pmcs(ctx, new_set); | |
28304 | + | |
28305 | + if (new_set->npend_ovfls) { | |
28306 | + pfm_arch_resend_irq(ctx); | |
28307 | + pfm_stats_inc(ovfl_intr_replay_count); | |
28308 | + } | |
28309 | + | |
28310 | + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; | |
28311 | + | |
28312 | +skip_same_set: | |
28313 | + new_set->runs++; | |
28314 | + /* | |
28315 | + * reset switch threshold | |
28316 | + */ | |
28317 | + if (new_flags & PFM_SETFL_OVFL_SWITCH) | |
28318 | + pfm_reload_switch_thresholds(ctx, new_set); | |
28319 | + | |
28320 | + /* | |
28321 | + * reset overflowed PMD registers in new set | |
28322 | + */ | |
28323 | + nn = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd); | |
28324 | + if (nn) | |
28325 | + pfm_reset_pmds(ctx, new_set, nn, reset_mode); | |
28326 | + | |
28327 | + | |
28328 | + /* | |
28329 | + * This is needed when coming from pfm_start() | |
28330 | + * | |
28331 | + * When switching to the same set, there is no | |
28332 | + * need to restart | |
28333 | + */ | |
28334 | + if (no_restart) | |
28335 | + goto skip_restart; | |
28336 | + | |
28337 | + if (is_active) { | |
28338 | + /* | |
28339 | + * do not need to restart when same set | |
28340 | + */ | |
28341 | + if (new_set != set) { | |
28342 | + ctx->active_set = new_set; | |
28343 | + new_set->duration_start = now; | |
28344 | + pfm_arch_start(current, ctx); | |
28345 | + } | |
28346 | + /* | |
28347 | + * install new timeout if necessary | |
28348 | + */ | |
28349 | + if (new_flags & PFM_SETFL_TIME_SWITCH) { | |
28350 | + struct hrtimer *h; | |
28351 | + h = &__get_cpu_var(pfm_hrtimer); | |
28352 | + hrtimer_forward(h, h->base->get_time(), new_set->hrtimer_exp); | |
28353 | + new_set->hrtimer_rem = new_set->hrtimer_exp; | |
28354 | + ret = HRTIMER_RESTART; | |
28355 | + } | |
28356 | + } | |
28357 | + | |
28358 | +skip_restart: | |
28359 | + ctx->active_set = new_set; | |
28360 | + | |
28361 | + end = sched_clock(); | |
28362 | + | |
28363 | + pfm_stats_inc(set_switch_count); | |
28364 | + pfm_stats_add(set_switch_ns, end - now); | |
28365 | + | |
28366 | + return ret; | |
28367 | +} | |
28368 | + | |
28369 | +/* | |
28370 | + * called from __pfm_overflow_handler() to switch event sets. | |
28371 | + * monitoring is stopped, task is current, interrupts are masked. | |
28372 | + * compared to pfm_switch_sets(), this version is simplified because | |
28373 | + * it knows about the call path. There is no need to stop monitoring | |
28374 | + * because it is already frozen by PMU handler. | |
28375 | + */ | |
28376 | +void pfm_switch_sets_from_intr(struct pfm_context *ctx) | |
28377 | +{ | |
28378 | + struct pfm_event_set *set, *new_set; | |
28379 | + u64 now, end; | |
28380 | + u32 new_flags; | |
28381 | + int is_system, n; | |
28382 | + | |
28383 | + now = sched_clock(); | |
28384 | + set = ctx->active_set; | |
28385 | + new_set = list_first_entry(&set->list, struct pfm_event_set, list); | |
28386 | + if (&new_set->list == &ctx->set_list) | |
28387 | + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
28388 | + | |
28389 | + PFM_DBG_ovfl("state=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u " | |
28390 | + "next_runs=%llu new_npend=%d new_r_pmds=%llx", | |
28391 | + ctx->state, | |
28392 | + set->id, | |
28393 | + (unsigned long long)set->runs, | |
28394 | + set->npend_ovfls, | |
28395 | + new_set->id, | |
28396 | + (unsigned long long)new_set->runs, | |
28397 | + new_set->npend_ovfls, | |
28398 | + (unsigned long long)new_set->reset_pmds[0]); | |
28399 | + | |
28400 | + is_system = ctx->flags.system; | |
28401 | + new_flags = new_set->flags; | |
28402 | + | |
28403 | + /* | |
28404 | + * nothing more to do | |
28405 | + */ | |
28406 | + if (new_set == set) | |
28407 | + goto skip_same_set; | |
28408 | + | |
28409 | + /* | |
28410 | + * switch on intr only when set has OVFL_SWITCH | |
28411 | + */ | |
28412 | + BUG_ON(set->flags & PFM_SETFL_TIME_SWITCH); | |
28413 | + | |
28414 | + /* | |
28415 | + * when called from PMU intr handler, monitoring | |
28416 | + * is already stopped | |
28417 | + * | |
28418 | + * save current PMD registers, we use a special | |
28419 | + * form for performance reason. On some architectures, | |
28420 | + * such as x86, the pmds are already saved when entering | |
28421 | + * the PMU interrupt handler via pfm-arch_intr_freeze() | |
28422 | + * so we don't need to save them again. On the contrary, | |
28423 | + * on IA-64, they are not saved by freeze, thus we have to | |
28424 | + * to it here. | |
28425 | + */ | |
28426 | + pfm_arch_save_pmds_from_intr(ctx, set); | |
28427 | + | |
28428 | + /* | |
28429 | + * compute elapsed ns for active set | |
28430 | + */ | |
28431 | + set->duration += now - set->duration_start; | |
28432 | + | |
28433 | + pfm_arch_restore_pmds(ctx, new_set); | |
28434 | + | |
28435 | + /* | |
28436 | + * must not be restored active as we are still executing in the | |
28437 | + * PMU interrupt handler. activation is deferred to unfreeze PMU | |
28438 | + */ | |
28439 | + pfm_arch_restore_pmcs(ctx, new_set); | |
28440 | + | |
28441 | + /* | |
28442 | + * check for pending interrupt on incoming set. | |
28443 | + * interrupts are masked so handler call deferred | |
28444 | + */ | |
28445 | + if (new_set->npend_ovfls) { | |
28446 | + pfm_arch_resend_irq(ctx); | |
28447 | + pfm_stats_inc(ovfl_intr_replay_count); | |
28448 | + } | |
28449 | + /* | |
28450 | + * no need to restore anything, that is already done | |
28451 | + */ | |
28452 | + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; | |
28453 | + /* | |
28454 | + * reset duration counter | |
28455 | + */ | |
28456 | + new_set->duration_start = now; | |
28457 | + | |
28458 | +skip_same_set: | |
28459 | + new_set->runs++; | |
28460 | + | |
28461 | + /* | |
28462 | + * reset switch threshold | |
28463 | + */ | |
28464 | + if (new_flags & PFM_SETFL_OVFL_SWITCH) | |
28465 | + pfm_reload_switch_thresholds(ctx, new_set); | |
28466 | + | |
28467 | + /* | |
28468 | + * reset overflowed PMD registers | |
28469 | + */ | |
28470 | + n = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd); | |
28471 | + if (n) | |
28472 | + pfm_reset_pmds(ctx, new_set, n, PFM_PMD_RESET_SHORT); | |
28473 | + | |
28474 | + /* | |
28475 | + * XXX: isactive? | |
28476 | + * | |
28477 | + * Came here following a interrupt which triggered a switch, i.e., | |
28478 | + * previous set was using OVFL_SWITCH, thus we just need to arm | |
28479 | + * check if the next set is using timeout, and if so arm the timer. | |
28480 | + * | |
28481 | + * Timeout is always at least one tick away. No risk of having to | |
28482 | + * invoke the timeout handler right now. In any case, cb_mode is | |
28483 | + * set to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ such that hrtimer_start | |
28484 | + * will not try to wakeup the softirqd which could cause a locking | |
28485 | + * problem. | |
28486 | + */ | |
28487 | + if (new_flags & PFM_SETFL_TIME_SWITCH) { | |
28488 | + hrtimer_start(&__get_cpu_var(pfm_hrtimer), set->hrtimer_exp, HRTIMER_MODE_REL); | |
28489 | + PFM_DBG("armed new timeout for set%u", new_set->id); | |
28490 | + } | |
28491 | + | |
28492 | + ctx->active_set = new_set; | |
28493 | + | |
28494 | + end = sched_clock(); | |
28495 | + | |
28496 | + pfm_stats_inc(set_switch_count); | |
28497 | + pfm_stats_add(set_switch_ns, end - now); | |
28498 | +} | |
28499 | + | |
28500 | + | |
28501 | +static int pfm_setfl_sane(struct pfm_context *ctx, u32 flags) | |
28502 | +{ | |
28503 | +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH) | |
28504 | + int ret; | |
28505 | + | |
28506 | + ret = pfm_arch_setfl_sane(ctx, flags); | |
28507 | + if (ret) | |
28508 | + return ret; | |
28509 | + | |
28510 | + if ((flags & PFM_SETFL_BOTH_SWITCH) == PFM_SETFL_BOTH_SWITCH) { | |
28511 | + PFM_DBG("both switch ovfl and switch time are set"); | |
28512 | + return -EINVAL; | |
28513 | + } | |
28514 | + return 0; | |
28515 | +} | |
28516 | + | |
28517 | +/* | |
28518 | + * it is never possible to change the identification of an existing set | |
28519 | + */ | |
28520 | +static int pfm_change_evtset(struct pfm_context *ctx, | |
28521 | + struct pfm_event_set *set, | |
28522 | + struct pfarg_setdesc *req) | |
28523 | +{ | |
28524 | + struct timeval tv; | |
28525 | + struct timespec ts; | |
28526 | + ktime_t kt; | |
28527 | + long d, res_ns; | |
28528 | + s32 rem; | |
28529 | + u32 flags; | |
28530 | + int ret; | |
28531 | + u16 set_id; | |
28532 | + | |
28533 | + BUG_ON(ctx->state == PFM_CTX_LOADED); | |
28534 | + | |
28535 | + set_id = req->set_id; | |
28536 | + flags = req->set_flags; | |
28537 | + | |
28538 | + ret = pfm_setfl_sane(ctx, flags); | |
28539 | + if (ret) { | |
28540 | + PFM_DBG("invalid flags 0x%x set %u", flags, set_id); | |
28541 | + return -EINVAL; | |
28542 | + } | |
28543 | + | |
28544 | + /* | |
28545 | + * compute timeout value | |
28546 | + */ | |
28547 | + if (flags & PFM_SETFL_TIME_SWITCH) { | |
28548 | + /* | |
28549 | + * timeout value of zero is illegal | |
28550 | + */ | |
28551 | + if (req->set_timeout == 0) { | |
28552 | + PFM_DBG("invalid timeout 0"); | |
28553 | + return -EINVAL; | |
28554 | + } | |
28555 | + | |
28556 | + hrtimer_get_res(CLOCK_MONOTONIC, &ts); | |
28557 | + res_ns = (long)ktime_to_ns(timespec_to_ktime(ts)); | |
28558 | + | |
28559 | + /* | |
28560 | + * round-up to multiple of clock resolution | |
28561 | + * timeout = ((req->set_timeout+res_ns-1)/res_ns)*res_ns; | |
28562 | + * | |
28563 | + * u64 division missing on 32-bit arch, so use div_s64_rem | |
28564 | + */ | |
28565 | + d = div_s64_rem(req->set_timeout, res_ns, &rem); | |
28566 | + | |
28567 | + PFM_DBG("set%u flags=0x%x req_timeout=%lluns " | |
28568 | + "HZ=%u TICK_NSEC=%lu clock_res=%ldns rem=%dns", | |
28569 | + set_id, | |
28570 | + flags, | |
28571 | + (unsigned long long)req->set_timeout, | |
28572 | + HZ, TICK_NSEC, | |
28573 | + res_ns, | |
28574 | + rem); | |
28575 | + | |
28576 | + /* | |
28577 | + * Only accept timeout, we can actually achieve. | |
28578 | + * users can invoke clock_getres(CLOCK_MONOTONIC) | |
28579 | + * to figure out resolution and adjust timeout | |
28580 | + */ | |
28581 | + if (rem) { | |
28582 | + PFM_DBG("set%u invalid timeout=%llu", | |
28583 | + set_id, | |
28584 | + (unsigned long long)req->set_timeout); | |
28585 | + return -EINVAL; | |
28586 | + } | |
28587 | + | |
28588 | + tv = ns_to_timeval(req->set_timeout); | |
28589 | + kt = timeval_to_ktime(tv); | |
28590 | + set->hrtimer_exp = kt; | |
28591 | + } else { | |
28592 | + set->hrtimer_exp = ktime_set(0, 0); | |
28593 | + } | |
28594 | + | |
28595 | + /* | |
28596 | + * commit changes | |
28597 | + */ | |
28598 | + set->id = set_id; | |
28599 | + set->flags = flags; | |
28600 | + set->priv_flags = 0; | |
28601 | + | |
28602 | + /* | |
28603 | + * activation and duration counters are reset as | |
28604 | + * most likely major things will change in the set | |
28605 | + */ | |
28606 | + set->runs = 0; | |
28607 | + set->duration = 0; | |
28608 | + | |
28609 | + return 0; | |
28610 | +} | |
28611 | + | |
28612 | +/* | |
28613 | + * this function does not modify the next field | |
28614 | + */ | |
28615 | +static void pfm_initialize_set(struct pfm_context *ctx, | |
28616 | + struct pfm_event_set *set) | |
28617 | +{ | |
28618 | + u64 *impl_pmcs; | |
28619 | + u16 i, max_pmc; | |
28620 | + | |
28621 | + max_pmc = ctx->regs.max_pmc; | |
28622 | + impl_pmcs = ctx->regs.pmcs; | |
28623 | + | |
28624 | + /* | |
28625 | + * install default values for all PMC registers | |
28626 | + */ | |
28627 | + for (i = 0; i < max_pmc; i++) { | |
28628 | + if (test_bit(i, cast_ulp(impl_pmcs))) { | |
28629 | + set->pmcs[i] = pfm_pmu_conf->pmc_desc[i].dfl_val; | |
28630 | + PFM_DBG("set%u pmc%u=0x%llx", | |
28631 | + set->id, | |
28632 | + i, | |
28633 | + (unsigned long long)set->pmcs[i]); | |
28634 | + } | |
28635 | + } | |
28636 | + | |
28637 | + /* | |
28638 | + * PMD registers are set to 0 when the event set is allocated, | |
28639 | + * hence we do not need to explicitly initialize them. | |
28640 | + * | |
28641 | + * For virtual PMD registers (i.e., those tied to a SW resource) | |
28642 | + * their value becomes meaningful once the context is attached. | |
28643 | + */ | |
28644 | +} | |
28645 | + | |
28646 | +/* | |
28647 | + * look for an event set using its identification. If the set does not | |
28648 | + * exist: | |
28649 | + * - if alloc == 0 then return error | |
28650 | + * - if alloc == 1 then allocate set | |
28651 | + * | |
28652 | + * alloc is one ONLY when coming from pfm_create_evtsets() which can only | |
28653 | + * be called when the context is detached, i.e. monitoring is stopped. | |
28654 | + */ | |
28655 | +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, int alloc) | |
28656 | +{ | |
28657 | + struct pfm_event_set *set = NULL, *prev, *new_set; | |
28658 | + | |
28659 | + PFM_DBG("looking for set=%u", set_id); | |
28660 | + | |
28661 | + prev = NULL; | |
28662 | + list_for_each_entry(set, &ctx->set_list, list) { | |
28663 | + if (set->id == set_id) | |
28664 | + return set; | |
28665 | + if (set->id > set_id) | |
28666 | + break; | |
28667 | + prev = set; | |
28668 | + } | |
28669 | + | |
28670 | + if (!alloc) | |
28671 | + return NULL; | |
28672 | + | |
28673 | + /* | |
28674 | + * we are holding the context spinlock and interrupts | |
28675 | + * are unmasked. We must use GFP_ATOMIC as we cannot | |
28676 | + * sleep while holding a spin lock. | |
28677 | + */ | |
28678 | + new_set = kmem_cache_zalloc(pfm_set_cachep, GFP_ATOMIC); | |
28679 | + if (!new_set) | |
28680 | + return NULL; | |
28681 | + | |
28682 | + new_set->id = set_id; | |
28683 | + | |
28684 | + INIT_LIST_HEAD(&new_set->list); | |
28685 | + | |
28686 | + if (prev == NULL) { | |
28687 | + list_add(&(new_set->list), &ctx->set_list); | |
28688 | + } else { | |
28689 | + PFM_DBG("add after set=%u", prev->id); | |
28690 | + list_add(&(new_set->list), &prev->list); | |
28691 | + } | |
28692 | + return new_set; | |
28693 | +} | |
28694 | + | |
28695 | +/** | |
28696 | + * pfm_create_initial_set - create initial set from __pfm_c reate_context | |
28697 | + * @ctx: context to atatched the set to | |
28698 | + */ | |
28699 | +int pfm_create_initial_set(struct pfm_context *ctx) | |
28700 | +{ | |
28701 | + struct pfm_event_set *set; | |
28702 | + | |
28703 | + /* | |
28704 | + * create initial set0 | |
28705 | + */ | |
28706 | + if (!pfm_find_set(ctx, 0, 1)) | |
28707 | + return -ENOMEM; | |
28708 | + | |
28709 | + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); | |
28710 | + | |
28711 | + pfm_initialize_set(ctx, set); | |
28712 | + | |
28713 | + return 0; | |
28714 | +} | |
28715 | + | |
28716 | +/* | |
28717 | + * context is unloaded for this command. Interrupts are enabled | |
28718 | + */ | |
28719 | +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req, | |
28720 | + int count) | |
28721 | +{ | |
28722 | + struct pfm_event_set *set; | |
28723 | + u16 set_id; | |
28724 | + int i, ret; | |
28725 | + | |
28726 | + for (i = 0; i < count; i++, req++) { | |
28727 | + set_id = req->set_id; | |
28728 | + | |
28729 | + PFM_DBG("set_id=%u", set_id); | |
28730 | + | |
28731 | + set = pfm_find_set(ctx, set_id, 1); | |
28732 | + if (set == NULL) | |
28733 | + goto error_mem; | |
28734 | + | |
28735 | + ret = pfm_change_evtset(ctx, set, req); | |
28736 | + if (ret) | |
28737 | + goto error_params; | |
28738 | + | |
28739 | + pfm_initialize_set(ctx, set); | |
28740 | + } | |
28741 | + return 0; | |
28742 | +error_mem: | |
28743 | + PFM_DBG("cannot allocate set %u", set_id); | |
28744 | + return -ENOMEM; | |
28745 | +error_params: | |
28746 | + return ret; | |
28747 | +} | |
28748 | + | |
28749 | +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req, | |
28750 | + int count) | |
28751 | +{ | |
28752 | + struct pfm_event_set *set; | |
28753 | + int i, is_system, is_loaded, is_self, ret; | |
28754 | + u16 set_id; | |
28755 | + u64 end; | |
28756 | + | |
28757 | + end = sched_clock(); | |
28758 | + | |
28759 | + is_system = ctx->flags.system; | |
28760 | + is_loaded = ctx->state == PFM_CTX_LOADED; | |
28761 | + is_self = ctx->task == current || is_system; | |
28762 | + | |
28763 | + ret = -EINVAL; | |
28764 | + for (i = 0; i < count; i++, req++) { | |
28765 | + | |
28766 | + set_id = req->set_id; | |
28767 | + | |
28768 | + list_for_each_entry(set, &ctx->set_list, list) { | |
28769 | + if (set->id == set_id) | |
28770 | + goto found; | |
28771 | + if (set->id > set_id) | |
28772 | + goto error; | |
28773 | + } | |
28774 | +found: | |
28775 | + req->set_flags = set->flags; | |
28776 | + | |
28777 | + /* | |
28778 | + * compute leftover timeout | |
28779 | + * | |
28780 | + * lockdep may complain about lock inversion | |
28781 | + * because of get_remaining() however, this | |
28782 | + * applies to self-monitoring only, thus the | |
28783 | + * thread cannot be in the timeout handler | |
28784 | + * and here at the same time given that we | |
28785 | + * run with interrupts disabled | |
28786 | + */ | |
28787 | + if (is_loaded && is_self) { | |
28788 | + struct hrtimer *h; | |
28789 | + h = &__get_cpu_var(pfm_hrtimer); | |
28790 | + req->set_timeout = ktime_to_ns(hrtimer_get_remaining(h)); | |
28791 | + } else { | |
28792 | + /* | |
28793 | + * hrtimer_rem zero when not using | |
28794 | + * timeout-based switching | |
28795 | + */ | |
28796 | + req->set_timeout = ktime_to_ns(set->hrtimer_rem); | |
28797 | + } | |
28798 | + | |
28799 | + req->set_runs = set->runs; | |
28800 | + req->set_act_duration = set->duration; | |
28801 | + | |
28802 | + /* | |
28803 | + * adjust for active set if needed | |
28804 | + */ | |
28805 | + if (is_system && is_loaded && ctx->flags.started | |
28806 | + && set == ctx->active_set) | |
28807 | + req->set_act_duration += end - set->duration_start; | |
28808 | + | |
28809 | + /* | |
28810 | + * copy the list of pmds which last overflowed | |
28811 | + */ | |
28812 | + bitmap_copy(cast_ulp(req->set_ovfl_pmds), | |
28813 | + cast_ulp(set->ovfl_pmds), | |
28814 | + PFM_MAX_PMDS); | |
28815 | + | |
28816 | + /* | |
28817 | + * copy bitmask of available PMU registers | |
28818 | + * | |
28819 | + * must copy over the entire vector to avoid | |
28820 | + * returning bogus upper bits pass by user | |
28821 | + */ | |
28822 | + bitmap_copy(cast_ulp(req->set_avail_pmcs), | |
28823 | + cast_ulp(ctx->regs.pmcs), | |
28824 | + PFM_MAX_PMCS); | |
28825 | + | |
28826 | + bitmap_copy(cast_ulp(req->set_avail_pmds), | |
28827 | + cast_ulp(ctx->regs.pmds), | |
28828 | + PFM_MAX_PMDS); | |
28829 | + | |
28830 | + PFM_DBG("set%u flags=0x%x eff_usec=%llu runs=%llu " | |
28831 | + "a_pmcs=0x%llx a_pmds=0x%llx", | |
28832 | + set_id, | |
28833 | + set->flags, | |
28834 | + (unsigned long long)req->set_timeout, | |
28835 | + (unsigned long long)set->runs, | |
28836 | + (unsigned long long)ctx->regs.pmcs[0], | |
28837 | + (unsigned long long)ctx->regs.pmds[0]); | |
28838 | + } | |
28839 | + ret = 0; | |
28840 | +error: | |
28841 | + return ret; | |
28842 | +} | |
28843 | + | |
28844 | +/* | |
28845 | + * context is unloaded for this command. Interrupts are enabled | |
28846 | + */ | |
28847 | +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count) | |
28848 | +{ | |
28849 | + struct pfarg_setdesc *req = arg; | |
28850 | + struct pfm_event_set *set; | |
28851 | + u16 set_id; | |
28852 | + int i, ret; | |
28853 | + | |
28854 | + ret = -EINVAL; | |
28855 | + for (i = 0; i < count; i++, req++) { | |
28856 | + set_id = req->set_id; | |
28857 | + | |
28858 | + list_for_each_entry(set, &ctx->set_list, list) { | |
28859 | + if (set->id == set_id) | |
28860 | + goto found; | |
28861 | + if (set->id > set_id) | |
28862 | + goto error; | |
28863 | + } | |
28864 | + goto error; | |
28865 | +found: | |
28866 | + /* | |
28867 | + * clear active set if necessary. | |
28868 | + * will be updated when context is loaded | |
28869 | + */ | |
28870 | + if (set == ctx->active_set) | |
28871 | + ctx->active_set = NULL; | |
28872 | + | |
28873 | + list_del(&set->list); | |
28874 | + | |
28875 | + kmem_cache_free(pfm_set_cachep, set); | |
28876 | + | |
28877 | + PFM_DBG("set%u deleted", set_id); | |
28878 | + } | |
28879 | + ret = 0; | |
28880 | +error: | |
28881 | + return ret; | |
28882 | +} | |
28883 | + | |
28884 | +/* | |
28885 | + * called from pfm_context_free() to free all sets | |
28886 | + */ | |
28887 | +void pfm_free_sets(struct pfm_context *ctx) | |
28888 | +{ | |
28889 | + struct pfm_event_set *set, *tmp; | |
28890 | + | |
28891 | + list_for_each_entry_safe(set, tmp, &ctx->set_list, list) { | |
28892 | + list_del(&set->list); | |
28893 | + kmem_cache_free(pfm_set_cachep, set); | |
28894 | + } | |
28895 | +} | |
28896 | + | |
28897 | +/** | |
28898 | + * pfm_restart_timer - restart hrtimer taking care of expired timeout | |
28899 | + * @ctx : context to work with | |
28900 | + * @set : current active set | |
28901 | + * | |
28902 | + * Must be called on the processor on which the timer is to be armed. | |
28903 | + * Assumes context is locked and interrupts are masked | |
28904 | + * | |
28905 | + * Upon return the active set for the context may have changed | |
28906 | + */ | |
28907 | +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set) | |
28908 | +{ | |
28909 | + struct hrtimer *h; | |
28910 | + enum hrtimer_restart ret; | |
28911 | + | |
28912 | + h = &__get_cpu_var(pfm_hrtimer); | |
28913 | + | |
28914 | + PFM_DBG_ovfl("hrtimer=%lld", (long long)ktime_to_ns(set->hrtimer_rem)); | |
28915 | + | |
28916 | + if (ktime_to_ns(set->hrtimer_rem) > 0) { | |
28917 | + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL); | |
28918 | + } else { | |
28919 | + /* | |
28920 | + * timer was not re-armed because it has already expired | |
28921 | + * timer was not enqueued, we need to switch set now | |
28922 | + */ | |
28923 | + pfm_stats_inc(set_switch_exp); | |
28924 | + | |
28925 | + ret = pfm_switch_sets(ctx, NULL, 1, 0); | |
28926 | + set = ctx->active_set; | |
28927 | + if (ret == HRTIMER_RESTART) | |
28928 | + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL); | |
28929 | + } | |
28930 | +} | |
28931 | + | |
28932 | +int __init pfm_init_sets(void) | |
28933 | +{ | |
28934 | + pfm_set_cachep = kmem_cache_create("pfm_event_set", | |
28935 | + sizeof(struct pfm_event_set), | |
28936 | + SLAB_HWCACHE_ALIGN, 0, NULL); | |
28937 | + if (!pfm_set_cachep) { | |
28938 | + PFM_ERR("cannot initialize event set slab"); | |
28939 | + return -ENOMEM; | |
28940 | + } | |
28941 | + return 0; | |
28942 | +} | |
28943 | --- /dev/null | |
28944 | +++ b/perfmon/perfmon_smpl.c | |
28945 | @@ -0,0 +1,865 @@ | |
28946 | +/* | |
28947 | + * perfmon_smpl.c: perfmon2 sampling management | |
28948 | + * | |
28949 | + * This file implements the perfmon2 interface which | |
28950 | + * provides access to the hardware performance counters | |
28951 | + * of the host processor. | |
28952 | + * | |
28953 | + * | |
28954 | + * The initial version of perfmon.c was written by | |
28955 | + * Ganesh Venkitachalam, IBM Corp. | |
28956 | + * | |
28957 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
28958 | + * David Mosberger, Hewlett Packard Co. | |
28959 | + * | |
28960 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
28961 | + * by Stephane Eranian, Hewlett Packard Co. | |
28962 | + * | |
28963 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
28964 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
28965 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
28966 | + * | |
28967 | + * More information about perfmon available at: | |
28968 | + * http://perfmon2.sf.net | |
28969 | + * | |
28970 | + * This program is free software; you can redistribute it and/or | |
28971 | + * modify it under the terms of version 2 of the GNU General Public | |
28972 | + * License as published by the Free Software Foundation. | |
28973 | + * | |
28974 | + * This program is distributed in the hope that it will be useful, | |
28975 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
28976 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
28977 | + * General Public License for more details. | |
28978 | + * | |
28979 | + * You should have received a copy of the GNU General Public License | |
28980 | + * along with this program; if not, write to the Free Software | |
28981 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
28982 | + * 02111-1307 USA | |
28983 | + */ | |
28984 | +#include <linux/module.h> | |
28985 | +#include <linux/kernel.h> | |
28986 | +#include <linux/vmalloc.h> | |
28987 | +#include <linux/fs.h> | |
28988 | +#include <linux/mm.h> | |
28989 | +#include <linux/random.h> | |
28990 | +#include <linux/uaccess.h> | |
28991 | +#include <linux/perfmon_kern.h> | |
28992 | + | |
28993 | +#include "perfmon_priv.h" | |
28994 | + | |
28995 | +/** | |
28996 | + * pfm_smpl_buf_alloc - allocate memory for sampling buffer | |
28997 | + * @ctx: context to operate on | |
28998 | + * @rsize: requested size | |
28999 | + * | |
29000 | + * called from pfm_smpl_buffer_alloc_old() (IA64-COMPAT) | |
29001 | + * and pfm_setup_smpl_fmt() | |
29002 | + * | |
29003 | + * interrupts are enabled, context is not locked. | |
29004 | + * | |
29005 | + * function is not static because it is called from the IA-64 | |
29006 | + * compatibility module (perfmon_compat.c) | |
29007 | + */ | |
29008 | +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize) | |
29009 | +{ | |
29010 | +#if PFM_ARCH_SMPL_ALIGN_SIZE > 0 | |
29011 | +#define PFM_ALIGN_SMPL(a, f) (void *)((((unsigned long)(a))+(f-1)) & ~(f-1)) | |
29012 | +#else | |
29013 | +#define PFM_ALIGN_SMPL(a, f) (a) | |
29014 | +#endif | |
29015 | + void *addr, *real_addr; | |
29016 | + size_t size, real_size; | |
29017 | + int ret; | |
29018 | + | |
29019 | + might_sleep(); | |
29020 | + | |
29021 | + /* | |
29022 | + * align page boundary | |
29023 | + */ | |
29024 | + size = PAGE_ALIGN(rsize); | |
29025 | + | |
29026 | + /* | |
29027 | + * On some arch, it may be necessary to get an alignment greater | |
29028 | + * than page size to avoid certain cache effects (e.g., MIPS). | |
29029 | + * This is the reason for PFM_ARCH_SMPL_ALIGN_SIZE. | |
29030 | + */ | |
29031 | + real_size = size + PFM_ARCH_SMPL_ALIGN_SIZE; | |
29032 | + | |
29033 | + PFM_DBG("req_size=%zu size=%zu real_size=%zu", | |
29034 | + rsize, | |
29035 | + size, | |
29036 | + real_size); | |
29037 | + | |
29038 | + ret = pfm_smpl_buf_space_acquire(ctx, real_size); | |
29039 | + if (ret) | |
29040 | + return ret; | |
29041 | + | |
29042 | + /* | |
29043 | + * vmalloc can sleep. we do not hold | |
29044 | + * any spinlock and interrupts are enabled | |
29045 | + */ | |
29046 | + real_addr = addr = vmalloc(real_size); | |
29047 | + if (!real_addr) { | |
29048 | + PFM_DBG("cannot allocate sampling buffer"); | |
29049 | + goto unres; | |
29050 | + } | |
29051 | + | |
29052 | + /* | |
29053 | + * align the useable sampling buffer address to the arch requirement | |
29054 | + * This is a nop on most architectures | |
29055 | + */ | |
29056 | + addr = PFM_ALIGN_SMPL(real_addr, PFM_ARCH_SMPL_ALIGN_SIZE); | |
29057 | + | |
29058 | + memset(addr, 0, real_size); | |
29059 | + | |
29060 | + /* | |
29061 | + * due to cache aliasing, it may be necessary to flush the pages | |
29062 | + * on certain architectures (e.g., MIPS) | |
29063 | + */ | |
29064 | + pfm_cacheflush(addr, real_size); | |
29065 | + | |
29066 | + /* | |
29067 | + * what needs to be freed | |
29068 | + */ | |
29069 | + ctx->smpl_real_addr = real_addr; | |
29070 | + ctx->smpl_real_size = real_size; | |
29071 | + | |
29072 | + /* | |
29073 | + * what is actually available to user | |
29074 | + */ | |
29075 | + ctx->smpl_addr = addr; | |
29076 | + ctx->smpl_size = size; | |
29077 | + | |
29078 | + PFM_DBG("addr=%p real_addr=%p", addr, real_addr); | |
29079 | + | |
29080 | + return 0; | |
29081 | +unres: | |
29082 | + /* | |
29083 | + * smpl_addr is NULL, no double freeing possible in pfm_context_free() | |
29084 | + */ | |
29085 | + pfm_smpl_buf_space_release(ctx, real_size); | |
29086 | + | |
29087 | + return -ENOMEM; | |
29088 | +} | |
29089 | + | |
29090 | +/** | |
29091 | + * pfm_smpl_buf_free - free resources associated with sampling | |
29092 | + * @ctx: context to operate on | |
29093 | + */ | |
29094 | +void pfm_smpl_buf_free(struct pfm_context *ctx) | |
29095 | +{ | |
29096 | + struct pfm_smpl_fmt *fmt; | |
29097 | + | |
29098 | + fmt = ctx->smpl_fmt; | |
29099 | + | |
29100 | + /* | |
29101 | + * some formats may not use a buffer, yet they may | |
29102 | + * need to be called on exit | |
29103 | + */ | |
29104 | + if (fmt) { | |
29105 | + if (fmt->fmt_exit) | |
29106 | + (*fmt->fmt_exit)(ctx->smpl_addr); | |
29107 | + /* | |
29108 | + * decrease refcount of sampling format | |
29109 | + */ | |
29110 | + pfm_smpl_fmt_put(fmt); | |
29111 | + } | |
29112 | + | |
29113 | + if (ctx->smpl_addr) { | |
29114 | + pfm_smpl_buf_space_release(ctx, ctx->smpl_real_size); | |
29115 | + | |
29116 | + PFM_DBG("free buffer real_addr=0x%p real_size=%zu", | |
29117 | + ctx->smpl_real_addr, | |
29118 | + ctx->smpl_real_size); | |
29119 | + | |
29120 | + vfree(ctx->smpl_real_addr); | |
29121 | + } | |
29122 | +} | |
29123 | + | |
29124 | +/** | |
29125 | + * pfm_setup_smpl_fmt - initialization of sampling format and buffer | |
29126 | + * @ctx: context to operate on | |
29127 | + * @fmt_arg: sampling format arguments | |
29128 | + * @ctx_flags: context flags as passed by user | |
29129 | + * @filp: file descriptor associated with context | |
29130 | + * | |
29131 | + * called from __pfm_create_context() | |
29132 | + */ | |
29133 | +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg, | |
29134 | + struct file *filp) | |
29135 | +{ | |
29136 | + struct pfm_smpl_fmt *fmt; | |
29137 | + size_t size = 0; | |
29138 | + int ret = 0; | |
29139 | + | |
29140 | + fmt = ctx->smpl_fmt; | |
29141 | + | |
29142 | + /* | |
29143 | + * validate parameters | |
29144 | + */ | |
29145 | + if (fmt->fmt_validate) { | |
29146 | + ret = (*fmt->fmt_validate)(ctx_flags, | |
29147 | + ctx->regs.num_pmds, | |
29148 | + fmt_arg); | |
29149 | + PFM_DBG("validate(0x%x,%p)=%d", ctx_flags, fmt_arg, ret); | |
29150 | + if (ret) | |
29151 | + goto error; | |
29152 | + } | |
29153 | + | |
29154 | + /* | |
29155 | + * check if buffer format needs buffer allocation | |
29156 | + */ | |
29157 | + size = 0; | |
29158 | + if (fmt->fmt_getsize) { | |
29159 | + ret = (*fmt->fmt_getsize)(ctx_flags, fmt_arg, &size); | |
29160 | + if (ret) { | |
29161 | + PFM_DBG("cannot get size ret=%d", ret); | |
29162 | + goto error; | |
29163 | + } | |
29164 | + } | |
29165 | + | |
29166 | + /* | |
29167 | + * allocate buffer | |
29168 | + * v20_compat is for IA-64 backward compatibility with perfmon v2.0 | |
29169 | + */ | |
29170 | + if (size) { | |
29171 | +#ifdef CONFIG_IA64_PERFMON_COMPAT | |
29172 | + /* | |
29173 | + * backward compatibility with perfmon v2.0 on IA-64 | |
29174 | + */ | |
29175 | + if (ctx->flags.ia64_v20_compat) | |
29176 | + ret = pfm_smpl_buf_alloc_compat(ctx, size, filp); | |
29177 | + else | |
29178 | +#endif | |
29179 | + ret = pfm_smpl_buf_alloc(ctx, size); | |
29180 | + | |
29181 | + if (ret) | |
29182 | + goto error; | |
29183 | + | |
29184 | + } | |
29185 | + | |
29186 | + if (fmt->fmt_init) { | |
29187 | + ret = (*fmt->fmt_init)(ctx, ctx->smpl_addr, ctx_flags, | |
29188 | + ctx->regs.num_pmds, | |
29189 | + fmt_arg); | |
29190 | + } | |
29191 | + /* | |
29192 | + * if there was an error, the buffer/resource will be freed | |
29193 | + * via pfm_context_free() | |
29194 | + */ | |
29195 | +error: | |
29196 | + return ret; | |
29197 | +} | |
29198 | + | |
29199 | +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) | |
29200 | +{ | |
29201 | + u64 now; | |
29202 | + | |
29203 | + now = sched_clock(); | |
29204 | + | |
29205 | + /* | |
29206 | + * we save the PMD values such that we can read them while | |
29207 | + * MASKED without having the thread stopped | |
29208 | + * because monitoring is stopped | |
29209 | + * | |
29210 | + * pfm_save_pmds() could be avoided if we knew | |
29211 | + * that pfm_arch_intr_freeze() had saved them already | |
29212 | + */ | |
29213 | + pfm_save_pmds(ctx, set); | |
29214 | + pfm_arch_mask_monitoring(ctx, set); | |
29215 | + /* | |
29216 | + * accumulate the set duration up to this point | |
29217 | + */ | |
29218 | + set->duration += now - set->duration_start; | |
29219 | + | |
29220 | + ctx->state = PFM_CTX_MASKED; | |
29221 | + | |
29222 | + /* | |
29223 | + * need to stop timer and remember remaining time | |
29224 | + * will be reloaded in pfm_unmask_monitoring | |
29225 | + * hrtimer is cancelled in the tail of the interrupt | |
29226 | + * handler once the context is unlocked | |
29227 | + */ | |
29228 | + if (set->flags & PFM_SETFL_TIME_SWITCH) { | |
29229 | + struct hrtimer *h = &__get_cpu_var(pfm_hrtimer); | |
29230 | + hrtimer_cancel(h); | |
29231 | + set->hrtimer_rem = hrtimer_get_remaining(h); | |
29232 | + } | |
29233 | + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart); | |
29234 | +} | |
29235 | + | |
29236 | +/** | |
29237 | + * pfm_unmask_monitoring - unmask monitoring | |
29238 | + * @ctx: context to work with | |
29239 | + * @set: current active set | |
29240 | + * | |
29241 | + * interrupts are masked when entering this function. | |
29242 | + * context must be in MASKED state when calling. | |
29243 | + * | |
29244 | + * Upon return, the active set may have changed when using timeout | |
29245 | + * based switching. | |
29246 | + */ | |
29247 | +static void pfm_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) | |
29248 | +{ | |
29249 | + if (ctx->state != PFM_CTX_MASKED) | |
29250 | + return; | |
29251 | + | |
29252 | + PFM_DBG_ovfl("unmasking monitoring"); | |
29253 | + | |
29254 | + /* | |
29255 | + * must be done before calling | |
29256 | + * pfm_arch_unmask_monitoring() | |
29257 | + */ | |
29258 | + ctx->state = PFM_CTX_LOADED; | |
29259 | + | |
29260 | + /* | |
29261 | + * we need to restore the PMDs because they | |
29262 | + * may have been modified by user while MASKED in | |
29263 | + * which case the actual registers have not yet | |
29264 | + * been updated | |
29265 | + */ | |
29266 | + pfm_arch_restore_pmds(ctx, set); | |
29267 | + | |
29268 | + /* | |
29269 | + * call arch specific handler | |
29270 | + */ | |
29271 | + pfm_arch_unmask_monitoring(ctx, set); | |
29272 | + | |
29273 | + /* | |
29274 | + * clear force reload flag. May have been set | |
29275 | + * in pfm_write_pmcs or pfm_write_pmds | |
29276 | + */ | |
29277 | + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; | |
29278 | + | |
29279 | + /* | |
29280 | + * reset set duration timer | |
29281 | + */ | |
29282 | + set->duration_start = sched_clock(); | |
29283 | + | |
29284 | + /* | |
29285 | + * restart hrtimer if needed | |
29286 | + */ | |
29287 | + if (set->flags & PFM_SETFL_TIME_SWITCH) { | |
29288 | + pfm_restart_timer(ctx, set); | |
29289 | + /* careful here as pfm_restart_timer may switch sets */ | |
29290 | + } | |
29291 | +} | |
29292 | + | |
29293 | +void pfm_reset_pmds(struct pfm_context *ctx, | |
29294 | + struct pfm_event_set *set, | |
29295 | + int num_pmds, | |
29296 | + int reset_mode) | |
29297 | +{ | |
29298 | + u64 val, mask, new_seed; | |
29299 | + struct pfm_pmd *reg; | |
29300 | + unsigned int i, not_masked; | |
29301 | + | |
29302 | + not_masked = ctx->state != PFM_CTX_MASKED; | |
29303 | + | |
29304 | + PFM_DBG_ovfl("%s r_pmds=0x%llx not_masked=%d", | |
29305 | + reset_mode == PFM_PMD_RESET_LONG ? "long" : "short", | |
29306 | + (unsigned long long)set->reset_pmds[0], | |
29307 | + not_masked); | |
29308 | + | |
29309 | + pfm_stats_inc(reset_pmds_count); | |
29310 | + | |
29311 | + for (i = 0; num_pmds; i++) { | |
29312 | + if (test_bit(i, cast_ulp(set->reset_pmds))) { | |
29313 | + num_pmds--; | |
29314 | + | |
29315 | + reg = set->pmds + i; | |
29316 | + | |
29317 | + val = reset_mode == PFM_PMD_RESET_LONG ? | |
29318 | + reg->long_reset : reg->short_reset; | |
29319 | + | |
29320 | + if (reg->flags & PFM_REGFL_RANDOM) { | |
29321 | + mask = reg->mask; | |
29322 | + new_seed = random32(); | |
29323 | + | |
29324 | + /* construct a full 64-bit random value: */ | |
29325 | + if ((unlikely(mask >> 32) != 0)) | |
29326 | + new_seed |= (u64)random32() << 32; | |
29327 | + | |
29328 | + /* counter values are negative numbers! */ | |
29329 | + val -= (new_seed & mask); | |
29330 | + } | |
29331 | + | |
29332 | + set->pmds[i].value = val; | |
29333 | + reg->lval = val; | |
29334 | + | |
29335 | + /* | |
29336 | + * not all PMD to reset are necessarily | |
29337 | + * counters | |
29338 | + */ | |
29339 | + if (not_masked) | |
29340 | + pfm_write_pmd(ctx, i, val); | |
29341 | + | |
29342 | + PFM_DBG_ovfl("set%u pmd%u sval=0x%llx", | |
29343 | + set->id, | |
29344 | + i, | |
29345 | + (unsigned long long)val); | |
29346 | + } | |
29347 | + } | |
29348 | + | |
29349 | + /* | |
29350 | + * done with reset | |
29351 | + */ | |
29352 | + bitmap_zero(cast_ulp(set->reset_pmds), i); | |
29353 | + | |
29354 | + /* | |
29355 | + * make changes visible | |
29356 | + */ | |
29357 | + if (not_masked) | |
29358 | + pfm_arch_serialize(); | |
29359 | +} | |
29360 | + | |
29361 | +/* | |
29362 | + * called from pfm_handle_work() and __pfm_restart() | |
29363 | + * for system-wide and per-thread context to resume | |
29364 | + * monitoring after a user level notification. | |
29365 | + * | |
29366 | + * In both cases, the context is locked and interrupts | |
29367 | + * are disabled. | |
29368 | + */ | |
29369 | +void pfm_resume_after_ovfl(struct pfm_context *ctx) | |
29370 | +{ | |
29371 | + struct pfm_smpl_fmt *fmt; | |
29372 | + u32 rst_ctrl; | |
29373 | + struct pfm_event_set *set; | |
29374 | + u64 *reset_pmds; | |
29375 | + void *hdr; | |
29376 | + int state, ret; | |
29377 | + | |
29378 | + hdr = ctx->smpl_addr; | |
29379 | + fmt = ctx->smpl_fmt; | |
29380 | + state = ctx->state; | |
29381 | + set = ctx->active_set; | |
29382 | + ret = 0; | |
29383 | + | |
29384 | + if (hdr) { | |
29385 | + rst_ctrl = 0; | |
29386 | + prefetch(hdr); | |
29387 | + } else { | |
29388 | + rst_ctrl = PFM_OVFL_CTRL_RESET; | |
29389 | + } | |
29390 | + | |
29391 | + /* | |
29392 | + * if using a sampling buffer format and it has a restart callback, | |
29393 | + * then invoke it. hdr may be NULL, if the format does not use a | |
29394 | + * perfmon buffer | |
29395 | + */ | |
29396 | + if (fmt && fmt->fmt_restart) | |
29397 | + ret = (*fmt->fmt_restart)(state == PFM_CTX_LOADED, &rst_ctrl, | |
29398 | + hdr); | |
29399 | + | |
29400 | + reset_pmds = set->reset_pmds; | |
29401 | + | |
29402 | + PFM_DBG("fmt_restart=%d reset_count=%d set=%u r_pmds=0x%llx switch=%d " | |
29403 | + "ctx_state=%d", | |
29404 | + ret, | |
29405 | + ctx->flags.reset_count, | |
29406 | + set->id, | |
29407 | + (unsigned long long)reset_pmds[0], | |
29408 | + (set->priv_flags & PFM_SETFL_PRIV_SWITCH), | |
29409 | + state); | |
29410 | + | |
29411 | + if (!ret) { | |
29412 | + /* | |
29413 | + * switch set if needed | |
29414 | + */ | |
29415 | + if (set->priv_flags & PFM_SETFL_PRIV_SWITCH) { | |
29416 | + set->priv_flags &= ~PFM_SETFL_PRIV_SWITCH; | |
29417 | + pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_LONG, 0); | |
29418 | + set = ctx->active_set; | |
29419 | + } else if (rst_ctrl & PFM_OVFL_CTRL_RESET) { | |
29420 | + int nn; | |
29421 | + nn = bitmap_weight(cast_ulp(set->reset_pmds), | |
29422 | + ctx->regs.max_pmd); | |
29423 | + if (nn) | |
29424 | + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_LONG); | |
29425 | + } | |
29426 | + | |
29427 | + if (!(rst_ctrl & PFM_OVFL_CTRL_MASK)) | |
29428 | + pfm_unmask_monitoring(ctx, set); | |
29429 | + else | |
29430 | + PFM_DBG("stopping monitoring?"); | |
29431 | + ctx->state = PFM_CTX_LOADED; | |
29432 | + } | |
29433 | +} | |
29434 | + | |
29435 | +/* | |
29436 | + * This function is called when we need to perform asynchronous | |
29437 | + * work on a context. This function is called ONLY when about to | |
29438 | + * return to user mode (very much like with signal handling). | |
29439 | + * | |
29440 | + * There are several reasons why we come here: | |
29441 | + * | |
29442 | + * - per-thread mode, not self-monitoring, to reset the counters | |
29443 | + * after a pfm_restart() | |
29444 | + * | |
29445 | + * - we are zombie and we need to cleanup our state | |
29446 | + * | |
29447 | + * - we need to block after an overflow notification | |
29448 | + * on a context with the PFM_OVFL_NOTIFY_BLOCK flag | |
29449 | + * | |
29450 | + * This function is never called for a system-wide context. | |
29451 | + * | |
29452 | + * pfm_handle_work() can be called with interrupts enabled | |
29453 | + * (TIF_NEED_RESCHED) or disabled. The down_interruptible | |
29454 | + * call may sleep, therefore we must re-enable interrupts | |
29455 | + * to avoid deadlocks. It is safe to do so because this function | |
29456 | + * is called ONLY when returning to user level, in which case | |
29457 | + * there is no risk of kernel stack overflow due to deep | |
29458 | + * interrupt nesting. | |
29459 | + */ | |
29460 | +void pfm_handle_work(struct pt_regs *regs) | |
29461 | +{ | |
29462 | + struct pfm_context *ctx; | |
29463 | + unsigned long flags, dummy_flags; | |
29464 | + int type, ret, info; | |
29465 | + | |
29466 | +#ifdef CONFIG_PPC | |
29467 | + /* | |
29468 | + * This is just a temporary fix. Obviously we'd like to fix the powerpc | |
29469 | + * code to make that check before calling __pfm_handle_work() to | |
29470 | + * prevent the function call overhead, but the call is made from | |
29471 | + * assembly code, so it will take a little while to figure out how to | |
29472 | + * perform the check correctly. | |
29473 | + */ | |
29474 | + if (!test_thread_flag(TIF_PERFMON_WORK)) | |
29475 | + return; | |
29476 | +#endif | |
29477 | + | |
29478 | + if (!user_mode(regs)) | |
29479 | + return; | |
29480 | + | |
29481 | + clear_thread_flag(TIF_PERFMON_WORK); | |
29482 | + | |
29483 | + pfm_stats_inc(handle_work_count); | |
29484 | + | |
29485 | + ctx = current->pfm_context; | |
29486 | + if (ctx == NULL) { | |
29487 | + PFM_DBG("[%d] has no ctx", current->pid); | |
29488 | + return; | |
29489 | + } | |
29490 | + | |
29491 | + BUG_ON(ctx->flags.system); | |
29492 | + | |
29493 | + spin_lock_irqsave(&ctx->lock, flags); | |
29494 | + | |
29495 | + type = ctx->flags.work_type; | |
29496 | + ctx->flags.work_type = PFM_WORK_NONE; | |
29497 | + | |
29498 | + PFM_DBG("work_type=%d reset_count=%d", | |
29499 | + type, | |
29500 | + ctx->flags.reset_count); | |
29501 | + | |
29502 | + switch (type) { | |
29503 | + case PFM_WORK_ZOMBIE: | |
29504 | + goto do_zombie; | |
29505 | + case PFM_WORK_RESET: | |
29506 | + /* simply reset, no blocking */ | |
29507 | + goto skip_blocking; | |
29508 | + case PFM_WORK_NONE: | |
29509 | + PFM_DBG("unexpected PFM_WORK_NONE"); | |
29510 | + goto nothing_todo; | |
29511 | + case PFM_WORK_BLOCK: | |
29512 | + break; | |
29513 | + default: | |
29514 | + PFM_DBG("unkown type=%d", type); | |
29515 | + goto nothing_todo; | |
29516 | + } | |
29517 | + | |
29518 | + /* | |
29519 | + * restore interrupt mask to what it was on entry. | |
29520 | + * Could be enabled/disabled. | |
29521 | + */ | |
29522 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
29523 | + | |
29524 | + /* | |
29525 | + * force interrupt enable because of down_interruptible() | |
29526 | + */ | |
29527 | + local_irq_enable(); | |
29528 | + | |
29529 | + PFM_DBG("before block sleeping"); | |
29530 | + | |
29531 | + /* | |
29532 | + * may go through without blocking on SMP systems | |
29533 | + * if restart has been received already by the time we call down() | |
29534 | + */ | |
29535 | + ret = wait_for_completion_interruptible(&ctx->restart_complete); | |
29536 | + | |
29537 | + PFM_DBG("after block sleeping ret=%d", ret); | |
29538 | + | |
29539 | + /* | |
29540 | + * lock context and mask interrupts again | |
29541 | + * We save flags into a dummy because we may have | |
29542 | + * altered interrupts mask compared to entry in this | |
29543 | + * function. | |
29544 | + */ | |
29545 | + spin_lock_irqsave(&ctx->lock, dummy_flags); | |
29546 | + | |
29547 | + if (ctx->state == PFM_CTX_ZOMBIE) | |
29548 | + goto do_zombie; | |
29549 | + | |
29550 | + /* | |
29551 | + * in case of interruption of down() we don't restart anything | |
29552 | + */ | |
29553 | + if (ret < 0) | |
29554 | + goto nothing_todo; | |
29555 | + | |
29556 | +skip_blocking: | |
29557 | + /* | |
29558 | + * iterate over the number of pending resets | |
29559 | + * There are certain situations where there may be | |
29560 | + * multiple notifications sent before a pfm_restart(). | |
29561 | + * As such, it may be that multiple pfm_restart() are | |
29562 | + * issued before the monitored thread gets to | |
29563 | + * pfm_handle_work(). To avoid losing restarts, pfm_restart() | |
29564 | + * increments a counter (reset_counts). Here, we take this | |
29565 | + * into account by potentially calling pfm_resume_after_ovfl() | |
29566 | + * multiple times. It is up to the sampling format to take the | |
29567 | + * appropriate actions. | |
29568 | + */ | |
29569 | + while (ctx->flags.reset_count) { | |
29570 | + pfm_resume_after_ovfl(ctx); | |
29571 | + /* careful as active set may have changed */ | |
29572 | + ctx->flags.reset_count--; | |
29573 | + } | |
29574 | + | |
29575 | +nothing_todo: | |
29576 | + /* | |
29577 | + * restore flags as they were upon entry | |
29578 | + */ | |
29579 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
29580 | + return; | |
29581 | + | |
29582 | +do_zombie: | |
29583 | + PFM_DBG("context is zombie, bailing out"); | |
29584 | + | |
29585 | + __pfm_unload_context(ctx, &info); | |
29586 | + | |
29587 | + /* | |
29588 | + * keep the spinlock check happy | |
29589 | + */ | |
29590 | + spin_unlock(&ctx->lock); | |
29591 | + | |
29592 | + /* | |
29593 | + * enable interrupt for vfree() | |
29594 | + */ | |
29595 | + local_irq_enable(); | |
29596 | + | |
29597 | + /* | |
29598 | + * cancel timer now that context is unlocked | |
29599 | + */ | |
29600 | + if (info & 0x2) { | |
29601 | + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); | |
29602 | + PFM_DBG("timeout cancel=%d", ret); | |
29603 | + } | |
29604 | + | |
29605 | + /* | |
29606 | + * actual context free | |
29607 | + */ | |
29608 | + pfm_free_context(ctx); | |
29609 | + | |
29610 | + /* | |
29611 | + * restore interrupts as they were upon entry | |
29612 | + */ | |
29613 | + local_irq_restore(flags); | |
29614 | + | |
29615 | + /* always true */ | |
29616 | + if (info & 0x1) | |
29617 | + pfm_session_release(0, 0); | |
29618 | +} | |
29619 | + | |
29620 | +/** | |
29621 | + * __pfm_restart - resume monitoring after user-level notification | |
29622 | + * @ctx: context to operate on | |
29623 | + * @info: return information used to free resource once unlocked | |
29624 | + * | |
29625 | + * function called from sys_pfm_restart(). It is used when overflow | |
29626 | + * notification is requested. For each notification received, the user | |
29627 | + * must call pfm_restart() to indicate to the kernel that it is done | |
29628 | + * processing the notification. | |
29629 | + * | |
29630 | + * When the caller is doing user level sampling, this function resets | |
29631 | + * the overflowed counters and resumes monitoring which is normally stopped | |
29632 | + * during notification (always the consequence of a counter overflow). | |
29633 | + * | |
29634 | + * When using a sampling format, the format restart() callback is invoked, | |
29635 | + * overflowed PMDS may be reset based upon decision from sampling format. | |
29636 | + * | |
29637 | + * When operating in per-thread mode, and when not self-monitoring, the | |
29638 | + * monitored thread DOES NOT need to be stopped, unlike for many other calls. | |
29639 | + * | |
29640 | + * This means that the effect of the restart may not necessarily be observed | |
29641 | + * right when returning from the call. For instance, counters may not already | |
29642 | + * be reset in the other thread. | |
29643 | + * | |
29644 | + * When operating in system-wide, the caller must be running on the monitored | |
29645 | + * CPU. | |
29646 | + * | |
29647 | + * The context is locked and interrupts are disabled. | |
29648 | + * | |
29649 | + * info value upon return: | |
29650 | + * - bit 0: when set, must issue complete() on restart semaphore | |
29651 | + */ | |
29652 | +int __pfm_restart(struct pfm_context *ctx, int *info) | |
29653 | +{ | |
29654 | + int state; | |
29655 | + | |
29656 | + state = ctx->state; | |
29657 | + | |
29658 | + PFM_DBG("state=%d can_restart=%d reset_count=%d", | |
29659 | + state, | |
29660 | + ctx->flags.can_restart, | |
29661 | + ctx->flags.reset_count); | |
29662 | + | |
29663 | + *info = 0; | |
29664 | + | |
29665 | + switch (state) { | |
29666 | + case PFM_CTX_MASKED: | |
29667 | + break; | |
29668 | + case PFM_CTX_LOADED: | |
29669 | + if (ctx->smpl_addr && ctx->smpl_fmt->fmt_restart) | |
29670 | + break; | |
29671 | + default: | |
29672 | + PFM_DBG("invalid state=%d", state); | |
29673 | + return -EBUSY; | |
29674 | + } | |
29675 | + | |
29676 | + /* | |
29677 | + * first check if allowed to restart, i.e., notifications received | |
29678 | + */ | |
29679 | + if (!ctx->flags.can_restart) { | |
29680 | + PFM_DBG("no restart can_restart=0"); | |
29681 | + return -EBUSY; | |
29682 | + } | |
29683 | + | |
29684 | + pfm_stats_inc(pfm_restart_count); | |
29685 | + | |
29686 | + /* | |
29687 | + * at this point, the context is either LOADED or MASKED | |
29688 | + */ | |
29689 | + ctx->flags.can_restart--; | |
29690 | + | |
29691 | + /* | |
29692 | + * handle self-monitoring case and system-wide | |
29693 | + */ | |
29694 | + if (ctx->task == current || ctx->flags.system) { | |
29695 | + pfm_resume_after_ovfl(ctx); | |
29696 | + return 0; | |
29697 | + } | |
29698 | + | |
29699 | + /* | |
29700 | + * restart another task | |
29701 | + */ | |
29702 | + | |
29703 | + /* | |
29704 | + * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e. | |
29705 | + * the task is blocked or on its way to block. That's the normal | |
29706 | + * restart path. If the monitoring is not masked, then the task | |
29707 | + * can be actively monitoring and we cannot directly intervene. | |
29708 | + * Therefore we use the trap mechanism to catch the task and | |
29709 | + * force it to reset the buffer/reset PMDs. | |
29710 | + * | |
29711 | + * if non-blocking, then we ensure that the task will go into | |
29712 | + * pfm_handle_work() before returning to user mode. | |
29713 | + * | |
29714 | + * We cannot explicitly reset another task, it MUST always | |
29715 | + * be done by the task itself. This works for system wide because | |
29716 | + * the tool that is controlling the session is logically doing | |
29717 | + * "self-monitoring". | |
29718 | + */ | |
29719 | + if (ctx->flags.block && state == PFM_CTX_MASKED) { | |
29720 | + PFM_DBG("unblocking [%d]", ctx->task->pid); | |
29721 | + /* | |
29722 | + * It is not possible to call complete() with the context locked | |
29723 | + * otherwise we have a potential deadlock with the PMU context | |
29724 | + * switch code due to a lock inversion between task_rq_lock() | |
29725 | + * and the context lock. | |
29726 | + * Instead we mark whether or not we need to issue the complete | |
29727 | + * and we invoke the function once the context lock is released | |
29728 | + * in sys_pfm_restart() | |
29729 | + */ | |
29730 | + *info = 1; | |
29731 | + } else { | |
29732 | + PFM_DBG("[%d] armed exit trap", ctx->task->pid); | |
29733 | + pfm_post_work(ctx->task, ctx, PFM_WORK_RESET); | |
29734 | + } | |
29735 | + ctx->flags.reset_count++; | |
29736 | + return 0; | |
29737 | +} | |
29738 | + | |
29739 | +/** | |
29740 | + * pfm_get_smpl_arg -- copy user arguments to pfm_create_context() related to sampling format | |
29741 | + * @fmt_uname: format name as passed by user | |
29742 | + * @fmt_uarg: format optional argument as passed by user | |
29743 | + * @usize: size of structure passed in fmt_arg | |
29744 | + * @arg: kernel copy of fmt_arg | |
29745 | + * @fmt: pointer to sampling format upon success | |
29746 | + * | |
29747 | + * arg is kmalloc'ed, thus it needs a kfree by caller | |
29748 | + */ | |
29749 | +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *fmt_uarg, size_t usize, void **arg, | |
29750 | + struct pfm_smpl_fmt **fmt) | |
29751 | +{ | |
29752 | + struct pfm_smpl_fmt *f; | |
29753 | + char *fmt_name; | |
29754 | + void *addr = NULL; | |
29755 | + size_t sz; | |
29756 | + int ret; | |
29757 | + | |
29758 | + fmt_name = getname(fmt_uname); | |
29759 | + if (!fmt_name) { | |
29760 | + PFM_DBG("getname failed"); | |
29761 | + return -ENOMEM; | |
29762 | + } | |
29763 | + | |
29764 | + /* | |
29765 | + * find fmt and increase refcount | |
29766 | + */ | |
29767 | + f = pfm_smpl_fmt_get(fmt_name); | |
29768 | + | |
29769 | + putname(fmt_name); | |
29770 | + | |
29771 | + if (f == NULL) { | |
29772 | + PFM_DBG("buffer format not found"); | |
29773 | + return -EINVAL; | |
29774 | + } | |
29775 | + | |
29776 | + /* | |
29777 | + * expected format argument size | |
29778 | + */ | |
29779 | + sz = f->fmt_arg_size; | |
29780 | + | |
29781 | + /* | |
29782 | + * check user size matches expected size | |
29783 | + * usize = -1 is for IA-64 backward compatibility | |
29784 | + */ | |
29785 | + ret = -EINVAL; | |
29786 | + if (sz != usize && usize != -1) { | |
29787 | + PFM_DBG("invalid arg size %zu, format expects %zu", | |
29788 | + usize, sz); | |
29789 | + goto error; | |
29790 | + } | |
29791 | + | |
29792 | + if (sz) { | |
29793 | + ret = -ENOMEM; | |
29794 | + addr = kmalloc(sz, GFP_KERNEL); | |
29795 | + if (addr == NULL) | |
29796 | + goto error; | |
29797 | + | |
29798 | + ret = -EFAULT; | |
29799 | + if (copy_from_user(addr, fmt_uarg, sz)) | |
29800 | + goto error; | |
29801 | + } | |
29802 | + *arg = addr; | |
29803 | + *fmt = f; | |
29804 | + return 0; | |
29805 | + | |
29806 | +error: | |
29807 | + kfree(addr); | |
29808 | + pfm_smpl_fmt_put(f); | |
29809 | + return ret; | |
29810 | +} | |
29811 | --- /dev/null | |
29812 | +++ b/perfmon/perfmon_syscalls.c | |
29813 | @@ -0,0 +1,1060 @@ | |
29814 | +/* | |
29815 | + * perfmon_syscalls.c: perfmon2 system call interface | |
29816 | + * | |
29817 | + * This file implements the perfmon2 interface which | |
29818 | + * provides access to the hardware performance counters | |
29819 | + * of the host processor. | |
29820 | + * | |
29821 | + * The initial version of perfmon.c was written by | |
29822 | + * Ganesh Venkitachalam, IBM Corp. | |
29823 | + * | |
29824 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
29825 | + * David Mosberger, Hewlett Packard Co. | |
29826 | + * | |
29827 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
29828 | + * by Stephane Eranian, Hewlett Packard Co. | |
29829 | + * | |
29830 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
29831 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
29832 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
29833 | + * | |
29834 | + * More information about perfmon available at: | |
29835 | + * http://perfmon2.sf.net | |
29836 | + * | |
29837 | + * This program is free software; you can redistribute it and/or | |
29838 | + * modify it under the terms of version 2 of the GNU General Public | |
29839 | + * License as published by the Free Software Foundation. | |
29840 | + * | |
29841 | + * This program is distributed in the hope that it will be useful, | |
29842 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
29843 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
29844 | + * General Public License for more details. | |
29845 | + * | |
29846 | + * You should have received a copy of the GNU General Public License | |
29847 | + * along with this program; if not, write to the Free Software | |
29848 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
29849 | + * 02111-1307 USA | |
29850 | + */ | |
29851 | +#include <linux/kernel.h> | |
29852 | +#include <linux/fs.h> | |
29853 | +#include <linux/ptrace.h> | |
29854 | +#include <linux/perfmon_kern.h> | |
29855 | +#include <linux/uaccess.h> | |
29856 | +#include "perfmon_priv.h" | |
29857 | + | |
29858 | +/* | |
29859 | + * Context locking rules: | |
29860 | + * --------------------- | |
29861 | + * - any thread with access to the file descriptor of a context can | |
29862 | + * potentially issue perfmon calls | |
29863 | + * | |
29864 | + * - calls must be serialized to guarantee correctness | |
29865 | + * | |
29866 | + * - as soon as a context is attached to a thread or CPU, it may be | |
29867 | + * actively monitoring. On some architectures, such as IA-64, this | |
29868 | + * is true even though the pfm_start() call has not been made. This | |
29869 | + * comes from the fact that on some architectures, it is possible to | |
29870 | + * start/stop monitoring from userland. | |
29871 | + * | |
29872 | + * - If monitoring is active, then there can be PMU interrupts. Because | |
29873 | + * context accesses must be serialized, the perfmon system calls | |
29874 | + * must mask interrupts as soon as the context is attached. | |
29875 | + * | |
29876 | + * - perfmon system calls that operate with the context unloaded cannot | |
29877 | + * assume it is actually unloaded when they are called. They first need | |
29878 | + * to check and for that they need interrupts masked. Then, if the | |
29879 | + * context is actually unloaded, they can unmask interrupts. | |
29880 | + * | |
29881 | + * - interrupt masking holds true for other internal perfmon functions as | |
29882 | + * well. Except for PMU interrupt handler because those interrupts | |
29883 | + * cannot be nested. | |
29884 | + * | |
29885 | + * - we mask ALL interrupts instead of just the PMU interrupt because we | |
29886 | + * also need to protect against timer interrupts which could trigger | |
29887 | + * a set switch. | |
29888 | + */ | |
29889 | +#ifdef CONFIG_UTRACE | |
29890 | +#include <linux/utrace.h> | |
29891 | + | |
29892 | +static u32 | |
29893 | +stopper_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk) | |
29894 | +{ | |
29895 | + PFM_DBG("quiesced [%d]", tsk->pid); | |
29896 | + complete(engine->data); | |
29897 | + return UTRACE_ACTION_RESUME; | |
29898 | +} | |
29899 | + | |
29900 | +void | |
29901 | +pfm_resume_task(struct task_struct *t, void *data) | |
29902 | +{ | |
29903 | + PFM_DBG("utrace detach [%d]", t->pid); | |
29904 | + (void) utrace_detach(t, data); | |
29905 | +} | |
29906 | + | |
29907 | +static const struct utrace_engine_ops utrace_ops = | |
29908 | +{ | |
29909 | + .report_quiesce = stopper_quiesce, | |
29910 | +}; | |
29911 | + | |
29912 | +static int pfm_wait_task_stopped(struct task_struct *task, void **data) | |
29913 | +{ | |
29914 | + DECLARE_COMPLETION_ONSTACK(done); | |
29915 | + struct utrace_attached_engine *eng; | |
29916 | + int ret; | |
29917 | + | |
29918 | + eng = utrace_attach(task, UTRACE_ATTACH_CREATE, &utrace_ops, &done); | |
29919 | + if (IS_ERR(eng)) | |
29920 | + return PTR_ERR(eng); | |
29921 | + | |
29922 | + ret = utrace_set_flags(task, eng, | |
29923 | + UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE)); | |
29924 | + PFM_DBG("wait quiesce [%d]", task->pid); | |
29925 | + if (!ret) | |
29926 | + ret = wait_for_completion_interruptible(&done); | |
29927 | + | |
29928 | + if (ret) | |
29929 | + (void) utrace_detach(task, eng); | |
29930 | + else | |
29931 | + *data = eng; | |
29932 | + return 0; | |
29933 | +} | |
29934 | +#else /* !CONFIG_UTRACE */ | |
29935 | +static int pfm_wait_task_stopped(struct task_struct *task, void **data) | |
29936 | +{ | |
29937 | + int ret; | |
29938 | + | |
29939 | + *data = NULL; | |
29940 | + | |
29941 | + /* | |
29942 | + * returns 0 if cannot attach | |
29943 | + */ | |
29944 | + ret = ptrace_may_access(task, PTRACE_MODE_ATTACH); | |
29945 | + PFM_DBG("may_attach=%d", ret); | |
29946 | + if (!ret) | |
29947 | + return -EPERM; | |
29948 | + | |
29949 | + ret = ptrace_check_attach(task, 0); | |
29950 | + PFM_DBG("check_attach=%d", ret); | |
29951 | + return ret; | |
29952 | +} | |
29953 | +void pfm_resume_task(struct task_struct *t, void *data) | |
29954 | +{} | |
29955 | +#endif | |
29956 | + | |
29957 | +struct pfm_syscall_cookie { | |
29958 | + struct file *filp; | |
29959 | + int fput_needed; | |
29960 | +}; | |
29961 | + | |
29962 | +/* | |
29963 | + * cannot attach if : | |
29964 | + * - kernel task | |
29965 | + * - task not owned by caller (checked by ptrace_may_attach()) | |
29966 | + * - task is dead or zombie | |
29967 | + * - cannot use blocking notification when self-monitoring | |
29968 | + */ | |
29969 | +static int pfm_task_incompatible(struct pfm_context *ctx, | |
29970 | + struct task_struct *task) | |
29971 | +{ | |
29972 | + /* | |
29973 | + * cannot attach to a kernel thread | |
29974 | + */ | |
29975 | + if (!task->mm) { | |
29976 | + PFM_DBG("cannot attach to kernel thread [%d]", task->pid); | |
29977 | + return -EPERM; | |
29978 | + } | |
29979 | + | |
29980 | + /* | |
29981 | + * cannot use block on notification when | |
29982 | + * self-monitoring. | |
29983 | + */ | |
29984 | + if (ctx->flags.block && task == current) { | |
29985 | + PFM_DBG("cannot use block on notification when self-monitoring" | |
29986 | + "[%d]", task->pid); | |
29987 | + return -EINVAL; | |
29988 | + } | |
29989 | + /* | |
29990 | + * cannot attach to a zombie task | |
29991 | + */ | |
29992 | + if (task->exit_state == EXIT_ZOMBIE || task->exit_state == EXIT_DEAD) { | |
29993 | + PFM_DBG("cannot attach to zombie/dead task [%d]", task->pid); | |
29994 | + return -EBUSY; | |
29995 | + } | |
29996 | + return 0; | |
29997 | +} | |
29998 | + | |
29999 | +/** | |
30000 | + * pfm_get_task -- check permission and acquire task to monitor | |
30001 | + * @ctx: perfmon context | |
30002 | + * @pid: identification of the task to check | |
30003 | + * @task: upon return, a pointer to the task to monitor | |
30004 | + * | |
30005 | + * This function is used in per-thread mode only AND when not | |
30006 | + * self-monitoring. It finds the task to monitor and checks | |
30007 | + * that the caller has permissions to attach. It also checks | |
30008 | + * that the task is stopped via ptrace so that we can safely | |
30009 | + * modify its state. | |
30010 | + * | |
30011 | + * task refcount is incremented when successful. | |
30012 | + */ | |
30013 | +static int pfm_get_task(struct pfm_context *ctx, pid_t pid, | |
30014 | + struct task_struct **task, void **data) | |
30015 | +{ | |
30016 | + struct task_struct *p; | |
30017 | + int ret = 0, ret1 = 0; | |
30018 | + | |
30019 | + *data = NULL; | |
30020 | + | |
30021 | + /* | |
30022 | + * When attaching to another thread we must ensure | |
30023 | + * that the thread is actually stopped. | |
30024 | + * | |
30025 | + * As a consequence, only the ptracing parent can actually | |
30026 | + * attach a context to a thread. Obviously, this constraint | |
30027 | + * does not exist for self-monitoring threads. | |
30028 | + * | |
30029 | + * We use ptrace_may_attach() to check for permission. | |
30030 | + */ | |
30031 | + read_lock(&tasklist_lock); | |
30032 | + | |
30033 | + p = find_task_by_vpid(pid); | |
30034 | + if (p) | |
30035 | + get_task_struct(p); | |
30036 | + | |
30037 | + read_unlock(&tasklist_lock); | |
30038 | + | |
30039 | + if (!p) { | |
30040 | + PFM_DBG("task not found %d", pid); | |
30041 | + return -ESRCH; | |
30042 | + } | |
30043 | + | |
30044 | + ret = pfm_task_incompatible(ctx, p); | |
30045 | + if (ret) | |
30046 | + goto error; | |
30047 | + | |
30048 | + ret = pfm_wait_task_stopped(p, data); | |
30049 | + if (ret) | |
30050 | + goto error; | |
30051 | + | |
30052 | + *task = p; | |
30053 | + | |
30054 | + return 0; | |
30055 | +error: | |
30056 | + if (!(ret1 || ret)) | |
30057 | + ret = -EPERM; | |
30058 | + | |
30059 | + put_task_struct(p); | |
30060 | + | |
30061 | + return ret; | |
30062 | +} | |
30063 | + | |
30064 | +/* | |
30065 | + * context must be locked when calling this function | |
30066 | + */ | |
30067 | +int pfm_check_task_state(struct pfm_context *ctx, int check_mask, | |
30068 | + unsigned long *flags, void **resume) | |
30069 | +{ | |
30070 | + struct task_struct *task; | |
30071 | + unsigned long local_flags, new_flags; | |
30072 | + int state, ret; | |
30073 | + | |
30074 | + *resume = NULL; | |
30075 | + | |
30076 | +recheck: | |
30077 | + /* | |
30078 | + * task is NULL for system-wide context | |
30079 | + */ | |
30080 | + task = ctx->task; | |
30081 | + state = ctx->state; | |
30082 | + local_flags = *flags; | |
30083 | + | |
30084 | + PFM_DBG("state=%d check_mask=0x%x", state, check_mask); | |
30085 | + /* | |
30086 | + * if the context is detached, then we do not touch | |
30087 | + * hardware, therefore there is no restriction on when we can | |
30088 | + * access it. | |
30089 | + */ | |
30090 | + if (state == PFM_CTX_UNLOADED) | |
30091 | + return 0; | |
30092 | + /* | |
30093 | + * no command can operate on a zombie context. | |
30094 | + * A context becomes zombie when the file that identifies | |
30095 | + * it is closed while the context is still attached to the | |
30096 | + * thread it monitors. | |
30097 | + */ | |
30098 | + if (state == PFM_CTX_ZOMBIE) | |
30099 | + return -EINVAL; | |
30100 | + | |
30101 | + /* | |
30102 | + * at this point, state is PFM_CTX_LOADED or PFM_CTX_MASKED | |
30103 | + */ | |
30104 | + | |
30105 | + /* | |
30106 | + * some commands require the context to be unloaded to operate | |
30107 | + */ | |
30108 | + if (check_mask & PFM_CMD_UNLOADED) { | |
30109 | + PFM_DBG("state=%d, cmd needs context unloaded", state); | |
30110 | + return -EBUSY; | |
30111 | + } | |
30112 | + | |
30113 | + /* | |
30114 | + * self-monitoring always ok. | |
30115 | + */ | |
30116 | + if (task == current) | |
30117 | + return 0; | |
30118 | + | |
30119 | + /* | |
30120 | + * for syswide, the calling thread must be running on the cpu | |
30121 | + * the context is bound to. | |
30122 | + */ | |
30123 | + if (ctx->flags.system) { | |
30124 | + if (ctx->cpu != smp_processor_id()) | |
30125 | + return -EBUSY; | |
30126 | + return 0; | |
30127 | + } | |
30128 | + | |
30129 | + /* | |
30130 | + * at this point, monitoring another thread | |
30131 | + */ | |
30132 | + | |
30133 | + /* | |
30134 | + * the pfm_unload_context() command is allowed on masked context | |
30135 | + */ | |
30136 | + if (state == PFM_CTX_MASKED && !(check_mask & PFM_CMD_UNLOAD)) | |
30137 | + return 0; | |
30138 | + | |
30139 | + /* | |
30140 | + * When we operate on another thread, we must wait for it to be | |
30141 | + * stopped and completely off any CPU as we need to access the | |
30142 | + * PMU state (or machine state). | |
30143 | + * | |
30144 | + * A thread can be put in the STOPPED state in various ways | |
30145 | + * including PTRACE_ATTACH, or when it receives a SIGSTOP signal. | |
30146 | + * We enforce that the thread must be ptraced, so it is stopped | |
30147 | + * AND it CANNOT wake up while we operate on it because this | |
30148 | + * would require an action from the ptracing parent which is the | |
30149 | + * thread that is calling this function. | |
30150 | + * | |
30151 | + * The dependency on ptrace, imposes that only the ptracing | |
30152 | + * parent can issue command on a thread. This is unfortunate | |
30153 | + * but we do not know of a better way of doing this. | |
30154 | + */ | |
30155 | + if (check_mask & PFM_CMD_STOPPED) { | |
30156 | + | |
30157 | + spin_unlock_irqrestore(&ctx->lock, local_flags); | |
30158 | + | |
30159 | + /* | |
30160 | + * check that the thread is ptraced AND STOPPED | |
30161 | + */ | |
30162 | + ret = pfm_wait_task_stopped(task, resume); | |
30163 | + | |
30164 | + spin_lock_irqsave(&ctx->lock, new_flags); | |
30165 | + | |
30166 | + /* | |
30167 | + * flags may be different than when we released the lock | |
30168 | + */ | |
30169 | + *flags = new_flags; | |
30170 | + | |
30171 | + if (ret) | |
30172 | + return ret; | |
30173 | + /* | |
30174 | + * we must recheck to verify if state has changed | |
30175 | + */ | |
30176 | + if (unlikely(ctx->state != state)) { | |
30177 | + PFM_DBG("old_state=%d new_state=%d", | |
30178 | + state, | |
30179 | + ctx->state); | |
30180 | + goto recheck; | |
30181 | + } | |
30182 | + } | |
30183 | + return 0; | |
30184 | +} | |
30185 | + | |
30186 | +/* | |
30187 | + * pfm_get_args - Function used to copy the syscall argument into kernel memory. | |
30188 | + * @ureq: user argument | |
30189 | + * @sz: user argument size | |
30190 | + * @lsz: size of stack buffer | |
30191 | + * @laddr: stack buffer address | |
30192 | + * @req: point to start of kernel copy of the argument | |
30193 | + * @ptr_free: address of kernel copy to free | |
30194 | + * | |
30195 | + * There are two options: | |
30196 | + * - use a stack buffer described by laddr (addresses) and lsz (size) | |
30197 | + * - allocate memory | |
30198 | + * | |
30199 | + * return: | |
30200 | + * < 0 : in case of error (ptr_free may not be updated) | |
30201 | + * 0 : success | |
30202 | + * - req: points to base of kernel copy of arguments | |
30203 | + * - ptr_free: address of buffer to free by caller on exit. | |
30204 | + * NULL if using the stack buffer | |
30205 | + * | |
30206 | + * when ptr_free is not NULL upon return, the caller must kfree() | |
30207 | + */ | |
30208 | +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr, | |
30209 | + void **req, void **ptr_free) | |
30210 | +{ | |
30211 | + void *addr; | |
30212 | + | |
30213 | + /* | |
30214 | + * check sysadmin argument limit | |
30215 | + */ | |
30216 | + if (unlikely(sz > pfm_controls.arg_mem_max)) { | |
30217 | + PFM_DBG("argument too big %zu max=%zu", | |
30218 | + sz, | |
30219 | + pfm_controls.arg_mem_max); | |
30220 | + return -E2BIG; | |
30221 | + } | |
30222 | + | |
30223 | + /* | |
30224 | + * check if vector fits on stack buffer | |
30225 | + */ | |
30226 | + if (sz > lsz) { | |
30227 | + addr = kmalloc(sz, GFP_KERNEL); | |
30228 | + if (unlikely(addr == NULL)) | |
30229 | + return -ENOMEM; | |
30230 | + *ptr_free = addr; | |
30231 | + } else { | |
30232 | + addr = laddr; | |
30233 | + *req = laddr; | |
30234 | + *ptr_free = NULL; | |
30235 | + } | |
30236 | + | |
30237 | + /* | |
30238 | + * bring the data in | |
30239 | + */ | |
30240 | + if (unlikely(copy_from_user(addr, ureq, sz))) { | |
30241 | + if (addr != laddr) | |
30242 | + kfree(addr); | |
30243 | + return -EFAULT; | |
30244 | + } | |
30245 | + | |
30246 | + /* | |
30247 | + * base address of kernel buffer | |
30248 | + */ | |
30249 | + *req = addr; | |
30250 | + | |
30251 | + return 0; | |
30252 | +} | |
30253 | + | |
30254 | +/** | |
30255 | + * pfm_acquire_ctx_from_fd -- get ctx from file descriptor | |
30256 | + * @fd: file descriptor | |
30257 | + * @ctx: pointer to pointer of context updated on return | |
30258 | + * @cookie: opaque structure to use for release | |
30259 | + * | |
30260 | + * This helper function extracts the ctx from the file descriptor. | |
30261 | + * It also increments the refcount of the file structure. Thus | |
30262 | + * it updates the cookie so the refcount can be decreased when | |
30263 | + * leaving the perfmon syscall via pfm_release_ctx_from_fd | |
30264 | + */ | |
30265 | +static int pfm_acquire_ctx_from_fd(int fd, struct pfm_context **ctx, | |
30266 | + struct pfm_syscall_cookie *cookie) | |
30267 | +{ | |
30268 | + struct file *filp; | |
30269 | + int fput_needed; | |
30270 | + | |
30271 | + filp = fget_light(fd, &fput_needed); | |
30272 | + if (unlikely(filp == NULL)) { | |
30273 | + PFM_DBG("invalid fd %d", fd); | |
30274 | + return -EBADF; | |
30275 | + } | |
30276 | + | |
30277 | + *ctx = filp->private_data; | |
30278 | + | |
30279 | + if (unlikely(!*ctx || filp->f_op != &pfm_file_ops)) { | |
30280 | + PFM_DBG("fd %d not related to perfmon", fd); | |
30281 | + return -EBADF; | |
30282 | + } | |
30283 | + cookie->filp = filp; | |
30284 | + cookie->fput_needed = fput_needed; | |
30285 | + | |
30286 | + return 0; | |
30287 | +} | |
30288 | + | |
30289 | +/** | |
30290 | + * pfm_release_ctx_from_fd -- decrease refcount of file associated with context | |
30291 | + * @cookie: the cookie structure initialized by pfm_acquire_ctx_from_fd | |
30292 | + */ | |
30293 | +static inline void pfm_release_ctx_from_fd(struct pfm_syscall_cookie *cookie) | |
30294 | +{ | |
30295 | + fput_light(cookie->filp, cookie->fput_needed); | |
30296 | +} | |
30297 | + | |
30298 | +/* | |
30299 | + * unlike the other perfmon system calls, this one returns a file descriptor | |
30300 | + * or a value < 0 in case of error, very much like open() or socket() | |
30301 | + */ | |
30302 | +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq, | |
30303 | + char __user *fmt_name, | |
30304 | + void __user *fmt_uarg, size_t fmt_size) | |
30305 | +{ | |
30306 | + struct pfarg_ctx req; | |
30307 | + struct pfm_smpl_fmt *fmt = NULL; | |
30308 | + void *fmt_arg = NULL; | |
30309 | + int ret; | |
30310 | + | |
30311 | + PFM_DBG("req=%p fmt=%p fmt_arg=%p size=%zu", | |
30312 | + ureq, fmt_name, fmt_uarg, fmt_size); | |
30313 | + | |
30314 | + if (perfmon_disabled) | |
30315 | + return -ENOSYS; | |
30316 | + | |
30317 | + if (copy_from_user(&req, ureq, sizeof(req))) | |
30318 | + return -EFAULT; | |
30319 | + | |
30320 | + if (fmt_name) { | |
30321 | + ret = pfm_get_smpl_arg(fmt_name, fmt_uarg, fmt_size, &fmt_arg, &fmt); | |
30322 | + if (ret) | |
30323 | + goto abort; | |
30324 | + } | |
30325 | + | |
30326 | + ret = __pfm_create_context(&req, fmt, fmt_arg, PFM_NORMAL, NULL); | |
30327 | + | |
30328 | + kfree(fmt_arg); | |
30329 | +abort: | |
30330 | + return ret; | |
30331 | +} | |
30332 | + | |
30333 | +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, int count) | |
30334 | +{ | |
30335 | + struct pfm_context *ctx; | |
30336 | + struct task_struct *task; | |
30337 | + struct pfm_syscall_cookie cookie; | |
30338 | + struct pfarg_pmc pmcs[PFM_PMC_STK_ARG]; | |
30339 | + struct pfarg_pmc *req; | |
30340 | + void *fptr, *resume; | |
30341 | + unsigned long flags; | |
30342 | + size_t sz; | |
30343 | + int ret; | |
30344 | + | |
30345 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30346 | + | |
30347 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) { | |
30348 | + PFM_DBG("invalid arg count %d", count); | |
30349 | + return -EINVAL; | |
30350 | + } | |
30351 | + | |
30352 | + sz = count*sizeof(*ureq); | |
30353 | + | |
30354 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30355 | + if (ret) | |
30356 | + return ret; | |
30357 | + | |
30358 | + ret = pfm_get_args(ureq, sz, sizeof(pmcs), pmcs, (void **)&req, &fptr); | |
30359 | + if (ret) | |
30360 | + goto error; | |
30361 | + | |
30362 | + spin_lock_irqsave(&ctx->lock, flags); | |
30363 | + | |
30364 | + task = ctx->task; | |
30365 | + | |
30366 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
30367 | + if (!ret) | |
30368 | + ret = __pfm_write_pmcs(ctx, req, count); | |
30369 | + | |
30370 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30371 | + | |
30372 | + if (resume) | |
30373 | + pfm_resume_task(task, resume); | |
30374 | + | |
30375 | + /* | |
30376 | + * This function may be on the critical path. | |
30377 | + * We want to avoid the branch if unnecessary. | |
30378 | + */ | |
30379 | + if (fptr) | |
30380 | + kfree(fptr); | |
30381 | +error: | |
30382 | + pfm_release_ctx_from_fd(&cookie); | |
30383 | + return ret; | |
30384 | +} | |
30385 | + | |
30386 | +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, int count) | |
30387 | +{ | |
30388 | + struct pfm_context *ctx; | |
30389 | + struct task_struct *task; | |
30390 | + struct pfm_syscall_cookie cookie; | |
30391 | + struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; | |
30392 | + struct pfarg_pmd *req; | |
30393 | + void *fptr, *resume; | |
30394 | + unsigned long flags; | |
30395 | + size_t sz; | |
30396 | + int ret; | |
30397 | + | |
30398 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30399 | + | |
30400 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) { | |
30401 | + PFM_DBG("invalid arg count %d", count); | |
30402 | + return -EINVAL; | |
30403 | + } | |
30404 | + | |
30405 | + sz = count*sizeof(*ureq); | |
30406 | + | |
30407 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30408 | + if (ret) | |
30409 | + return ret; | |
30410 | + | |
30411 | + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr); | |
30412 | + if (ret) | |
30413 | + goto error; | |
30414 | + | |
30415 | + spin_lock_irqsave(&ctx->lock, flags); | |
30416 | + | |
30417 | + task = ctx->task; | |
30418 | + | |
30419 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
30420 | + if (!ret) | |
30421 | + ret = __pfm_write_pmds(ctx, req, count, 0); | |
30422 | + | |
30423 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30424 | + | |
30425 | + if (resume) | |
30426 | + pfm_resume_task(task, resume); | |
30427 | + | |
30428 | + if (fptr) | |
30429 | + kfree(fptr); | |
30430 | +error: | |
30431 | + pfm_release_ctx_from_fd(&cookie); | |
30432 | + return ret; | |
30433 | +} | |
30434 | + | |
30435 | +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, int count) | |
30436 | +{ | |
30437 | + struct pfm_context *ctx; | |
30438 | + struct task_struct *task; | |
30439 | + struct pfm_syscall_cookie cookie; | |
30440 | + struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; | |
30441 | + struct pfarg_pmd *req; | |
30442 | + void *fptr, *resume; | |
30443 | + unsigned long flags; | |
30444 | + size_t sz; | |
30445 | + int ret; | |
30446 | + | |
30447 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30448 | + | |
30449 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) | |
30450 | + return -EINVAL; | |
30451 | + | |
30452 | + sz = count*sizeof(*ureq); | |
30453 | + | |
30454 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30455 | + if (ret) | |
30456 | + return ret; | |
30457 | + | |
30458 | + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr); | |
30459 | + if (ret) | |
30460 | + goto error; | |
30461 | + | |
30462 | + spin_lock_irqsave(&ctx->lock, flags); | |
30463 | + | |
30464 | + task = ctx->task; | |
30465 | + | |
30466 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
30467 | + if (!ret) | |
30468 | + ret = __pfm_read_pmds(ctx, req, count); | |
30469 | + | |
30470 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30471 | + | |
30472 | + if (copy_to_user(ureq, req, sz)) | |
30473 | + ret = -EFAULT; | |
30474 | + | |
30475 | + if (resume) | |
30476 | + pfm_resume_task(task, resume); | |
30477 | + | |
30478 | + if (fptr) | |
30479 | + kfree(fptr); | |
30480 | +error: | |
30481 | + pfm_release_ctx_from_fd(&cookie); | |
30482 | + return ret; | |
30483 | +} | |
30484 | + | |
30485 | +asmlinkage long sys_pfm_restart(int fd) | |
30486 | +{ | |
30487 | + struct pfm_context *ctx; | |
30488 | + struct task_struct *task; | |
30489 | + struct pfm_syscall_cookie cookie; | |
30490 | + void *resume; | |
30491 | + unsigned long flags; | |
30492 | + int ret, info; | |
30493 | + | |
30494 | + PFM_DBG("fd=%d", fd); | |
30495 | + | |
30496 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30497 | + if (ret) | |
30498 | + return ret; | |
30499 | + | |
30500 | + spin_lock_irqsave(&ctx->lock, flags); | |
30501 | + | |
30502 | + task = ctx->task; | |
30503 | + | |
30504 | + ret = pfm_check_task_state(ctx, 0, &flags, &resume); | |
30505 | + if (!ret) | |
30506 | + ret = __pfm_restart(ctx, &info); | |
30507 | + | |
30508 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30509 | + | |
30510 | + if (resume) | |
30511 | + pfm_resume_task(task, resume); | |
30512 | + /* | |
30513 | + * In per-thread mode with blocking notification, i.e. | |
30514 | + * ctx->flags.blocking=1, we need to defer issuing the | |
30515 | + * complete to unblock the blocked monitored thread. | |
30516 | + * Otherwise we have a potential deadlock due to a lock | |
30517 | + * inversion between the context lock and the task_rq_lock() | |
30518 | + * which can happen if one thread is in this call and the other | |
30519 | + * (the monitored thread) is in the context switch code. | |
30520 | + * | |
30521 | + * It is safe to access the context outside the critical section | |
30522 | + * because: | |
30523 | + * - we are protected by the fget_light(), thus the context | |
30524 | + * cannot disappear | |
30525 | + */ | |
30526 | + if (ret == 0 && info == 1) | |
30527 | + complete(&ctx->restart_complete); | |
30528 | + | |
30529 | + pfm_release_ctx_from_fd(&cookie); | |
30530 | + return ret; | |
30531 | +} | |
30532 | + | |
30533 | +asmlinkage long sys_pfm_stop(int fd) | |
30534 | +{ | |
30535 | + struct pfm_context *ctx; | |
30536 | + struct task_struct *task; | |
30537 | + struct pfm_syscall_cookie cookie; | |
30538 | + void *resume; | |
30539 | + unsigned long flags; | |
30540 | + int ret; | |
30541 | + int release_info; | |
30542 | + | |
30543 | + PFM_DBG("fd=%d", fd); | |
30544 | + | |
30545 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30546 | + if (ret) | |
30547 | + return ret; | |
30548 | + | |
30549 | + spin_lock_irqsave(&ctx->lock, flags); | |
30550 | + | |
30551 | + task = ctx->task; | |
30552 | + | |
30553 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
30554 | + if (!ret) | |
30555 | + ret = __pfm_stop(ctx, &release_info); | |
30556 | + | |
30557 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30558 | + | |
30559 | + if (resume) | |
30560 | + pfm_resume_task(task, resume); | |
30561 | + | |
30562 | + /* | |
30563 | + * defer cancellation of timer to avoid race | |
30564 | + * with pfm_handle_switch_timeout() | |
30565 | + * | |
30566 | + * applies only when self-monitoring | |
30567 | + */ | |
30568 | + if (release_info & 0x2) | |
30569 | + hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); | |
30570 | + | |
30571 | + pfm_release_ctx_from_fd(&cookie); | |
30572 | + return ret; | |
30573 | +} | |
30574 | + | |
30575 | +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq) | |
30576 | +{ | |
30577 | + struct pfm_context *ctx; | |
30578 | + struct task_struct *task; | |
30579 | + struct pfm_syscall_cookie cookie; | |
30580 | + void *resume; | |
30581 | + struct pfarg_start req; | |
30582 | + unsigned long flags; | |
30583 | + int ret; | |
30584 | + | |
30585 | + PFM_DBG("fd=%d req=%p", fd, ureq); | |
30586 | + | |
30587 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30588 | + if (ret) | |
30589 | + return ret; | |
30590 | + | |
30591 | + /* | |
30592 | + * the one argument is actually optional | |
30593 | + */ | |
30594 | + if (ureq && copy_from_user(&req, ureq, sizeof(req))) | |
30595 | + return -EFAULT; | |
30596 | + | |
30597 | + spin_lock_irqsave(&ctx->lock, flags); | |
30598 | + | |
30599 | + task = ctx->task; | |
30600 | + | |
30601 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); | |
30602 | + if (!ret) | |
30603 | + ret = __pfm_start(ctx, ureq ? &req : NULL); | |
30604 | + | |
30605 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30606 | + | |
30607 | + if (resume) | |
30608 | + pfm_resume_task(task, resume); | |
30609 | + | |
30610 | + pfm_release_ctx_from_fd(&cookie); | |
30611 | + return ret; | |
30612 | +} | |
30613 | + | |
30614 | +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq) | |
30615 | +{ | |
30616 | + struct pfm_context *ctx; | |
30617 | + struct task_struct *task; | |
30618 | + struct pfm_syscall_cookie cookie; | |
30619 | + void *resume, *dummy_resume; | |
30620 | + unsigned long flags; | |
30621 | + struct pfarg_load req; | |
30622 | + int ret; | |
30623 | + | |
30624 | + PFM_DBG("fd=%d req=%p", fd, ureq); | |
30625 | + | |
30626 | + if (copy_from_user(&req, ureq, sizeof(req))) | |
30627 | + return -EFAULT; | |
30628 | + | |
30629 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30630 | + if (ret) | |
30631 | + return ret; | |
30632 | + | |
30633 | + task = current; | |
30634 | + | |
30635 | + /* | |
30636 | + * in per-thread mode (not self-monitoring), get a reference | |
30637 | + * on task to monitor. This must be done with interrupts enabled | |
30638 | + * Upon succesful return, refcount on task is increased. | |
30639 | + * | |
30640 | + * fget_light() is protecting the context. | |
30641 | + */ | |
30642 | + if (!ctx->flags.system && req.load_pid != current->pid) { | |
30643 | + ret = pfm_get_task(ctx, req.load_pid, &task, &resume); | |
30644 | + if (ret) | |
30645 | + goto error; | |
30646 | + } | |
30647 | + | |
30648 | + /* | |
30649 | + * irqsave is required to avoid race in case context is already | |
30650 | + * loaded or with switch timeout in the case of self-monitoring | |
30651 | + */ | |
30652 | + spin_lock_irqsave(&ctx->lock, flags); | |
30653 | + | |
30654 | + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &dummy_resume); | |
30655 | + if (!ret) | |
30656 | + ret = __pfm_load_context(ctx, &req, task); | |
30657 | + | |
30658 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30659 | + | |
30660 | + if (resume) | |
30661 | + pfm_resume_task(task, resume); | |
30662 | + | |
30663 | + /* | |
30664 | + * in per-thread mode (not self-monitoring), we need | |
30665 | + * to decrease refcount on task to monitor: | |
30666 | + * - load successful: we have a reference to the task in ctx->task | |
30667 | + * - load failed : undo the effect of pfm_get_task() | |
30668 | + */ | |
30669 | + if (task != current) | |
30670 | + put_task_struct(task); | |
30671 | +error: | |
30672 | + pfm_release_ctx_from_fd(&cookie); | |
30673 | + return ret; | |
30674 | +} | |
30675 | + | |
30676 | +asmlinkage long sys_pfm_unload_context(int fd) | |
30677 | +{ | |
30678 | + struct pfm_context *ctx; | |
30679 | + struct task_struct *task; | |
30680 | + struct pfm_syscall_cookie cookie; | |
30681 | + void *resume; | |
30682 | + unsigned long flags; | |
30683 | + int ret; | |
30684 | + int is_system, release_info = 0; | |
30685 | + u32 cpu; | |
30686 | + | |
30687 | + PFM_DBG("fd=%d", fd); | |
30688 | + | |
30689 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30690 | + if (ret) | |
30691 | + return ret; | |
30692 | + | |
30693 | + is_system = ctx->flags.system; | |
30694 | + | |
30695 | + spin_lock_irqsave(&ctx->lock, flags); | |
30696 | + | |
30697 | + cpu = ctx->cpu; | |
30698 | + task = ctx->task; | |
30699 | + | |
30700 | + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED|PFM_CMD_UNLOAD, | |
30701 | + &flags, &resume); | |
30702 | + if (!ret) | |
30703 | + ret = __pfm_unload_context(ctx, &release_info); | |
30704 | + | |
30705 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30706 | + | |
30707 | + if (resume) | |
30708 | + pfm_resume_task(task, resume); | |
30709 | + | |
30710 | + /* | |
30711 | + * cancel time now that context is unlocked | |
30712 | + * avoid race with pfm_handle_switch_timeout() | |
30713 | + */ | |
30714 | + if (release_info & 0x2) { | |
30715 | + int r; | |
30716 | + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); | |
30717 | + PFM_DBG("timeout cancel=%d", r); | |
30718 | + } | |
30719 | + | |
30720 | + if (release_info & 0x1) | |
30721 | + pfm_session_release(is_system, cpu); | |
30722 | + | |
30723 | + pfm_release_ctx_from_fd(&cookie); | |
30724 | + return ret; | |
30725 | +} | |
30726 | + | |
30727 | +asmlinkage long sys_pfm_create_evtsets(int fd, struct pfarg_setdesc __user *ureq, int count) | |
30728 | +{ | |
30729 | + struct pfm_context *ctx; | |
30730 | + struct pfm_syscall_cookie cookie; | |
30731 | + struct pfarg_setdesc *req; | |
30732 | + void *fptr, *resume; | |
30733 | + unsigned long flags; | |
30734 | + size_t sz; | |
30735 | + int ret; | |
30736 | + | |
30737 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30738 | + | |
30739 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) | |
30740 | + return -EINVAL; | |
30741 | + | |
30742 | + sz = count*sizeof(*ureq); | |
30743 | + | |
30744 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30745 | + if (ret) | |
30746 | + return ret; | |
30747 | + | |
30748 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
30749 | + if (ret) | |
30750 | + goto error; | |
30751 | + | |
30752 | + /* | |
30753 | + * must mask interrupts because we do not know the state of context, | |
30754 | + * could be attached and we could be getting PMU interrupts. So | |
30755 | + * we mask and lock context and we check and possibly relax masking | |
30756 | + */ | |
30757 | + spin_lock_irqsave(&ctx->lock, flags); | |
30758 | + | |
30759 | + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume); | |
30760 | + if (!ret) | |
30761 | + ret = __pfm_create_evtsets(ctx, req, count); | |
30762 | + | |
30763 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30764 | + /* | |
30765 | + * context must be unloaded for this command. The resume pointer | |
30766 | + * is necessarily NULL, thus no need to call pfm_resume_task() | |
30767 | + */ | |
30768 | + kfree(fptr); | |
30769 | + | |
30770 | +error: | |
30771 | + pfm_release_ctx_from_fd(&cookie); | |
30772 | + return ret; | |
30773 | +} | |
30774 | + | |
30775 | +asmlinkage long sys_pfm_getinfo_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count) | |
30776 | +{ | |
30777 | + struct pfm_context *ctx; | |
30778 | + struct task_struct *task; | |
30779 | + struct pfm_syscall_cookie cookie; | |
30780 | + struct pfarg_setinfo *req; | |
30781 | + void *fptr, *resume; | |
30782 | + unsigned long flags; | |
30783 | + size_t sz; | |
30784 | + int ret; | |
30785 | + | |
30786 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30787 | + | |
30788 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) | |
30789 | + return -EINVAL; | |
30790 | + | |
30791 | + sz = count*sizeof(*ureq); | |
30792 | + | |
30793 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30794 | + if (ret) | |
30795 | + return ret; | |
30796 | + | |
30797 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
30798 | + if (ret) | |
30799 | + goto error; | |
30800 | + | |
30801 | + /* | |
30802 | + * this command operates even when context is loaded, so we need | |
30803 | + * to keep interrupts masked to avoid a race with PMU interrupt | |
30804 | + * which may switch the active set | |
30805 | + */ | |
30806 | + spin_lock_irqsave(&ctx->lock, flags); | |
30807 | + | |
30808 | + task = ctx->task; | |
30809 | + | |
30810 | + ret = pfm_check_task_state(ctx, 0, &flags, &resume); | |
30811 | + if (!ret) | |
30812 | + ret = __pfm_getinfo_evtsets(ctx, req, count); | |
30813 | + | |
30814 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30815 | + | |
30816 | + if (resume) | |
30817 | + pfm_resume_task(task, resume); | |
30818 | + | |
30819 | + if (copy_to_user(ureq, req, sz)) | |
30820 | + ret = -EFAULT; | |
30821 | + | |
30822 | + kfree(fptr); | |
30823 | +error: | |
30824 | + pfm_release_ctx_from_fd(&cookie); | |
30825 | + return ret; | |
30826 | +} | |
30827 | + | |
30828 | +asmlinkage long sys_pfm_delete_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count) | |
30829 | +{ | |
30830 | + struct pfm_context *ctx; | |
30831 | + struct pfm_syscall_cookie cookie; | |
30832 | + struct pfarg_setinfo *req; | |
30833 | + void *fptr, *resume; | |
30834 | + unsigned long flags; | |
30835 | + size_t sz; | |
30836 | + int ret; | |
30837 | + | |
30838 | + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); | |
30839 | + | |
30840 | + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) | |
30841 | + return -EINVAL; | |
30842 | + | |
30843 | + sz = count*sizeof(*ureq); | |
30844 | + | |
30845 | + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); | |
30846 | + if (ret) | |
30847 | + return ret; | |
30848 | + | |
30849 | + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); | |
30850 | + if (ret) | |
30851 | + goto error; | |
30852 | + | |
30853 | + /* | |
30854 | + * must mask interrupts because we do not know the state of context, | |
30855 | + * could be attached and we could be getting PMU interrupts | |
30856 | + */ | |
30857 | + spin_lock_irqsave(&ctx->lock, flags); | |
30858 | + | |
30859 | + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume); | |
30860 | + if (!ret) | |
30861 | + ret = __pfm_delete_evtsets(ctx, req, count); | |
30862 | + | |
30863 | + spin_unlock_irqrestore(&ctx->lock, flags); | |
30864 | + /* | |
30865 | + * context must be unloaded for this command. The resume pointer | |
30866 | + * is necessarily NULL, thus no need to call pfm_resume_task() | |
30867 | + */ | |
30868 | + kfree(fptr); | |
30869 | + | |
30870 | +error: | |
30871 | + pfm_release_ctx_from_fd(&cookie); | |
30872 | + return ret; | |
30873 | +} | |
30874 | --- /dev/null | |
30875 | +++ b/perfmon/perfmon_sysfs.c | |
30876 | @@ -0,0 +1,525 @@ | |
30877 | +/* | |
30878 | + * perfmon_sysfs.c: perfmon2 sysfs interface | |
30879 | + * | |
30880 | + * This file implements the perfmon2 interface which | |
30881 | + * provides access to the hardware performance counters | |
30882 | + * of the host processor. | |
30883 | + * | |
30884 | + * The initial version of perfmon.c was written by | |
30885 | + * Ganesh Venkitachalam, IBM Corp. | |
30886 | + * | |
30887 | + * Then it was modified for perfmon-1.x by Stephane Eranian and | |
30888 | + * David Mosberger, Hewlett Packard Co. | |
30889 | + * | |
30890 | + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x | |
30891 | + * by Stephane Eranian, Hewlett Packard Co. | |
30892 | + * | |
30893 | + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. | |
30894 | + * Contributed by Stephane Eranian <eranian@hpl.hp.com> | |
30895 | + * David Mosberger-Tang <davidm@hpl.hp.com> | |
30896 | + * | |
30897 | + * More information about perfmon available at: | |
30898 | + * http://perfmon2.sf.net | |
30899 | + * | |
30900 | + * This program is free software; you can redistribute it and/or | |
30901 | + * modify it under the terms of version 2 of the GNU General Public | |
30902 | + * License as published by the Free Software Foundation. | |
30903 | + * | |
30904 | + * This program is distributed in the hope that it will be useful, | |
30905 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
30906 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
30907 | + * General Public License for more details. | |
30908 | + * | |
30909 | + * You should have received a copy of the GNU General Public License | |
30910 | + * along with this program; if not, write to the Free Software | |
30911 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
30912 | + * 02111-1307 USA | |
30913 | + */ | |
30914 | +#include <linux/kernel.h> | |
30915 | +#include <linux/module.h> /* for EXPORT_SYMBOL */ | |
30916 | +#include <linux/perfmon_kern.h> | |
30917 | +#include "perfmon_priv.h" | |
30918 | + | |
30919 | +struct pfm_attribute { | |
30920 | + struct attribute attr; | |
30921 | + ssize_t (*show)(void *, struct pfm_attribute *attr, char *); | |
30922 | + ssize_t (*store)(void *, const char *, size_t); | |
30923 | +}; | |
30924 | +#define to_attr(n) container_of(n, struct pfm_attribute, attr); | |
30925 | + | |
30926 | +#define PFM_RO_ATTR(_name, _show) \ | |
30927 | + struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL) | |
30928 | + | |
30929 | +#define PFM_RW_ATTR(_name, _show, _store) \ | |
30930 | + struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store) | |
30931 | + | |
30932 | +#define PFM_ROS_ATTR(_name, _show) \ | |
30933 | + struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL) | |
30934 | + | |
30935 | +#define is_attr_name(a, n) (!strcmp((a)->attr.name, n)) | |
30936 | +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu); | |
30937 | + | |
30938 | +static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj; | |
30939 | +static struct kobject *pfm_pmu_kobj; | |
30940 | + | |
30941 | +static ssize_t pfm_regs_attr_show(struct kobject *kobj, | |
30942 | + struct attribute *attr, char *buf) | |
30943 | +{ | |
30944 | + struct pfm_regmap_desc *reg = to_reg(kobj); | |
30945 | + struct pfm_attribute *attribute = to_attr(attr); | |
30946 | + return attribute->show ? attribute->show(reg, attribute, buf) : -EIO; | |
30947 | +} | |
30948 | + | |
30949 | +static ssize_t pfm_fmt_attr_show(struct kobject *kobj, | |
30950 | + struct attribute *attr, char *buf) | |
30951 | +{ | |
30952 | + struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj); | |
30953 | + struct pfm_attribute *attribute = to_attr(attr); | |
30954 | + return attribute->show ? attribute->show(fmt, attribute, buf) : -EIO; | |
30955 | +} | |
30956 | + | |
30957 | +static struct sysfs_ops pfm_regs_sysfs_ops = { | |
30958 | + .show = pfm_regs_attr_show | |
30959 | +}; | |
30960 | + | |
30961 | +static struct sysfs_ops pfm_fmt_sysfs_ops = { | |
30962 | + .show = pfm_fmt_attr_show | |
30963 | +}; | |
30964 | + | |
30965 | +static struct kobj_type pfm_regs_ktype = { | |
30966 | + .sysfs_ops = &pfm_regs_sysfs_ops, | |
30967 | +}; | |
30968 | + | |
30969 | +static struct kobj_type pfm_fmt_ktype = { | |
30970 | + .sysfs_ops = &pfm_fmt_sysfs_ops, | |
30971 | +}; | |
30972 | + | |
30973 | +static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) | |
30974 | +{ | |
30975 | + int base; | |
30976 | + | |
30977 | + if (is_attr_name(attr, "version")) | |
30978 | + return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN); | |
30979 | + | |
30980 | + if (is_attr_name(attr, "task_sessions_count")) | |
30981 | + return pfm_sysfs_res_show(buf, PAGE_SIZE, 0); | |
30982 | + | |
30983 | + if (is_attr_name(attr, "debug")) | |
30984 | + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug); | |
30985 | + | |
30986 | + if (is_attr_name(attr, "task_group")) | |
30987 | + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group); | |
30988 | + | |
30989 | + if (is_attr_name(attr, "mode")) | |
30990 | + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags); | |
30991 | + | |
30992 | + if (is_attr_name(attr, "arg_mem_max")) | |
30993 | + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max); | |
30994 | + | |
30995 | + if (is_attr_name(attr, "syscall")) { | |
30996 | + base = pfm_arch_get_base_syscall(); | |
30997 | + return snprintf(buf, PAGE_SIZE, "%d\n", base); | |
30998 | + } | |
30999 | + | |
31000 | + if (is_attr_name(attr, "sys_sessions_count")) | |
31001 | + return pfm_sysfs_res_show(buf, PAGE_SIZE, 1); | |
31002 | + | |
31003 | + if (is_attr_name(attr, "smpl_buffer_mem_max")) | |
31004 | + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max); | |
31005 | + | |
31006 | + if (is_attr_name(attr, "smpl_buffer_mem_cur")) | |
31007 | + return pfm_sysfs_res_show(buf, PAGE_SIZE, 2); | |
31008 | + | |
31009 | + if (is_attr_name(attr, "sys_group")) | |
31010 | + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group); | |
31011 | + | |
31012 | + /* XXX: could be set to write-only */ | |
31013 | + if (is_attr_name(attr, "reset_stats")) { | |
31014 | + buf[0] = '0'; | |
31015 | + buf[1] = '\0'; | |
31016 | + return strnlen(buf, PAGE_SIZE); | |
31017 | + } | |
31018 | + return 0; | |
31019 | +} | |
31020 | + | |
31021 | +static ssize_t pfm_controls_store(struct kobject *kobj, struct kobj_attribute *attr, | |
31022 | + const char *buf, size_t count) | |
31023 | +{ | |
31024 | + int i; | |
31025 | + size_t d; | |
31026 | + | |
31027 | + if (sscanf(buf, "%zu", &d) != 1) | |
31028 | + goto skip; | |
31029 | + | |
31030 | + if (is_attr_name(attr, "debug")) | |
31031 | + pfm_controls.debug = d; | |
31032 | + | |
31033 | + if (is_attr_name(attr, "task_group")) | |
31034 | + pfm_controls.task_group = d; | |
31035 | + | |
31036 | + if (is_attr_name(attr, "sys_group")) | |
31037 | + pfm_controls.sys_group = d; | |
31038 | + | |
31039 | + if (is_attr_name(attr, "mode")) | |
31040 | + pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0; | |
31041 | + | |
31042 | + if (is_attr_name(attr, "arg_mem_max")) { | |
31043 | + /* | |
31044 | + * we impose a page as the minimum. | |
31045 | + * | |
31046 | + * This limit may be smaller than the stack buffer | |
31047 | + * available and that is fine. | |
31048 | + */ | |
31049 | + if (d >= PAGE_SIZE) | |
31050 | + pfm_controls.arg_mem_max = d; | |
31051 | + } | |
31052 | + if (is_attr_name(attr, "reset_stats")) { | |
31053 | + for_each_online_cpu(i) { | |
31054 | + pfm_reset_stats(i); | |
31055 | + } | |
31056 | + } | |
31057 | + | |
31058 | + if (is_attr_name(attr, "smpl_buffer_mem_max")) { | |
31059 | + if (d >= PAGE_SIZE) | |
31060 | + pfm_controls.smpl_buffer_mem_max = d; | |
31061 | + } | |
31062 | +skip: | |
31063 | + return count; | |
31064 | +} | |
31065 | + | |
31066 | +/* | |
31067 | + * /sys/kernel/perfmon attributes | |
31068 | + */ | |
31069 | +static PFM_RO_ATTR(version, pfm_controls_show); | |
31070 | +static PFM_RO_ATTR(task_sessions_count, pfm_controls_show); | |
31071 | +static PFM_RO_ATTR(syscall, pfm_controls_show); | |
31072 | +static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show); | |
31073 | +static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show); | |
31074 | + | |
31075 | +static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store); | |
31076 | +static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store); | |
31077 | +static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store); | |
31078 | +static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store); | |
31079 | +static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store); | |
31080 | +static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store); | |
31081 | +static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store); | |
31082 | + | |
31083 | +static struct attribute *pfm_kernel_attrs[] = { | |
31084 | + &attr_version.attr, | |
31085 | + &attr_syscall.attr, | |
31086 | + &attr_task_sessions_count.attr, | |
31087 | + &attr_sys_sessions_count.attr, | |
31088 | + &attr_smpl_buffer_mem_cur.attr, | |
31089 | + &attr_debug.attr, | |
31090 | + &attr_reset_stats.attr, | |
31091 | + &attr_sys_group.attr, | |
31092 | + &attr_task_group.attr, | |
31093 | + &attr_mode.attr, | |
31094 | + &attr_smpl_buffer_mem_max.attr, | |
31095 | + &attr_arg_mem_max.attr, | |
31096 | + NULL | |
31097 | +}; | |
31098 | + | |
31099 | +static struct attribute_group pfm_kernel_attr_group = { | |
31100 | + .attrs = pfm_kernel_attrs, | |
31101 | +}; | |
31102 | + | |
31103 | +/* | |
31104 | + * per-reg attributes | |
31105 | + */ | |
31106 | +static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf) | |
31107 | +{ | |
31108 | + struct pfm_regmap_desc *reg; | |
31109 | + int w; | |
31110 | + | |
31111 | + reg = data; | |
31112 | + | |
31113 | + if (is_attr_name(attr, "name")) | |
31114 | + return snprintf(buf, PAGE_SIZE, "%s\n", reg->desc); | |
31115 | + | |
31116 | + if (is_attr_name(attr, "dfl_val")) | |
31117 | + return snprintf(buf, PAGE_SIZE, "0x%llx\n", | |
31118 | + (unsigned long long)reg->dfl_val); | |
31119 | + | |
31120 | + if (is_attr_name(attr, "width")) { | |
31121 | + w = (reg->type & PFM_REG_C64) ? | |
31122 | + pfm_pmu_conf->counter_width : 64; | |
31123 | + return snprintf(buf, PAGE_SIZE, "%d\n", w); | |
31124 | + } | |
31125 | + | |
31126 | + if (is_attr_name(attr, "rsvd_msk")) | |
31127 | + return snprintf(buf, PAGE_SIZE, "0x%llx\n", | |
31128 | + (unsigned long long)reg->rsvd_msk); | |
31129 | + | |
31130 | + if (is_attr_name(attr, "addr")) | |
31131 | + return snprintf(buf, PAGE_SIZE, "0x%lx\n", reg->hw_addr); | |
31132 | + | |
31133 | + return 0; | |
31134 | +} | |
31135 | + | |
31136 | +static PFM_ROS_ATTR(name, pfm_reg_show); | |
31137 | +static PFM_ROS_ATTR(dfl_val, pfm_reg_show); | |
31138 | +static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show); | |
31139 | +static PFM_ROS_ATTR(width, pfm_reg_show); | |
31140 | +static PFM_ROS_ATTR(addr, pfm_reg_show); | |
31141 | + | |
31142 | +static struct attribute *pfm_reg_attrs[] = { | |
31143 | + &attr_name.attr, | |
31144 | + &attr_dfl_val.attr, | |
31145 | + &attr_rsvd_msk.attr, | |
31146 | + &attr_width.attr, | |
31147 | + &attr_addr.attr, | |
31148 | + NULL | |
31149 | +}; | |
31150 | + | |
31151 | +static struct attribute_group pfm_reg_attr_group = { | |
31152 | + .attrs = pfm_reg_attrs, | |
31153 | +}; | |
31154 | + | |
31155 | +static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) | |
31156 | +{ | |
31157 | + if (is_attr_name(attr, "model")) | |
31158 | + return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name); | |
31159 | + return 0; | |
31160 | +} | |
31161 | +static PFM_RO_ATTR(model, pfm_pmu_show); | |
31162 | + | |
31163 | +static struct attribute *pfm_pmu_desc_attrs[] = { | |
31164 | + &attr_model.attr, | |
31165 | + NULL | |
31166 | +}; | |
31167 | + | |
31168 | +static struct attribute_group pfm_pmu_desc_attr_group = { | |
31169 | + .attrs = pfm_pmu_desc_attrs, | |
31170 | +}; | |
31171 | + | |
31172 | +static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu) | |
31173 | +{ | |
31174 | + struct pfm_regmap_desc *reg; | |
31175 | + unsigned int i, k; | |
31176 | + int ret; | |
31177 | + | |
31178 | + reg = pmu->pmc_desc; | |
31179 | + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) { | |
31180 | + | |
31181 | + if (!(reg->type & PFM_REG_I)) | |
31182 | + continue; | |
31183 | + | |
31184 | + ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype, | |
31185 | + pfm_pmu_kobj, "pmc%u", i); | |
31186 | + if (ret) | |
31187 | + goto undo_pmcs; | |
31188 | + | |
31189 | + ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group); | |
31190 | + if (ret) { | |
31191 | + kobject_del(®->kobj); | |
31192 | + goto undo_pmcs; | |
31193 | + } | |
31194 | + } | |
31195 | + | |
31196 | + reg = pmu->pmd_desc; | |
31197 | + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) { | |
31198 | + | |
31199 | + if (!(reg->type & PFM_REG_I)) | |
31200 | + continue; | |
31201 | + | |
31202 | + ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype, | |
31203 | + pfm_pmu_kobj, "pmd%u", i); | |
31204 | + if (ret) | |
31205 | + goto undo_pmds; | |
31206 | + | |
31207 | + ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group); | |
31208 | + if (ret) { | |
31209 | + kobject_del(®->kobj); | |
31210 | + goto undo_pmds; | |
31211 | + } | |
31212 | + } | |
31213 | + return 0; | |
31214 | +undo_pmds: | |
31215 | + reg = pmu->pmd_desc; | |
31216 | + for (k = 0; k < i; k++, reg++) { | |
31217 | + if (!(reg->type & PFM_REG_I)) | |
31218 | + continue; | |
31219 | + sysfs_remove_group(®->kobj, &pfm_reg_attr_group); | |
31220 | + kobject_del(®->kobj); | |
31221 | + } | |
31222 | + i = pmu->num_pmc_entries; | |
31223 | + /* fall through */ | |
31224 | +undo_pmcs: | |
31225 | + reg = pmu->pmc_desc; | |
31226 | + for (k = 0; k < i; k++, reg++) { | |
31227 | + if (!(reg->type & PFM_REG_I)) | |
31228 | + continue; | |
31229 | + sysfs_remove_group(®->kobj, &pfm_reg_attr_group); | |
31230 | + kobject_del(®->kobj); | |
31231 | + } | |
31232 | + return ret; | |
31233 | +} | |
31234 | + | |
31235 | +static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu) | |
31236 | +{ | |
31237 | + struct pfm_regmap_desc *reg; | |
31238 | + unsigned int i; | |
31239 | + | |
31240 | + reg = pmu->pmc_desc; | |
31241 | + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) { | |
31242 | + | |
31243 | + if (!(reg->type & PFM_REG_I)) | |
31244 | + continue; | |
31245 | + | |
31246 | + sysfs_remove_group(®->kobj, &pfm_reg_attr_group); | |
31247 | + kobject_del(®->kobj); | |
31248 | + } | |
31249 | + | |
31250 | + reg = pmu->pmd_desc; | |
31251 | + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) { | |
31252 | + | |
31253 | + if (!(reg->type & PFM_REG_I)) | |
31254 | + continue; | |
31255 | + | |
31256 | + sysfs_remove_group(®->kobj, &pfm_reg_attr_group); | |
31257 | + kobject_del(®->kobj); | |
31258 | + } | |
31259 | + return 0; | |
31260 | +} | |
31261 | + | |
31262 | +/* | |
31263 | + * when a PMU description module is inserted, we create | |
31264 | + * a pmu_desc subdir in sysfs and we populate it with | |
31265 | + * PMU specific information, such as register mappings | |
31266 | + */ | |
31267 | +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu) | |
31268 | +{ | |
31269 | + int ret; | |
31270 | + | |
31271 | + pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj); | |
31272 | + if (!pfm_pmu_kobj) | |
31273 | + return -ENOMEM; | |
31274 | + | |
31275 | + ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); | |
31276 | + if (ret) { | |
31277 | + /* will release pfm_pmu_kobj */ | |
31278 | + kobject_put(pfm_pmu_kobj); | |
31279 | + return ret; | |
31280 | + } | |
31281 | + | |
31282 | + ret = pfm_sysfs_add_pmu_regs(pmu); | |
31283 | + if (ret) { | |
31284 | + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); | |
31285 | + /* will release pfm_pmu_kobj */ | |
31286 | + kobject_put(pfm_pmu_kobj); | |
31287 | + } else | |
31288 | + kobject_uevent(pfm_pmu_kobj, KOBJ_ADD); | |
31289 | + | |
31290 | + return ret; | |
31291 | +} | |
31292 | + | |
31293 | +/* | |
31294 | + * when a PMU description module is removed, we also remove | |
31295 | + * all its information from sysfs, i.e., the pmu_desc subdir | |
31296 | + * disappears | |
31297 | + */ | |
31298 | +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu) | |
31299 | +{ | |
31300 | + pfm_sysfs_del_pmu_regs(pmu); | |
31301 | + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); | |
31302 | + kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE); | |
31303 | + kobject_put(pfm_pmu_kobj); | |
31304 | + pfm_pmu_kobj = NULL; | |
31305 | + return 0; | |
31306 | +} | |
31307 | + | |
31308 | +static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf) | |
31309 | +{ | |
31310 | + struct pfm_smpl_fmt *fmt = data; | |
31311 | + | |
31312 | + if (is_attr_name(attr, "version")) | |
31313 | + return snprintf(buf, PAGE_SIZE, "%u.%u\n", | |
31314 | + fmt->fmt_version >> 16 & 0xffff, | |
31315 | + fmt->fmt_version & 0xffff); | |
31316 | + return 0; | |
31317 | +} | |
31318 | + | |
/*
 * Per-format "version" attribute, defined by hand:
 * do not use predefined macros because of name conflict
 * with /sys/kernel/perfmon/version
 */
struct pfm_attribute attr_fmt_version = {
	.attr = { .name = "version", .mode = 0444 },
	.show = pfm_fmt_show,
};

/* NULL-terminated list of attributes attached to every format kobject */
static struct attribute *pfm_fmt_attrs[] = {
	&attr_fmt_version.attr,
	NULL
};

/* group used to create/remove all format attributes in a single call */
static struct attribute_group pfm_fmt_attr_group = {
	.attrs = pfm_fmt_attrs,
};
31336 | + | |
31337 | +/* | |
31338 | + * when a sampling format module is inserted, we populate | |
31339 | + * sysfs with some information | |
31340 | + */ | |
31341 | +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt) | |
31342 | +{ | |
31343 | + int ret; | |
31344 | + | |
31345 | + ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype, | |
31346 | + pfm_fmt_kobj, fmt->fmt_name); | |
31347 | + if (ret) | |
31348 | + return ret; | |
31349 | + | |
31350 | + ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group); | |
31351 | + if (ret) | |
31352 | + kobject_del(&fmt->kobj); | |
31353 | + else | |
31354 | + kobject_uevent(&fmt->kobj, KOBJ_ADD); | |
31355 | + | |
31356 | + return ret; | |
31357 | +} | |
31358 | + | |
/*
 * when a sampling format module is removed, its information
 * must also be removed from sysfs, i.e., its directory under
 * /sys/kernel/perfmon/formats/ disappears
 */
void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
{
	/* remove attribute files first, then the directory itself */
	sysfs_remove_group(&fmt->kobj, &pfm_fmt_attr_group);
	/* the uevent must be emitted while the kobject is still registered */
	kobject_uevent(&fmt->kobj, KOBJ_REMOVE);
	kobject_del(&fmt->kobj);
}
31369 | + | |
31370 | +int __init pfm_init_sysfs(void) | |
31371 | +{ | |
31372 | + int ret; | |
31373 | + | |
31374 | + pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj); | |
31375 | + if (!pfm_kernel_kobj) { | |
31376 | + PFM_ERR("cannot add kernel object: /sys/kernel/perfmon"); | |
31377 | + return -ENOMEM; | |
31378 | + } | |
31379 | + | |
31380 | + ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group); | |
31381 | + if (ret) { | |
31382 | + kobject_put(pfm_kernel_kobj); | |
31383 | + return ret; | |
31384 | + } | |
31385 | + | |
31386 | + pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj); | |
31387 | + if (ret) { | |
31388 | + PFM_ERR("cannot add fmt object: %d", ret); | |
31389 | + goto error_fmt; | |
31390 | + } | |
31391 | + if (pfm_pmu_conf) | |
31392 | + pfm_sysfs_add_pmu(pfm_pmu_conf); | |
31393 | + | |
31394 | + pfm_sysfs_builtin_fmt_add(); | |
31395 | + | |
31396 | + return 0; | |
31397 | + | |
31398 | +error_fmt: | |
31399 | + kobject_del(pfm_kernel_kobj); | |
31400 | + return ret; | |
31401 | +} |