]>
Commit | Line | Data |
---|---|---|
15289576 | 1 | #!/usr/bin/python3 |
7b72d14a AK |
2 | # Generate Intel taken branches Linux perf event script for autofdo profiling. |
3 | ||
a945c346 | 4 | # Copyright (C) 2016-2024 Free Software Foundation, Inc. |
7b72d14a AK |
5 | # |
6 | # GCC is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free | |
8 | # Software Foundation; either version 3, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | # for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License | |
17 | # along with GCC; see the file COPYING3. If not see | |
29abd09a | 18 | # <http://www.gnu.org/licenses/>. |
7b72d14a AK |
19 | |
20 | # Run it with perf record -b -e EVENT program ... | |
21 | # The Linux kernel needs to support the PMU of the current CPU, and | |
22 | # it will likely not work in VMs. | |
23 | # Add --all to print for all cpus, otherwise for current cpu. | |
24 | # Add --script to generate shell script to run correct event. | |
25 | # | |
26 | # Requires internet (https) access. This may require setting up a proxy | |
27 | # with export https_proxy=... | |
28 | # | |
15289576 | 29 | import urllib.request |
7b72d14a AK |
30 | import sys |
31 | import json | |
32 | import argparse | |
33 | import collections | |
15289576 | 34 | import os |
950fa855 | 35 | import fnmatch |
7b72d14a | 36 | |
# Root of the Intel perfmon event database; the map file and the per-model
# event lists are fetched relative to this URL.
baseurl = "https://raw.githubusercontent.com/intel/perfmon/main"

# Event names searched for in each downloaded event list.
target_events = (
    'BR_INST_RETIRED.NEAR_TAKEN',
    'BR_INST_EXEC.TAKEN',
    'BR_INST_RETIRED.TAKEN_JCC',
    'BR_INST_TYPE_RETIRED.COND_TAKEN',
)
7b72d14a AK |
43 | |
# Command line: --all/-a processes every matching map-file entry instead of
# only the current CPU; --script collects events for a generated shell
# script instead of printing them one by one.
ap = argparse.ArgumentParser()
ap.add_argument('--all', '-a', help='Print for all CPUs', action='store_true')
ap.add_argument('--script', help='Generate shell script', action='store_true')
args = ap.parse_args()

# Maps a perf event string to the list of CPU models that use it.
eventmap = collections.defaultdict(list)
50 | ||
01d402c5 ER |
def get_cpustr():
    """Return the map-file style identifier of the current CPU.

    The CPU description is read from the file named by the CPUINFO
    environment variable, or from /proc/cpuinfo when that is unset.

    Returns "VENDOR-FAMILY-MODEL" with the model in upper-case hex.  For
    GenuineIntel family 6 model 0x55 (SKX) the stepping is appended as a
    fourth hex field when it is known.  Returns None when the vendor,
    family or model cannot be determined, so the caller can report an
    unknown CPU instead of crashing in the string formatting.
    """
    cpuinfo = os.getenv("CPUINFO")
    if cpuinfo is None:
        cpuinfo = '/proc/cpuinfo'

    vendor = family = model = stepping = None
    # The with-block closes the file even if parsing raises.
    with open(cpuinfo, 'r') as f:
        for line in f:
            n = line.split()
            # Blank separator lines and fields without a value have too
            # few tokens; "key : value" needs at least three.
            if len(n) < 3:
                continue
            if n[0] == 'vendor_id':
                vendor = n[2]
            elif n[0] == 'model' and n[1] == ':':
                model = int(n[2])
            elif n[0] == 'cpu' and n[1] == 'family' and len(n) > 3:
                family = int(n[3])
            elif n[0] == 'stepping' and n[1] == ':':
                stepping = int(n[2])
            # Stop after the first processor entry is fully described.
            if all(v is not None for v in (vendor, family, model, stepping)):
                break

    if vendor is None or family is None or model is None:
        return None

    # stepping for SKX only
    is_skx = vendor == "GenuineIntel" and family == 6 and model == 0x55
    if is_skx and stepping is not None:
        return "%s-%d-%X-%X" % (vendor, family, model, stepping)
    return "%s-%d-%X" % (vendor, family, model)
7b72d14a AK |
74 | |
def find_event(eventurl, model):
    """Download one event list and record its matching branch events.

    eventurl is the URL of a JSON document with a top-level "Events" list;
    model is the CPU model number that list belongs to.  Each event whose
    EventName is in target_events is turned into a perf event string.  With
    --script the model is appended to eventmap under that string, otherwise
    the event is printed.

    Returns the number of matching events.
    """
    print("Downloading", eventurl, file=sys.stderr)
    # The with-block closes the connection even if reading or JSON
    # decoding fails.
    with urllib.request.urlopen(eventurl) as u:
        events = json.loads(u.read())["Events"]

    found = 0
    for j in events:
        if j['EventName'] not in target_events:
            continue
        event = "cpu/event=%s,umask=%s/" % (j['EventCode'], j['UMask'])
        # Events with a positive PEBS field get a trailing "p" modifier.
        if 'PEBS' in j and int(j['PEBS']) > 0:
            event += "p"
        if args.script:
            eventmap[event].append(model)
        else:
            print(j['EventName'], "event for model", model, "is", event)
        found += 1
    return found
93 | ||
# Unless every CPU was requested, identify the current one first.
if not args.all:
    cpu = get_cpustr()
    if not cpu:
        sys.exit("Unknown CPU type")

url = baseurl + "/mapfile.csv"
print("Downloading", url, file=sys.stderr)
found = 0       # matching events seen across all downloaded event lists
cpufound = 0    # map-file rows that matched the CPU selection
# The with-block closes the connection even if a row or a nested download
# raises.
with urllib.request.urlopen(url) as u:
    for j in u:
        n = j.rstrip().decode().split(',')
        # Column 0 is a pattern matched against the CPU string, column 2
        # the event file path, column 3 the event type; only "core" rows
        # are used.
        if (len(n) >= 4
                and (args.all or fnmatch.fnmatch(cpu, n[0]))
                and n[3] == "core"):
            components = n[0].split("-")
            # Third dash-separated field is the model number in hex.
            model = int(components[2], 16)
            cpufound += 1
            found += find_event(baseurl + n[2], model)
113 | ||
# With --script, print a shell script that picks the perf event for the
# running CPU model and runs "perf record" with it.  Backslashes that must
# reach the shell are written as "\\" so the literals contain no invalid
# escape sequences; the emitted text is unchanged.
if args.script:
    print('''#!/bin/sh
# Profile workload for gcc profile feedback (autofdo) using Linux perf.
# Auto generated. To regenerate for new CPUs run
# contrib/gen_autofdo_event.py --script --all in gcc source

# usages:
# gcc-auto-profile program (profile program and children)
# gcc-auto-profile -a sleep X (profile all for X secs, may need root)
# gcc-auto-profile -p PID sleep X (profile PID)
# gcc-auto-profile --kernel -a sleep X (profile kernel)
# gcc-auto-profile --all -a sleep X (profile kernel and user space)

# Identify branches taken event for CPU.
#

FLAGS=u

if [ "$1" = "--kernel" ] ; then
FLAGS=k
shift
fi
if [ "$1" = "--all" ] ; then
FLAGS=uk
shift
fi

if ! grep -q Intel /proc/cpuinfo ; then
echo >&2 "Only Intel CPUs supported"
exit 1
fi

if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi

case `grep -E -q "^cpu family\\s*: 6" /proc/cpuinfo &&
grep -E "^model\\s*:" /proc/cpuinfo | head -n1` in''')
    # One case arm per event: all models but the last are joined with
    # "|" and a line continuation, the last one closes the pattern.
    for event, mod in eventmap.items():
        for m in mod[:-1]:
            print("model*:\\ %s|\\" % m)
        print('model*:\\ %s) E="%s$FLAGS" ;;' % (mod[-1], event))
    print('''*)
echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
exit 1 ;;''')
    print("esac")
    print("set -x")
    print('if ! perf record -e $E -b "$@" ; then')
    print(' # PEBS may not actually be working even if the processor supports it')
    print(' # (e.g., in a virtual machine). Trying to run without /p.')
    print(' set +x')
    print(' echo >&2 "Retrying without /p."')
    print(' E="$(echo "${E}" | sed -e \'s/\\/p/\\//\')"')
    print(' set -x')
    print(' exec perf record -e $E -b "$@"')
    print(' set +x')
    print('fi')
7b72d14a AK |
171 | |
# A single CPU was requested but no "core" row of the map file matched it.
if cpufound == 0 and not args.all:
    sys.exit('CPU %s not found' % cpu)

# No target event was found in any downloaded event list.
if found == 0:
    sys.exit('Branch event not found')