]>
Commit | Line | Data |
---|---|---|
3ca235ed JM |
1 | #!/usr/bin/python |
2 | # Check that use of symbols declared in a given header does not result | |
3 | # in any symbols being brought in that are not reserved with external | |
4 | # linkage for the given standard. | |
04277e02 | 5 | # Copyright (C) 2014-2019 Free Software Foundation, Inc. |
3ca235ed JM |
6 | # This file is part of the GNU C Library. |
7 | # | |
8 | # The GNU C Library is free software; you can redistribute it and/or | |
9 | # modify it under the terms of the GNU Lesser General Public | |
10 | # License as published by the Free Software Foundation; either | |
11 | # version 2.1 of the License, or (at your option) any later version. | |
12 | # | |
13 | # The GNU C Library is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | # Lesser General Public License for more details. | |
17 | # | |
18 | # You should have received a copy of the GNU Lesser General Public | |
19 | # License along with the GNU C Library; if not, see | |
20 | # <http://www.gnu.org/licenses/>. | |
21 | ||
22 | import argparse | |
23 | from collections import defaultdict | |
24 | import os.path | |
25 | import re | |
26 | import subprocess | |
27 | import sys | |
28 | import tempfile | |
29 | ||
30 | import glibcconform | |
31 | ||
# Symbols that are tolerated for now even though they are not reserved
# with external linkage; each entry is tracked by the bug noted beside it.
WHITELIST = {
    # Bug 17576: stdin, stdout, stderr are only reserved with external
    # linkage when stdio.h is included (and possibly not then), not
    # generally.
    'stdin',
    'stdout',
    'stderr',
    # Bug 18442: re_syntax_options is wrongly brought in by regcomp and
    # used by re_comp.
    're_syntax_options',
}
42 | ||
43 | ||
def list_syms(filename):
    """Collect the GLOBAL and WEAK symbols from saved readelf -s output.

    filename names a text file holding readelf -s output, optionally
    for several objects, each introduced by a 'File: ' line.

    Returns a list of (object, symbol, binding, defined) tuples in
    input order.  object is the last '/'-separated component of the
    most recent 'File: ' line, or filename itself before any such line
    is seen; binding is 'GLOBAL' or 'WEAK'; defined is False exactly
    when the section index column is 'UND'.  Symbol names containing
    anything other than letters, digits and underscores are skipped.
    """
    file_prefix = 'File: '
    symbols = []
    obj_name = filename
    with open(filename, 'r') as readelf_output:
        for raw in readelf_output:
            text = raw.rstrip()
            if text.startswith(file_prefix):
                # Start of a new object's table: keep only its base name.
                obj_name = text[len(file_prefix):].split('/')[-1]
                continue
            # Architecture-specific st_other bits are printed inside []
            # and would shift the columns, so drop them before splitting.
            columns = re.sub(r'\[.*?\]', '', text).split()
            if len(columns) < 8:
                continue
            binding, section, symbol = columns[4], columns[6], columns[7]
            if (binding in ('GLOBAL', 'WEAK')
                    and re.fullmatch('[A-Za-z0-9_]+', symbol)):
                symbols.append((obj_name, symbol, binding, section != 'UND'))
    return symbols
71 | ||
72 | ||
def main():
    """Check the link-time namespace of one header; the script entry point.

    Parses the command line, works out which ELF symbols are pulled in
    by using the functions the header declares, and prints the chain of
    references for every such symbol that neither starts with an
    underscore nor appears in the standard-symbol list (extended by
    WHITELIST).  Exits with status 1 if anything was printed, 0
    otherwise.
    """
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    for flag, metavar, help_text in (
            ('--header', 'HEADER', 'name of header'),
            ('--standard', 'STD', 'standard to use when processing header'),
            ('--cc', 'CC', 'C compiler to use'),
            ('--flags', 'CFLAGS', 'Compiler flags to use with CC'),
            ('--stdsyms', 'FILE', 'File with list of standard symbols'),
            ('--libsyms', 'FILE',
             'File with symbol information from libraries'),
            ('--readelf', 'READELF', 'readelf program to use')):
        parser.add_argument(flag, metavar=metavar, help=help_text)
    args = parser.parse_args()

    # Symbols that are acceptable: one per line of the --stdsyms file,
    # plus the temporarily tolerated ones.
    with open(args.stdsyms, 'r') as stdsyms_file:
        stdsyms = {entry.rstrip() for entry in stdsyms_file}
    stdsyms |= WHITELIST

    # Index the GLOBAL and WEAK symbols defined or used in the standard
    # libraries.
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for obj, symbol, binding, is_defined in list_syms(args.libsyms):
        is_strong = binding == 'GLOBAL'
        if is_defined:
            sym_objs[symbol].append(obj)
            if is_strong:
                seen_syms[obj].append(symbol)
        else:
            seen_syms[obj].append(symbol)
            if is_strong:
                strong_undef_syms[obj].append(symbol)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    #   possible that (a) any standard library definition is weak, so
    #   can be overridden by the user's definition, and (b) the symbol
    #   is only used conditionally and not if the program is limited to
    #   standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    #   data symbol rather than a function from the standard library,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    #   in a standard macro being compiled to call foo, this will not be
    #   detected.
    #
    # * Header inclusions should be compiled several times with
    #   different options such as -O2, -D_FORTIFY_SOURCE and
    #   -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    #   from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    #   the basis that if a macro calls an internal function, that
    #   function must also be declared in the header.  However, the
    #   header might also declare implementation-namespace functions
    #   that are not called by any standard macro in the header,
    #   resulting in false positives for any symbols brought in only
    #   through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    #   static linking, when a call is from one shared library to
    #   another or uses a PLT entry for a call within a shared library;
    #   such issues are only detected by this script if the same
    #   namespace issue applies for static linking.
    seen_where = {}
    visited_objs = set()
    all_undef = {}
    pending_undef = {}
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, 'undef.c')
        obj_path = os.path.join(work_dir, 'undef.o')
        symtab_path = os.path.join(work_dir, 'undef.sym')
        # One pointer per exported function, so that the compiled object
        # carries an undefined reference to each of them.
        references = '\n'.join(
            'void *__glibc_test_%s = (void *) &%s;' % (c_sym, c_sym)
            for c_sym in sorted(c_syms))
        with open(c_path, 'w') as c_file:
            c_file.write('#include <%s>\n%s\n' % (args.header, references))
        compile_cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
                       % (args.cc, args.flags,
                          glibcconform.CFLAGS[args.standard],
                          c_path, obj_path))
        subprocess.check_call(compile_cmd, shell=True)
        readelf_cmd = ('LC_ALL=C %s -W -s %s > %s'
                       % (args.readelf, obj_path, symtab_path))
        subprocess.check_call(readelf_cmd, shell=True)
        for _obj, symbol, binding, is_defined in list_syms(symtab_path):
            if is_defined or binding != 'GLOBAL':
                continue
            origin = '[initial] %s' % symbol
            seen_where[symbol] = origin
            all_undef[symbol] = origin
            pending_undef[symbol] = origin

    # Follow strong undefined references through the library objects
    # that define them until no new undefined symbols turn up.  Each
    # object is processed at most once.
    while pending_undef:
        discovered = {}
        for symbol, chain in sorted(pending_undef.items()):
            for obj in sym_objs[symbol]:
                if obj in visited_objs:
                    continue
                visited_objs.add(obj)
                for brought_in in seen_syms[obj]:
                    # Keep the first chain recorded for each symbol.
                    seen_where.setdefault(
                        brought_in,
                        '%s -> [%s] %s' % (chain, obj, brought_in))
                for needed in strong_undef_syms[obj]:
                    if needed in all_undef:
                        continue
                    needed_chain = '%s -> [%s] %s' % (chain, obj, needed)
                    all_undef[needed] = needed_chain
                    discovered[needed] = needed_chain
        pending_undef = discovered

    # Report every symbol brought in that is neither underscore-prefixed
    # nor in the accepted list.
    violations = [symbol for symbol in sorted(seen_where)
                  if not symbol.startswith('_') and symbol not in stdsyms]
    for symbol in violations:
        print(seen_where[symbol])
    sys.exit(1 if violations else 0)
214 | ||
215 | ||
# Run the namespace check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()