]>
Commit | Line | Data |
---|---|---|
f35db108 WM |
1 | //===-- sanitizer_symbolizer.cc -------------------------------------------===// |
2 | // | |
3 | // This file is distributed under the University of Illinois Open Source | |
4 | // License. See LICENSE.TXT for details. | |
5 | // | |
6 | //===----------------------------------------------------------------------===// | |
7 | // | |
8 | // This file is shared between AddressSanitizer and ThreadSanitizer | |
9 | // run-time libraries. See sanitizer_symbolizer.h for details. | |
10 | //===----------------------------------------------------------------------===// | |
11 | ||
12 | #include "sanitizer_common.h" | |
13 | #include "sanitizer_placement_new.h" | |
14 | #include "sanitizer_procmaps.h" | |
15 | #include "sanitizer_symbolizer.h" | |
16 | ||
17 | namespace __sanitizer { | |
18 | ||
19 | void AddressInfo::Clear() { | |
20 | InternalFree(module); | |
21 | InternalFree(function); | |
22 | InternalFree(file); | |
23 | internal_memset(this, 0, sizeof(AddressInfo)); | |
24 | } | |
25 | ||
26 | LoadedModule::LoadedModule(const char *module_name, uptr base_address) { | |
27 | full_name_ = internal_strdup(module_name); | |
28 | base_address_ = base_address; | |
29 | n_ranges_ = 0; | |
30 | } | |
31 | ||
32 | void LoadedModule::addAddressRange(uptr beg, uptr end) { | |
33 | CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); | |
34 | ranges_[n_ranges_].beg = beg; | |
35 | ranges_[n_ranges_].end = end; | |
36 | n_ranges_++; | |
37 | } | |
38 | ||
39 | bool LoadedModule::containsAddress(uptr address) const { | |
40 | for (uptr i = 0; i < n_ranges_; i++) { | |
41 | if (ranges_[i].beg <= address && address < ranges_[i].end) | |
42 | return true; | |
43 | } | |
44 | return false; | |
45 | } | |
46 | ||
47 | // Extracts the prefix of "str" that consists of any characters not | |
48 | // present in "delims" string, and copies this prefix to "result", allocating | |
49 | // space for it. | |
50 | // Returns a pointer to "str" after skipping extracted prefix and first | |
51 | // delimiter char. | |
52 | static const char *ExtractToken(const char *str, const char *delims, | |
53 | char **result) { | |
54 | uptr prefix_len = internal_strcspn(str, delims); | |
55 | *result = (char*)InternalAlloc(prefix_len + 1); | |
56 | internal_memcpy(*result, str, prefix_len); | |
57 | (*result)[prefix_len] = '\0'; | |
58 | const char *prefix_end = str + prefix_len; | |
59 | if (*prefix_end != '\0') prefix_end++; | |
60 | return prefix_end; | |
61 | } | |
62 | ||
63 | // Same as ExtractToken, but converts extracted token to integer. | |
64 | static const char *ExtractInt(const char *str, const char *delims, | |
65 | int *result) { | |
66 | char *buff; | |
67 | const char *ret = ExtractToken(str, delims, &buff); | |
68 | if (buff != 0) { | |
69 | *result = internal_atoll(buff); | |
70 | } | |
71 | InternalFree(buff); | |
72 | return ret; | |
73 | } | |
74 | ||
75 | // ExternalSymbolizer encapsulates communication between the tool and | |
76 | // external symbolizer program, running in a different subprocess, | |
77 | // For now we assume the following protocol: | |
78 | // For each request of the form | |
79 | // <module_name> <module_offset> | |
80 | // passed to STDIN, external symbolizer prints to STDOUT response: | |
81 | // <function_name> | |
82 | // <file_name>:<line_number>:<column_number> | |
83 | // <function_name> | |
84 | // <file_name>:<line_number>:<column_number> | |
85 | // ... | |
86 | // <empty line> | |
87 | class ExternalSymbolizer { | |
88 | public: | |
89 | ExternalSymbolizer(const char *path, int input_fd, int output_fd) | |
90 | : path_(path), | |
91 | input_fd_(input_fd), | |
92 | output_fd_(output_fd), | |
93 | times_restarted_(0) { | |
94 | CHECK(path_); | |
95 | CHECK_NE(input_fd_, kInvalidFd); | |
96 | CHECK_NE(output_fd_, kInvalidFd); | |
97 | } | |
98 | ||
99 | // Returns the number of frames for a given address, or zero if | |
100 | // symbolization failed. | |
101 | uptr SymbolizeCode(uptr addr, const char *module_name, uptr module_offset, | |
102 | AddressInfo *frames, uptr max_frames) { | |
103 | CHECK(module_name); | |
104 | // FIXME: Make sure this buffer always has sufficient size to hold | |
105 | // large debug info. | |
106 | static const int kMaxBufferSize = 4096; | |
107 | InternalScopedBuffer<char> buffer(kMaxBufferSize); | |
108 | char *buffer_data = buffer.data(); | |
109 | internal_snprintf(buffer_data, kMaxBufferSize, "%s 0x%zx\n", | |
110 | module_name, module_offset); | |
111 | if (!writeToSymbolizer(buffer_data, internal_strlen(buffer_data))) | |
112 | return 0; | |
113 | ||
114 | if (!readFromSymbolizer(buffer_data, kMaxBufferSize)) | |
115 | return 0; | |
116 | const char *str = buffer_data; | |
117 | uptr frame_id; | |
118 | CHECK_GT(max_frames, 0); | |
119 | for (frame_id = 0; frame_id < max_frames; frame_id++) { | |
120 | AddressInfo *info = &frames[frame_id]; | |
121 | char *function_name = 0; | |
122 | str = ExtractToken(str, "\n", &function_name); | |
123 | CHECK(function_name); | |
124 | if (function_name[0] == '\0') { | |
125 | // There are no more frames. | |
126 | break; | |
127 | } | |
128 | info->Clear(); | |
129 | info->FillAddressAndModuleInfo(addr, module_name, module_offset); | |
130 | info->function = function_name; | |
131 | // Parse <file>:<line>:<column> buffer. | |
132 | char *file_line_info = 0; | |
133 | str = ExtractToken(str, "\n", &file_line_info); | |
134 | CHECK(file_line_info); | |
135 | const char *line_info = ExtractToken(file_line_info, ":", &info->file); | |
136 | line_info = ExtractInt(line_info, ":", &info->line); | |
137 | line_info = ExtractInt(line_info, "", &info->column); | |
138 | InternalFree(file_line_info); | |
139 | ||
140 | // Functions and filenames can be "??", in which case we write 0 | |
141 | // to address info to mark that names are unknown. | |
142 | if (0 == internal_strcmp(info->function, "??")) { | |
143 | InternalFree(info->function); | |
144 | info->function = 0; | |
145 | } | |
146 | if (0 == internal_strcmp(info->file, "??")) { | |
147 | InternalFree(info->file); | |
148 | info->file = 0; | |
149 | } | |
150 | } | |
151 | if (frame_id == 0) { | |
152 | // Make sure we return at least one frame. | |
153 | AddressInfo *info = &frames[0]; | |
154 | info->Clear(); | |
155 | info->FillAddressAndModuleInfo(addr, module_name, module_offset); | |
156 | frame_id = 1; | |
157 | } | |
158 | return frame_id; | |
159 | } | |
160 | ||
161 | bool Restart() { | |
162 | if (times_restarted_ >= kMaxTimesRestarted) return false; | |
163 | times_restarted_++; | |
164 | internal_close(input_fd_); | |
165 | internal_close(output_fd_); | |
166 | return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); | |
167 | } | |
168 | ||
169 | private: | |
170 | bool readFromSymbolizer(char *buffer, uptr max_length) { | |
171 | if (max_length == 0) | |
172 | return true; | |
173 | uptr read_len = 0; | |
174 | while (true) { | |
175 | uptr just_read = internal_read(input_fd_, buffer + read_len, | |
176 | max_length - read_len); | |
177 | // We can't read 0 bytes, as we don't expect external symbolizer to close | |
178 | // its stdout. | |
179 | if (just_read == 0 || just_read == (uptr)-1) { | |
180 | Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); | |
181 | return false; | |
182 | } | |
183 | read_len += just_read; | |
184 | // Empty line marks the end of symbolizer output. | |
185 | if (read_len >= 2 && buffer[read_len - 1] == '\n' && | |
186 | buffer[read_len - 2] == '\n') { | |
187 | break; | |
188 | } | |
189 | } | |
190 | return true; | |
191 | } | |
192 | bool writeToSymbolizer(const char *buffer, uptr length) { | |
193 | if (length == 0) | |
194 | return true; | |
195 | uptr write_len = internal_write(output_fd_, buffer, length); | |
196 | if (write_len == 0 || write_len == (uptr)-1) { | |
197 | Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); | |
198 | return false; | |
199 | } | |
200 | return true; | |
201 | } | |
202 | ||
203 | const char *path_; | |
204 | int input_fd_; | |
205 | int output_fd_; | |
206 | ||
207 | static const uptr kMaxTimesRestarted = 5; | |
208 | uptr times_restarted_; | |
209 | }; | |
210 | ||
211 | static LowLevelAllocator symbolizer_allocator; // Linker initialized. Used by Symbolizer to allocate the module array and the ExternalSymbolizer object. | |
212 | ||
213 | class Symbolizer { | |
214 | public: | |
215 | uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { | |
216 | if (max_frames == 0) | |
217 | return 0; | |
218 | LoadedModule *module = FindModuleForAddress(addr); | |
219 | if (module == 0) | |
220 | return 0; | |
221 | const char *module_name = module->full_name(); | |
222 | uptr module_offset = addr - module->base_address(); | |
223 | uptr actual_frames = 0; | |
224 | if (external_symbolizer_ == 0) { | |
225 | ReportExternalSymbolizerError( | |
226 | "WARNING: Trying to symbolize code, but external " | |
227 | "symbolizer is not initialized!\n"); | |
228 | } else { | |
229 | while (true) { | |
230 | actual_frames = external_symbolizer_->SymbolizeCode( | |
231 | addr, module_name, module_offset, frames, max_frames); | |
232 | if (actual_frames > 0) { | |
233 | // Symbolization was successful. | |
234 | break; | |
235 | } | |
236 | // Try to restart symbolizer subprocess. If we don't succeed, forget | |
237 | // about it and don't try to use it later. | |
238 | if (!external_symbolizer_->Restart()) { | |
239 | ReportExternalSymbolizerError( | |
240 | "WARNING: Failed to use and restart external symbolizer!\n"); | |
241 | external_symbolizer_ = 0; | |
242 | break; | |
243 | } | |
244 | } | |
245 | } | |
246 | if (external_symbolizer_ == 0) { | |
247 | // External symbolizer was not initialized or failed. Fill only data | |
248 | // about module name and offset. | |
249 | AddressInfo *info = &frames[0]; | |
250 | info->Clear(); | |
251 | info->FillAddressAndModuleInfo(addr, module_name, module_offset); | |
252 | return 1; | |
253 | } | |
254 | // Otherwise, the data was filled by external symbolizer. | |
255 | return actual_frames; | |
256 | } | |
a0408454 KS |
257 | |
258 | bool SymbolizeData(uptr addr, AddressInfo *frame) { | |
259 | LoadedModule *module = FindModuleForAddress(addr); | |
260 | if (module == 0) | |
261 | return false; | |
262 | const char *module_name = module->full_name(); | |
263 | uptr module_offset = addr - module->base_address(); | |
264 | frame->FillAddressAndModuleInfo(addr, module_name, module_offset); | |
265 | return true; | |
266 | } | |
267 | ||
f35db108 WM |
268 | bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { |
269 | int input_fd, output_fd; | |
270 | if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) | |
271 | return false; | |
272 | void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); | |
273 | external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, | |
274 | input_fd, output_fd); | |
275 | return true; | |
276 | } | |
277 | ||
278 | private: | |
279 | LoadedModule *FindModuleForAddress(uptr address) { | |
280 | if (modules_ == 0) { | |
281 | modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( | |
282 | kMaxNumberOfModuleContexts * sizeof(LoadedModule))); | |
283 | CHECK(modules_); | |
284 | n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts); | |
285 | CHECK_GT(n_modules_, 0); | |
286 | CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); | |
287 | } | |
288 | for (uptr i = 0; i < n_modules_; i++) { | |
289 | if (modules_[i].containsAddress(address)) { | |
290 | return &modules_[i]; | |
291 | } | |
292 | } | |
293 | return 0; | |
294 | } | |
295 | void ReportExternalSymbolizerError(const char *msg) { | |
296 | // Don't use atomics here for now, as SymbolizeCode can't be called | |
297 | // from multiple threads anyway. | |
298 | static bool reported; | |
299 | if (!reported) { | |
300 | Report(msg); | |
301 | reported = true; | |
302 | } | |
303 | } | |
304 | ||
e297eb60 KS |
305 | // 16K loaded modules should be enough for everyone. |
306 | static const uptr kMaxNumberOfModuleContexts = 1 << 14; | |
f35db108 WM |
307 | LoadedModule *modules_; // Array of module descriptions is leaked. |
308 | uptr n_modules_; | |
309 | ||
310 | ExternalSymbolizer *external_symbolizer_; // Leaked. | |
311 | }; | |
312 | ||
313 | static Symbolizer symbolizer; // Linker initialized. The instance that the free functions SymbolizeCode, SymbolizeData and InitializeExternalSymbolizer forward to. | |
314 | ||
315 | uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { | |
316 | return symbolizer.SymbolizeCode(address, frames, max_frames); | |
317 | } | |
318 | ||
a0408454 KS |
319 | bool SymbolizeData(uptr address, AddressInfo *frame) { |
320 | return symbolizer.SymbolizeData(address, frame); | |
321 | } | |
322 | ||
f35db108 WM |
323 | bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { |
324 | return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); | |
325 | } | |
326 | ||
327 | } // namespace __sanitizer |