]>
Commit | Line | Data |
---|---|---|
dee5ea7a KS |
1 | //===-- sanitizer_procmaps_mac.cc -----------------------------------------===// |
2 | // | |
3 | // This file is distributed under the University of Illinois Open Source | |
4 | // License. See LICENSE.TXT for details. | |
5 | // | |
6 | //===----------------------------------------------------------------------===// | |
7 | // | |
8 | // Information about the process mappings (Mac-specific parts). | |
9 | //===----------------------------------------------------------------------===// | |
10 | ||
11 | #include "sanitizer_platform.h" | |
12 | #if SANITIZER_MAC | |
13 | #include "sanitizer_common.h" | |
14 | #include "sanitizer_placement_new.h" | |
15 | #include "sanitizer_procmaps.h" | |
16 | ||
17 | #include <mach-o/dyld.h> | |
18 | #include <mach-o/loader.h> | |
5d3805fc JJ |
19 | #include <mach/mach.h> |
20 | ||
21 | // These are not available in older macOS SDKs. | |
22 | #ifndef CPU_SUBTYPE_X86_64_H | |
23 | #define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */ | |
24 | #endif | |
25 | #ifndef CPU_SUBTYPE_ARM_V7S | |
26 | #define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */ | |
27 | #endif | |
28 | #ifndef CPU_SUBTYPE_ARM_V7K | |
29 | #define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12) | |
30 | #endif | |
31 | #ifndef CPU_TYPE_ARM64 | |
32 | #define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) | |
33 | #endif | |
dee5ea7a KS |
34 | |
35 | namespace __sanitizer { | |
36 | ||
5d3805fc JJ |
// Contains information used to iterate through sections.
// NextSegmentLoad() fills this in for a segment load command when the caller
// supplied a non-null pointer; MemoryMappedSegment::AddAddressRanges() and
// NextSectionLoad() then consume it one section at a time.
struct MemoryMappedSegmentData {
  // Segment name, copied from the segment load command's segname field.
  char name[kMaxSegName];
  // Number of sections still to be visited; counted down while iterating.
  uptr nsects;
  // Address of the next section header to read; initially the first byte
  // after the segment command itself.
  char *current_load_cmd_addr;
  // Load command type the segment came from (LC_SEGMENT or LC_SEGMENT_64);
  // selects which section header layout to use.
  u32 lc_type;
  // Value added to each (masked) section address to get its runtime address.
  uptr base_virt_addr;
  // Mask applied to each section address before base_virt_addr is added.
  uptr addr_mask;
};
46 | ||
47 | template <typename Section> | |
48 | static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data, | |
49 | bool isWritable) { | |
50 | const Section *sc = (const Section *)data->current_load_cmd_addr; | |
51 | data->current_load_cmd_addr += sizeof(Section); | |
52 | ||
53 | uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr; | |
54 | uptr sec_end = sec_start + sc->size; | |
55 | module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable, | |
56 | sc->sectname); | |
57 | } | |
58 | ||
59 | void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) { | |
60 | // Don't iterate over sections when the caller hasn't set up the | |
61 | // data pointer, when there are no sections, or when the segment | |
62 | // is executable. Avoid iterating over executable sections because | |
63 | // it will confuse libignore, and because the extra granularity | |
64 | // of information is not needed by any sanitizers. | |
65 | if (!data_ || !data_->nsects || IsExecutable()) { | |
66 | module->addAddressRange(start, end, IsExecutable(), IsWritable(), | |
67 | data_ ? data_->name : nullptr); | |
68 | return; | |
69 | } | |
70 | ||
71 | do { | |
72 | if (data_->lc_type == LC_SEGMENT) { | |
73 | NextSectionLoad<struct section>(module, data_, IsWritable()); | |
74 | #ifdef MH_MAGIC_64 | |
75 | } else if (data_->lc_type == LC_SEGMENT_64) { | |
76 | NextSectionLoad<struct section_64>(module, data_, IsWritable()); | |
77 | #endif | |
78 | } | |
79 | } while (--data_->nsects); | |
80 | } | |
81 | ||
dee5ea7a KS |
82 | MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) { |
83 | Reset(); | |
84 | } | |
85 | ||
86 | MemoryMappingLayout::~MemoryMappingLayout() { | |
87 | } | |
88 | ||
89 | // More information about Mach-O headers can be found in mach-o/loader.h | |
90 | // Each Mach-O image has a header (mach_header or mach_header_64) starting with | |
91 | // a magic number, and a list of linker load commands directly following the | |
92 | // header. | |
93 | // A load command is at least two 32-bit words: the command type and the | |
94 | // command size in bytes. We're interested only in segment load commands | |
95 | // (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped | |
96 | // into the task's address space. | |
97 | // The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or | |
98 | // segment_command_64 correspond to the memory address, memory size and the | |
99 | // file offset of the current memory segment. | |
100 | // Because these fields are taken from the images as is, one needs to add | |
101 | // _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime. | |
102 | ||
103 | void MemoryMappingLayout::Reset() { | |
104 | // Count down from the top. | |
105 | // TODO(glider): as per man 3 dyld, iterating over the headers with | |
106 | // _dyld_image_count is thread-unsafe. We need to register callbacks for | |
107 | // adding and removing images which will invalidate the MemoryMappingLayout | |
108 | // state. | |
5d3805fc JJ |
109 | data_.current_image = _dyld_image_count(); |
110 | data_.current_load_cmd_count = -1; | |
111 | data_.current_load_cmd_addr = 0; | |
112 | data_.current_magic = 0; | |
113 | data_.current_filetype = 0; | |
114 | data_.current_arch = kModuleArchUnknown; | |
115 | internal_memset(data_.current_uuid, 0, kModuleUUIDSize); | |
dee5ea7a KS |
116 | } |
117 | ||
5d3805fc JJ |
118 | // The dyld load address should be unchanged throughout process execution, |
119 | // and it is expensive to compute once many libraries have been loaded, | |
120 | // so cache it here and do not reset. | |
121 | static mach_header *dyld_hdr = 0; | |
122 | static const char kDyldPath[] = "/usr/lib/dyld"; | |
123 | static const int kDyldImageIdx = -1; | |
124 | ||
dee5ea7a KS |
// static
// Deliberately empty: this implementation keeps no cache of the mappings.
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}
129 | ||
// Deliberately empty: there is no cache to load from in this implementation.
void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}
133 | ||
5d3805fc JJ |
134 | // _dyld_get_image_header() and related APIs don't report dyld itself. |
135 | // We work around this by manually recursing through the memory map | |
136 | // until we hit a Mach header matching dyld instead. These recurse | |
137 | // calls are expensive, but the first memory map generation occurs | |
138 | // early in the process, when dyld is one of the only images loaded, | |
139 | // so it will be hit after only a few iterations. | |
140 | static mach_header *get_dyld_image_header() { | |
141 | mach_port_name_t port; | |
142 | if (task_for_pid(mach_task_self(), internal_getpid(), &port) != | |
143 | KERN_SUCCESS) { | |
144 | return nullptr; | |
145 | } | |
146 | ||
147 | unsigned depth = 1; | |
148 | vm_size_t size = 0; | |
149 | vm_address_t address = 0; | |
150 | kern_return_t err = KERN_SUCCESS; | |
151 | mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64; | |
152 | ||
153 | while (true) { | |
154 | struct vm_region_submap_info_64 info; | |
155 | err = vm_region_recurse_64(port, &address, &size, &depth, | |
156 | (vm_region_info_t)&info, &count); | |
157 | if (err != KERN_SUCCESS) return nullptr; | |
158 | ||
159 | if (size >= sizeof(mach_header) && info.protection & kProtectionRead) { | |
160 | mach_header *hdr = (mach_header *)address; | |
161 | if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) && | |
162 | hdr->filetype == MH_DYLINKER) { | |
163 | return hdr; | |
164 | } | |
165 | } | |
166 | address += size; | |
167 | } | |
168 | } | |
169 | ||
170 | const mach_header *get_dyld_hdr() { | |
171 | if (!dyld_hdr) dyld_hdr = get_dyld_image_header(); | |
172 | ||
173 | return dyld_hdr; | |
174 | } | |
175 | ||
dee5ea7a | 176 | // Next and NextSegmentLoad were inspired by base/sysinfo.cc in |
10189819 | 177 | // Google Perftools, https://github.com/gperftools/gperftools. |
dee5ea7a KS |
178 | |
179 | // NextSegmentLoad scans the current image for the next segment load command | |
180 | // and returns the start and end addresses and file offset of the corresponding | |
181 | // segment. | |
182 | // Note that the segment addresses are not necessarily sorted. | |
5d3805fc JJ |
183 | template <u32 kLCSegment, typename SegmentCommand> |
184 | static bool NextSegmentLoad(MemoryMappedSegment *segment, | |
185 | MemoryMappedSegmentData *seg_data, MemoryMappingLayoutData &layout_data) { | |
186 | const char *lc = layout_data.current_load_cmd_addr; | |
187 | layout_data.current_load_cmd_addr += ((const load_command *)lc)->cmdsize; | |
dee5ea7a | 188 | if (((const load_command *)lc)->cmd == kLCSegment) { |
dee5ea7a | 189 | const SegmentCommand* sc = (const SegmentCommand *)lc; |
5d3805fc JJ |
190 | uptr base_virt_addr, addr_mask; |
191 | if (layout_data.current_image == kDyldImageIdx) { | |
192 | base_virt_addr = (uptr)get_dyld_hdr(); | |
193 | // vmaddr is masked with 0xfffff because on macOS versions < 10.12, | |
194 | // it contains an absolute address rather than an offset for dyld. | |
195 | // To make matters even more complicated, this absolute address | |
196 | // isn't actually the absolute segment address, but the offset portion | |
197 | // of the address is accurate when combined with the dyld base address, | |
198 | // and the mask will give just this offset. | |
199 | addr_mask = 0xfffff; | |
200 | } else { | |
201 | base_virt_addr = | |
202 | (uptr)_dyld_get_image_vmaddr_slide(layout_data.current_image); | |
203 | addr_mask = ~0; | |
866e32ad | 204 | } |
5d3805fc JJ |
205 | |
206 | segment->start = (sc->vmaddr & addr_mask) + base_virt_addr; | |
207 | segment->end = segment->start + sc->vmsize; | |
208 | // Most callers don't need section information, so only fill this struct | |
209 | // when required. | |
210 | if (seg_data) { | |
211 | seg_data->nsects = sc->nsects; | |
212 | seg_data->current_load_cmd_addr = | |
213 | (char *)lc + sizeof(SegmentCommand); | |
214 | seg_data->lc_type = kLCSegment; | |
215 | seg_data->base_virt_addr = base_virt_addr; | |
216 | seg_data->addr_mask = addr_mask; | |
217 | internal_strncpy(seg_data->name, sc->segname, | |
218 | ARRAY_SIZE(seg_data->name)); | |
dee5ea7a | 219 | } |
5d3805fc JJ |
220 | |
221 | // Return the initial protection. | |
222 | segment->protection = sc->initprot; | |
223 | segment->offset = (layout_data.current_filetype == | |
224 | /*MH_EXECUTE*/ 0x2) | |
225 | ? sc->vmaddr | |
226 | : sc->fileoff; | |
227 | if (segment->filename) { | |
228 | const char *src = (layout_data.current_image == kDyldImageIdx) | |
229 | ? kDyldPath | |
230 | : _dyld_get_image_name(layout_data.current_image); | |
231 | internal_strncpy(segment->filename, src, segment->filename_size); | |
dee5ea7a | 232 | } |
5d3805fc JJ |
233 | segment->arch = layout_data.current_arch; |
234 | internal_memcpy(segment->uuid, layout_data.current_uuid, kModuleUUIDSize); | |
dee5ea7a KS |
235 | return true; |
236 | } | |
237 | return false; | |
238 | } | |
239 | ||
5d3805fc JJ |
240 | ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) { |
241 | cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK; | |
242 | switch (cputype) { | |
243 | case CPU_TYPE_I386: | |
244 | return kModuleArchI386; | |
245 | case CPU_TYPE_X86_64: | |
246 | if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64; | |
247 | if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H; | |
248 | CHECK(0 && "Invalid subtype of x86_64"); | |
249 | return kModuleArchUnknown; | |
250 | case CPU_TYPE_ARM: | |
251 | if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6; | |
252 | if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7; | |
253 | if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S; | |
254 | if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K; | |
255 | CHECK(0 && "Invalid subtype of ARM"); | |
256 | return kModuleArchUnknown; | |
257 | case CPU_TYPE_ARM64: | |
258 | return kModuleArchARM64; | |
259 | default: | |
260 | CHECK(0 && "Invalid CPU type"); | |
261 | return kModuleArchUnknown; | |
262 | } | |
263 | } | |
264 | ||
265 | static const load_command *NextCommand(const load_command *lc) { | |
266 | return (const load_command *)((char *)lc + lc->cmdsize); | |
267 | } | |
268 | ||
269 | static void FindUUID(const load_command *first_lc, u8 *uuid_output) { | |
270 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
271 | if (lc->cmd != LC_UUID) continue; | |
272 | ||
273 | const uuid_command *uuid_lc = (const uuid_command *)lc; | |
274 | const uint8_t *uuid = &uuid_lc->uuid[0]; | |
275 | internal_memcpy(uuid_output, uuid, kModuleUUIDSize); | |
276 | return; | |
277 | } | |
278 | } | |
279 | ||
280 | static bool IsModuleInstrumented(const load_command *first_lc) { | |
281 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
282 | if (lc->cmd != LC_LOAD_DYLIB) continue; | |
283 | ||
284 | const dylib_command *dylib_lc = (const dylib_command *)lc; | |
285 | uint32_t dylib_name_offset = dylib_lc->dylib.name.offset; | |
286 | const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset; | |
287 | dylib_name = StripModuleName(dylib_name); | |
288 | if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) { | |
289 | return true; | |
290 | } | |
291 | } | |
292 | return false; | |
293 | } | |
294 | ||
295 | bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { | |
296 | for (; data_.current_image >= kDyldImageIdx; data_.current_image--) { | |
297 | const mach_header *hdr = (data_.current_image == kDyldImageIdx) | |
298 | ? get_dyld_hdr() | |
299 | : _dyld_get_image_header(data_.current_image); | |
dee5ea7a | 300 | if (!hdr) continue; |
5d3805fc | 301 | if (data_.current_load_cmd_count < 0) { |
dee5ea7a | 302 | // Set up for this image; |
5d3805fc JJ |
303 | data_.current_load_cmd_count = hdr->ncmds; |
304 | data_.current_magic = hdr->magic; | |
305 | data_.current_filetype = hdr->filetype; | |
306 | data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype); | |
307 | switch (data_.current_magic) { | |
dee5ea7a KS |
308 | #ifdef MH_MAGIC_64 |
309 | case MH_MAGIC_64: { | |
5d3805fc | 310 | data_.current_load_cmd_addr = (char *)hdr + sizeof(mach_header_64); |
dee5ea7a KS |
311 | break; |
312 | } | |
313 | #endif | |
314 | case MH_MAGIC: { | |
5d3805fc | 315 | data_.current_load_cmd_addr = (char *)hdr + sizeof(mach_header); |
dee5ea7a KS |
316 | break; |
317 | } | |
318 | default: { | |
319 | continue; | |
320 | } | |
321 | } | |
5d3805fc JJ |
322 | FindUUID((const load_command *)data_.current_load_cmd_addr, |
323 | data_.current_uuid); | |
324 | data_.current_instrumented = IsModuleInstrumented( | |
325 | (const load_command *)data_.current_load_cmd_addr); | |
dee5ea7a KS |
326 | } |
327 | ||
5d3805fc JJ |
328 | for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) { |
329 | switch (data_.current_magic) { | |
330 | // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64. | |
dee5ea7a KS |
331 | #ifdef MH_MAGIC_64 |
332 | case MH_MAGIC_64: { | |
333 | if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>( | |
5d3805fc | 334 | segment, segment->data_, data_)) |
dee5ea7a KS |
335 | return true; |
336 | break; | |
337 | } | |
338 | #endif | |
339 | case MH_MAGIC: { | |
340 | if (NextSegmentLoad<LC_SEGMENT, struct segment_command>( | |
5d3805fc | 341 | segment, segment->data_, data_)) |
dee5ea7a KS |
342 | return true; |
343 | break; | |
344 | } | |
345 | } | |
346 | } | |
347 | // If we get here, no more load_cmd's in this image talk about | |
348 | // segments. Go on to the next image. | |
349 | } | |
350 | return false; | |
351 | } | |
352 | ||
10189819 | 353 | void MemoryMappingLayout::DumpListOfModules( |
5d3805fc | 354 | InternalMmapVectorNoCtor<LoadedModule> *modules) { |
dee5ea7a | 355 | Reset(); |
696d846a | 356 | InternalScopedString module_name(kMaxPathLength); |
5d3805fc JJ |
357 | MemoryMappedSegment segment(module_name.data(), kMaxPathLength); |
358 | MemoryMappedSegmentData data; | |
359 | segment.data_ = &data; | |
360 | while (Next(&segment)) { | |
361 | if (segment.filename[0] == '\0') continue; | |
696d846a | 362 | LoadedModule *cur_module = nullptr; |
10189819 | 363 | if (!modules->empty() && |
5d3805fc | 364 | 0 == internal_strcmp(segment.filename, modules->back().full_name())) { |
10189819 | 365 | cur_module = &modules->back(); |
dee5ea7a | 366 | } else { |
10189819 MO |
367 | modules->push_back(LoadedModule()); |
368 | cur_module = &modules->back(); | |
5d3805fc JJ |
369 | cur_module->set(segment.filename, segment.start, segment.arch, |
370 | segment.uuid, data_.current_instrumented); | |
dee5ea7a | 371 | } |
5d3805fc | 372 | segment.AddAddressRanges(cur_module); |
dee5ea7a | 373 | } |
dee5ea7a KS |
374 | } |
375 | ||
376 | } // namespace __sanitizer | |
377 | ||
378 | #endif // SANITIZER_MAC |