]>
Commit | Line | Data |
---|---|---|
dee5ea7a KS |
1 | //===-- sanitizer_procmaps_mac.cc -----------------------------------------===// |
2 | // | |
3 | // This file is distributed under the University of Illinois Open Source | |
4 | // License. See LICENSE.TXT for details. | |
5 | // | |
6 | //===----------------------------------------------------------------------===// | |
7 | // | |
8 | // Information about the process mappings (Mac-specific parts). | |
9 | //===----------------------------------------------------------------------===// | |
10 | ||
11 | #include "sanitizer_platform.h" | |
12 | #if SANITIZER_MAC | |
13 | #include "sanitizer_common.h" | |
14 | #include "sanitizer_placement_new.h" | |
15 | #include "sanitizer_procmaps.h" | |
16 | ||
17 | #include <mach-o/dyld.h> | |
18 | #include <mach-o/loader.h> | |
5d3805fc JJ |
19 | #include <mach/mach.h> |
20 | ||
21 | // These are not available in older macOS SDKs. | |
22 | #ifndef CPU_SUBTYPE_X86_64_H | |
23 | #define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */ | |
24 | #endif | |
25 | #ifndef CPU_SUBTYPE_ARM_V7S | |
26 | #define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */ | |
27 | #endif | |
28 | #ifndef CPU_SUBTYPE_ARM_V7K | |
29 | #define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12) | |
30 | #endif | |
31 | #ifndef CPU_TYPE_ARM64 | |
32 | #define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) | |
33 | #endif | |
dee5ea7a KS |
34 | |
35 | namespace __sanitizer { | |
36 | ||
5d3805fc JJ |
37 | // Contains information used to iterate through sections. |
38 | struct MemoryMappedSegmentData { | |
39 | char name[kMaxSegName]; | |
40 | uptr nsects; | |
eac97531 | 41 | const char *current_load_cmd_addr; |
5d3805fc JJ |
42 | u32 lc_type; |
43 | uptr base_virt_addr; | |
44 | uptr addr_mask; | |
45 | }; | |
46 | ||
47 | template <typename Section> | |
48 | static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data, | |
49 | bool isWritable) { | |
50 | const Section *sc = (const Section *)data->current_load_cmd_addr; | |
51 | data->current_load_cmd_addr += sizeof(Section); | |
52 | ||
53 | uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr; | |
54 | uptr sec_end = sec_start + sc->size; | |
55 | module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable, | |
56 | sc->sectname); | |
57 | } | |
58 | ||
59 | void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) { | |
60 | // Don't iterate over sections when the caller hasn't set up the | |
61 | // data pointer, when there are no sections, or when the segment | |
62 | // is executable. Avoid iterating over executable sections because | |
63 | // it will confuse libignore, and because the extra granularity | |
64 | // of information is not needed by any sanitizers. | |
65 | if (!data_ || !data_->nsects || IsExecutable()) { | |
66 | module->addAddressRange(start, end, IsExecutable(), IsWritable(), | |
67 | data_ ? data_->name : nullptr); | |
68 | return; | |
69 | } | |
70 | ||
71 | do { | |
72 | if (data_->lc_type == LC_SEGMENT) { | |
73 | NextSectionLoad<struct section>(module, data_, IsWritable()); | |
74 | #ifdef MH_MAGIC_64 | |
75 | } else if (data_->lc_type == LC_SEGMENT_64) { | |
76 | NextSectionLoad<struct section_64>(module, data_, IsWritable()); | |
77 | #endif | |
78 | } | |
79 | } while (--data_->nsects); | |
80 | } | |
81 | ||
dee5ea7a KS |
82 | MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) { |
83 | Reset(); | |
84 | } | |
85 | ||
86 | MemoryMappingLayout::~MemoryMappingLayout() { | |
87 | } | |
88 | ||
89 | // More information about Mach-O headers can be found in mach-o/loader.h | |
90 | // Each Mach-O image has a header (mach_header or mach_header_64) starting with | |
91 | // a magic number, and a list of linker load commands directly following the | |
92 | // header. | |
93 | // A load command is at least two 32-bit words: the command type and the | |
94 | // command size in bytes. We're interested only in segment load commands | |
95 | // (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped | |
96 | // into the task's address space. | |
97 | // The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or | |
98 | // segment_command_64 correspond to the memory address, memory size and the | |
99 | // file offset of the current memory segment. | |
100 | // Because these fields are taken from the images as is, one needs to add | |
101 | // _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime. | |
102 | ||
103 | void MemoryMappingLayout::Reset() { | |
104 | // Count down from the top. | |
105 | // TODO(glider): as per man 3 dyld, iterating over the headers with | |
106 | // _dyld_image_count is thread-unsafe. We need to register callbacks for | |
107 | // adding and removing images which will invalidate the MemoryMappingLayout | |
108 | // state. | |
5d3805fc JJ |
109 | data_.current_image = _dyld_image_count(); |
110 | data_.current_load_cmd_count = -1; | |
111 | data_.current_load_cmd_addr = 0; | |
112 | data_.current_magic = 0; | |
113 | data_.current_filetype = 0; | |
114 | data_.current_arch = kModuleArchUnknown; | |
115 | internal_memset(data_.current_uuid, 0, kModuleUUIDSize); | |
dee5ea7a KS |
116 | } |
117 | ||
5d3805fc JJ |
118 | // The dyld load address should be unchanged throughout process execution, |
119 | // and it is expensive to compute once many libraries have been loaded, | |
120 | // so cache it here and do not reset. | |
121 | static mach_header *dyld_hdr = 0; | |
122 | static const char kDyldPath[] = "/usr/lib/dyld"; | |
123 | static const int kDyldImageIdx = -1; | |
124 | ||
dee5ea7a KS |
125 | // static |
126 | void MemoryMappingLayout::CacheMemoryMappings() { | |
127 | // No-op on Mac for now. | |
128 | } | |
129 | ||
130 | void MemoryMappingLayout::LoadFromCache() { | |
131 | // No-op on Mac for now. | |
132 | } | |
133 | ||
5d3805fc JJ |
134 | // _dyld_get_image_header() and related APIs don't report dyld itself. |
135 | // We work around this by manually recursing through the memory map | |
136 | // until we hit a Mach header matching dyld instead. These recurse | |
137 | // calls are expensive, but the first memory map generation occurs | |
138 | // early in the process, when dyld is one of the only images loaded, | |
139 | // so it will be hit after only a few iterations. | |
140 | static mach_header *get_dyld_image_header() { | |
5d3805fc JJ |
141 | unsigned depth = 1; |
142 | vm_size_t size = 0; | |
143 | vm_address_t address = 0; | |
144 | kern_return_t err = KERN_SUCCESS; | |
145 | mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64; | |
146 | ||
147 | while (true) { | |
148 | struct vm_region_submap_info_64 info; | |
eac97531 | 149 | err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth, |
5d3805fc JJ |
150 | (vm_region_info_t)&info, &count); |
151 | if (err != KERN_SUCCESS) return nullptr; | |
152 | ||
153 | if (size >= sizeof(mach_header) && info.protection & kProtectionRead) { | |
154 | mach_header *hdr = (mach_header *)address; | |
155 | if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) && | |
156 | hdr->filetype == MH_DYLINKER) { | |
157 | return hdr; | |
158 | } | |
159 | } | |
160 | address += size; | |
161 | } | |
162 | } | |
163 | ||
164 | const mach_header *get_dyld_hdr() { | |
165 | if (!dyld_hdr) dyld_hdr = get_dyld_image_header(); | |
166 | ||
167 | return dyld_hdr; | |
168 | } | |
169 | ||
dee5ea7a | 170 | // Next and NextSegmentLoad were inspired by base/sysinfo.cc in |
10189819 | 171 | // Google Perftools, https://github.com/gperftools/gperftools. |
dee5ea7a KS |
172 | |
173 | // NextSegmentLoad scans the current image for the next segment load command | |
174 | // and returns the start and end addresses and file offset of the corresponding | |
175 | // segment. | |
176 | // Note that the segment addresses are not necessarily sorted. | |
5d3805fc JJ |
177 | template <u32 kLCSegment, typename SegmentCommand> |
178 | static bool NextSegmentLoad(MemoryMappedSegment *segment, | |
179 | MemoryMappedSegmentData *seg_data, MemoryMappingLayoutData &layout_data) { | |
180 | const char *lc = layout_data.current_load_cmd_addr; | |
181 | layout_data.current_load_cmd_addr += ((const load_command *)lc)->cmdsize; | |
dee5ea7a | 182 | if (((const load_command *)lc)->cmd == kLCSegment) { |
dee5ea7a | 183 | const SegmentCommand* sc = (const SegmentCommand *)lc; |
5d3805fc JJ |
184 | uptr base_virt_addr, addr_mask; |
185 | if (layout_data.current_image == kDyldImageIdx) { | |
186 | base_virt_addr = (uptr)get_dyld_hdr(); | |
187 | // vmaddr is masked with 0xfffff because on macOS versions < 10.12, | |
188 | // it contains an absolute address rather than an offset for dyld. | |
189 | // To make matters even more complicated, this absolute address | |
190 | // isn't actually the absolute segment address, but the offset portion | |
191 | // of the address is accurate when combined with the dyld base address, | |
192 | // and the mask will give just this offset. | |
193 | addr_mask = 0xfffff; | |
194 | } else { | |
195 | base_virt_addr = | |
196 | (uptr)_dyld_get_image_vmaddr_slide(layout_data.current_image); | |
197 | addr_mask = ~0; | |
866e32ad | 198 | } |
5d3805fc JJ |
199 | |
200 | segment->start = (sc->vmaddr & addr_mask) + base_virt_addr; | |
201 | segment->end = segment->start + sc->vmsize; | |
202 | // Most callers don't need section information, so only fill this struct | |
203 | // when required. | |
204 | if (seg_data) { | |
205 | seg_data->nsects = sc->nsects; | |
206 | seg_data->current_load_cmd_addr = | |
eac97531 | 207 | (const char *)lc + sizeof(SegmentCommand); |
5d3805fc JJ |
208 | seg_data->lc_type = kLCSegment; |
209 | seg_data->base_virt_addr = base_virt_addr; | |
210 | seg_data->addr_mask = addr_mask; | |
211 | internal_strncpy(seg_data->name, sc->segname, | |
212 | ARRAY_SIZE(seg_data->name)); | |
dee5ea7a | 213 | } |
5d3805fc JJ |
214 | |
215 | // Return the initial protection. | |
216 | segment->protection = sc->initprot; | |
217 | segment->offset = (layout_data.current_filetype == | |
218 | /*MH_EXECUTE*/ 0x2) | |
219 | ? sc->vmaddr | |
220 | : sc->fileoff; | |
221 | if (segment->filename) { | |
222 | const char *src = (layout_data.current_image == kDyldImageIdx) | |
223 | ? kDyldPath | |
224 | : _dyld_get_image_name(layout_data.current_image); | |
225 | internal_strncpy(segment->filename, src, segment->filename_size); | |
dee5ea7a | 226 | } |
5d3805fc JJ |
227 | segment->arch = layout_data.current_arch; |
228 | internal_memcpy(segment->uuid, layout_data.current_uuid, kModuleUUIDSize); | |
dee5ea7a KS |
229 | return true; |
230 | } | |
231 | return false; | |
232 | } | |
233 | ||
5d3805fc JJ |
234 | ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) { |
235 | cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK; | |
236 | switch (cputype) { | |
237 | case CPU_TYPE_I386: | |
238 | return kModuleArchI386; | |
239 | case CPU_TYPE_X86_64: | |
240 | if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64; | |
241 | if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H; | |
242 | CHECK(0 && "Invalid subtype of x86_64"); | |
243 | return kModuleArchUnknown; | |
244 | case CPU_TYPE_ARM: | |
245 | if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6; | |
246 | if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7; | |
247 | if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S; | |
248 | if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K; | |
249 | CHECK(0 && "Invalid subtype of ARM"); | |
250 | return kModuleArchUnknown; | |
251 | case CPU_TYPE_ARM64: | |
252 | return kModuleArchARM64; | |
253 | default: | |
254 | CHECK(0 && "Invalid CPU type"); | |
255 | return kModuleArchUnknown; | |
256 | } | |
257 | } | |
258 | ||
259 | static const load_command *NextCommand(const load_command *lc) { | |
eac97531 | 260 | return (const load_command *)((const char *)lc + lc->cmdsize); |
5d3805fc JJ |
261 | } |
262 | ||
263 | static void FindUUID(const load_command *first_lc, u8 *uuid_output) { | |
264 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
265 | if (lc->cmd != LC_UUID) continue; | |
266 | ||
267 | const uuid_command *uuid_lc = (const uuid_command *)lc; | |
268 | const uint8_t *uuid = &uuid_lc->uuid[0]; | |
269 | internal_memcpy(uuid_output, uuid, kModuleUUIDSize); | |
270 | return; | |
271 | } | |
272 | } | |
273 | ||
274 | static bool IsModuleInstrumented(const load_command *first_lc) { | |
275 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
276 | if (lc->cmd != LC_LOAD_DYLIB) continue; | |
277 | ||
278 | const dylib_command *dylib_lc = (const dylib_command *)lc; | |
279 | uint32_t dylib_name_offset = dylib_lc->dylib.name.offset; | |
280 | const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset; | |
281 | dylib_name = StripModuleName(dylib_name); | |
282 | if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) { | |
283 | return true; | |
284 | } | |
285 | } | |
286 | return false; | |
287 | } | |
288 | ||
289 | bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { | |
290 | for (; data_.current_image >= kDyldImageIdx; data_.current_image--) { | |
291 | const mach_header *hdr = (data_.current_image == kDyldImageIdx) | |
292 | ? get_dyld_hdr() | |
293 | : _dyld_get_image_header(data_.current_image); | |
dee5ea7a | 294 | if (!hdr) continue; |
5d3805fc | 295 | if (data_.current_load_cmd_count < 0) { |
dee5ea7a | 296 | // Set up for this image; |
5d3805fc JJ |
297 | data_.current_load_cmd_count = hdr->ncmds; |
298 | data_.current_magic = hdr->magic; | |
299 | data_.current_filetype = hdr->filetype; | |
300 | data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype); | |
301 | switch (data_.current_magic) { | |
dee5ea7a KS |
302 | #ifdef MH_MAGIC_64 |
303 | case MH_MAGIC_64: { | |
eac97531 ML |
304 | data_.current_load_cmd_addr = |
305 | (const char *)hdr + sizeof(mach_header_64); | |
dee5ea7a KS |
306 | break; |
307 | } | |
308 | #endif | |
309 | case MH_MAGIC: { | |
eac97531 | 310 | data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header); |
dee5ea7a KS |
311 | break; |
312 | } | |
313 | default: { | |
314 | continue; | |
315 | } | |
316 | } | |
5d3805fc JJ |
317 | FindUUID((const load_command *)data_.current_load_cmd_addr, |
318 | data_.current_uuid); | |
319 | data_.current_instrumented = IsModuleInstrumented( | |
320 | (const load_command *)data_.current_load_cmd_addr); | |
dee5ea7a KS |
321 | } |
322 | ||
5d3805fc JJ |
323 | for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) { |
324 | switch (data_.current_magic) { | |
325 | // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64. | |
dee5ea7a KS |
326 | #ifdef MH_MAGIC_64 |
327 | case MH_MAGIC_64: { | |
328 | if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>( | |
5d3805fc | 329 | segment, segment->data_, data_)) |
dee5ea7a KS |
330 | return true; |
331 | break; | |
332 | } | |
333 | #endif | |
334 | case MH_MAGIC: { | |
335 | if (NextSegmentLoad<LC_SEGMENT, struct segment_command>( | |
5d3805fc | 336 | segment, segment->data_, data_)) |
dee5ea7a KS |
337 | return true; |
338 | break; | |
339 | } | |
340 | } | |
341 | } | |
342 | // If we get here, no more load_cmd's in this image talk about | |
343 | // segments. Go on to the next image. | |
344 | } | |
345 | return false; | |
346 | } | |
347 | ||
10189819 | 348 | void MemoryMappingLayout::DumpListOfModules( |
5d3805fc | 349 | InternalMmapVectorNoCtor<LoadedModule> *modules) { |
dee5ea7a | 350 | Reset(); |
696d846a | 351 | InternalScopedString module_name(kMaxPathLength); |
5d3805fc JJ |
352 | MemoryMappedSegment segment(module_name.data(), kMaxPathLength); |
353 | MemoryMappedSegmentData data; | |
354 | segment.data_ = &data; | |
355 | while (Next(&segment)) { | |
356 | if (segment.filename[0] == '\0') continue; | |
696d846a | 357 | LoadedModule *cur_module = nullptr; |
10189819 | 358 | if (!modules->empty() && |
5d3805fc | 359 | 0 == internal_strcmp(segment.filename, modules->back().full_name())) { |
10189819 | 360 | cur_module = &modules->back(); |
dee5ea7a | 361 | } else { |
10189819 MO |
362 | modules->push_back(LoadedModule()); |
363 | cur_module = &modules->back(); | |
5d3805fc JJ |
364 | cur_module->set(segment.filename, segment.start, segment.arch, |
365 | segment.uuid, data_.current_instrumented); | |
dee5ea7a | 366 | } |
5d3805fc | 367 | segment.AddAddressRanges(cur_module); |
dee5ea7a | 368 | } |
dee5ea7a KS |
369 | } |
370 | ||
371 | } // namespace __sanitizer | |
372 | ||
373 | #endif // SANITIZER_MAC |