]>
Commit | Line | Data |
---|---|---|
b667dd70 | 1 | //===-- sanitizer_procmaps_mac.cpp ----------------------------------------===// |
dee5ea7a | 2 | // |
b667dd70 ML |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. | |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
dee5ea7a KS |
6 | // |
7 | //===----------------------------------------------------------------------===// | |
8 | // | |
9 | // Information about the process mappings (Mac-specific parts). | |
10 | //===----------------------------------------------------------------------===// | |
11 | ||
12 | #include "sanitizer_platform.h" | |
13 | #if SANITIZER_MAC | |
14 | #include "sanitizer_common.h" | |
15 | #include "sanitizer_placement_new.h" | |
16 | #include "sanitizer_procmaps.h" | |
17 | ||
18 | #include <mach-o/dyld.h> | |
19 | #include <mach-o/loader.h> | |
5d3805fc JJ |
20 | #include <mach/mach.h> |
21 | ||
22 | // These are not available in older macOS SDKs. | |
23 | #ifndef CPU_SUBTYPE_X86_64_H | |
24 | #define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */ | |
25 | #endif | |
26 | #ifndef CPU_SUBTYPE_ARM_V7S | |
27 | #define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */ | |
28 | #endif | |
29 | #ifndef CPU_SUBTYPE_ARM_V7K | |
30 | #define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12) | |
31 | #endif | |
32 | #ifndef CPU_TYPE_ARM64 | |
33 | #define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) | |
34 | #endif | |
dee5ea7a KS |
35 | |
36 | namespace __sanitizer { | |
37 | ||
5d3805fc JJ |
// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  // Segment name copied from segment_command::segname (e.g. "__TEXT").
  char name[kMaxSegName];
  // Number of section records remaining to be consumed by NextSectionLoad.
  uptr nsects;
  // Cursor into the image's load-command area; points at the next
  // section/section_64 record to read.
  const char *current_load_cmd_addr;
  // Either LC_SEGMENT or LC_SEGMENT_64; selects which section layout
  // current_load_cmd_addr points at.
  u32 lc_type;
  // Value added to each (masked) section address to get its runtime address.
  uptr base_virt_addr;
  // Mask applied to raw section addresses before relocation; 0xfffff for
  // dyld on older macOS, ~0 otherwise (see NextSegmentLoad).
  uptr addr_mask;
};
47 | ||
48 | template <typename Section> | |
49 | static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data, | |
50 | bool isWritable) { | |
51 | const Section *sc = (const Section *)data->current_load_cmd_addr; | |
52 | data->current_load_cmd_addr += sizeof(Section); | |
53 | ||
54 | uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr; | |
55 | uptr sec_end = sec_start + sc->size; | |
56 | module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable, | |
57 | sc->sectname); | |
58 | } | |
59 | ||
// Registers this segment's address range(s) on |module|: either the whole
// segment as one range, or (when section data is available) one range per
// section for finer granularity.
void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  // Each NextSectionLoad call consumes one section record and advances
  // data_->current_load_cmd_addr. The guard above ensures nsects > 0, so
  // the do/while body runs exactly nsects times. NOTE: this destructively
  // counts data_->nsects down to 0.
  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}
82 | ||
dee5ea7a KS |
// |cache_enabled| is accepted for interface parity with other platforms but
// is unused here: caching is a no-op on Mac (see CacheMemoryMappings).
MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}
86 | ||
// Nothing to release: iteration state lives in data_ by value.
MemoryMappingLayout::~MemoryMappingLayout() {
}
89 | ||
b667dd70 ML |
// The Mac implementation has no failure mode to report; always "no error".
bool MemoryMappingLayout::Error() const {
  return false;
}
93 | ||
dee5ea7a KS |
94 | // More information about Mach-O headers can be found in mach-o/loader.h |
95 | // Each Mach-O image has a header (mach_header or mach_header_64) starting with | |
96 | // a magic number, and a list of linker load commands directly following the | |
97 | // header. | |
98 | // A load command is at least two 32-bit words: the command type and the | |
99 | // command size in bytes. We're interested only in segment load commands | |
100 | // (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped | |
101 | // into the task's address space. | |
102 | // The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or | |
103 | // segment_command_64 correspond to the memory address, memory size and the | |
104 | // file offset of the current memory segment. | |
105 | // Because these fields are taken from the images as is, one needs to add | |
106 | // _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime. | |
107 | ||
// Rewinds the iterator to the start of the address-space walk performed by
// Next(): the image index is reset to the top and the per-image load-command
// state is cleared to its "needs setup" sentinels.
void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  // -1 tells Next() to (re)initialize per-image state for the current image.
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}
122 | ||
5d3805fc JJ |
// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
// Lazily filled by get_dyld_hdr(); nullptr until the first lookup.
static mach_header *dyld_hdr = 0;
// Filename reported for the synthetic dyld "image" (see NextSegmentLoad).
static const char kDyldPath[] = "/usr/lib/dyld";
// Sentinel image index used for dyld, which _dyld_get_image_header() does
// not report; real images use indices >= 0.
static const int kDyldImageIdx = -1;
129 | ||
dee5ea7a KS |
// static
// Intentionally empty: the Mac implementation re-reads dyld state on each
// walk instead of caching a snapshot.
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}
134 | ||
// Intentionally empty: counterpart of CacheMemoryMappings(), which never
// stores anything on Mac.
void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}
138 | ||
5d3805fc JJ |
139 | // _dyld_get_image_header() and related APIs don't report dyld itself. |
140 | // We work around this by manually recursing through the memory map | |
141 | // until we hit a Mach header matching dyld instead. These recurse | |
142 | // calls are expensive, but the first memory map generation occurs | |
143 | // early in the process, when dyld is one of the only images loaded, | |
144 | // so it will be hit after only a few iterations. | |
static mach_header *get_dyld_image_header() {
  unsigned depth = 1;
  vm_size_t size = 0;
  vm_address_t address = 0;
  kern_return_t err = KERN_SUCCESS;
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

  // Linearly scan the task's VM regions until a readable region that starts
  // with a Mach header of filetype MH_DYLINKER (i.e. dyld itself) is found.
  while (true) {
    struct vm_region_submap_info_64 info;
    // vm_region_recurse_64 advances |address| to the start of the region it
    // reports and sets |size| to that region's length.
    err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                               (vm_region_info_t)&info, &count);
    // Past the last region (or on any other failure) the call stops
    // succeeding; report "not found".
    if (err != KERN_SUCCESS) return nullptr;

    // kProtectionRead is a sanitizer-defined flag — presumably mirroring
    // VM_PROT_READ; confirm against sanitizer_procmaps.h.
    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      // Accept either 32- or 64-bit magic; MH_DYLINKER identifies the
      // dynamic linker image.
      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
          hdr->filetype == MH_DYLINKER) {
        return hdr;
      }
    }
    // Continue the scan just past the current region.
    address += size;
  }
}
168 | ||
169 | const mach_header *get_dyld_hdr() { | |
170 | if (!dyld_hdr) dyld_hdr = get_dyld_image_header(); | |
171 | ||
172 | return dyld_hdr; | |
173 | } | |
174 | ||
dee5ea7a | 175 | // Next and NextSegmentLoad were inspired by base/sysinfo.cc in |
10189819 | 176 | // Google Perftools, https://github.com/gperftools/gperftools. |
dee5ea7a KS |
177 | |
178 | // NextSegmentLoad scans the current image for the next segment load command | |
179 | // and returns the start and end addresses and file offset of the corresponding | |
180 | // segment. | |
181 | // Note that the segment addresses are not necessarily sorted. | |
5d3805fc JJ |
// Consumes one load command from layout_data's cursor. If it is a segment
// load command of type kLCSegment, fills |segment| (and optionally
// |seg_data|) with the segment's runtime address range, protection, offset,
// filename, arch and UUID, and returns true; otherwise returns false.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;
  // Advance past this command unconditionally, whatever its type.
  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand* sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      // dyld's segments are relocated relative to its own header address.
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
      addr_mask = 0xfffff;
    } else {
      // Ordinary images: add the ASLR slide reported by dyld; keep the
      // full vmaddr (mask of all ones).
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      // Section records follow the segment command immediately in memory.
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      // NOTE(review): strncpy with count == sizeof(name) leaves name
      // unterminated when segname fills all 16 bytes — presumably consumers
      // bound reads by kMaxSegName; confirm.
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    // For the main executable, report vmaddr as the offset instead of the
    // file offset; other images report fileoff.
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      // dyld is not visible to _dyld_get_image_name; use the fixed path.
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}
239 | ||
5d3805fc JJ |
240 | ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) { |
241 | cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK; | |
242 | switch (cputype) { | |
243 | case CPU_TYPE_I386: | |
244 | return kModuleArchI386; | |
245 | case CPU_TYPE_X86_64: | |
246 | if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64; | |
247 | if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H; | |
248 | CHECK(0 && "Invalid subtype of x86_64"); | |
249 | return kModuleArchUnknown; | |
250 | case CPU_TYPE_ARM: | |
251 | if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6; | |
252 | if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7; | |
253 | if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S; | |
254 | if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K; | |
255 | CHECK(0 && "Invalid subtype of ARM"); | |
256 | return kModuleArchUnknown; | |
257 | case CPU_TYPE_ARM64: | |
258 | return kModuleArchARM64; | |
259 | default: | |
260 | CHECK(0 && "Invalid CPU type"); | |
261 | return kModuleArchUnknown; | |
262 | } | |
263 | } | |
264 | ||
265 | static const load_command *NextCommand(const load_command *lc) { | |
eac97531 | 266 | return (const load_command *)((const char *)lc + lc->cmdsize); |
5d3805fc JJ |
267 | } |
268 | ||
269 | static void FindUUID(const load_command *first_lc, u8 *uuid_output) { | |
270 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
271 | if (lc->cmd != LC_UUID) continue; | |
272 | ||
273 | const uuid_command *uuid_lc = (const uuid_command *)lc; | |
274 | const uint8_t *uuid = &uuid_lc->uuid[0]; | |
275 | internal_memcpy(uuid_output, uuid, kModuleUUIDSize); | |
276 | return; | |
277 | } | |
278 | } | |
279 | ||
280 | static bool IsModuleInstrumented(const load_command *first_lc) { | |
281 | for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { | |
282 | if (lc->cmd != LC_LOAD_DYLIB) continue; | |
283 | ||
284 | const dylib_command *dylib_lc = (const dylib_command *)lc; | |
285 | uint32_t dylib_name_offset = dylib_lc->dylib.name.offset; | |
286 | const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset; | |
287 | dylib_name = StripModuleName(dylib_name); | |
288 | if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) { | |
289 | return true; | |
290 | } | |
291 | } | |
292 | return false; | |
293 | } | |
294 | ||
// Resumable iterator: each call yields the next segment across all loaded
// images (including a synthetic entry for dyld at index kDyldImageIdx).
// Iteration state is kept in data_ so successive calls pick up where the
// previous one left off; returns false when all images are exhausted.
bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  // Walk images from the highest index down to kDyldImageIdx (== -1, dyld).
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    // current_load_cmd_count < 0 is the "needs setup" sentinel set by
    // Reset() and by finishing the previous image's command list.
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image;
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      // Load commands start immediately after the (32- or 64-bit) header.
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          // Unknown magic: skip this image entirely.
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    // Consume load commands one per iteration; NextSegmentLoad advances
    // data_.current_load_cmd_addr and returns true when it yields a segment.
    // NOTE(review): with the count initialized to hdr->ncmds, the '>= 0'
    // condition makes this body run ncmds + 1 times, i.e. it appears to read
    // one command past the list before the count reaches the -1 sentinel —
    // confirm against upstream before changing.
    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
  }
  return false;
}
353 | ||
// Builds the list of loaded modules by iterating every segment via Next()
// and grouping consecutive segments that share a filename into one
// LoadedModule entry.
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  // Backing storage for segment.filename, filled in by Next().
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  // Request section-level data so AddAddressRanges can record per-section
  // ranges (see MemoryMappedSegment::AddAddressRanges).
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    // Skip anonymous segments with no associated file.
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    // Segments of one image arrive consecutively, so comparing against the
    // most recent module suffices to group them.
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      // The first segment seen for an image defines the module's base
      // address, arch and UUID.
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}
376 | ||
377 | } // namespace __sanitizer | |
378 | ||
379 | #endif // SANITIZER_MAC |