]> git.ipfire.org Git - thirdparty/gcc.git/blame - libiberty/rust-demangle.c
[Ada] Use new API when creating a special SPARK heap entity
[thirdparty/gcc.git] / libiberty / rust-demangle.c
CommitLineData
/* Demangler for the Rust programming language
   Copyright (C) 2016-2020 Free Software Foundation, Inc.
   Written by David Tolnay (dtolnay@gmail.com).

This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

In addition to the permissions in the GNU Library General Public
License, the Free Software Foundation gives you unlimited permission
to link the compiled version of this file into combinations with other
programs, and to distribute those combinations without any restriction
coming from the use of this file.  (The Library Public License
restrictions do apply in other respects; for example, they cover
modification of the file, and distribution when not linked into a
combined executable.)

Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB.
If not, see <http://www.gnu.org/licenses/>.  */
28
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "safe-ctype.h"
35
32fc3719 36#include <inttypes.h>
10d48c59
DT
37#include <sys/types.h>
38#include <string.h>
39#include <stdio.h>
32fc3719 40#include <stdlib.h>
10d48c59
DT
41
42#ifdef HAVE_STRING_H
43#include <string.h>
44#else
45extern size_t strlen(const char *s);
46extern int strncmp(const char *s1, const char *s2, size_t n);
47extern void *memset(void *s, int c, size_t n);
48#endif
49
50#include <demangle.h>
51#include "libiberty.h"
52
32fc3719
EMB
53struct rust_demangler
54{
55 const char *sym;
56 size_t sym_len;
10d48c59 57
32fc3719
EMB
58 void *callback_opaque;
59 demangle_callbackref callback;
10d48c59 60
32fc3719
EMB
61 /* Position of the next character to read from the symbol. */
62 size_t next;
10d48c59 63
32fc3719
EMB
64 /* Non-zero if any error occurred. */
65 int errored;
10d48c59 66
32fc3719
EMB
67 /* Non-zero if printing should be verbose (e.g. include hashes). */
68 int verbose;
10d48c59 69
32fc3719
EMB
70 /* Rust mangling version, with legacy mangling being -1. */
71 int version;
72};
10d48c59 73
/* Parsing functions. */
10d48c59 75
32fc3719
EMB
76static char
77peek (const struct rust_demangler *rdm)
10d48c59 78{
32fc3719
EMB
79 if (rdm->next < rdm->sym_len)
80 return rdm->sym[rdm->next];
81 return 0;
82}
10d48c59 83
32fc3719
EMB
84static char
85next (struct rust_demangler *rdm)
86{
87 char c = peek (rdm);
88 if (!c)
89 rdm->errored = 1;
90 else
91 rdm->next++;
92 return c;
93}
10d48c59 94
32fc3719
EMB
/* One identifier (path segment) as it appears inside a mangled symbol.
   The text is referenced in place, not copied, and is therefore not
   NUL-terminated on its own.  */
struct rust_mangled_ident
{
  /* ASCII part of the identifier. */
  const char *ascii;
  size_t ascii_len;
};
10d48c59 101
32fc3719
EMB
102static struct rust_mangled_ident
103parse_ident (struct rust_demangler *rdm)
104{
105 char c;
106 size_t start, len;
107 struct rust_mangled_ident ident;
10d48c59 108
32fc3719
EMB
109 ident.ascii = NULL;
110 ident.ascii_len = 0;
e1cb00db 111
32fc3719
EMB
112 c = next (rdm);
113 if (!ISDIGIT (c))
e1cb00db 114 {
32fc3719
EMB
115 rdm->errored = 1;
116 return ident;
e1cb00db 117 }
32fc3719 118 len = c - '0';
e1cb00db 119
32fc3719
EMB
120 if (c != '0')
121 while (ISDIGIT (peek (rdm)))
122 len = len * 10 + (next (rdm) - '0');
10d48c59 123
32fc3719
EMB
124 start = rdm->next;
125 rdm->next += len;
126 /* Check for overflows. */
127 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
42bf58bb 128 {
32fc3719
EMB
129 rdm->errored = 1;
130 return ident;
42bf58bb 131 }
10d48c59 132
32fc3719
EMB
133 ident.ascii = rdm->sym + start;
134 ident.ascii_len = len;
10d48c59 135
32fc3719
EMB
136 if (ident.ascii_len == 0)
137 ident.ascii = NULL;
10d48c59 138
32fc3719
EMB
139 return ident;
140}
10d48c59 141
/* Printing functions. */
10d48c59 143
32fc3719
EMB
144static void
145print_str (struct rust_demangler *rdm, const char *data, size_t len)
10d48c59 146{
32fc3719
EMB
147 if (!rdm->errored)
148 rdm->callback (data, len, rdm->callback_opaque);
10d48c59
DT
149}
150
32fc3719
EMB
151#define PRINT(s) print_str (rdm, s, strlen (s))
152
/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
static int
decode_lower_hex_nibble (char nibble)
{
  if ('0' <= nibble && nibble <= '9')
    return nibble - '0';
  if ('a' <= nibble && nibble <= 'f')
    return 0xa + (nibble - 'a');
  return -1;
}

/* Return the unescaped character for a "$...$" escape, or 0 if invalid.

   E points at the opening '$' and LEN is the number of bytes readable
   from E.  Recognized escapes are "$C$" (','), the two-letter forms
   "$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$", and "$uXX$"
   where XX are two lowercase hex digits naming a printable ASCII
   character.  On success *OUT_LEN receives the full length of the
   escape, both '$' delimiters included; on failure *OUT_LEN is left
   untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  char c = 0;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  /* The shortest valid escape is "$C$".  */
  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'; from here on LEN counts the bytes that
     follow it.  */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
        c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
        c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
        c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
        c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
        c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
        c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
        c = ')';
      else if (e[0] == 'u' && len > 3)
        {
          escape_len = 3;

          hi_nibble = decode_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = decode_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters. */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          /* DEL (0x7f) is a control character as well, not only the
             codes below space.  */
          if (c < 0x20 || c == 0x7f)
            return 0;
        }
    }

  /* The escape must be known and closed by a '$' that is in bounds.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
32fc3719
EMB
228
229static void
230print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
231{
232 char unescaped;
233 size_t len;
234
235 if (rdm->errored)
236 return;
237
238 if (rdm->version == -1)
239 {
240 /* Ignore leading underscores preceding escape sequences.
241 The mangler inserts an underscore to make sure the
242 identifier begins with a XID_Start character. */
243 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
244 && ident.ascii[1] == '$')
245 {
246 ident.ascii++;
247 ident.ascii_len--;
248 }
249
250 while (ident.ascii_len > 0)
251 {
252 /* Handle legacy escape sequences ("$...$", ".." or "."). */
253 if (ident.ascii[0] == '$')
254 {
255 unescaped
256 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
257 if (unescaped)
258 print_str (rdm, &unescaped, 1);
259 else
260 {
261 /* Unexpected escape sequence, print the rest verbatim. */
262 print_str (rdm, ident.ascii, ident.ascii_len);
263 return;
264 }
265 }
266 else if (ident.ascii[0] == '.')
267 {
268 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
269 {
270 /* ".." becomes "::" */
271 PRINT ("::");
272 len = 2;
273 }
274 else
275 {
276 /* "." becomes "-" */
277 PRINT ("-");
278 len = 1;
279 }
280 }
281 else
282 {
283 /* Print everything before the next escape sequence, at once. */
284 for (len = 0; len < ident.ascii_len; len++)
285 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
286 break;
287
288 print_str (rdm, ident.ascii, len);
289 }
290
291 ident.ascii += len;
292 ident.ascii_len -= len;
293 }
294
295 return;
296 }
297}
298
299/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
300 The hex digits must contain at least 5 distinct digits. */
301static int
302is_legacy_prefixed_hash (struct rust_mangled_ident ident)
303{
304 uint16_t seen;
305 int nibble;
306 size_t i, count;
307
308 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
309 return 0;
310
311 seen = 0;
312 for (i = 0; i < 16; i++)
313 {
314 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
315 if (nibble < 0)
316 return 0;
317 seen |= (uint16_t)1 << nibble;
318 }
319
320 /* Count how many distinct digits were seen. */
321 count = 0;
322 while (seen)
323 {
324 if (seen & 1)
325 count++;
326 seen >>= 1;
327 }
328
329 return count >= 5;
330}
331
332int
333rust_demangle_callback (const char *mangled, int options,
334 demangle_callbackref callback, void *opaque)
335{
336 const char *p;
337 struct rust_demangler rdm;
338 struct rust_mangled_ident ident;
339
340 rdm.sym = mangled;
341 rdm.sym_len = 0;
342
343 rdm.callback_opaque = opaque;
344 rdm.callback = callback;
345
346 rdm.next = 0;
347 rdm.errored = 0;
348 rdm.verbose = (options & DMGL_VERBOSE) != 0;
349 rdm.version = 0;
350
351 /* Rust symbols always start with _ZN (legacy). */
352 if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
353 {
354 rdm.sym += 3;
355 rdm.version = -1;
356 }
357 else
358 return 0;
359
360 /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
361 for (p = rdm.sym; *p; p++)
362 {
363 rdm.sym_len++;
364
365 if (*p == '_' || ISALNUM (*p))
366 continue;
367
368 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
369 continue;
370
371 return 0;
372 }
373
374 /* Legacy Rust symbols need to be handled separately. */
375 if (rdm.version == -1)
376 {
377 /* Legacy Rust symbols always end with E. */
378 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
379 return 0;
380 rdm.sym_len--;
381
382 /* Legacy Rust symbols also always end with a path segment
383 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
384 This early check, before any parse_ident calls, should
385 quickly filter out most C++ symbols unrelated to Rust. */
386 if (!(rdm.sym_len > 19
387 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
388 return 0;
389
390 do
391 {
392 ident = parse_ident (&rdm);
393 if (rdm.errored || !ident.ascii)
394 return 0;
395 }
396 while (rdm.next < rdm.sym_len);
397
398 /* The last path segment should be the hash. */
399 if (!is_legacy_prefixed_hash (ident))
400 return 0;
401
402 /* Reset the state for a second pass, to print the symbol. */
403 rdm.next = 0;
404 if (!rdm.verbose && rdm.sym_len > 19)
405 {
406 /* Hide the last segment, containing the hash, if not verbose. */
407 rdm.sym_len -= 19;
408 }
409
410 do
411 {
412 if (rdm.next > 0)
413 print_str (&rdm, "::", 2);
414
415 ident = parse_ident (&rdm);
416 print_ident (&rdm, ident);
417 }
418 while (rdm.next < rdm.sym_len);
419 }
420 else
421 return 0;
422
423 return !rdm.errored;
424}
425
/* Growable string buffers. */
struct str_buf
{
  /* Storage obtained from realloc, holding LEN used bytes out of CAP.  */
  char *ptr;
  size_t len;
  size_t cap;

  /* Non-zero once growing the buffer has failed; later appends are
     then ignored.  */
  int errored;
};

/* Make sure BUF has room for EXTRA more bytes after its current
   contents, growing the storage by repeated doubling when needed.

   On failure BUF is flagged as errored.  If the failure comes from
   realloc, the storage is also released and BUF is reset to empty; if
   the requested size cannot be represented, the storage is left as it
   was.  */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before. */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;

  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* Check for overflows. */
  if (min_new_cap < buf->cap)
    {
      buf->errored = 1;
      return;
    }

  new_cap = buf->cap;

  if (new_cap == 0)
    new_cap = 4;

  /* Double capacity until sufficiently large. */
  while (new_cap < min_new_cap)
    {
      /* Check for overflows before multiplying: a wrapped capacity of
         zero could otherwise go unnoticed when starting from an empty
         buffer, and the loop would never terminate.  */
      if (new_cap > (size_t) -1 / 2)
        {
          buf->errored = 1;
          return;
        }

      new_cap *= 2;
    }

  new_ptr = (char *)realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    {
      free (buf->ptr);
      buf->ptr = NULL;
      buf->len = 0;
      buf->cap = 0;
      buf->errored = 1;
    }
  else
    {
      buf->ptr = new_ptr;
      buf->cap = new_cap;
    }
}

/* Append LEN bytes starting at DATA to BUF.  Does nothing if BUF is, or
   becomes, errored.  */
static void
str_buf_append (struct str_buf *buf, const char *data, size_t len)
{
  str_buf_reserve (buf, len);
  if (buf->errored)
    return;

  /* An empty append on a still-empty buffer would pass a NULL
     destination to memcpy, which is not allowed even for zero bytes.  */
  if (len == 0)
    return;

  memcpy (buf->ptr + buf->len, data, len);
  buf->len += len;
}

/* Output callback that appends to the struct str_buf OPAQUE points to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  str_buf_append ((struct str_buf *)opaque, data, len);
}
509
510char *
511rust_demangle (const char *mangled, int options)
512{
513 struct str_buf out;
514 int success;
515
516 out.ptr = NULL;
517 out.len = 0;
518 out.cap = 0;
519 out.errored = 0;
520
521 success = rust_demangle_callback (mangled, options,
522 str_buf_demangle_callback, &out);
523
524 if (!success)
525 {
526 free (out.ptr);
527 return NULL;
528 }
529
530 str_buf_append (&out, "\0", 1);
531 return out.ptr;
532}