]> git.ipfire.org Git - thirdparty/gcc.git/blame - libiberty/rust-demangle.c
Update copyright years.
[thirdparty/gcc.git] / libiberty / rust-demangle.c
CommitLineData
10d48c59 1/* Demangler for the Rust programming language
99dee823 2 Copyright (C) 2016-2021 Free Software Foundation, Inc.
10d48c59 3 Written by David Tolnay (dtolnay@gmail.com).
84096498 4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
10d48c59
DT
5
6This file is part of the libiberty library.
7Libiberty is free software; you can redistribute it and/or
8modify it under the terms of the GNU Library General Public
9License as published by the Free Software Foundation; either
10version 2 of the License, or (at your option) any later version.
11
12In addition to the permissions in the GNU Library General Public
13License, the Free Software Foundation gives you unlimited permission
14to link the compiled version of this file into combinations with other
15programs, and to distribute those combinations without any restriction
16coming from the use of this file. (The Library Public License
17restrictions do apply in other respects; for example, they cover
18modification of the file, and distribution when not linked into a
19combined executable.)
20
21Libiberty is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24Library General Public License for more details.
25
26You should have received a copy of the GNU Library General Public
27License along with libiberty; see the file COPYING.LIB.
28If not, see <http://www.gnu.org/licenses/>. */
29
30
31#ifdef HAVE_CONFIG_H
32#include "config.h"
33#endif
34
35#include "safe-ctype.h"
36
32fc3719 37#include <inttypes.h>
10d48c59
DT
38#include <sys/types.h>
39#include <string.h>
40#include <stdio.h>
32fc3719 41#include <stdlib.h>
10d48c59
DT
42
43#ifdef HAVE_STRING_H
44#include <string.h>
45#else
46extern size_t strlen(const char *s);
47extern int strncmp(const char *s1, const char *s2, size_t n);
48extern void *memset(void *s, int c, size_t n);
49#endif
50
51#include <demangle.h>
52#include "libiberty.h"
53
32fc3719
EMB
54struct rust_demangler
55{
56 const char *sym;
57 size_t sym_len;
10d48c59 58
32fc3719
EMB
59 void *callback_opaque;
60 demangle_callbackref callback;
10d48c59 61
32fc3719
EMB
62 /* Position of the next character to read from the symbol. */
63 size_t next;
10d48c59 64
32fc3719
EMB
65 /* Non-zero if any error occurred. */
66 int errored;
10d48c59 67
84096498
EMB
68 /* Non-zero if nothing should be printed. */
69 int skipping_printing;
70
32fc3719
EMB
71 /* Non-zero if printing should be verbose (e.g. include hashes). */
72 int verbose;
10d48c59 73
32fc3719
EMB
74 /* Rust mangling version, with legacy mangling being -1. */
75 int version;
84096498
EMB
76
77 uint64_t bound_lifetime_depth;
32fc3719 78};
10d48c59 79
32fc3719 80/* Parsing functions. */
10d48c59 81
32fc3719
EMB
82static char
83peek (const struct rust_demangler *rdm)
10d48c59 84{
32fc3719
EMB
85 if (rdm->next < rdm->sym_len)
86 return rdm->sym[rdm->next];
87 return 0;
88}
10d48c59 89
84096498
EMB
90static int
91eat (struct rust_demangler *rdm, char c)
92{
93 if (peek (rdm) == c)
94 {
95 rdm->next++;
96 return 1;
97 }
98 else
99 return 0;
100}
101
32fc3719
EMB
102static char
103next (struct rust_demangler *rdm)
104{
105 char c = peek (rdm);
106 if (!c)
107 rdm->errored = 1;
108 else
109 rdm->next++;
110 return c;
111}
10d48c59 112
84096498
EMB
113static uint64_t
114parse_integer_62 (struct rust_demangler *rdm)
115{
116 char c;
117 uint64_t x;
118
119 if (eat (rdm, '_'))
120 return 0;
121
122 x = 0;
123 while (!eat (rdm, '_'))
124 {
125 c = next (rdm);
126 x *= 62;
127 if (ISDIGIT (c))
128 x += c - '0';
129 else if (ISLOWER (c))
130 x += 10 + (c - 'a');
131 else if (ISUPPER (c))
132 x += 10 + 26 + (c - 'A');
133 else
134 {
135 rdm->errored = 1;
136 return 0;
137 }
138 }
139 return x + 1;
140}
141
142static uint64_t
143parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
144{
145 if (!eat (rdm, tag))
146 return 0;
147 return 1 + parse_integer_62 (rdm);
148}
149
/* Parse an optional disambiguator: a base-62 number tagged with 's'.
   Returns 0 when no disambiguator is present. */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  const char disambiguator_tag = 's';

  return parse_opt_integer_62 (rdm, disambiguator_tag);
}
155
156static size_t
157parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
158{
159 char c;
160 size_t hex_len;
161
162 hex_len = 0;
163 *value = 0;
164
165 while (!eat (rdm, '_'))
166 {
167 *value <<= 4;
168
169 c = next (rdm);
170 if (ISDIGIT (c))
171 *value |= c - '0';
172 else if (c >= 'a' && c <= 'f')
173 *value |= 10 + (c - 'a');
174 else
175 {
176 rdm->errored = 1;
177 return 0;
178 }
179 hex_len++;
180 }
181
182 return hex_len;
183}
184
32fc3719
EMB
/* An identifier as it appears in the mangled symbol.  Both parts
   point into the symbol text and are not NUL-terminated. */
struct rust_mangled_ident
{
  /* The ASCII portion of the identifier and its length. */
  const char *ascii;
  size_t ascii_len;

  /* The Punycode insertion codes describing the Unicode codepoints
     of the identifier, if there are any, and their length. */
  const char *punycode;
  size_t punycode_len;
};
10d48c59 195
32fc3719
EMB
196static struct rust_mangled_ident
197parse_ident (struct rust_demangler *rdm)
198{
199 char c;
200 size_t start, len;
84096498 201 int is_punycode = 0;
32fc3719 202 struct rust_mangled_ident ident;
10d48c59 203
32fc3719
EMB
204 ident.ascii = NULL;
205 ident.ascii_len = 0;
84096498
EMB
206 ident.punycode = NULL;
207 ident.punycode_len = 0;
208
209 if (rdm->version != -1)
210 is_punycode = eat (rdm, 'u');
e1cb00db 211
32fc3719
EMB
212 c = next (rdm);
213 if (!ISDIGIT (c))
e1cb00db 214 {
32fc3719
EMB
215 rdm->errored = 1;
216 return ident;
e1cb00db 217 }
32fc3719 218 len = c - '0';
e1cb00db 219
32fc3719
EMB
220 if (c != '0')
221 while (ISDIGIT (peek (rdm)))
222 len = len * 10 + (next (rdm) - '0');
10d48c59 223
84096498
EMB
224 /* Skip past the optional `_` separator (v0). */
225 if (rdm->version != -1)
226 eat (rdm, '_');
227
32fc3719
EMB
228 start = rdm->next;
229 rdm->next += len;
230 /* Check for overflows. */
231 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
42bf58bb 232 {
32fc3719
EMB
233 rdm->errored = 1;
234 return ident;
42bf58bb 235 }
10d48c59 236
32fc3719
EMB
237 ident.ascii = rdm->sym + start;
238 ident.ascii_len = len;
10d48c59 239
84096498
EMB
240 if (is_punycode)
241 {
242 ident.punycode_len = 0;
243 while (ident.ascii_len > 0)
244 {
245 ident.ascii_len--;
246
247 /* The last '_' is a separator between ascii & punycode. */
248 if (ident.ascii[ident.ascii_len] == '_')
249 break;
250
251 ident.punycode_len++;
252 }
253 if (!ident.punycode_len)
254 {
255 rdm->errored = 1;
256 return ident;
257 }
258 ident.punycode = ident.ascii + (len - ident.punycode_len);
259 }
260
32fc3719
EMB
261 if (ident.ascii_len == 0)
262 ident.ascii = NULL;
10d48c59 263
32fc3719
EMB
264 return ident;
265}
10d48c59 266
32fc3719 267/* Printing functions. */
10d48c59 268
32fc3719
EMB
269static void
270print_str (struct rust_demangler *rdm, const char *data, size_t len)
10d48c59 271{
84096498 272 if (!rdm->errored && !rdm->skipping_printing)
32fc3719 273 rdm->callback (data, len, rdm->callback_opaque);
10d48c59
DT
274}
275
32fc3719
EMB
276#define PRINT(s) print_str (rdm, s, strlen (s))
277
84096498
EMB
/* Print X in decimal. */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 20 digits of the largest 64-bit value plus a NUL. */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  print_str (rdm, digits, strlen (digits));
}
285
/* Print X in lowercase hexadecimal, without any prefix. */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 16 hex digits of a 64-bit value plus a NUL. */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  print_str (rdm, digits, strlen (digits));
}
293
/* Map a lowercase hexadecimal digit (0-9, a-f) to its value 0x0-0xf.
   Any other character, uppercase A-F included, yields -1. */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= 'a' && nibble <= 'f')
    return 0xa + (nibble - 'a');

  return (nibble >= '0' && nibble <= '9') ? nibble - '0' : -1;
}
10d48c59 304
42bf58bb
EMB
/* Decode the legacy "$...$" escape sequence found at the start of the
   LEN characters at E.

   Recognised forms are "$C$", the two-letter codes listed in the table
   below, and "$uXX$" where XX are two lowercase hex digits encoding a
   non-control ASCII character.

   Return the unescaped character and store the full length of the
   escape (both '$' included) in *OUT_LEN.  Return 0, leaving *OUT_LEN
   untouched, if the sequence is not a valid escape. */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  static const struct
  {
    char first, second, decoded;
  } pairs[] = {
    { 'S', 'P', '@' },
    { 'B', 'P', '*' },
    { 'R', 'F', '&' },
    { 'L', 'T', '<' },
    { 'G', 'T', '>' },
    { 'L', 'P', '(' },
    { 'R', 'P', ')' }
  };
  char c = 0;
  size_t escape_len = 0, n;
  int hi_nibble, lo_nibble;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'. */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;
      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      for (n = 0; n < sizeof pairs / sizeof pairs[0]; n++)
        if (e[0] == pairs[n].first && e[1] == pairs[n].second)
          {
            c = pairs[n].decoded;
            break;
          }

      if (!c && e[0] == 'u' && len > 3)
        {
          escape_len = 3;

          hi_nibble = decode_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = decode_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters. */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          if (c < 0x20)
            return 0;
        }
    }

  /* The escape body must be followed by the closing '$'. */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
32fc3719
EMB
369
370static void
371print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
372{
373 char unescaped;
84096498
EMB
374 uint8_t *out, *p, d;
375 size_t len, cap, punycode_pos, j;
376 /* Punycode parameters and state. */
377 uint32_t c;
378 size_t base, t_min, t_max, skew, damp, bias, i;
379 size_t delta, w, k, t;
380
381 if (rdm->errored || rdm->skipping_printing)
32fc3719
EMB
382 return;
383
384 if (rdm->version == -1)
385 {
386 /* Ignore leading underscores preceding escape sequences.
387 The mangler inserts an underscore to make sure the
388 identifier begins with a XID_Start character. */
389 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
390 && ident.ascii[1] == '$')
391 {
392 ident.ascii++;
393 ident.ascii_len--;
394 }
395
396 while (ident.ascii_len > 0)
397 {
398 /* Handle legacy escape sequences ("$...$", ".." or "."). */
399 if (ident.ascii[0] == '$')
400 {
401 unescaped
402 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
403 if (unescaped)
404 print_str (rdm, &unescaped, 1);
405 else
406 {
407 /* Unexpected escape sequence, print the rest verbatim. */
408 print_str (rdm, ident.ascii, ident.ascii_len);
409 return;
410 }
411 }
412 else if (ident.ascii[0] == '.')
413 {
414 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
415 {
416 /* ".." becomes "::" */
417 PRINT ("::");
418 len = 2;
419 }
420 else
421 {
84096498 422 PRINT (".");
32fc3719
EMB
423 len = 1;
424 }
425 }
426 else
427 {
428 /* Print everything before the next escape sequence, at once. */
429 for (len = 0; len < ident.ascii_len; len++)
430 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
431 break;
432
433 print_str (rdm, ident.ascii, len);
434 }
435
436 ident.ascii += len;
437 ident.ascii_len -= len;
438 }
439
440 return;
441 }
84096498
EMB
442
443 if (!ident.punycode)
444 {
445 print_str (rdm, ident.ascii, ident.ascii_len);
446 return;
447 }
448
449 len = 0;
450 cap = 4;
451 while (cap < ident.ascii_len)
452 {
453 cap *= 2;
454 /* Check for overflows. */
455 if ((cap * 4) / 4 != cap)
456 {
457 rdm->errored = 1;
458 return;
459 }
460 }
461
462 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
463 out = (uint8_t *)malloc (cap * 4);
464 if (!out)
465 {
466 rdm->errored = 1;
467 return;
468 }
469
470 /* Populate initial output from ASCII fragment. */
471 for (len = 0; len < ident.ascii_len; len++)
472 {
473 p = out + 4 * len;
474 p[0] = 0;
475 p[1] = 0;
476 p[2] = 0;
477 p[3] = ident.ascii[len];
478 }
479
480 /* Punycode parameters and initial state. */
481 base = 36;
482 t_min = 1;
483 t_max = 26;
484 skew = 38;
485 damp = 700;
486 bias = 72;
487 i = 0;
488 c = 0x80;
489
490 punycode_pos = 0;
491 while (punycode_pos < ident.punycode_len)
492 {
493 /* Read one delta value. */
494 delta = 0;
495 w = 1;
496 k = 0;
497 do
498 {
499 k += base;
500 t = k < bias ? 0 : (k - bias);
501 if (t < t_min)
502 t = t_min;
503 if (t > t_max)
504 t = t_max;
505
506 if (punycode_pos >= ident.punycode_len)
507 goto cleanup;
508 d = ident.punycode[punycode_pos++];
509
510 if (ISLOWER (d))
511 d = d - 'a';
512 else if (ISDIGIT (d))
513 d = 26 + (d - '0');
514 else
515 {
516 rdm->errored = 1;
517 goto cleanup;
518 }
519
520 delta += d * w;
521 w *= base - t;
522 }
523 while (d >= t);
524
525 /* Compute the new insert position and character. */
526 len++;
527 i += delta;
528 c += i / len;
529 i %= len;
530
531 /* Ensure enough space is available. */
532 if (cap < len)
533 {
534 cap *= 2;
535 /* Check for overflows. */
536 if ((cap * 4) / 4 != cap || cap < len)
537 {
538 rdm->errored = 1;
539 goto cleanup;
540 }
541 }
542 p = (uint8_t *)realloc (out, cap * 4);
543 if (!p)
544 {
545 rdm->errored = 1;
546 goto cleanup;
547 }
548 out = p;
549
550 /* Move the characters after the insert position. */
551 p = out + i * 4;
552 memmove (p + 4, p, (len - i - 1) * 4);
553
554 /* Insert the new character, as UTF-8 bytes. */
555 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
556 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
557 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
558 p[3] = 0x80 | (c & 0x3f);
559
560 /* If there are no more deltas, decoding is complete. */
561 if (punycode_pos == ident.punycode_len)
562 break;
563
564 i++;
565
566 /* Perform bias adaptation. */
567 delta /= damp;
568 damp = 2;
569
570 delta += delta / len;
571 k = 0;
572 while (delta > ((base - t_min) * t_max) / 2)
573 {
574 delta /= base - t_min;
575 k += base;
576 }
577 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
578 }
579
580 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
581 for (i = 0, j = 0; i < len * 4; i++)
582 if (out[i] != 0)
583 out[j++] = out[i];
584
585 print_str (rdm, (const char *)out, j);
586
587cleanup:
588 free (out);
589}
590
591/* Print the lifetime according to the previously decoded index.
592 An index of `0` always refers to `'_`, but starting with `1`,
593 indices refer to late-bound lifetimes introduced by a binder. */
594static void
595print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
596{
597 char c;
598 uint64_t depth;
599
600 PRINT ("'");
601 if (lt == 0)
602 {
603 PRINT ("_");
604 return;
605 }
606
607 depth = rdm->bound_lifetime_depth - lt;
608 /* Try to print lifetimes alphabetically first. */
609 if (depth < 26)
610 {
611 c = 'a' + depth;
612 print_str (rdm, &c, 1);
613 }
614 else
615 {
616 /* Use `'_123` after running out of letters. */
617 PRINT ("_");
618 print_uint64 (rdm, depth);
619 }
620}
621
622/* Demangling functions. */
623
624static void demangle_binder (struct rust_demangler *rdm);
625static void demangle_path (struct rust_demangler *rdm, int in_value);
626static void demangle_generic_arg (struct rust_demangler *rdm);
627static void demangle_type (struct rust_demangler *rdm);
628static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
629static void demangle_dyn_trait (struct rust_demangler *rdm);
630static void demangle_const (struct rust_demangler *rdm);
631static void demangle_const_uint (struct rust_demangler *rdm);
632static void demangle_const_int (struct rust_demangler *rdm);
633static void demangle_const_bool (struct rust_demangler *rdm);
634static void demangle_const_char (struct rust_demangler *rdm);
635
636/* Optionally enter a binder ('G') for late-bound lifetimes,
637 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
638 to the caller (via depth level, which the caller should reset). */
639static void
640demangle_binder (struct rust_demangler *rdm)
641{
642 uint64_t i, bound_lifetimes;
643
644 if (rdm->errored)
645 return;
646
647 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
648 if (bound_lifetimes > 0)
649 {
650 PRINT ("for<");
651 for (i = 0; i < bound_lifetimes; i++)
652 {
653 if (i > 0)
654 PRINT (", ");
655 rdm->bound_lifetime_depth++;
656 print_lifetime_from_index (rdm, 1);
657 }
658 PRINT ("> ");
659 }
660}
661
662static void
663demangle_path (struct rust_demangler *rdm, int in_value)
664{
665 char tag, ns;
666 int was_skipping_printing;
667 size_t i, backref, old_next;
668 uint64_t dis;
669 struct rust_mangled_ident name;
670
671 if (rdm->errored)
672 return;
673
674 switch (tag = next (rdm))
675 {
676 case 'C':
677 dis = parse_disambiguator (rdm);
678 name = parse_ident (rdm);
679
680 print_ident (rdm, name);
681 if (rdm->verbose)
682 {
683 PRINT ("[");
684 print_uint64_hex (rdm, dis);
685 PRINT ("]");
686 }
687 break;
688 case 'N':
689 ns = next (rdm);
690 if (!ISLOWER (ns) && !ISUPPER (ns))
691 {
692 rdm->errored = 1;
693 return;
694 }
695
696 demangle_path (rdm, in_value);
697
698 dis = parse_disambiguator (rdm);
699 name = parse_ident (rdm);
700
701 if (ISUPPER (ns))
702 {
703 /* Special namespaces, like closures and shims. */
704 PRINT ("::{");
705 switch (ns)
706 {
707 case 'C':
708 PRINT ("closure");
709 break;
710 case 'S':
711 PRINT ("shim");
712 break;
713 default:
714 print_str (rdm, &ns, 1);
715 }
716 if (name.ascii || name.punycode)
717 {
718 PRINT (":");
719 print_ident (rdm, name);
720 }
721 PRINT ("#");
722 print_uint64 (rdm, dis);
723 PRINT ("}");
724 }
725 else
726 {
727 /* Implementation-specific/unspecified namespaces. */
728
729 if (name.ascii || name.punycode)
730 {
731 PRINT ("::");
732 print_ident (rdm, name);
733 }
734 }
735 break;
736 case 'M':
737 case 'X':
738 /* Ignore the `impl`'s own path.*/
739 parse_disambiguator (rdm);
740 was_skipping_printing = rdm->skipping_printing;
741 rdm->skipping_printing = 1;
742 demangle_path (rdm, in_value);
743 rdm->skipping_printing = was_skipping_printing;
744 /* fallthrough */
745 case 'Y':
746 PRINT ("<");
747 demangle_type (rdm);
748 if (tag != 'M')
749 {
750 PRINT (" as ");
751 demangle_path (rdm, 0);
752 }
753 PRINT (">");
754 break;
755 case 'I':
756 demangle_path (rdm, in_value);
757 if (in_value)
758 PRINT ("::");
759 PRINT ("<");
760 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
761 {
762 if (i > 0)
763 PRINT (", ");
764 demangle_generic_arg (rdm);
765 }
766 PRINT (">");
767 break;
768 case 'B':
769 backref = parse_integer_62 (rdm);
770 if (!rdm->skipping_printing)
771 {
772 old_next = rdm->next;
773 rdm->next = backref;
774 demangle_path (rdm, in_value);
775 rdm->next = old_next;
776 }
777 break;
778 default:
779 rdm->errored = 1;
780 return;
781 }
782}
783
/* Demangle one generic argument: a lifetime (tag 'L'), a constant
   (tag 'K'), or otherwise a type. */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lifetime;

  if (eat (rdm, 'L'))
    {
      lifetime = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lifetime);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
798
/* Return the printed name of the basic type encoded by the single
   character TAG, or NULL if TAG does not denote a basic type. */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." }
  };
  size_t n;

  for (n = 0; n < sizeof basic_types / sizeof basic_types[0]; n++)
    if (basic_types[n].tag == tag)
      return basic_types[n].name;

  return NULL;
}
851
852static void
853demangle_type (struct rust_demangler *rdm)
854{
855 char tag;
856 size_t i, old_next, backref;
857 uint64_t lt, old_bound_lifetime_depth;
858 const char *basic;
859 struct rust_mangled_ident abi;
860
861 if (rdm->errored)
862 return;
863
864 tag = next (rdm);
865
866 basic = basic_type (tag);
867 if (basic)
868 {
869 PRINT (basic);
870 return;
871 }
872
873 switch (tag)
874 {
875 case 'R':
876 case 'Q':
877 PRINT ("&");
878 if (eat (rdm, 'L'))
879 {
880 lt = parse_integer_62 (rdm);
881 if (lt)
882 {
883 print_lifetime_from_index (rdm, lt);
884 PRINT (" ");
885 }
886 }
887 if (tag != 'R')
888 PRINT ("mut ");
889 demangle_type (rdm);
890 break;
891 case 'P':
892 case 'O':
893 PRINT ("*");
894 if (tag != 'P')
895 PRINT ("mut ");
896 else
897 PRINT ("const ");
898 demangle_type (rdm);
899 break;
900 case 'A':
901 case 'S':
902 PRINT ("[");
903 demangle_type (rdm);
904 if (tag == 'A')
905 {
906 PRINT ("; ");
907 demangle_const (rdm);
908 }
909 PRINT ("]");
910 break;
911 case 'T':
912 PRINT ("(");
913 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
914 {
915 if (i > 0)
916 PRINT (", ");
917 demangle_type (rdm);
918 }
919 if (i == 1)
920 PRINT (",");
921 PRINT (")");
922 break;
923 case 'F':
924 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
925 demangle_binder (rdm);
926
927 if (eat (rdm, 'U'))
928 PRINT ("unsafe ");
929
930 if (eat (rdm, 'K'))
931 {
932 if (eat (rdm, 'C'))
933 {
934 abi.ascii = "C";
935 abi.ascii_len = 1;
936 }
937 else
938 {
939 abi = parse_ident (rdm);
940 if (!abi.ascii || abi.punycode)
941 {
942 rdm->errored = 1;
943 goto restore;
944 }
945 }
946
947 PRINT ("extern \"");
948
949 /* If the ABI had any `-`, they were replaced with `_`,
950 so the parts between `_` have to be re-joined with `-`. */
951 for (i = 0; i < abi.ascii_len; i++)
952 {
953 if (abi.ascii[i] == '_')
954 {
955 print_str (rdm, abi.ascii, i);
956 PRINT ("-");
957 abi.ascii += i + 1;
958 abi.ascii_len -= i + 1;
959 i = 0;
960 }
961 }
962 print_str (rdm, abi.ascii, abi.ascii_len);
963
964 PRINT ("\" ");
965 }
966
967 PRINT ("fn(");
968 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
969 {
970 if (i > 0)
971 PRINT (", ");
972 demangle_type (rdm);
973 }
974 PRINT (")");
975
976 if (eat (rdm, 'u'))
977 {
978 /* Skip printing the return type if it's 'u', i.e. `()`. */
979 }
980 else
981 {
982 PRINT (" -> ");
983 demangle_type (rdm);
984 }
985
986 /* Restore `bound_lifetime_depth` to outside the binder. */
987 restore:
988 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
989 break;
990 case 'D':
991 PRINT ("dyn ");
992
993 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
994 demangle_binder (rdm);
995
996 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
997 {
998 if (i > 0)
999 PRINT (" + ");
1000 demangle_dyn_trait (rdm);
1001 }
1002
1003 /* Restore `bound_lifetime_depth` to outside the binder. */
1004 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1005
1006 if (!eat (rdm, 'L'))
1007 {
1008 rdm->errored = 1;
1009 return;
1010 }
1011 lt = parse_integer_62 (rdm);
1012 if (lt)
1013 {
1014 PRINT (" + ");
1015 print_lifetime_from_index (rdm, lt);
1016 }
1017 break;
1018 case 'B':
1019 backref = parse_integer_62 (rdm);
1020 if (!rdm->skipping_printing)
1021 {
1022 old_next = rdm->next;
1023 rdm->next = backref;
1024 demangle_type (rdm);
1025 rdm->next = old_next;
1026 }
1027 break;
1028 default:
1029 /* Go back to the tag, so `demangle_path` also sees it. */
1030 rdm->next--;
1031 demangle_path (rdm, 0);
1032 }
1033}
1034
1035/* A trait in a trait object may have some "existential projections"
1036 (i.e. associated type bindings) after it, which should be printed
1037 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1038 To this end, this method will keep the `<...>` of an 'I' path
1039 open, by omitting the `>`, and return `Ok(true)` in that case. */
1040static int
1041demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1042{
1043 int open;
1044 size_t i, old_next, backref;
1045
1046 open = 0;
1047
1048 if (rdm->errored)
1049 return open;
1050
1051 if (eat (rdm, 'B'))
1052 {
1053 backref = parse_integer_62 (rdm);
1054 if (!rdm->skipping_printing)
1055 {
1056 old_next = rdm->next;
1057 rdm->next = backref;
1058 open = demangle_path_maybe_open_generics (rdm);
1059 rdm->next = old_next;
1060 }
1061 }
1062 else if (eat (rdm, 'I'))
1063 {
1064 demangle_path (rdm, 0);
1065 PRINT ("<");
1066 open = 1;
1067 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1068 {
1069 if (i > 0)
1070 PRINT (", ");
1071 demangle_generic_arg (rdm);
1072 }
1073 }
1074 else
1075 demangle_path (rdm, 0);
1076 return open;
1077}
1078
1079static void
1080demangle_dyn_trait (struct rust_demangler *rdm)
1081{
1082 int open;
1083 struct rust_mangled_ident name;
1084
1085 if (rdm->errored)
1086 return;
1087
1088 open = demangle_path_maybe_open_generics (rdm);
1089
1090 while (eat (rdm, 'p'))
1091 {
1092 if (!open)
1093 PRINT ("<");
1094 else
1095 PRINT (", ");
1096 open = 1;
1097
1098 name = parse_ident (rdm);
1099 print_ident (rdm, name);
1100 PRINT (" = ");
1101 demangle_type (rdm);
1102 }
1103
1104 if (open)
1105 PRINT (">");
1106}
1107
1108static void
1109demangle_const (struct rust_demangler *rdm)
1110{
1111 char ty_tag;
1112 size_t old_next, backref;
1113
1114 if (rdm->errored)
1115 return;
1116
1117 if (eat (rdm, 'B'))
1118 {
1119 backref = parse_integer_62 (rdm);
1120 if (!rdm->skipping_printing)
1121 {
1122 old_next = rdm->next;
1123 rdm->next = backref;
1124 demangle_const (rdm);
1125 rdm->next = old_next;
1126 }
1127 return;
1128 }
1129
1130 ty_tag = next (rdm);
1131 switch (ty_tag)
1132 {
1133 /* Placeholder. */
1134 case 'p':
1135 PRINT ("_");
1136 return;
1137
1138 /* Unsigned integer types. */
1139 case 'h':
1140 case 't':
1141 case 'm':
1142 case 'y':
1143 case 'o':
1144 case 'j':
1145 demangle_const_uint (rdm);
1146 break;
1147
1148 /* Signed integer types. */
1149 case 'a':
1150 case 's':
1151 case 'l':
1152 case 'x':
1153 case 'n':
1154 case 'i':
1155 demangle_const_int (rdm);
1156 break;
1157
1158 /* Boolean. */
1159 case 'b':
1160 demangle_const_bool (rdm);
1161 break;
1162
1163 /* Character. */
1164 case 'c':
1165 demangle_const_char (rdm);
1166 break;
1167
1168 default:
1169 rdm->errored = 1;
1170 return;
1171 }
1172
1173 if (rdm->errored)
1174 return;
1175
1176 if (rdm->verbose)
1177 {
1178 PRINT (": ");
1179 PRINT (basic_type (ty_tag));
1180 }
1181}
1182
1183static void
1184demangle_const_uint (struct rust_demangler *rdm)
1185{
1186 size_t hex_len;
1187 uint64_t value;
1188
1189 if (rdm->errored)
1190 return;
1191
1192 hex_len = parse_hex_nibbles (rdm, &value);
1193
1194 if (hex_len > 16)
1195 {
1196 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1197 PRINT ("0x");
1198 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1199 }
1200 else if (hex_len > 0)
1201 print_uint64 (rdm, value);
1202 else
1203 rdm->errored = 1;
1204}
1205
/* Demangle and print a signed integer constant: an optional 'n'
   marks a negative value, and the magnitude follows encoded like an
   unsigned constant. */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");

  demangle_const_uint (rdm);
}
1213
1214static void
1215demangle_const_bool (struct rust_demangler *rdm)
1216{
1217 uint64_t value;
1218
1219 if (parse_hex_nibbles (rdm, &value) != 1)
1220 {
1221 rdm->errored = 1;
1222 return;
1223 }
1224
1225 if (value == 0)
1226 PRINT ("false");
1227 else if (value == 1)
1228 PRINT ("true");
1229 else
1230 rdm->errored = 1;
1231}
1232
1233static void
1234demangle_const_char (struct rust_demangler *rdm)
1235{
1236 size_t hex_len;
1237 uint64_t value;
1238
1239 hex_len = parse_hex_nibbles (rdm, &value);
1240
1241 if (hex_len == 0 || hex_len > 8)
1242 {
1243 rdm->errored = 1;
1244 return;
1245 }
1246
1247 /* Match Rust's character "debug" output as best as we can. */
1248 PRINT ("'");
1249 if (value == '\t')
1250 PRINT ("\\t");
1251 else if (value == '\r')
1252 PRINT ("\\r");
1253 else if (value == '\n')
1254 PRINT ("\\n");
1255 else if (value > ' ' && value < '~')
1256 /* Rust also considers many non-ASCII codepoints to be printable, but
1257 that logic is not easily ported to C. */
1258 print_str (rdm, (char *) &value, 1);
1259 else
1260 {
1261 PRINT ("\\u{");
1262 print_uint64_hex (rdm, value);
1263 PRINT ("}");
1264 }
1265 PRINT ("'");
32fc3719
EMB
1266}
1267
1268/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1269 The hex digits must contain at least 5 distinct digits. */
1270static int
1271is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1272{
1273 uint16_t seen;
1274 int nibble;
1275 size_t i, count;
1276
1277 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1278 return 0;
1279
1280 seen = 0;
1281 for (i = 0; i < 16; i++)
1282 {
1283 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1284 if (nibble < 0)
1285 return 0;
1286 seen |= (uint16_t)1 << nibble;
1287 }
1288
1289 /* Count how many distinct digits were seen. */
1290 count = 0;
1291 while (seen)
1292 {
1293 if (seen & 1)
1294 count++;
1295 seen >>= 1;
1296 }
1297
1298 return count >= 5;
1299}
1300
1301int
1302rust_demangle_callback (const char *mangled, int options,
1303 demangle_callbackref callback, void *opaque)
1304{
1305 const char *p;
1306 struct rust_demangler rdm;
1307 struct rust_mangled_ident ident;
1308
1309 rdm.sym = mangled;
1310 rdm.sym_len = 0;
1311
1312 rdm.callback_opaque = opaque;
1313 rdm.callback = callback;
1314
1315 rdm.next = 0;
1316 rdm.errored = 0;
84096498 1317 rdm.skipping_printing = 0;
32fc3719
EMB
1318 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1319 rdm.version = 0;
84096498 1320 rdm.bound_lifetime_depth = 0;
32fc3719 1321
84096498
EMB
1322 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1323 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1324 rdm.sym += 2;
1325 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
32fc3719
EMB
1326 {
1327 rdm.sym += 3;
1328 rdm.version = -1;
1329 }
1330 else
1331 return 0;
1332
84096498
EMB
1333 /* Paths (v0) always start with uppercase characters. */
1334 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1335 return 0;
1336
1337 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
32fc3719
EMB
1338 for (p = rdm.sym; *p; p++)
1339 {
1340 rdm.sym_len++;
1341
1342 if (*p == '_' || ISALNUM (*p))
1343 continue;
1344
84096498 1345 /* Legacy Rust symbols can also contain [.:$] characters. */
32fc3719
EMB
1346 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1347 continue;
1348
1349 return 0;
1350 }
1351
1352 /* Legacy Rust symbols need to be handled separately. */
1353 if (rdm.version == -1)
1354 {
1355 /* Legacy Rust symbols always end with E. */
1356 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1357 return 0;
1358 rdm.sym_len--;
1359
1360 /* Legacy Rust symbols also always end with a path segment
1361 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1362 This early check, before any parse_ident calls, should
1363 quickly filter out most C++ symbols unrelated to Rust. */
1364 if (!(rdm.sym_len > 19
1365 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1366 return 0;
1367
1368 do
1369 {
1370 ident = parse_ident (&rdm);
1371 if (rdm.errored || !ident.ascii)
1372 return 0;
1373 }
1374 while (rdm.next < rdm.sym_len);
1375
1376 /* The last path segment should be the hash. */
1377 if (!is_legacy_prefixed_hash (ident))
1378 return 0;
1379
1380 /* Reset the state for a second pass, to print the symbol. */
1381 rdm.next = 0;
1382 if (!rdm.verbose && rdm.sym_len > 19)
1383 {
1384 /* Hide the last segment, containing the hash, if not verbose. */
1385 rdm.sym_len -= 19;
1386 }
1387
1388 do
1389 {
1390 if (rdm.next > 0)
1391 print_str (&rdm, "::", 2);
1392
1393 ident = parse_ident (&rdm);
1394 print_ident (&rdm, ident);
1395 }
1396 while (rdm.next < rdm.sym_len);
1397 }
1398 else
84096498
EMB
1399 {
1400 demangle_path (&rdm, 1);
1401
1402 /* Skip instantiating crate. */
1403 if (!rdm.errored && rdm.next < rdm.sym_len)
1404 {
1405 rdm.skipping_printing = 1;
1406 demangle_path (&rdm, 0);
1407 }
1408
1409 /* It's an error to not reach the end. */
1410 rdm.errored |= rdm.next != rdm.sym_len;
1411 }
32fc3719
EMB
1412
1413 return !rdm.errored;
1414}
1415
/* Growable string buffers. */
struct str_buf
{
  /* Heap storage (NULL while nothing has been allocated), the number
     of bytes in use, and the number of bytes allocated. */
  char *ptr;
  size_t len;
  size_t cap;
  /* Non-zero once growing the buffer has failed. */
  int errored;
};

/* Make sure BUF has room for EXTRA more bytes after its current
   contents, growing the allocation by doubling (starting at 4 bytes).
   On size overflow `errored` is set; on allocation failure the
   storage is also released and the buffer reset to empty.  Nothing
   is done if an earlier failure was recorded. */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before. */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;

  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* Check for overflows. */
  if (min_new_cap < buf->cap)
    {
      buf->errored = 1;
      return;
    }

  new_cap = buf->cap;

  if (new_cap == 0)
    new_cap = 4;

  /* Double capacity until sufficiently large. */
  while (new_cap < min_new_cap)
    {
      new_cap *= 2;

      /* Check for overflows. */
      if (new_cap < buf->cap)
        {
          buf->errored = 1;
          return;
        }
    }

  new_ptr = (char *)realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    {
      free (buf->ptr);
      buf->ptr = NULL;
      buf->len = 0;
      buf->cap = 0;
      buf->errored = 1;
    }
  else
    {
      buf->ptr = new_ptr;
      buf->cap = new_cap;
    }
}

/* Append LEN bytes starting at DATA to BUF.  Nothing is appended if
   the buffer cannot be grown or an earlier failure was recorded. */
static void
str_buf_append (struct str_buf *buf, const char *data, size_t len)
{
  /* An empty append must not reach memcpy: DATA and `buf->ptr` may
     both be NULL here, and passing a null pointer to memcpy is
     undefined even for a length of 0. */
  if (len == 0)
    return;

  str_buf_reserve (buf, len);
  if (buf->errored)
    return;

  memcpy (buf->ptr + buf->len, data, len);
  buf->len += len;
}

/* Demangler output callback that appends to the `struct str_buf`
   passed as OPAQUE. */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  str_buf_append ((struct str_buf *)opaque, data, len);
}
1499
1500char *
1501rust_demangle (const char *mangled, int options)
1502{
1503 struct str_buf out;
1504 int success;
1505
1506 out.ptr = NULL;
1507 out.len = 0;
1508 out.cap = 0;
1509 out.errored = 0;
1510
1511 success = rust_demangle_callback (mangled, options,
1512 str_buf_demangle_callback, &out);
1513
1514 if (!success)
1515 {
1516 free (out.ptr);
1517 return NULL;
1518 }
1519
1520 str_buf_append (&out, "\0", 1);
1521 return out.ptr;
1522}