]>
Commit | Line | Data |
---|---|---|
1b43b6be APB |
1 | /* Shared functions related to mangling names for the GNU compiler |
2 | for the Java(TM) language. | |
5624e564 | 3 | Copyright (C) 2001-2015 Free Software Foundation, Inc. |
1b43b6be | 4 | |
f309ff0a | 5 | This file is part of GCC. |
1b43b6be | 6 | |
f309ff0a | 7 | GCC is free software; you can redistribute it and/or modify |
1b43b6be | 8 | it under the terms of the GNU General Public License as published by |
8328d52a | 9 | the Free Software Foundation; either version 3, or (at your option) |
1b43b6be APB |
10 | any later version. |
11 | ||
f309ff0a | 12 | GCC is distributed in the hope that it will be useful, |
1b43b6be APB |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8328d52a NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. | |
1b43b6be APB |
20 | |
21 | Java and all Java-based marks are trademarks or registered trademarks | |
22 | of Sun Microsystems, Inc. in the United States and other countries. | |
23 | The Free Software Foundation is independent of Sun Microsystems, Inc. */ | |
24 | ||
25 | /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */ | |
26 | ||
27 | #include "config.h" | |
28 | #include "system.h" | |
4977bab6 | 29 | #include "coretypes.h" |
1b43b6be | 30 | #include "jcf.h" |
40e23961 | 31 | #include "alias.h" |
1b43b6be | 32 | #include "tree.h" |
c7131fb2 | 33 | #include "options.h" |
1b43b6be APB |
34 | #include "java-tree.h" |
35 | #include "obstack.h" | |
718f9c0f | 36 | #include "diagnostic-core.h" |
1b43b6be | 37 | |
d2097937 | 38 | static void append_unicode_mangled_name (const char *, int); |
1b43b6be | 39 | #ifndef HAVE_AS_UTF8 |
d2097937 | 40 | static int unicode_mangling_length (const char *, int); |
1b43b6be APB |
41 | #endif |
42 | ||
43 | extern struct obstack *mangle_obstack; | |
44 | ||
/* Compare the Utf8 string STR, LENGTH bytes long, with the
   NUL-terminated string NAME.  Characters are fetched from STR with
   UTF8_GET and matched one at a time against the bytes of NAME.

   Return the (non-zero) difference at the first mismatch.  If all of
   NAME matched, return 0 when STR ended exactly at STR + LENGTH and
   1 otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const end = str + length;
  const char *expected = name;

  while (*expected != '\0')
    {
      /* UTF8_GET is expected to advance STR past the character it
         decodes (see its definition in jcf.h).  */
      int decoded = UTF8_GET (str, end);

      if (decoded != *expected)
        return decoded - *expected;
      expected++;
    }

  if (str == end)
    return 0;
  return 1;
}
60 | ||
/* Every C++ keyword (including GNU extensions), in strictly ascending
   byte order.  cxx_keyword_p binary-searches this table, so the order
   must be preserved.  If you change this, be sure also to change the
   list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* Reserved-identifier spellings.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Ordinary keywords and alternative tokens.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct",
  "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
171 | ||
172 | /* Return true if NAME is a C++ keyword. */ | |
173 | int | |
174 | cxx_keyword_p (const char *name, int length) | |
175 | { | |
176 | int last = ARRAY_SIZE (cxx_keywords); | |
177 | int first = 0; | |
178 | int mid = (last + first) / 2; | |
179 | int old = -1; | |
180 | ||
181 | for (mid = (last + first) / 2; | |
182 | mid != old; | |
183 | old = mid, mid = (last + first) / 2) | |
184 | { | |
185 | int kwl = strlen (cxx_keywords[mid]); | |
186 | int min_length = kwl > length ? length : kwl; | |
187 | int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); | |
188 | ||
189 | if (r == 0) | |
190 | { | |
191 | int i; | |
192 | /* We've found a match if all the remaining characters are `$'. */ | |
193 | for (i = min_length; i < length && name[i] == '$'; ++i) | |
194 | ; | |
195 | if (i == length) | |
196 | return 1; | |
197 | r = 1; | |
198 | } | |
199 | ||
200 | if (r < 0) | |
201 | last = mid; | |
202 | else | |
203 | first = mid; | |
204 | } | |
205 | return 0; | |
206 | } | |
207 | ||
/* If (NAME, LEN) is recognized by cxx_keyword_p, replace NAME with an
   alloca'd copy that has one `$' appended and increment LEN.  NAME and
   LEN must be modifiable lvalues; both are evaluated more than once.
   The copy is not NUL-terminated and lives in the caller's stack
   frame.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                          \
  do                                                            \
    {                                                           \
      if (cxx_keyword_p ((NAME), (LEN)))                        \
        {                                                       \
          char *mangled_kw_ = (char *) alloca ((LEN) + 1);      \
          memcpy (mangled_kw_, (NAME), (LEN));                  \
          mangled_kw_[(LEN)] = '$';                             \
          (NAME) = mangled_kw_;                                 \
          (LEN)++;                                              \
        }                                                       \
    }                                                           \
  while (0)
222 | ||
223 | ||
1b43b6be APB |
224 | /* If the assembler doesn't support UTF8 in symbol names, some |
225 | characters might need to be escaped. */ | |
226 | ||
227 | #ifndef HAVE_AS_UTF8 | |
228 | ||
229 | /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string | |
230 | appropriately mangled (with Unicode escapes if needed) to | |
231 | MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so | |
232 | frequently that they could be cached. */ | |
233 | ||
234 | void | |
0a2f0c54 | 235 | append_gpp_mangled_name (const char *name, int len) |
1b43b6be | 236 | { |
3ad1aba1 | 237 | int encoded_len, needs_escapes; |
1b43b6be APB |
238 | char buf[6]; |
239 | ||
3ad1aba1 AH |
240 | MANGLE_CXX_KEYWORDS (name, len); |
241 | ||
242 | encoded_len = unicode_mangling_length (name, len); | |
243 | needs_escapes = encoded_len > 0; | |
244 | ||
1b43b6be APB |
245 | sprintf (buf, "%d", (needs_escapes ? encoded_len : len)); |
246 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
247 | ||
248 | if (needs_escapes) | |
249 | append_unicode_mangled_name (name, len); | |
250 | else | |
251 | obstack_grow (mangle_obstack, name, len); | |
252 | } | |
253 | ||
254 | /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string | |
255 | appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK. | |
256 | Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in | |
de4984af | 257 | which case `__U' will be mangled `__U_'. */ |
1b43b6be APB |
258 | |
259 | static void | |
0a2f0c54 | 260 | append_unicode_mangled_name (const char *name, int len) |
1b43b6be APB |
261 | { |
262 | const unsigned char *ptr; | |
263 | const unsigned char *limit = (const unsigned char *)name + len; | |
264 | int uuU = 0; | |
265 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
266 | { | |
267 | int ch = UTF8_GET(ptr, limit); | |
268 | ||
de4984af | 269 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
5a3a8eb1 JD |
270 | { |
271 | obstack_1grow (mangle_obstack, ch); | |
272 | uuU = 0; | |
273 | } | |
1b43b6be APB |
274 | /* Everything else needs encoding */ |
275 | else | |
276 | { | |
277 | char buf [9]; | |
278 | if (ch == '_' || ch == 'U') | |
279 | { | |
280 | /* Prepare to recognize __U */ | |
281 | if (ch == '_' && (uuU < 3)) | |
282 | { | |
283 | uuU++; | |
284 | obstack_1grow (mangle_obstack, ch); | |
285 | } | |
286 | /* We recognize __U that we wish to encode | |
287 | __U_. Finish the encoding. */ | |
288 | else if (ch == 'U' && (uuU == 2)) | |
289 | { | |
290 | uuU = 0; | |
291 | obstack_grow (mangle_obstack, "U_", 2); | |
292 | } | |
1e97aa40 APB |
293 | /* Otherwise, just reset uuU and emit the character we |
294 | have. */ | |
295 | else | |
296 | { | |
297 | uuU = 0; | |
298 | obstack_1grow (mangle_obstack, ch); | |
299 | } | |
1b43b6be APB |
300 | continue; |
301 | } | |
302 | sprintf (buf, "__U%x_", ch); | |
303 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
304 | uuU = 0; | |
305 | } | |
306 | } | |
307 | } | |
308 | ||
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, including Unicode escapes)
   occupies.  Return that count if at least one escape is required,
   and 0 if the name can be emitted unchanged.

   An invalid Utf8 sequence (UTF8_GET yielding a negative value) is
   reported with error () and then counted like any other character
   that needs an escape.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *const end = cursor + len;
  int escaped = 0;      /* Set once any escape has been counted.  */
  int total = 0;        /* Characters in the mangled name so far.  */
  int underscores = 0;  /* Consecutive `_' just seen; used to spot
                           a literal `__U'.  */

  while (cursor < end)
    {
      int ch = UTF8_GET (cursor, end);

      if (ch < 0)
        error ("internal error - invalid Utf8 name");

      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
        {
          /* Copied through as a single character.  */
          total++;
          underscores = 0;
        }
      else if (ch == '_')
        {
          /* Emitted as is; extend the run of underscores, or start
             over once the run is already three long.  */
          total++;
          underscores = (underscores < 3) ? underscores + 1 : 0;
        }
      else if (ch == 'U')
        {
          total++;
          if (underscores == 2)
            {
              /* A literal `__U' becomes `__U_': one more character,
                 and the name now counts as escaped.  */
              total++;
              escaped = 1;
            }
          underscores = 0;
        }
      else
        {
          /* `__U' + hex digits + `_'.  Two hex digits at least,
             three above 0xff, four above 0xfff.  */
          int hex_digits = 2;

          if (ch > 0xff)
            hex_digits++;
          if (ch > 0xfff)
            hex_digits++;

          total += 4 + hex_digits;
          escaped = 1;
          underscores = 0;
        }
    }

  return escaped ? total : 0;
}
376 | ||
377 | #else | |
378 | ||
379 | /* The assembler supports UTF8, we don't use escapes. Mangling is | |
380 | simply <N>NAME. <N> is the number of UTF8 encoded characters that | |
381 | are found in NAME. Note that `java', `lang' and `Object' are used | |
382 | so frequently that they could be cached. */ | |
383 | ||
384 | void | |
0a2f0c54 | 385 | append_gpp_mangled_name (const char *name, int len) |
1b43b6be APB |
386 | { |
387 | const unsigned char *ptr; | |
3ad1aba1 | 388 | const unsigned char *limit; |
1b43b6be APB |
389 | int encoded_len; |
390 | char buf [6]; | |
391 | ||
3ad1aba1 AH |
392 | MANGLE_CXX_KEYWORDS (name, len); |
393 | ||
394 | limit = (const unsigned char *)name + len; | |
395 | ||
1b43b6be APB |
396 | /* Compute the length of the string we wish to mangle. */ |
397 | for (encoded_len = 0, ptr = (const unsigned char *) name; | |
398 | ptr < limit; encoded_len++) | |
399 | { | |
400 | int ch = UTF8_GET(ptr, limit); | |
401 | ||
402 | if (ch < 0) | |
403 | error ("internal error - invalid Utf8 name"); | |
404 | } | |
405 | ||
406 | sprintf (buf, "%d", encoded_len); | |
407 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
408 | obstack_grow (mangle_obstack, name, len); | |
409 | } | |
410 | ||
411 | #endif /* HAVE_AS_UTF8 */ |