]>
Commit | Line | Data |
---|---|---|
bca8957b | 1 | /* Shared functions related to mangling names for the GNU compiler |
2 | for the Java(TM) language. | |
d353bf18 | 3 | Copyright (C) 2001-2015 Free Software Foundation, Inc. |
bca8957b | 4 | |
7d82ed5e | 5 | This file is part of GCC. |
bca8957b | 6 | |
7d82ed5e | 7 | GCC is free software; you can redistribute it and/or modify |
bca8957b | 8 | it under the terms of the GNU General Public License as published by |
e4b52719 | 9 | the Free Software Foundation; either version 3, or (at your option) |
bca8957b | 10 | any later version. |
11 | ||
7d82ed5e | 12 | GCC is distributed in the hope that it will be useful, |
bca8957b | 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
e4b52719 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. | |
bca8957b | 20 | |
21 | Java and all Java-based marks are trademarks or registered trademarks | |
22 | of Sun Microsystems, Inc. in the United States and other countries. | |
23 | The Free Software Foundation is independent of Sun Microsystems, Inc. */ | |
24 | ||
25 | /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */ | |
26 | ||
27 | #include "config.h" | |
28 | #include "system.h" | |
805e22b2 | 29 | #include "coretypes.h" |
bca8957b | 30 | #include "jcf.h" |
b20a8bb4 | 31 | #include "alias.h" |
32 | #include "symtab.h" | |
33 | #include "options.h" | |
bca8957b | 34 | #include "tree.h" |
35 | #include "java-tree.h" | |
36 | #include "obstack.h" | |
0b205f4c | 37 | #include "diagnostic-core.h" |
bca8957b | 38 | |
/* Helpers that are only defined when the assembler cannot handle
   UTF-8 symbol names.  Both prototypes sit inside the guard so that
   neither static function is declared without a definition when
   HAVE_AS_UTF8 is set.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int unicode_mangling_length (const char *, int);
#endif

/* Obstack that receives the mangled name; it is defined in another
   translation unit.  */
extern struct obstack *mangle_obstack;
45 | ||
/* Compare the LENGTH bytes at STR, read one character at a time with
   UTF8_GET, against the NUL-terminated string NAME.  Return the
   difference of the first pair of characters that disagree, zero when
   NAME is exhausted exactly at the end of STR, and 1 when NAME is
   exhausted while bytes of STR remain.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const end = str + length;
  const char *kw;

  for (kw = name; *kw != '\0'; kw++)
    {
      const int decoded = UTF8_GET (str, end);
      const int diff = decoded - *kw;

      if (diff != 0)
        return diff;
    }

  if (str == end)
    return 0;
  return 1;
}
61 | ||
/* Every C++ keyword (including GNU spellings), kept in strictly
   ascending byte order because cxx_keyword_p binary-searches this
   table.  If you change this, be sure also to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* Implementation-reserved spellings.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Ordinary keywords and alternative tokens, grouped by initial.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
172 | ||
173 | /* Return true if NAME is a C++ keyword. */ | |
174 | int | |
175 | cxx_keyword_p (const char *name, int length) | |
176 | { | |
177 | int last = ARRAY_SIZE (cxx_keywords); | |
178 | int first = 0; | |
179 | int mid = (last + first) / 2; | |
180 | int old = -1; | |
181 | ||
182 | for (mid = (last + first) / 2; | |
183 | mid != old; | |
184 | old = mid, mid = (last + first) / 2) | |
185 | { | |
186 | int kwl = strlen (cxx_keywords[mid]); | |
187 | int min_length = kwl > length ? length : kwl; | |
188 | int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); | |
189 | ||
190 | if (r == 0) | |
191 | { | |
192 | int i; | |
193 | /* We've found a match if all the remaining characters are `$'. */ | |
194 | for (i = min_length; i < length && name[i] == '$'; ++i) | |
195 | ; | |
196 | if (i == length) | |
197 | return 1; | |
198 | r = 1; | |
199 | } | |
200 | ||
201 | if (r < 0) | |
202 | last = mid; | |
203 | else | |
204 | first = mid; | |
205 | } | |
206 | return 0; | |
207 | } | |
208 | ||
/* If (NAME, LEN) is recognized by cxx_keyword_p, repoint NAME at an
   alloca'd copy with one extra `$' appended and increment LEN.  NAME
   and LEN must be modifiable lvalues; the copy is not NUL-terminated.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                          \
  do                                                            \
    {                                                           \
      char *kw_copy_;                                           \
      if (!cxx_keyword_p ((NAME), (LEN)))                       \
        break;                                                  \
      kw_copy_ = (char *) alloca ((LEN) + 1);                   \
      memcpy (kw_copy_, (NAME), (LEN));                         \
      kw_copy_[(LEN)] = '$';                                    \
      (NAME) = kw_copy_;                                        \
      (LEN)++;                                                  \
    }                                                           \
  while (0)
223 | ||
224 | ||
bca8957b | 225 | /* If the assembler doesn't support UTF8 in symbol names, some |
226 | characters might need to be escaped. */ | |
227 | ||
228 | #ifndef HAVE_AS_UTF8 | |
229 | ||
230 | /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string | |
231 | appropriately mangled (with Unicode escapes if needed) to | |
232 | MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so | |
233 | frequently that they could be cached. */ | |
234 | ||
235 | void | |
2883a3ed | 236 | append_gpp_mangled_name (const char *name, int len) |
bca8957b | 237 | { |
fce8df10 | 238 | int encoded_len, needs_escapes; |
bca8957b | 239 | char buf[6]; |
240 | ||
fce8df10 | 241 | MANGLE_CXX_KEYWORDS (name, len); |
242 | ||
243 | encoded_len = unicode_mangling_length (name, len); | |
244 | needs_escapes = encoded_len > 0; | |
245 | ||
bca8957b | 246 | sprintf (buf, "%d", (needs_escapes ? encoded_len : len)); |
247 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
248 | ||
249 | if (needs_escapes) | |
250 | append_unicode_mangled_name (name, len); | |
251 | else | |
252 | obstack_grow (mangle_obstack, name, len); | |
253 | } | |
254 | ||
255 | /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string | |
256 | appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK. | |
257 | Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in | |
193252e8 | 258 | which case `__U' will be mangled `__U_'. */ |
bca8957b | 259 | |
260 | static void | |
2883a3ed | 261 | append_unicode_mangled_name (const char *name, int len) |
bca8957b | 262 | { |
263 | const unsigned char *ptr; | |
264 | const unsigned char *limit = (const unsigned char *)name + len; | |
265 | int uuU = 0; | |
266 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
267 | { | |
268 | int ch = UTF8_GET(ptr, limit); | |
269 | ||
193252e8 | 270 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
df6cfbc5 | 271 | { |
272 | obstack_1grow (mangle_obstack, ch); | |
273 | uuU = 0; | |
274 | } | |
bca8957b | 275 | /* Everything else needs encoding */ |
276 | else | |
277 | { | |
278 | char buf [9]; | |
279 | if (ch == '_' || ch == 'U') | |
280 | { | |
281 | /* Prepare to recognize __U */ | |
282 | if (ch == '_' && (uuU < 3)) | |
283 | { | |
284 | uuU++; | |
285 | obstack_1grow (mangle_obstack, ch); | |
286 | } | |
287 | /* We recognize __U that we wish to encode | |
288 | __U_. Finish the encoding. */ | |
289 | else if (ch == 'U' && (uuU == 2)) | |
290 | { | |
291 | uuU = 0; | |
292 | obstack_grow (mangle_obstack, "U_", 2); | |
293 | } | |
53c42c23 | 294 | /* Otherwise, just reset uuU and emit the character we |
295 | have. */ | |
296 | else | |
297 | { | |
298 | uuU = 0; | |
299 | obstack_1grow (mangle_obstack, ch); | |
300 | } | |
bca8957b | 301 | continue; |
302 | } | |
303 | sprintf (buf, "__U%x_", ch); | |
304 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
305 | uuU = 0; | |
306 | } | |
307 | } | |
308 | } | |
309 | ||
310 | /* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the | |
311 | length of the string as mangled (a la g++) including Unicode | |
312 | escapes. If no escapes are needed, return 0. */ | |
313 | ||
314 | static int | |
2883a3ed | 315 | unicode_mangling_length (const char *name, int len) |
bca8957b | 316 | { |
317 | const unsigned char *ptr; | |
318 | const unsigned char *limit = (const unsigned char *)name + len; | |
319 | int need_escapes = 0; /* Whether we need an escape or not */ | |
320 | int num_chars = 0; /* Number of characters in the mangled name */ | |
321 | int uuU = 0; /* Help us to find __U. 0: '_', 1: '__' */ | |
322 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
323 | { | |
324 | int ch = UTF8_GET(ptr, limit); | |
325 | ||
326 | if (ch < 0) | |
327 | error ("internal error - invalid Utf8 name"); | |
193252e8 | 328 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
df6cfbc5 | 329 | { |
330 | num_chars++; | |
331 | uuU = 0; | |
332 | } | |
bca8957b | 333 | /* Everything else needs encoding */ |
334 | else | |
335 | { | |
336 | int encoding_length = 2; | |
337 | ||
338 | if (ch == '_' || ch == 'U') | |
339 | { | |
53c42c23 | 340 | /* It's always at least one character. */ |
341 | num_chars++; | |
342 | ||
bca8957b | 343 | /* Prepare to recognize __U */ |
344 | if (ch == '_' && (uuU < 3)) | |
53c42c23 | 345 | uuU++; |
346 | ||
347 | /* We recognize __U that we wish to encode __U_, we | |
348 | count one more character. */ | |
bca8957b | 349 | else if (ch == 'U' && (uuU == 2)) |
350 | { | |
53c42c23 | 351 | num_chars++; |
bca8957b | 352 | need_escapes = 1; |
353 | uuU = 0; | |
354 | } | |
53c42c23 | 355 | /* Otherwise, just reset uuU */ |
356 | else | |
357 | uuU = 0; | |
358 | ||
bca8957b | 359 | continue; |
360 | } | |
361 | ||
362 | if (ch > 0xff) | |
363 | encoding_length++; | |
364 | if (ch > 0xfff) | |
365 | encoding_length++; | |
366 | ||
367 | num_chars += (4 + encoding_length); | |
368 | need_escapes = 1; | |
369 | uuU = 0; | |
370 | } | |
371 | } | |
372 | if (need_escapes) | |
373 | return num_chars; | |
374 | else | |
375 | return 0; | |
376 | } | |
377 | ||
378 | #else | |
379 | ||
380 | /* The assembler supports UTF8, we don't use escapes. Mangling is | |
381 | simply <N>NAME. <N> is the number of UTF8 encoded characters that | |
382 | are found in NAME. Note that `java', `lang' and `Object' are used | |
383 | so frequently that they could be cached. */ | |
384 | ||
385 | void | |
2883a3ed | 386 | append_gpp_mangled_name (const char *name, int len) |
bca8957b | 387 | { |
388 | const unsigned char *ptr; | |
fce8df10 | 389 | const unsigned char *limit; |
bca8957b | 390 | int encoded_len; |
391 | char buf [6]; | |
392 | ||
fce8df10 | 393 | MANGLE_CXX_KEYWORDS (name, len); |
394 | ||
395 | limit = (const unsigned char *)name + len; | |
396 | ||
bca8957b | 397 | /* Compute the length of the string we wish to mangle. */ |
398 | for (encoded_len = 0, ptr = (const unsigned char *) name; | |
399 | ptr < limit; encoded_len++) | |
400 | { | |
401 | int ch = UTF8_GET(ptr, limit); | |
402 | ||
403 | if (ch < 0) | |
404 | error ("internal error - invalid Utf8 name"); | |
405 | } | |
406 | ||
407 | sprintf (buf, "%d", encoded_len); | |
408 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
409 | obstack_grow (mangle_obstack, name, len); | |
410 | } | |
411 | ||
412 | #endif /* HAVE_AS_UTF8 */ |