]>
Commit | Line | Data |
---|---|---|
bca8957b | 1 | /* Shared functions related to mangling names for the GNU compiler |
2 | for the Java(TM) language. | |
92468061 | 3 | Copyright (C) 2001, 2002, 2003, 2007, 2009, 2010 |
4 | Free Software Foundation, Inc. | |
bca8957b | 5 | |
7d82ed5e | 6 | This file is part of GCC. |
bca8957b | 7 | |
7d82ed5e | 8 | GCC is free software; you can redistribute it and/or modify |
bca8957b | 9 | it under the terms of the GNU General Public License as published by |
e4b52719 | 10 | the Free Software Foundation; either version 3, or (at your option) |
bca8957b | 11 | any later version. |
12 | ||
7d82ed5e | 13 | GCC is distributed in the hope that it will be useful, |
bca8957b | 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
e4b52719 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. | |
bca8957b | 21 | |
22 | Java and all Java-based marks are trademarks or registered trademarks | |
23 | of Sun Microsystems, Inc. in the United States and other countries. | |
24 | The Free Software Foundation is independent of Sun Microsystems, Inc. */ | |
25 | ||
26 | /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */ | |
27 | ||
28 | #include "config.h" | |
29 | #include "system.h" | |
805e22b2 | 30 | #include "coretypes.h" |
bca8957b | 31 | #include "jcf.h" |
32 | #include "tree.h" | |
33 | #include "java-tree.h" | |
34 | #include "obstack.h" | |
0b205f4c | 35 | #include "diagnostic-core.h" |
bca8957b | 36 | |
/* Both helpers below are defined only when the assembler cannot accept
   UTF-8 in symbol names, so they are declared under the same guard as
   their definitions.  Declaring a static function that is never
   defined draws a compiler warning in the HAVE_AS_UTF8 build.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int unicode_mangling_length (const char *, int);
#endif

/* Obstack that receives the mangled name; defined elsewhere.  */
extern struct obstack *mangle_obstack;
43 | ||
/* Compare the LENGTH bytes at STR, decoded one character at a time
   with UTF8_GET, against the NUL-terminated string NAME.

   Returns the difference between the first decoded character that does
   not match and the corresponding character of NAME.  If every
   character of NAME matched, returns 0 when STR was consumed exactly
   and 1 when STR was not consumed exactly.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const end = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int decoded = UTF8_GET (str, end);
      int diff = decoded - *expected;

      if (diff != 0)
        return diff;
    }

  if (str == end)
    return 0;
  return 1;
}
59 | ||
/* Every C++ keyword (including GNU extensions), in strictly ascending
   byte order.  cxx_keyword_p binary-searches this table, so the order
   must be preserved when entries are added.  Keep this in sync with
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* ISO C99 / GNU spellings with leading underscores.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Plain keywords and alternative operator tokens.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
170 | ||
171 | /* Return true if NAME is a C++ keyword. */ | |
172 | int | |
173 | cxx_keyword_p (const char *name, int length) | |
174 | { | |
175 | int last = ARRAY_SIZE (cxx_keywords); | |
176 | int first = 0; | |
177 | int mid = (last + first) / 2; | |
178 | int old = -1; | |
179 | ||
180 | for (mid = (last + first) / 2; | |
181 | mid != old; | |
182 | old = mid, mid = (last + first) / 2) | |
183 | { | |
184 | int kwl = strlen (cxx_keywords[mid]); | |
185 | int min_length = kwl > length ? length : kwl; | |
186 | int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); | |
187 | ||
188 | if (r == 0) | |
189 | { | |
190 | int i; | |
191 | /* We've found a match if all the remaining characters are `$'. */ | |
192 | for (i = min_length; i < length && name[i] == '$'; ++i) | |
193 | ; | |
194 | if (i == length) | |
195 | return 1; | |
196 | r = 1; | |
197 | } | |
198 | ||
199 | if (r < 0) | |
200 | last = mid; | |
201 | else | |
202 | first = mid; | |
203 | } | |
204 | return 0; | |
205 | } | |
206 | ||
/* If (NAME, LEN) is recognized by cxx_keyword_p, rebind NAME to a copy
   with one `$' appended and increment LEN.  NAME and LEN must be
   modifiable lvalues and are evaluated more than once.  The copy is
   obtained from alloca and is not NUL-terminated.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                          \
  do                                                            \
    {                                                           \
      if (cxx_keyword_p ((NAME), (LEN)))                        \
        {                                                       \
          char *kw_copy = (char *) alloca ((LEN) + 1);          \
          memcpy (kw_copy, (NAME), (LEN));                      \
          kw_copy[(LEN)] = '$';                                 \
          (NAME) = kw_copy;                                     \
          (LEN)++;                                              \
        }                                                       \
    }                                                           \
  while (0)
221 | ||
222 | ||
bca8957b | 223 | /* If the assembler doesn't support UTF8 in symbol names, some |
224 | characters might need to be escaped. */ | |
225 | ||
226 | #ifndef HAVE_AS_UTF8 | |
227 | ||
228 | /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string | |
229 | appropriately mangled (with Unicode escapes if needed) to | |
230 | MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so | |
231 | frequently that they could be cached. */ | |
232 | ||
233 | void | |
2883a3ed | 234 | append_gpp_mangled_name (const char *name, int len) |
bca8957b | 235 | { |
fce8df10 | 236 | int encoded_len, needs_escapes; |
bca8957b | 237 | char buf[6]; |
238 | ||
fce8df10 | 239 | MANGLE_CXX_KEYWORDS (name, len); |
240 | ||
241 | encoded_len = unicode_mangling_length (name, len); | |
242 | needs_escapes = encoded_len > 0; | |
243 | ||
bca8957b | 244 | sprintf (buf, "%d", (needs_escapes ? encoded_len : len)); |
245 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
246 | ||
247 | if (needs_escapes) | |
248 | append_unicode_mangled_name (name, len); | |
249 | else | |
250 | obstack_grow (mangle_obstack, name, len); | |
251 | } | |
252 | ||
253 | /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string | |
254 | appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK. | |
255 | Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in | |
193252e8 | 256 | which case `__U' will be mangled `__U_'. */ |
bca8957b | 257 | |
258 | static void | |
2883a3ed | 259 | append_unicode_mangled_name (const char *name, int len) |
bca8957b | 260 | { |
261 | const unsigned char *ptr; | |
262 | const unsigned char *limit = (const unsigned char *)name + len; | |
263 | int uuU = 0; | |
264 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
265 | { | |
266 | int ch = UTF8_GET(ptr, limit); | |
267 | ||
193252e8 | 268 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
df6cfbc5 | 269 | { |
270 | obstack_1grow (mangle_obstack, ch); | |
271 | uuU = 0; | |
272 | } | |
bca8957b | 273 | /* Everything else needs encoding */ |
274 | else | |
275 | { | |
276 | char buf [9]; | |
277 | if (ch == '_' || ch == 'U') | |
278 | { | |
279 | /* Prepare to recognize __U */ | |
280 | if (ch == '_' && (uuU < 3)) | |
281 | { | |
282 | uuU++; | |
283 | obstack_1grow (mangle_obstack, ch); | |
284 | } | |
285 | /* We recognize __U that we wish to encode | |
286 | __U_. Finish the encoding. */ | |
287 | else if (ch == 'U' && (uuU == 2)) | |
288 | { | |
289 | uuU = 0; | |
290 | obstack_grow (mangle_obstack, "U_", 2); | |
291 | } | |
53c42c23 | 292 | /* Otherwise, just reset uuU and emit the character we |
293 | have. */ | |
294 | else | |
295 | { | |
296 | uuU = 0; | |
297 | obstack_1grow (mangle_obstack, ch); | |
298 | } | |
bca8957b | 299 | continue; |
300 | } | |
301 | sprintf (buf, "__U%x_", ch); | |
302 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
303 | uuU = 0; | |
304 | } | |
305 | } | |
306 | } | |
307 | ||
308 | /* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the | |
309 | length of the string as mangled (a la g++) including Unicode | |
310 | escapes. If no escapes are needed, return 0. */ | |
311 | ||
312 | static int | |
2883a3ed | 313 | unicode_mangling_length (const char *name, int len) |
bca8957b | 314 | { |
315 | const unsigned char *ptr; | |
316 | const unsigned char *limit = (const unsigned char *)name + len; | |
317 | int need_escapes = 0; /* Whether we need an escape or not */ | |
318 | int num_chars = 0; /* Number of characters in the mangled name */ | |
319 | int uuU = 0; /* Help us to find __U. 0: '_', 1: '__' */ | |
320 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
321 | { | |
322 | int ch = UTF8_GET(ptr, limit); | |
323 | ||
324 | if (ch < 0) | |
325 | error ("internal error - invalid Utf8 name"); | |
193252e8 | 326 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
df6cfbc5 | 327 | { |
328 | num_chars++; | |
329 | uuU = 0; | |
330 | } | |
bca8957b | 331 | /* Everything else needs encoding */ |
332 | else | |
333 | { | |
334 | int encoding_length = 2; | |
335 | ||
336 | if (ch == '_' || ch == 'U') | |
337 | { | |
53c42c23 | 338 | /* It's always at least one character. */ |
339 | num_chars++; | |
340 | ||
bca8957b | 341 | /* Prepare to recognize __U */ |
342 | if (ch == '_' && (uuU < 3)) | |
53c42c23 | 343 | uuU++; |
344 | ||
345 | /* We recognize __U that we wish to encode __U_, we | |
346 | count one more character. */ | |
bca8957b | 347 | else if (ch == 'U' && (uuU == 2)) |
348 | { | |
53c42c23 | 349 | num_chars++; |
bca8957b | 350 | need_escapes = 1; |
351 | uuU = 0; | |
352 | } | |
53c42c23 | 353 | /* Otherwise, just reset uuU */ |
354 | else | |
355 | uuU = 0; | |
356 | ||
bca8957b | 357 | continue; |
358 | } | |
359 | ||
360 | if (ch > 0xff) | |
361 | encoding_length++; | |
362 | if (ch > 0xfff) | |
363 | encoding_length++; | |
364 | ||
365 | num_chars += (4 + encoding_length); | |
366 | need_escapes = 1; | |
367 | uuU = 0; | |
368 | } | |
369 | } | |
370 | if (need_escapes) | |
371 | return num_chars; | |
372 | else | |
373 | return 0; | |
374 | } | |
375 | ||
376 | #else | |
377 | ||
378 | /* The assembler supports UTF8, we don't use escapes. Mangling is | |
379 | simply <N>NAME. <N> is the number of UTF8 encoded characters that | |
380 | are found in NAME. Note that `java', `lang' and `Object' are used | |
381 | so frequently that they could be cached. */ | |
382 | ||
383 | void | |
2883a3ed | 384 | append_gpp_mangled_name (const char *name, int len) |
bca8957b | 385 | { |
386 | const unsigned char *ptr; | |
fce8df10 | 387 | const unsigned char *limit; |
bca8957b | 388 | int encoded_len; |
389 | char buf [6]; | |
390 | ||
fce8df10 | 391 | MANGLE_CXX_KEYWORDS (name, len); |
392 | ||
393 | limit = (const unsigned char *)name + len; | |
394 | ||
bca8957b | 395 | /* Compute the length of the string we wish to mangle. */ |
396 | for (encoded_len = 0, ptr = (const unsigned char *) name; | |
397 | ptr < limit; encoded_len++) | |
398 | { | |
399 | int ch = UTF8_GET(ptr, limit); | |
400 | ||
401 | if (ch < 0) | |
402 | error ("internal error - invalid Utf8 name"); | |
403 | } | |
404 | ||
405 | sprintf (buf, "%d", encoded_len); | |
406 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
407 | obstack_grow (mangle_obstack, name, len); | |
408 | } | |
409 | ||
410 | #endif /* HAVE_AS_UTF8 */ |