]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/java/mangle_name.c
Update copyright years.
[thirdparty/gcc.git] / gcc / java / mangle_name.c
1 /* Shared functions related to mangling names for the GNU compiler
2 for the Java(TM) language.
3 Copyright (C) 2001-2016 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
20
21 Java and all Java-based marks are trademarks or registered trademarks
22 of Sun Microsystems, Inc. in the United States and other countries.
23 The Free Software Foundation is independent of Sun Microsystems, Inc. */
24
25 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "obstack.h"
31 #include "diagnostic-core.h"
32 #include "jcf.h"
33
/* Forward declarations for the escaping helpers.  Both are defined
   further down inside the matching `#ifndef HAVE_AS_UTF8' region, so
   they are declared under the same condition; declaring either one
   unconditionally would leave a static function declared but never
   defined when HAVE_AS_UTF8 is set.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int unicode_mangling_length (const char *, int);
#endif
38
39 extern struct obstack *mangle_obstack;
40
/* Compare the Utf8 string (STR, LENGTH) against the NUL-terminated
   string NAME, one decoded character at a time.

   Returns the difference between the first pair of characters that
   differ (decoded character minus NAME's character).  If every
   character of NAME matched, returns 0 when STR was consumed exactly
   up to STR + LENGTH and 1 otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected; expected++)
    {
      /* UTF8_GET advances STR past the character it decodes.  */
      int ch = UTF8_GET (str, limit);

      if (ch != *expected)
        return ch - *expected;
    }

  if (str == limit)
    return 0;
  return 1;
}
56
/* Every C++ keyword, in ascending byte order; cxx_keyword_p bisects
   this table, so the ordering must be maintained.  If you change this,
   be sure also to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* Underscore-prefixed extensions.  */
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__",
  "__imag", "__imag__", "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  /* a */
  "and", "and_eq", "asm", "auto",
  /* b */
  "bitand", "bitor", "bool", "break",
  /* c */
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  /* d */
  "default", "delete", "do", "double", "dynamic_cast",
  /* e */
  "else", "enum", "explicit", "export", "extern",
  /* f */
  "false", "float", "for", "friend",
  /* g */
  "goto",
  /* i */
  "if", "inline", "int",
  /* l */
  "long",
  /* m */
  "mutable",
  /* n */
  "namespace", "new", "not", "not_eq",
  /* o */
  "operator", "or", "or_eq",
  /* p */
  "private", "protected", "public",
  /* r */
  "register", "reinterpret_cast", "return",
  /* s */
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  /* t */
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  /* u */
  "union", "unsigned", "using",
  /* v */
  "virtual", "void", "volatile",
  /* w */
  "wchar_t", "while",
  /* x */
  "xor", "xor_eq"
};
167
168 /* Return true if NAME is a C++ keyword. */
169 int
170 cxx_keyword_p (const char *name, int length)
171 {
172 int last = ARRAY_SIZE (cxx_keywords);
173 int first = 0;
174 int mid = (last + first) / 2;
175 int old = -1;
176
177 for (mid = (last + first) / 2;
178 mid != old;
179 old = mid, mid = (last + first) / 2)
180 {
181 int kwl = strlen (cxx_keywords[mid]);
182 int min_length = kwl > length ? length : kwl;
183 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
184
185 if (r == 0)
186 {
187 int i;
188 /* We've found a match if all the remaining characters are `$'. */
189 for (i = min_length; i < length && name[i] == '$'; ++i)
190 ;
191 if (i == length)
192 return 1;
193 r = 1;
194 }
195
196 if (r < 0)
197 last = mid;
198 else
199 first = mid;
200 }
201 return 0;
202 }
203
/* If (NAME, LEN) is recognized by cxx_keyword_p, append a `$' to it:
   NAME is redirected to a copy of the original LEN bytes followed by
   `$', and LEN is incremented.  The copy is obtained with alloca and
   is not NUL-terminated.  NAME and LEN are evaluated more than once
   and are assigned to, so both must be plain modifiable lvalues.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                          \
do                                                              \
  {                                                             \
    if (cxx_keyword_p ((NAME), (LEN)))                          \
      {                                                         \
        char *tmp_buf = (char *) alloca ((LEN) + 1);            \
        memcpy (tmp_buf, (NAME), (LEN));                        \
        tmp_buf[(LEN)] = '$';                                   \
        (NAME) = tmp_buf;                                       \
        ++(LEN);                                                \
      }                                                         \
  }                                                             \
while (0)
218
219
220 /* If the assembler doesn't support UTF8 in symbol names, some
221 characters might need to be escaped. */
222
223 #ifndef HAVE_AS_UTF8
224
225 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
226 appropriately mangled (with Unicode escapes if needed) to
227 MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
228 frequently that they could be cached. */
229
230 void
231 append_gpp_mangled_name (const char *name, int len)
232 {
233 int encoded_len, needs_escapes;
234 char buf[6];
235
236 MANGLE_CXX_KEYWORDS (name, len);
237
238 encoded_len = unicode_mangling_length (name, len);
239 needs_escapes = encoded_len > 0;
240
241 sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
242 obstack_grow (mangle_obstack, buf, strlen (buf));
243
244 if (needs_escapes)
245 append_unicode_mangled_name (name, len);
246 else
247 obstack_grow (mangle_obstack, name, len);
248 }
249
250 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
251 appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
252 Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
253 which case `__U' will be mangled `__U_'. */
254
255 static void
256 append_unicode_mangled_name (const char *name, int len)
257 {
258 const unsigned char *ptr;
259 const unsigned char *limit = (const unsigned char *)name + len;
260 int uuU = 0;
261 for (ptr = (const unsigned char *) name; ptr < limit; )
262 {
263 int ch = UTF8_GET(ptr, limit);
264
265 if ((ISALNUM (ch) && ch != 'U') || ch == '$')
266 {
267 obstack_1grow (mangle_obstack, ch);
268 uuU = 0;
269 }
270 /* Everything else needs encoding */
271 else
272 {
273 char buf [9];
274 if (ch == '_' || ch == 'U')
275 {
276 /* Prepare to recognize __U */
277 if (ch == '_' && (uuU < 3))
278 {
279 uuU++;
280 obstack_1grow (mangle_obstack, ch);
281 }
282 /* We recognize __U that we wish to encode
283 __U_. Finish the encoding. */
284 else if (ch == 'U' && (uuU == 2))
285 {
286 uuU = 0;
287 obstack_grow (mangle_obstack, "U_", 2);
288 }
289 /* Otherwise, just reset uuU and emit the character we
290 have. */
291 else
292 {
293 uuU = 0;
294 obstack_1grow (mangle_obstack, ch);
295 }
296 continue;
297 }
298 sprintf (buf, "__U%x_", ch);
299 obstack_grow (mangle_obstack, buf, strlen (buf));
300 uuU = 0;
301 }
302 }
303 }
304
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, Unicode escapes included)
   occupies.  The result is 0 when the name needs no escape at all.
   A character that decodes to a negative value is reported with
   error () and then counted like any other escaped character.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *limit = cursor + len;
  int mangled_len = 0;  /* Characters in the mangled name so far.  */
  int escaped = 0;      /* Nonzero once any escape has been counted.  */
  int underscores = 0;  /* Run of `_' seen so far, to spot `__U'.  */

  while (cursor < limit)
    {
      int ch = UTF8_GET (cursor, limit);

      if (ch < 0)
        error ("internal error - invalid Utf8 name");

      /* NOTE(review): ISALNUM receives the decoded character, which can
         be larger than 0xff -- confirm the macro rejects such values.
         This test has to stay identical to the one in
         append_unicode_mangled_name.  */
      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
        {
          /* Emitted as is.  */
          mangled_len++;
          underscores = 0;
          continue;
        }

      if (ch == '_' || ch == 'U')
        {
          /* The character itself is always emitted.  */
          mangled_len++;

          if (ch == '_' && underscores < 3)
            underscores++;
          else if (ch == 'U' && underscores == 2)
            {
              /* `__U' becomes `__U_': one additional character.  */
              mangled_len++;
              escaped = 1;
              underscores = 0;
            }
          else
            underscores = 0;

          continue;
        }

      /* Anything else is written `__U' + hexadecimal digits + `_':
         four fixed characters plus two, three or four digits.  */
      {
        int hex_digits = 2;

        if (ch > 0xff)
          hex_digits++;
        if (ch > 0xfff)
          hex_digits++;

        mangled_len += 4 + hex_digits;
      }
      escaped = 1;
      underscores = 0;
    }

  return escaped ? mangled_len : 0;
}
372
373 #else
374
375 /* The assembler supports UTF8, we don't use escapes. Mangling is
376 simply <N>NAME. <N> is the number of UTF8 encoded characters that
377 are found in NAME. Note that `java', `lang' and `Object' are used
378 so frequently that they could be cached. */
379
380 void
381 append_gpp_mangled_name (const char *name, int len)
382 {
383 const unsigned char *ptr;
384 const unsigned char *limit;
385 int encoded_len;
386 char buf [6];
387
388 MANGLE_CXX_KEYWORDS (name, len);
389
390 limit = (const unsigned char *)name + len;
391
392 /* Compute the length of the string we wish to mangle. */
393 for (encoded_len = 0, ptr = (const unsigned char *) name;
394 ptr < limit; encoded_len++)
395 {
396 int ch = UTF8_GET(ptr, limit);
397
398 if (ch < 0)
399 error ("internal error - invalid Utf8 name");
400 }
401
402 sprintf (buf, "%d", encoded_len);
403 obstack_grow (mangle_obstack, buf, strlen (buf));
404 obstack_grow (mangle_obstack, name, len);
405 }
406
407 #endif /* HAVE_AS_UTF8 */