]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | // Copyright (C) 2020-2023 Free Software Foundation, Inc. |
18f6990f JP |
2 | |
3 | // This file is part of GCC. | |
4 | ||
5 | // GCC is free software; you can redistribute it and/or modify it under | |
6 | // the terms of the GNU General Public License as published by the Free | |
7 | // Software Foundation; either version 3, or (at your option) any later | |
8 | // version. | |
9 | ||
10 | // GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | // for more details. | |
14 | ||
15 | // You should have received a copy of the GNU General Public License | |
16 | // along with GCC; see the file COPYING3. If not see | |
17 | // <http://www.gnu.org/licenses/>. | |
18 | ||
19 | #ifndef RUST_TOKEN_H | |
20 | #define RUST_TOKEN_H | |
21 | ||
22 | #include "rust-system.h" | |
23 | #include "rust-linemap.h" | |
24 | #include "rust-codepoint.h" | |
25 | ||
26 | namespace Rust { | |
// "Primitive core types" in Rust - the different int and float types, as well
// as some others. Used as the type hint attached to a token.
enum PrimitiveCoreType
{
  // No type information available.
  CORETYPE_UNKNOWN,

  // Named primitives.
  CORETYPE_BOOL,
  CORETYPE_CHAR,
  CORETYPE_STR,
  // Technically int and uint are arch-dependent (pointer size).
  CORETYPE_INT,
  CORETYPE_UINT,

  // Numbered number primitives.
  CORETYPE_F32,
  CORETYPE_F64,
  CORETYPE_I8,
  CORETYPE_I16,
  CORETYPE_I32,
  CORETYPE_I64,
  CORETYPE_I128,
  CORETYPE_U8,
  CORETYPE_U16,
  CORETYPE_U32,
  CORETYPE_U64,
  CORETYPE_U128,

  // Pure decimals are used for tuple index.
  // Also means there is no type hint.
  CORETYPE_PURE_DECIMAL,

  // Arch-dependent pointer sizes. These are aliases: they share the values of
  // CORETYPE_INT and CORETYPE_UINT rather than introducing new ones.
  CORETYPE_ISIZE = CORETYPE_INT,
  CORETYPE_USIZE = CORETYPE_UINT
};
59 | ||
// X-macro list of every token kind.
//
// RS_TOKEN(name, description)
// RS_TOKEN_KEYWORD(name, identifier)
//
// Define both RS_TOKEN and RS_TOKEN_KEYWORD before expanding RS_TOKEN_LIST and
// undefine them afterwards. The order of the entries determines the generated
// enumerator values, so new entries must not be inserted carelessly.
//
// Keep RS_TOKEN_KEYWORD sorted

/* note that abstract, async, become, box, do, final, macro, override, priv,
 * try, typeof, unsized, virtual, and yield are unused */
#define RS_TOKEN_LIST \
  RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
  RS_TOKEN (END_OF_FILE, "end of file") \
  RS_TOKEN (EXCLAM, "!") \
  RS_TOKEN (NOT_EQUAL, "!=") \
  RS_TOKEN (PERCENT, "%") \
  RS_TOKEN (PERCENT_EQ, "%=") \
  RS_TOKEN (AMP, "&") \
  RS_TOKEN (AMP_EQ, "&=") \
  RS_TOKEN (LOGICAL_AND, "&&") \
  RS_TOKEN (ASTERISK, "*") \
  RS_TOKEN (ASTERISK_EQ, "*=") \
  RS_TOKEN (PLUS, "+") \
  RS_TOKEN (PLUS_EQ, "+=") \
  RS_TOKEN (COMMA, ",") \
  RS_TOKEN (MINUS, "-") \
  RS_TOKEN (MINUS_EQ, "-=") \
  RS_TOKEN (RETURN_TYPE, "->") \
  RS_TOKEN (DOT, ".") \
  RS_TOKEN (DOT_DOT, "..") \
  RS_TOKEN (DOT_DOT_EQ, "..=") \
  RS_TOKEN (ELLIPSIS, "...") \
  RS_TOKEN (DIV, "/") \
  RS_TOKEN (DIV_EQ, "/=") \
  RS_TOKEN (COLON, ":") \
  RS_TOKEN (SEMICOLON, ";") \
  RS_TOKEN (LEFT_SHIFT, "<<") \
  RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \
  RS_TOKEN (LEFT_ANGLE, "<") \
  RS_TOKEN (LESS_OR_EQUAL, "<=") \
  RS_TOKEN (EQUAL, "=") \
  RS_TOKEN (EQUAL_EQUAL, "==") \
  RS_TOKEN (MATCH_ARROW, "=>") \
  RS_TOKEN (RIGHT_ANGLE, ">") \
  RS_TOKEN (GREATER_OR_EQUAL, ">=") \
  RS_TOKEN (RIGHT_SHIFT, ">>") \
  RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \
  RS_TOKEN (PATTERN_BIND, "@") \
  RS_TOKEN (TILDE, "~") \
  RS_TOKEN (BACKSLASH, "\\") \
  RS_TOKEN (BACKTICK, "`") \
  RS_TOKEN (CARET, "^") \
  RS_TOKEN (CARET_EQ, "^=") \
  RS_TOKEN (PIPE, "|") \
  RS_TOKEN (PIPE_EQ, "|=") \
  RS_TOKEN (OR, "||") \
  RS_TOKEN (QUESTION_MARK, "?") \
  RS_TOKEN (HASH, "#") \
  /* from here on, dodgy and may not be correct. not operators and may be \
   * symbols */ \
  /* RS_TOKEN(SPACE, " ") probably too dodgy */ \
  /* RS_TOKEN(NEWLINE, "\n")*/ \
  RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */ \
  RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */ \
  RS_TOKEN (DOUBLE_QUOTE, "\"") \
  /* TODO: treat as reserved word like mrustc instead? */ \
  RS_TOKEN (UNDERSCORE, "_") \
  RS_TOKEN (IDENTIFIER, "identifier") \
  /* do different int and float types need different literal types? */ \
  RS_TOKEN (INT_LITERAL, "integer literal") \
  RS_TOKEN (FLOAT_LITERAL, "float literal") \
  RS_TOKEN (STRING_LITERAL, "string literal") \
  RS_TOKEN (CHAR_LITERAL, "character literal") \
  RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \
  RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \
  RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */ \
  /* Have "interpolated" tokens (whatever that means)? identifier, path, \
   * type, pattern, expression, statement, block, meta, item in mrustc (but \
   * not directly in lexer). */ \
  RS_TOKEN (LEFT_PAREN, "(") \
  RS_TOKEN (RIGHT_PAREN, ")") \
  RS_TOKEN (LEFT_CURLY, "{") \
  RS_TOKEN (RIGHT_CURLY, "}") \
  RS_TOKEN (LEFT_SQUARE, "[") \
  RS_TOKEN (RIGHT_SQUARE, "]") \
  /* Macros */ \
  RS_TOKEN (DOLLAR_SIGN, "$") \
  /* Doc Comments */ \
  RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \
  RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \
  /* have "weak" union and 'static keywords? */ \
  RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */ \
  RS_TOKEN_KEYWORD (AS, "as") \
  RS_TOKEN_KEYWORD (ASYNC, "async") /* unused */ \
  RS_TOKEN_KEYWORD (BECOME, "become") /* unused */ \
  RS_TOKEN_KEYWORD (BOX, "box") /* unused */ \
  RS_TOKEN_KEYWORD (BREAK, "break") \
  RS_TOKEN_KEYWORD (CONST, "const") \
  RS_TOKEN_KEYWORD (CONTINUE, "continue") \
  RS_TOKEN_KEYWORD (CRATE, "crate") \
  /* FIXME: Do we need to add $crate (DOLLAR_CRATE) as a reserved kw? */ \
  RS_TOKEN_KEYWORD (DO, "do") /* unused */ \
  RS_TOKEN_KEYWORD (DYN, "dyn") \
  RS_TOKEN_KEYWORD (ELSE, "else") \
  RS_TOKEN_KEYWORD (ENUM_TOK, "enum") \
  RS_TOKEN_KEYWORD (EXTERN_TOK, "extern") \
  RS_TOKEN_KEYWORD (FALSE_LITERAL, "false") \
  RS_TOKEN_KEYWORD (FINAL_TOK, "final") /* unused */ \
  RS_TOKEN_KEYWORD (FN_TOK, "fn") \
  RS_TOKEN_KEYWORD (FOR, "for") \
  RS_TOKEN_KEYWORD (IF, "if") \
  RS_TOKEN_KEYWORD (IMPL, "impl") \
  RS_TOKEN_KEYWORD (IN, "in") \
  RS_TOKEN_KEYWORD (LET, "let") \
  RS_TOKEN_KEYWORD (LOOP, "loop") \
  RS_TOKEN_KEYWORD (MACRO, "macro") /* unused */ \
  RS_TOKEN_KEYWORD (MATCH_TOK, "match") \
  RS_TOKEN_KEYWORD (MOD, "mod") \
  RS_TOKEN_KEYWORD (MOVE, "move") \
  RS_TOKEN_KEYWORD (MUT, "mut") \
  RS_TOKEN_KEYWORD (OVERRIDE_TOK, "override") /* unused */ \
  RS_TOKEN_KEYWORD (PRIV, "priv") /* unused */ \
  RS_TOKEN_KEYWORD (PUB, "pub") \
  RS_TOKEN_KEYWORD (REF, "ref") \
  RS_TOKEN_KEYWORD (RETURN_TOK, "return") \
  /* mrustc does not treat this as a reserved word */ \
  RS_TOKEN_KEYWORD (SELF_ALIAS, "Self") \
  RS_TOKEN_KEYWORD (SELF, "self") \
  RS_TOKEN_KEYWORD (STATIC_TOK, "static") \
  RS_TOKEN_KEYWORD (STRUCT_TOK, "struct") \
  RS_TOKEN_KEYWORD (SUPER, "super") \
  RS_TOKEN_KEYWORD (TRAIT, "trait") \
  RS_TOKEN_KEYWORD (TRUE_LITERAL, "true") \
  RS_TOKEN_KEYWORD (TRY, "try") /* unused */ \
  RS_TOKEN_KEYWORD (TYPE, "type") \
  RS_TOKEN_KEYWORD (TYPEOF, "typeof") /* unused */ \
  RS_TOKEN_KEYWORD (UNSAFE, "unsafe") \
  RS_TOKEN_KEYWORD (UNSIZED, "unsized") /* unused */ \
  RS_TOKEN_KEYWORD (USE, "use") \
  RS_TOKEN_KEYWORD (VIRTUAL, "virtual") /* unused */ \
  RS_TOKEN_KEYWORD (WHERE, "where") \
  RS_TOKEN_KEYWORD (WHILE, "while") \
  RS_TOKEN_KEYWORD (YIELD, "yield") /* unused */ \
  RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
203 | ||
// Contains all token types. Generated from RS_TOKEN_LIST via x-macros: every
// RS_TOKEN and RS_TOKEN_KEYWORD entry contributes one enumerator named after
// its first argument, in list order; the second argument is ignored here.
enum TokenId
{
#define RS_TOKEN(name, _) name,
#define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
  RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
};
213 | ||
// Forward declaration so the pointer aliases can be named before the class
// itself is defined.
class Token;
// A smart pointer (shared_ptr) to Token.
using TokenPtr = std::shared_ptr<Token>;
// A smart pointer (shared_ptr) to a constant Token.
using const_TokenPtr = std::shared_ptr<const Token>;
220 | ||
221 | // Hackily defined way to get token description for enum value using x-macros | |
222 | const char * | |
223 | get_token_description (TokenId id); | |
224 | /* Hackily defined way to get token description as a string for enum value using | |
225 | * x-macros */ | |
226 | const char * | |
227 | token_id_to_str (TokenId id); | |
228 | // Get type hint description as a string. | |
229 | const char * | |
230 | get_type_hint_string (PrimitiveCoreType type); | |
231 | ||
232 | // Represents a single token. Create using factory static methods. | |
233 | class Token | |
234 | { | |
235 | private: | |
236 | // Token kind. | |
237 | TokenId token_id; | |
238 | // Token location. | |
239 | Location locus; | |
240 | // Associated text (if any) of token. | |
241 | std::unique_ptr<std::string> str; | |
242 | // TODO: maybe remove issues and just store std::string as value? | |
243 | /* Type hint for token based on lexer data (e.g. type suffix). Does not exist | |
244 | * for most tokens. */ | |
245 | PrimitiveCoreType type_hint; | |
246 | ||
247 | // Token constructor from token id and location. Has a null string. | |
248 | Token (TokenId token_id, Location location) | |
249 | : token_id (token_id), locus (location), str (nullptr), | |
250 | type_hint (CORETYPE_UNKNOWN) | |
251 | {} | |
252 | ||
253 | // Token constructor from token id, location, and a string. | |
254 | Token (TokenId token_id, Location location, std::string &¶mStr) | |
255 | : token_id (token_id), locus (location), | |
256 | str (new std::string (std::move (paramStr))), type_hint (CORETYPE_UNKNOWN) | |
257 | {} | |
258 | ||
259 | // Token constructor from token id, location, and a char. | |
260 | Token (TokenId token_id, Location location, char paramChar) | |
261 | : token_id (token_id), locus (location), | |
262 | str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN) | |
263 | {} | |
264 | ||
265 | // Token constructor from token id, location, and a "codepoint". | |
266 | Token (TokenId token_id, Location location, Codepoint paramCodepoint) | |
267 | : token_id (token_id), locus (location), | |
268 | str (new std::string (paramCodepoint.as_string ())), | |
269 | type_hint (CORETYPE_UNKNOWN) | |
270 | {} | |
271 | ||
272 | // Token constructor from token id, location, a string, and type hint. | |
273 | Token (TokenId token_id, Location location, std::string &¶mStr, | |
274 | PrimitiveCoreType parType) | |
275 | : token_id (token_id), locus (location), | |
276 | str (new std::string (std::move (paramStr))), type_hint (parType) | |
277 | {} | |
278 | ||
279 | public: | |
280 | // No default constructor. | |
281 | Token () = delete; | |
282 | // Do not copy/assign tokens. | |
283 | Token (const Token &) = delete; | |
284 | Token &operator= (const Token &) = delete; | |
285 | ||
286 | // Allow moving tokens. | |
287 | Token (Token &&other) = default; | |
288 | Token &operator= (Token &&other) = default; | |
289 | ||
290 | ~Token () = default; | |
291 | ||
292 | /* TODO: make_shared (which saves a heap allocation) does not work with the | |
293 | * private constructor */ | |
294 | ||
295 | // Makes and returns a new TokenPtr (with null string). | |
296 | static TokenPtr make (TokenId token_id, Location locus) | |
297 | { | |
298 | // return std::make_shared<Token> (token_id, locus); | |
299 | return TokenPtr (new Token (token_id, locus)); | |
300 | } | |
301 | ||
302 | // Makes and returns a new TokenPtr of type IDENTIFIER. | |
303 | static TokenPtr make_identifier (Location locus, std::string &&str) | |
304 | { | |
305 | // return std::make_shared<Token> (IDENTIFIER, locus, str); | |
306 | return TokenPtr (new Token (IDENTIFIER, locus, std::move (str))); | |
307 | } | |
308 | ||
309 | // Makes and returns a new TokenPtr of type INT_LITERAL. | |
310 | static TokenPtr make_int (Location locus, std::string &&str, | |
311 | PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) | |
312 | { | |
313 | // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint); | |
314 | return TokenPtr ( | |
315 | new Token (INT_LITERAL, locus, std::move (str), type_hint)); | |
316 | } | |
317 | ||
318 | // Makes and returns a new TokenPtr of type FLOAT_LITERAL. | |
319 | static TokenPtr make_float (Location locus, std::string &&str, | |
320 | PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) | |
321 | { | |
322 | // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint); | |
323 | return TokenPtr ( | |
324 | new Token (FLOAT_LITERAL, locus, std::move (str), type_hint)); | |
325 | } | |
326 | ||
327 | // Makes and returns a new TokenPtr of type STRING_LITERAL. | |
328 | static TokenPtr make_string (Location locus, std::string &&str) | |
329 | { | |
330 | // return std::make_shared<Token> (STRING_LITERAL, locus, str, | |
331 | // CORETYPE_STR); | |
332 | return TokenPtr ( | |
333 | new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR)); | |
334 | } | |
335 | ||
336 | // Makes and returns a new TokenPtr of type CHAR_LITERAL. | |
337 | static TokenPtr make_char (Location locus, Codepoint char_lit) | |
338 | { | |
339 | // return std::make_shared<Token> (CHAR_LITERAL, locus, char_lit); | |
340 | return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit)); | |
341 | } | |
342 | ||
343 | // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL. | |
344 | static TokenPtr make_byte_char (Location locus, char byte_char) | |
345 | { | |
346 | // return std::make_shared<Token> (BYTE_CHAR_LITERAL, locus, byte_char); | |
347 | return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char)); | |
348 | } | |
349 | ||
350 | // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix). | |
351 | static TokenPtr make_byte_string (Location locus, std::string &&str) | |
352 | { | |
353 | // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str); | |
354 | return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); | |
355 | } | |
356 | ||
357 | // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. | |
358 | static TokenPtr make_inner_doc_comment (Location locus, std::string &&str) | |
359 | { | |
360 | return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); | |
361 | } | |
362 | ||
363 | // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. | |
364 | static TokenPtr make_outer_doc_comment (Location locus, std::string &&str) | |
365 | { | |
366 | return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); | |
367 | } | |
368 | ||
369 | // Makes and returns a new TokenPtr of type LIFETIME. | |
370 | static TokenPtr make_lifetime (Location locus, std::string &&str) | |
371 | { | |
372 | // return std::make_shared<Token> (LIFETIME, locus, str); | |
373 | return TokenPtr (new Token (LIFETIME, locus, std::move (str))); | |
374 | } | |
375 | ||
376 | // Gets id of the token. | |
377 | TokenId get_id () const { return token_id; } | |
378 | ||
379 | // Gets location of the token. | |
380 | Location get_locus () const { return locus; } | |
381 | ||
382 | // Gets string description of the token. | |
383 | const std::string & | |
384 | get_str () const; /*{ | |
385 | // FIXME: put in header again when fix null problem | |
386 | //gcc_assert(str != nullptr); | |
387 | if (str == nullptr) { | |
388 | error_at(get_locus(), "attempted to get string for '%s', which has no string. | |
389 | returning empty string instead.", get_token_description()); return ""; | |
390 | } | |
391 | return *str; | |
392 | }*/ | |
393 | ||
394 | // Gets token's type hint info. | |
395 | PrimitiveCoreType get_type_hint () const | |
396 | { | |
397 | return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint; | |
398 | } | |
399 | ||
400 | // diagnostics (error reporting) | |
401 | const char *get_token_description () const | |
402 | { | |
403 | return Rust::get_token_description (token_id); | |
404 | } | |
405 | ||
406 | // debugging | |
407 | const char *token_id_to_str () const | |
408 | { | |
409 | return Rust::token_id_to_str (token_id); | |
410 | } | |
411 | ||
412 | // debugging | |
413 | const char *get_type_hint_str () const; | |
414 | ||
415 | /* Returns whether the token is a literal of any type (int, float, char, | |
416 | * string, byte char, byte string). */ | |
417 | bool is_literal () const | |
418 | { | |
419 | switch (token_id) | |
420 | { | |
421 | case INT_LITERAL: | |
422 | case FLOAT_LITERAL: | |
423 | case CHAR_LITERAL: | |
424 | case STRING_LITERAL: | |
425 | case BYTE_CHAR_LITERAL: | |
426 | case BYTE_STRING_LITERAL: | |
427 | return true; | |
428 | default: | |
429 | return false; | |
430 | } | |
431 | } | |
432 | ||
433 | /* Returns whether the token actually has a string (regardless of whether it | |
434 | * should or not). */ | |
435 | bool has_str () const { return str != nullptr; } | |
436 | ||
437 | // Returns whether the token should have a string. | |
438 | bool should_have_str () const | |
439 | { | |
440 | return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME; | |
441 | } | |
442 | ||
443 | // Returns whether the token is a pure decimal int literal | |
444 | bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; } | |
445 | }; | |
446 | } // namespace Rust | |
447 | ||
448 | #endif |