1 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
3 // This file is part of GCC.
5 // GCC is free software; you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3, or (at your option) any later
10 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 // You should have received a copy of the GNU General Public License
16 // along with GCC; see the file COPYING3. If not see
17 // <http://www.gnu.org/licenses/>.
22 #include "rust-system.h"
23 #include "rust-linemap.h"
24 #include "rust-codepoint.h"
27 // "Primitive core types" in Rust - the different int and float types, as well
// Type hints that can be attached to a token.
// NOTE(review): the braces and most enumerators of this enum are not visible
// in this excerpt; only the entries shown below are described.
29 enum PrimitiveCoreType
36 // okay technically int and uint are arch-dependent (pointer size)
39 // numbered number primitives
52 // Pure decimals are used for tuple index.
53 // Also means there is no type hint.
54 CORETYPE_PURE_DECIMAL
,
55 // arch-dependent pointer sizes
// CORETYPE_ISIZE is given the same value as CORETYPE_INT.
56 CORETYPE_ISIZE
= CORETYPE_INT
,
// CORETYPE_USIZE is given the same value as CORETYPE_UINT.
57 CORETYPE_USIZE
= CORETYPE_UINT
60 // RS_TOKEN(name, description)
61 // RS_TOKEN_KEYWORD(name, identifier)
63 // Keep RS_TOKEN_KEYWORD sorted
65 /* note that abstract, async, become, box, do, final, macro, override, priv,
66 * try, typeof, unsized, virtual, and yield are unused */
/* X-macro table of every token kind.  Define RS_TOKEN (name, description)
   and RS_TOKEN_KEYWORD (name, identifier) before expanding it.  Every
   comment inside the table is terminated before the next entry, so no
   entry is accidentally commented out.  */
#define RS_TOKEN_LIST \
  RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
  RS_TOKEN (END_OF_FILE, "end of file") \
  RS_TOKEN (EXCLAM, "!") \
  RS_TOKEN (NOT_EQUAL, "!=") \
  RS_TOKEN (PERCENT, "%") \
  RS_TOKEN (PERCENT_EQ, "%=") \
  RS_TOKEN (AMP_EQ, "&=") \
  RS_TOKEN (LOGICAL_AND, "&&") \
  RS_TOKEN (ASTERISK, "*") \
  RS_TOKEN (ASTERISK_EQ, "*=") \
  RS_TOKEN (PLUS, "+") \
  RS_TOKEN (PLUS_EQ, "+=") \
  RS_TOKEN (COMMA, ",") \
  RS_TOKEN (MINUS, "-") \
  RS_TOKEN (MINUS_EQ, "-=") \
  RS_TOKEN (RETURN_TYPE, "->") \
  RS_TOKEN (DOT_DOT, "..") \
  RS_TOKEN (DOT_DOT_EQ, "..=") \
  RS_TOKEN (ELLIPSIS, "...") \
  RS_TOKEN (DIV_EQ, "/=") \
  RS_TOKEN (COLON, ":") \
  RS_TOKEN (SEMICOLON, ";") \
  RS_TOKEN (LEFT_SHIFT, "<<") \
  RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \
  RS_TOKEN (LEFT_ANGLE, "<") \
  RS_TOKEN (LESS_OR_EQUAL, "<=") \
  RS_TOKEN (EQUAL, "=") \
  RS_TOKEN (EQUAL_EQUAL, "==") \
  RS_TOKEN (MATCH_ARROW, "=>") \
  RS_TOKEN (RIGHT_ANGLE, ">") \
  RS_TOKEN (GREATER_OR_EQUAL, ">=") \
  RS_TOKEN (RIGHT_SHIFT, ">>") \
  RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \
  RS_TOKEN (PATTERN_BIND, "@") \
  RS_TOKEN (TILDE, "~") \
  RS_TOKEN (BACKSLASH, "\\") \
  RS_TOKEN (BACKTICK, "`") \
  RS_TOKEN (CARET, "^") \
  RS_TOKEN (CARET_EQ, "^=") \
  RS_TOKEN (PIPE, "|") \
  RS_TOKEN (PIPE_EQ, "|=") \
  RS_TOKEN (OR, "||") \
  RS_TOKEN (QUESTION_MARK, "?") \
  RS_TOKEN (HASH, "#") \
  /* from here on, dodgy and may not be correct: these are not operators */ \
  /* RS_TOKEN(SPACE, " ") probably too dodgy */ \
  /* RS_TOKEN(NEWLINE, "\n")*/ \
  RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */ \
  RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */ \
  RS_TOKEN (DOUBLE_QUOTE, "\"") \
  RS_TOKEN (UNDERSCORE, \
            "_") /* TODO: treat as reserved word like mrustc instead? */ \
  RS_TOKEN (IDENTIFIER, "identifier") \
  RS_TOKEN (INT_LITERAL, \
            "integer literal") /* do different int and float types need \
                                  different literal types? */ \
  RS_TOKEN (FLOAT_LITERAL, "float literal") \
  RS_TOKEN (STRING_LITERAL, "string literal") \
  RS_TOKEN (CHAR_LITERAL, "character literal") \
  RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \
  RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \
  RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */ \
  /* Have "interpolated" tokens (whatever that means)? identifier, path, type, \
   * expression, statement, block, meta, item in mrustc (but not directly in \
   * the lexer). */ \
  RS_TOKEN (LEFT_PAREN, "(") \
  RS_TOKEN (RIGHT_PAREN, ")") \
  RS_TOKEN (LEFT_CURLY, "{") \
  RS_TOKEN (RIGHT_CURLY, "}") \
  RS_TOKEN (LEFT_SQUARE, "[") \
  RS_TOKEN (RIGHT_SQUARE, "]") \
  RS_TOKEN (DOLLAR_SIGN, "$") \
  RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \
  RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \
  /* have "weak" union and 'static keywords? */ \
  RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */ \
  RS_TOKEN_KEYWORD (AS, "as") \
  RS_TOKEN_KEYWORD (ASYNC, "async") /* unused */ \
  RS_TOKEN_KEYWORD (BECOME, "become") /* unused */ \
  RS_TOKEN_KEYWORD (BOX, "box") /* unused */ \
  RS_TOKEN_KEYWORD (BREAK, "break") \
  RS_TOKEN_KEYWORD (CONST, "const") \
  RS_TOKEN_KEYWORD (CONTINUE, "continue") \
  RS_TOKEN_KEYWORD (CRATE, "crate") \
  /* FIXME: Do we need to add $crate (DOLLAR_CRATE) as a reserved kw? */ \
  RS_TOKEN_KEYWORD (DO, "do") /* unused */ \
  RS_TOKEN_KEYWORD (DYN, "dyn") \
  RS_TOKEN_KEYWORD (ELSE, "else") \
  RS_TOKEN_KEYWORD (ENUM_TOK, "enum") \
  RS_TOKEN_KEYWORD (EXTERN_TOK, "extern") \
  RS_TOKEN_KEYWORD (FALSE_LITERAL, "false") \
  RS_TOKEN_KEYWORD (FINAL_TOK, "final") /* unused */ \
  RS_TOKEN_KEYWORD (FN_TOK, "fn") \
  RS_TOKEN_KEYWORD (FOR, "for") \
  RS_TOKEN_KEYWORD (IF, "if") \
  RS_TOKEN_KEYWORD (IMPL, "impl") \
  RS_TOKEN_KEYWORD (IN, "in") \
  RS_TOKEN_KEYWORD (LET, "let") \
  RS_TOKEN_KEYWORD (LOOP, "loop") \
  RS_TOKEN_KEYWORD (MACRO, "macro") \
  RS_TOKEN_KEYWORD (MATCH_TOK, "match") \
  RS_TOKEN_KEYWORD (MOD, "mod") \
  RS_TOKEN_KEYWORD (MOVE, "move") \
  RS_TOKEN_KEYWORD (MUT, "mut") \
  RS_TOKEN_KEYWORD (OVERRIDE_TOK, "override") /* unused */ \
  RS_TOKEN_KEYWORD (PRIV, "priv") /* unused */ \
  RS_TOKEN_KEYWORD (PUB, "pub") \
  RS_TOKEN_KEYWORD (REF, "ref") \
  RS_TOKEN_KEYWORD (RETURN_TOK, "return") \
  RS_TOKEN_KEYWORD (SELF_ALIAS, \
                    "Self") /* mrustc does not treat this as a reserved word*/ \
  RS_TOKEN_KEYWORD (SELF, "self") \
  RS_TOKEN_KEYWORD (STATIC_TOK, "static") \
  RS_TOKEN_KEYWORD (STRUCT_TOK, "struct") \
  RS_TOKEN_KEYWORD (SUPER, "super") \
  RS_TOKEN_KEYWORD (TRAIT, "trait") \
  RS_TOKEN_KEYWORD (TRUE_LITERAL, "true") \
  RS_TOKEN_KEYWORD (TRY, "try") /* unused */ \
  RS_TOKEN_KEYWORD (TYPE, "type") \
  RS_TOKEN_KEYWORD (TYPEOF, "typeof") /* unused */ \
  RS_TOKEN_KEYWORD (UNSAFE, "unsafe") \
  RS_TOKEN_KEYWORD (UNSIZED, "unsized") /* unused */ \
  RS_TOKEN_KEYWORD (USE, "use") \
  RS_TOKEN_KEYWORD (VIRTUAL, "virtual") /* unused */ \
  RS_TOKEN_KEYWORD (WHERE, "where") \
  RS_TOKEN_KEYWORD (WHILE, "while") \
  RS_TOKEN_KEYWORD (YIELD, "yield") /* unused */ \
  RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
204 // Contains all token types. Crappy implementation via x-macros.
// Expansion helpers for RS_TOKEN_LIST: RS_TOKEN keeps only the entry name
// followed by a comma, and RS_TOKEN_KEYWORD forwards to RS_TOKEN so keyword
// entries expand the same way.
// NOTE(review): the enum that consumes these helpers is not visible in this
// excerpt.
207 #define RS_TOKEN(name, _) name,
208 #define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
// Remove the keyword helper again.
210 #undef RS_TOKEN_KEYWORD
214 // dodgy "TokenPtr" declaration with Token forward declaration
216 // A smart pointer (shared_ptr) to Token.
217 typedef std::shared_ptr
<Token
> TokenPtr
;
218 // A smart pointer (shared_ptr) to a constant Token.
219 typedef std::shared_ptr
<const Token
> const_TokenPtr
;
221 // Hackily defined way to get token description for enum value using x-macros
// Free-function form taking the token id directly.
// NOTE(review): the return type of this declaration is not visible in this
// excerpt; the Token member of the same name returns its result as
// const char *.
223 get_token_description (TokenId id
);
/* Hackily defined way to get token description as a string for enum value
 * using x-macros. */
// Free-function form taking the token id directly.
// NOTE(review): the return type of this declaration is not visible in this
// excerpt; the Token member of the same name returns its result as
// const char *.
227 token_id_to_str (TokenId id
);
228 // Get type hint description as a string.
// Takes the type hint to describe.
// NOTE(review): the return type of this declaration is not visible in this
// excerpt.
230 get_type_hint_string (PrimitiveCoreType type
);
232 // Represents a single token. Create using factory static methods.
240 // Associated text (if any) of token.
241 std::unique_ptr
<std::string
> str
;
242 // TODO: maybe remove issues and just store std::string as value?
243 /* Type hint for token based on lexer data (e.g. type suffix). Does not exist
244 * for most tokens. */
245 PrimitiveCoreType type_hint
;
247 // Token constructor from token id and location. Has a null string.
248 Token (TokenId token_id
, Location location
)
249 : token_id (token_id
), locus (location
), str (nullptr),
250 type_hint (CORETYPE_UNKNOWN
)
253 // Token constructor from token id, location, and a string.
254 Token (TokenId token_id
, Location location
, std::string
&¶mStr
)
255 : token_id (token_id
), locus (location
),
256 str (new std::string (std::move (paramStr
))), type_hint (CORETYPE_UNKNOWN
)
259 // Token constructor from token id, location, and a char.
260 Token (TokenId token_id
, Location location
, char paramChar
)
261 : token_id (token_id
), locus (location
),
262 str (new std::string (1, paramChar
)), type_hint (CORETYPE_UNKNOWN
)
265 // Token constructor from token id, location, and a "codepoint".
266 Token (TokenId token_id
, Location location
, Codepoint paramCodepoint
)
267 : token_id (token_id
), locus (location
),
268 str (new std::string (paramCodepoint
.as_string ())),
269 type_hint (CORETYPE_UNKNOWN
)
272 // Token constructor from token id, location, a string, and type hint.
273 Token (TokenId token_id
, Location location
, std::string
&¶mStr
,
274 PrimitiveCoreType parType
)
275 : token_id (token_id
), locus (location
),
276 str (new std::string (std::move (paramStr
))), type_hint (parType
)
280 // No default constructor.
282 // Do not copy/assign tokens.
283 Token (const Token
&) = delete;
284 Token
&operator= (const Token
&) = delete;
286 // Allow moving tokens.
287 Token (Token
&&other
) = default;
288 Token
&operator= (Token
&&other
) = default;
292 /* TODO: make_shared (which saves a heap allocation) does not work with the
293 * private constructor */
295 // Makes and returns a new TokenPtr (with null string).
296 static TokenPtr
make (TokenId token_id
, Location locus
)
298 // return std::make_shared<Token> (token_id, locus);
299 return TokenPtr (new Token (token_id
, locus
));
302 // Makes and returns a new TokenPtr of type IDENTIFIER.
303 static TokenPtr
make_identifier (Location locus
, std::string
&&str
)
305 // return std::make_shared<Token> (IDENTIFIER, locus, str);
306 return TokenPtr (new Token (IDENTIFIER
, locus
, std::move (str
)));
309 // Makes and returns a new TokenPtr of type INT_LITERAL.
310 static TokenPtr
make_int (Location locus
, std::string
&&str
,
311 PrimitiveCoreType type_hint
= CORETYPE_UNKNOWN
)
313 // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
315 new Token (INT_LITERAL
, locus
, std::move (str
), type_hint
));
318 // Makes and returns a new TokenPtr of type FLOAT_LITERAL.
319 static TokenPtr
make_float (Location locus
, std::string
&&str
,
320 PrimitiveCoreType type_hint
= CORETYPE_UNKNOWN
)
322 // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
324 new Token (FLOAT_LITERAL
, locus
, std::move (str
), type_hint
));
327 // Makes and returns a new TokenPtr of type STRING_LITERAL.
328 static TokenPtr
make_string (Location locus
, std::string
&&str
)
330 // return std::make_shared<Token> (STRING_LITERAL, locus, str,
333 new Token (STRING_LITERAL
, locus
, std::move (str
), CORETYPE_STR
));
336 // Makes and returns a new TokenPtr of type CHAR_LITERAL.
337 static TokenPtr
make_char (Location locus
, Codepoint char_lit
)
339 // return std::make_shared<Token> (CHAR_LITERAL, locus, char_lit);
340 return TokenPtr (new Token (CHAR_LITERAL
, locus
, char_lit
));
343 // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL.
344 static TokenPtr
make_byte_char (Location locus
, char byte_char
)
346 // return std::make_shared<Token> (BYTE_CHAR_LITERAL, locus, byte_char);
347 return TokenPtr (new Token (BYTE_CHAR_LITERAL
, locus
, byte_char
));
350 // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix).
351 static TokenPtr
make_byte_string (Location locus
, std::string
&&str
)
353 // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str);
354 return TokenPtr (new Token (BYTE_STRING_LITERAL
, locus
, std::move (str
)));
357 // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT.
358 static TokenPtr
make_inner_doc_comment (Location locus
, std::string
&&str
)
360 return TokenPtr (new Token (INNER_DOC_COMMENT
, locus
, std::move (str
)));
363 // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT.
364 static TokenPtr
make_outer_doc_comment (Location locus
, std::string
&&str
)
366 return TokenPtr (new Token (OUTER_DOC_COMMENT
, locus
, std::move (str
)));
369 // Makes and returns a new TokenPtr of type LIFETIME.
370 static TokenPtr
make_lifetime (Location locus
, std::string
&&str
)
372 // return std::make_shared<Token> (LIFETIME, locus, str);
373 return TokenPtr (new Token (LIFETIME
, locus
, std::move (str
)));
376 // Gets id of the token.
377 TokenId
get_id () const { return token_id
; }
379 // Gets location of the token.
380 Location
get_locus () const { return locus
; }
382 // Gets string description of the token.
// NOTE(review): the return type of this declaration is not visible in this
// excerpt.  The block comment after the declaration holds a disabled inline
// body that reported an error and returned "" when str was null.
384 get_str () const; /*{
385 // FIXME: put in header again when fix null problem
386 //gcc_assert(str != nullptr);
387 if (str == nullptr) {
388 error_at(get_locus(), "attempted to get string for '%s', which has no string.
389 returning empty string instead.", get_token_description()); return "";
[remainder of the disabled body is not visible in this excerpt]
}*/
394 // Gets token's type hint info.
395 PrimitiveCoreType
get_type_hint () const
397 return type_hint
== CORETYPE_PURE_DECIMAL
? CORETYPE_UNKNOWN
: type_hint
;
400 // diagnostics (error reporting)
401 const char *get_token_description () const
403 return Rust::get_token_description (token_id
);
407 const char *token_id_to_str () const
409 return Rust::token_id_to_str (token_id
);
// Only the declaration is visible in this excerpt.
// NOTE(review): presumably describes type_hint as a C string, like
// Rust::get_type_hint_string does for a PrimitiveCoreType - confirm against
// the definition.
413 const char *get_type_hint_str () const;
415 /* Returns whether the token is a literal of any type (int, float, char,
416 * string, byte char, byte string). */
// NOTE(review): only two case labels of this function are visible in this
// excerpt; the braces and the rest of the body are missing here, so the
// full set of ids treated as literals must be checked against the complete
// file.
417 bool is_literal () const
425 case BYTE_CHAR_LITERAL
:
426 case BYTE_STRING_LITERAL
:
/* Returns whether the token actually has a string (regardless of whether it
 * should have one). */
435 bool has_str () const { return str
!= nullptr; }
437 // Returns whether the token should have a string.
438 bool should_have_str () const
440 return is_literal () || token_id
== IDENTIFIER
|| token_id
== LIFETIME
;
443 // Returns whether the token is a pure decimal int literal
444 bool is_pure_decimal () const { return type_hint
== CORETYPE_PURE_DECIMAL
; }