]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/rust/lex/rust-token.h
Update copyright years.
[thirdparty/gcc.git] / gcc / rust / lex / rust-token.h
1 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
2
3 // This file is part of GCC.
4
5 // GCC is free software; you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3, or (at your option) any later
8 // version.
9
10 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // for more details.
14
15 // You should have received a copy of the GNU General Public License
16 // along with GCC; see the file COPYING3. If not see
17 // <http://www.gnu.org/licenses/>.
18
19 #ifndef RUST_TOKEN_H
20 #define RUST_TOKEN_H
21
22 #include "rust-system.h"
23 #include "rust-linemap.h"
24 #include "rust-codepoint.h"
25
26 namespace Rust {
27 // "Primitive core types" in Rust - the different int and float types, as well
28 // as some others
enum PrimitiveCoreType
{
  // Value used when no specific core type is known.
  CORETYPE_UNKNOWN = 0,

  // Primitives that are referred to by name.
  CORETYPE_BOOL,
  CORETYPE_CHAR,
  CORETYPE_STR,

  // Strictly speaking these two are architecture-dependent (pointer sized).
  CORETYPE_INT,
  CORETYPE_UINT,

  // Numeric primitives with an explicit bit width.
  CORETYPE_F32,
  CORETYPE_F64,
  CORETYPE_I8,
  CORETYPE_I16,
  CORETYPE_I32,
  CORETYPE_I64,
  CORETYPE_I128,
  CORETYPE_U8,
  CORETYPE_U16,
  CORETYPE_U32,
  CORETYPE_U64,
  CORETYPE_U128,

  // A pure decimal is what a tuple index uses; it also implies that there is
  // no type hint.
  CORETYPE_PURE_DECIMAL,

  // Aliases for the architecture-dependent pointer-sized integers.
  CORETYPE_ISIZE = CORETYPE_INT,
  CORETYPE_USIZE = CORETYPE_UINT
};
59
// X-macro table listing every token kind.
//
// RS_TOKEN(name, description)
// RS_TOKEN_KEYWORD(name, identifier)
//
// A user defines both RS_TOKEN and RS_TOKEN_KEYWORD, expands RS_TOKEN_LIST, and
// then #undefs them again (enum TokenId does exactly this). Because TokenId is
// generated from this list, the order of the entries determines the numeric
// value of each enumerator.
//
// Keep RS_TOKEN_KEYWORD sorted
// NOTE(review): the keyword entries are sorted by identifier when case is
// ignored ("Self" sits directly before "self"); under plain byte ordering
// "Self" would come first - confirm which ordering consumers rely on.

/* note that abstract, async, become, box, do, final, macro, override, priv,
 * try, typeof, unsized, virtual, and yield are unused */
/* NOTE(review): MACRO is named in the list above but its entry below carries
 * no "unused" tag - confirm which of the two is accurate. */
#define RS_TOKEN_LIST \
  RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
  RS_TOKEN (END_OF_FILE, "end of file") \
  RS_TOKEN (EXCLAM, "!") \
  RS_TOKEN (NOT_EQUAL, "!=") \
  RS_TOKEN (PERCENT, "%") \
  RS_TOKEN (PERCENT_EQ, "%=") \
  RS_TOKEN (AMP, "&") \
  RS_TOKEN (AMP_EQ, "&=") \
  RS_TOKEN (LOGICAL_AND, "&&") \
  RS_TOKEN (ASTERISK, "*") \
  RS_TOKEN (ASTERISK_EQ, "*=") \
  RS_TOKEN (PLUS, "+") \
  RS_TOKEN (PLUS_EQ, "+=") \
  RS_TOKEN (COMMA, ",") \
  RS_TOKEN (MINUS, "-") \
  RS_TOKEN (MINUS_EQ, "-=") \
  RS_TOKEN (RETURN_TYPE, "->") \
  RS_TOKEN (DOT, ".") \
  RS_TOKEN (DOT_DOT, "..") \
  RS_TOKEN (DOT_DOT_EQ, "..=") \
  RS_TOKEN (ELLIPSIS, "...") \
  RS_TOKEN (DIV, "/") \
  RS_TOKEN (DIV_EQ, "/=") \
  RS_TOKEN (COLON, ":") \
  RS_TOKEN (SEMICOLON, ";") \
  RS_TOKEN (LEFT_SHIFT, "<<") \
  RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \
  RS_TOKEN (LEFT_ANGLE, "<") \
  RS_TOKEN (LESS_OR_EQUAL, "<=") \
  RS_TOKEN (EQUAL, "=") \
  RS_TOKEN (EQUAL_EQUAL, "==") \
  RS_TOKEN (MATCH_ARROW, "=>") \
  RS_TOKEN (RIGHT_ANGLE, ">") \
  RS_TOKEN (GREATER_OR_EQUAL, ">=") \
  RS_TOKEN (RIGHT_SHIFT, ">>") \
  RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \
  RS_TOKEN (PATTERN_BIND, "@") \
  RS_TOKEN (TILDE, "~") \
  RS_TOKEN (BACKSLASH, "\\") \
  RS_TOKEN (BACKTICK, "`") \
  RS_TOKEN (CARET, "^") \
  RS_TOKEN (CARET_EQ, "^=") \
  RS_TOKEN (PIPE, "|") \
  RS_TOKEN (PIPE_EQ, "|=") \
  RS_TOKEN (OR, "||") \
  RS_TOKEN (QUESTION_MARK, "?") \
  RS_TOKEN (HASH, "#") \
  /* from here on, dodgy and may not be correct. not operators and may be \
   * symbols */ \
  /* RS_TOKEN(SPACE, " ") probably too dodgy */ \
  /* RS_TOKEN(NEWLINE, "\n")*/ \
  RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */ \
  RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */ \
  RS_TOKEN (DOUBLE_QUOTE, "\"") \
  RS_TOKEN (UNDERSCORE, \
            "_") /* TODO: treat as reserved word like mrustc instead? */ \
  RS_TOKEN (IDENTIFIER, "identifier") \
  RS_TOKEN (INT_LITERAL, \
            "integer literal") /* do different int and float types need \
                                  different literal types? */ \
  RS_TOKEN (FLOAT_LITERAL, "float literal") \
  RS_TOKEN (STRING_LITERAL, "string literal") \
  RS_TOKEN (CHAR_LITERAL, "character literal") \
  RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \
  RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \
  RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */ \
  /* Have "interpolated" tokens (whatever that means)? identifier, path, \
   * type, pattern, */ \
  /* expression, statement, block, meta, item in mrustc (but not directly in \
   * lexer). */ \
  RS_TOKEN (LEFT_PAREN, "(") \
  RS_TOKEN (RIGHT_PAREN, ")") \
  RS_TOKEN (LEFT_CURLY, "{") \
  RS_TOKEN (RIGHT_CURLY, "}") \
  RS_TOKEN (LEFT_SQUARE, "[") \
  RS_TOKEN (RIGHT_SQUARE, "]") \
  /* Macros */ \
  RS_TOKEN (DOLLAR_SIGN, "$") \
  /* Doc Comments */ \
  RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \
  RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \
  /* have "weak" union and 'static keywords? */ \
  RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */ \
  RS_TOKEN_KEYWORD (AS, "as") \
  RS_TOKEN_KEYWORD (ASYNC, "async") /* unused */ \
  RS_TOKEN_KEYWORD (BECOME, "become") /* unused */ \
  RS_TOKEN_KEYWORD (BOX, "box") /* unused */ \
  RS_TOKEN_KEYWORD (BREAK, "break") \
  RS_TOKEN_KEYWORD (CONST, "const") \
  RS_TOKEN_KEYWORD (CONTINUE, "continue") \
  RS_TOKEN_KEYWORD (CRATE, "crate") \
  /* FIXME: Do we need to add $crate (DOLLAR_CRATE) as a reserved kw? */ \
  RS_TOKEN_KEYWORD (DO, "do") /* unused */ \
  RS_TOKEN_KEYWORD (DYN, "dyn") \
  RS_TOKEN_KEYWORD (ELSE, "else") \
  RS_TOKEN_KEYWORD (ENUM_TOK, "enum") \
  RS_TOKEN_KEYWORD (EXTERN_TOK, "extern") \
  RS_TOKEN_KEYWORD (FALSE_LITERAL, "false") \
  RS_TOKEN_KEYWORD (FINAL_TOK, "final") /* unused */ \
  RS_TOKEN_KEYWORD (FN_TOK, "fn") \
  RS_TOKEN_KEYWORD (FOR, "for") \
  RS_TOKEN_KEYWORD (IF, "if") \
  RS_TOKEN_KEYWORD (IMPL, "impl") \
  RS_TOKEN_KEYWORD (IN, "in") \
  RS_TOKEN_KEYWORD (LET, "let") \
  RS_TOKEN_KEYWORD (LOOP, "loop") \
  RS_TOKEN_KEYWORD (MACRO, "macro") \
  RS_TOKEN_KEYWORD (MATCH_TOK, "match") \
  RS_TOKEN_KEYWORD (MOD, "mod") \
  RS_TOKEN_KEYWORD (MOVE, "move") \
  RS_TOKEN_KEYWORD (MUT, "mut") \
  RS_TOKEN_KEYWORD (OVERRIDE_TOK, "override") /* unused */ \
  RS_TOKEN_KEYWORD (PRIV, "priv") /* unused */ \
  RS_TOKEN_KEYWORD (PUB, "pub") \
  RS_TOKEN_KEYWORD (REF, "ref") \
  RS_TOKEN_KEYWORD (RETURN_TOK, "return") \
  RS_TOKEN_KEYWORD (SELF_ALIAS, \
                    "Self") /* mrustc does not treat this as a reserved word*/ \
  RS_TOKEN_KEYWORD (SELF, "self") \
  RS_TOKEN_KEYWORD (STATIC_TOK, "static") \
  RS_TOKEN_KEYWORD (STRUCT_TOK, "struct") \
  RS_TOKEN_KEYWORD (SUPER, "super") \
  RS_TOKEN_KEYWORD (TRAIT, "trait") \
  RS_TOKEN_KEYWORD (TRUE_LITERAL, "true") \
  RS_TOKEN_KEYWORD (TRY, "try") /* unused */ \
  RS_TOKEN_KEYWORD (TYPE, "type") \
  RS_TOKEN_KEYWORD (TYPEOF, "typeof") /* unused */ \
  RS_TOKEN_KEYWORD (UNSAFE, "unsafe") \
  RS_TOKEN_KEYWORD (UNSIZED, "unsized") /* unused */ \
  RS_TOKEN_KEYWORD (USE, "use") \
  RS_TOKEN_KEYWORD (VIRTUAL, "virtual") /* unused */ \
  RS_TOKEN_KEYWORD (WHERE, "where") \
  RS_TOKEN_KEYWORD (WHILE, "while") \
  RS_TOKEN_KEYWORD (YIELD, "yield") /* unused */ \
  RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
203
// Identifier for every kind of token. The enumerators are generated by
// expanding RS_TOKEN_LIST (x-macro technique), one enumerator per entry and in
// the order in which the entries are listed.
enum TokenId
{
// Plain tokens and keyword tokens both contribute just their name.
#define RS_TOKEN(name, _) name,
#define RS_TOKEN_KEYWORD(name, _) name,
  RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
};
213
// Token is only forward declared here so that the pointer aliases can be
// introduced ahead of the class definition.
class Token;
// Shared-ownership smart pointer (std::shared_ptr) to a Token.
using TokenPtr = std::shared_ptr<Token>;
// Shared-ownership smart pointer (std::shared_ptr) to a constant Token.
using const_TokenPtr = std::shared_ptr<const Token>;
220
221 // Hackily defined way to get token description for enum value using x-macros
222 const char *
223 get_token_description (TokenId id);
224 /* Hackily defined way to get token description as a string for enum value using
225 * x-macros */
226 const char *
227 token_id_to_str (TokenId id);
228 // Get type hint description as a string.
229 const char *
230 get_type_hint_string (PrimitiveCoreType type);
231
232 // Represents a single token. Create using factory static methods.
233 class Token
234 {
235 private:
236 // Token kind.
237 TokenId token_id;
238 // Token location.
239 Location locus;
240 // Associated text (if any) of token.
241 std::unique_ptr<std::string> str;
242 // TODO: maybe remove issues and just store std::string as value?
243 /* Type hint for token based on lexer data (e.g. type suffix). Does not exist
244 * for most tokens. */
245 PrimitiveCoreType type_hint;
246
247 // Token constructor from token id and location. Has a null string.
248 Token (TokenId token_id, Location location)
249 : token_id (token_id), locus (location), str (nullptr),
250 type_hint (CORETYPE_UNKNOWN)
251 {}
252
253 // Token constructor from token id, location, and a string.
254 Token (TokenId token_id, Location location, std::string &&paramStr)
255 : token_id (token_id), locus (location),
256 str (new std::string (std::move (paramStr))), type_hint (CORETYPE_UNKNOWN)
257 {}
258
259 // Token constructor from token id, location, and a char.
260 Token (TokenId token_id, Location location, char paramChar)
261 : token_id (token_id), locus (location),
262 str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN)
263 {}
264
265 // Token constructor from token id, location, and a "codepoint".
266 Token (TokenId token_id, Location location, Codepoint paramCodepoint)
267 : token_id (token_id), locus (location),
268 str (new std::string (paramCodepoint.as_string ())),
269 type_hint (CORETYPE_UNKNOWN)
270 {}
271
272 // Token constructor from token id, location, a string, and type hint.
273 Token (TokenId token_id, Location location, std::string &&paramStr,
274 PrimitiveCoreType parType)
275 : token_id (token_id), locus (location),
276 str (new std::string (std::move (paramStr))), type_hint (parType)
277 {}
278
279 public:
280 // No default constructor.
281 Token () = delete;
282 // Do not copy/assign tokens.
283 Token (const Token &) = delete;
284 Token &operator= (const Token &) = delete;
285
286 // Allow moving tokens.
287 Token (Token &&other) = default;
288 Token &operator= (Token &&other) = default;
289
290 ~Token () = default;
291
292 /* TODO: make_shared (which saves a heap allocation) does not work with the
293 * private constructor */
294
295 // Makes and returns a new TokenPtr (with null string).
296 static TokenPtr make (TokenId token_id, Location locus)
297 {
298 // return std::make_shared<Token> (token_id, locus);
299 return TokenPtr (new Token (token_id, locus));
300 }
301
302 // Makes and returns a new TokenPtr of type IDENTIFIER.
303 static TokenPtr make_identifier (Location locus, std::string &&str)
304 {
305 // return std::make_shared<Token> (IDENTIFIER, locus, str);
306 return TokenPtr (new Token (IDENTIFIER, locus, std::move (str)));
307 }
308
309 // Makes and returns a new TokenPtr of type INT_LITERAL.
310 static TokenPtr make_int (Location locus, std::string &&str,
311 PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
312 {
313 // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
314 return TokenPtr (
315 new Token (INT_LITERAL, locus, std::move (str), type_hint));
316 }
317
318 // Makes and returns a new TokenPtr of type FLOAT_LITERAL.
319 static TokenPtr make_float (Location locus, std::string &&str,
320 PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
321 {
322 // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
323 return TokenPtr (
324 new Token (FLOAT_LITERAL, locus, std::move (str), type_hint));
325 }
326
327 // Makes and returns a new TokenPtr of type STRING_LITERAL.
328 static TokenPtr make_string (Location locus, std::string &&str)
329 {
330 // return std::make_shared<Token> (STRING_LITERAL, locus, str,
331 // CORETYPE_STR);
332 return TokenPtr (
333 new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR));
334 }
335
336 // Makes and returns a new TokenPtr of type CHAR_LITERAL.
337 static TokenPtr make_char (Location locus, Codepoint char_lit)
338 {
339 // return std::make_shared<Token> (CHAR_LITERAL, locus, char_lit);
340 return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit));
341 }
342
343 // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL.
344 static TokenPtr make_byte_char (Location locus, char byte_char)
345 {
346 // return std::make_shared<Token> (BYTE_CHAR_LITERAL, locus, byte_char);
347 return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char));
348 }
349
350 // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix).
351 static TokenPtr make_byte_string (Location locus, std::string &&str)
352 {
353 // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str);
354 return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str)));
355 }
356
357 // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT.
358 static TokenPtr make_inner_doc_comment (Location locus, std::string &&str)
359 {
360 return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str)));
361 }
362
363 // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT.
364 static TokenPtr make_outer_doc_comment (Location locus, std::string &&str)
365 {
366 return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str)));
367 }
368
369 // Makes and returns a new TokenPtr of type LIFETIME.
370 static TokenPtr make_lifetime (Location locus, std::string &&str)
371 {
372 // return std::make_shared<Token> (LIFETIME, locus, str);
373 return TokenPtr (new Token (LIFETIME, locus, std::move (str)));
374 }
375
376 // Gets id of the token.
377 TokenId get_id () const { return token_id; }
378
379 // Gets location of the token.
380 Location get_locus () const { return locus; }
381
382 // Gets string description of the token.
383 const std::string &
384 get_str () const; /*{
385 // FIXME: put in header again when fix null problem
386 //gcc_assert(str != nullptr);
387 if (str == nullptr) {
388 error_at(get_locus(), "attempted to get string for '%s', which has no string.
389 returning empty string instead.", get_token_description()); return "";
390 }
391 return *str;
392 }*/
393
394 // Gets token's type hint info.
395 PrimitiveCoreType get_type_hint () const
396 {
397 return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint;
398 }
399
400 // diagnostics (error reporting)
401 const char *get_token_description () const
402 {
403 return Rust::get_token_description (token_id);
404 }
405
406 // debugging
407 const char *token_id_to_str () const
408 {
409 return Rust::token_id_to_str (token_id);
410 }
411
412 // debugging
413 const char *get_type_hint_str () const;
414
415 /* Returns whether the token is a literal of any type (int, float, char,
416 * string, byte char, byte string). */
417 bool is_literal () const
418 {
419 switch (token_id)
420 {
421 case INT_LITERAL:
422 case FLOAT_LITERAL:
423 case CHAR_LITERAL:
424 case STRING_LITERAL:
425 case BYTE_CHAR_LITERAL:
426 case BYTE_STRING_LITERAL:
427 return true;
428 default:
429 return false;
430 }
431 }
432
433 /* Returns whether the token actually has a string (regardless of whether it
434 * should or not). */
435 bool has_str () const { return str != nullptr; }
436
437 // Returns whether the token should have a string.
438 bool should_have_str () const
439 {
440 return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME;
441 }
442
443 // Returns whether the token is a pure decimal int literal
444 bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; }
445 };
446 } // namespace Rust
447
448 #endif