Number literal evaluation and suffix validation should be done after macro expansion,
so we defer these to the parser phase. This preserves source fidelity for macro token
trees.
gcc/rust/ChangeLog:
* ast/rust-ast-collector.cc (TokenCollector::visit): Update Token::make_int and
Token::make_float calls to include suffix_start and IntegerLiteralBase::Decimal.
* expand/rust-macro-builtins-location.cc (MacroBuiltin::column_handler): Pass string
length and base to Token::make_int.
(MacroBuiltin::line_handler): Likewise.
* lex/rust-lex.cc (Lexer::parse_in_type_suffix): Rename to parse_in_suffix and return
string instead of PrimitiveCoreType.
(Lexer::parse_in_suffix): Remove underscore stripping to preserve source fidelity for
macros.
(Lexer::parse_in_exponent_part): Preserve '+' and '-' characters in the raw string.
(Lexer::parse_in_decimal): Remove underscore stripping.
(Lexer::parse_non_decimal_int_literal): Track suffix start index and pass literal base.
(Lexer::parse_non_decimal_int_literals): Use IntegerLiteralBase enum values instead of
raw integers.
(Lexer::parse_decimal_int_or_float): Track suffix string length and pass base parameters
to token creation.
* lex/rust-lex.h: Update method signatures for suffix parsing.
* lex/rust-token.h (enum class IntegerLiteralBase): New enum to represent numeric bases.
* parse/rust-parse-impl-expr.hxx: Use LiteralResolve functions to evaluate raw token
strings.
* parse/rust-parse-impl-pattern.hxx: Use evaluated literal strings for INT and FLOAT
tokens.
* parse/rust-parse.cc (resolve_literal_suffix): Move suffix validation logic from lexer
to parser.
(evaluate_integer_literal): New function to strip underscores and convert to decimal via
GMP.
(evaluate_float_literal): New function to strip underscores from floats.
* parse/rust-parse.h (evaluate_integer_literal): Declare in LiteralResolve namespace.
(evaluate_float_literal): Likewise.
(resolve_literal_suffix): Likewise.
* util/rust-token-converter.cc (from_literal): Safely reconstruct raw text and suffix to
dynamically determine base and suffix_start for ProcMacros.
gcc/testsuite/ChangeLog:
* rust/compile/deferred-suffix-validation.rs: New test.
* rust/compile/evaluate-integer-or-float.rs: New test.
* rust/compile/tuple-index.rs: New test.
Signed-off-by: Enes Cevik <nsvke@proton.me>
push (Rust::Token::make_identifier (tok.get_locus (), std::move (data)));
break;
case INT_LITERAL:
- push (Rust::Token::make_int (tok.get_locus (), std::move (data),
- tok.get_type_hint ()));
- break;
- case FLOAT_LITERAL:
- push (Rust::Token::make_float (tok.get_locus (), std::move (data),
+ {
+ auto suffix_start = data.length ();
+ push (Rust::Token::make_int (tok.get_locus (), std::move (data),
+ suffix_start, IntegerLiteralBase::Decimal,
tok.get_type_hint ()));
- break;
+ break;
+ }
+ case FLOAT_LITERAL:
+ {
+ auto suffix_start = data.length ();
+ push (Rust::Token::make_float (tok.get_locus (), std::move (data),
+ suffix_start, tok.get_type_hint ()));
+ break;
+ }
case STRING_LITERAL:
push (Rust::Token::make_string (tok.get_locus (), std::move (data)));
break;
push (Rust::Token::make_raw_string (locus, std::move (value)));
break;
case Literal::LitType::INT:
- push (
- Rust::Token::make_int (locus, std::move (value), lit.get_type_hint ()));
- break;
- case Literal::LitType::FLOAT:
- push (Rust::Token::make_float (locus, std::move (value),
+ {
+ auto val_len = value.length ();
+ push (Rust::Token::make_int (locus, std::move (value), val_len,
+ IntegerLiteralBase::Decimal,
lit.get_type_hint ()));
- break;
+ break;
+ }
+ case Literal::LitType::FLOAT:
+ {
+ auto val_len = value.length ();
+ push (Rust::Token::make_float (locus, std::move (value), val_len,
+ lit.get_type_hint ()));
+ break;
+ }
case Literal::LitType::BOOL:
{
if (value == Values::Keywords::FALSE_LITERAL)
describe_node (std::string ("TupleIndexExpr"), [this, &expr] () {
visit (expr.get_tuple_expr ());
push (Rust::Token::make (DOT, expr.get_locus ()));
- push (Rust::Token::make_int (UNDEF_LOCATION,
- std::to_string (expr.get_tuple_index ())));
+ auto str = std::to_string (expr.get_tuple_index ());
+ auto suffix_start = str.length ();
+ push (Rust::Token::make_int (UNDEF_LOCATION, str, suffix_start,
+ IntegerLiteralBase::Decimal));
});
}
{
describe_node (std::string ("StructExprFieldIndexValue"), [this, &expr] () {
visit_items_as_lines (expr.get_outer_attrs ());
- push (Rust::Token::make_int (expr.get_locus (),
- std::to_string (expr.get_index ())));
+ auto str = std::to_string (expr.get_index ());
+ auto suffix_start = str.length ();
+ push (Rust::Token::make_int (expr.get_locus (), str, suffix_start,
+ IntegerLiteralBase::Decimal));
push (Rust::Token::make (COLON, UNDEF_LOCATION));
visit (expr.get_value ());
});
describe_node (std::string ("StructPatternFieldTuplePat"), [this,
&pattern] () {
visit_items_as_lines (pattern.get_outer_attrs ());
- push (Rust::Token::make_int (pattern.get_locus (),
- std::to_string (pattern.get_index ())));
+ auto str = std::to_string (pattern.get_index ());
+ auto suffix_start = str.length ();
+ push (Rust::Token::make_int (pattern.get_locus (), str, suffix_start,
+ IntegerLiteralBase::Decimal));
push (Rust::Token::make (COLON, pattern.get_locus ()));
visit (pattern.get_index_pattern ());
});
{
auto current_column = LOCATION_COLUMN (invoc_locus);
+ auto str = std::to_string (current_column);
+ auto str_len = str.length ();
auto column_tok = make_token (
- Token::make_int (invoc_locus, std::to_string (current_column)));
+ Token::make_int (invoc_locus, str, str_len, IntegerLiteralBase::Decimal));
auto column_no = AST::SingleASTNode (std::unique_ptr<AST::Expr> (
new AST::LiteralExpr (std::to_string (current_column), AST::Literal::INT,
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
auto line_no = AST::SingleASTNode (std::unique_ptr<AST::Expr> (
new AST::LiteralExpr (std::to_string (current_line), AST::Literal::INT,
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
- auto tok
- = make_token (Token::make_int (invoc_locus, std::to_string (current_line)));
+ auto str = std::to_string (current_line);
+ auto str_len = str.length ();
+ auto tok = make_token (
+ Token::make_int (invoc_locus, str, str_len, IntegerLiteralBase::Decimal));
return AST::Fragment ({line_no}, std::move (tok));
}
}
}
-// Parses in a type suffix.
-std::pair<PrimitiveCoreType, int>
-Lexer::parse_in_type_suffix ()
+// Parses in a suffix
+std::pair<std::string, int>
+Lexer::parse_in_suffix ()
{
std::string suffix;
- suffix.reserve (5);
int additional_length_offset = 0;
while (ISALPHA (current_char.value) || ISDIGIT (current_char.value)
|| current_char == '_')
{
- if (current_char == '_')
- {
- // don't add _ to suffix
- skip_input ();
- current_char = peek_input ();
-
- additional_length_offset++;
-
- continue;
- }
-
additional_length_offset++;
suffix += current_char;
current_char = peek_input ();
}
- if (suffix.empty ())
- {
- // no type suffix: do nothing but also no error
- return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset);
- }
- else if (suffix == "f32")
- {
- return std::make_pair (CORETYPE_F32, additional_length_offset);
- }
- else if (suffix == "f64")
- {
- return std::make_pair (CORETYPE_F64, additional_length_offset);
- }
- else if (suffix == "i8")
- {
- return std::make_pair (CORETYPE_I8, additional_length_offset);
- }
- else if (suffix == "i16")
- {
- return std::make_pair (CORETYPE_I16, additional_length_offset);
- }
- else if (suffix == "i32")
- {
- return std::make_pair (CORETYPE_I32, additional_length_offset);
- }
- else if (suffix == "i64")
- {
- return std::make_pair (CORETYPE_I64, additional_length_offset);
- }
- else if (suffix == "i128")
- {
- return std::make_pair (CORETYPE_I128, additional_length_offset);
- }
- else if (suffix == "isize")
- {
- return std::make_pair (CORETYPE_ISIZE, additional_length_offset);
- }
- else if (suffix == "u8")
- {
- return std::make_pair (CORETYPE_U8, additional_length_offset);
- }
- else if (suffix == "u16")
- {
- return std::make_pair (CORETYPE_U16, additional_length_offset);
- }
- else if (suffix == "u32")
- {
- return std::make_pair (CORETYPE_U32, additional_length_offset);
- }
- else if (suffix == "u64")
- {
- return std::make_pair (CORETYPE_U64, additional_length_offset);
- }
- else if (suffix == "u128")
- {
- return std::make_pair (CORETYPE_U128, additional_length_offset);
- }
- else if (suffix == "usize")
- {
- return std::make_pair (CORETYPE_USIZE, additional_length_offset);
- }
- else
- {
- rust_error_at (get_current_location (), "unknown number suffix %qs",
- suffix.c_str ());
-
- return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset);
- }
+ return std::make_pair (std::move (suffix), additional_length_offset);
}
// Parses in the exponent part (if any) of a float literal.
additional_length_offset++;
// special - and + handling
- if (current_char == '-')
+ if (current_char == '-' || current_char == '+')
{
- str += '-';
+ str += current_char;
skip_input ();
current_char = peek_input ();
- additional_length_offset++;
- }
- else if (current_char == '+')
- {
- // don't add + but still skip input
- skip_input ();
- current_char = peek_input ();
-
additional_length_offset++;
}
if (current_char == '_')
{
pure_decimal = false;
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- additional_length_offset++;
-
- continue;
}
-
additional_length_offset++;
str += current_char;
template <typename IsDigitFunc>
TokenPtr
Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func,
- int base)
+ IntegerLiteralBase base)
{
- std::string raw_str;
+ std::string raw_str = "0";
+ raw_str += current_char; // x, o, b
+ skip_input ();
int length = 1;
- skip_input ();
current_char = peek_input ();
length++;
// loop through to add entire number to string
while (is_digit_func (current_char.value) || current_char == '_')
{
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
length++;
- // add raw numbers
raw_str += current_char;
skip_input ();
current_char = peek_input ();
}
- // convert value to decimal representation
- mpz_t dec_num;
- mpz_init (dec_num);
- mpz_set_str (dec_num, raw_str.c_str (), base);
- char *s = mpz_get_str (NULL, 10, dec_num);
- std::string dec_str = s;
- free (s);
- mpz_clear (dec_num);
+ int suffix_start = raw_str.length ();
- // parse in type suffix if it exists
- auto type_suffix_pair = parse_in_type_suffix ();
- PrimitiveCoreType type_hint = type_suffix_pair.first;
- length += type_suffix_pair.second;
+ // parse in suffix if it exists
+ auto suffix_pair = parse_in_suffix ();
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
+ raw_str += suffix_pair.first;
+ length += suffix_pair.second;
current_column += length;
- if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
- {
- rust_error_at (get_current_location (),
- "invalid type suffix %qs for integer (%s) literal",
- get_type_hint_string (type_hint),
- base == 16
- ? "hex"
- : (base == 8 ? "octal"
- : (base == 2 ? "binary"
- : "<insert unknown base>")));
- return nullptr;
- }
-
loc += length - 1;
- return Token::make_int (loc, std::move (dec_str), type_hint);
+ return Token::make_int (loc, std::move (raw_str), suffix_start, base,
+ type_hint);
}
// Parses a hex, binary or octal int literal.
if (current_char == 'x')
{
// hex (integer only)
- return parse_non_decimal_int_literal (loc, is_x_digit, 16);
+ return parse_non_decimal_int_literal (loc, is_x_digit,
+ IntegerLiteralBase::Hex);
}
else if (current_char == 'o')
{
// octal (integer only)
- return parse_non_decimal_int_literal (loc, is_octal_digit, 8);
+ return parse_non_decimal_int_literal (loc, is_octal_digit,
+ IntegerLiteralBase::Octal);
}
else if (current_char == 'b')
{
// binary (integer only)
- return parse_non_decimal_int_literal (loc, is_bin_digit, 2);
+ return parse_non_decimal_int_literal (loc, is_bin_digit,
+ IntegerLiteralBase::Binary);
}
else
{
str += exponent_pair.first;
length += exponent_pair.second;
- // parse in type suffix if it exists
- auto type_suffix_pair = parse_in_type_suffix ();
- PrimitiveCoreType type_hint = type_suffix_pair.first;
- length += type_suffix_pair.second;
+ int suffix_start = str.length ();
- if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
- && type_hint != CORETYPE_UNKNOWN)
- {
- rust_error_at (get_current_location (),
- "invalid type suffix %qs for floating-point literal",
- get_type_hint_string (type_hint));
- // ignore invalid type suffix as everything else seems fine
- type_hint = CORETYPE_UNKNOWN;
- }
+ // parse in type suffix if it exists
+ auto suffix_pair = parse_in_suffix ();
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
+ str += suffix_pair.first;
+ length += suffix_pair.second;
current_column += length;
loc += length - 1;
str.shrink_to_fit ();
- return Token::make_float (loc, std::move (str), type_hint);
+ return Token::make_float (loc, std::move (str), suffix_start, type_hint);
}
else if (current_char == '.'
&& check_valid_float_dot_end (peek_input (1).value))
loc += length - 1;
str.shrink_to_fit ();
- return Token::make_float (loc, std::move (str), CORETYPE_UNKNOWN);
+ // This branch parses no suffix, so the suffix index is the full length of
+ // the string. Read it before the call: STR is moved into a by-value
+ // parameter and the order in which arguments are evaluated is unspecified,
+ // so calling str.length () inside the argument list may observe the
+ // moved-from string.
+ auto str_len = str.length ();
+ return Token::make_float (loc, std::move (str), str_len,
+ CORETYPE_UNKNOWN);
}
else if (current_char == 'E' || current_char == 'e')
{
str += exponent_pair.first;
length += exponent_pair.second;
- // parse in type suffix if it exists
- auto type_suffix_pair = parse_in_type_suffix ();
- PrimitiveCoreType type_hint = type_suffix_pair.first;
- length += type_suffix_pair.second;
+ int suffix_start = str.length ();
- if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
- && type_hint != CORETYPE_UNKNOWN)
- {
- rust_error_at (get_current_location (),
- "invalid type suffix %qs for floating-point literal",
- get_type_hint_string (type_hint));
- // ignore invalid type suffix as everything else seems fine
- type_hint = CORETYPE_UNKNOWN;
- }
+ // parse in type suffix if it exists
+ auto suffix_pair = parse_in_suffix ();
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
+ str += suffix_pair.first;
+ length += suffix_pair.second;
current_column += length;
loc += length - 1;
str.shrink_to_fit ();
- return Token::make_float (loc, std::move (str), type_hint);
+ return Token::make_float (loc, std::move (str), suffix_start, type_hint);
}
else
{
// is an integer
+ int suffix_start = str.length ();
+
// parse in type suffix if it exists
- auto type_suffix_pair = parse_in_type_suffix ();
- PrimitiveCoreType type_hint = type_suffix_pair.first;
+ auto suffix_pair = parse_in_suffix ();
+ str += suffix_pair.first;
+
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
+
/* A "real" pure decimal doesn't have a suffix and no zero prefix. */
- if (type_hint == CORETYPE_UNKNOWN)
- {
- bool pure_decimal = std::get<2> (initial_decimal);
- if (pure_decimal && (!first_zero || str.size () == 1))
- type_hint = CORETYPE_PURE_DECIMAL;
- }
- length += type_suffix_pair.second;
+ bool pure_decimal = std::get<2> (initial_decimal);
+ if (pure_decimal && (!first_zero || suffix_start == 1)
+ && suffix_pair.first.empty ())
+ type_hint = CORETYPE_PURE_DECIMAL;
+
+ length += suffix_pair.second;
current_column += length;
loc += length - 1;
str.shrink_to_fit ();
- return Token::make_int (loc, std::move (str), type_hint);
+ return Token::make_int (loc, std::move (str), suffix_start,
+ IntegerLiteralBase::Decimal, type_hint);
}
}
std::tuple<std::string, int, bool> parse_in_decimal ();
std::pair<std::string, int> parse_in_exponent_part ();
- std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
+ std::pair<std::string, int> parse_in_suffix ();
std::tuple<char, int, bool> parse_escape (char opening_char);
std::tuple<Codepoint, int, bool> parse_utf8_escape ();
int parse_partial_string_continue ();
template <typename IsDigitFunc>
TokenPtr parse_non_decimal_int_literal (location_t loc,
- IsDigitFunc is_digit_func, int base);
+ IsDigitFunc is_digit_func,
+ IntegerLiteralBase base);
public:
// Construct lexer with input file and filename provided
#undef RS_TOKEN
};
+// Records the numeric base an integer-literal token was written in. None
+// (value 0) is the default for tokens constructed without an explicit base.
+enum class IntegerLiteralBase : uint8_t
+{
+ None = 0,
+ Binary,
+ Octal,
+ Decimal,
+ Hex,
+};
+
// dodgy "TokenPtr" declaration with Token forward declaration
class Token;
// A smart pointer (shared_ptr) to Token.
* for most tokens. */
PrimitiveCoreType type_hint;
+ // Index into str at which the literal suffix begins. When there is no
+ // suffix this is the length of str.
+ uint16_t suffix_start;
+
+ // Numeric base for integer literals. IntegerLiteralBase::None is the
+ // default for tokens constructed without an explicit base.
+ IntegerLiteralBase integer_literal_base;
+
// Token constructor from token id and location. Has a null string.
Token (TokenId token_id, location_t location)
- : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
+ : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN),
+ suffix_start (0), integer_literal_base (IntegerLiteralBase::None)
{}
// Token constructor from token id, location, and a string.
Token (TokenId token_id, location_t location, std::string paramStr)
- : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
+ : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN),
+ integer_literal_base (IntegerLiteralBase::None)
{
// Normalize identifier tokens
str = nfc_normalize_token_string (location, token_id, std::move (paramStr));
+ suffix_start = str.length ();
}
// Token constructor from token id, location, and a char.
Token (TokenId token_id, location_t location, char paramChar)
: token_id (token_id), locus (location), str (1, paramChar),
- type_hint (CORETYPE_UNKNOWN)
+ type_hint (CORETYPE_UNKNOWN), suffix_start (1),
+ integer_literal_base (IntegerLiteralBase::None)
{
// Do not need to normalize 1byte char
}
// Token constructor from token id, location, and a "codepoint".
Token (TokenId token_id, location_t location, Codepoint paramCodepoint)
- : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
+ : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN),
+ integer_literal_base (IntegerLiteralBase::None)
{
// Normalize identifier tokens
str = nfc_normalize_token_string (location, token_id,
paramCodepoint.as_string ());
+ suffix_start = str.length ();
}
// Token constructor from token id, location, a string, and type hint.
Token (TokenId token_id, location_t location, std::string paramStr,
PrimitiveCoreType parType)
- : token_id (token_id), locus (location), type_hint (parType)
+ : token_id (token_id), locus (location), type_hint (parType),
+ integer_literal_base (IntegerLiteralBase::None)
{
// Normalize identifier tokens
str = nfc_normalize_token_string (location, token_id, std::move (paramStr));
+ suffix_start = str.length ();
+ }
+
+ // Token constructor from token id, location, a string, a type hint, a
+ // suffix start index and an integer literal base. Unlike the other string
+ // constructors, the string is stored as given, without NFC normalization.
+ Token (TokenId token_id, location_t location, std::string paramStr,
+ PrimitiveCoreType parType, uint16_t suffix_start,
+ IntegerLiteralBase base)
+ : token_id (token_id), locus (location), str (std::move (paramStr)),
+ type_hint (parType), suffix_start (suffix_start),
+ integer_literal_base (base)
+ {
+ // Literal text needs no normalization, so there is nothing to do here.
}
public:
// Makes and returns a new TokenPtr of type INT_LITERAL.
+ // SUFFIX_START is the index in STR at which the literal suffix begins
+ // (callers pass the length of STR when there is no suffix) and BASE is the
+ // base the literal is written in.
static TokenPtr make_int (location_t locus, std::string str,
+ uint16_t suffix_start, IntegerLiteralBase base,
PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
{
- // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
- return TokenPtr (
- new Token (INT_LITERAL, locus, std::move (str), type_hint));
+ // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint,
+ // suffix_start, base);
+ return TokenPtr (new Token (INT_LITERAL, locus, std::move (str), type_hint,
+ suffix_start, base));
}
// Makes and returns a new TokenPtr of type FLOAT_LITERAL.
+ // SUFFIX_START is the index in STR at which the literal suffix begins
+ // (callers pass the length of STR when there is no suffix). BASE defaults
+ // to IntegerLiteralBase::None.
- static TokenPtr make_float (location_t locus, std::string str,
- PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
+ static TokenPtr
+ make_float (location_t locus, std::string str, uint16_t suffix_start,
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN,
+ IntegerLiteralBase base = IntegerLiteralBase::None)
{
// return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
- return TokenPtr (
- new Token (FLOAT_LITERAL, locus, std::move (str), type_hint));
+ return TokenPtr (new Token (FLOAT_LITERAL, locus, std::move (str),
+ type_hint, suffix_start, base));
}
// Makes and returns a new TokenPtr of type STRING_LITERAL.
return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint;
}
+ // Returns the index in the token string at which the literal suffix
+ // starts, as stored at construction.
+ uint16_t get_suffix_start () const { return suffix_start; }
+
+ // Returns the integer literal base stored for this token.
+ IntegerLiteralBase get_literal_base () const { return integer_literal_base; }
+
// diagnostics (error reporting)
const char *get_token_description () const
{
break;
case INT_LITERAL:
type = AST::Literal::INT;
- literal_value = t->get_str ();
+ literal_value = LiteralResolve::evaluate_integer_literal (t);
lexer.skip_token ();
break;
case FLOAT_LITERAL:
type = AST::Literal::FLOAT;
- literal_value = t->get_str ();
+ literal_value = LiteralResolve::evaluate_float_literal (t);
lexer.skip_token ();
break;
// case BOOL_LITERAL
return tl::unexpected<Parse::Error::Node> (Parse::Error::Node::MALFORMED);
}
+ auto type_hint
+ = (t->get_id () == INT_LITERAL || t->get_id () == FLOAT_LITERAL)
+ ? LiteralResolve::resolve_literal_suffix (t)
+ : t->get_type_hint ();
+
// create literal based on stuff in switch
return std::unique_ptr<AST::LiteralExpr> (
new AST::LiteralExpr (std::move (literal_value), std::move (type),
- t->get_type_hint (), std::move (outer_attrs),
- t->get_locus ()));
+ type_hint, std::move (outer_attrs), t->get_locus ()));
}
template <typename ManagedTokenSource>
return tl::unexpected<Parse::Error::Expr> (Parse::Error::Expr::CHILD_ERROR);
if (expr.value () == nullptr)
return tl::unexpected<Parse::Error::Expr> (Parse::Error::Expr::CHILD_ERROR);
-
+
return left_denotations (std::move (expr), right_binding_power,
std::move (outer_attrs), restrictions);
}
case INT_LITERAL:
// we should check the range, but ignore for now
// encode as int?
- return std::unique_ptr<AST::LiteralExpr> (
- new AST::LiteralExpr (tok->get_str (), AST::Literal::INT,
- tok->get_type_hint (), {}, tok->get_locus ()));
+ return std::unique_ptr<AST::LiteralExpr> (new AST::LiteralExpr (
+ LiteralResolve::evaluate_integer_literal (tok), AST::Literal::INT,
+ LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ()));
case FLOAT_LITERAL:
// encode as float?
- return std::unique_ptr<AST::LiteralExpr> (
- new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT,
- tok->get_type_hint (), {}, tok->get_locus ()));
+ return std::unique_ptr<AST::LiteralExpr> (new AST::LiteralExpr (
+ LiteralResolve::evaluate_float_literal (tok), AST::Literal::FLOAT,
+ LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ()));
case STRING_LITERAL:
return std::unique_ptr<AST::LiteralExpr> (
new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING,
auto prefix = str.substr (0, dot_pos);
auto suffix = str.substr (dot_pos + 1);
if (dot_pos == str.size () - 1)
- lexer.split_current_token (
- {Token::make_int (current_loc, std::move (prefix),
- CORETYPE_PURE_DECIMAL),
- Token::make (DOT, current_loc + 1)});
+ {
+ auto prefix_len = prefix.length ();
+ lexer.split_current_token (
+ {Token::make_int (current_loc, std::move (prefix), prefix_len,
+ IntegerLiteralBase::Decimal,
+ CORETYPE_PURE_DECIMAL),
+ Token::make (DOT, current_loc + 1)});
+ }
else
- lexer.split_current_token (
- {Token::make_int (current_loc, std::move (prefix),
- CORETYPE_PURE_DECIMAL),
- Token::make (DOT, current_loc + 1),
- Token::make_int (current_loc + 2, std::move (suffix),
- CORETYPE_PURE_DECIMAL)});
+ {
+ auto prefix_len = prefix.length ();
+ auto suffix_len = suffix.length ();
+ lexer.split_current_token (
+ {Token::make_int (current_loc, std::move (prefix), prefix_len,
+ IntegerLiteralBase::Decimal,
+ CORETYPE_PURE_DECIMAL),
+ Token::make (DOT, current_loc + 1),
+ Token::make_int (current_loc + 2, std::move (suffix),
+ suffix_len, IntegerLiteralBase::Decimal,
+ CORETYPE_PURE_DECIMAL)});
+ }
return parse_tuple_index_expr (tok, std::move (left),
std::move (outer_attrs),
restrictions);
return nullptr;
}
+ std::string literal_value;
+ if (range_lower->get_id () == INT_LITERAL)
+ literal_value = LiteralResolve::evaluate_integer_literal (range_lower);
+ else if (range_lower->get_id () == FLOAT_LITERAL)
+ literal_value = LiteralResolve::evaluate_float_literal (range_lower);
+ else
+ literal_value = range_lower->get_str ();
+
const_TokenPtr next = lexer.peek_token ();
if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS
|| next->get_id () == DOT_DOT)
lexer.skip_token ();
std::unique_ptr<AST::RangePatternBound> lower (
new AST::RangePatternBoundLiteral (
- AST::Literal (range_lower->get_str (), type,
+ AST::Literal (std::move (literal_value), type,
PrimitiveCoreType::CORETYPE_UNKNOWN),
range_lower->get_locus (), has_minus));
else
{
// literal pattern
+
+ auto type_hint = (range_lower->get_id () == INT_LITERAL
+ || range_lower->get_id () == FLOAT_LITERAL)
+ ? LiteralResolve::resolve_literal_suffix (range_lower)
+ : range_lower->get_type_hint ();
+
return std::unique_ptr<AST::LiteralPattern> (
- new AST::LiteralPattern (range_lower->get_str (), type,
- range_lower->get_locus (),
- range_lower->get_type_hint (), has_minus));
+ new AST::LiteralPattern (std::move (literal_value), type,
+ range_lower->get_locus (), type_hint,
+ has_minus));
}
}
lexer.skip_token ();
return std::unique_ptr<AST::RangePatternBoundLiteral> (
new AST::RangePatternBoundLiteral (
- AST::Literal (range_lower->get_str (), AST::Literal::INT,
- range_lower->get_type_hint ()),
+ AST::Literal (LiteralResolve::evaluate_integer_literal (range_lower),
+ AST::Literal::INT,
+ LiteralResolve::resolve_literal_suffix (range_lower)),
range_lower_locus));
case FLOAT_LITERAL:
lexer.skip_token ();
rust_debug ("warning: used deprecated float range pattern bound");
return std::unique_ptr<AST::RangePatternBoundLiteral> (
new AST::RangePatternBoundLiteral (
- AST::Literal (range_lower->get_str (), AST::Literal::FLOAT,
- range_lower->get_type_hint ()),
+ AST::Literal (LiteralResolve::evaluate_float_literal (range_lower),
+ AST::Literal::FLOAT,
+ LiteralResolve::resolve_literal_suffix (range_lower)),
range_lower_locus));
case MINUS:
// branch on next token
lexer.skip_token (1);
return std::unique_ptr<AST::RangePatternBoundLiteral> (
new AST::RangePatternBoundLiteral (
- AST::Literal (range_lower->get_str (), AST::Literal::INT,
- range_lower->get_type_hint ()),
+ AST::Literal (
+ LiteralResolve::evaluate_integer_literal (range_lower),
+ AST::Literal::INT,
+ LiteralResolve::resolve_literal_suffix (range_lower)),
range_lower_locus, true));
case FLOAT_LITERAL:
lexer.skip_token (1);
rust_debug ("warning: used deprecated float range pattern bound");
return std::unique_ptr<AST::RangePatternBoundLiteral> (
new AST::RangePatternBoundLiteral (
- AST::Literal (range_lower->get_str (), AST::Literal::FLOAT,
- range_lower->get_type_hint ()),
+ AST::Literal (
+ LiteralResolve::evaluate_float_literal (range_lower),
+ AST::Literal::FLOAT,
+ LiteralResolve::resolve_literal_suffix (range_lower)),
range_lower_locus, true));
default:
add_error (Error (range_lower->get_locus (),
// FIXME: Does expansion depth/limit matter here?
return is_match_compatible (*new_last, match);
}
+
+namespace LiteralResolve {
+
+// Maps the suffix of a numeric literal token to its PrimitiveCoreType.
+//
+// The suffix is the part of the token string that starts at
+// token->get_suffix_start ().  Without a suffix the result is
+// CORETYPE_PURE_DECIMAL when the token reports is_pure_decimal () and
+// CORETYPE_UNKNOWN otherwise.  An error is emitted and CORETYPE_UNKNOWN is
+// returned for:
+//   - a suffix that names no primitive numeric type,
+//   - "f32"/"f64" on a hex, octal or binary integer literal,
+//   - an integer suffix on a FLOAT_LITERAL token.
+PrimitiveCoreType
+resolve_literal_suffix (const_TokenPtr token)
+{
+  const std::string &raw_str = token->get_str ();
+  uint16_t start = token->get_suffix_start ();
+
+  // No suffix at all.
+  if (start >= raw_str.length ())
+    return token->is_pure_decimal () ? CORETYPE_PURE_DECIMAL
+                                     : CORETYPE_UNKNOWN;
+
+  std::string suffix = raw_str.substr (start);
+
+  static const struct
+  {
+    const char *name;
+    PrimitiveCoreType type;
+  } known_suffixes[] = {
+    {"f32", CORETYPE_F32},   {"f64", CORETYPE_F64},
+    {"i8", CORETYPE_I8},     {"i16", CORETYPE_I16},
+    {"i32", CORETYPE_I32},   {"i64", CORETYPE_I64},
+    {"i128", CORETYPE_I128}, {"isize", CORETYPE_ISIZE},
+    {"u8", CORETYPE_U8},     {"u16", CORETYPE_U16},
+    {"u32", CORETYPE_U32},   {"u64", CORETYPE_U64},
+    {"u128", CORETYPE_U128}, {"usize", CORETYPE_USIZE},
+  };
+
+  for (const auto &entry : known_suffixes)
+    {
+      if (suffix != entry.name)
+        continue;
+
+      bool is_float_suffix
+        = entry.type == CORETYPE_F32 || entry.type == CORETYPE_F64;
+
+      if (is_float_suffix)
+        {
+          // A float suffix is rejected on integer literals written with a
+          // base prefix.
+          const char *base_name = nullptr;
+          switch (token->get_literal_base ())
+            {
+            case IntegerLiteralBase::Hex:
+              base_name = "hex";
+              break;
+            case IntegerLiteralBase::Octal:
+              base_name = "octal";
+              break;
+            case IntegerLiteralBase::Binary:
+              base_name = "binary";
+              break;
+            default:
+              break;
+            }
+
+          if (base_name != nullptr)
+            {
+              rust_error_at (token->get_locus (),
+                             "invalid type suffix %qs for integer (%s) literal",
+                             suffix.c_str (), base_name);
+              return CORETYPE_UNKNOWN;
+            }
+        }
+      else if (token->get_id () == FLOAT_LITERAL)
+        {
+          // The lexer used to reject integer suffixes on float literals
+          // before suffix validation moved here; keep diagnosing them and,
+          // as before, ignore the bad suffix.
+          rust_error_at (token->get_locus (),
+                         "invalid type suffix %qs for floating-point literal",
+                         suffix.c_str ());
+          return CORETYPE_UNKNOWN;
+        }
+
+      return entry.type;
+    }
+
+  rust_error_at (token->get_locus (), "invalid suffix %qs for number literal",
+                 suffix.c_str ());
+  return CORETYPE_UNKNOWN;
+}
+
+// Returns the value of an integer literal token as a decimal digit string.
+// The suffix and every '_' are dropped; hex, octal and binary literals are
+// converted to base 10 with GMP, other literals are returned as written.
+std::string
+evaluate_integer_literal (const_TokenPtr token)
+{
+  // Keep only the numeric part, i.e. everything before the suffix.
+  std::string digits
+    = token->get_str ().substr (0, token->get_suffix_start ());
+
+  // Underscores are separators and carry no value.
+  digits.erase (std::remove (digits.begin (), digits.end (), '_'),
+                digits.end ());
+
+  int radix = 10;
+  switch (token->get_literal_base ())
+    {
+    case IntegerLiteralBase::Decimal:
+    case IntegerLiteralBase::None:
+      // Already decimal, nothing to convert.
+      return digits;
+    case IntegerLiteralBase::Hex:
+      radix = 16;
+      break;
+    case IntegerLiteralBase::Octal:
+      radix = 8;
+      break;
+    case IntegerLiteralBase::Binary:
+      radix = 2;
+      break;
+    }
+
+  // Skip the two-character base prefix before handing the digits to GMP.
+  digits = digits.substr (2);
+
+  mpz_t value;
+  mpz_init (value);
+  mpz_set_str (value, digits.c_str (), radix);
+  char *decimal_cstr = mpz_get_str (NULL, 10, value);
+  std::string result (decimal_cstr);
+  free (decimal_cstr);
+  mpz_clear (value);
+
+  return result;
+}
+
+// Returns the text of a float literal token with the suffix cut off and
+// every '_' removed.
+std::string
+evaluate_float_literal (const_TokenPtr token)
+{
+  const std::string &text = token->get_str ();
+  const std::string number = text.substr (0, token->get_suffix_start ());
+
+  std::string cleaned;
+  cleaned.reserve (number.size ());
+  for (char c : number)
+    {
+      if (c != '_')
+        cleaned += c;
+    }
+
+  return cleaned;
+}
+
+} // namespace LiteralResolve
} // namespace Rust
*/
bool is_match_compatible (const AST::MacroMatch &last_match,
const AST::MacroMatch ¤t_match);
+
+namespace LiteralResolve {
+
+// Returns the value of an integer literal token as a decimal digit string:
+// the suffix and any '_' are dropped, and hex, octal and binary literals are
+// converted to base 10.
+std::string evaluate_integer_literal (const_TokenPtr token);
+
+// Returns the text of a float literal token with the suffix and any '_'
+// removed.
+std::string evaluate_float_literal (const_TokenPtr token);
+
+// Maps the suffix of the token's raw string, if there is one, to a core type.
+// Reports an error and returns CORETYPE_UNKNOWN for a suffix it rejects.
+PrimitiveCoreType resolve_literal_suffix (const_TokenPtr token);
+
+} // namespace LiteralResolve
} // namespace Rust
#endif // RUST_PARSE_H
{
auto lookup = suffixes.lookup (literal.suffix.to_string ());
auto loc = convert (literal.span);
- auto suffix = lookup.value_or (CORETYPE_UNKNOWN);
+ auto type_hint = lookup.value_or (CORETYPE_UNKNOWN);
// FIXME: Add spans instead of empty locations
switch (literal.kind.tag)
{
result.push_back (Token::make_char (loc, literal.text.to_string ()[0]));
break;
case ProcMacro::INTEGER:
- result.push_back (
- Token::make_int (loc, literal.text.to_string (), suffix));
- break;
+ {
+ std::string text = literal.text.to_string ();
+ std::string suffix_str = literal.suffix.to_string ();
+ int suffix_start = text.length ();
+
+ if (!suffix_str.empty ())
+ {
+ bool ends_with_suffix
+ = text.size () >= suffix_str.size ()
+ && text.compare (text.size () - suffix_str.size (),
+ suffix_str.size (), suffix_str)
+ == 0;
+
+ if (!ends_with_suffix)
+ text += suffix_str;
+ else
+ suffix_start = text.length () - suffix_str.length ();
+ }
+ auto base = IntegerLiteralBase::Decimal;
+ if (suffix_start >= 2 && text[0] == '0')
+ {
+ if (text[1] == 'x' || text[1] == 'X')
+ base = Rust::IntegerLiteralBase::Hex;
+ else if (text[1] == 'o' || text[1] == 'O')
+ base = Rust::IntegerLiteralBase::Octal;
+ else if (text[1] == 'b' || text[1] == 'B')
+ base = Rust::IntegerLiteralBase::Binary;
+ }
+
+ result.push_back (
+ Token::make_int (loc, text, suffix_start, base, type_hint));
+ break;
+ }
case ProcMacro::FLOAT:
- result.push_back (
- Token::make_float (loc, literal.text.to_string (), suffix));
- break;
+ {
+ std::string text = literal.text.to_string ();
+ std::string suffix_str = literal.suffix.to_string ();
+ auto suffix_start = text.length ();
+ if (!suffix_str.empty ())
+ {
+ bool ends_with_suffix
+ = text.size () >= suffix_str.size ()
+ && text.compare (text.size () - suffix_str.size (),
+ suffix_str.size (), suffix_str)
+ == 0;
+
+ if (!ends_with_suffix)
+ {
+ text += suffix_str;
+ }
+ else
+ {
+ suffix_start = text.length () - suffix_str.length ();
+ }
+ }
+
+ result.push_back (Token::make_float (loc, text, suffix_start, type_hint,
+ IntegerLiteralBase::Decimal));
+ break;
+ }
case ProcMacro::STR:
result.push_back (Token::make_string (loc, literal.text.to_string ()));
break;
}
/**
- * Iterate over a Group and append all inner tokens to a vector enclosed by its
- * delimiters.
+ * Iterate over a Group and append all inner tokens to a vector enclosed by
+ * its delimiters.
*
* @param g Reference to the Group to convert.
* @param result Reference to the vector tokens should be appended to.
--- /dev/null
+#![feature(no_core)]
+#![no_core]
+
+// Accepts any single token tree and expands to nothing.
+macro_rules! foo {
+ ($x:tt) => {};
+}
+
+fn main() {
+ // These literals carry suffixes that are not valid types. No diagnostic is
+ // expected for them: they are only handed to `foo!` as token trees.
+ foo!(123invalid);
+ foo!(3.14_f128_invalid);
+ foo!(0x1h12);
+ foo!(0o1h13);
+ foo!(0b1h14);
+
+ // Outside a macro invocation the same suffix must be diagnosed.
+ let _ = 123invalid; // { dg-error "invalid suffix .invalid. for number literal" }
+}
--- /dev/null
+// { dg-options "-frust-dump-lex" }
+#![feature(no_core)]
+#![no_core]
+
+// The dg-final directives below check that the lexer dump contains each
+// numeric literal exactly as it is written here, including underscores, base
+// prefixes, the exponent sign and the suffix.
+fn main() {
+ let _a: u32 = 1_000u32;
+ let _b: u8 = 0xFF_u8;
+ let _c: f64 = 3.14_15_f64;
+
+ let _d = 0b1010;
+ let _e = 10.5;
+
+ let _f = 2.71e+10_f32;
+}
+
+// { dg-final { scan-file gccrs.lex.dump "1_000u32" } }
+// { dg-final { scan-file gccrs.lex.dump "0xFF_u8" } }
+// { dg-final { scan-file gccrs.lex.dump "3.14_15_f64" } }
+// { dg-final { scan-file gccrs.lex.dump "0b1010" } }
+// { dg-final { scan-file gccrs.lex.dump "10.5" } }
+// { dg-final { scan-file gccrs.lex.dump "2.71e\\+10_f32" } }
--- /dev/null
+#![feature(no_core)]
+#![no_core]
+
+// Accesses tuple and tuple-struct fields by integer index.
+fn main() {
+ let t = (10, 20);
+ let _a = t.0;
+ let _b = t.1;
+
+ struct S(u8, u8);
+ let s = S(1, 2);
+ let _c = s.0;
+}