From: Tom Tromey Date: Tue, 17 Dec 2024 19:35:44 +0000 (-0700) Subject: Don't lex floating-point number in Rust field expression X-Git-Tag: binutils-2_44~255 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=350609bb98cee92c6b4fbd334b99066683b9e5c1;p=thirdparty%2Fbinutils-gdb.git Don't lex floating-point number in Rust field expression Consider this Rust tuple: let tuple_tuple = ((23i32, 24i32), 25i32); Here, the value is a tuple whose first element is also a tuple. You should be able to print this with: (gdb) print tuple_tuple.0.1 However, currently the Rust lexer sees "0.1" as a floating-point number. This patch fixes the problem by introducing a special case in the lexer: when parsing a field expression, the parser informs the lexer that a number should be handled as a decimal integer only. This change then lets us remove the decimal integer special case from lex_number. v2: I realized that the other DECIMAL_INTEGER cases aren't needed any more. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32472 --- diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c index b44f119723f..490adb378b3 100644 --- a/gdb/rust-parse.c +++ b/gdb/rust-parse.c @@ -235,27 +235,31 @@ struct rust_parser std::string super_name (const std::string &ident, unsigned int n_supers); int lex_character (); + int lex_decimal_integer (); int lex_number (); int lex_string (); int lex_identifier (); uint32_t lex_hex (int min, int max); uint32_t lex_escape (bool is_byte); int lex_operator (); - int lex_one_token (); + int lex_one_token (bool decimal_only); void push_back (char c); /* The main interface to lexing. Lexes one token and updates the - internal state. */ - void lex () + internal state. DECIMAL_ONLY is true in the special case where + we want to tell the lexer not to parse a number as a float, but + instead only as a decimal integer. See parse_field. */ + void lex (bool decimal_only = false) { - current_token = lex_one_token (); + current_token = lex_one_token (decimal_only); } - /* Assuming the current token is TYPE, lex the next token. */ - void assume (int type) + /* Assuming the current token is TYPE, lex the next token. + DECIMAL_ONLY is passed to 'lex', which see. */ + void assume (int type, bool decimal_only = false) { gdb_assert (current_token == type); - lex (); + lex (decimal_only); } /* Require the single-character token C, and lex the next token; or @@ -898,6 +902,26 @@ rust_parser::lex_operator () return *pstate->lexptr++; } +/* Lex a decimal integer. */ + +int +rust_parser::lex_decimal_integer () +{ + gdb_assert (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'); + + std::string copy; + while (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') + { + copy.push_back (pstate->lexptr[0]); + ++pstate->lexptr; + } + + /* No need to set the value's type in this situation. */ + current_int_val.val.set (copy.c_str (), 10); + + return DECIMAL_INTEGER; +} + /* Lex a number. */ int @@ -906,7 +930,6 @@ rust_parser::lex_number () regmatch_t subexps[NUM_SUBEXPRESSIONS]; int match; bool is_integer = false; - bool could_be_decimal = true; bool implicit_i32 = false; const char *type_name = NULL; struct type *type; @@ -929,10 +952,7 @@ rust_parser::lex_number () implicit_i32 = true; } else - { - type_index = INT_TYPE; - could_be_decimal = false; - } + type_index = INT_TYPE; } else if (subexps[FLOAT_TYPE1].rm_so != -1) { @@ -968,7 +988,6 @@ rust_parser::lex_number () is_integer = true; end_index = subexps[0].rm_eo; type_name = "i32"; - could_be_decimal = true; implicit_i32 = true; } } @@ -992,9 +1011,7 @@ rust_parser::lex_number () std::string number; for (int i = 0; i < end_index && pstate->lexptr[i]; ++i) { - if (pstate->lexptr[i] == '_') - could_be_decimal = false; - else + if (pstate->lexptr[i] != '_') number.push_back (pstate->lexptr[i]); } @@ -1016,10 +1033,7 @@ rust_parser::lex_number () else if (number[1] == 'b') radix = 2; if (radix != 10) - { - offset = 2; - could_be_decimal = false; - } + offset = 2; } if (!current_int_val.val.set (number.c_str () + offset, radix)) @@ -1049,13 +1063,13 @@ rust_parser::lex_number () gdb_assert (parsed); } - return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT; + return is_integer ? INTEGER : FLOAT; } /* The lexer. */ int -rust_parser::lex_one_token () +rust_parser::lex_one_token (bool decimal_only) { /* Skip all leading whitespace. */ while (pstate->lexptr[0] == ' ' @@ -1082,7 +1096,7 @@ rust_parser::lex_one_token () } if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') - return lex_number (); + return decimal_only ? lex_decimal_integer () : lex_number (); else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'') return lex_character (); else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"') @@ -1537,7 +1551,7 @@ rust_parser::parse_addr () operation_up rust_parser::parse_field (operation_up &&lhs) { - assume ('.'); + assume ('.', true); operation_up result; switch (current_token) @@ -1567,9 +1581,6 @@ rust_parser::parse_field (operation_up &&lhs) } break; - case INTEGER: - error (_("'_' not allowed in integers in anonymous field references")); - default: error (_("field name expected")); } @@ -1663,7 +1674,7 @@ rust_parser::parse_array_type () struct type *elt_type = parse_type (); require (';'); - if (current_token != INTEGER && current_token != DECIMAL_INTEGER) + if (current_token != INTEGER) error (_("integer expected")); ULONGEST val = current_int_val.val.as_integer (); lex (); @@ -2036,7 +2047,6 @@ rust_parser::parse_atom (bool required) break; case INTEGER: - case DECIMAL_INTEGER: result = make_operation (current_int_val.type, current_int_val.val); lex (); @@ -2177,12 +2187,12 @@ rust_lex_test_one (rust_parser *parser, const char *input, int expected) parser->reset (input); - token = parser->lex_one_token (); + token = parser->lex_one_token (false); SELF_CHECK (token == expected); if (token) { - token = parser->lex_one_token (); + token = parser->lex_one_token (false); SELF_CHECK (token == 0); } } @@ -2236,7 +2246,7 @@ rust_lex_test_sequence (rust_parser *parser, const char *input, int len, for (int i = 0; i < len; ++i) { - int token = parser->lex_one_token (); + int token = parser->lex_one_token (false); SELF_CHECK (token == expected[i]); } } @@ -2246,10 +2256,10 @@ rust_lex_test_sequence (rust_parser *parser, const char *input, int len, static void rust_lex_test_trailing_dot (rust_parser *parser) { - const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 }; + const int expected1[] = { INTEGER, '.', IDENT, '(', ')', 0 }; const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 }; const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 }; - const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 }; + const int expected4[] = { INTEGER, DOTDOT, INTEGER, 0 }; rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1); rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2), @@ -2285,16 +2295,16 @@ rust_lex_test_push_back (rust_parser *parser) parser->reset (">>="); - token = parser->lex_one_token (); + token = parser->lex_one_token (false); SELF_CHECK (token == COMPOUND_ASSIGN); SELF_CHECK (parser->current_opcode == BINOP_RSH); parser->push_back ('='); - token = parser->lex_one_token (); + token = parser->lex_one_token (false); SELF_CHECK (token == '='); - token = parser->lex_one_token (); + token = parser->lex_one_token (false); SELF_CHECK (token == 0); } @@ -2352,7 +2362,7 @@ rust_lex_tests (void) rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal"); rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal"); - rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER); + rust_lex_int_test (&parser, "23", 23, INTEGER); rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER); rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER); rust_lex_int_test (&parser, "23usize", 23, INTEGER); @@ -2377,7 +2387,7 @@ rust_lex_tests (void) rust_lex_stringish_test (&parser, "thread", "thread", IDENT); rust_lex_stringish_test (&parser, "r#true", "true", IDENT); - const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 }; + const int expected1[] = { IDENT, INTEGER, 0 }; rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1), expected1); const int expected2[] = { IDENT, '#', 0 }; diff --git a/gdb/testsuite/gdb.rust/simple.exp b/gdb/testsuite/gdb.rust/simple.exp index 37a2e079b6c..e269e413848 100644 --- a/gdb/testsuite/gdb.rust/simple.exp +++ b/gdb/testsuite/gdb.rust/simple.exp @@ -138,7 +138,7 @@ gdb_test "print z.1" " = 8" # Some error checks. gdb_test "print z.1_0" \ - "'_' not allowed in integers in anonymous field references" + "Syntax error near '_0'" gdb_test "print z.mut" "field name expected" gdb_test "print univariant" " = simple::Univariant::Foo{a: 1}" @@ -424,3 +424,6 @@ gdb_test "print \$one = \$two = 75" " = \\\(\\\)" gdb_test "info symbol 0xffffffffffffffff" \ "No symbol matches 0xffffffffffffffff." + +# This used to confound the lexer into thinking "0.1" is a float. +gdb_test "print tuple_tuple.0.1" " = 24" diff --git a/gdb/testsuite/gdb.rust/simple.rs b/gdb/testsuite/gdb.rust/simple.rs index 2dcbea2dee1..d9d6b323fad 100644 --- a/gdb/testsuite/gdb.rust/simple.rs +++ b/gdb/testsuite/gdb.rust/simple.rs @@ -181,6 +181,8 @@ fn main () { let nonzero_offset = EnumWithNonzeroOffset { a: Some(1), b: None }; + let tuple_tuple = ((23i32, 24i32), 25i32); + println!("{}, {}", x.0, x.1); // set breakpoint here println!("{}", diff2(92, 45)); empty();