git.ipfire.org Git - thirdparty/gcc.git/commitdiff
gccrs: Improve parsing of simple paths
author: Owen Avery <powerboat9.gamer@gmail.com>
Tue, 15 Jul 2025 02:47:07 +0000 (22:47 -0400)
committer: Arthur Cohen <arthur.cohen@embecosm.com>
Tue, 5 Aug 2025 14:36:56 +0000 (16:36 +0200)
gcc/rust/ChangeLog:

* parse/rust-parse-impl.h (Parser::parse_simple_path): Be more
careful about skipping SCOPE_RESOLUTION tokens.
(Parser::parse_simple_path_segment): Allow parsing from a
starting offset.
(Parser::parse_use_tree): Handle a non-skipped SCOPE_RESOLUTION
token.
* parse/rust-parse.h (Parser::parse_simple_path_segment): Add
parameter for parsing from a starting offset.

gcc/testsuite/ChangeLog:

* rust/compile/parse_simple_path_fail_1.rs: New test.
* rust/compile/parse_simple_path_fail_2.rs: New test.

Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>
gcc/rust/parse/rust-parse-impl.h
gcc/rust/parse/rust-parse.h
gcc/testsuite/rust/compile/parse_simple_path_fail_1.rs [new file with mode: 0644]
gcc/testsuite/rust/compile/parse_simple_path_fail_2.rs [new file with mode: 0644]

index 9608cd885f4a73c6809d349216613a5256309839..8233af2b0a324239a2e7fda7fbd3f4180ab3ff2b 100644 (file)
@@ -654,10 +654,7 @@ Parser<ManagedTokenSource>::parse_simple_path ()
   // Parse all other simple path segments
   while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
     {
-      // Skip scope resolution operator
-      lexer.skip_token ();
-
-      AST::SimplePathSegment new_segment = parse_simple_path_segment ();
+      AST::SimplePathSegment new_segment = parse_simple_path_segment (1);
 
       // Return path as currently constructed if segment in error state.
       if (new_segment.is_error ())
@@ -685,35 +682,36 @@ Parser<ManagedTokenSource>::parse_simple_path ()
 }
 
 /* Parses a single SimplePathSegment (does not handle the scope resolution
- * operators) */
+ * operators)
+ * Starts parsing at an offset of base_peek */
 template <typename ManagedTokenSource>
 AST::SimplePathSegment
-Parser<ManagedTokenSource>::parse_simple_path_segment ()
+Parser<ManagedTokenSource>::parse_simple_path_segment (int base_peek)
 {
   using namespace Values;
-  const_TokenPtr t = lexer.peek_token ();
+  const_TokenPtr t = lexer.peek_token (base_peek);
   switch (t->get_id ())
     {
     case IDENTIFIER:
-      lexer.skip_token ();
+      lexer.skip_token (base_peek);
 
       return AST::SimplePathSegment (t->get_str (), t->get_locus ());
     case SUPER:
-      lexer.skip_token ();
+      lexer.skip_token (base_peek);
 
       return AST::SimplePathSegment (Keywords::SUPER, t->get_locus ());
     case SELF:
-      lexer.skip_token ();
+      lexer.skip_token (base_peek);
 
       return AST::SimplePathSegment (Keywords::SELF, t->get_locus ());
     case CRATE:
-      lexer.skip_token ();
+      lexer.skip_token (base_peek);
 
       return AST::SimplePathSegment (Keywords::CRATE, t->get_locus ());
     case DOLLAR_SIGN:
-      if (lexer.peek_token (1)->get_id () == CRATE)
+      if (lexer.peek_token (base_peek + 1)->get_id () == CRATE)
        {
-         lexer.skip_token (1);
+         lexer.skip_token (base_peek + 1);
 
          return AST::SimplePathSegment ("$crate", t->get_locus ());
        }
@@ -2807,58 +2805,10 @@ Parser<ManagedTokenSource>::parse_use_tree ()
     }
   else
     {
-      /* Due to aforementioned implementation issues, the trailing :: token is
-       * consumed by the path, so it can not be used as a disambiguator.
-       * NOPE, not true anymore - TODO what are the consequences of this? */
-
       const_TokenPtr t = lexer.peek_token ();
+
       switch (t->get_id ())
        {
-       case ASTERISK:
-         // glob UseTree type
-         lexer.skip_token ();
-
-         return std::unique_ptr<AST::UseTreeGlob> (
-           new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
-                                 std::move (path), locus));
-       case LEFT_CURLY:
-         {
-           // nested tree UseTree type
-           lexer.skip_token ();
-
-           std::vector<std::unique_ptr<AST::UseTree>> use_trees;
-
-           // TODO: think of better control structure
-           const_TokenPtr t = lexer.peek_token ();
-           while (t->get_id () != RIGHT_CURLY)
-             {
-               std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
-               if (use_tree == nullptr)
-                 {
-                   break;
-                 }
-
-               use_trees.push_back (std::move (use_tree));
-
-               if (lexer.peek_token ()->get_id () != COMMA)
-                 break;
-
-               lexer.skip_token ();
-               t = lexer.peek_token ();
-             }
-
-           // skip end curly delimiter
-           if (!skip_token (RIGHT_CURLY))
-             {
-               // skip after somewhere?
-               return nullptr;
-             }
-
-           return std::unique_ptr<AST::UseTreeList> (
-             new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
-                                   std::move (path), std::move (use_trees),
-                                   locus));
-         }
        case AS:
          {
            // rebind UseTree type
@@ -2899,16 +2849,72 @@ Parser<ManagedTokenSource>::parse_use_tree ()
 
          // don't skip semicolon - handled in parse_use_tree
          // lexer.skip_token();
-
-         return std::unique_ptr<AST::UseTreeRebind> (
-           new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
-                                   locus));
        case COMMA:
        case RIGHT_CURLY:
          // this may occur in recursive calls - assume it is ok and ignore it
          return std::unique_ptr<AST::UseTreeRebind> (
            new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
                                    locus));
+       case SCOPE_RESOLUTION:
+         // keep going
+         break;
+       default:
+         add_error (Error (t->get_locus (),
+                           "unexpected token %qs in use tree with valid path",
+                           t->get_token_description ()));
+         return nullptr;
+       }
+
+      skip_token ();
+      t = lexer.peek_token ();
+
+      switch (t->get_id ())
+       {
+       case ASTERISK:
+         // glob UseTree type
+         lexer.skip_token ();
+
+         return std::unique_ptr<AST::UseTreeGlob> (
+           new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
+                                 std::move (path), locus));
+       case LEFT_CURLY:
+         {
+           // nested tree UseTree type
+           lexer.skip_token ();
+
+           std::vector<std::unique_ptr<AST::UseTree>> use_trees;
+
+           // TODO: think of better control structure
+           const_TokenPtr t = lexer.peek_token ();
+           while (t->get_id () != RIGHT_CURLY)
+             {
+               std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
+               if (use_tree == nullptr)
+                 {
+                   break;
+                 }
+
+               use_trees.push_back (std::move (use_tree));
+
+               if (lexer.peek_token ()->get_id () != COMMA)
+                 break;
+
+               lexer.skip_token ();
+               t = lexer.peek_token ();
+             }
+
+           // skip end curly delimiter
+           if (!skip_token (RIGHT_CURLY))
+             {
+               // skip after somewhere?
+               return nullptr;
+             }
+
+           return std::unique_ptr<AST::UseTreeList> (
+             new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
+                                   std::move (path), std::move (use_trees),
+                                   locus));
+         }
        default:
          add_error (Error (t->get_locus (),
                            "unexpected token %qs in use tree with valid path",
index 36426d56f8d73cb079fd96bce401fe41881cf424..7983beb69bc2554781419ea60cd426a384398080 100644 (file)
@@ -227,7 +227,7 @@ private:
 
   // Path-related
   AST::SimplePath parse_simple_path ();
-  AST::SimplePathSegment parse_simple_path_segment ();
+  AST::SimplePathSegment parse_simple_path_segment (int base_peek = 0);
   AST::TypePath parse_type_path ();
   std::unique_ptr<AST::TypePathSegment> parse_type_path_segment ();
   AST::PathIdentSegment parse_path_ident_segment ();
diff --git a/gcc/testsuite/rust/compile/parse_simple_path_fail_1.rs b/gcc/testsuite/rust/compile/parse_simple_path_fail_1.rs
new file mode 100644 (file)
index 0000000..c112e40
--- /dev/null
@@ -0,0 +1,3 @@
+pub(in crate::) struct S;
+// { dg-error "expecting ... but .::. found" "" { target *-*-* } .-1 }
+// { dg-error "failed to parse item in crate" "" { target *-*-* } .-2 }
diff --git a/gcc/testsuite/rust/compile/parse_simple_path_fail_2.rs b/gcc/testsuite/rust/compile/parse_simple_path_fail_2.rs
new file mode 100644 (file)
index 0000000..94c49c3
--- /dev/null
@@ -0,0 +1,9 @@
+mod A {
+    struct B;
+}
+
+use A{B};
+// { dg-error "unexpected token" "" { target *-*-* } .-1 }
+// { dg-error "could not parse use tree" "" { target *-*-* } .-2 }
+// { dg-error "failed to parse item in crate" "" { target *-*-* } 10 }
+// ^^^ TODO: should the above error happen at line 10?