]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
gccrs: Implemented UTF-8 checking for include_str!()
author: Owen Avery <powerboat9.gamer@gmail.com>
Sun, 8 Jan 2023 22:19:12 +0000 (17:19 -0500)
committer: Arthur Cohen <arthur.cohen@embecosm.com>
Thu, 6 Apr 2023 08:47:23 +0000 (10:47 +0200)
gcc/rust/ChangeLog:

* expand/rust-macro-builtins.cc
(MacroBuiltin::include_str_handler): Add check for valid UTF-8.

gcc/testsuite/ChangeLog:

* rust/compile/builtin_macro_include_str.rs:
Include test of invalid UTF-8.
* rust/compile/invalid_utf8: File with invalid UTF-8.

Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>
gcc/rust/expand/rust-macro-builtins.cc
gcc/testsuite/rust/compile/builtin_macro_include_str.rs
gcc/testsuite/rust/compile/invalid_utf8 [new file with mode: 0644]

index e594a2500d0f54779bd6fc5e7b33f485364b5304..3b6f69bbd6968db0a78a2f3acff9e0870e05cfe7 100644 (file)
@@ -389,8 +389,55 @@ MacroBuiltin::include_str_handler (Location invoc_locus,
 
   std::vector<uint8_t> bytes = load_file_bytes (target_filename.c_str ());
 
-  /* FIXME: Enforce that the file contents are valid UTF-8.  */
-  std::string str ((const char *) &bytes[0], bytes.size ());
+  /* FIXME: reuse lexer */
+  int expect_single = 0;
+  for (uint8_t b : bytes)
+    {
+      if (expect_single)
+       {
+         if ((b & 0xC0) != 0x80)
+           /* character was truncated, exit with expect_single != 0 */
+           break;
+         expect_single--;
+       }
+      else if (b & 0x80)
+       {
+         if (b >= 0xF8)
+           {
+             /* more than 4 leading 1s */
+             expect_single = 1;
+             break;
+           }
+         else if (b >= 0xF0)
+           {
+             /* 4 leading 1s */
+             expect_single = 3;
+           }
+         else if (b >= 0xE0)
+           {
+             /* 3 leading 1s */
+             expect_single = 2;
+           }
+         else if (b >= 0xC0)
+           {
+             /* 2 leading 1s */
+             expect_single = 1;
+           }
+         else
+           {
+             /* only 1 leading 1 */
+             expect_single = 1;
+             break;
+           }
+       }
+    }
+
+  std::string str;
+  if (expect_single)
+    rust_error_at (invoc_locus, "%s was not a valid utf-8 file",
+                  target_filename.c_str ());
+  else
+    str = std::string ((const char *) &bytes[0], bytes.size ());
 
   auto node = AST::SingleASTNode (make_string (invoc_locus, str));
   auto str_tok = make_token (Token::make_string (invoc_locus, std::move (str)));
index 38f5e3b73349b57571ee1aa4c66dc9d4340f9ccf..8092193195d9dd7e689f66d35b371ce36255ad6d 100644 (file)
@@ -10,4 +10,5 @@ fn main () {
   include_str! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" }
   include_str! ("builtin_macro_include_str.rs"); // ok
   include_str! ("builtin_macro_include_str.rs",); // trailing comma ok
+  include_str! ("invalid_utf8"); // { dg-error "invalid_utf8 was not a valid utf-8 file" "" }
 }
diff --git a/gcc/testsuite/rust/compile/invalid_utf8 b/gcc/testsuite/rust/compile/invalid_utf8
new file mode 100644 (file)
index 0000000..29e181e
--- /dev/null
@@ -0,0 +1 @@
+ΓΏ