gh-103824: fix use-after-free error in Parser/tokenizer.c (#103993)

author chgnrdv <52372310+chgnrdv@users.noreply.github.com>

Mon, 1 May 2023 15:26:43 +0000 (18:26 +0300)

committer GitHub <noreply@github.com>

Mon, 1 May 2023 15:26:43 +0000 (15:26 +0000)
author chgnrdv <52372310+chgnrdv@users.noreply.github.com>
Mon, 1 May 2023 15:26:43 +0000 (18:26 +0300)
committer GitHub <noreply@github.com>
Mon, 1 May 2023 15:26:43 +0000 (15:26 +0000)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py

index 283a7c23609e67d2893c1b2de440b794aab3f0e2..911b53e58165881425bf1217f2ffffd126032829 100644 (file)
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -11,7 +11,7 @@ from unittest import TestCase, mock
  from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                                 INVALID_UNDERSCORE_LITERALS)
  from test.support import os_helper
-from test.support.script_helper import run_test_script, make_script
+from test.support.script_helper import run_test_script, make_script, run_python_until_end
  import os
  import token
  
@@ -1470,6 +1470,19 @@ class TestTokenize(TestCase):
              self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
          self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
  
+    def test_invalid_character_in_fstring_middle(self):
+        # See gh-103824
+        script = b'''F"""
+        \xe5"""'''
+
+        with os_helper.temp_dir() as temp_dir:
+            filename = os.path.join(temp_dir, "script.py")
+            with open(filename, 'wb') as file:
+                file.write(script)
+            rs, _ = run_python_until_end(filename)
+            self.assertIn(b"SyntaxError", rs.err)
+
+
  class UntokenizeTest(TestCase):
  
      def test_bad_input_order(self):
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 8de0572a1fc459fa81a4b5b52aaf369446bb77c8..8fb9be7bfd0182ba1b3ff29fcb71ab437cc78b81 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2552,6 +2552,10 @@ f_string_middle:
      while (end_quote_size != current_tok->f_string_quote_size) {
          int c = tok_nextc(tok);
          if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+            if (tok->decoding_erred) {
+                return MAKE_TOKEN(ERRORTOKEN);
+            }
+
              assert(tok->multi_line_start != NULL);
              // shift the tok_state's location into
              // the start of string, and report the error
author	chgnrdv <52372310+chgnrdv@users.noreply.github.com>
	Mon, 1 May 2023 15:26:43 +0000 (18:26 +0300)
committer	GitHub <noreply@github.com>
	Mon, 1 May 2023 15:26:43 +0000 (15:26 +0000)
Lib/test/test_tokenize.py		patch \| blob \| blame \| history
Parser/tokenizer.c		patch \| blob \| blame \| history