bpo-44396: Update multi-line-start location when reallocating tokenizer buffers ...

author Pablo Galindo <Pablogsal@gmail.com>

Sat, 12 Jun 2021 17:53:49 +0000 (18:53 +0100)

committer GitHub <noreply@github.com>

Sat, 12 Jun 2021 17:53:49 +0000 (10:53 -0700)
author Pablo Galindo <Pablogsal@gmail.com>
Sat, 12 Jun 2021 17:53:49 +0000 (18:53 +0100)
committer GitHub <noreply@github.com>
Sat, 12 Jun 2021 17:53:49 +0000 (10:53 -0700)
diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py

index 2d3b4ae4e591ef494b501f0816b6fa84d0539ae5..abcbf046e2cc22462a142f12809cec84f0e508ed 100644 (file)
--- a/Lib/test/test_eof.py
+++ b/Lib/test/test_eof.py
@@ -29,6 +29,13 @@ class EOFTestCase(unittest.TestCase):
          else:
              raise support.TestFailed
  
+    def test_EOFS_with_file(self):
+        expect = ("(<string>, line 1)")
+        with os_helper.temp_dir() as temp_dir:
+            file_name = script_helper.make_script(temp_dir, 'foo', """'''this is \na \ntest""")
+            rc, out, err = script_helper.assert_python_failure(file_name)
+        self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)
+
      def test_eof_with_line_continuation(self):
          expect = "unexpected EOF while parsing (<string>, line 1)"
          try:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-11-18-17-42.bpo-44396.Z9EKim.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-11-18-17-42.bpo-44396.Z9EKim.rst

new file mode 100644 (file)

index 0000000..be72a71
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-11-18-17-42.bpo-44396.Z9EKim.rst
@@ -0,0 +1,2 @@
+Fix a possible crash in the tokenizer when raising syntax errors for
+unclosed strings. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 6002f3e05a890c65f021458321a68a60f6e2301a..be9b13ebabb8e38f21f0e13b9ca4b2b712df08e9 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -372,6 +372,8 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
      if (newsize > tok->end - tok->buf) {
          char *newbuf = tok->buf;
          Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf;
+        Py_ssize_t line_start = tok->start == NULL ? -1 : tok->line_start - tok->buf;
+        Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf;
          newbuf = (char *)PyMem_Realloc(newbuf, newsize);
          if (newbuf == NULL) {
              tok->done = E_NOMEM;
@@ -382,6 +384,8 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
          tok->inp = tok->buf + oldsize;
          tok->end = tok->buf + newsize;
          tok->start = start < 0 ? NULL : tok->buf + start;
+        tok->line_start = line_start < 0 ? NULL : tok->buf + line_start;
+        tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start;
      }
      return 1;
  }
@@ -1883,6 +1887,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
          while (end_quote_size != quote_size) {
              c = tok_nextc(tok);
              if (c == EOF || (quote_size == 1 && c == '\n')) {
+                assert(tok->multi_line_start != NULL);
                  // shift the tok_state's location into
                  // the start of string, and report the error
                  // from the initial quote character
author	Pablo Galindo <Pablogsal@gmail.com>
	Sat, 12 Jun 2021 17:53:49 +0000 (18:53 +0100)
committer	GitHub <noreply@github.com>
	Sat, 12 Jun 2021 17:53:49 +0000 (10:53 -0700)
Lib/test/test_eof.py		patch | blob | blame | history
Misc/NEWS.d/next/Core and Builtins/2021-06-11-18-17-42.bpo-44396.Z9EKim.rst	[new file with mode: 0644]	patch | blob
Parser/tokenizer.c		patch | blob | blame | history