git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #25388: Fixed tokenizer hang when processing undecodable source code
with a null byte.
author: Serhiy Storchaka <storchaka@gmail.com>
Sat, 14 Nov 2015 13:14:29 +0000 (15:14 +0200)
committer: Serhiy Storchaka <storchaka@gmail.com>
Sat, 14 Nov 2015 13:14:29 +0000 (15:14 +0200)

Lib/test/test_compile.py
Misc/NEWS
Parser/tokenizer.c

index cfc6389b59a7c2c306ae71f387f3986994eb5b76..c166ff16b21e431561c21aae48471d2901fb24b8 100644 (file)
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -3,6 +3,9 @@ import unittest
 import sys
 import _ast
 from test import test_support
+from test import script_helper
+import os
+import tempfile
 import textwrap
 
 class TestSpecifics(unittest.TestCase):
@@ -555,6 +558,19 @@ if 1:
         ast.body = [_ast.BoolOp()]
         self.assertRaises(TypeError, compile, ast, '<ast>', 'exec')
 
+    def test_yet_more_evil_still_undecodable(self):
+        # Issue #25388
+        src = b"#\x00\n#\xfd\n"
+        tmpd = tempfile.mkdtemp()
+        try:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            rc, out, err = script_helper.assert_python_failure(fn)
+        finally:
+            test_support.rmtree(tmpd)
+        self.assertIn(b"Non-ASCII", err)
+
 
 class TestStackSize(unittest.TestCase):
     # These tests check that the computed stack size for a code object
index 43e0418b4d9eda2c82e3a6d158ca47e9999c25e6..5d30b1a5e2ec84ce1026aecdb0746a952f3cc9ee 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 2.7.11?
 Core and Builtins
 -----------------
 
+- Issue #25388: Fixed tokenizer hang when processing undecodable source code
+  with a null byte.
+
 - Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now
   rejects builtin types with not defined __new__.
 
index 109c0eee928326c9f7e6aa3e3b54d203d6cf0ecd..7e4a300a6f98055b2898c55e40b60e3cce9215de 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -169,7 +169,8 @@ error_ret(struct tok_state *tok) /* XXX */
     tok->decoding_erred = 1;
     if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
         PyMem_FREE(tok->buf);
-    tok->buf = NULL;
+    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+    tok->done = E_DECODE;
     return NULL;                /* as if it were EOF */
 }
 
@@ -921,7 +922,6 @@ tok_nextc(register struct tok_state *tok)
                 if (tok->buf != NULL)
                     PyMem_FREE(tok->buf);
                 tok->buf = newtok;
-                tok->line_start = tok->buf;
                 tok->cur = tok->buf;
                 tok->line_start = tok->buf;
                 tok->inp = strchr(tok->buf, '\0');
@@ -944,7 +944,8 @@ tok_nextc(register struct tok_state *tok)
                 }
                 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
                           tok) == NULL) {
-                    tok->done = E_EOF;
+                    if (!tok->decoding_erred)
+                        tok->done = E_EOF;
                     done = 1;
                 }
                 else {
@@ -978,6 +979,8 @@ tok_nextc(register struct tok_state *tok)
                     return EOF;
                 }
                 tok->buf = newbuf;
+                tok->cur = tok->buf + cur;
+                tok->line_start = tok->cur;
                 tok->inp = tok->buf + curvalid;
                 tok->end = tok->buf + newsize;
                 tok->start = curstart < 0 ? NULL :