bpo-44667: Treat correctly lines ending with comments and no newlines in the Python tokenizer

author Pablo Galindo Salgado <Pablogsal@gmail.com>

Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)

committer GitHub <noreply@github.com>

Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)
author Pablo Galindo Salgado <Pablogsal@gmail.com>
Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)
committer GitHub <noreply@github.com>
Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py

index 681f2c72f9c37854fa2a8648bd79a47d0cd43025..4bce1ca9c76f7cfc8c7d7d1d582c96bad164dd6b 100644 (file)
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1458,6 +1458,16 @@ class TestTokenize(TestCase):
          # See http://bugs.python.org/issue16152
          self.assertExactTypeEqual('@          ', token.AT)
  
+    def test_comment_at_the_end_of_the_source_without_newline(self):
+        # See http://bugs.python.org/issue44667
+        source = 'b = 1\n\n#test'
+        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
+
+        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
+        for i in range(6):
+            self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
+        self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
  
  class UntokenizeTest(TestCase):
  
diff --git a/Lib/tokenize.py b/Lib/tokenize.py

index 42c1f10373de9b5819547f75145bfac79dfd5cee..7d7736fe985981febeda85733b7252e8bca85d18 100644 (file)
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -604,7 +604,7 @@ def _tokenize(readline, encoding):
                  pos += 1
  
      # Add an implicit NEWLINE if the input doesn't end in one
-    if last_line and last_line[-1] not in '\r\n':
+    if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
          yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
      for indent in indents[1:]:                 # pop remaining indent levels
          yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
diff --git a/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst

new file mode 100644 (file)

index 0000000..5b7e20e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst
@@ -0,0 +1,4 @@
+:func:`tokenize.tokenize` no longer incorrectly generates a ``NEWLINE``
+token when the source doesn't end with a newline character but the last line
+is a comment, as the function already generates a ``NL`` token. Patch by
+Pablo Galindo.
author	Pablo Galindo Salgado <Pablogsal@gmail.com>
	Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)
committer	GitHub <noreply@github.com>
	Sat, 31 Jul 2021 01:17:09 +0000 (02:17 +0100)
Lib/test/test_tokenize.py		patch \| blob \| blame \| history
Lib/tokenize.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst	[new file with mode: 0644]	patch \| blob