[3.12] gh-105435: Fix spurious NEWLINE token if file ends with comment without a newline

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Wed, 7 Jun 2023 13:07:57 +0000 (06:07 -0700)

committer GitHub <noreply@github.com>

Wed, 7 Jun 2023 13:07:57 +0000 (14:07 +0100)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 7 Jun 2023 13:07:57 +0000 (06:07 -0700)
committer GitHub <noreply@github.com>
Wed, 7 Jun 2023 13:07:57 +0000 (14:07 +0100)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py

index f2847b2fb327f86c0df7caa104ddf50bce80e759..6747b0d8f65a17b1f4806c237df95f4fff859a3b 100644 (file)
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1631,13 +1631,34 @@ class TestTokenize(TestCase):
      def test_comment_at_the_end_of_the_source_without_newline(self):
          # See http://bugs.python.org/issue44667
          source = 'b = 1\n\n#test'
-        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
+        expected_tokens = [
+            TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
+            TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
+            TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
+            TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
+            TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
+            TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test\n'),
+            TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test\n'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
+        ]
+
+        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
+        # See https://github.com/python/cpython/issues/105435
+        source = 'a\n '
+        expected_tokens = [
+            TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
+            TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
+            TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' \n'),
+            TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
+        ]
  
          tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
-        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
-        for i in range(6):
-            self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
-        self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
+        self.assertEqual(tokens, expected_tokens)
  
      def test_invalid_character_in_fstring_middle(self):
          # See gh-103824
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst

new file mode 100644 (file)

index 0000000..9e4d7e1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst
@@ -0,0 +1,2 @@
+Fix spurious newline character if file ends on a comment without a newline.
+Patch by Pablo Galindo
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c

index 4d2179348eed20d768b0bf8e52f8912f3c1a88fb..83a129c138b1740c8e76492d8609a0c17ce0aee9 100644 (file)
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -247,6 +247,17 @@ tokenizeriter_next(tokenizeriterobject *it)
              }
              end_col_offset++;
          }
+        else if (type == NL) {
+            if (it->tok->implicit_newline) {
+                Py_DECREF(str);
+                str = PyUnicode_FromString("");
+            }
+        }
+
+        if (str == NULL) {
+            Py_DECREF(line);
+            goto exit;
+        }
      }
  
      result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Wed, 7 Jun 2023 13:07:57 +0000 (06:07 -0700)
committer	GitHub <noreply@github.com>
	Wed, 7 Jun 2023 13:07:57 +0000 (14:07 +0100)
Lib/test/test_tokenize.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst	[new file with mode: 0644]	patch \| blob
Python/Python-tokenize.c		patch \| blob \| blame \| history