git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-104825: Remove implicit newline in the line attribute in tokens emitted in the tokenize module (#104846)
author: Pablo Galindo Salgado <Pablogsal@gmail.com>
Wed, 24 May 2023 09:59:18 +0000 (10:59 +0100)
committer: GitHub <noreply@github.com>
Wed, 24 May 2023 09:59:18 +0000 (09:59 +0000)
Lib/idlelib/idle_test/test_editor.py
Lib/test/test_tabnanny.py
Lib/test/test_tokenize.py
Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst [new file with mode: 0644]
Python/Python-tokenize.c

index 9296a6d235fbbe3b61c96fe19c6cb91734949330..ba59c40dc6dde5012f873ee9af8f875959090e57 100644 (file)
@@ -201,8 +201,8 @@ class IndentSearcherTest(unittest.TestCase):
         test_info = (# text, (block, indent))
                      ("", (None, None)),
                      ("[1,", (None, None)),  # TokenError
-                     ("if 1:\n", ('if 1:\n', None)),
-                     ("if 1:\n  2\n  3\n", ('if 1:\n', '  2\n')),
+                     ("if 1:\n", ('if 1:', None)),
+                     ("if 1:\n  2\n  3\n", ('if 1:', '  2')),
                      )
         for code, expected_pair in test_info:
             with self.subTest(code=code):
index aa700118f735d904113d5ff498f80ee9b7866c0c..cc122cafc7985cb05d789e8ba52d574c4ef9a890 100644 (file)
@@ -222,7 +222,7 @@ class TestCheck(TestCase):
         """
         with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
             out = f"{file_path!r}: *** Line 3: trouble in tab city! ***\n"
-            out += "offending line: '\\tprint(\"world\")\\n'\n"
+            out += "offending line: '\\tprint(\"world\")'\n"
             out += "inconsistent use of tabs and spaces in indentation\n"
 
             tabnanny.verbose = 1
@@ -231,7 +231,7 @@ class TestCheck(TestCase):
     def test_when_nannynag_error(self):
         """A python source code file eligible for raising `tabnanny.NannyNag`."""
         with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
-            out = f"{file_path} 3 '\\tprint(\"world\")\\n'\n"
+            out = f"{file_path} 3 '\\tprint(\"world\")'\n"
             self.verify_tabnanny_check(file_path, out=out)
 
     def test_when_no_file(self):
@@ -341,7 +341,7 @@ class TestCommandLine(TestCase):
         """Should display more error information if verbose mode is on."""
         with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
             stdout = textwrap.dedent(
-                "offending line: '\\tprint(\"world\")\\n'"
+                "offending line: '\\tprint(\"world\")'"
             ).strip()
             self.validate_cmd("-v", path, stdout=stdout, partial=True)
 
@@ -349,6 +349,6 @@ class TestCommandLine(TestCase):
         """Should display detailed error information if double verbose is on."""
         with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
             stdout = textwrap.dedent(
-                "offending line: '\\tprint(\"world\")\\n'"
+                "offending line: '\\tprint(\"world\")'"
             ).strip()
             self.validate_cmd("-vv", path, stdout=stdout, partial=True)
index 8e7ab3d4b7b57832dc5c7e1cb2870b389b548967..fd9c919ce6a0d10d780da7d78fa1de306aedfff4 100644 (file)
@@ -103,7 +103,7 @@ def k(x):
             e.exception.msg,
             'unindent does not match any outer indentation level')
         self.assertEqual(e.exception.offset, 9)
-        self.assertEqual(e.exception.text, '  x += 5\n')
+        self.assertEqual(e.exception.text, '  x += 5')
 
     def test_int(self):
         # Ordinary integers and binary operators
@@ -1157,7 +1157,7 @@ class Test_Tokenize(TestCase):
 
         # skip the initial encoding token and the end tokens
         tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
-        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
+        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst
new file mode 100644 (file)
index 0000000..caf5d35
--- /dev/null
@@ -0,0 +1,2 @@
+Tokens emitted by the :mod:`tokenize` module do not include an implicit
+``\n`` character in the ``line`` attribute anymore. Patch by Pablo Galindo.
index f7e32d3af9a9f7796348f97cf827d23f0eb31650..0023e303b96e836230630e0dcac8d8fd38f3718c 100644 (file)
@@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok)
     int result = 0;
 
     Py_ssize_t size = tok->inp - tok->buf;
+    assert(tok->buf[size-1] == '\n');
+    size -= 1; // Remove the newline character from the end of the line
     error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
     if (!error_line) {
         result = -1;
@@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it)
     }
 
     Py_ssize_t size = it->tok->inp - it->tok->buf;
+    assert(it->tok->buf[size-1] == '\n');
+    size -= 1; // Remove the newline character from the end of the line
     PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
     if (line == NULL) {
         Py_DECREF(str);