[3.12] gh-105259: Ensure we don't show newline characters for trailing NEWLINE tokens...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Tue, 6 Jun 2023 12:47:45 +0000 (05:47 -0700)

committer GitHub <noreply@github.com>

Tue, 6 Jun 2023 12:47:45 +0000 (14:47 +0200)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 6 Jun 2023 12:47:45 +0000 (05:47 -0700)
committer GitHub <noreply@github.com>
Tue, 6 Jun 2023 12:47:45 +0000 (14:47 +0200)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py

index a9a2b7673887c99189cda58c6820ad3b839efea6..5ac17095b185f598e3874eaae9bfcaf845df82b8 100644 (file)
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1870,7 +1870,7 @@ class CTokenizeTest(TestCase):
              TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
              TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
              TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
-            TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+            TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'),
              TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
          ]
          for encoding in ["utf-8", "latin-1", "utf-16"]:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst

new file mode 100644 (file)

index 0000000..75a6303
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst
@@ -0,0 +1,2 @@
+Don't include newline character for trailing ``NEWLINE`` tokens emitted in
+the :mod:`tokenize` module. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index fae613e3a18c1d0ebf41684b4453e5340aa95363..89594e6974fe04fb23df4b9d2cf10eb2d2ec5335 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -114,6 +114,7 @@ tok_new(void)
      tok->report_warnings = 1;
      tok->tok_extra_tokens = 0;
      tok->comment_newline = 0;
+    tok->implicit_newline = 0;
      tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
      tok->tok_mode_stack_index = 0;
      tok->tok_report_warnings = 1;
@@ -355,10 +356,12 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
          return -1;
      }
      strcpy(new_str + current_size, line);
+    tok->implicit_newline = 0;
      if (last_char != '\n') {
          /* Last line does not end in \n, fake one */
          new_str[current_size + line_size - 1] = '\n';
          new_str[current_size + line_size] = '\0';
+        tok->implicit_newline = 1;
      }
      tok->interactive_src_start = new_str;
      tok->interactive_src_end = new_str + current_size + line_size;
@@ -1262,11 +1265,13 @@ tok_underflow_file(struct tok_state *tok) {
          tok->done = E_EOF;
          return 0;
      }
+    tok->implicit_newline = 0;
      if (tok->inp[-1] != '\n') {
          assert(tok->inp + 1 < tok->end);
          /* Last line does not end in \n, fake one */
          *tok->inp++ = '\n';
          *tok->inp = '\0';
+        tok->implicit_newline = 1;
      }
  
      ADVANCE_LINENO();
@@ -1304,11 +1309,13 @@ tok_underflow_readline(struct tok_state* tok) {
          tok->done = E_EOF;
          return 0;
      }
+    tok->implicit_newline = 0;
      if (tok->inp[-1] != '\n') {
          assert(tok->inp + 1 < tok->end);
          /* Last line does not end in \n, fake one */
          *tok->inp++ = '\n';
          *tok->inp = '\0';
+        tok->implicit_newline = 1;
      }
  
      ADVANCE_LINENO();
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h

index 600d4297b6865af02ae74a5c1cd80d82c2d2af47..16e919a8931edd4f6610c6c137de53eb3ce5b923 100644 (file)
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -131,6 +131,7 @@ struct tok_state {
      int tok_report_warnings;
      int tok_extra_tokens;
      int comment_newline;
+    int implicit_newline;
  #ifdef Py_DEBUG
      int debug;
  #endif
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c

index a7933b2d6b01875e3dd435b7c43c1d01c757d33b..223de54d658507792d9d2cb3f8d67698e0ffc5a5 100644 (file)
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -243,10 +243,12 @@ tokenizeriter_next(tokenizeriterobject *it)
          }
          else if (type == NEWLINE) {
              Py_DECREF(str);
-            if (it->tok->start[0] == '\r') {
-                str = PyUnicode_FromString("\r\n");
-            } else {
-                str = PyUnicode_FromString("\n");
+            if (!it->tok->implicit_newline) {
+                if (it->tok->start[0] == '\r') {
+                    str = PyUnicode_FromString("\r\n");
+                } else {
+                    str = PyUnicode_FromString("\n");
+                }
              }
              end_col_offset++;
          }
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Tue, 6 Jun 2023 12:47:45 +0000 (05:47 -0700)
committer	GitHub <noreply@github.com>
	Tue, 6 Jun 2023 12:47:45 +0000 (14:47 +0200)
Lib/test/test_tokenize.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst	[new file with mode: 0644]	patch \| blob
Parser/tokenizer.c		patch \| blob \| blame \| history
Parser/tokenizer.h		patch \| blob \| blame \| history
Python/Python-tokenize.c		patch \| blob \| blame \| history