git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-120343: Do not reset byte_col_offset_diff after multiline tokens (#120352)
author: Lysandros Nikolaou <lisandrosnik@gmail.com>
Tue, 11 Jun 2024 17:00:53 +0000 (19:00 +0200)
committer: GitHub <noreply@github.com>
Tue, 11 Jun 2024 17:00:53 +0000 (17:00 +0000)
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Lib/test/test_tokenize.py
Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst [new file with mode: 0644]
Python/Python-tokenize.c

index 4428e8cea1964cc04f96e25ffaf636a91dbd50ff..36dba71766cc206a4ea212807659a8ca608f2ae8 100644 (file)
@@ -1199,6 +1199,17 @@ async def f():
     NAME       'x'           (1, 3) (1, 4)
     """)
 
+    def test_multiline_non_ascii_fstring(self):
+        self.check_tokenize("""\
+a = f'''
+    Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
+    NAME       'a'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    FSTRING_START "f\'\'\'"        (1, 4) (1, 8)
+    FSTRING_MIDDLE '\\n    Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
+    FSTRING_END "\'\'\'"         (2, 68) (2, 71)
+    """)
+
 class GenerateTokensTest(TokenizeTest):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst
new file mode 100644 (file)
index 0000000..76714b0
--- /dev/null
@@ -0,0 +1 @@
+Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.
index 09fad18b5b4df73e332dcd2c3c21ffc2ca8f5afb..2591dae35736ba3d7740e14b92df677cb2ed3c19 100644 (file)
@@ -36,6 +36,7 @@ typedef struct
     /* Needed to cache line for performance */
     PyObject *last_line;
     Py_ssize_t last_lineno;
+    Py_ssize_t last_end_lineno;
     Py_ssize_t byte_col_offset_diff;
 } tokenizeriterobject;
 
@@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
     self->last_line = NULL;
     self->byte_col_offset_diff = 0;
     self->last_lineno = 0;
+    self->last_end_lineno = 0;
 
     return (PyObject *)self;
 }
@@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it)
             Py_XDECREF(it->last_line);
             line = PyUnicode_DecodeUTF8(line_start, size, "replace");
             it->last_line = line;
-            it->byte_col_offset_diff = 0;
+            if (it->tok->lineno != it->last_end_lineno) {
+                it->byte_col_offset_diff = 0;
+            }
         } else {
             // Line hasn't changed so we reuse the cached one.
             line = it->last_line;
@@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it)
     Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
     Py_ssize_t end_lineno = it->tok->lineno;
     it->last_lineno = lineno;
+    it->last_end_lineno = end_lineno;
 
     Py_ssize_t col_offset = -1;
     Py_ssize_t end_col_offset = -1;