From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:25:57 +0000 (+0200) Subject: [3.13] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352... X-Git-Tag: v3.13.0b3~134 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=51bcb67405cceee1f18067fb2ae510dec47191bc;p=thirdparty%2FPython%2Fcpython.git [3.13] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) (#120355) (cherry picked from commit 1b62bcee941e54244b3ce6476aef8913604987c9) Co-authored-by: Lysandros Nikolaou Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4428e8cea196..36dba71766cc 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1199,6 +1199,17 @@ async def f(): NAME 'x' (1, 3) (1, 4) """) + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst new file mode 100644 index 000000000000..76714b0c394e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst @@ -0,0 +1 @@ +Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module. diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 09fad18b5b4d..2591dae35736 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -36,6 +36,7 @@ typedef struct /* Needed to cache line for performance */ PyObject *last_line; Py_ssize_t last_lineno; + Py_ssize_t last_end_lineno; Py_ssize_t byte_col_offset_diff; } tokenizeriterobject; @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline, self->last_line = NULL; self->byte_col_offset_diff = 0; self->last_lineno = 0; + self->last_end_lineno = 0; return (PyObject *)self; } @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - it->byte_col_offset_diff = 0; + if (it->tok->lineno != it->last_end_lineno) { + it->byte_col_offset_diff = 0; + } } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; Py_ssize_t end_lineno = it->tok->lineno; it->last_lineno = lineno; + it->last_end_lineno = end_lineno; Py_ssize_t col_offset = -1; Py_ssize_t end_col_offset = -1;