NAME 'False' (4, 11) (4, 16)
COMMENT '# NEWLINE' (4, 17) (4, 26)
NEWLINE '\\n' (4, 26) (4, 27)
- DEDENT '' (4, 27) (4, 27)
+ DEDENT '' (5, 0) (5, 0)
""")
indent_error_file = b"""\
def k(x):
NEWLINE '\\n' (2, 5) (2, 6)
INDENT ' \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
- DEDENT '' (3, 14) (3, 14)
- DEDENT '' (3, 14) (3, 14)
+ DEDENT '' (4, 0) (4, 0)
+ DEDENT '' (4, 0) (4, 0)
""")
def test_non_ascii_identifiers(self):
NUMBER '1' (2, 17) (2, 18)
OP ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
- DEDENT '' (2, 25) (2, 25)
+ DEDENT '' (3, 0) (3, 0)
""")
self.check_tokenize('''async def foo(async): await''', """\
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
- DEDENT '' (6, 12) (6, 12)
+ DEDENT '' (7, 0) (7, 0)
""")
self.check_tokenize('''\
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
- DEDENT '' (6, 12) (6, 12)
+ DEDENT '' (7, 0) (7, 0)
""")
def test_newline_after_parenthesized_block_with_comment(self):
valid = generate_source(MAXINDENT - 1)
tokens = list(_generate_tokens_from_c_tokenizer(valid))
- self.assertEqual(tokens[-1].type, DEDENT)
+ self.assertEqual(tokens[-2].type, DEDENT)
+ self.assertEqual(tokens[-1].type, ENDMARKER)
compile(valid, "<string>", "exec")
invalid = generate_source(MAXINDENT)
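
These expectation updates all encode the same rule: trailing DEDENT tokens, and the ENDMARKER that follows them, are now reported at column 0 of the line after the last input line, matching what the pure-Python tokenizer used to emit. A small check, not part of the patch, assuming an interpreter with this change applied:

    import io
    from tokenize import generate_tokens, tok_name

    source = "def k(x):\n    pass\n"          # two input lines
    tokens = list(generate_tokens(io.StringIO(source).readline))
    for tok in tokens[-2:]:
        print(tok_name[tok.type], tok.start, tok.end)
    # Expected with this change, one line past the input:
    #   DEDENT (3, 0) (3, 0)
    #   ENDMARKER (3, 0) (3, 0)
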
def _tokenize(rl_gen, encoding):
source = b"".join(rl_gen).decode(encoding)
- token = None
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
yield token
- if token is not None:
- last_line, _ = token.start
- yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')
-
def generate_tokens(readline):
"""Tokenize a source reading Python code as unicode strings.
typedef struct
{
PyObject_HEAD struct tok_state *tok;
+ int done;
} tokenizeriterobject;
/*[clinic input]
if (extra_tokens) {
self->tok->tok_extra_tokens = 1;
}
+ self->done = 0;
return (PyObject *)self;
}
}
goto exit;
}
- if (type == ERRORTOKEN || type == ENDMARKER) {
+ if (it->done || type == ERRORTOKEN) {
PyErr_SetString(PyExc_StopIteration, "EOF");
+ it->done = 1;
goto exit;
}
PyObject *str = NULL;
goto exit;
}
+ int is_trailing_token = 0;
+ if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+ is_trailing_token = 1;
+ }
+
const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
- Py_ssize_t size = it->tok->inp - line_start;
- PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ PyObject* line = NULL;
+ if (it->tok->tok_extra_tokens && is_trailing_token) {
+ line = PyUnicode_FromString("");
+ } else {
+ Py_ssize_t size = it->tok->inp - line_start;
+ line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ }
if (line == NULL) {
Py_DECREF(str);
goto exit;
}
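
Observable effect of the is_trailing_token branch: the synthesised trailing DEDENT/ENDMARKER lie past the end of the input, so their line attribute becomes an empty string instead of a slice of the source buffer. A quick illustration, again assuming the change is applied:

    import io
    from tokenize import generate_tokens

    toks = list(generate_tokens(io.StringIO("if x:\n    pass\n").readline))
    dedent, endmarker = toks[-2], toks[-1]
    assert dedent.string == "" and dedent.line == ""
    assert endmarker.line == "" and endmarker.start == (3, 0)
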
if (it->tok->tok_extra_tokens) {
+ if (is_trailing_token) {
+ lineno = end_lineno = lineno + 1;
+ col_offset = end_col_offset = 0;
+ }
// Necessary adjustments to match the original Python tokenize
// implementation
if (type > DEDENT && type < OP) {
result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
exit:
_PyToken_Free(&token);
+ if (type == ENDMARKER) {
+ it->done = 1;
+ }
return result;
}
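
Taken together, the done flag changes when iteration stops: ENDMARKER is yielded to the caller, and only the next call raises StopIteration, which is why the manual ENDMARKER synthesis in Lib/tokenize.py could be dropped. Roughly, in Python terms (an illustrative sketch of the control flow, not the C code):

    from token import ENDMARKER

    class _TokenizerIterSketch:
        """Stand-in for tokenizeriterobject; takes an iterable of TokenInfo."""
        def __init__(self, tokens):
            self._tokens = iter(tokens)
            self._done = False            # plays the role of it->done

        def __iter__(self):
            return self

        def __next__(self):
            if self._done:                # previous call already emitted ENDMARKER
                raise StopIteration
            tok = next(self._tokens)
            if tok.type == ENDMARKER:     # emit it now, stop on the next call
                self._done = True
            return tok

Previously the C iterator raised StopIteration as soon as it produced ENDMARKER, so the wrapper in Lib/tokenize.py had to append one by hand; with the flag it is emitted like any other token.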