**undefined** when providing invalid Python code and it can change at any
point.
-Tokenizing Input
+Tokenizing input
----------------
The primary entry point is a :term:`generator`:
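
A minimal sketch of consuming that generator, assuming a small in-memory source
wrapped in :class:`io.BytesIO` so a ``readline`` callable can be passed::

    import io
    import tokenize

    source = b"x = 1 + 2\n"
    # tokenize() expects a readline callable that returns bytes;
    # the first token yielded is always an ENCODING token.
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok.type, tok.string)
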
.. _tokenize-cli:
-Command-Line Usage
+Command-line usage
------------------
.. versionadded:: 3.3
If :file:`filename.py` is specified, its contents are tokenized to stdout.
Otherwise, tokenization is performed on stdin.
+.. versionadded:: next
+ Output is in color by default and can be
+ :ref:`controlled using environment variables <using-on-controlling-color>`.
+
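For example, assuming a file named :file:`hello.py` exists, colorized output
can be suppressed with the standard environment variables:

.. code-block:: shell-session

    $ python -m tokenize hello.py
    $ PYTHON_COLORS=0 python -m tokenize hello.py
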
Examples
-------------------
+--------
Example of a script rewriter that transforms float literals into Decimal
objects::
will be tokenized to the following output where the first column is the range
of the line/column coordinates where the token is found, the second column is
-the name of the token, and the final column is the value of the token (if any)
+the name of the token, and the final column is the value of the token (if any):
.. code-block:: shell-session
reset: str = ANSIColors.RESET
+@dataclass(frozen=True, kw_only=True)
+class Tokenize(ThemeSection):
+ whitespace: str = ANSIColors.GREY
+ error: str = ANSIColors.BOLD_RED
+ position: str = ANSIColors.GREY
+ delimiter: str = ANSIColors.RESET
+
+
@dataclass(frozen=True, kw_only=True)
class Traceback(ThemeSection):
type: str = ANSIColors.BOLD_MAGENTA
live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
syntax: Syntax = field(default_factory=Syntax)
timeit: Timeit = field(default_factory=Timeit)
+ tokenize: Tokenize = field(default_factory=Tokenize)
traceback: Traceback = field(default_factory=Traceback)
unittest: Unittest = field(default_factory=Unittest)
live_profiler: LiveProfiler | None = None,
syntax: Syntax | None = None,
timeit: Timeit | None = None,
+ tokenize: Tokenize | None = None,
traceback: Traceback | None = None,
unittest: Unittest | None = None,
) -> Self:
live_profiler=live_profiler or self.live_profiler,
syntax=syntax or self.syntax,
timeit=timeit or self.timeit,
+ tokenize=tokenize or self.tokenize,
traceback=traceback or self.traceback,
unittest=unittest or self.unittest,
)
live_profiler=LiveProfiler.no_colors(),
syntax=Syntax.no_colors(),
timeit=Timeit.no_colors(),
+ tokenize=Tokenize.no_colors(),
traceback=Traceback.no_colors(),
unittest=Unittest.no_colors(),
)
from token import *
from token import EXACT_TOKEN_TYPES
import _tokenize
+lazy import _colorize
cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
+
+def _get_token_colors(syntax, tokenize):
+ """Map token type numbers to theme colors."""
+    return {
+ COMMENT: syntax.comment,
+ DEDENT: tokenize.whitespace,
+ ENCODING: tokenize.whitespace,
+ ENDMARKER: tokenize.whitespace,
+ ERRORTOKEN: tokenize.error,
+ FSTRING_START: syntax.string,
+ FSTRING_MIDDLE: syntax.string,
+ FSTRING_END: syntax.string,
+ INDENT: tokenize.whitespace,
+ NAME: syntax.reset,
+ NEWLINE: tokenize.whitespace,
+ NL: tokenize.whitespace,
+ NUMBER: syntax.number,
+ OP: syntax.op,
+ SOFT_KEYWORD: syntax.soft_keyword,
+ STRING: syntax.string,
+ TSTRING_START: syntax.string,
+ TSTRING_MIDDLE: syntax.string,
+ TSTRING_END: syntax.string,
+    }
+
+
+def _format_tokens(tokens, *, color=False, exact=False):
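+    """Yield one formatted output line per token, optionally colorized."""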
+ theme = _colorize.get_theme(force_no_color=not color)
+ s = theme.syntax
+ t = theme.tokenize
+ token_colors = _get_token_colors(s, t)
+ for token in tokens:
+ token_range = (
+ f"{t.position}{token.start[0]}"
+ f"{t.delimiter},{t.position}{token.start[1]}"
+ f"{t.delimiter}-"
+ f"{t.position}{token.end[0]}"
+ f"{t.delimiter},{t.position}{token.end[1]}"
+ f"{t.delimiter}:"
+ )
+ token_color = token_colors.get(token.type, s.reset)
+ token_name = tok_name[token.exact_type if exact else token.type]
+ visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+ yield (
+ f"{token_range}{' ' * (20 - len(visible_range))}"
+ f"{token_color}{token_name:<15}"
+ f"{s.reset}{token.string!r:<15}"
+ )
+
+
def _main(args=None):
import argparse
sys.exit(1)
# Parse the arguments and options
- parser = argparse.ArgumentParser(color=True)
+ parser = argparse.ArgumentParser()
parser.add_argument(dest='filename', nargs='?',
metavar='filename.py',
help='the file to tokenize; defaults to stdin')
# Output the tokenization
- for token in tokens:
- token_type = token.type
- if args.exact:
- token_type = token.exact_type
- token_range = "%d,%d-%d,%d:" % (token.start + token.end)
- print("%-20s%-15s%-15r" %
- (token_range, tok_name[token_type], token.string))
+ for line in _format_tokens(tokens, color=True, exact=args.exact):
+ print(line)
except IndentationError as err:
line, column = err.args[1][1:3]
error(err.args[0], filename, (line, column))