.. Auto-generated by Tools/build/generate_token.py
-.. data:: ENDMARKER
-.. data:: NAME
-
-.. data:: NUMBER
-
-.. data:: STRING
-
-.. data:: NEWLINE
-
-.. data:: INDENT
-
-.. data:: DEDENT
-
-.. data:: LPAR
-
- Token value for ``"("``.
-
-.. data:: RPAR
-
- Token value for ``")"``.
-
-.. data:: LSQB
-
- Token value for ``"["``.
-
-.. data:: RSQB
-
- Token value for ``"]"``.
-
-.. data:: COLON
-
- Token value for ``":"``.
-
-.. data:: COMMA
-
- Token value for ``","``.
-
-.. data:: SEMI
-
- Token value for ``";"``.
-
-.. data:: PLUS
-
- Token value for ``"+"``.
-
-.. data:: MINUS
-
- Token value for ``"-"``.
-
-.. data:: STAR
-
- Token value for ``"*"``.
-
-.. data:: SLASH
-
- Token value for ``"/"``.
-
-.. data:: VBAR
-
- Token value for ``"|"``.
-
-.. data:: AMPER
-
- Token value for ``"&"``.
-
-.. data:: LESS
-
- Token value for ``"<"``.
-
-.. data:: GREATER
-
- Token value for ``">"``.
-
-.. data:: EQUAL
-
- Token value for ``"="``.
-
-.. data:: DOT
-
- Token value for ``"."``.
-
-.. data:: PERCENT
-
- Token value for ``"%"``.
-
-.. data:: LBRACE
-
- Token value for ``"{"``.
-
-.. data:: RBRACE
-
- Token value for ``"}"``.
-
-.. data:: EQEQUAL
-
- Token value for ``"=="``.
-
-.. data:: NOTEQUAL
-
- Token value for ``"!="``.
-
-.. data:: LESSEQUAL
-
- Token value for ``"<="``.
-
-.. data:: GREATEREQUAL
-
- Token value for ``">="``.
-
-.. data:: TILDE
-
- Token value for ``"~"``.
-
-.. data:: CIRCUMFLEX
-
- Token value for ``"^"``.
-
-.. data:: LEFTSHIFT
-
- Token value for ``"<<"``.
-
-.. data:: RIGHTSHIFT
-
- Token value for ``">>"``.
-
-.. data:: DOUBLESTAR
-
- Token value for ``"**"``.
-
-.. data:: PLUSEQUAL
-
- Token value for ``"+="``.
-
-.. data:: MINEQUAL
-
- Token value for ``"-="``.
-
-.. data:: STAREQUAL
-
- Token value for ``"*="``.
-
-.. data:: SLASHEQUAL
-
- Token value for ``"/="``.
-
-.. data:: PERCENTEQUAL
-
- Token value for ``"%="``.
-
-.. data:: AMPEREQUAL
-
- Token value for ``"&="``.
-
-.. data:: VBAREQUAL
-
- Token value for ``"|="``.
-
-.. data:: CIRCUMFLEXEQUAL
-
- Token value for ``"^="``.
-
-.. data:: LEFTSHIFTEQUAL
-
- Token value for ``"<<="``.
-
-.. data:: RIGHTSHIFTEQUAL
-
- Token value for ``">>="``.
-
-.. data:: DOUBLESTAREQUAL
-
- Token value for ``"**="``.
-
-.. data:: DOUBLESLASH
-
- Token value for ``"//"``.
-
-.. data:: DOUBLESLASHEQUAL
-
- Token value for ``"//="``.
-
-.. data:: AT
-
- Token value for ``"@"``.
-
-.. data:: ATEQUAL
-
- Token value for ``"@="``.
-
-.. data:: RARROW
-
- Token value for ``"->"``.
-
-.. data:: ELLIPSIS
-
- Token value for ``"..."``.
-
-.. data:: COLONEQUAL
-
- Token value for ``":="``.
-
-.. data:: EXCLAMATION
-
- Token value for ``"!"``.
-
-.. data:: OP
-
-.. data:: TYPE_IGNORE
-
-.. data:: TYPE_COMMENT
-
-.. data:: SOFT_KEYWORD
-
-.. data:: FSTRING_START
-
-.. data:: FSTRING_MIDDLE
-
-.. data:: FSTRING_END
-
-.. data:: COMMENT
-
-.. data:: NL
-
-.. data:: ERRORTOKEN
-
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
+.. list-table::
+ :align: left
+ :header-rows: 1
+
+ * - Token
+ - Value
+ * - .. data:: LPAR
+ - ``"("``
+ * - .. data:: RPAR
+ - ``")"``
+ * - .. data:: LSQB
+ - ``"["``
+ * - .. data:: RSQB
+ - ``"]"``
+ * - .. data:: COLON
+ - ``":"``
+ * - .. data:: COMMA
+ - ``","``
+ * - .. data:: SEMI
+ - ``";"``
+ * - .. data:: PLUS
+ - ``"+"``
+ * - .. data:: MINUS
+ - ``"-"``
+ * - .. data:: STAR
+ - ``"*"``
+ * - .. data:: SLASH
+ - ``"/"``
+ * - .. data:: VBAR
+ - ``"|"``
+ * - .. data:: AMPER
+ - ``"&"``
+ * - .. data:: LESS
+ - ``"<"``
+ * - .. data:: GREATER
+ - ``">"``
+ * - .. data:: EQUAL
+ - ``"="``
+ * - .. data:: DOT
+ - ``"."``
+ * - .. data:: PERCENT
+ - ``"%"``
+ * - .. data:: LBRACE
+ - ``"{"``
+ * - .. data:: RBRACE
+ - ``"}"``
+ * - .. data:: EQEQUAL
+ - ``"=="``
+ * - .. data:: NOTEQUAL
+ - ``"!="``
+ * - .. data:: LESSEQUAL
+ - ``"<="``
+ * - .. data:: GREATEREQUAL
+ - ``">="``
+ * - .. data:: TILDE
+ - ``"~"``
+ * - .. data:: CIRCUMFLEX
+ - ``"^"``
+ * - .. data:: LEFTSHIFT
+ - ``"<<"``
+ * - .. data:: RIGHTSHIFT
+ - ``">>"``
+ * - .. data:: DOUBLESTAR
+ - ``"**"``
+ * - .. data:: PLUSEQUAL
+ - ``"+="``
+ * - .. data:: MINEQUAL
+ - ``"-="``
+ * - .. data:: STAREQUAL
+ - ``"*="``
+ * - .. data:: SLASHEQUAL
+ - ``"/="``
+ * - .. data:: PERCENTEQUAL
+ - ``"%="``
+ * - .. data:: AMPEREQUAL
+ - ``"&="``
+ * - .. data:: VBAREQUAL
+ - ``"|="``
+ * - .. data:: CIRCUMFLEXEQUAL
+ - ``"^="``
+ * - .. data:: LEFTSHIFTEQUAL
+ - ``"<<="``
+ * - .. data:: RIGHTSHIFTEQUAL
+ - ``">>="``
+ * - .. data:: DOUBLESTAREQUAL
+ - ``"**="``
+ * - .. data:: DOUBLESLASH
+ - ``"//"``
+ * - .. data:: DOUBLESLASHEQUAL
+ - ``"//="``
+ * - .. data:: AT
+ - ``"@"``
+ * - .. data:: ATEQUAL
+ - ``"@="``
+ * - .. data:: RARROW
+ - ``"->"``
+ * - .. data:: ELLIPSIS
+ - ``"..."``
+ * - .. data:: COLONEQUAL
+ - ``":="``
+ * - .. data:: EXCLAMATION
+ - ``"!"``
The module also provides a mapping from numeric codes to names and some
functions. The functions mirror definitions in the Python C header files.
+Note that a token's value may depend on tokenizer options. For example, a
+``"+"`` token may be reported as either :data:`PLUS` or :data:`OP`, or
+a ``"match"`` token may be either :data:`NAME` or :data:`SOFT_KEYWORD`.
+
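+For example, the :mod:`tokenize` module reports a ``"+"`` as a generic
+:data:`OP` token, while its ``exact_type`` attribute identifies the exact
+operator (a minimal illustrative sketch)::
+
+ >>> import io, tokenize
+ >>> plus = next(tok for tok in tokenize.tokenize(io.BytesIO(b"1 + 2").readline)
+ ... if tok.string == "+")
+ >>> tokenize.tok_name[plus.type], tokenize.tok_name[plus.exact_type]
+ ('OP', 'PLUS')
+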
.. data:: tok_name
The token constants are:
-.. include:: token-list.inc
+.. data:: NAME
+
+ Token value that indicates an :ref:`identifier <identifiers>`.
+ Note that keywords are also initially tokenized as ``NAME`` tokens.
+
+.. data:: NUMBER
+
+ Token value that indicates a :ref:`numeric literal <numbers>`.
+
+.. data:: STRING
+
+ Token value that indicates a :ref:`string or bytes literal <strings>`,
+ excluding :ref:`formatted string literals <f-strings>`.
+ The token string is not interpreted:
+ it includes the surrounding quotation marks and the prefix (if given);
+ backslashes are included literally, without processing escape sequences.
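+
+ For example, the token string keeps the prefix and the quotes
+ (a minimal illustrative sketch)::
+
+ >>> import io, tokenize
+ >>> [tok.string for tok in tokenize.tokenize(io.BytesIO(b"b'hi'").readline)
+ ... if tok.type == tokenize.STRING]
+ ["b'hi'"]
+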
-The following token type values aren't used by the C tokenizer but are needed for
-the :mod:`tokenize` module.
+.. data:: OP
+
+ A generic token value that indicates an
+ :ref:`operator <operators>` or :ref:`delimiter <delimiters>`.
+
+ .. impl-detail::
+
+ This value is only reported by the :mod:`tokenize` module.
+ Internally, the tokenizer uses
+ :ref:`exact token types <token_operators_delimiters>` instead.
.. data:: COMMENT
- :noindex:
Token value used to indicate a comment.
+ The parser ignores :data:`!COMMENT` tokens.
+
+.. data:: NEWLINE
+
+ Token value that indicates the end of a :ref:`logical line <logical-lines>`.
.. data:: NL
- :noindex:
- Token value used to indicate a non-terminating newline. The
- :data:`NEWLINE` token indicates the end of a logical line of Python code;
- ``NL`` tokens are generated when a logical line of code is continued over
- multiple physical lines.
+ Token value used to indicate a non-terminating newline.
+ :data:`!NL` tokens are generated when a logical line of code is continued
+ over multiple physical lines. The parser ignores :data:`!NL` tokens.
+
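+ For example, continuing a logical line inside parentheses produces an
+ :data:`!NL` token for the embedded line break and a single :data:`NEWLINE`
+ token at the end (a brief illustrative sketch)::
+
+ >>> import io, tokenize
+ >>> src = b"x = (1,\n 2)\n"
+ >>> [tokenize.tok_name[tok.type]
+ ... for tok in tokenize.tokenize(io.BytesIO(src).readline)
+ ... if tok.type in (tokenize.NL, tokenize.NEWLINE)]
+ ['NL', 'NEWLINE']
+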
+.. data:: INDENT
+
+ Token value used at the beginning of a :ref:`logical line <logical-lines>`
+ to indicate the start of an :ref:`indented block <indentation>`.
+
+.. data:: DEDENT
+
+ Token value used at the beginning of a :ref:`logical line <logical-lines>`
+ to indicate the end of an :ref:`indented block <indentation>`.
+
+.. data:: FSTRING_START
+
+ Token value used to indicate the beginning of an
+ :ref:`f-string literal <f-strings>`.
+
+ .. impl-detail::
+
+ The token string includes the prefix and the opening quote(s), but none
+ of the contents of the literal.
+
+.. data:: FSTRING_MIDDLE
+
+ Token value used for literal text inside an :ref:`f-string literal <f-strings>`,
+ including format specifications.
+
+ .. impl-detail::
+
+ Replacement fields (that is, the non-literal parts of f-strings) use
+ the same tokens as other expressions, and are delimited by
+ :data:`LBRACE`, :data:`RBRACE`, :data:`EXCLAMATION` and :data:`COLON`
+ tokens.
+
+.. data:: FSTRING_END
+
+ Token value used to indicate the end of an :ref:`f-string <f-strings>`.
+
+ .. impl-detail::
+
+ The token string contains the closing quote(s).
+
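+For example, under Python 3.12 and later the f-string ``f"x={1}"`` is
+tokenized as :data:`FSTRING_START`, :data:`FSTRING_MIDDLE`, the tokens of the
+replacement field, and :data:`FSTRING_END` (an illustrative sketch)::
+
+ >>> import io, tokenize
+ >>> [(tokenize.tok_name[tok.type], tok.string)
+ ... for tok in tokenize.tokenize(io.BytesIO(b'f"x={1}"').readline)
+ ... if tokenize.tok_name[tok.type].startswith('FSTRING')]
+ [('FSTRING_START', 'f"'), ('FSTRING_MIDDLE', 'x='), ('FSTRING_END', '"')]
+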
+.. data:: ENDMARKER
+
+ Token value that indicates the end of input.
+ Used in :ref:`top-level grammar rules <top-level>`.
.. data:: ENCODING
into text. The first token returned by :func:`tokenize.tokenize` will
always be an ``ENCODING`` token.
+ .. impl-detail::
+
+ This token type isn't used by the C tokenizer but is needed for
+ the :mod:`tokenize` module.
+
+
+The following token types are not produced by the :mod:`tokenize` module,
+and are defined for special uses in the tokenizer or parser:
+
+.. data:: TYPE_IGNORE
+
+ Token value indicating that a ``type: ignore`` comment was recognized.
+ Such tokens are produced instead of regular :data:`COMMENT` tokens only
+ with the :data:`~ast.PyCF_TYPE_COMMENTS` flag.
.. data:: TYPE_COMMENT
- :noindex:
- Token value indicating that a type comment was recognized. Such
- tokens are only produced when :func:`ast.parse` is invoked with
- ``type_comments=True``.
+ Token value indicating that a type comment was recognized.
+ Such tokens are produced instead of regular :data:`COMMENT` tokens only
+ with the :data:`~ast.PyCF_TYPE_COMMENTS` flag.
+
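+ For example, :func:`ast.parse` sets the flag when called with
+ ``type_comments=True`` (a small illustrative sketch)::
+
+ >>> import ast
+ >>> module = ast.parse("x = 1 # type: int", type_comments=True)
+ >>> module.body[0].type_comment
+ 'int'
+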
+.. data:: SOFT_KEYWORD
+
+ Token value indicating a :ref:`soft keyword <soft-keywords>`.
+
+ The tokenizer never produces this value.
+ To check for a soft keyword, pass a :data:`NAME` token's string to
+ :func:`keyword.issoftkeyword`.
+
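+ For example (a minimal sketch)::
+
+ >>> import keyword
+ >>> keyword.issoftkeyword("match")
+ True
+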
+.. data:: ERRORTOKEN
+
+ Token value used to indicate wrong input.
+
+ The :mod:`tokenize` module generally indicates errors by
+ raising exceptions instead of emitting this token.
+ It can also emit tokens such as :data:`OP` or :data:`NAME` with strings that
+ are later rejected by the parser.
+
+
+.. _token_operators_delimiters:
+
+The remaining tokens represent specific :ref:`operators <operators>` and
+:ref:`delimiters <delimiters>`.
+(The :mod:`tokenize` module reports these as :data:`OP`; see ``exact_type``
+in the :mod:`tokenize` documentation for details.)
+
+.. include:: token-list.inc
+
+
+The following non-token constants are provided:
+
+.. data:: N_TOKENS
+
+ The number of token types defined in this module.
+
+.. NT_OFFSET is deliberately undocumented; if you need it you should be
+ reading the source
.. data:: EXACT_TOKEN_TYPES
to support parsing older Python versions for :func:`ast.parse` with
``feature_version`` set to 6 or lower).
+.. versionchanged:: 3.12
+ Added :data:`EXCLAMATION`.
+
.. versionchanged:: 3.13
Removed :data:`!AWAIT` and :data:`!ASYNC` tokens again.
.. grammar-snippet::
:group: python-grammar
- file_input: (NEWLINE | `statement`)*
+ file_input: (NEWLINE | `statement`)* ENDMARKER
This syntax is used in the following situations:
.. grammar-snippet::
:group: python-grammar
- interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE
+ interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE | ENDMARKER
Note that a (top-level) compound statement must be followed by a blank line in
interactive mode; this is needed to help the parser detect the end of the input.
:func:`eval` is used for expression input. It ignores leading whitespace. The
string argument to :func:`eval` must have the following form:
-.. productionlist:: python-grammar
- eval_input: `expression_list` NEWLINE*
+.. grammar-snippet::
+ :group: python-grammar
+
+ eval_input: `expression_list` NEWLINE* ENDMARKER
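+
+For example, leading whitespace and trailing newlines are accepted
+(a brief illustration)::
+
+ >>> eval("  1 + 1\n\n")
+ 2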
#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
-# Doc/library/token-list.inc
-# Include/token.h
-# Parser/token.c
-# Lib/token.py
+# make_rst:
+# Doc/library/token-list.inc
+# Doc/library/token.rst (checked, not generated)
+# make_h:
+# Include/token.h
+# make_c:
+# Parser/token.c
+# make_py:
+# Lib/token.py
+
+import re
SCRIPT_NAME = 'Tools/build/generate_token.py'
token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}
-%s
-.. data:: N_TOKENS
-.. data:: NT_OFFSET
+.. list-table::
+ :align: left
+ :header-rows: 1
+
+ * - Token
+ - Value
+%s
"""
-def make_rst(infile, outfile='Doc/library/token-list.inc'):
+def make_rst(infile, outfile='Doc/library/token-list.inc',
+ rstfile='Doc/library/token.rst'):
tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
tok_to_string = {value: s for s, value in string_to_tok.items()}
+ needs_handwritten_doc = set()
+
names = []
- for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
- names.append('.. data:: %s' % (name,))
+ for value, name in enumerate(tok_names):
if value in tok_to_string:
- names.append('')
- names.append(' Token value for ``"%s"``.' % tok_to_string[value])
- names.append('')
+ assert name.isupper()
+ names.append(f' * - .. data:: {name}')
+ names.append(f' - ``"{tok_to_string[value]}"``')
+ else:
+ needs_handwritten_doc.add(name)
+
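+ # Cross-check: every token without a generated table entry must have a
+ # hand-written ``.. data::`` entry in token.rst, and vice versa.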
+ has_handwritten_doc = set()
+ with open(rstfile) as fileobj:
+ tokendef_re = re.compile(r'\.\. data:: ([0-9A-Z_]+)\s*')
+ for line in fileobj:
+ if match := tokendef_re.fullmatch(line):
+ has_handwritten_doc.add(match[1])
+
+ # Exclude non-token constants in token.py
+ has_handwritten_doc -= {'N_TOKENS', 'NT_OFFSET', 'EXACT_TOKEN_TYPES'}
+
+ if needs_handwritten_doc != has_handwritten_doc:
+ message_parts = [f'ERROR: {rstfile} does not document all tokens!']
+ undocumented = needs_handwritten_doc - has_handwritten_doc
+ extra = has_handwritten_doc - needs_handwritten_doc
+ if undocumented:
+ message_parts.append(f'Undocumented tokens: {undocumented}')
+ if extra:
+ message_parts.append(f'Documented nonexistent tokens: {extra}')
+ exit('\n'.join(message_parts))
if update_file(outfile, token_inc_template % '\n'.join(names)):
print("%s regenerated from %s" % (outfile, infile))