.. module:: tomllib
:synopsis: Parse TOML files.
-.. versionadded:: 3.11
-
.. moduleauthor:: Taneli Hukkinen
.. sectionauthor:: Taneli Hukkinen
--------------
-This module provides an interface for parsing TOML 1.0.0 (Tom's Obvious Minimal
+This module provides an interface for parsing TOML 1.1.0 (Tom's Obvious Minimal
Language, `https://toml.io <https://toml.io/en/>`_). This module does not
support writing TOML.
+.. versionadded:: 3.11
+ The module was added with support for TOML 1.0.0.
+
+.. versionchanged:: next
+ Added TOML 1.1.0 support.
+ See the :ref:`What's New <whatsnew315-tomllib-1-1-0>` for details.
+
+
.. seealso::
The :pypi:`Tomli-W package <tomli-w>`
with outdated names.
(Contributed by Serhiy Storchaka in :gh:`143754`)
+
+.. _whatsnew315-tomllib-1-1-0:
+
+tomllib
+-------
+
+* The :mod:`tomllib` module now supports TOML 1.1.0.
+ This is a backwards compatible update, meaning that all valid TOML 1.0.0
+ documents are parsed the same way.
+
+ The changes, according to the `official TOML changelog`_, are:
+
+ - Allow newlines and trailing commas in inline tables.
+
+ Previously an inline table had to be on a single line and couldn't end
+ with a trailing comma. This is now relaxed so that the following is valid:
+
+ .. syntax highlighting needs TOML 1.1.0 support in Pygments,
+ see https://github.com/pygments/pygments/issues/3026
+
+ .. code-block:: text
+
+ tbl = {
+ key = "a string",
+ moar-tbl = {
+ key = 1,
+ },
+ }
+
+  - Add ``\xHH`` notation to basic strings for codepoints under 256,
+ and the ``\e`` escape for the escape character:
+
+ .. code-block:: text
+
+ null = "null byte: \x00; letter a: \x61"
+ csi = "\e["
+
+ - Seconds in datetime and time values are now optional.
+ The following are now valid:
+
+ .. code-block:: text
+
+ dt = 2010-02-03 14:15
+ t = 14:15
+
+ (Contributed by Taneli Hukkinen in :gh:`142956`.)
+
+.. _official TOML changelog: https://github.com/toml-lang/toml/blob/main/CHANGELOG.md
+
+
types
------
import datetime
from typing import Any
-# Aliases for converting TOML compliance format [1] to BurntSushi format [2]
-# [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501
-# [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501
-_aliases = {
- "boolean": "bool",
- "offset datetime": "datetime",
- "local datetime": "datetime-local",
- "local date": "date-local",
- "local time": "time-local",
-}
-
-
-def convert(obj): # noqa: C901
+
+def convert(obj):
if isinstance(obj, str):
return {"type": "string", "value": obj}
elif isinstance(obj, bool):
def normalize(obj: Any) -> Any:
"""Normalize test objects.
- This normalizes primitive values (e.g. floats), and also converts from
- TOML compliance format [1] to BurntSushi format [2].
-
- [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501
- [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501
- """
+ This normalizes primitive values (e.g. floats)."""
if isinstance(obj, list):
return [normalize(item) for item in obj]
if isinstance(obj, dict):
if "type" in obj and "value" in obj:
type_ = obj["type"]
- norm_type = _aliases.get(type_, type_)
value = obj["value"]
- if norm_type == "float":
+ if type_ == "float":
norm_value = _normalize_float_str(value)
- elif norm_type in {"datetime", "datetime-local"}:
+ elif type_ in {"datetime", "datetime-local"}:
norm_value = _normalize_datetime_str(value)
- elif norm_type == "time-local":
+ elif type_ == "time-local":
norm_value = _normalize_localtime_str(value)
else:
norm_value = value
- if norm_type == "array":
+ if type_ == "array":
return [normalize(item) for item in value]
- return {"type": norm_type, "value": norm_value}
+ return {"type": type_, "value": norm_value}
return {k: normalize(v) for k, v in obj.items()}
raise AssertionError("Burntsushi fixtures should be dicts/lists only")
{
"local-dt": {"type":"datetime-local","value":"1988-10-27t01:01:01"},
+ "local-dt-no-seconds": {"type":"datetime-local","value":"2025-04-18t20:05:00"},
"zulu-dt": {"type":"datetime","value":"1988-10-27t01:01:01z"}
}
local-dt=1988-10-27t01:01:01
+local-dt-no-seconds=2025-04-18T20:05
zulu-dt=1988-10-27t01:01:01z
{"t":
- {"type":"time-local","value":"00:00:00.999999"}}
+ {"type":"time-local","value":"00:00:00.999999"},
+"t2":
+ {"type":"time-local","value":"00:00:00"}}
-t=00:00:00.99999999999999
\ No newline at end of file
+t=00:00:00.99999999999999
+t2=00:00
\ No newline at end of file
--- /dev/null
+{
+ "multiline": {
+ "a": {
+ "type": "integer",
+ "value": "1"
+ },
+ "b": {
+ "type": "integer",
+ "value": "2"
+ },
+ "c": [
+ {
+ "type": "integer",
+ "value": "1"
+ },
+ {
+ "type": "integer",
+ "value": "2"
+ },
+ {
+ "type": "integer",
+ "value": "3"
+ }
+ ],
+ "d": {
+ "type": "integer",
+ "value": "3"
+ },
+ "e": {
+ "type": "integer",
+ "value": "4"
+ },
+ "f": {}
+ }
+}
\ No newline at end of file
--- /dev/null
+multiline = {
+ "a" = 1, "b" = 2,
+ c = [
+ 1,
+ 2,
+ 3,
+ ],# comment
+ d = 3,
+ e = 4, f = {
+ # comment
+ },
+}
--- /dev/null
+{
+ "escape": {"type":"string","value":"\u001B"},
+ "tab": {"type":"string","value":"\t"},
+ "upper-j": {"type":"string","value":"J"},
+ "upper-j-2": {"type":"string","value":"J"}
+}
--- /dev/null
+escape = "\e"
+tab = "\x09"
+upper-j = "\x4a"
+upper-j-2 = "\x4A"
from . import burntsushi, tomllib
-
-class MissingFile:
- def __init__(self, path: Path):
- self.path = path
-
-
DATA_DIR = Path(__file__).parent / "data"
VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml"))
_expected_files = []
for p in VALID_FILES:
json_path = p.with_suffix(".json")
- try:
- text = json.loads(json_path.read_bytes().decode())
- except FileNotFoundError:
- text = MissingFile(json_path)
+ text = json.loads(json_path.read_bytes().decode())
_expected_files.append(text)
VALID_FILES_EXPECTED = tuple(_expected_files)
def test_valid(self):
for valid, expected in zip(VALID_FILES, VALID_FILES_EXPECTED):
with self.subTest(msg=valid.stem):
- if isinstance(expected, MissingFile):
- # For a poor man's xfail, assert that this is one of the
- # test cases where expected data is known to be missing.
- assert valid.stem in {
- "qa-array-inline-nested-1000",
- "qa-table-inline-nested-1000",
- }
- continue
toml_str = valid.read_bytes().decode()
actual = tomllib.loads(toml_str)
actual = burntsushi.convert(actual)
TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable
- from typing import IO, Any
+ from typing import IO, Any, Final
from ._types import Key, ParseFloat, Pos
-ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
+ASCII_CTRL: Final = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
-ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
-ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
+ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t")
+ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t\n")
-ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
-ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
+ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
+ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS
-ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
+ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
-TOML_WS = frozenset(" \t")
-TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
-BARE_KEY_CHARS = frozenset(
+TOML_WS: Final = frozenset(" \t")
+TOML_WS_AND_NEWLINE: Final = TOML_WS | frozenset("\n")
+BARE_KEY_CHARS: Final = frozenset(
"abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" "-_"
)
-KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
-HEXDIGIT_CHARS = frozenset("abcdef" "ABCDEF" "0123456789")
+KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
+HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")
-BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
+BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
{
"\\b": "\u0008", # backspace
"\\t": "\u0009", # tab
- "\\n": "\u000A", # linefeed
- "\\f": "\u000C", # form feed
- "\\r": "\u000D", # carriage return
+ "\\n": "\u000a", # linefeed
+ "\\f": "\u000c", # form feed
+ "\\r": "\u000d", # carriage return
+ "\\e": "\u001b", # escape
'\\"': "\u0022", # quote
- "\\\\": "\u005C", # backslash
+ "\\\\": "\u005c", # backslash
}
)
return loads(s, parse_float=parse_float)
-def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901
+def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]:
"""Parse TOML from a string."""
# The spec allows converting "\r\n" to "\n", even in string
"""Flags that map to parsed keys/namespaces."""
# Marks an immutable namespace (inline array or inline table).
- FROZEN = 0
+ FROZEN: Final = 0
# Marks a nest that has been explicitly created and can no longer
# be opened using the "[table]" syntax.
- EXPLICIT_NEST = 1
+ EXPLICIT_NEST: Final = 1
def __init__(self) -> None:
self._flags: dict[str, dict[Any, Any]] = {}
cont = inner_cont["nested"]
key_stem = key[-1]
if key_stem in cont:
- cont = cont[key_stem]
- return flag in cont["flags"] or flag in cont["recursive_flags"]
+ inner_cont = cont[key_stem]
+ return flag in inner_cont["flags"] or flag in inner_cont["recursive_flags"]
return False
nested_dict = NestedDict()
flags = Flags()
- pos = skip_chars(src, pos, TOML_WS)
+ pos = skip_comments_and_array_ws(src, pos)
if src.startswith("}", pos):
return pos + 1, nested_dict.dict
while True:
if key_stem in nest:
raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos)
nest[key_stem] = value
- pos = skip_chars(src, pos, TOML_WS)
+ pos = skip_comments_and_array_ws(src, pos)
c = src[pos : pos + 1]
if c == "}":
return pos + 1, nested_dict.dict
if c != ",":
raise TOMLDecodeError("Unclosed inline table", src, pos)
+ pos += 1
+ pos = skip_comments_and_array_ws(src, pos)
+ if src.startswith("}", pos):
+ return pos + 1, nested_dict.dict
if isinstance(value, (dict, list)):
flags.set(key, Flags.FROZEN, recursive=True)
- pos += 1
- pos = skip_chars(src, pos, TOML_WS)
def parse_basic_str_escape(
pos += 1
pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
return pos, ""
+ if escape_id == "\\x":
+ return parse_hex_char(src, pos, 2)
if escape_id == "\\u":
return parse_hex_char(src, pos, 4)
if escape_id == "\\U":
pos += 1
-def parse_value( # noqa: C901
+def parse_value(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
try:
TYPE_CHECKING = False
if TYPE_CHECKING:
- from typing import Any
+ from typing import Any, Final
from ._types import ParseFloat
-# E.g.
-# - 00:32:00.999999
-# - 00:32:00
-_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
+_TIME_RE_STR: Final = r"""
+([01][0-9]|2[0-3]) # hours
+:([0-5][0-9]) # minutes
+(?:
+ :([0-5][0-9]) # optional seconds
+ (?:\.([0-9]{1,6})[0-9]*)? # optional fractions of a second
+)?
+"""
-RE_NUMBER = re.compile(
+RE_NUMBER: Final = re.compile(
r"""
0
(?:
""",
flags=re.VERBOSE,
)
-RE_LOCALTIME = re.compile(_TIME_RE_STR)
-RE_DATETIME = re.compile(
+RE_LOCALTIME: Final = re.compile(_TIME_RE_STR, flags=re.VERBOSE)
+RE_DATETIME: Final = re.compile(
rf"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
year, month, day = int(year_str), int(month_str), int(day_str)
if hour_str is None:
return date(year, month, day)
- hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
+ hour, minute = int(hour_str), int(minute_str)
+ sec = int(sec_str) if sec_str else 0
micros = int(micros_str.ljust(6, "0")) if micros_str else 0
if offset_sign_str:
tz: tzinfo | None = cached_tz(
def match_to_localtime(match: re.Match[str]) -> time:
hour_str, minute_str, sec_str, micros_str = match.groups()
+ sec = int(sec_str) if sec_str else 0
micros = int(micros_str.ljust(6, "0")) if micros_str else 0
- return time(int(hour_str), int(minute_str), int(sec_str), micros)
+ return time(int(hour_str), int(minute_str), sec, micros)
def match_to_number(match: re.Match[str], parse_float: ParseFloat) -> Any:
test/test_tomllib/data/valid \
test/test_tomllib/data/valid/array \
test/test_tomllib/data/valid/dates-and-times \
+ test/test_tomllib/data/valid/inline-table \
test/test_tomllib/data/valid/multiline-basic-str \
test/test_tools \
test/test_tools/i18n_data \
--- /dev/null
+Updated :mod:`tomllib` to parse TOML 1.1.0.