Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Lib/test/test_tomllib/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import tempfile
import textwrap
import time
import unittest
from test import support
from test.support.script_helper import assert_python_ok
Expand All @@ -18,6 +19,46 @@


class TestMiscellaneous(unittest.TestCase):
def test_many_keys_under_deep_header(self):
# gh-152930: many keys under one deep table header parse correctly.
depth = 200
num_keys = 2000
header = "[" + ".".join(f"h{i}" for i in range(depth)) + "]\n"
body = "".join(f"k{i} = {i}\n" for i in range(num_keys))
nest = tomllib.loads(header + body)
for i in range(depth):
nest = nest[f"h{i}"]
self.assertEqual(len(nest), num_keys)
for i in (0, num_keys // 2, num_keys - 1):
with self.subTest(key=i):
self.assertEqual(nest[f"k{i}"], i)

@support.requires_resource('cpu')
def test_deep_header_does_not_scale_with_depth(self):
    # gh-152930: the parser must not re-walk a deep table header for each
    # key that follows it. The number of keys is held constant while the
    # header depth is doubled: a linear parser (depth + keys) takes about
    # the same time for both documents, a quadratic one (depth * keys)
    # takes about twice as long. Only the ratio of the two timings is
    # asserted, with a generous bound to tolerate load and slow platforms.
    num_keys = 10000
    body = "".join(f"k{i} = 1\n" for i in range(num_keys))

    def parse_time(depth):
        # Best of three timed parses, after one untimed warm-up parse.
        header = "[" + ".".join(f"h{i}" for i in range(depth)) + "]\n"
        doc = header + body
        tomllib.loads(doc)  # warm up
        samples = []
        for _ in range(3):
            began = time.perf_counter()
            tomllib.loads(doc)
            samples.append(time.perf_counter() - began)
        return min(samples)

    max_depth = tomllib._parser.MAX_KEY_PARTS - 2
    shallow = parse_time(max_depth // 2)
    deep = parse_time(max_depth)
    self.assertLess(deep, shallow * 1.5)

def test_load(self):
content = "one=1 \n two='two' \n arr=[]"
expected = {"one": 1, "two": "two", "arr": []}
Expand Down
66 changes: 53 additions & 13 deletions Lib/tomllib/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]:
pos = 0
out = Output()
header: Key = ()
# Resolve the section once; per-key work is then relative to it.
header_flags = out.flags.resolve(header)
header_nest = out.data.get_or_create_nest(header)
parse_float = make_safe_parse_float(parse_float)

# Parse one statement at a time
Expand All @@ -186,7 +189,9 @@ def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]:
pos += 1
continue
if char in KEY_INITIAL_CHARS:
pos = key_value_rule(src, pos, out, header, parse_float)
pos = key_value_rule(
src, pos, out, header, header_flags, header_nest, parse_float
)
pos = skip_chars(src, pos, TOML_WS)
elif char == "[":
try:
Expand All @@ -198,6 +203,8 @@ def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]:
pos, header = create_list_rule(src, pos, out)
else:
pos, header = create_dict_rule(src, pos, out)
header_flags = out.flags.resolve(header)
header_nest = out.data.get_or_create_nest(header)
pos = skip_chars(src, pos, TOML_WS)
elif char != "#":
raise TOMLDecodeError("Invalid statement", src, pos)
Expand Down Expand Up @@ -259,10 +266,24 @@ def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003
cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)

def is_(self, key: Key, flag: int) -> bool:
def is_(
self,
key: Key,
flag: int,
*,
start: tuple[dict[Any, Any], dict[Any, Any] | None] | None = None,
) -> bool:
# `key` is relative to the section anchor `start`; the section is never
# frozen when resolve() runs, so this matches a walk from the root.
if start is None:
cont: dict[Any, Any] = self._flags
inner: dict[Any, Any] | None = None
else:
cont, inner = start
if not key:
return False # document root has no flags
cont = self._flags
if inner is None:
return False # document root has no flags
return flag in inner["flags"] or flag in inner["recursive_flags"]
for k in key[:-1]:
if k not in cont:
return False
Expand All @@ -276,6 +297,16 @@ def is_(self, key: Key, flag: int) -> bool:
return flag in inner_cont["flags"] or flag in inner_cont["recursive_flags"]
return False

def resolve(
    self, header: Key
) -> tuple[dict[Any, Any], dict[Any, Any] | None]:
    """Walk the flag tree along *header* and return its anchor.

    The result is a ``(children, node)`` pair: ``node`` is the flag entry
    of the last header part (``None`` for an empty header, i.e. the
    document root) and ``children`` is the mapping holding the entries
    nested directly below it (``self._flags`` itself for the root).

    Raises ``KeyError`` if any part of *header* has no entry in the tree.
    """
    node: dict[Any, Any] | None = None
    children = self._flags
    for part in header:
        # Step into the entry for this part, then into its sub-entries.
        node = children[part]
        children = node["nested"]
    return children, node


class NestedDict:
def __init__(self) -> None:
Expand All @@ -287,8 +318,10 @@ def get_or_create_nest(
key: Key,
*,
access_lists: bool = True,
start: dict[str, Any] | None = None,
) -> dict[str, Any]:
cont: Any = self.dict
# `start` anchors the walk so `key` is relative to the section's nest.
cont: Any = self.dict if start is None else start
for k in key:
if k not in cont:
cont[k] = {}
Expand Down Expand Up @@ -413,28 +446,35 @@ def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:


def key_value_rule(
src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
src: str,
pos: Pos,
out: Output,
header: Key,
header_flags: tuple[dict[Any, Any], dict[Any, Any] | None],
header_nest: dict[str, Any],
parse_float: ParseFloat,
) -> Pos:
pos, key, value = parse_key_value_pair(src, pos, parse_float)
key_parent, key_stem = key[:-1], key[-1]
abs_key_parent = header + key_parent

relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
for cont_key in relative_path_cont_keys:
for i in range(1, len(key)):
rel = key[:i]
# Check that dotted key syntax does not redefine an existing table
if out.flags.is_(cont_key, Flags.EXPLICIT_NEST):
if out.flags.is_(rel, Flags.EXPLICIT_NEST, start=header_flags):
cont_key = header + rel
raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
# Containers in the relative path can't be opened with the table syntax or
# dotted key/value syntax in following table sections.
out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)
out.flags.add_pending(header + rel, Flags.EXPLICIT_NEST)

if out.flags.is_(abs_key_parent, Flags.FROZEN):
if out.flags.is_(key_parent, Flags.FROZEN, start=header_flags):
abs_key_parent = header + key_parent
raise TOMLDecodeError(
f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
)

try:
nest = out.data.get_or_create_nest(abs_key_parent)
nest = out.data.get_or_create_nest(key_parent, start=header_nest)
except KeyError:
raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
if key_stem in nest:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`tomllib` now parses each table section in linear time instead of
re-walking the section header for every key. Patch by tonghuaroot.
Loading