diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index acec4ec2ca257c4..19b0ce843d906a3 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -3477,6 +3477,37 @@ def test_find_xpath(self):
         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
 
+    def test_find_xpath_index_no_quadratic_complexity(self):
+        class CountingElement(ET.Element):
+            findall_calls = 0
+            def findall(self, *args, **kwargs):
+                type(self).findall_calls += 1
+                return super().findall(*args, **kwargs)
+
+        def work(n, pattern):
+            root = CountingElement("root")
+            for _ in range(n):
+                ET.SubElement(root, "a")
+            CountingElement.findall_calls = 0
+            root.findall(pattern)
+            return CountingElement.findall_calls
+
+        for pattern in [".//a[1]", ".//a[last()]"]:
+            w1 = work(1024, pattern)
+            w2 = work(2048, pattern)
+            w3 = work(4096, pattern)
+
+            self.assertGreater(w1, 0)
+            r1 = w2 / w1
+            r2 = w3 / w2
+            # Doubling N must not ~double the parent.findall calls.
+            # Linear-in-N call counts indicate the cache is missing.
+            self.assertLess(
+                max(r1, r2), 1.5,
+                msg=f"Possible quadratic behavior on {pattern!r}: "
+                    f"calls={w1, w2, w3} ratios={r1, r2}",
+            )
+
     def test_findall(self):
         e = ET.XML(SAMPLE_XML)
         e[2] = ET.XML(SAMPLE_SECTION)
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index dc6bd28c03137de..de1fd203ff6a0f6 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -324,15 +324,22 @@ def select_negated(context, result):
                 index = -1
         def select(context, result):
             parent_map = get_parent_map(context)
+            cache = {}
             for elem in result:
                 try:
                     parent = parent_map[elem]
+                except KeyError:
+                    continue
+                key = (parent, elem.tag)
+                if key not in cache:
                     # FIXME: what if the selector is "*" ?
-                    elems = list(parent.findall(elem.tag))
-                    if elems[index] is elem:
-                        yield elem
-                except (IndexError, KeyError):
-                    pass
+                    elems = parent.findall(elem.tag)
+                    try:
+                        cache[key] = elems[index]
+                    except IndexError:
+                        cache[key] = None
+                if cache[key] is elem:
+                    yield elem
         return select
     raise SyntaxError("invalid predicate")
 
diff --git a/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst b/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst
new file mode 100644
index 000000000000000..69e73008af79f14
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst
@@ -0,0 +1,6 @@
+The :class:`xml.etree.ElementTree.Element` methods
+:meth:`~xml.etree.ElementTree.Element.findall`,
+:meth:`~xml.etree.ElementTree.Element.iterfind` and
+:meth:`~xml.etree.ElementTree.Element.find` avoid quadratic behavior when
+using XPath index predicates (``[1]``, ``[last()]``, ``[last()-N]``) on XML
+documents with many same-tag siblings.