From ce3d3cd24c871be0c695e1d5b014e1b42063f082 Mon Sep 17 00:00:00 2001 From: Alexis Date: Thu, 23 Apr 2026 10:50:07 +0200 Subject: [PATCH 1/5] Fix GHSA-55rc-7hww-hf4p --- Lib/test/test_xml_etree.py | 24 ++++++++++++++++++++++++ Lib/xml/etree/ElementPath.py | 6 +++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index acec4ec2ca257c4..dd69503e3cbc457 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -16,6 +16,7 @@ import pyexpat import sys import textwrap +import time import types import unittest import unittest.mock as mock @@ -3477,6 +3478,29 @@ def test_find_xpath(self): self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') + def test_find_xpath_index_no_quadratic_complexity(self): + root = ET.Element("root") + first_a = ET.SubElement(root, "a") + first_a.set("pos", "first") + n = 2 ** 15 + for i in range(n): + ET.SubElement(root, "a") + last_a = ET.SubElement(root, "a") + last_a.set("pos", "last") + + for pattern in [".//a[1]", ".//a[last()]"]: + start = time.time() + result = root.findall(pattern) + end = time.time() + + # Before the fix these took 30+ seconds. + self.assertLess(end - start, 1) + + self.assertIs(root.find(".//a[1]"), first_a) + self.assertEqual(root.find(".//a[1]").get("pos"), "first") + self.assertIs(root.find(".//a[last()]"), last_a) + self.assertEqual(root.find(".//a[last()]").get("pos"), "last") + def test_findall(self): e = ET.XML(SAMPLE_XML) e[2] = ET.XML(SAMPLE_SECTION) diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index dc6bd28c03137de..cb8f28fa4718233 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -324,11 +324,15 @@ def select_negated(context, result): index = -1 def select(context, result): parent_map = get_parent_map(context) + sibling_cache = {} for elem in result: try: parent = parent_map[elem] # FIXME: what if the selector is "*" ? - elems = list(parent.findall(elem.tag)) + cache_key = (parent, elem.tag) + if cache_key not in sibling_cache: + sibling_cache[cache_key] = list(parent.findall(elem.tag)) + elems = sibling_cache[cache_key] if elems[index] is elem: yield elem except (IndexError, KeyError): From be3168539131869cf531bec297355beddf88ded6 Mon Sep 17 00:00:00 2001 From: Alexis Date: Tue, 28 Apr 2026 10:39:51 +0200 Subject: [PATCH 2/5] Fix test --- Lib/test/test_xml_etree.py | 46 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index dd69503e3cbc457..2b1260fdc28c02c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3479,27 +3479,35 @@ def test_find_xpath(self): self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') def test_find_xpath_index_no_quadratic_complexity(self): - root = ET.Element("root") - first_a = ET.SubElement(root, "a") - first_a.set("pos", "first") - n = 2 ** 15 - for i in range(n): - ET.SubElement(root, "a") - last_a = ET.SubElement(root, "a") - last_a.set("pos", "last") + class CountingElement(ET.Element): + findall_calls = 0 + def findall(self, *args, **kwargs): + type(self).findall_calls += 1 + return super().findall(*args, **kwargs) + + def work(n, pattern): + root = CountingElement("root") + for _ in range(n): + ET.SubElement(root, "a") + CountingElement.findall_calls = 0 + root.findall(pattern) + return CountingElement.findall_calls for pattern in [".//a[1]", ".//a[last()]"]: - start = time.time() - result = root.findall(pattern) - end = time.time() - - # Before the fix these took 30+ seconds. - self.assertLess(end - start, 1) - - self.assertIs(root.find(".//a[1]"), first_a) - self.assertEqual(root.find(".//a[1]").get("pos"), "first") - self.assertIs(root.find(".//a[last()]"), last_a) - self.assertEqual(root.find(".//a[last()]").get("pos"), "last") + w1 = work(1024, pattern) + w2 = work(2048, pattern) + w3 = work(4096, pattern) + + self.assertGreater(w1, 0) + r1 = w2 / w1 + r2 = w3 / w2 + # Doubling N must not ~double the parent.findall calls. + # Linear-in-N call counts indicate the cache is missing. + self.assertLess( + max(r1, r2), 1.5, + msg=f"Possible quadratic behavior on {pattern!r}: " + f"calls={w1, w2, w3} ratios={r1, r2}", + ) def test_findall(self): e = ET.XML(SAMPLE_XML) From 9158932a451db5425a07b2ea88c99b222d7a96d9 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 16 Jun 2026 15:25:55 +0200 Subject: [PATCH 3/5] Only cache the element at the given index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/xml/etree/ElementPath.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index cb8f28fa4718233..de1fd203ff6a0f6 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -324,19 +324,22 @@ def select_negated(context, result): index = -1 def select(context, result): parent_map = get_parent_map(context) - sibling_cache = {} + cache = {} for elem in result: try: parent = parent_map[elem] + except KeyError: + continue + key = (parent, elem.tag) + if key not in cache: # FIXME: what if the selector is "*" ? - cache_key = (parent, elem.tag) - if cache_key not in sibling_cache: - sibling_cache[cache_key] = list(parent.findall(elem.tag)) - elems = sibling_cache[cache_key] - if elems[index] is elem: - yield elem - except (IndexError, KeyError): - pass + elems = parent.findall(elem.tag) + try: + cache[key] = elems[index] + except IndexError: + cache[key] = None + if cache[key] is elem: + yield elem return select raise SyntaxError("invalid predicate") From 553173a3071863533665c1d88c13b1d4096f6f26 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 30 Jun 2026 13:27:18 +0200 Subject: [PATCH 4/5] Add blurb --- .../Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst diff --git a/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst b/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst new file mode 100644 index 000000000000000..69e73008af79f14 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-06-30-13-24-13.gh-issue-152674.-2QVoL.rst @@ -0,0 +1,6 @@ +The :class:`xml.etree.ElementTree.Element` methods +:meth:`~xml.etree.ElementTree.Element.findall`, +:meth:`~xml.etree.ElementTree.Element.iterfind` and +:meth:`~xml.etree.ElementTree.Element.find` avoid quadratic behavior when +using XPath index predicates (``[1]``, ``[last()]``, ``[last()-N]``) on XML +documents with many same-tag siblings. From f39efdc63774c075e2d4bcc20abbb03b3e15ea98 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 30 Jun 2026 13:35:19 +0200 Subject: [PATCH 5/5] Remove unused import --- Lib/test/test_xml_etree.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 2b1260fdc28c02c..19b0ce843d906a3 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -16,7 +16,6 @@ import pyexpat import sys import textwrap -import time import types import unittest import unittest.mock as mock