diff --git a/nameparser/parser.py b/nameparser/parser.py index 73cbd06..067b586 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -980,7 +980,12 @@ def join_on_conjunctions(self, pieces: list[str], additional_parts_count: int = try: # if there are no more prefixes, look for a suffix to stop at stop_at = next(iter(filter(self.is_suffix, pieces[i + 1:]))) - j = pieces.index(stop_at) + # search from i + 1: filter() finds the value of stop_at + # in pieces[i+1:] but pieces.index() without a start + # argument searches from 0, so an earlier occurrence of + # the same token (e.g. a suffix token that also appears + # before the prefix) would be matched instead. + j = pieces.index(stop_at, i + 1) new_piece = ' '.join(pieces[i:j]) pieces = pieces[:i] + [new_piece] + pieces[j:] except StopIteration: diff --git a/pyproject.toml b/pyproject.toml index c8d7932..86da3e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,8 @@ dev = [ "dill (>=0.2.5)", "sphinx (>=8)", "mypy (>=2.1)", - "ruff (>=0.15)" + "ruff (>=0.15)", + "pytest-timeout>=2.4.0", ] [tool.mypy] diff --git a/tests/test_prefixes.py b/tests/test_prefixes.py index 134f962..0d18449 100644 --- a/tests/test_prefixes.py +++ b/tests/test_prefixes.py @@ -1,3 +1,5 @@ +import pytest + from nameparser import HumanName from tests.base import HumanNameTestBase @@ -43,6 +45,50 @@ def test_prefix_before_two_part_last_name_with_acronym_suffix(self) -> None: self.m(hn.last, "von bergen wessels", hn) self.m(hn.suffix, "M.D.", hn) + def test_title_before_and_after_prefixed_last_name(self) -> None: + # Issue #100: a repeated title/suffix token ("dr") before AND after a + # prefixed last name used to corrupt the middle name into + # " dr Vincent van" because the suffix-boundary lookup matched the + # LEADING "dr" instead of the trailing one. 
+ hn = HumanName("dr Vincent van Gogh dr") + self.m(hn.title, "dr", hn) + self.m(hn.first, "Vincent", hn) + self.m(hn.middle, "", hn) + self.m(hn.last, "van Gogh", hn) + self.m(hn.suffix, "dr", hn) + + def test_suffix_token_collision_with_two_word_prefix(self) -> None: + # Same fix as #100 but with a two-word prefix ("van der"). Exercises a + # different iteration count through the prefix-joining loop. + hn = HumanName("dr Vincent van der Gogh dr") + self.m(hn.title, "dr", hn) + self.m(hn.first, "Vincent", hn) + self.m(hn.middle, "", hn) + self.m(hn.last, "van der Gogh", hn) + self.m(hn.suffix, "dr", hn) + + def test_title_before_and_after_prefixed_last_name_with_middle(self) -> None: + # The pre-fix bug corrupted the middle field; verify it is not disturbed + # when a genuine middle name is present alongside the repeated token. + hn = HumanName("dr Vincent James van Gogh dr") + self.m(hn.title, "dr", hn) + self.m(hn.first, "Vincent", hn) + self.m(hn.middle, "James", hn) + self.m(hn.last, "van Gogh", hn) + self.m(hn.suffix, "dr", hn) + + @pytest.mark.timeout(2) + def test_many_repeated_prefixes_does_not_blow_up(self) -> None: + # Issue #108: a name with a long run of repeated prefixes used to grow + # the pieces list exponentially and exhaust memory. The 2-second + # pytest-timeout limit aborts a runaway parse locally and in CI; if it + # fires, an exponential regression has been reintroduced. 
+ name = "Jan " + "van der " * 30 + "Berg" + hn = HumanName(name) + self.assertFalse(hn.unparsable) + self.m(hn.first, "Jan", hn) + self.assertIn("Berg", hn.last) + def test_two_part_last_name_with_suffix_comma(self) -> None: hn = HumanName("pennie von bergen wessels, III") self.m(hn.first, "pennie", hn) diff --git a/uv.lock b/uv.lock index c01beca..339eca6 100644 --- a/uv.lock +++ b/uv.lock @@ -229,7 +229,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -525,6 +525,7 @@ dev = [ { name = "dill" }, { name = "mypy" }, { name = "pytest" }, + { name = "pytest-timeout" }, { name = "ruff" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, @@ -539,6 +540,7 @@ dev = [ { name = "dill", specifier = ">=0.2.5" }, { name = "mypy", specifier = ">=2.1" }, { name = "pytest", specifier = ">=8" }, + { name = "pytest-timeout", specifier = ">=2.4.0" }, { name = "ruff", specifier = ">=0.15" }, { name = "sphinx", specifier = ">=8" }, ] @@ -597,6 +599,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/5a/ba30a81239b909821b3153e303e7def45178bf353da4f72380e6c5e8793b/pytest-9.1.0-py3-none-any.whl", hash = "sha256:8ebb0e7888bdf2bdfc602ec51f8f62d50200af37356c74e503c79a94f5c81f32", size = 386453, upload-time = "2026-06-13T18:52:44.045Z" }, ] +[[package]] +name = "pytest-timeout" +version 
= "2.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, +] + [[package]] name = "requests" version = "2.34.2" @@ -663,23 +677,23 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "alabaster", marker = "python_full_version < '3.11'" }, - { name = "babel", marker = "python_full_version < '3.11'" }, - { name = "colorama", marker = "python_full_version < '3.11' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "imagesize", marker = "python_full_version < '3.11'" }, - { name = "jinja2", marker = "python_full_version < '3.11'" }, - { name = "packaging", marker = "python_full_version < '3.11'" }, - { name = "pygments", marker = "python_full_version < '3.11'" }, - { name = "requests", marker = "python_full_version < '3.11'" }, - { name = "snowballstemmer", marker = "python_full_version < '3.11'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.11'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.11'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.11'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.11'" }, - { name = "sphinxcontrib-qthelp", marker = 
"python_full_version < '3.11'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.11'" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "alabaster" }, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" } }, + { name = "imagesize" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, + { name = "tomli" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611, upload-time = "2024-10-13T20:27:13.93Z" } wheels = [ @@ -694,23 +708,23 @@ resolution-markers = [ "python_full_version == '3.11.*'", ] dependencies = [ - { name = "alabaster", marker = "python_full_version == '3.11.*'" }, - { name = "babel", marker = "python_full_version == '3.11.*'" }, - { name = "colorama", marker = "python_full_version == '3.11.*' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, - { name = "imagesize", marker = "python_full_version == '3.11.*'" }, - { name = "jinja2", marker = "python_full_version == '3.11.*'" }, - { name = "packaging", marker = "python_full_version == '3.11.*'" }, - { name = "pygments", marker = "python_full_version == '3.11.*'" }, - { name = "requests", marker = "python_full_version == '3.11.*'" }, - { name = "roman-numerals", marker = "python_full_version == '3.11.*'" }, - { 
name = "snowballstemmer", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-qthelp", marker = "python_full_version == '3.11.*'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version == '3.11.*'" }, + { name = "alabaster" }, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" } }, + { name = "imagesize" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "roman-numerals" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/50/a8c6ccc36d5eacdfd7913ddccd15a9cee03ecafc5ee2bc40e1f168d85022/sphinx-9.0.4.tar.gz", hash = "sha256:594ef59d042972abbc581d8baa577404abe4e6c3b04ef61bd7fc2acbd51f3fa3", size = 8710502, upload-time = "2025-12-04T07:45:27.343Z" } wheels = [ @@ -726,23 +740,23 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.15'", ] dependencies = [ - { name = "alabaster", marker = "python_full_version >= '3.12'" }, - { name = "babel", marker = "python_full_version >= '3.12'" }, - { name = "colorama", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - 
{ name = "imagesize", marker = "python_full_version >= '3.12'" }, - { name = "jinja2", marker = "python_full_version >= '3.12'" }, - { name = "packaging", marker = "python_full_version >= '3.12'" }, - { name = "pygments", marker = "python_full_version >= '3.12'" }, - { name = "requests", marker = "python_full_version >= '3.12'" }, - { name = "roman-numerals", marker = "python_full_version >= '3.12'" }, - { name = "snowballstemmer", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.12'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.12'" }, + { name = "alabaster" }, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" } }, + { name = "imagesize" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "roman-numerals" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cd/bd/f08eb0f4eed5c83f1ba2a3bd18f7745a2b1525fad70660a1c00224ec468a/sphinx-9.1.0.tar.gz", hash = "sha256:7741722357dd75f8190766926071fed3bdc211c74dd2d7d4df5404da95930ddb", size = 8718324, upload-time = "2025-12-31T15:09:27.646Z" } wheels = [