From 2a96957341729ff966c67f04a1c36d385f56cda3 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:22:36 -0700 Subject: [PATCH 1/9] feat: add first_name_prefixes set to Constants (#150) --- nameparser/config/__init__.py | 8 +++++++- nameparser/config/first_name_prefixes.py | 12 ++++++++++++ tests/conftest.py | 1 + tests/test_first_name_prefixes.py | 10 ++++++++++ 4 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 nameparser/config/first_name_prefixes.py create mode 100644 tests/test_first_name_prefixes.py diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index 4d7bae4..7b3981c 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -37,6 +37,7 @@ from nameparser.util import lc from nameparser.config.prefixes import PREFIXES +from nameparser.config.first_name_prefixes import FIRST_NAME_PREFIXES from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS from nameparser.config.conjunctions import CONJUNCTIONS from nameparser.config.suffixes import SUFFIX_ACRONYMS @@ -227,8 +228,10 @@ class Constants: :py:attr:`~suffixes.SUFFIX_ACRONYMS` wrapped with :py:class:`SetManager`. :param set suffix_not_acronyms: :py:attr:`~suffixes.SUFFIX_NOT_ACRONYMS` wrapped with :py:class:`SetManager`. - :param set conjunctions: + :param set conjunctions: :py:attr:`conjunctions` wrapped with :py:class:`SetManager`. + :param set first_name_prefixes: + :py:attr:`~first_name_prefixes.FIRST_NAME_PREFIXES` wrapped with :py:class:`SetManager`. :type capitalization_exceptions: tuple or dict :param capitalization_exceptions: :py:attr:`~capitalization.CAPITALIZATION_EXCEPTIONS` wrapped with :py:class:`TupleManager`. @@ -243,6 +246,7 @@ class Constants: titles = _CachedUnionMember() first_name_titles: SetManager conjunctions: SetManager + first_name_prefixes: SetManager capitalization_exceptions: TupleManager[str] regexes: RegexTupleManager _pst: Set[str] | None @@ -377,6 +381,7 @@ def __init__(self, titles: Iterable[str] = TITLES, first_name_titles: Iterable[str] = FIRST_NAME_TITLES, conjunctions: Iterable[str] = CONJUNCTIONS, + first_name_prefixes: Iterable[str] = FIRST_NAME_PREFIXES, capitalization_exceptions: TupleManager[str] | Iterable[tuple[str, str]] = CAPITALIZATION_EXCEPTIONS, regexes: RegexTupleManager | TupleManager[re.Pattern[str]] | Iterable[tuple[str, re.Pattern[str]]] = REGEXES, patronymic_name_order: bool = False, @@ -390,6 +395,7 @@ def __init__(self, self.titles = SetManager(titles) self.first_name_titles = SetManager(first_name_titles) self.conjunctions = SetManager(conjunctions) + self.first_name_prefixes = SetManager(first_name_prefixes) self.capitalization_exceptions = TupleManager(capitalization_exceptions) self.regexes = RegexTupleManager(regexes) self.patronymic_name_order = patronymic_name_order diff --git a/nameparser/config/first_name_prefixes.py b/nameparser/config/first_name_prefixes.py new file mode 100644 index 0000000..3f569db --- /dev/null +++ b/nameparser/config/first_name_prefixes.py @@ -0,0 +1,12 @@ +#: Bound Arabic given-name prefixes that attach to the following word to form +#: one first name (e.g. "abdul salam" → first name "abdul salam"). They are +#: never standalone names. Join logic runs in the given-name region only, +#: mirroring :py:data:`~nameparser.config.prefixes.PREFIXES` for last names. +FIRST_NAME_PREFIXES: set[str] = { + 'abdul', + 'abdel', + 'abdal', + 'abu', + 'abou', + 'umm', +} diff --git a/tests/conftest.py b/tests/conftest.py index a35d515..093e431 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,6 +32,7 @@ "titles", "first_name_titles", "conjunctions", + "first_name_prefixes", "capitalization_exceptions", "regexes", ) diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py new file mode 100644 index 0000000..68e6f15 --- /dev/null +++ b/tests/test_first_name_prefixes.py @@ -0,0 +1,10 @@ +from nameparser.config import CONSTANTS + +from tests.base import HumanNameTestBase + + +class FirstNamePrefixesTestCase(HumanNameTestBase): + + def test_default_set_contents(self) -> None: + for word in ("abdul", "abdel", "abdal", "abu", "abou", "umm"): + assert word in CONSTANTS.first_name_prefixes, f"{word!r} missing from first_name_prefixes" From 53a0909ee04b9bb668c0215a9fec66d1b711632c Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:30:57 -0700 Subject: [PATCH 2/9] feat: add is_first_name_prefix method to HumanName (#150) --- nameparser/parser.py | 4 ++++ tests/test_first_name_prefixes.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/nameparser/parser.py b/nameparser/parser.py index 1ffcb53..6aaa553 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -561,6 +561,10 @@ def is_prefix(self, piece: str) -> bool: else: return lc(piece) in self.C.prefixes + def is_first_name_prefix(self, piece: str) -> bool: + """Lowercase and no periods version of piece is in :py:attr:`~nameparser.config.Constants.first_name_prefixes`.""" + return lc(piece) in self.C.first_name_prefixes + def is_roman_numeral(self, value: str) -> bool: """ Matches the ``roman_numeral`` regular expression in diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index 68e6f15..b1f939f 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -1,3 +1,4 @@ +from nameparser import HumanName from nameparser.config import CONSTANTS from tests.base import HumanNameTestBase @@ -8,3 +9,11 @@ class FirstNamePrefixesTestCase(HumanNameTestBase): def test_default_set_contents(self) -> None: for word in ("abdul", "abdel", "abdal", "abu", "abou", "umm"): assert word in CONSTANTS.first_name_prefixes, f"{word!r} missing from first_name_prefixes" + + def test_is_first_name_prefix_true(self) -> None: + hn = HumanName("test") + assert hn.is_first_name_prefix("Abdul") + + def test_is_first_name_prefix_false(self) -> None: + hn = HumanName("test") + assert not hn.is_first_name_prefix("Ahmed") From 9954bd13e3a4fbcb3501b7c808ceae179e3f9e16 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:35:09 -0700 Subject: [PATCH 3/9] feat: implement first-name prefix joining in no-comma parse path (#150) Co-Authored-By: Claude Sonnet 4.6 --- nameparser/parser.py | 23 +++++++++++++++++++++++ tests/test_first_name_prefixes.py | 19 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/nameparser/parser.py b/nameparser/parser.py index 6aaa553..407558a 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -565,6 +565,28 @@ def is_first_name_prefix(self, piece: str) -> bool: """Lowercase and no periods version of piece is in :py:attr:`~nameparser.config.Constants.first_name_prefixes`.""" return lc(piece) in self.C.first_name_prefixes + def _join_first_name_prefix(self, pieces: list[str], reserve_last: bool) -> list[str]: + """Join a first-name prefix to its following piece. + + Finds the first non-title piece; if it is in ``first_name_prefixes``, + merges it with the next piece — unless ``reserve_last`` is True and no + further piece would remain for the last name. + """ + fi = next((i for i, p in enumerate(pieces) if not self.is_title(p)), None) + if fi is None: + return pieces + if not self.is_first_name_prefix(pieces[fi]): + return pieces + next_i = fi + 1 + if next_i >= len(pieces): + return pieces + if reserve_last and next_i >= len(pieces) - 1: + # Joining would consume the only remaining piece (the last name). + return pieces + pieces[fi] = pieces[fi] + " " + pieces[next_i] + del pieces[next_i] + return pieces + def is_roman_numeral(self, value: str) -> bool: """ Matches the ``roman_numeral`` regular expression in @@ -831,6 +853,7 @@ def parse_full_name(self) -> None: # part[0] pieces = self.parse_pieces(parts) + pieces = self._join_first_name_prefix(pieces, reserve_last=True) p_len = len(pieces) for i, piece in enumerate(pieces): try: diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index b1f939f..df1f81d 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -17,3 +17,22 @@ def test_is_first_name_prefix_true(self) -> None: def test_is_first_name_prefix_false(self) -> None: hn = HumanName("test") assert not hn.is_first_name_prefix("Ahmed") + + # --- no-comma: basic joining --- + def test_no_comma_basic_join(self) -> None: + hn = HumanName("abdul salam ahmed salem") + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "ahmed", hn) + self.m(hn.last, "salem", hn) + + def test_no_comma_three_tokens_no_middle(self) -> None: + hn = HumanName("abdul salam salem") + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "", hn) + self.m(hn.last, "salem", hn) + + def test_no_comma_guard_two_tokens_no_join(self) -> None: + """Guard: only last name remains after prefix → no join.""" + hn = HumanName("abdul salam") + self.m(hn.first, "abdul", hn) + self.m(hn.last, "salam", hn) From 0c13b2f952fdc4f48a47117e77e9c4b4ed6db930 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:39:20 -0700 Subject: [PATCH 4/9] feat: wire first-name prefix join into lastname-comma path (#150) --- nameparser/parser.py | 1 + tests/test_first_name_prefixes.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/nameparser/parser.py b/nameparser/parser.py index 407558a..7b44fe7 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -936,6 +936,7 @@ def parse_full_name(self) -> None: # parts[0], parts[1], parts[2:...] log.debug("post-comma pieces: %s", str(post_comma_pieces)) + post_comma_pieces = self._join_first_name_prefix(post_comma_pieces, reserve_last=False) # lastname part may have suffixes in it lastname_pieces = self.parse_pieces(parts[0].split(' '), 1) diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index df1f81d..45714c8 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -36,3 +36,16 @@ def test_no_comma_guard_two_tokens_no_join(self) -> None: hn = HumanName("abdul salam") self.m(hn.first, "abdul", hn) self.m(hn.last, "salam", hn) + + # --- lastname-comma path --- + def test_lastname_comma_join(self) -> None: + hn = HumanName("salem, abdul salam") + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "", hn) + self.m(hn.last, "salem", hn) + + def test_lastname_comma_join_with_middle(self) -> None: + hn = HumanName("salem, abdul salam ahmed") + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "ahmed", hn) + self.m(hn.last, "salem", hn) From 4ec3e5562260b108d4bcd78bade4359622a11102 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:42:56 -0700 Subject: [PATCH 5/9] test: complete edge-case coverage for first-name prefix join (#150) Co-Authored-By: Claude Sonnet 4.6 --- tests/test_first_name_prefixes.py | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index 45714c8..0ca7027 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -49,3 +49,62 @@ def test_lastname_comma_join_with_middle(self) -> None: self.m(hn.first, "abdul salam", hn) self.m(hn.middle, "ahmed", hn) self.m(hn.last, "salem", hn) + + # --- interaction with titles --- + def test_title_kept_prefix_joins(self) -> None: + hn = HumanName("Dr. abdul salam ahmed salem") + self.m(hn.title, "Dr.", hn) + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "ahmed", hn) + self.m(hn.last, "salem", hn) + + # --- interaction with last-name prefixes --- + def test_abu_bakr_al_baghdadi(self) -> None: + """abu joins forward as first-prefix; al joins forward as last-prefix.""" + hn = HumanName("abu bakr al baghdadi") + self.m(hn.first, "abu bakr", hn) + self.m(hn.last, "al baghdadi", hn) + + # --- interaction with suffixes --- + def test_suffix_kept_prefix_joins(self) -> None: + hn = HumanName("abdul salam ahmed salem jr") + self.m(hn.first, "abdul salam", hn) + self.m(hn.middle, "ahmed", hn) + self.m(hn.last, "salem", hn) + self.m(hn.suffix, "jr", hn) + + # --- guard / no-op --- + def test_mohamad_unchanged(self) -> None: + """mohamad is deliberately not in first_name_prefixes.""" + hn = HumanName("Mohamad Ali Khalil") + self.m(hn.first, "Mohamad", hn) + self.m(hn.middle, "Ali", hn) + self.m(hn.last, "Khalil", hn) + + def test_single_token_already_joined_unchanged(self) -> None: + """abdulsalam is one token — not in the set, no join.""" + hn = HumanName("abdulsalam ahmed salem") + self.m(hn.first, "abdulsalam", hn) + self.m(hn.middle, "ahmed", hn) + self.m(hn.last, "salem", hn) + + def test_prefix_alone_no_join(self) -> None: + """Single-word name that is a prefix: nothing to join.""" + hn = HumanName("abdul") + self.m(hn.first, "abdul", hn) + + def test_lastname_comma_prefix_only_no_join(self) -> None: + """Prefix as sole post-comma token: nothing to join.""" + hn = HumanName("salem, abdul") + self.m(hn.first, "abdul", hn) + self.m(hn.last, "salem", hn) + + # --- opt-out --- + def test_opt_out_via_clear(self) -> None: + """Clearing first_name_prefixes restores prior behavior.""" + from nameparser.config import Constants + c = Constants(first_name_prefixes=set()) + hn = HumanName("abdul salam ahmed salem", constants=c) + self.m(hn.first, "abdul", hn) + self.m(hn.middle, "salam ahmed", hn) + self.m(hn.last, "salem", hn) From 6e14ee70c8ddb889f5f3db5a69677d733e8d0b3e Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 00:46:30 -0700 Subject: [PATCH 6/9] docs: document first_name_prefixes and add release note (#150) --- docs/customize.rst | 37 ++++++++++++++++++++++++++++++- docs/release_log.rst | 6 +++++ nameparser/config/__init__.py | 7 ++++++ tests/test_constants.py | 8 +++++++ tests/test_first_name_prefixes.py | 7 ++++++ 5 files changed, 64 insertions(+), 1 deletion(-) diff --git a/docs/customize.rst b/docs/customize.rst index 9df179b..fb763ce 100644 --- a/docs/customize.rst +++ b/docs/customize.rst @@ -131,6 +131,41 @@ a secondary key:: sorted_names = sorted(names, key=lambda n: (n.last_base.lower(), n.first.lower())) +First-Name Prefixes +------------------- + +``CONSTANTS.first_name_prefixes`` controls bound given-name prefixes that attach +to the following word to form one first name. By default it contains +``{'abdul', 'abdel', 'abdal', 'abu', 'abou', 'umm'}``. + +Example:: + + >>> from nameparser import HumanName + >>> hn = HumanName("abdul salam ahmed salem") + >>> hn.first, hn.middle, hn.last + ('abdul salam', 'ahmed', 'salem') + +To **disable** the feature entirely:: + + >>> from nameparser.config import CONSTANTS + >>> CONSTANTS.first_name_prefixes.clear() + +To **add** a word (e.g. if your data uses ``mohamad`` as a bound prefix):: + + >>> CONSTANTS.first_name_prefixes.add('mohamad') + +To **remove** a single entry:: + + >>> CONSTANTS.first_name_prefixes.remove('umm') + +You can also pass a custom set per ``Constants`` instance:: + + >>> from nameparser.config import Constants + >>> c = Constants(first_name_prefixes={'abu', 'umm'}) + >>> hn2 = HumanName("abu bakr al saud", constants=c) + >>> hn2.first, hn2.last + ('abu bakr', 'al saud') + Parser Customization Examples ----------------------------- @@ -181,7 +216,7 @@ constant so that "Hon" can be parsed as a first name. If you don't want to detect any titles at all, you can remove all of them: - >>> CONSTANTS.titles.remove(*CONSTANTS.titles) + >>> CONSTANTS.titles.clear() Adding a Title diff --git a/docs/release_log.rst b/docs/release_log.rst index ca9104d..381d9ed 100644 --- a/docs/release_log.rst +++ b/docs/release_log.rst @@ -20,6 +20,12 @@ Release Log - Fix ``'apn aprn'`` split into separate ``suffix_acronyms`` entries so each is recognized independently (closes #155) - Add ``last_base``, ``last_prefixes`` (and ``_list`` variants) plus ``family`` / ``family_prefixes`` aliases for splitting last-name prefix particles (tussenvoegsels) from the core surname (#130, #132) - Add ``patronymic_name_order`` flag to ``Constants`` and ``HumanName`` for opt-in detection and reordering of Russian formal-order names (Surname GivenName Patronymic) (#85) + - Add ``first_name_prefixes`` set to ``Constants``; bound Arabic given-name + prefixes (``abdul``, ``abu``, etc.) now join forward to form a single first + name (e.g. ``"abdul salam ahmed salem"`` → ``first="abdul salam"``, + ``middle="ahmed"``, ``last="salem"``). Disable via + ``CONSTANTS.first_name_prefixes.clear()``. **Default-on: changes parsing + output for names with these prefixes.** (#150) * 1.2.1 - June 19, 2026 - Fix ``initials()`` interpolating the literal ``None`` for empty name parts when ``empty_attribute_default = None`` (e.g. ``"J. None D."``); empty parts now render as an empty string and a fully-empty result returns ``empty_attribute_default`` - Add ``python -m nameparser "Name String"`` command-line helper that prints a parsed name diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index 7b3981c..038fa2e 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -125,6 +125,13 @@ def remove(self, *strings: str) -> Self: self._on_change() return self + def clear(self) -> Self: + """Remove all entries from the set. Returns ``self`` for chaining.""" + self.elements.clear() + if self._on_change: + self._on_change() + return self + T = TypeVar('T') diff --git a/tests/test_constants.py b/tests/test_constants.py index decae95..281bdf3 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -77,6 +77,14 @@ def test_chain_multiple_arguments(self) -> None: self.m(hn.middle, "Hon", hn) self.m(hn.last, "Solo", hn) + def test_clear_removes_all_entries(self) -> None: + hn = HumanName("Ms Hon Solo", constants=None) + hn.C.titles.clear() + hn.parse_full_name() + self.m(hn.first, "Ms", hn) + self.m(hn.middle, "Hon", hn) + self.m(hn.last, "Solo", hn) + def test_empty_attribute_default(self) -> None: from nameparser.config import CONSTANTS _orig = CONSTANTS.empty_attribute_default diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index 0ca7027..36b7739 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -99,6 +99,12 @@ def test_lastname_comma_prefix_only_no_join(self) -> None: self.m(hn.first, "abdul", hn) self.m(hn.last, "salem", hn) + def test_mid_name_prefix_becomes_last_prefix(self) -> None: + """abu in non-first position is handled as a last-name prefix, not first-name.""" + hn = HumanName("ahmed abu bakr") + self.m(hn.first, "ahmed", hn) + self.m(hn.last, "abu bakr", hn) + # --- opt-out --- def test_opt_out_via_clear(self) -> None: """Clearing first_name_prefixes restores prior behavior.""" @@ -108,3 +114,4 @@ def test_opt_out_via_clear(self) -> None: self.m(hn.first, "abdul", hn) self.m(hn.middle, "salam ahmed", hn) self.m(hn.last, "salem", hn) + From c130d401a0d4f0a19ea9be3a1d296ed85a96723e Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 01:10:52 -0700 Subject: [PATCH 7/9] fix: guard first-name prefix join against trailing suffixes (#150) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reserve_last guard previously counted suffix tokens as potential last-name candidates, causing "abdul salam jr" to parse as first="abdul salam", last="jr" instead of first="abdul", last="salam", suffix="jr". Fix: count only non-suffix pieces from next_i onward; require ≥2 so the join target and at least one non-suffix last-name piece both exist. --- nameparser/parser.py | 11 ++++++++--- tests/test_first_name_prefixes.py | 7 +++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/nameparser/parser.py b/nameparser/parser.py index 7b44fe7..14d093c 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -580,9 +580,14 @@ def _join_first_name_prefix(self, pieces: list[str], reserve_last: bool) -> list next_i = fi + 1 if next_i >= len(pieces): return pieces - if reserve_last and next_i >= len(pieces) - 1: - # Joining would consume the only remaining piece (the last name). - return pieces + if reserve_last: + # Count non-suffix pieces from next_i onward; need ≥2 so the join + # target and at least one last-name piece both exist. + non_suffix_remaining = sum( + 1 for p in pieces[next_i:] if not self.is_suffix(p) + ) + if non_suffix_remaining <= 1: + return pieces pieces[fi] = pieces[fi] + " " + pieces[next_i] del pieces[next_i] return pieces diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index 36b7739..e908328 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -37,6 +37,13 @@ def test_no_comma_guard_two_tokens_no_join(self) -> None: self.m(hn.first, "abdul", hn) self.m(hn.last, "salam", hn) + def test_no_comma_guard_suffix_not_swallowed(self) -> None: + """Guard: prefix + one name + suffix — suffix must not become last.""" + hn = HumanName("abdul salam jr") + self.m(hn.first, "abdul", hn) + self.m(hn.last, "salam", hn) + self.m(hn.suffix, "jr", hn) + # --- lastname-comma path --- def test_lastname_comma_join(self) -> None: hn = HumanName("salem, abdul salam") From 56b2620185c7cb306833e92122707c3eedb33983 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 01:18:31 -0700 Subject: [PATCH 8/9] fix: guard _on_change in add_with_encoding/clear against no-op mutations; correct lc() docstrings - add_with_encoding and clear now only fire _on_change when the set actually changes, consistent with remove()'s existing changed guard - Correct 'no periods' to 'leading/trailing-periods-stripped' in lc(), is_prefix, is_first_name_prefix, and add_with_encoding docstrings --- nameparser/config/__init__.py | 19 +++++++++++-------- nameparser/parser.py | 4 ++-- nameparser/util.py | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index 038fa2e..c143299 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -87,7 +87,7 @@ def __len__(self) -> int: def add_with_encoding(self, s: str, encoding: str | None = None) -> None: """ - Add the lower case and no-period version of the string to the set. Pass an + Add the lowercased, leading/trailing-periods-stripped version of the string to the set. Pass an explicit `encoding` parameter to specify the encoding of binary strings that are not DEFAULT_ENCODING (UTF-8). """ @@ -97,13 +97,15 @@ def add_with_encoding(self, s: str, encoding: str | None = None) -> None: encoding = encoding or stdin_encoding or DEFAULT_ENCODING if isinstance(s, bytes): s = s.decode(encoding) - self.elements.add(lc(s)) - if self._on_change: - self._on_change() + normalized = lc(s) + if normalized not in self.elements: + self.elements.add(normalized) + if self._on_change: + self._on_change() def add(self, *strings: str) -> Self: """ - Add the lower case and no-period version of the string arguments to the set. + Add the lowercased, leading/trailing-periods-stripped version of the string arguments to the set. Can pass a list of strings. Returns ``self`` for chaining. """ for s in strings: @@ -127,9 +129,10 @@ def remove(self, *strings: str) -> Self: def clear(self) -> Self: """Remove all entries from the set. Returns ``self`` for chaining.""" - self.elements.clear() - if self._on_change: - self._on_change() + if self.elements: + self.elements.clear() + if self._on_change: + self._on_change() return self diff --git a/nameparser/parser.py b/nameparser/parser.py index 14d093c..4cda41a 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -551,7 +551,7 @@ def is_conjunction(self, piece: str) -> bool: def is_prefix(self, piece: str) -> bool: """ - Lowercase and no periods version of piece is in the + Lowercased, leading/trailing-periods-stripped version of piece is in the :py:data:`~nameparser.config.prefixes.PREFIXES` set. """ if isinstance(piece, list): @@ -562,7 +562,7 @@ def is_prefix(self, piece: str) -> bool: return lc(piece) in self.C.prefixes def is_first_name_prefix(self, piece: str) -> bool: - """Lowercase and no periods version of piece is in :py:attr:`~nameparser.config.Constants.first_name_prefixes`.""" + """Lowercased, leading/trailing-periods-stripped version of piece is in :py:attr:`~nameparser.config.Constants.first_name_prefixes`.""" return lc(piece) in self.C.first_name_prefixes def _join_first_name_prefix(self, pieces: list[str], reserve_last: bool) -> list[str]: diff --git a/nameparser/util.py b/nameparser/util.py index 3b28fe7..1987dae 100644 --- a/nameparser/util.py +++ b/nameparser/util.py @@ -11,7 +11,7 @@ def lc(value: str) -> str: - """Lower case and remove any periods to normalize for comparison.""" + """Lowercase and strip leading/trailing periods to normalize for comparison.""" if not value: return '' return value.lower().strip('.') From b8feba4e38970beac275257776b4c82710d1f366 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Tue, 30 Jun 2026 01:35:34 -0700 Subject: [PATCH 9/9] test: remove test_default_set_contents (parsing tests cover membership) --- tests/test_first_name_prefixes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_first_name_prefixes.py b/tests/test_first_name_prefixes.py index e908328..db274da 100644 --- a/tests/test_first_name_prefixes.py +++ b/tests/test_first_name_prefixes.py @@ -1,15 +1,9 @@ from nameparser import HumanName -from nameparser.config import CONSTANTS - from tests.base import HumanNameTestBase class FirstNamePrefixesTestCase(HumanNameTestBase): - def test_default_set_contents(self) -> None: - for word in ("abdul", "abdel", "abdal", "abu", "abou", "umm"): - assert word in CONSTANTS.first_name_prefixes, f"{word!r} missing from first_name_prefixes" - def test_is_first_name_prefix_true(self) -> None: hn = HumanName("test") assert hn.is_first_name_prefix("Abdul")