diff --git a/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152054.Ci7Set.rst b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152054.Ci7Set.rst new file mode 100644 index 00000000000000..7298995ab1c120 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152054.Ci7Set.rst @@ -0,0 +1,2 @@ +Speed up matching of case-insensitive character sets in :mod:`re`, such as +``[a-z]+`` used with the :const:`re.IGNORECASE` flag. Patch by Pieter Eendebak. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 6e6ae46f05a50f..71eb7541d35ba5 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -213,6 +213,29 @@ SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) ptr++; break; + case SRE_OP_IN_IGNORE: + /* repeated set, case-insensitive (ascii) */ + TRACE(("|%p|%p|COUNT IN_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset)(state, pattern + 2, + (SRE_CODE) sre_lower_ascii(*ptr))) + ptr++; + break; + + case SRE_OP_IN_UNI_IGNORE: + /* repeated set, case-insensitive (unicode) */ + TRACE(("|%p|%p|COUNT IN_UNI_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset)(state, pattern + 2, + (SRE_CODE) sre_lower_unicode(*ptr))) + ptr++; + break; + + case SRE_OP_IN_LOC_IGNORE: + /* repeated set, case-insensitive (locale) */ + TRACE(("|%p|%p|COUNT IN_LOC_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset_loc_ignore)(state, pattern + 2, *ptr)) + ptr++; + break; + case SRE_OP_ANY: /* repeated dot wildcard. */ TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));