From 090c88742761529e96d5f007ec270a53316f0650 Mon Sep 17 00:00:00 2001 From: Kevin Deldycke Date: Tue, 30 Jun 2026 19:27:22 +0400 Subject: [PATCH] gh-142035: Make `TextWrapper` ANSI-aware (#152702) --- Doc/library/textwrap.rst | 27 ++++++- Doc/whatsnew/3.16.rst | 11 +++ Lib/argparse.py | 10 ++- Lib/idlelib/idle_test/test_calltip.py | 2 +- Lib/test/test_argparse.py | 26 +++++++ Lib/test/test_textwrap.py | 72 ++++++++++++++++++- Lib/textwrap.py | 62 ++++++++++++---- ...6-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst | 5 ++ ...-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst | 4 ++ 9 files changed, 199 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst create mode 100644 Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst index d12968dee91f3c..2970a291a1d1f3 100644 --- a/Doc/library/textwrap.rst +++ b/Doc/library/textwrap.rst @@ -19,7 +19,7 @@ functions should be good enough; otherwise, you should use an instance of replace_whitespace=True, fix_sentence_endings=False, \ break_long_words=True, drop_whitespace=True, \ break_on_hyphens=True, tabsize=8, max_lines=None, \ - placeholder=' [...]') + placeholder=' [...]', text_len=len) Wraps the single paragraph in *text* (a string) so every line is at most *width* characters long. Returns a list of output lines, without final @@ -37,7 +37,7 @@ functions should be good enough; otherwise, you should use an instance of replace_whitespace=True, fix_sentence_endings=False, \ break_long_words=True, drop_whitespace=True, \ break_on_hyphens=True, tabsize=8, \ - max_lines=None, placeholder=' [...]') + max_lines=None, placeholder=' [...]', text_len=len) Wraps the single paragraph in *text*, and returns a single string containing the wrapped paragraph. :func:`fill` is shorthand for :: @@ -50,7 +50,7 @@ functions should be good enough; otherwise, you should use an instance of .. 
function:: shorten(text, width, *, fix_sentence_endings=False, \ break_long_words=True, break_on_hyphens=True, \ - placeholder=' [...]') + placeholder=' [...]', text_len=len) Collapse and truncate the given *text* to fit in the given *width*. @@ -293,6 +293,27 @@ hyphenated words; only then will long words be broken if necessary, unless .. versionadded:: 3.4 + .. attribute:: text_len + + (default: :func:`len`) Callable used to measure the visible width of a + string when deciding where to wrap. Override the default to account for + characters that are not a single column wide, such as zero-width or + double-width characters, or invisible ANSI escape sequences:: + + >>> import re, textwrap + >>> visible_len = lambda s: len(re.sub(r'\x1b\[[0-9;]*m', '', s)) + >>> colored = 'normal \x1b[31mcolored\x1b[0m words here' + >>> lines = textwrap.wrap(colored, width=14, text_len=visible_len) + >>> [re.sub(r'\x1b\[[0-9;]*m', '', line) for line in lines] + ['normal colored', 'words here'] + + The callable must return a non-negative integer. It is assumed to be + additive over the whitespace- and hyphen-delimited chunks that wrapping + produces; a chunk that is too long to fit is split by visible width. + + .. versionadded:: 3.16 + + :class:`TextWrapper` also provides some public methods, analogous to the module-level convenience functions: diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index 1a73a79a58b78b..d92fcda3de545e 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -297,6 +297,17 @@ shlex (Contributed by Jay Berry in :gh:`148846`.) +textwrap +-------- + +* Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`, + :func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes + how the visible width of a string is measured, so text that contains + zero-width or double-width characters, or invisible ANSI escape sequences, + can be wrapped correctly. + (Contributed by Kevin Deldycke in :gh:`152702`.) 
+ + tkinter ------- diff --git a/Lib/argparse.py b/Lib/argparse.py index 29e6ebb9634261..7ee5791014e180 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -771,19 +771,25 @@ def _iter_indented_subactions(self, action): yield from get_subactions() self._dedent() + def _text_len(self, text): + # Measure the visible width of *text*, ignoring any ANSI color escape + # sequences that may have been inserted for colored help output. + return len(self._decolor(text)) + def _split_lines(self, text, width): text = self._whitespace_matcher.sub(' ', text).strip() # The textwrap module is used only for formatting help. # Delay its import for speeding up the common usage of argparse. import textwrap - return textwrap.wrap(text, width) + return textwrap.wrap(text, width, text_len=self._text_len) def _fill_text(self, text, width, indent): text = self._whitespace_matcher.sub(' ', text).strip() import textwrap return textwrap.fill(text, width, initial_indent=indent, - subsequent_indent=indent) + subsequent_indent=indent, + text_len=self._text_len) def _get_help_string(self, action): return action.help diff --git a/Lib/idlelib/idle_test/test_calltip.py b/Lib/idlelib/idle_test/test_calltip.py index 28c196a42672fc..c1fea6076972f6 100644 --- a/Lib/idlelib/idle_test/test_calltip.py +++ b/Lib/idlelib/idle_test/test_calltip.py @@ -105,7 +105,7 @@ def test_signature_wrap(self): (width=70, initial_indent='', subsequent_indent='', expand_tabs=True, replace_whitespace=True, fix_sentence_endings=False, break_long_words=True, drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None, - placeholder=' [...]') + placeholder=' [...]', text_len=<built-in function len>) Object for wrapping/filling text. The public interface consists of the wrap() and fill() methods; the other methods are just there for subclasses to override in order to tweak the default behaviour. 
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1dc3f538f4ad8b..b1b1eeed4e9767 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -7869,6 +7869,32 @@ def test_help_with_format_specifiers(self): self.assertIn(f'type: {interp}int{reset}', help_text) self.assertIn(f'choices: {interp}a, b{reset}', help_text) + def test_colored_help_wraps_like_plain_help(self): + # gh-142035: ANSI color escapes in the help text (around the + # interpolated "(default: ...)" value) must not change where lines + # wrap. Stripping the colors must yield exactly the plain layout. + env = self.enterContext(os_helper.EnvironmentVarGuard()) + env["COLUMNS"] = "70" + + def build(color): + parser = argparse.ArgumentParser( + prog="PROG", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + color=color, + ) + parser.add_argument( + "--verbose", + action="store_true", + help="A l o n g d e s c r i p t i o n f o r t h e v e r b " + "o s e f l a g t o d e m o n s t r a t e w r a p p i n g", + ) + parser.add_argument("--input", default="input.txt", help="Input file path") + return parser + + colored = build(color=True).format_help() + plain = build(color=False).format_help() + self.assertEqual(_colorize.decolor(colored), plain) + def test_print_help_uses_target_file_for_color_decision(self): parser = argparse.ArgumentParser(prog='PROG', color=True) parser.add_argument('--opt') diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index aca1f427656bb5..28dde4763a581f 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -8,6 +8,7 @@ # $Id$ # +import re import unittest from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten @@ -1133,5 +1134,74 @@ def test_first_word_too_long_but_placeholder_fits(self): self.check_shorten("Helloo", 5, "[...]") -if __name__ == '__main__': +class TextLenTestCase(BaseTestCase): + # The text_len option customizes how the visible width of a string is + # measured. 
The motivating case is colored output, where invisible ANSI + # escape sequences must not count towards the line width (gh-142035). + + _ansi = re.compile(r"\x1b\[[0-9;]*m") + + @classmethod + def visible_len(cls, text): + return len(cls._ansi.sub("", text)) + + @classmethod + def decolor(cls, lines): + return [cls._ansi.sub("", line) for line in lines] + + @staticmethod + def color(text): + # Wrap every word in a pair of (zero visible width) escape sequences. + return " ".join(f"\x1b[31m{word}\x1b[0m" for word in text.split()) + + def check_shorten(self, text, width, expect, **kwargs): + self.check(shorten(text, width, **kwargs), expect) + + def test_default_text_len_is_len(self): + self.assertIs(TextWrapper().text_len, len) + + def test_explicit_len_matches_default(self): + text = "Hello there, how are you this fine day? I'm glad to hear it!" + self.check_wrap(text, 12, wrap(text, 12), text_len=len) + + def test_color_does_not_change_breaks(self): + text = "These are several short words to be wrapped and colored here" + for width in (10, 15, 20, 30): + with self.subTest(width=width): + lines = wrap(self.color(text), width, text_len=self.visible_len) + self.assertEqual(self.decolor(lines), wrap(text, width)) + + def test_color_respects_width(self): + lines = wrap( + self.color("one two three four five six seven"), + 9, + text_len=self.visible_len, + ) + for line in lines: + self.assertLessEqual(self.visible_len(line), 9) + + def test_break_long_word_by_visible_width(self): + word = "\x1b[31m" + "x" * 20 + "\x1b[0m" + lines = wrap(word, 8, text_len=self.visible_len) + self.assertEqual(self.decolor(lines), ["xxxxxxxx", "xxxxxxxx", "xxxx"]) + + def test_break_on_hyphens_with_color(self): + lines = wrap(self.color("spam-egg-ham-bacon"), 9, text_len=self.visible_len) + self.assertEqual(self.decolor(lines), ["spam-egg-", "ham-bacon"]) + + def test_shorten_with_text_len(self): + result = shorten( + self.color("one two three four five"), 12, text_len=self.visible_len + ) 
+ self.assertLessEqual(self.visible_len(result), 12) + self.assertEqual(self._ansi.sub("", result), "one [...]") + + def test_measure_is_not_limited_to_ansi(self): + # Any width measure works, e.g. counting every character as two columns. + double = lambda s: 2 * len(s) + self.check_wrap("aa bb cc dd", 4, ["aa", "bb", "cc", "dd"], text_len=double) + self.check_wrap("aa bb cc dd", 5, ["aa", "bb", "cc", "dd"], text_len=double) + + +if __name__ == "__main__": unittest.main() diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 2f213e34c2c329..77a619084ae43b 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -61,6 +61,11 @@ class TextWrapper: Truncate wrapped lines. placeholder (default: ' [...]') Append to the last line of truncated text. + text_len (default: len) + Callable returning the visible width of a string. Override the + default to account for characters that are not one column wide, + such as zero-width or double-width characters, or invisible ANSI + escape sequences. It should return a non-negative integer. """ unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' ')) @@ -122,7 +127,8 @@ def __init__(self, tabsize=8, *, max_lines=None, - placeholder=' [...]'): + placeholder=' [...]', + text_len=len): self.width = width self.initial_indent = initial_indent self.subsequent_indent = subsequent_indent @@ -135,6 +141,7 @@ def __init__(self, self.tabsize = tabsize self.max_lines = max_lines self.placeholder = placeholder + self.text_len = text_len # -- Private methods ----------------------------------------------- @@ -194,6 +201,28 @@ def _fix_sentence_endings(self, chunks): else: i += 1 + def _truncate_to_width(self, text, width): + """_truncate_to_width(text : string, width : int) -> string + + Return the longest prefix of *text* whose visible width, as measured + by ``self.text_len``, does not exceed *width*. 
With a custom text_len the + number of characters that fit need not equal *width*, so an over-long + word cannot be broken by slicing at the column count. At least one + character is always kept so that wrapping makes progress. + """ + # Fast path for the default len(): the width is the number of + # characters, so the prefix can be sliced directly. + if self.text_len is len: + return text[: max(width, 1)] + if self.text_len(text) <= width: + return text + cut = 1 + for i in range(1, len(text) + 1): + if self.text_len(text[:i]) > width: + break + cut = i + return text[:cut] + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): """_handle_long_word(chunks : [string], cur_line : [string], @@ -212,9 +241,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. if self.break_long_words and space_left > 0: - end = space_left chunk = reversed_chunks[-1] - if self.break_on_hyphens and len(chunk) > space_left: + # Keep as many leading characters as fit in the visible width. + end = len(self._truncate_to_width(chunk, space_left)) + if self.break_on_hyphens and self.text_len(chunk) > space_left: # break after last hyphen, but only if there are # non-hyphens before it hyphen = chunk.rfind('-', 0, space_left) @@ -256,7 +286,10 @@ def _wrap_chunks(self, chunks): indent = self.subsequent_indent else: indent = self.initial_indent - if len(indent) + len(self.placeholder.lstrip()) > self.width: + if ( + self.text_len(indent) + self.text_len(self.placeholder.lstrip()) + > self.width + ): raise ValueError("placeholder too large for max width") # Arrange in reverse order so items can be efficiently popped @@ -277,7 +310,7 @@ def _wrap_chunks(self, chunks): indent = self.initial_indent # Maximum width for this line. 
- width = self.width - len(indent) + width = self.width - self.text_len(indent) # First chunk on line is whitespace -- drop it, unless this # is the very beginning of the text (ie. no lines started yet). @@ -285,7 +318,7 @@ def _wrap_chunks(self, chunks): del chunks[-1] while chunks: - l = len(chunks[-1]) + l = self.text_len(chunks[-1]) # Can at least squeeze this chunk onto the current line. if cur_len + l <= width: @@ -298,13 +331,13 @@ def _wrap_chunks(self, chunks): # The current line is full, and the next chunk is too big to # fit on *any* line (not just this one). - if chunks and len(chunks[-1]) > width: + if chunks and self.text_len(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) - cur_len = sum(map(len, cur_line)) + cur_len = sum(map(self.text_len, cur_line)) # If the last chunk on this line is all whitespace, drop it. if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': - cur_len -= len(cur_line[-1]) + cur_len -= self.text_len(cur_line[-1]) del cur_line[-1] if cur_line: @@ -320,17 +353,20 @@ def _wrap_chunks(self, chunks): else: while cur_line: if (cur_line[-1].strip() and - cur_len + len(self.placeholder) <= width): + cur_len + self.text_len(self.placeholder) <= width): cur_line.append(self.placeholder) lines.append(indent + ''.join(cur_line)) break - cur_len -= len(cur_line[-1]) + cur_len -= self.text_len(cur_line[-1]) del cur_line[-1] else: if lines: prev_line = lines[-1].rstrip() - if (len(prev_line) + len(self.placeholder) <= - self.width): + if ( + self.text_len(prev_line) + + self.text_len(self.placeholder) + <= self.width + ): lines[-1] = prev_line + self.placeholder break lines.append(indent + self.placeholder.lstrip()) diff --git a/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst b/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst new file mode 100644 index 00000000000000..b755520143d7d9 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst @@ -0,0 +1,5 @@ +Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`, +:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes how +the visible width of a string is measured, allowing text that contains +zero-width or double-width characters, or invisible ANSI escape sequences, to +be wrapped correctly. diff --git a/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst b/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst new file mode 100644 index 00000000000000..e725a3bb8b1206 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst @@ -0,0 +1,4 @@ +Fix :mod:`argparse` help text wrapping when colors are enabled. ANSI escape +sequences inserted around interpolated values such as the ``(default: ...)`` +suffix no longer count towards the line width, so colored help wraps at the +same place as the equivalent uncolored help.