From 090c88742761529e96d5f007ec270a53316f0650 Mon Sep 17 00:00:00 2001
From: Kevin Deldycke <kevin@deldycke.com>
Date: Tue, 30 Jun 2026 19:27:22 +0400
Subject: [PATCH] gh-142035: Make `TextWrapper` ANSI-aware (#152702)

---
 Doc/library/textwrap.rst                      | 27 ++++++-
 Doc/whatsnew/3.16.rst                         | 11 +++
 Lib/argparse.py                               | 10 ++-
 Lib/idlelib/idle_test/test_calltip.py         |  2 +-
 Lib/test/test_argparse.py                     | 26 +++++++
 Lib/test/test_textwrap.py                     | 72 ++++++++++++++++++-
 Lib/textwrap.py                               | 62 ++++++++++++----
 ...6-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst |  5 ++
 ...-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst |  4 ++
 9 files changed, 199 insertions(+), 20 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst

diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index d12968dee91f3c..2970a291a1d1f3 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -19,7 +19,7 @@ functions should be good enough; otherwise, you should use an instance of
                    replace_whitespace=True, fix_sentence_endings=False, \
                    break_long_words=True, drop_whitespace=True, \
                    break_on_hyphens=True, tabsize=8, max_lines=None, \
-                   placeholder=' [...]')
+                   placeholder=' [...]', text_len=len)
 
    Wraps the single paragraph in *text* (a string) so every line is at most
    *width* characters long.  Returns a list of output lines, without final
@@ -37,7 +37,7 @@ functions should be good enough; otherwise, you should use an instance of
                    replace_whitespace=True, fix_sentence_endings=False, \
                    break_long_words=True, drop_whitespace=True, \
                    break_on_hyphens=True, tabsize=8, \
-                   max_lines=None, placeholder=' [...]')
+                   max_lines=None, placeholder=' [...]', text_len=len)
 
    Wraps the single paragraph in *text*, and returns a single string containing the
    wrapped paragraph.  :func:`fill` is shorthand for  ::
@@ -50,7 +50,7 @@ functions should be good enough; otherwise, you should use an instance of
 
 .. function:: shorten(text, width, *, fix_sentence_endings=False, \
                       break_long_words=True, break_on_hyphens=True, \
-                      placeholder=' [...]')
+                      placeholder=' [...]', text_len=len)
 
    Collapse and truncate the given *text* to fit in the given *width*.
 
@@ -293,6 +293,27 @@ hyphenated words; only then will long words be broken if necessary, unless
       .. versionadded:: 3.4
 
 
+   .. attribute:: text_len
+
+      (default: :func:`len`) Callable used to measure the visible width of a
+      string when deciding where to wrap.  Override the default to account for
+      characters that are not a single column wide, such as zero-width or
+      double-width characters, or invisible ANSI escape sequences::
+
+         >>> import re, textwrap
+         >>> visible_len = lambda s: len(re.sub(r'\x1b\[[0-9;]*m', '', s))
+         >>> colored = 'normal \x1b[31mcolored\x1b[0m words here'
+         >>> lines = textwrap.wrap(colored, width=14, text_len=visible_len)
+         >>> [re.sub(r'\x1b\[[0-9;]*m', '', line) for line in lines]
+         ['normal colored', 'words here']
+
+      The callable must return a non-negative integer.  The width of a line is
+      taken to be the sum of the widths of the whitespace- and hyphen-delimited
+      chunks on it; a chunk too wide for any line is split by visible width.
+
+      .. versionadded:: 3.16
+
+
    :class:`TextWrapper` also provides some public methods, analogous to the
    module-level convenience functions:
 
diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
index 1a73a79a58b78b..d92fcda3de545e 100644
--- a/Doc/whatsnew/3.16.rst
+++ b/Doc/whatsnew/3.16.rst
@@ -297,6 +297,17 @@ shlex
   (Contributed by Jay Berry in :gh:`148846`.)
 
 
+textwrap
+--------
+
+* Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
+  :func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`.  It customizes
+  how the visible width of a string is measured, so text that contains
+  zero-width or double-width characters, or invisible ANSI escape sequences,
+  can be wrapped correctly.
+  (Contributed by Kevin Deldycke in :gh:`152702`.)
+
+
 tkinter
 -------
 
diff --git a/Lib/argparse.py b/Lib/argparse.py
index 29e6ebb9634261..7ee5791014e180 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -771,19 +771,25 @@ def _iter_indented_subactions(self, action):
             yield from get_subactions()
             self._dedent()
 
+    def _text_len(self, text):
+        """Return the width of *text* once ANSI color escapes are removed."""
+        visible = self._decolor(text)
+        return len(visible)
+
     def _split_lines(self, text, width):
         text = self._whitespace_matcher.sub(' ', text).strip()
         # The textwrap module is used only for formatting help.
         # Delay its import for speeding up the common usage of argparse.
         import textwrap
-        return textwrap.wrap(text, width)
+        return textwrap.wrap(text, width, text_len=self._text_len)
 
     def _fill_text(self, text, width, indent):
         text = self._whitespace_matcher.sub(' ', text).strip()
         import textwrap
         return textwrap.fill(text, width,
                              initial_indent=indent,
-                             subsequent_indent=indent)
+                             subsequent_indent=indent,
+                             text_len=self._text_len)
 
     def _get_help_string(self, action):
         return action.help
diff --git a/Lib/idlelib/idle_test/test_calltip.py b/Lib/idlelib/idle_test/test_calltip.py
index 28c196a42672fc..c1fea6076972f6 100644
--- a/Lib/idlelib/idle_test/test_calltip.py
+++ b/Lib/idlelib/idle_test/test_calltip.py
@@ -105,7 +105,7 @@ def test_signature_wrap(self):
 (width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
     replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
     drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
-    placeholder=' [...]')
+    placeholder=' [...]', text_len=<built-in function len>)
 Object for wrapping/filling text.  The public interface consists of
 the wrap() and fill() methods; the other methods are just there for
 subclasses to override in order to tweak the default behaviour.
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 1dc3f538f4ad8b..b1b1eeed4e9767 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -7869,6 +7869,32 @@ def test_help_with_format_specifiers(self):
         self.assertIn(f'type: {interp}int{reset}', help_text)
         self.assertIn(f'choices: {interp}a, b{reset}', help_text)
 
+    def test_colored_help_wraps_like_plain_help(self):
+        # gh-142035: ANSI color escapes in the help text (around the
+        # interpolated "(default: ...)" value) must not change where lines
+        # wrap. Stripping the colors must yield exactly the plain layout.
+        env = self.enterContext(os_helper.EnvironmentVarGuard())
+        env["COLUMNS"] = "70"
+
+        def build(color):
+            parser = argparse.ArgumentParser(
+                prog="PROG",
+                formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                color=color,
+            )
+            parser.add_argument(
+                "--verbose",
+                action="store_true",
+                help="A l o n g d e s c r i p t i o n f o r t h e v e r b "
+                "o s e f l a g t o d e m o n s t r a t e w r a p p i n g",
+            )
+            parser.add_argument("--input", default="input.txt", help="Input file path")
+            return parser
+
+        colored = build(color=True).format_help()
+        plain = build(color=False).format_help()
+        self.assertEqual(_colorize.decolor(colored), plain)
+
     def test_print_help_uses_target_file_for_color_decision(self):
         parser = argparse.ArgumentParser(prog='PROG', color=True)
         parser.add_argument('--opt')
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index aca1f427656bb5..28dde4763a581f 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -8,6 +8,7 @@
 # $Id$
 #
 
+import re
 import unittest
 
 from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
@@ -1133,5 +1134,74 @@ def test_first_word_too_long_but_placeholder_fits(self):
         self.check_shorten("Helloo", 5, "[...]")
 
 
-if __name__ == '__main__':
+class TextLenTestCase(BaseTestCase):
+    # The text_len option customizes how the visible width of a string is
+    # measured. The motivating case is colored output, where invisible ANSI
+    # escape sequences must not count towards the line width (gh-142035).
+
+    _ansi = re.compile(r"\x1b\[[0-9;]*m")
+
+    @classmethod
+    def visible_len(cls, text):
+        return len(cls._ansi.sub("", text))
+
+    @classmethod
+    def decolor(cls, lines):
+        return [cls._ansi.sub("", line) for line in lines]
+
+    @staticmethod
+    def color(text):
+        # Wrap every word in a pair of (zero visible width) escape sequences.
+        return " ".join(f"\x1b[31m{word}\x1b[0m" for word in text.split())
+
+    def check_shorten(self, text, width, expect, **kwargs):
+        self.check(shorten(text, width, **kwargs), expect)
+
+    def test_default_text_len_is_len(self):
+        self.assertIs(TextWrapper().text_len, len)
+
+    def test_explicit_len_matches_default(self):
+        text = "Hello there, how are you this fine day?  I'm glad to hear it!"
+        self.check_wrap(text, 12, wrap(text, 12), text_len=len)
+
+    def test_color_does_not_change_breaks(self):
+        text = "These are several short words to be wrapped and colored here"
+        for width in (10, 15, 20, 30):
+            with self.subTest(width=width):
+                lines = wrap(self.color(text), width, text_len=self.visible_len)
+                self.assertEqual(self.decolor(lines), wrap(text, width))
+
+    def test_color_respects_width(self):
+        lines = wrap(
+            self.color("one two three four five six seven"),
+            9,
+            text_len=self.visible_len,
+        )
+        for line in lines:
+            self.assertLessEqual(self.visible_len(line), 9)
+
+    def test_break_long_word_by_visible_width(self):
+        word = "\x1b[31m" + "x" * 20 + "\x1b[0m"
+        lines = wrap(word, 8, text_len=self.visible_len)
+        self.assertEqual(self.decolor(lines), ["xxxxxxxx", "xxxxxxxx", "xxxx"])
+
+    def test_break_on_hyphens_with_color(self):
+        lines = wrap(self.color("spam-egg-ham-bacon"), 9, text_len=self.visible_len)
+        self.assertEqual(self.decolor(lines), ["spam-egg-", "ham-bacon"])
+
+    def test_shorten_with_text_len(self):
+        result = shorten(
+            self.color("one two three four five"), 12, text_len=self.visible_len
+        )
+        self.assertLessEqual(self.visible_len(result), 12)
+        self.assertEqual(self._ansi.sub("", result), "one [...]")
+
+    def test_measure_is_not_limited_to_ansi(self):
+        # Any width measure works, e.g. counting every character as two columns.
+        double = lambda s: 2 * len(s)
+        self.check_wrap("aa bb cc dd", 4, ["aa", "bb", "cc", "dd"], text_len=double)
+        self.check_wrap("aa bb cc dd", 5, ["aa", "bb", "cc", "dd"], text_len=double)
+
+
+if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 2f213e34c2c329..77a619084ae43b 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -61,6 +61,11 @@ class TextWrapper:
         Truncate wrapped lines.
       placeholder (default: ' [...]')
         Append to the last line of truncated text.
+      text_len (default: len)
+        Callable returning the visible width of a string.  Override the
+        default to account for characters that are not one column wide,
+        such as zero-width or double-width characters, or invisible ANSI
+        escape sequences.  It should return a non-negative integer.
     """
 
     unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))
@@ -122,7 +127,8 @@ def __init__(self,
                  tabsize=8,
                  *,
                  max_lines=None,
-                 placeholder=' [...]'):
+                 placeholder=' [...]',
+                 text_len=len):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -135,6 +141,7 @@ def __init__(self,
         self.tabsize = tabsize
         self.max_lines = max_lines
         self.placeholder = placeholder
+        self.text_len = text_len
 
 
     # -- Private methods -----------------------------------------------
@@ -194,6 +201,28 @@ def _fix_sentence_endings(self, chunks):
             else:
                 i += 1
 
+    def _truncate_to_width(self, text, width):
+        """_truncate_to_width(text : string, width : int) -> string
+
+        Return the longest prefix of *text* whose ``self.text_len`` width is
+        at most *width*, cut only where the two parts' widths sum to the
+        whole (so escape sequences stay intact).  Keeps >= 1 character.
+        """
+        if self.text_len is len:
+            return text[: max(width, 1)]  # width == character count
+        total = self.text_len(text)
+        if total <= width:
+            return text
+        cut = 1
+        for i in range(1, len(text)):
+            used = self.text_len(text[:i])
+            if used + self.text_len(text[i:]) != total:
+                continue  # inside an indivisible sequence: not a cut point
+            if used > width:
+                break  # a clean prefix is too wide; longer ones will be too
+            cut = i
+        return text[:cut]
+
     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         """_handle_long_word(chunks : [string],
                              cur_line : [string],
@@ -212,9 +241,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         # If we're allowed to break long words, then do so: put as much
         # of the next chunk onto the current line as will fit.
         if self.break_long_words and space_left > 0:
-            end = space_left
             chunk = reversed_chunks[-1]
-            if self.break_on_hyphens and len(chunk) > space_left:
+            # Keep as many leading characters as fit in the visible width.
+            end = len(self._truncate_to_width(chunk, space_left))
+            if self.break_on_hyphens and self.text_len(chunk) > space_left:
                 # break after last hyphen, but only if there are
                 # non-hyphens before it
                 hyphen = chunk.rfind('-', 0, space_left)
@@ -256,7 +286,10 @@ def _wrap_chunks(self, chunks):
                 indent = self.subsequent_indent
             else:
                 indent = self.initial_indent
-            if len(indent) + len(self.placeholder.lstrip()) > self.width:
+            if (
+                self.text_len(indent) + self.text_len(self.placeholder.lstrip())
+                > self.width
+            ):
                 raise ValueError("placeholder too large for max width")
 
         # Arrange in reverse order so items can be efficiently popped
@@ -277,7 +310,7 @@ def _wrap_chunks(self, chunks):
                 indent = self.initial_indent
 
             # Maximum width for this line.
-            width = self.width - len(indent)
+            width = self.width - self.text_len(indent)
 
             # First chunk on line is whitespace -- drop it, unless this
             # is the very beginning of the text (ie. no lines started yet).
@@ -285,7 +318,7 @@ def _wrap_chunks(self, chunks):
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = self.text_len(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -298,13 +331,13 @@ def _wrap_chunks(self, chunks):
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and self.text_len(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
-                cur_len = sum(map(len, cur_line))
+                cur_len = sum(map(self.text_len, cur_line))
 
             # If the last chunk on this line is all whitespace, drop it.
             if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
-                cur_len -= len(cur_line[-1])
+                cur_len -= self.text_len(cur_line[-1])
                 del cur_line[-1]
 
             if cur_line:
@@ -320,17 +353,20 @@ def _wrap_chunks(self, chunks):
                 else:
                     while cur_line:
                         if (cur_line[-1].strip() and
-                            cur_len + len(self.placeholder) <= width):
+                            cur_len + self.text_len(self.placeholder) <= width):
                             cur_line.append(self.placeholder)
                             lines.append(indent + ''.join(cur_line))
                             break
-                        cur_len -= len(cur_line[-1])
+                        cur_len -= self.text_len(cur_line[-1])
                         del cur_line[-1]
                     else:
                         if lines:
                             prev_line = lines[-1].rstrip()
-                            if (len(prev_line) + len(self.placeholder) <=
-                                    self.width):
+                            if (
+                                self.text_len(prev_line)
+                                + self.text_len(self.placeholder)
+                                <= self.width
+                            ):
                                 lines[-1] = prev_line + self.placeholder
                                 break
                         lines.append(indent + self.placeholder.lstrip())
diff --git a/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst b/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst
new file mode 100644
index 00000000000000..b755520143d7d9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-30-19-38-56.gh-issue-56708.Cz9Xpw.rst
@@ -0,0 +1,5 @@
+Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
+:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`.  It customizes how
+the visible width of a string is measured, allowing text that contains
+zero-width or double-width characters, or invisible ANSI escape sequences, to
+be wrapped correctly.
diff --git a/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst b/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst
new file mode 100644
index 00000000000000..e725a3bb8b1206
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-30-19-38-57.gh-issue-142035.Vt2Mhd.rst
@@ -0,0 +1,4 @@
+Fix :mod:`argparse` help text wrapping when colors are enabled.  ANSI escape
+sequences inserted around interpolated values such as the ``(default: ...)``
+suffix no longer count towards the line width, so colored help wraps at the
+same place as the equivalent uncolored help.