bpo-28660: Make TextWrapper break long words on hyphens (GH-22721)

author Irit Katriel <iritkatriel@yahoo.com>

Sun, 18 Oct 2020 17:01:15 +0000 (18:01 +0100)

committer GitHub <noreply@github.com>

Sun, 18 Oct 2020 17:01:15 +0000 (20:01 +0300)
author Irit Katriel <iritkatriel@yahoo.com>
Sun, 18 Oct 2020 17:01:15 +0000 (18:01 +0100)
committer GitHub <noreply@github.com>
Sun, 18 Oct 2020 17:01:15 +0000 (20:01 +0300)
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py

index ed97f70ba1fa40e829c3a8d1a6387ac631c6f1a7..dfbc2b93dfc0d6dc5a77c67bb88b3972f6cda4c9 100644 (file)
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -640,6 +640,78 @@ How *do* you spell that odd word, anyways?
                          max_lines=4)
  
  
+class LongWordWithHyphensTestCase(BaseTestCase):
+    def setUp(self):
+        self.wrapper = TextWrapper()
+        self.text1 = '''\
+We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase.
+'''
+        self.text2 = '''\
+1234567890-1234567890--this_is_a_very_long_option_indeed-good-bye"
+'''
+
+    def test_break_long_words_on_hyphen(self):
+        expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-',
+                    'cyclohexadiene-1-carboxylate synthase.']
+        self.check_wrap(self.text1, 50, expected)
+
+        expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-',
+                    'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.']
+        self.check_wrap(self.text1, 10, expected)
+
+        expected = ['1234567890',  '-123456789', '0--this_is', '_a_very_lo',
+                    'ng_option_', 'indeed-', 'good-bye"']
+        self.check_wrap(self.text2, 10, expected)
+
+    def test_break_long_words_not_on_hyphen(self):
+        expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad',
+                    'iene-1-carboxylate synthase.']
+        self.check_wrap(self.text1, 50, expected, break_on_hyphens=False)
+
+        expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,',
+                    '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.']
+        self.check_wrap(self.text1, 10, expected, break_on_hyphens=False)
+
+        expected = ['1234567890',  '-123456789', '0--this_is', '_a_very_lo',
+                    'ng_option_', 'indeed-', 'good-bye"']
+        self.check_wrap(self.text2, 10, expected)
+
+    def test_break_on_hyphen_but_not_long_words(self):
+        expected = ['We used enyzme',
+                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+                    'synthase.']
+
+        self.check_wrap(self.text1, 50, expected, break_long_words=False)
+
+        expected = ['We used', 'enyzme',
+                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+                    'synthase.']
+        self.check_wrap(self.text1, 10, expected, break_long_words=False)
+
+        expected = ['1234567890',  '-123456789', '0--this_is', '_a_very_lo',
+                    'ng_option_', 'indeed-', 'good-bye"']
+        self.check_wrap(self.text2, 10, expected)
+
+
+    def test_do_not_break_long_words_or_on_hyphens(self):
+        expected = ['We used enyzme',
+                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+                    'synthase.']
+        self.check_wrap(self.text1, 50, expected,
+                        break_long_words=False,
+                        break_on_hyphens=False)
+
+        expected = ['We used', 'enyzme',
+                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+                    'synthase.']
+        self.check_wrap(self.text1, 10, expected,
+                        break_long_words=False,
+                        break_on_hyphens=False)
+
+        expected = ['1234567890',  '-123456789', '0--this_is', '_a_very_lo',
+                    'ng_option_', 'indeed-', 'good-bye"']
+        self.check_wrap(self.text2, 10, expected)
+
  class IndentTestCases(BaseTestCase):
  
      # called before each test method
diff --git a/Lib/textwrap.py b/Lib/textwrap.py

index 30e693c8de03548344a038d0dc946e1d3a3ca8d0..841de9baecf5d8a497b26d1648081d69a4612e5e 100644 (file)
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -215,8 +215,16 @@ class TextWrapper:
          # If we're allowed to break long words, then do so: put as much
          # of the next chunk onto the current line as will fit.
          if self.break_long_words:
-            cur_line.append(reversed_chunks[-1][:space_left])
-            reversed_chunks[-1] = reversed_chunks[-1][space_left:]
+            end = space_left
+            chunk = reversed_chunks[-1]
+            if self.break_on_hyphens and len(chunk) > space_left:
+                # break after last hyphen, but only if there are
+                # non-hyphens before it
+                hyphen = chunk.rfind('-', 0, space_left)
+                if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
+                    end = hyphen + 1
+            cur_line.append(chunk[:end])
+            reversed_chunks[-1] = chunk[end:]
  
          # Otherwise, we have to preserve the long word intact.  Only add
          # it to the current line if there's nothing already there --
diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst

new file mode 100644 (file)

index 0000000..d679934
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst
@@ -0,0 +1 @@
+:func:`textwrap.wrap` now attempts to break long words after hyphens when ``break_long_words=True`` and ``break_on_hyphens=True``.
author	Irit Katriel <iritkatriel@yahoo.com>
	Sun, 18 Oct 2020 17:01:15 +0000 (18:01 +0100)
committer	GitHub <noreply@github.com>
	Sun, 18 Oct 2020 17:01:15 +0000 (20:01 +0300)
Lib/test/test_textwrap.py		patch | blob | blame | history
Lib/textwrap.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst	[new file with mode: 0644]	patch | blob