From: Greg Ward
Date: Thu, 3 Jun 2004 01:59:41 +0000 (+0000)
Subject: SF #965425: fix so hyphenated words surrounded by punctuation are
X-Git-Tag: v2.4a1~295
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6186410db06f28b3032a882d5ba7a2e3dcf95a80;p=thirdparty%2FPython%2Fcpython.git

SF #965425: fix so hyphenated words surrounded by punctuation are
wrapped correctly.
---

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 5ff4bcca2960..8c7279d8f810 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1,5 +1,5 @@
 #
-# Test script for the textwrap module.
+# Test suite for the textwrap module.
 #
 # Original tests written by Greg Ward .
 # Converted to PyUnit by Peter Hansen .
@@ -271,6 +271,23 @@ What a mess!
         self.check_split("foo --option-opt bar",
                          ["foo", " ", "--option-", "opt", " ", "bar"])
 
+    def test_punct_hyphens(self):
+        # Oh bother, SF #965425 found another problem with hyphens --
+        # hyphenated words in single quotes weren't handled correctly.
+        # In fact, the bug is that *any* punctuation around a hyphenated
+        # word was handled incorrectly, except for a leading "--", which
+        # was special-cased for Optik and Docutils. So test a variety
+        # of styles of punctuation around a hyphenated word.
+        # (Actually this is based on an Optik bug report, #813077).
+        self.check_split("the 'wibble-wobble' widget",
+                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
+        self.check_split('the "wibble-wobble" widget',
+                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
+        self.check_split("the (wibble-wobble) widget",
+                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
+        self.check_split("the ['wibble-wobble'] widget",
+                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
+
     def test_funky_parens (self):
         # Second part of SF bug #596434: long option strings inside
         # parentheses.
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index d9df01928c61..32ab10bfbacd 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -79,11 +79,11 @@ class TextWrapper:
     # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     wordsep_re = re.compile(r'(\s+|'                  # any whitespace
-                            r'-*\w{2,}-(?=\w{2,})|'   # hyphenated words
+                            r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words
                             r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
-    # XXX will there be a locale-or-charset-aware version of
-    # string.lowercase in 2.3?
+    # XXX this is not locale- or charset-aware -- string.lowercase
+    # is US-ASCII only (and therefore English-only)
     sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                  r'[\.\!\?]'          # sentence-ending punct.
                                  r'[\"\']?'           # optional end-of-quote