Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive...

author Christopher Lenz <cmlenz@gmail.com>

Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)

committer Christopher Lenz <cmlenz@gmail.com>

Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)
author Christopher Lenz <cmlenz@gmail.com>
Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)
committer Christopher Lenz <cmlenz@gmail.com>
Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)
diff --git a/babel/catalog/frontend.py b/babel/catalog/frontend.py

index 5c424d5d122a9c23d11095bdcd0ac68631f21bba..444f58e4d96509f13f8787afb504ccb735e98adb 100644 (file)
--- a/babel/catalog/frontend.py
+++ b/babel/catalog/frontend.py
@@ -62,10 +62,10 @@ class extract_messages(Command):
          ('output-file=', 'o',
           'name of the output file'),
          ('width=', 'w',
-         'set output line width. Default: 76'),
+         'set output line width (default 76)'),
          ('no-wrap', None,
-         'do not break long message lines, longer than the output '
-         'line width, into several lines.')
+         'do not break long message lines, longer than the output line width, '
+         'into several lines')
      ]
      boolean_options = [
          'no-default-keywords', 'no-location', 'omit-header', 'no-wrap'
@@ -73,36 +73,36 @@ class extract_messages(Command):
  
      def initialize_options(self):
          self.charset = 'utf-8'
+        self.width = 76
+        self.no_wrap = False
          self.keywords = self._keywords = DEFAULT_KEYWORDS.copy()
          self.no_default_keywords = False
          self.no_location = False
          self.omit_header = False
          self.output_file = None
          self.input_dirs = None
-        self.width = None
-        self.no_wrap = False
  
      def finalize_options(self):
          if not self.input_dirs:
              self.input_dirs = dict.fromkeys([k.split('.',1)[0]
                  for k in self.distribution.packages
              ]).keys()
+
          if self.no_default_keywords and not self.keywords:
-            raise DistutilsOptionError, \
-                'you must specify new keywords if you disable the default ones'
+            raise DistutilsOptionError('you must specify new keywords if you '
+                                       'disable the default ones')
          if self.no_default_keywords:
              self._keywords = {}
          if isinstance(self.keywords, basestring):
              self._keywords.update(parse_keywords(self.keywords.split()))
          self.keywords = self._keywords
+
          if self.no_wrap and self.width:
-            raise DistutilsOptionError, \
-                "'--no-wrap' and '--width' are mutually exclusive."
-        elif self.no_wrap and not self.width:
-            self.width = 0
-        elif not self.no_wrap and not self.width:
-            self.width = 76
-        elif self.width and not self.no_wrap:
+            raise DistutilsOptionError("'--no-wrap' and '--width' are mutually"
+                                       "exclusive")
+        if self.no_wrap:
+            self.width = None
+        else:
              self.width = int(self.width)
  
      def run(self):
@@ -115,11 +115,12 @@ class extract_messages(Command):
                  for filename, lineno, funcname, message in extracted:
                      messages.append((os.path.join(dirname, filename), lineno,
                                       funcname, message, None))
+
+            log.info('writing PO file to %s' % self.output_file)
              write_po(outfile, messages, project=self.distribution.get_name(),
-                     version=self.distribution.get_version(),
+                     version=self.distribution.get_version(), width=self.width,
                       charset=self.charset, no_location=self.no_location,
-                     omit_header=self.omit_header, width=self.width)
-            log.info('writing PO file to %s' % self.output_file)
+                     omit_header=self.omit_header)
          finally:
              outfile.close()
  
@@ -154,11 +155,11 @@ def main(argv=sys.argv):
      parser.add_option('-o', '--output', dest='output',
                        help='path to the output POT file')
      parser.add_option('-w', '--width', dest='width', type='int',
-                      help="set output line width. Default: 76")
+                      help="set output line width (default 76)")
      parser.add_option('--no-wrap', dest='no_wrap', default=False,
                        action = 'store_true', help='do not break long message '
                        'lines, longer than the output line width, into several '
-                      'lines.')
+                      'lines')
      options, args = parser.parse_args(argv[1:])
      if not args:
          parser.error('incorrect number of arguments')
@@ -193,9 +194,9 @@ def main(argv=sys.argv):
              for filename, lineno, funcname, message in extracted:
                  messages.append((os.path.join(dirname, filename), lineno,
                                   funcname, message, None))
-        write_po(outfile, messages,
+        write_po(outfile, messages, width=options.width,
                   charset=options.charset, no_location=options.no_location,
-                 omit_header=options.omit_header, width=options.width)
+                 omit_header=options.omit_header)
      finally:
          if options.output:
              outfile.close()
diff --git a/babel/catalog/pofile.py b/babel/catalog/pofile.py

index 1bf3271b36157e182c2306c0e4850fd2457a6830..f557b04261928c86ef4d78c77777d23d1e694f9e 100644 (file)
--- a/babel/catalog/pofile.py
+++ b/babel/catalog/pofile.py
@@ -18,83 +18,19 @@ format.
         <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
  """
  
-# TODO: line wrapping
-from textwrap import wrap
  from datetime import date, datetime
  import re
  try:
      set
  except NameError:
      from sets import Set as set
+import textwrap
  import time
  
  from babel import __version__ as VERSION
  
  __all__ = ['escape', 'normalize', 'read_po', 'write_po']
  
-POT_HEADER = """\
-# Translations Template for %%(project)s.
-# Copyright (C) YEAR ORGANIZATION
-# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: %%(project)s %%(version)s\\n"
-"POT-Creation-Date: %%(creation_date)s\\n"
-"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
-"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
-"Language-Team: LANGUAGE <LL@li.org>\\n"
-"MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=%%(charset)s\\n"
-"Content-Transfer-Encoding: 8bit\\n"
-"Generated-By: Babel %s\\n"
-
-""" % VERSION
-
-PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
-
-def escape(string):
-    r"""Escape the given string so that it can be included in double-quoted
-    strings in ``PO`` files.
-    
-    >>> escape('''Say:
-    ...   "hello, world!"
-    ... ''')
-    'Say:\\n  \\"hello, world!\\"\\n'
-    
-    :param string: the string to escape
-    :return: the escaped string
-    :rtype: `str` or `unicode`
-    """
-    return string.replace('\\', '\\\\') \
-                 .replace('\t', '\\t') \
-                 .replace('\r', '\\r') \
-                 .replace('\n', '\\n') \
-                 .replace('\"', '\\"')
-
-def normalize(string, charset='utf-8'):
-    """This converts a string into a format that is appropriate for .po files,
-    namely much closer to C style.
-    
-    :param string: the string to normalize
-    :param charset: the encoding to use for `unicode` strings
-    :return: the normalized string
-    :rtype: `str`
-    """
-    string = string.encode(charset, 'backslashreplace')
-    lines = string.split('\n')
-    if len(lines) == 1:
-        string = '"' + escape(string) + '"'
-    else:
-        if not lines[-1]:
-            del lines[-1]
-            lines[-1] = lines[-1] + '\n'
-        for i in range(len(lines)):
-            lines[i] = escape(lines[i])
-        lineterm = '\\n"\n"'
-        string = '""\n"' + lineterm.join(lines) + '"'
-    return string
-
  def read_po(fileobj):
      """Read messages from a ``gettext`` PO (portable object) file from the given
      file-like object.
@@ -195,6 +131,114 @@ def read_po(fileobj):
      if messages:
          yield pack()
  
+POT_HEADER = """\
+# Translations Template for %%(project)s.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: %%(project)s %%(version)s\\n"
+"POT-Creation-Date: %%(creation_date)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=%%(charset)s\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+"Generated-By: Babel %s\\n"
+
+""" % VERSION
+
+PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
+
+WORD_SEP = re.compile('('
+    r'\s+|'                                 # any whitespace
+    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
+    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
+')')
+
+def escape(string):
+    r"""Escape the given string so that it can be included in double-quoted
+    strings in ``PO`` files.
+    
+    >>> escape('''Say:
+    ...   "hello, world!"
+    ... ''')
+    '"Say:\\n  \\"hello, world!\\"\\n"'
+    
+    :param string: the string to escape
+    :return: the escaped string
+    :rtype: `str` or `unicode`
+    """
+    return '"%s"' % string.replace('\\', '\\\\') \
+                          .replace('\t', '\\t') \
+                          .replace('\r', '\\r') \
+                          .replace('\n', '\\n') \
+                          .replace('\"', '\\"')
+
+def normalize(string, width=76):
+    r"""This converts a string into a format that is appropriate for .po files.
+    
+    >>> print normalize('''Say:
+    ...   "hello, world!"
+    ... ''', width=None)
+    ""
+    "Say:\n"
+    "  \"hello, world!\"\n"
+    
+    >>> print normalize('''Say:
+    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
+    ... ''', width=32)
+    ""
+    "Say:\n"
+    "  \"Lorem ipsum dolor sit "
+    "amet, consectetur adipisicing"
+    " elit, \"\n"
+    
+    :param string: the string to normalize
+    :param width: the maximum line width; use `None`, 0, or a negative number
+                  to completely disable line wrapping
+    :param charset: the encoding to use for `unicode` strings
+    :return: the normalized string
+    :rtype: `unicode`
+    """
+    if width and width > 0:
+        lines = []
+        for idx, line in enumerate(string.splitlines(True)):
+            if len(escape(line)) > width:
+                chunks = WORD_SEP.split(line)
+                chunks.reverse()
+                while chunks:
+                    buf = []
+                    size = 2
+                    while chunks:
+                        l = len(escape(chunks[-1])) - 2
+                        if size + l < width:
+                            buf.append(chunks.pop())
+                            size += l
+                        else:
+                            if not buf:
+                                # handle long chunks by putting them on a
+                                # separate line
+                                buf.append(chunks.pop())
+                            break
+                    lines.append(u''.join(buf))
+            else:
+                lines.append(line)
+    else:
+        lines = string.splitlines(True)
+
+    if len(lines) == 1:
+        return escape(string)
+
+    # Remove empty trailing line
+    if not lines[-1]:
+        del lines[-1]
+        lines[-1] += '\n'
+
+    return u'""\n' + u'\n'.join([escape(l) for l in lines])
+
  def write_po(fileobj, messages, project='PROJECT', version='VERSION', width=76,
               charset='utf-8', no_location=False, omit_header=False):
      r"""Write a ``gettext`` PO (portable object) file to the given file-like
@@ -230,16 +274,23 @@ def write_po(fileobj, messages, project='PROJECT', version='VERSION', width=76,
      :param messages: an iterable over the messages
      :param project: the project name
      :param version: the project version
+    :param width: the maximum line width for the generated output; use `None`,
+                  0, or a negative number to completely disable line wrapping
      :param charset: the encoding
      :param no_location: do not emit a location comment for every message
      :param omit_header: do not include the ``msgid ""`` entry at the top of the
                          output
      """
      def _normalize(key):
-        return normalize(key, charset=charset)
+        return normalize(key, width=width).encode(charset, 'backslashreplace')
+
+    def _write(text):
+        if isinstance(text, unicode):
+            text = text.encode(charset)
+        fileobj.write(text)
  
      if not omit_header:
-        fileobj.write(POT_HEADER % {
+        _write(POT_HEADER % {
              'project': project,
              'version': version,
              'creation_date': time.strftime('%Y-%m-%d %H:%M%z'),
@@ -268,53 +319,22 @@ def write_po(fileobj, messages, project='PROJECT', version='VERSION', width=76,
  
      for msgid in msgids:
          if not no_location:
-            locs = [
-                u' %s:%s' % (fname, lineno) for
-                fname, lineno in locations[msgid]
-            ]
-            if width > 0:
-                wrapped = wrap(u''.join(locs), width, break_long_words=False)
-            else:
-                wrapped = locs
-            for line in wrapped:
-                fileobj.write(u'#: %s\n' % line.strip())
+            locs = u' '.join([u'%s:%d' % item for item in locations[msgid]])
+            if width and width > 0:
+                locs = textwrap.wrap(locs, width, break_long_words=False)
+            for line in locs:
+                _write('#: %s\n' % line.strip())
          flags = msgflags[msgid]
          if flags:
-            fileobj.write('#%s\n' % ', '.join([''] + list(flags)))
+            _write('#%s\n' % ', '.join([''] + list(flags)))
+
          if type(msgid) is tuple:
              assert len(msgid) == 2
-            if width > 0:
-                wrapped = wrap(msgid[0], width, break_long_words=False)
-            else:
-                wrapped = [msgid[0]]
-            if len(wrapped) == 1:
-                fileobj.write('msgid ')
-            else:
-                fileobj.write('msgid ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            if width > 0:
-                wrapped = wrap(msgid[1], width, break_long_words=False)
-            else:
-                wrapped = [msgid[1]]
-            if len(wrapped) == 1:
-                fileobj.write('msgid_plural ')
-            else:
-                fileobj.write('msgid_plural ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            fileobj.write('msgstr[0] ""\n')
-            fileobj.write('msgstr[1] ""\n')
+            _write('msgid %s\n' % _normalize(msgid[0]))
+            _write('msgid_plural %s\n' % _normalize(msgid[1]))
+            _write('msgstr[0] ""\n')
+            _write('msgstr[1] ""\n')
          else:
-            if width > 0:
-                wrapped = wrap(msgid, width, break_long_words=False)
-            else:
-                wrapped = [msgid]
-            if len(wrapped) == 1:
-                fileobj.write('msgid ')
-            else:
-                fileobj.write('msgid ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            fileobj.write('msgstr ""\n')
-        fileobj.write('\n')
+            _write('msgid %s\n' % _normalize(msgid))
+            _write('msgstr ""\n')
+        _write('\n')
diff --git a/babel/catalog/tests/pofile.py b/babel/catalog/tests/pofile.py

index b0f08e77f461877673c9dfc569e9596238e28d4b..78d7fa03f7bde335f986a1bf2c71f07f59b39491 100644 (file)
--- a/babel/catalog/tests/pofile.py
+++ b/babel/catalog/tests/pofile.py
@@ -12,12 +12,13 @@
  # history and logs, available at http://babel.edgewall.org/log/.
  
  import doctest
+from StringIO import StringIO
  import unittest
  
  from babel.catalog import pofile
  
  
-class PythonFormatFlagUnitTest(unittest.TestCase):
+class PythonFormatFlagTestCase(unittest.TestCase):
  
      def test_without_name(self):
          assert pofile.PYTHON_FORMAT('foo %d bar')
@@ -25,10 +26,59 @@ class PythonFormatFlagUnitTest(unittest.TestCase):
          assert pofile.PYTHON_FORMAT('foo %r bar')
  
  
+class WritePoTestCase(unittest.TestCase):
+
+    def test_join_locations(self):
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, u'foo', None),
+            ('utils.py', 3, None, u'foo', None),
+        ], omit_header=True)
+        self.assertEqual('''#: main.py:1 utils.py:3
+msgid "foo"
+msgstr ""''', buf.getvalue().strip())
+
+    def test_wrap_long_lines(self):
+        text = """Here's some text where       
+white space and line breaks matter, and should
+
+not be removed
+
+"""
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, text, None),
+        ], no_location=True, omit_header=True, width=42)
+        self.assertEqual(r'''msgid ""
+"Here's some text where       \n"
+"white space and line breaks matter, and"
+" should\n"
+"\n"
+"not be removed\n"
+"\n"
+msgstr ""''', buf.getvalue().strip())
+
+    def test_wrap_long_lines_with_long_word(self):
+        text = """Here's some text that
+includesareallylongwordthatmightbutshouldnt throw us into an infinite loop
+"""
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, text, None),
+        ], no_location=True, omit_header=True, width=32)
+        self.assertEqual(r'''msgid ""
+"Here's some text that\n"
+"includesareallylongwordthatmightbutshouldnt"
+" throw us into an infinite "
+"loop\n"
+msgstr ""''', buf.getvalue().strip())
+
+
  def suite():
      suite = unittest.TestSuite()
      suite.addTest(doctest.DocTestSuite(pofile))
-    suite.addTest(unittest.makeSuite(PythonFormatFlagUnitTest))
+    suite.addTest(unittest.makeSuite(PythonFormatFlagTestCase))
+    suite.addTest(unittest.makeSuite(WritePoTestCase))
      return suite
  
  if __name__ == '__main__':
diff --git a/doc/style/epydoc.css b/doc/style/epydoc.css

index cbe04c4286a119cfa0272e11d06fd4caae86eeef..87c71af7fb952ba6264592a0ba309d73d25cd064 100644 (file)
--- a/doc/style/epydoc.css
+++ b/doc/style/epydoc.css
@@ -54,7 +54,9 @@ table.summary th, table.summary td { border: 1px solid #d7d7d7; }
  table.summary th th, table.summary td td { border: none; }
  table.summary td.summary table td { color: #666; font-size: 90%; }
  table.summary td.summary table br { display: none; }
-table.summary td.summary span.summary-type { font-size: 90%; }
+table.summary td.summary span.summary-type { font-family: monospace; 
+  font-size: 90%;
+}
  table.summary td.summary span.summary-type code { font-size: 110%; }
  p.indent-wrapped-lines { color: #999; font-size: 85%; margin: 0;
    padding: 0 0 0 7em; text-indent: -7em;
author	Christopher Lenz <cmlenz@gmail.com>
	Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)
committer	Christopher Lenz <cmlenz@gmail.com>
	Fri, 1 Jun 2007 15:36:00 +0000 (15:36 +0000)
babel/catalog/frontend.py		patch | blob | blame | history
babel/catalog/pofile.py		patch | blob | blame | history
babel/catalog/tests/pofile.py		patch | blob | blame | history
doc/style/epydoc.css		patch | blob | blame | history