Fix for #16: the header message (`msgid = ""`) is now treated specially by `read_po...

author Christopher Lenz <cmlenz@gmail.com>
Thu, 14 Jun 2007 09:49:00 +0000 (09:49 +0000)

committer Christopher Lenz <cmlenz@gmail.com>
Thu, 14 Jun 2007 09:49:00 +0000 (09:49 +0000)

diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py

index 1d20b4bd37b6f439359df709dc28de595b647007..8df97e98505816a43cb038645568494a059ff1a2 100644 (file)
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -14,6 +14,7 @@
  """Data structures for message catalogs."""
  
  from datetime import datetime
+from email import message_from_string
  import re
  try:
      set
@@ -24,7 +25,7 @@ import time
  from babel import __version__ as VERSION
  from babel.core import Locale
  from babel.messages.plurals import PLURALS
-from babel.util import odict, LOCALTZ, UTC
+from babel.util import odict, LOCALTZ, UTC, FixedOffsetTimezone
  
  __all__ = ['Message', 'Catalog']
  __docformat__ = 'restructuredtext en'
@@ -45,7 +46,8 @@ class Message(object):
                         ``(singular, plural)`` tuple for pluralizable messages
          :param locations: a sequence of ``(filenname, lineno)`` tuples
          :param flags: a set or sequence of flags
-        :param comments: a sequence of translator comments for the message
+        :param auto_comments: a sequence of automatic comments for the message
+        :param user_comments: a sequence of user comments for the message
          """
          self.id = id
          if not string and self.pluralizable:
@@ -149,7 +151,10 @@ class Catalog(object):
          self.version = version or 'VERSION' #: the project version
          self.copyright_holder = copyright_holder or 'ORGANIZATION'
          self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
-        self.last_translator = last_translator #: last translator name + email
+
+        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
+        """Name and email address of the last translator."""
+
          self.charset = charset or 'utf-8'
  
          if creation_date is None:
@@ -186,11 +191,11 @@ class Catalog(object):
      # This file is distributed under the same license as the Foobar project.
      # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
      #
-
+    
      :type: `unicode`
      """)
  
-    def mime_headers(self):
+    def _get_mime_headers(self):
          headers = []
          headers.append(('Project-Id-Version',
                          '%s %s' % (self.project, self.version)))
@@ -213,7 +218,28 @@ class Catalog(object):
          headers.append(('Content-Transfer-Encoding', '8bit'))
          headers.append(('Generated-By', 'Babel %s\n' % VERSION))
          return headers
-    mime_headers = property(mime_headers, doc="""\
+
+    def _set_mime_headers(self, headers):
+        for name, value in headers:
+            name = name.lower()
+            if name == 'project-id-version':
+                parts = value.split(' ')
+                self.project = ' '.join(parts[:-1])
+                self.version = parts[-1]
+            elif name == 'report-msgid-bugs-to':
+                self.msgid_bugs_address = value
+            elif name == 'last-translator':
+                self.last_translator = value
+            elif name == 'pot-creation-date':
+                # FIXME: this should use dates.parse_datetime as soon as that
+                #        is ready
+                value, tzoffset, _ = re.split('[+-](\d{4})$', value, 1)
+                tt = time.strptime(value, '%Y-%m-%d %H:%M')
+                ts = time.mktime(tt)
+                tzoffset = FixedOffsetTimezone(int(tzoffset))
+                self.creation_date = datetime.fromtimestamp(ts, tzoffset)
+
+    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
      The MIME headers of the catalog, used for the special ``msgid ""`` entry.
      
      The behavior of this property changes slightly depending on whether a locale
@@ -380,6 +406,10 @@ class Catalog(object):
              current.user_comments.extend(message.user_comments)
              current.flags |= message.flags
              message = current
+        elif id == '':
+            # special treatment for the header message
+            headers = message_from_string(message.string.encode(self.charset))
+            self.mime_headers = headers.items()
          else:
              if isinstance(id, (list, tuple)):
                  assert isinstance(message.string, (list, tuple))
@@ -403,7 +433,8 @@ class Catalog(object):
                         ``(singular, plural)`` tuple for pluralizable messages
          :param locations: a sequence of ``(filenname, lineno)`` tuples
          :param flags: a set or sequence of flags
-        :param comments: a list of translator comments
+        :param auto_comments: a sequence of automatic comments
+        :param user_comments: a sequence of user comments
          """
          self[id] = Message(id, string, list(locations), flags, auto_comments,
                             user_comments)
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py

index 6a00df68c976e58cd6524821729aba8bfa3753c1..d533f928470feef6ed243aaf178d095b44e5ffd1 100644 (file)
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -37,12 +37,7 @@ def read_po(fileobj):
      file-like object and return a `Catalog`.
      
      >>> from StringIO import StringIO
-    >>> buf = StringIO('''# Translations template for PROJECT.
-    ... # Copyright (C) YEAR COPYRIGHT HOLDER
-    ... # This file is distributed under the same license as the PROJECT project.
-    ... # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-    ... #
-    ... 
+    >>> buf = StringIO('''
      ... #: main.py:1
      ... #, fuzzy, python-format
      ... msgid "foo %(name)s"
@@ -59,12 +54,6 @@ def read_po(fileobj):
      >>> catalog = read_po(buf)
      >>> catalog.revision_date = datetime(2007, 04, 01)
      
-    >>> print catalog.header_comment
-    # Translations template for PROJECT.
-    # Copyright (C) 2007 ORGANIZATION
-    # This file is distributed under the same license as the PROJECT project.
-    # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
-    
      >>> for message in catalog:
      ...     if message.id:
      ...         print (message.id, message.string)
@@ -90,19 +79,17 @@ def read_po(fileobj):
      user_comments = []
      auto_comments = []
      in_msgid = in_msgstr = False
-    in_header = True
-    header_lines = []
  
      def _add_message():
          translations.sort()
          if len(messages) > 1:
-            msgid = tuple(messages)
+            msgid = tuple([denormalize(m) for m in messages])
          else:
-            msgid = messages[0]
+            msgid = denormalize(messages[0])
          if len(translations) > 1:
-            string = tuple([t[1] for t in translations])
+            string = tuple([denormalize(t[1]) for t in translations])
          else:
-            string = translations[0][1]
+            string = denormalize(translations[0][1])
          catalog.add(msgid, string, list(locations), set(flags),
                      list(user_comments), list(auto_comments))
          del messages[:]; del translations[:]; del locations[:];
@@ -111,59 +98,53 @@ def read_po(fileobj):
      for line in fileobj.readlines():
          line = line.strip()
          if line.startswith('#'):
-            if in_header and line[1:].startswith(' '):
-                header_lines.append(line)
-            else:
-                in_header = in_msgid = in_msgstr = False
-                if messages:
-                    _add_message()
-                if line[1:].startswith(':'):
-                    for location in line[2:].lstrip().split():
-                        filename, lineno = location.split(':', 1)
-                        locations.append((filename, int(lineno)))
-                elif line[1:].startswith(','):
-                    for flag in line[2:].lstrip().split(','):
-                        flags.append(flag.strip())
-                elif line[1:].startswith('.'):
-                    # These are called auto-comments
-                    comment = line[2:].strip()
-                    if comment:
-                        # Just check that we're not adding empty comments
-                        auto_comments.append(comment)
-                elif line[1:].startswith(' '):
-                    # These are called user comments
-                    comment = line[1:].strip()
-                    if comment:
-                        # Just check that we're not adding empty comments
-                        user_comments.append(comment)
+            in_msgid = in_msgstr = False
+            if messages:
+                _add_message()
+            if line[1:].startswith(':'):
+                for location in line[2:].lstrip().split():
+                    filename, lineno = location.split(':', 1)
+                    locations.append((filename, int(lineno)))
+            elif line[1:].startswith(','):
+                for flag in line[2:].lstrip().split(','):
+                    flags.append(flag.strip())
+            elif line[1:].startswith('.'):
+                # These are called auto-comments
+                comment = line[2:].strip()
+                if comment:
+                    # Just check that we're not adding empty comments
+                    auto_comments.append(comment)
+            elif line[1:].startswith(' '):
+                # These are called user comments
+                comment = line[1:].strip()
+                if comment:
+                    # Just check that we're not adding empty comments
+                    user_comments.append(comment)
          else:
-            in_header = False
              if line.startswith('msgid_plural'):
                  in_msgid = True
                  msg = line[12:].lstrip()
-                messages.append(msg[1:-1])
+                messages.append(msg)
              elif line.startswith('msgid'):
                  in_msgid = True
                  if messages:
                      _add_message()
-                msg = line[5:].lstrip()
-                messages.append(msg[1:-1])
+                messages.append(line[5:].lstrip())
              elif line.startswith('msgstr'):
                  in_msgid = False
                  in_msgstr = True
                  msg = line[6:].lstrip()
                  if msg.startswith('['):
                      idx, msg = msg[1:].split(']')
-                    translations.append([int(idx), msg.lstrip()[1:-1]])
+                    translations.append([int(idx), msg.lstrip()])
                  else:
-                    translations.append([0, msg[1:-1]])
+                    translations.append([0, msg])
              elif line.startswith('"'):
                  if in_msgid:
-                    messages[-1] += line.rstrip()[1:-1]
+                    messages[-1] += u'\n' + line.rstrip()
                  elif in_msgstr:
-                    translations[-1][1] += line.rstrip()[1:-1]
+                    translations[-1][1] += u'\n' + line.rstrip()
  
-    catalog.header_comment = '\n'.join(header_lines)
      if messages:
          _add_message()
      return catalog
@@ -193,8 +174,26 @@ def escape(string):
                            .replace('\n', '\\n') \
                            .replace('\"', '\\"')
  
+def unescape(string):
+    r"""Reverse escape the given string.
+    
+    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
+    Say:
+      "hello, world!"
+    <BLANKLINE>
+    
+    :param string: the string to unescape
+    :return: the unescaped string
+    :rtype: `str` or `unicode`
+    """
+    return string[1:-1].replace('\\\\', '\\') \
+                       .replace('\\t', '\t') \
+                       .replace('\\r', '\r') \
+                       .replace('\\n', '\n') \
+                       .replace('\\"', '\"')
+
  def normalize(string, width=76):
-    r"""This converts a string into a format that is appropriate for .po files.
+    r"""Convert a string into a format that is appropriate for .po files.
      
      >>> print normalize('''Say:
      ...   "hello, world!"
@@ -253,6 +252,37 @@ def normalize(string, width=76):
          lines[-1] += '\n'
      return u'""\n' + u'\n'.join([escape(l) for l in lines])
  
+def denormalize(string):
+    r"""Reverse the normalization done by the `normalize` function.
+    
+    >>> print denormalize(r'''""
+    ... "Say:\n"
+    ... "  \"hello, world!\"\n"''')
+    Say:
+      "hello, world!"
+    <BLANKLINE>
+    
+    >>> print denormalize(r'''""
+    ... "Say:\n"
+    ... "  \"Lorem ipsum dolor sit "
+    ... "amet, consectetur adipisicing"
+    ... " elit, \"\n"''')
+    Say:
+      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
+    <BLANKLINE>
+    
+    :param string: the string to denormalize
+    :return: the denormalized string
+    :rtype: `unicode` or `str`
+    """
+    if string.startswith('""'):
+        lines = []
+        for line in string.splitlines()[1:]:
+            lines.append(unescape(line))
+        return ''.join(lines)
+    else:
+        return unescape(string)
+
  def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
               sort_output=False, sort_by_file=False):
      r"""Write a ``gettext`` PO (portable object) template file for a given
diff --git a/babel/messages/tests/pofile.py b/babel/messages/tests/pofile.py

index ddd8dc4b52180957e48d9c9eac9acc66c484d404..8120e6bb33c41d848a0135cd0abd9c5c2debbbd6 100644 (file)
--- a/babel/messages/tests/pofile.py
+++ b/babel/messages/tests/pofile.py
@@ -20,6 +20,23 @@ from babel.messages.catalog import Catalog
  from babel.messages import pofile
  
  
+class ReadPoTestCase(unittest.TestCase):
+
+    def test_read_multiline(self):
+        buf = StringIO(r'''msgid ""
+"Here's some text that\n"
+"includesareallylongwordthatmightbutshouldnt"
+" throw us into an infinite "
+"loop\n"
+msgstr ""''')
+        catalog = pofile.read_po(buf)
+        self.assertEqual(1, len(catalog))
+        message = list(catalog)[1]
+        self.assertEqual("Here's some text that\nincludesareallylongwordthat"
+                         "mightbutshouldnt throw us into an infinite loop\n",
+                         message.id)
+
+
  class WritePoTestCase(unittest.TestCase):
  
      def test_join_locations(self):
@@ -110,6 +127,7 @@ msgstr ""''', buf.getvalue().strip())
  def suite():
      suite = unittest.TestSuite()
      suite.addTest(doctest.DocTestSuite(pofile))
+    suite.addTest(unittest.makeSuite(ReadPoTestCase))
      suite.addTest(unittest.makeSuite(WritePoTestCase))
      return suite
  
diff --git a/babel/util.py b/babel/util.py

index 06c3a1e3fe2be5276970aec02caa802299569e84..a0eedf4e0b2f210d7a18a187975f832b813afffe 100644 (file)
--- a/babel/util.py
+++ b/babel/util.py
@@ -142,30 +142,38 @@ except AttributeError:
          rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
          return os.path.join(*rel_list)
  
-try:
-    from pytz import UTC
-except ImportError:
-    ZERO = timedelta(0)
+ZERO = timedelta(0)
  
-    class UTC(tzinfo):
-        """Simple `tzinfo` implementation for UTC."""
  
-        def __repr__(self):
-            return '<UTC>'
+class FixedOffsetTimezone(tzinfo):
+    """Fixed offset in minutes east from UTC."""
  
-        def __str__(self):
-            return 'UTC'
+    def __init__(self, offset, name=None):
+        self._offset = timedelta(minutes=offset)
+        if name is None:
+            name = 'Etc/GMT+%d' % offset
+        self.zone = name
  
-        def utcoffset(self, dt):
-            return ZERO
+    def __str__(self):
+        return self.zone
  
-        def tzname(self, dt):
-            return 'UTC'
+    def __repr__(self):
+        return '<FixedOffset "%s" %s>' % (self.zone, self._offset)
  
-        def dst(self, dt):
-            return ZERO
+    def utcoffset(self, dt):
+        return self._offset
+
+    def tzname(self, dt):
+        return self.zone
  
-    UTC = UTC()
+    def dst(self, dt):
+        return ZERO
+
+
+try:
+    from pytz import UTC
+except ImportError:
+    UTC = FixedOffsetTimezone(0, 'UTC')
      """`tzinfo` object for UTC (Universal Time).
      
      :type: `tzinfo`
@@ -179,6 +187,7 @@ else:
  
  DSTDIFF = DSTOFFSET - STDOFFSET
  
+
  class LocalTimezone(tzinfo):
  
      def utcoffset(self, dt):
@@ -204,6 +213,7 @@ class LocalTimezone(tzinfo):
          tt = time.localtime(stamp)
          return tt.tm_isdst > 0
  
+
  LOCALTZ = LocalTimezone()
  """`tzinfo` object for local time-zone.
author	Christopher Lenz <cmlenz@gmail.com>
	Thu, 14 Jun 2007 09:49:00 +0000 (09:49 +0000)
committer	Christopher Lenz <cmlenz@gmail.com>
	Thu, 14 Jun 2007 09:49:00 +0000 (09:49 +0000)
babel/messages/catalog.py		patch | blob | blame | history
babel/messages/pofile.py		patch | blob | blame | history
babel/messages/tests/pofile.py		patch | blob | blame | history
babel/util.py		patch | blob | blame | history