import array
import struct
-__all__ = ['write_mo']
+from babel.messages.catalog import Catalog, Message
+
+__all__ = ['read_mo', 'write_mo']
__docformat__ = 'restructuredtext en'
+
+LE_MAGIC = 0x950412deL
+BE_MAGIC = 0xde120495L
+
+def read_mo(fileobj):
+ """Read a binary MO file from the given file-like object and return a
+ corresponding `Catalog` object.
+
+ :param fileobj: the file-like object to read the MO file from
+ :return: a catalog object representing the parsed MO file
+ :rtype: `Catalog`
+
+ :note: The implementation of this function is heavily based on the
+ ``GNUTranslations._parse`` method of the ``gettext`` module in the
+ standard library.
+ """
+ catalog = Catalog()
+ headers = {}
+
+ unpack = struct.unpack
+ filename = getattr(fileobj, 'name', '')
+ charset = None
+
+ buf = fileobj.read()
+ buflen = len(buf)
+
+ # Parse the .mo file header, which consists of 5 little endian 32
+ # bit words.
+ magic = unpack('<I', buf[:4])[0] # Are we big endian or little endian?
+ if magic == LE_MAGIC:
+ version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
+ ii = '<II'
+ elif magic == BE_MAGIC:
+ version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
+ ii = '>II'
+ else:
+ raise IOError(0, 'Bad magic number', filename)
+
+ # Now put all messages from the .mo file buffer into the catalog
+ # dictionary
+ for i in xrange(0, msgcount):
+ mlen, moff = unpack(ii, buf[masteridx:masteridx + 8])
+ mend = moff + mlen
+ tlen, toff = unpack(ii, buf[transidx:transidx + 8])
+ tend = toff + tlen
+ if mend < buflen and tend < buflen:
+ msg = buf[moff:mend]
+ tmsg = buf[toff:tend]
+ else:
+ raise IOError(0, 'File is corrupt', filename)
+
+ # See if we're looking at GNU .mo conventions for metadata
+ if mlen == 0:
+ # Catalog description
+ lastkey = key = None
+ for item in tmsg.splitlines():
+ item = item.strip()
+ if not item:
+ continue
+ if ':' in item:
+ key, value = item.split(':', 1)
+ lastkey = key = key.strip().lower()
+ value = value.strip()
+ headers[key] = value
+ if key == 'content-type':
+ charset = value.split('charset=')[1]
+ elif lastkey:
+ self._info[lastkey] += '\n' + item
+
+ # Note: we unconditionally convert both msgids and msgstrs to
+ # Unicode using the character encoding specified in the charset
+ # parameter of the Content-Type header. The gettext documentation
+ # strongly encourages msgids to be us-ascii, but some appliations
+ # require alternative encodings (e.g. Zope's ZCML and ZPT). For
+ # traditional gettext applications, the msgid conversion will
+ # cause no problems since us-ascii should always be a subset of
+ # the charset encoding. We may want to fall back to 8-bit msgids
+ # if the Unicode conversion fails.
+ if '\x00' in msg:
+ # Plural forms
+ msg = msg.split('\x00')
+ tmsg = tmsg.split('\x00')
+ if charset:
+ msg = [unicode(x, charset) for x in msg]
+ tmsg = [unicode(x, charset) for x in tmsg]
+ else:
+ if charset:
+ msg = unicode(msg, charset)
+ tmsg = unicode(tmsg, charset)
+ catalog[msg] = Message(msg, tmsg)
+
+ # advance to next entry in the seek tables
+ masteridx += 8
+ transidx += 8
+
+ catalog.mime_headers = headers.items()
+ return catalog
+
def write_mo(fileobj, catalog, use_fuzzy=False):
"""Write a catalog to the specified file-like object using the GNU MO file
format.
offsets = koffsets + voffsets
fileobj.write(struct.pack('Iiiiiii',
- 0x950412deL, # magic
+ LE_MAGIC, # magic
0, # version
len(messages), # number of entries
7 * 4, # start of key index
--- /dev/null
+# German (Germany) translations for TestProject.
+# Copyright (C) 2007 FooBar, Inc.
+# This file is distributed under the same license as the TestProject
+# project.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: TestProject 0.1\n"
+"Report-Msgid-Bugs-To: bugs.address@email.tld\n"
+"POT-Creation-Date: 2007-04-01 15:30+0200\n"
+"PO-Revision-Date: 2007-07-30 22:18+0200\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: de_DE <LL@li.org>\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 0.9dev-r245\n"
+
+#. This will be a translator coment,
+#. that will include several lines
+#: project/file1.py:8
+msgid "bar"
+msgstr "Stange"
+
+#: project/file2.py:9
+msgid "foobar"
+msgid_plural "foobars"
+msgstr[0] "Fuhstange"
+msgstr[1] "Fuhstangen"
+
import doctest
import gettext
+import os
import unittest
from StringIO import StringIO
from babel.messages import mofile, Catalog
+
+class ReadMoTestCase(unittest.TestCase):
+
+ def setUp(self):
+ self.datadir = os.path.join(os.path.dirname(__file__), 'data')
+
+ def test_basics(self):
+ mo_file = open(os.path.join(self.datadir, 'project', 'i18n', 'de',
+ 'LC_MESSAGES', 'messages.mo'))
+ try:
+ catalog = mofile.read_mo(mo_file)
+ self.assertEqual(2, len(catalog))
+ self.assertEqual('TestProject', catalog.project)
+ self.assertEqual('0.1', catalog.version)
+ self.assertEqual('Stange', catalog['bar'].string)
+ self.assertEqual(['Fuhstange', 'Fuhstangen'],
+ catalog['foobar'].string)
+ finally:
+ mo_file.close()
+
+
class WriteMoTestCase(unittest.TestCase):
def test_sorting(self):
def suite():
suite = unittest.TestSuite()
suite.addTest(doctest.DocTestSuite(mofile))
+ suite.addTest(unittest.makeSuite(ReadMoTestCase))
suite.addTest(unittest.makeSuite(WriteMoTestCase))
return suite