From: Christopher Lenz Date: Tue, 10 Jun 2008 17:05:52 +0000 (+0000) Subject: Add basic MO file reading in preparation for #54. X-Git-Tag: 1.0~341 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f48581982a335b430f40ed0d5f6c3341d6b17aee;p=thirdparty%2Fbabel.git Add basic MO file reading in preparation for #54. --- diff --git a/babel/messages/mofile.py b/babel/messages/mofile.py index bc0f3a8c..47f04973 100644 --- a/babel/messages/mofile.py +++ b/babel/messages/mofile.py @@ -21,9 +21,109 @@ import array import struct -__all__ = ['write_mo'] +from babel.messages.catalog import Catalog, Message + +__all__ = ['read_mo', 'write_mo'] __docformat__ = 'restructuredtext en' + +LE_MAGIC = 0x950412deL +BE_MAGIC = 0xde120495L + +def read_mo(fileobj): + """Read a binary MO file from the given file-like object and return a + corresponding `Catalog` object. + + :param fileobj: the file-like object to read the MO file from + :return: a catalog object representing the parsed MO file + :rtype: `Catalog` + + :note: The implementation of this function is heavily based on the + ``GNUTranslations._parse`` method of the ``gettext`` module in the + standard library. + """ + catalog = Catalog() + headers = {} + + unpack = struct.unpack + filename = getattr(fileobj, 'name', '') + charset = None + + buf = fileobj.read() + buflen = len(buf) + + # Parse the .mo file header, which consists of 5 little endian 32 + # bit words. 
+ magic = unpack('4I', buf[4:20]) + ii = '>II' + else: + raise IOError(0, 'Bad magic number', filename) + + # Now put all messages from the .mo file buffer into the catalog + # dictionary + for i in xrange(0, msgcount): + mlen, moff = unpack(ii, buf[masteridx:masteridx + 8]) + mend = moff + mlen + tlen, toff = unpack(ii, buf[transidx:transidx + 8]) + tend = toff + tlen + if mend < buflen and tend < buflen: + msg = buf[moff:mend] + tmsg = buf[toff:tend] + else: + raise IOError(0, 'File is corrupt', filename) + + # See if we're looking at GNU .mo conventions for metadata + if mlen == 0: + # Catalog description + lastkey = key = None + for item in tmsg.splitlines(): + item = item.strip() + if not item: + continue + if ':' in item: + key, value = item.split(':', 1) + lastkey = key = key.strip().lower() + value = value.strip() + headers[key] = value + if key == 'content-type': + charset = value.split('charset=')[1] + elif lastkey: + self._info[lastkey] += '\n' + item + + # Note: we unconditionally convert both msgids and msgstrs to + # Unicode using the character encoding specified in the charset + # parameter of the Content-Type header. The gettext documentation + # strongly encourages msgids to be us-ascii, but some applications + # require alternative encodings (e.g. Zope's ZCML and ZPT). For + # traditional gettext applications, the msgid conversion will + # cause no problems since us-ascii should always be a subset of + # the charset encoding. We may want to fall back to 8-bit msgids + # if the Unicode conversion fails. 
+ if '\x00' in msg: + # Plural forms + msg = msg.split('\x00') + tmsg = tmsg.split('\x00') + if charset: + msg = [unicode(x, charset) for x in msg] + tmsg = [unicode(x, charset) for x in tmsg] + else: + if charset: + msg = unicode(msg, charset) + tmsg = unicode(tmsg, charset) + catalog[msg] = Message(msg, tmsg) + + # advance to next entry in the seek tables + masteridx += 8 + transidx += 8 + + catalog.mime_headers = headers.items() + return catalog + def write_mo(fileobj, catalog, use_fuzzy=False): """Write a catalog to the specified file-like object using the GNU MO file format. @@ -112,7 +212,7 @@ def write_mo(fileobj, catalog, use_fuzzy=False): offsets = koffsets + voffsets fileobj.write(struct.pack('Iiiiiii', - 0x950412deL, # magic + LE_MAGIC, # magic 0, # version len(messages), # number of entries 7 * 4, # start of key index diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 2b041a50..3e73e134 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -122,8 +122,8 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False): means it's a template) :param domain: the message domain :param ignore_obsolete: whether to ignore obsolete messages in the input - :return: an iterator over ``(message, translation, location)`` tuples - :rtype: ``iterator`` + :return: a catalog object representing the parsed PO file + :rtype: `Catalog` """ catalog = Catalog(locale=locale, domain=domain) diff --git a/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo new file mode 100644 index 00000000..21b17270 Binary files /dev/null and b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo differ diff --git a/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.po b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.po new file mode 100644 index 00000000..c5c97489 --- /dev/null +++ 
b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.po @@ -0,0 +1,32 @@ +# German (Germany) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: 2007-07-30 22:18+0200\n" +"Last-Translator: FULL NAME \n" +"Language-Team: de_DE \n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 0.9dev-r245\n" + +#. This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "Stange" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "Fuhstange" +msgstr[1] "Fuhstangen" + diff --git a/babel/messages/tests/mofile.py b/babel/messages/tests/mofile.py index 3199c75f..5a32b4cd 100644 --- a/babel/messages/tests/mofile.py +++ b/babel/messages/tests/mofile.py @@ -13,11 +13,33 @@ import doctest import gettext +import os import unittest from StringIO import StringIO from babel.messages import mofile, Catalog + +class ReadMoTestCase(unittest.TestCase): + + def setUp(self): + self.datadir = os.path.join(os.path.dirname(__file__), 'data') + + def test_basics(self): + mo_file = open(os.path.join(self.datadir, 'project', 'i18n', 'de', + 'LC_MESSAGES', 'messages.mo')) + try: + catalog = mofile.read_mo(mo_file) + self.assertEqual(2, len(catalog)) + self.assertEqual('TestProject', catalog.project) + self.assertEqual('0.1', catalog.version) + self.assertEqual('Stange', catalog['bar'].string) + self.assertEqual(['Fuhstange', 'Fuhstangen'], + catalog['foobar'].string) + finally: + mo_file.close() + + class WriteMoTestCase(unittest.TestCase): def test_sorting(self): @@ -57,6 +79,7 @@ class 
WriteMoTestCase(unittest.TestCase): def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(mofile)) + suite.addTest(unittest.makeSuite(ReadMoTestCase)) suite.addTest(unittest.makeSuite(WriteMoTestCase)) return suite