Initial revision of gettext support for Python. This will undergo

author Barry Warsaw <barry@python.org>

Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)

committer Barry Warsaw <barry@python.org>

Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)
author Barry Warsaw <barry@python.org>
Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)
committer Barry Warsaw <barry@python.org>
Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)
diff --git a/Lib/gettext.py b/Lib/gettext.py

new file mode 100644 (file)

index 0000000..a7f89b2
--- /dev/null
+++ b/Lib/gettext.py
@@ -0,0 +1,288 @@
+"""Internationalization and localization support.
+
+This module provides internationalization (I18N) and localization (L10N)
+support for your Python programs by providing an interface to the GNU gettext
+message catalog library.
+
+I18N refers to the operation by which a program is made aware of multiple
+languages.  L10N refers to the adaptation of your program, once
+internationalized, to the local language and cultural habits.  In order to
+provide multilingual messages for your Python programs, you need to take the
+following steps:
+
+    - prepare your program by specially marking translatable strings
+    - run a suite of tools over your marked program files to generate raw
+      messages catalogs
+    - create language specific translations of the message catalogs
+    - use this module so that message strings are properly translated
+
+In order to prepare your program for I18N, you need to look at all the strings
+in your program.  Any string that needs to be translated should be marked by
+wrapping it in _('...') -- i.e. a call to the function `_'.  For example:
+
+    filename = 'mylog.txt'
+    message = _('writing a log message')
+    fp = open(filename, 'w')
+    fp.write(message)
+    fp.close()
+
+In this example, the string `writing a log message' is marked as a candidate
+for translation, while the strings `mylog.txt' and `w' are not.
+
+The GNU gettext package provides a tool, called xgettext, that scans C and C++
+source code looking for these specially marked strings.  xgettext generates
+what are called `.pot' files, essentially structured human readable files
+which contain every marked string in the source code.  These .pot files are
+copied and handed over to translators who write language-specific versions for
+every supported language.
+
+For I18N Python programs however, xgettext won't work; it doesn't understand
+the myriad of string types support by Python.  The standard Python
+distribution provides a tool called pygettext that does though (found in the
+Tools/i18n directory).  This is a command line script that supports a similar
+interface as xgettext; see its documentation for details.  Once you've used
+pygettext to create your .pot files, you can use the standard GNU gettext
+tools to generate your machine-readable .mo files, which are what's used by
+this module.
+
+In the simple case, to use this module then, you need only add the following
+bit of code to the main driver file of your application:
+
+    import gettext
+    gettext.install()
+
+This sets everything up so that your _('...') function calls Just Work.  In
+other words, it installs `_' in the builtins namespace for convenience.  You
+can skip this step and do it manually by the equivalent code:
+
+    import gettext
+    import __builtin__
+    __builtin__['_'] = gettext.gettext
+
+Once you've done this, you probably want to call bindtextdomain() and
+textdomain() to get the domain set up properly.  Again, for convenience, you
+can pass the domain and localedir to install to set everything up in one fell
+swoop:
+
+    import gettext
+    gettext.install('mydomain', '/my/locale/dir')
+
+If your program needs to support many languages at the same time, you will
+want to create Translation objects explicitly, like so:
+
+    import gettext
+    gettext.install()
+
+    lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo'))
+    lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo'))
+    lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo'))
+
+    gettext.set(lang1)
+    # all _() will now translate to language 1
+    gettext.set(lang2)
+    # all _() will now translate to language 2
+
+Currently, only GNU gettext format binary .mo files are supported.
+
+"""
+
+# This module represents the integration of work from the following authors:
+#
+# Martin von Loewis, who wrote the initial implementation of the underlying
+# C-based libintlmodule (later renamed _gettext), along with a skeletal
+# gettext.py implementation.
+#
+# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
+# which also included a pure-Python implementation to read .mo files if
+# intlmodule wasn't available.
+#
+# James Henstridge, who also wrote a gettext.py module, which has some
+# interesting, but currently unsupported experimental features: the notion of
+# a Catalog class and instances, and the ability to add to a catalog file via
+# a Python API.
+#
+# Barry Warsaw integrated these modules, wrote the .install() API and code,
+# and conformed all C and Python code to Python's coding standards.
+
+import os
+import sys
+import struct
+from UserDict import UserDict
+
+
+\f
+# globals
+_translations = {}
+_current_translation = None
+_current_domain = 'messages'
+
+# Domain to directory mapping, for use by bindtextdomain()
+_localedirs = {}
+
+
+\f
+class GNUTranslations(UserDict):
+    # Magic number of .mo files
+    MAGIC = 0x950412de
+
+    def __init__(self, fp):
+        if fp is None:
+            d = {}
+        else:
+            d = self._parse(fp)
+        UserDict.__init__(self, d)
+
+    def _parse(self, fp):
+        """Override this method to support alternative .mo formats."""
+        unpack = struct.unpack
+        filename = getattr(fp, 'name', '')
+        # Parse the .mo file header, which consists of 5 little endian 32
+        # bit words.
+        catalog = {}
+        buf = fp.read()
+        magic, version, msgcount, masteridx, transidx = unpack(
+            '<5i', buf[:20])
+        if magic <> self.MAGIC:
+            raise IOError(0, 'Bad magic number', filename)
+        #
+        # Now put all messages from the .mo file buffer into the catalog
+        # dictionary.
+        for i in xrange(0, msgcount):
+            mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0]
+            mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0]
+            tstart = unpack('<i', buf[transidx+4:transidx+8])[0]
+            tend = tstart + unpack('<i', buf[transidx:transidx+4])[0]
+            if mend < len(buf) and tend < len(buf):
+                catalog[buf[mstart:mend]] = buf[tstart:tend]
+            else:
+                raise IOError(0, 'File is corrupt', filename)
+            #
+            # advance to next entry in the seek tables
+            masteridx = masteridx + 8
+            transidx = transidx + 8
+        return catalog
+
+
+\f
+# By default, use GNU gettext format .mo files
+Translations = GNUTranslations
+
+# Locate a .mo file using the gettext strategy
+def _find(localedir=None, languages=None, category=None, domain=None):
+    global _current_domain
+    global _localedirs
+
+    # Get some reasonable defaults for arguments that were not supplied
+    if domain is None:
+        domain = _current_domain
+    if category is None:
+        category = 'LC_MESSAGES'
+    if localedir is None:
+        localedir = _localedirs.get(
+            domain,
+            # TBD: The default localedir is actually system dependent.  I
+            # don't know of a good platform-consistent and portable way to
+            # default it, so instead, we'll just use sys.prefix.  Most
+            # programs should be calling bindtextdomain() or such explicitly
+            # anyway.
+            os.path.join(sys.prefix, 'share', 'locale'))
+    if languages is None:
+        languages = []
+        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
+            val = os.environ.get(envar)
+            if val:
+                languages = val.split(':')
+                break
+        if 'C' not in languages:
+            languages.append('C')
+    # select a language
+    for lang in languages:
+        if lang == 'C':
+            break
+        mofile = os.path.join(localedir, lang, category, '%s.mo' % domain)
+        # see if it's in the cache
+        mo = _translations.get(mofile)
+        if mo:
+            return mo
+        fp = None
+        try:
+            try:
+                fp = open(mofile, 'rb')
+                t = Translations(fp)
+                _translations[mofile] = t
+                return t
+            except IOError:
+                pass
+        finally:
+            if fp:
+                fp.close()
+    return {}
+
+
+\f
+def bindtextdomain(domain=None, localedir=None):
+    """Bind domain to a file in the specified directory."""
+    global _localedirs
+    if domain is None:
+        return None
+    if localedir is None:
+        return _localedirs.get(domain, _localedirs.get('C'))
+    _localedirs[domain] = localedir
+    return localedir
+
+
+def textdomain(domain=None):
+    """Change or query the current global domain."""
+    global _current_domain
+    if domain is None:
+        return _current_domain
+    else:
+        _current_domain = domain
+        return domain
+
+
+def gettext(message):
+    """Return localized version of a message."""
+    return _find().get(message, message)
+
+
+def dgettext(domain, message):
+    """Like gettext(), but look up message in specified domain."""
+    return _find(domain=domain).get(message, message)
+
+
+def dcgettext(domain, message, category):
+    try:
+        from locale import LC_CTYPE, LC_TIME, LC_COLLATE
+        from locale import LC_MONETARY, LC_MESSAGES, LC_NUMERIC
+    except ImportError:
+        return message
+    categories = {
+        LC_CTYPE    : 'LC_CTYPE',
+        LC_TIME     : 'LC_TIME',
+        LC_COLLATE  : 'LC_COLLATE',
+        LC_MONETARY : 'LC_MONETARY',
+        LC_MESSAGES : 'LC_MESSAGES',
+        LC_NUMERIC  : 'LC_NUMERIC'
+        }
+    return _find(domain=domain, category=category).get(message, message)
+
+
+\f
+# A higher level API
+def set(translation):
+    global _current_translation
+    _current_translation = translation
+
+
+def get():
+    global _current_translation
+    return _current_translation
+
+
+def install(domain=None, localedir=None):
+    import __builtin__
+    __builtin__.__dict__['_'] = gettext
+    if domain is not None:
+        bindtextdomain(domain, localedir)
+        textdomain(domain)
author	Barry Warsaw <barry@python.org>
	Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)
committer	Barry Warsaw <barry@python.org>
	Fri, 25 Aug 2000 19:13:37 +0000 (19:13 +0000)