git.ipfire.org Git - thirdparty/babel.git/commitdiff
Improved downloading and conversion experience
authorArmin Ronacher <armin.ronacher@active-4.com>
Thu, 4 Jul 2013 13:02:51 +0000 (15:02 +0200)
committerArmin Ronacher <armin.ronacher@active-4.com>
Thu, 4 Jul 2013 13:02:51 +0000 (15:02 +0200)
Makefile
babel/global.dat
scripts/download_import_cldr.py
scripts/import_cldr.py

index e96ef8b7bbfa65230eddf8b19402a21866caffa4..199374ba94428908bb24ac5c50815290046f9907 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,14 @@
-test:
-       python setup.py test
+test: import-cldr
+       @python setup.py test
+
+import-cldr:
+       @./scripts/download_import_cldr.py
+
+clean-cldr:
+       @rm babel/localedata/*.dat
 
 develop:
-       pip install --editable .
+       @pip install --editable .
 
 tox-test:
        @tox
index 49db8db3c32737abe419dfebbecd43777a7a8533..4eb6099013c1596ef7015be21d47fbe725a7f33e 100644 (file)
Binary files a/babel/global.dat and b/babel/global.dat differ
index 3b2111751e2a4b0b0176717f74ba30d773ad0e8d..a3e5d799c35acadf1d720029a398404743655f21 100755 (executable)
@@ -3,6 +3,7 @@
 import os
 import sys
 import shutil
+import hashlib
 import zipfile
 import urllib
 import subprocess
@@ -10,32 +11,76 @@ import subprocess
 
 URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
 FILENAME = 'core-1.9.1.zip'
+FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
 BLKSIZE = 131072
 
 
+def get_terminal_width():
+    import fcntl
+    import termios
+    import struct
+    fd = sys.stdin.fileno()
+    cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
+    return cr[1]
+
+
+def reporthook(block_count, block_size, total_size):
+    bytes_transmitted = block_count * block_size
+    cols = get_terminal_width()
+    buffer = 6
+    percent = float(bytes_transmitted) / (total_size or 1)
+    done = int(percent * (cols - buffer))
+    sys.stdout.write('\r')
+    sys.stdout.write(' ' + '=' * done + ' ' * (cols - done - buffer))
+    sys.stdout.write('% 4d%%' % (percent * 100))
+    sys.stdout.flush()
+
+
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def is_good_file(filename):
+    if not os.path.isfile(filename):
+        log('\'%s\' not found', filename)
+        return False
+    h = hashlib.md5()
+    with open(filename, 'rb') as f:
+        while 1:
+            blk = f.read(BLKSIZE)
+            if not blk:
+                break
+            h.update(blk)
+        return h.hexdigest() == FILESUM
+
+
 def main():
     scripts_path = os.path.dirname(os.path.abspath(__file__))
     repo = os.path.dirname(scripts_path)
     cldr_path = os.path.join(repo, 'cldr')
     zip_path = os.path.join(cldr_path, FILENAME)
+    changed = False
 
-    if not os.path.isfile(zip_path):
-        with open(zip_path, 'wb') as f:
-            conn = urllib.urlopen(URL)
-            while True:
-                buf = conn.read(BLKSIZE)
-                if not buf:
-                    break
-                f.write(buf)
-            conn.close()
-
+    while not is_good_file(zip_path):
+        log('Downloading \'%s\'', FILENAME)
+        if os.path.isfile(zip_path):
+            os.remove(zip_path)
+        urllib.urlretrieve(URL, zip_path, reporthook)
+        changed = True
+        print
     common_path = os.path.join(cldr_path, 'common')
-    if os.path.isdir(common_path):
-        shutil.rmtree(common_path)
 
-    z = zipfile.ZipFile(zip_path)
-    z.extractall(cldr_path)
-    z.close()
+    if changed:
+        if os.path.isdir(common_path):
+            log('Deleting old CLDR checkout in \'%s\'', cldr_path)
+            shutil.rmtree(common_path)
+
+        log('Extracting CLDR to \'%s\'', cldr_path)
+        z = zipfile.ZipFile(zip_path)
+        z.extractall(cldr_path)
+        z.close()
 
     subprocess.check_call([
         sys.executable,
index 595556bbefdd9bdd4c87df4e9fb7e562899de9d7..0717f22ba72bf196cf3b7b8912ec4c6db8011e8f 100755 (executable)
@@ -17,12 +17,7 @@ from optparse import OptionParser
 import os
 import re
 import sys
-# don't put the ElementTree import in babel/compat.py as this will add a new
-# dependency (elementtree) for Python 2.4 users.
-try:
-    from xml.etree import ElementTree
-except ImportError:
-    from elementtree import ElementTree
+from xml.etree import ElementTree
 
 # Make sure we're using Babel source, and not some previously installed version
 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
@@ -57,6 +52,30 @@ NAME_MAP = {
     'timeFormats': 'time_formats'
 }
 
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def error(message, *args):
+    log('ERROR: %s' % message, *args)
+
+
+def need_conversion(dst_filename, data_dict, source_filename):
+    with open(source_filename, 'rb') as f:
+        blob = f.read(4096)
+        version = int(re.search(r'version number="\$Revision: (\d+)', blob).group(1))
+
+    data_dict['_version'] = version
+    if not os.path.isfile(dst_filename):
+        return True
+
+    with open(dst_filename, 'rb') as f:
+        data = pickle.load(f)
+        return data.get('_version') != version
+
+
 def _translate_alias(ctxt, path):
     parts = path.split('/')
     keys = ctxt[:]
@@ -83,35 +102,37 @@ def main():
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                            '..', 'babel')
 
-    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+    sup = parse(sup_filename)
 
     # Import global data from the supplemental files
+    global_path = os.path.join(destdir, 'global.dat')
     global_data = {}
-
-    territory_zones = global_data.setdefault('territory_zones', {})
-    zone_aliases = global_data.setdefault('zone_aliases', {})
-    zone_territories = global_data.setdefault('zone_territories', {})
-    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-        tzid = elem.attrib['type']
-        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-        zone_territories[tzid] = elem.attrib['territory']
-        if 'aliases' in elem.attrib:
-            for alias in elem.attrib['aliases'].split():
-                zone_aliases[alias] = tzid
-
-    # Import Metazone mapping
-    meta_zones = global_data.setdefault('meta_zones', {})
-    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
-    for elem in tzsup.findall('.//timezone'):
-        for child in elem.findall('usesMetazone'):
-            if 'to' not in child.attrib: # FIXME: support old mappings
-                meta_zones[elem.attrib['type']] = child.attrib['mzone']
-
-    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
-    try:
-        pickle.dump(global_data, outfile, 2)
-    finally:
-        outfile.close()
+    if need_conversion(global_path, global_data, sup_filename):
+        territory_zones = global_data.setdefault('territory_zones', {})
+        zone_aliases = global_data.setdefault('zone_aliases', {})
+        zone_territories = global_data.setdefault('zone_territories', {})
+        for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
+            tzid = elem.attrib['type']
+            territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+            zone_territories[tzid] = elem.attrib['territory']
+            if 'aliases' in elem.attrib:
+                for alias in elem.attrib['aliases'].split():
+                    zone_aliases[alias] = tzid
+
+        # Import Metazone mapping
+        meta_zones = global_data.setdefault('meta_zones', {})
+        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
+        for elem in tzsup.findall('.//timezone'):
+            for child in elem.findall('usesMetazone'):
+                if 'to' not in child.attrib: # FIXME: support old mappings
+                    meta_zones[elem.attrib['type']] = child.attrib['mzone']
+
+        outfile = open(global_path, 'wb')
+        try:
+            pickle.dump(global_data, outfile, 2)
+        finally:
+            outfile.close()
 
     # build a territory containment mapping for inheritance
     regions = {}
@@ -150,15 +171,19 @@ def main():
         if ext != '.xml':
             continue
 
-        print>>sys.stderr, 'Processing input file %r' % filename
-        tree = parse(os.path.join(srcdir, 'main', filename))
+        full_filename = os.path.join(srcdir, 'main', filename)
+        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')
+
         data = {}
+        if not need_conversion(data_filename, data, full_filename):
+            continue
+
+        tree = parse(full_filename)
 
         language = None
         elem = tree.find('.//identity/language')
         if elem is not None:
             language = elem.attrib['type']
-        print>>sys.stderr, '  Language:  %r' % language
 
         territory = None
         elem = tree.find('.//identity/territory')
@@ -166,9 +191,10 @@ def main():
             territory = elem.attrib['type']
         else:
             territory = '001' # world
-        print>>sys.stderr, '  Territory: %r' % territory
         regions = territory_containment.get(territory, [])
-        print>>sys.stderr, '  Regions:    %r' % regions
+
+        log('Processing %s (Language = %s; Territory = %s)',
+            filename, language, territory)
 
         # plural rules
         locale_id = '_'.join(filter(None, [
@@ -376,7 +402,7 @@ def main():
                             date_formats[elem.attrib.get('type')] = \
                                 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                         except ValueError, e:
-                            print>>sys.stderr, 'ERROR: %s' % e
+                            error(e)
                     elif elem.tag == 'alias':
                         date_formats = Alias(_translate_alias(
                             ['date_formats'], elem.attrib['path'])
@@ -393,7 +419,7 @@ def main():
                             time_formats[elem.attrib.get('type')] = \
                                 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                         except ValueError, e:
-                            print>>sys.stderr, 'ERROR: %s' % e
+                            error(e)
                     elif elem.tag == 'alias':
                         time_formats = Alias(_translate_alias(
                             ['time_formats'], elem.attrib['path'])
@@ -410,7 +436,7 @@ def main():
                             datetime_formats[elem.attrib.get('type')] = \
                                 unicode(elem.findtext('dateTimeFormat/pattern'))
                         except ValueError, e:
-                            print>>sys.stderr, 'ERROR: %s' % e
+                            error(e)
                     elif elem.tag == 'alias':
                         datetime_formats = Alias(_translate_alias(
                             ['datetime_formats'], elem.attrib['path'])
@@ -482,7 +508,7 @@ def main():
                 unit_patterns[unit_type][pattern.attrib['count']] = \
                         unicode(pattern.text)
 
-        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
+        outfile = open(data_filename, 'wb')
         try:
             pickle.dump(data, outfile, 2)
         finally: