Add user-agent to CLDR downloader (#1247)

author Aarni Koskela <akx@iki.fi>

Sun, 1 Feb 2026 11:58:51 +0000 (12:58 +0100)

committer GitHub <noreply@github.com>

Sun, 1 Feb 2026 11:58:51 +0000 (11:58 +0000)
author Aarni Koskela <akx@iki.fi>
Sun, 1 Feb 2026 11:58:51 +0000 (12:58 +0100)
committer GitHub <noreply@github.com>
Sun, 1 Feb 2026 11:58:51 +0000 (11:58 +0000)
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py

index 85308aec8f4eb052fe1c7efa50f6a1bf817510e2..c197fd307a77e5caa48ba035bea49c4926ae492c 100755 (executable)
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -7,17 +7,15 @@ import shutil
  import subprocess
  import sys
  import zipfile
-from urllib.request import urlretrieve
+from urllib.request import Request, urlopen
  
  URL = 'https://unicode.org/Public/cldr/47/cldr-common-47.zip'
  FILENAME = 'cldr-common-47.0.zip'
  # Via https://unicode.org/Public/cldr/45/hashes/SHASUM512.txt
  FILESUM = '3b1eb2a046dae23cf16f611f452833e2a95affb1aa2ae3fa599753d229d152577114c2ff44ca98a7f369fa41dc6f45b0d7a6647653ca79694aacfd3f3be59801'
-BLKSIZE = 131072
  
  
-def reporthook(block_count, block_size, total_size):
-    bytes_transmitted = block_count * block_size
+def reporthook(bytes_transmitted, total_size):
      cols = shutil.get_terminal_size().columns
      buffer = 6
      percent = float(bytes_transmitted) / (total_size or 1)
@@ -31,6 +29,23 @@ def log(message):
      sys.stderr.write(f'{message}\n')
  
  
+def download_file(url, dest_path, reporthook=None):
+    """Stream url to dest_path with a custom User-Agent; call reporthook(bytes_written, total_size) per block."""
+    request = Request(url, headers={'User-Agent': 'babel-cldr-downloader (https://babel.pocoo.org/)'})
+    with urlopen(request) as response:
+        # A missing Content-Length header yields total_size == 0.
+        total_size = int(response.headers.get('Content-Length', 0))
+        log(f"Downloading {url} to {dest_path}: {total_size // 1024} KiB")
+        with open(dest_path, 'wb') as out_file:
+            while True:
+                block = response.read(262144)
+                if not block:
+                    break
+                out_file.write(block)
+                if reporthook:
+                    reporthook(out_file.tell(), total_size)
+
+
  def is_good_file(filename):
      if not os.path.isfile(filename):
          log(f"Local copy '{filename}' not found")
@@ -38,7 +53,7 @@ def is_good_file(filename):
      h = hashlib.sha512()
      with open(filename, 'rb') as f:
          while True:
-            blk = f.read(BLKSIZE)
+            blk = f.read(262144)
              if not blk:
                  break
              h.update(blk)
@@ -59,9 +74,8 @@ def main():
      show_progress = (False if os.environ.get("BABEL_CLDR_NO_DOWNLOAD_PROGRESS") else sys.stdout.isatty())
  
      while not is_good_file(zip_path):
-        log(f"Downloading '{FILENAME}' from {URL}")
          tmp_path = f"{zip_path}.tmp"
-        urlretrieve(URL, tmp_path, (reporthook if show_progress else None))
+        download_file(URL, tmp_path, (reporthook if show_progress else None))
          os.replace(tmp_path, zip_path)
          changed = True
          print()
author	Aarni Koskela <akx@iki.fi>
	Sun, 1 Feb 2026 11:58:51 +0000 (12:58 +0100)
committer	GitHub <noreply@github.com>
	Sun, 1 Feb 2026 11:58:51 +0000 (11:58 +0000)