git.ipfire.org Git - thirdparty/linux-firmware.git/commitdiff
trivial: contrib: use python-magic to detect encoding of emails
author Mario Limonciello <mario.limonciello@amd.com>
Thu, 23 Jan 2025 16:28:11 +0000 (10:28 -0600)
committer Mario Limonciello <mario.limonciello@amd.com>
Thu, 23 Jan 2025 16:34:28 +0000 (10:34 -0600)
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
contrib/process_linux_firmware.py

index 8e2eb3501c824ec2133ef53c60cd4d353b972bb0..8c024016852b12cd765c6874bc3cec10fbf7daab 100755 (executable)
@@ -11,6 +11,7 @@ import email.utils
 import smtplib
 import subprocess
 import sys
+import magic  # https://pypi.python.org/pypi/python-magic
 from datetime import date
 from enum import Enum
 
@@ -52,12 +53,11 @@ def classify_content(content):
         body = msg.get_payload(decode=True)
 
     if body:
-        for encoding in ["utf-8", "windows-1252"]:
-            try:
-                decoded = body.decode(encoding)
-                break
-            except UnicodeDecodeError:
-                pass
+        m = magic.Magic(mime_encoding=True)
+        try:
+            decoded = body.decode(m.from_buffer(body))
+        except UnicodeDecodeError:
+            pass
 
     if decoded:
         for key in content_types.keys():
@@ -70,8 +70,11 @@ def classify_content(content):
 
 
 def fetch_url(url):
+    blob = None
     with urllib.request.urlopen(url) as response:
-        return response.read().decode("utf-8")
+        blob = response.read()
+    m = magic.Magic(mime_encoding=True)
+    return blob.decode(m.from_buffer(blob))
 
 
 def quiet_cmd(cmd):