git.ipfire.org Git - dbl.git/commitdiff
sources: Normalize any international domain names
author Michael Tremer <michael.tremer@ipfire.org>
Sun, 28 Dec 2025 13:42:57 +0000 (13:42 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Sun, 28 Dec 2025 13:42:57 +0000 (13:42 +0000)
We don't want these to hit the database in Unicode, but only in ASCII
format.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/dnsbl/sources.py

index 236a542b8452124896c8124f73cbe0500e3b5033..e9754c71c911e1d828a57814e39d62d7ab0413e2 100644 (file)
@@ -23,6 +23,7 @@ import email.utils
 import enum
 import gzip
 import httpx
+import idna
 import io
 import itertools
 import logging
@@ -236,6 +237,14 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True):
                                                        # Remove any trailing dots
                                                        domain = domain.removesuffix(".")
 
+                                                       # Normalize any international domain names
+                                                       try:
+                                                               domain = idna.encode(domain).decode("ascii")
+
+                                                       except idna.IDNAError as e:
+                                                               log.warning(_("Skipping invalid domain: %s") % domain)
+                                                               continue
+
                                                        # Skip any invalid domain names
                                                        if not util.is_fqdn(domain):
                                                                # Silently skip any IP addresses