From d3676d72e4fe1d46fedff19785dcf7ac6411b467 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Sun, 28 Dec 2025 13:42:57 +0000 Subject: [PATCH] sources: Normalize any international domain names We don't want these to hit the database in Unicode, but only in ASCII format. Signed-off-by: Michael Tremer --- src/dnsbl/sources.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dnsbl/sources.py b/src/dnsbl/sources.py index 236a542..e9754c7 100644 --- a/src/dnsbl/sources.py +++ b/src/dnsbl/sources.py @@ -23,6 +23,7 @@ import email.utils import enum import gzip import httpx +import idna import io import itertools import logging @@ -236,6 +237,14 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): # Remove any trailing dots domain = domain.removesuffix(".") + # Normalize any international domain names + try: + domain = idna.encode(domain).decode("ascii") + + except idna.IDNAError as e: + log.warning(_("Skipping invalid domain: %s") % domain) + continue + # Skip any invalid domain names if not util.is_fqdn(domain): # Silently skip any IP addresses -- 2.47.3