From: Michael Tremer Date: Fri, 2 Jan 2026 14:12:42 +0000 (+0000) Subject: sources: Refactor the ABP parser X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1e686c38ecd83edb3c4575a261c7a0c3072827c8;p=dbl.git sources: Refactor the ABP parser Some sources don't implement the format very strictly. To not miss any data, we will have to split off any options and then check whether we want to add the domain to our lists. Signed-off-by: Michael Tremer --- diff --git a/src/dnsbl/sources.py b/src/dnsbl/sources.py index b58c311..dbf5a46 100644 --- a/src/dnsbl/sources.py +++ b/src/dnsbl/sources.py @@ -422,14 +422,42 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): """ Parse the domain from the AdBlockPlus format """ - if line.startswith("||"): - # Remove the leading || - line = line.removeprefix("||") + # Skip any comments + if line.startswith("!") or line.startswith("@@"): + return + + # Remove the leading || + # Some files actually don't always use this + line = line.removeprefix("||") + + # Split off any options + line, __, options = line.partition("$") + + # Parse the options + if options: + # Split options by comma and strip any whitespace + options = [option.strip() for option in options.split(",")] + + # Remove some options that we know and will ignore + for option in ("all", "important"): + try: + options.remove(option) + except ValueError: + pass + + # Cut off everything after ^ + line, __, rest = line.partition("^") - # Cut off everything after ^ - domain, _, rest = line.partition("^") + # The line now should only contain the domain + domain = line - return domain + # Ignore any domains that have unknown/unsupported options + if options: + log.warning(_("Cannot parse domain %s with unknown options: %s") \ + % (domain, ",".join(options))) + return + + return domain def _process_hosts(self, line): """