From: Michael Tremer Date: Sat, 6 Dec 2025 15:48:35 +0000 (+0000) Subject: dnsbl: Add a simple way to download plaintext lists X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1ea79ea4c740dab8f3a04313df51616bd814b589;p=dnsbl.git dnsbl: Add a simple way to download plaintext lists Signed-off-by: Michael Tremer --- diff --git a/src/database.sql b/src/database.sql index 5ac2336..c9db7d3 100644 --- a/src/database.sql +++ b/src/database.sql @@ -2,7 +2,7 @@ -- PostgreSQL database dump -- -\restrict Ygem6wvGn2DEdbwftbB0nUDuErUteV8NLEaojWTpdRkf4XgJXIO8rDTezxoNLXd +\restrict 4VzOIUheVtRwzS2rft5XdaUcuWX8KbD2he89e8drg1IaUXy4XXk30HlrSNZUryq -- Dumped from database version 17.6 (Debian 17.6-0+deb13u1) -- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1) @@ -69,7 +69,8 @@ CREATE TABLE public.source_domains ( source_id integer NOT NULL, name text NOT NULL, added_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - removed_at timestamp with time zone + removed_at timestamp with time zone, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL ); @@ -216,5 +217,5 @@ ALTER TABLE ONLY public.sources -- PostgreSQL database dump complete -- -\unrestrict Ygem6wvGn2DEdbwftbB0nUDuErUteV8NLEaojWTpdRkf4XgJXIO8rDTezxoNLXd +\unrestrict 4VzOIUheVtRwzS2rft5XdaUcuWX8KbD2he89e8drg1IaUXy4XXk30HlrSNZUryq diff --git a/src/dnsbl/__init__.py b/src/dnsbl/__init__.py index 5ba2a84..60856bf 100644 --- a/src/dnsbl/__init__.py +++ b/src/dnsbl/__init__.py @@ -20,6 +20,7 @@ import configparser import functools +import httpx import logging # Initialize logging as early as possible @@ -67,6 +68,15 @@ class Backend(object): # Create a new database connection return database.Database(self, uri) + def client(self): + """ + Creates a new HTTP client + """ + return httpx.Client( + # Follow any redirects + follow_redirects=True, + ) + @functools.cached_property def lists(self): return lists.Lists(self) diff --git a/src/dnsbl/sources.py 
b/src/dnsbl/sources.py index 8bc4b7d..5f177a1 100644 --- a/src/dnsbl/sources.py +++ b/src/dnsbl/sources.py @@ -1,6 +1,6 @@ ############################################################################### # # -# dnsbl - A DNS Blacklist Compositor For IPFire # +# dnsbl - A DNS Blocklist Compositor For IPFire # # Copyright (C) 2025 IPFire Development Team # # # # This program is free software: you can redistribute it and/or modify # @@ -20,9 +20,11 @@ import datetime import logging +import sqlalchemy.dialects.postgresql import sqlmodel from . import database +from . import util # Setup logging log = logging.getLogger(__name__) @@ -105,6 +107,73 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): """ log.debug("%s: Updating source %s" % (self.list, self)) + # XXX We should store the ETag or the last modified timestamp + + with self.db.transaction(): + with self.backend.client() as client: + with client.stream("GET", self.url) as response: + # Add all domains + for line in response.iter_lines(): + try: + self.add_domain(line) + + except ValueError as e: + log.warning("Failed to add '%s' to the database: %s" % (line, e)) + + # Mark all domains that have not been updated as removed + self.__prune() + + def add_domain(self, name): + """ + Adds or updates a domain. + """ + # Check if this is a valid domain name + if not util.is_fqdn(name): + raise ValueError("Not a valid domain name: %s" % name) + + stmt = ( + sqlalchemy.dialects.postgresql + .insert( + SourceDomain, + ) + .values({ + "source_id" : self.id, + "name" : name, + }) + .on_conflict_do_update( + index_elements = [ + SourceDomain.source_id, SourceDomain.name, + ], + index_where = SourceDomain.removed_at == None, + set_ = { + "updated_at" : sqlmodel.func.current_timestamp(), + } + ) + ) + self.backend.db.execute(stmt) + + def __prune(self): + """ + Prune any domains that have not been updated. 
+ + This must only be called after all domains have been (re-)imported and + must be called in the same transaction as we are using the update timestamp + as reference. + """ + stmt = ( + sqlmodel + .update( + SourceDomain, + ) + .values({ + "removed_at" : sqlmodel.func.current_timestamp(), + }) + .where( + SourceDomain.updated_at < sqlmodel.func.current_timestamp(), + ) + ) + self.backend.db.execute(stmt) + class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True): __tablename__ = "source_domains" @@ -131,3 +200,8 @@ class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True): # Removed At removed_at : datetime.datetime | None + + # Updated At + updated_at : datetime.datetime = sqlmodel.Field( + sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")} + )