]> git.ipfire.org Git - dnsbl.git/commitdiff
dnsbl: Add a simple way to download plaintext lists
author Michael Tremer <michael.tremer@ipfire.org>
Sat, 6 Dec 2025 15:48:35 +0000 (15:48 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Sat, 6 Dec 2025 15:48:35 +0000 (15:48 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/database.sql
src/dnsbl/__init__.py
src/dnsbl/sources.py

index 5ac2336ce6a53e096318e9f346dfc8009741fc1f..c9db7d37592b2803868551e35453ab79b2a64337 100644 (file)
@@ -2,7 +2,7 @@
 -- PostgreSQL database dump
 --
 
-\restrict Ygem6wvGn2DEdbwftbB0nUDuErUteV8NLEaojWTpdRkf4XgJXIO8rDTezxoNLXd
+\restrict 4VzOIUheVtRwzS2rft5XdaUcuWX8KbD2he89e8drg1IaUXy4XXk30HlrSNZUryq
 
 -- Dumped from database version 17.6 (Debian 17.6-0+deb13u1)
 -- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1)
@@ -69,7 +69,8 @@ CREATE TABLE public.source_domains (
     source_id integer NOT NULL,
     name text NOT NULL,
     added_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    removed_at timestamp with time zone
+    removed_at timestamp with time zone,
+    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL
 );
 
 
@@ -216,5 +217,5 @@ ALTER TABLE ONLY public.sources
 -- PostgreSQL database dump complete
 --
 
-\unrestrict Ygem6wvGn2DEdbwftbB0nUDuErUteV8NLEaojWTpdRkf4XgJXIO8rDTezxoNLXd
+\unrestrict 4VzOIUheVtRwzS2rft5XdaUcuWX8KbD2he89e8drg1IaUXy4XXk30HlrSNZUryq
 
index 5ba2a84824afb17662a6d036db82ba5968d68b45..60856bfe316df67cd5382e96005cc6e1bdaabf83 100644 (file)
@@ -20,6 +20,7 @@
 
 import configparser
 import functools
+import httpx
 import logging
 
 # Initialize logging as early as possible
@@ -67,6 +68,15 @@ class Backend(object):
                # Create a new database connection
                return database.Database(self, uri)
 
+       def client(self):
+               """
+                       Creates a new HTTP client
+               """
+               return httpx.Client(
+                       # Follow any redirects
+                       follow_redirects=True,
+               )
+
        @functools.cached_property
        def lists(self):
                return lists.Lists(self)
index 8bc4b7d273d17412432633ad02f37638225557a7..5f177a14b091e5e7151b1fd9e51b194c5a63547b 100644 (file)
@@ -1,6 +1,6 @@
 ###############################################################################
 #                                                                             #
-# dnsbl - A DNS Blacklist Compositor For IPFire                               #
+# dnsbl - A DNS Blocklist Compositor For IPFire                               #
 # Copyright (C) 2025 IPFire Development Team                                  #
 #                                                                             #
 # This program is free software: you can redistribute it and/or modify        #
 
 import datetime
 import logging
+import sqlalchemy.dialects.postgresql
 import sqlmodel
 
 from . import database
+from . import util
 
 # Setup logging
 log = logging.getLogger(__name__)
@@ -105,6 +107,73 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True):
                """
                log.debug("%s: Updating source %s" % (self.list, self))
 
+               # XXX We should store the ETag or the Last-Modified timestamp
+
+               with self.db.transaction():
+                       with self.backend.client() as client:
+                               with client.stream("GET", self.url) as response:
+                                       # Add all domains
+                                       for line in response.iter_lines():
+                                               try:
+                                                       self.add_domain(line)
+
+                                               except ValueError as e:
+                                                       log.warning("Failed to add '%s' to the database: %s" % (line, e))
+
+                       # Mark all domains that have not been updated as removed
+                       self.__prune()
+
+       def add_domain(self, name):
+               """
+                       Adds or updates a domain.
+               """
+               # Check if this is a valid domain name
+               if not util.is_fqdn(name):
+                       raise ValueError("Not a valid domain name: %s" % name)
+
+               stmt = (
+                       sqlalchemy.dialects.postgresql
+                       .insert(
+                               SourceDomain,
+                       )
+                       .values({
+                               "source_id" : self.id,
+                               "name"      : name,
+                       })
+                       .on_conflict_do_update(
+                               index_elements = [
+                                       SourceDomain.source_id, SourceDomain.name,
+                               ],
+                               index_where = SourceDomain.removed_at == None,
+                               set_ = {
+                                       "updated_at" : sqlmodel.func.current_timestamp(),
+                               }
+                       )
+               )
+               self.backend.db.execute(stmt)
+
+       def __prune(self):
+               """
+                       Prune any domains that have not been updated.
+
+                       This must only be called after all domains have been (re-)imported and
+                       must be called in the same transaction as we are using the update timestamp
+                       as reference.
+               """
+               stmt = (
+                       sqlmodel
+                       .update(
+                               SourceDomain,
+                       )
+                       .values({
+                               "removed_at" : sqlmodel.func.current_timestamp(),
+                       })
+                       .where(
+                               SourceDomain.updated_at < sqlmodel.func.current_timestamp(),
+                       )
+               )
+               self.backend.db.execute(stmt)
+
 
 class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True):
        __tablename__ = "source_domains"
@@ -131,3 +200,8 @@ class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True):
 
        # Removed At
        removed_at : datetime.datetime | None
+
+       # Updated At
+       updated_at : datetime.datetime = sqlmodel.Field(
+               sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")}
+       )