git.ipfire.org Git - dbl.git/commitdiff
sources: Store number of total and dead domains
author Michael Tremer <michael.tremer@ipfire.org>
Sun, 28 Dec 2025 14:26:12 +0000 (14:26 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Sun, 28 Dec 2025 14:26:12 +0000 (14:26 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/database.sql
src/dnsbl/checker.py
src/dnsbl/sources.py
src/scripts/dnsbl.in

index 3faa2f9518eba55c0192c50b00d90813d3b3b39c..a1025951bfcae1ab2fc975298152e8daf54ecf36 100644 (file)
@@ -2,7 +2,7 @@
 -- PostgreSQL database dump
 --
 
-\restrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr
+\restrict pljGpSA3LcghRNp7l8VhB9577BShEv8iwmjIVnsBJjUDBiyXWHsvzLrsdBLd4g5
 
 -- Dumped from database version 17.6 (Debian 17.6-0+deb13u1)
 -- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1)
@@ -153,7 +153,9 @@ CREATE TABLE public.sources (
     list_id integer,
     last_modified_at timestamp with time zone,
     etag text,
-    updated_at timestamp with time zone
+    updated_at timestamp with time zone,
+    total_domains integer,
+    dead_domains integer
 );
 
 
@@ -293,5 +295,5 @@ ALTER TABLE ONLY public.sources
 -- PostgreSQL database dump complete
 --
 
-\unrestrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr
+\unrestrict pljGpSA3LcghRNp7l8VhB9577BShEv8iwmjIVnsBJjUDBiyXWHsvzLrsdBLd4g5
 
index 9af6dc0bd2eda2844d04ff4244baf609693b658b..856d728660f4542bac9dee59b80cf5ebf4028ad2 100644 (file)
@@ -75,6 +75,10 @@ class Checker(object):
                                except TimeoutError:
                                        pass
 
+               # Update all stats after we checked all domains
+               for source in self.backend.sources:
+                       source.update_stats()
+
        def get_domains(self, limit=None):
                """
                        Returns all domains that need checking
index e9754c71c911e1d828a57814e39d62d7ab0413e2..6eb94de53a43246d558154f668a05dd83f648c71 100644 (file)
@@ -30,6 +30,7 @@ import logging
 import sqlalchemy.dialects.postgresql
 import sqlmodel
 
+from . import checker
 from . import database
 from . import util
 from .i18n import _
@@ -57,6 +58,23 @@ class Sources(object):
        def __init__(self, backend):
                self.backend = backend
 
+       def __iter__(self):
+               """
+                       Iterates over all sources that have not been removed,
+                       ordered by name and then by slug
+               """
+               # Start with a plain query for all sources
+               query = sqlmodel.select(Source)
+
+               # Skip any sources that have been removed
+               query = query.where(Source.removed_at == None)
+
+               # Sort by name first and by slug second
+               query = query.order_by(Source.name, Source.slug)
+
+               # Whatever the database layer hands back is what we iterate over
+               return self.backend.db.fetch(query)
+
        def get_by_id(self, id):
                stmt = (
                        sqlmodel
@@ -91,21 +109,7 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True):
                return self.name
 
        def __len__(self):
-               stmt = (
-                       sqlmodel
-                       .select(
-                               sqlmodel.func.count(),
-                       )
-                       .select_from(
-                               SourceDomain,
-                       )
-                       .where(
-                               SourceDomain.source == self,
-                               SourceDomain.removed_at == None,
-                       )
-               )
-
-               return self.backend.db.fetch_one(stmt)
+               return self.total_domains
 
        def __hash__(self):
                # Only hashable once the object has an ID
@@ -286,6 +290,9 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True):
                        # Mark all domains that have not been updated as removed
                        self.__prune()
 
+                       # Update the stats
+                       self.update_stats()
+
                # Signal that we have actually fetched new data
                return True
 
@@ -504,6 +511,57 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True):
 
                return sources
 
+       # Stats (cached counts that are written by update_stats())
+
+       total_domains : int | None  # Number of domains of this source that have not been removed
+
+       dead_domains : int | None  # Number of those domains with a false checker status
+
+       def update_stats(self):
+               """
+                       Recounts the domains of this source and stores the results
+
+                       total_domains becomes the number of domains that have not been
+                       removed; dead_domains the number of those for which the checker
+                       has recorded a false status
+               """
+               # Count the domains of this source that are still listed
+               listed = (
+                       sqlmodel
+                       .select(
+                               sqlmodel.func.count(),
+                       )
+                       .select_from(
+                               SourceDomain,
+                       )
+                       .where(
+                               SourceDomain.source == self,
+                               SourceDomain.removed_at == None,
+                       )
+               )
+
+               # Store the total number of domains
+               self.total_domains = self.backend.db.fetch_one(listed)
+
+               # Narrow the same query down to the dead domains
+               dead = (
+                       listed
+
+                       # Look up the checker's result by domain name
+                       .join(
+                               checker.CheckerDomain,
+                               checker.CheckerDomain.name == SourceDomain.name,
+                       )
+
+                       # Only check dead domains
+                       .where(
+                               checker.CheckerDomain.status == False,
+                       )
+               )
+
+               # Store the total number of dead domains
+               self.dead_domains = self.backend.db.fetch_one(dead)
+
 
 class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True):
        __tablename__ = "source_domains"
index db70b130e296f2c893731dae4d7a55bd4ba9e4d0..c6e8ced5cf306d6e3a1c592b743afeb2229c903d 100644 (file)
@@ -264,6 +264,7 @@ class CLI(object):
                table.add_column(_("Created At"))
                table.add_column(_("Created_By"))
                table.add_column(_("Listed Domains"), justify="right")
+               table.add_column(_("Dead Domains"), justify="right")
 
                for source in list.sources:
                        table.add_row(
@@ -272,7 +273,11 @@ class CLI(object):
                                source.url,
                                source.created_at.isoformat(),
                                source.created_by,
-                               babel.numbers.format_decimal(len(source)),
+                               babel.numbers.format_decimal(source.total_domains)
+                                       if source.total_domains else _("N/A"),
+                               babel.numbers.format_percent(
+                                       source.dead_domains / source.total_domains,
+                               ) if source.total_domains else _("N/A"),
                        )
 
                # Print the sources