From: Michael Tremer Date: Sun, 28 Dec 2025 14:26:12 +0000 (+0000) Subject: sources: Store number of total and dead domains X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ceb26c5e52db315f8d6a659e8ee2860cdde66141;p=dbl.git sources: Store number of total and dead domains Signed-off-by: Michael Tremer --- diff --git a/src/database.sql b/src/database.sql index 3faa2f9..a102595 100644 --- a/src/database.sql +++ b/src/database.sql @@ -2,7 +2,7 @@ -- PostgreSQL database dump -- -\restrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr +\restrict pljGpSA3LcghRNp7l8VhB9577BShEv8iwmjIVnsBJjUDBiyXWHsvzLrsdBLd4g5 -- Dumped from database version 17.6 (Debian 17.6-0+deb13u1) -- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1) @@ -153,7 +153,9 @@ CREATE TABLE public.sources ( list_id integer, last_modified_at timestamp with time zone, etag text, - updated_at timestamp with time zone + updated_at timestamp with time zone, + total_domains integer, + dead_domains integer ); @@ -293,5 +295,5 @@ ALTER TABLE ONLY public.sources -- PostgreSQL database dump complete -- -\unrestrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr +\unrestrict pljGpSA3LcghRNp7l8VhB9577BShEv8iwmjIVnsBJjUDBiyXWHsvzLrsdBLd4g5 diff --git a/src/dnsbl/checker.py b/src/dnsbl/checker.py index 9af6dc0..856d728 100644 --- a/src/dnsbl/checker.py +++ b/src/dnsbl/checker.py @@ -75,6 +75,10 @@ class Checker(object): except TimeoutError: pass + # Update all stats after we checked all domains + for source in self.backend.sources: + source.update_stats() + def get_domains(self, limit=None): """ Returns all domains that need checking diff --git a/src/dnsbl/sources.py b/src/dnsbl/sources.py index e9754c7..6eb94de 100644 --- a/src/dnsbl/sources.py +++ b/src/dnsbl/sources.py @@ -30,6 +30,7 @@ import logging import sqlalchemy.dialects.postgresql import sqlmodel +from . import checker from . import database from . import util from .i18n import _ @@ -57,6 +58,23 @@ class Sources(object): def __init__(self, backend): self.backend = backend + def __iter__(self): + stmt = ( + sqlmodel + .select( + Source, + ) + .where( + Source.removed_at == None, + ) + .order_by( + Source.name, + Source.slug, + ) + ) + + return self.backend.db.fetch(stmt) + def get_by_id(self, id): stmt = ( sqlmodel @@ -91,21 +109,7 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): return self.name def __len__(self): - stmt = ( - sqlmodel - .select( - sqlmodel.func.count(), - ) - .select_from( - SourceDomain, - ) - .where( - SourceDomain.source == self, - SourceDomain.removed_at == None, - ) - ) - - return self.backend.db.fetch_one(stmt) + return self.total_domains def __hash__(self): # Only hashable once the object has an ID @@ -286,6 +290,9 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): # Mark all domains that have not been updated as removed self.__prune() + # Update the stats + self.update_stats() + # Signal that we have actually fetched new data return True @@ -504,6 +511,57 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): return sources + # Stats + + total_domains : int | None + + dead_domains : int | None + + def update_stats(self): + """ + Updates the stats of this source + """ + stmt = ( + sqlmodel + .select( + sqlmodel.func.count(), + ) + .select_from( + SourceDomain, + ) + .where( + SourceDomain.source == self, + SourceDomain.removed_at == None, + ) + ) + + # Store the total number of domains + self.total_domains = self.backend.db.fetch_one(stmt) + + stmt = ( + sqlmodel + .select( + sqlmodel.func.count(), + ) + .select_from( + SourceDomain, + ) + .join( + checker.CheckerDomain, + checker.CheckerDomain.name == SourceDomain.name, + ) + .where( + SourceDomain.source == self, + SourceDomain.removed_at == None, + + # Only check dead domains + checker.CheckerDomain.status == False, + ) + ) + + # Store the total number of dead domains + self.dead_domains = self.backend.db.fetch_one(stmt) + class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True): __tablename__ = "source_domains" diff --git a/src/scripts/dnsbl.in b/src/scripts/dnsbl.in index db70b13..c6e8ced 100644 --- a/src/scripts/dnsbl.in +++ b/src/scripts/dnsbl.in @@ -264,6 +264,7 @@ class CLI(object): table.add_column(_("Created At")) table.add_column(_("Created_By")) table.add_column(_("Listed Domains"), justify="right") + table.add_column(_("Dead Domains"), justify="right") for source in list.sources: table.add_row( @@ -272,7 +273,11 @@ class CLI(object): source.url, source.created_at.isoformat(), source.created_by, - babel.numbers.format_decimal(len(source)), + babel.numbers.format_decimal(source.total_domains) + if source.total_domains else _("N/A"), + babel.numbers.format_percent( + source.dead_domains / source.total_domains, + ) if source.total_domains else _("N/A"), ) # Print the sources