From: Michael Tremer Date: Fri, 27 Feb 2026 13:43:28 +0000 (+0000) Subject: lists: Don't pull dead domains when optimizing X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3dc02841f950aadb42f7072633bc113ed102eaea;p=dbl.git lists: Don't pull dead domains when optimizing Since we have millions of dead domains, we must be more efficient here and not pull them since we already know that they will be completely delisted. Signed-off-by: Michael Tremer --- diff --git a/src/dbl/lists.py b/src/dbl/lists.py index 0186dd1..f097b8a 100644 --- a/src/dbl/lists.py +++ b/src/dbl/lists.py @@ -791,43 +791,69 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): log.info("Optimizing %s..." % self) - # Fetch all whitelisted domains - whitelisted = await self.backend.db.fetch_as_set( + # Reset the status for all domains + await self.backend.db.execute( sqlmodel - .select( - domains.Domain.name, + .update( + domains.Domain, + ) + .values( + subsumed = False, + listed = True, ) - .distinct() .where( - # Select only domains from this list domains.Domain.list == self, + domains.Domain.removed_at == None, + sqlmodel.or_( + domains.Domain.subsumed == True, + domains.Domain.listed == False, + ), + ) + ) - # Only select domains that should not be blocked - domains.Domain.block == False, + # Delist all dead domains + await self.backend.db.execute( + sqlmodel + .update( + domains.Domain, + ) + .values( + listed = False, + ) + .where( + # The domain must be on this list + domains.Domain.list == self, - # Ignore domains that have been removed + # The domains must not be removed domains.Domain.removed_at == None, - ), + + # It must be considered as listed + domains.Domain.listed == True, + + # The domain must be blocked + domains.Domain.block == True, + + # The domain must be considered dead + domains.Domain.dead == True, + ) ) - # Fetch all dead domains - dead_names = await self.backend.db.fetch_as_set( + # Fetch all whitelisted domains + whitelisted = await 
self.backend.db.fetch_as_set( sqlmodel .select( domains.Domain.name, ) + .distinct() .where( # Select only domains from this list domains.Domain.list == self, - # Only select domains that should be blocked - domains.Domain.block == True, + # Only select domains that should not be blocked + domains.Domain.block == False, # Ignore domains that have been removed domains.Domain.removed_at == None, - - # Only select domains confirmed dead - domains.Domain.dead == True, ), ) @@ -846,6 +872,9 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): # Ignore domains that have been removed domains.Domain.removed_at == None, + + # Select everything that is still being listed + domains.Domain.listed == True, ), ) @@ -862,35 +891,11 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): if util.is_name_in(name, whitelisted): delisted_names.add(name) - # Delist everything that is dead - if name in dead_names: - delisted_names.add(name) - # Find any redundant domains for name in names: if util.is_parent_in(name, names): redundant_names.add(name) - # Reset the status for all domains - await self.backend.db.execute( - sqlmodel - .update( - domains.Domain, - ) - .values( - subsumed = False, - listed = True, - ) - .where( - domains.Domain.list == self, - domains.Domain.removed_at == None, - sqlmodel.or_( - domains.Domain.subsumed == True, - domains.Domain.listed == False, - ), - ) - ) - # Mark any redundant domains for batch in itertools.batched(redundant_names, 1000): await self.backend.db.execute(