]> git.ipfire.org Git - dbl.git/commitdiff
lists: Don't pull dead domains when optimizing
authorMichael Tremer <michael.tremer@ipfire.org>
Fri, 27 Feb 2026 13:43:28 +0000 (13:43 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Fri, 27 Feb 2026 13:43:28 +0000 (13:43 +0000)
Since we have millions of dead domains, we must be more efficient here
and not pull them since we already know that they will be completely
delisted.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/dbl/lists.py

index 0186dd185bcf8fc36616de03671cab5b9db5cd3a..f097b8a550e70403f0cc834da5e1332ae346d11a 100644 (file)
@@ -791,43 +791,69 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True):
 
                log.info("Optimizing %s..." % self)
 
-               # Fetch all whitelisted domains
-               whitelisted = await self.backend.db.fetch_as_set(
+               # Reset the status for all domains
+               await self.backend.db.execute(
                        sqlmodel
-                       .select(
-                               domains.Domain.name,
+                       .update(
+                               domains.Domain,
+                       )
+                       .values(
+                               subsumed = False,
+                               listed = True,
                        )
-                       .distinct()
                        .where(
-                               # Select only domains from this list
                                domains.Domain.list == self,
+                               domains.Domain.removed_at == None,
+                               sqlmodel.or_(
+                                       domains.Domain.subsumed == True,
+                                       domains.Domain.listed == False,
+                               ),
+                       )
+               )
 
-                               # Only select domains that should not be blocked
-                               domains.Domain.block == False,
+               # Delist all dead domains
+               await self.backend.db.execute(
+                       sqlmodel
+                       .update(
+                               domains.Domain,
+                       )
+                       .values(
+                               listed = False,
+                       )
+                       .where(
+                               # The domain must be on this list
+                               domains.Domain.list == self,
 
-                               # Ignore domains that have been removed
+                               # The domains must not be removed
                                domains.Domain.removed_at == None,
-                       ),
+
+                               # It must be considered as listed
+                               domains.Domain.listed == True,
+
+                               # The domain must be blocked
+                               domains.Domain.block == True,
+
+                               # The domain must be considered dead
+                               domains.Domain.dead == True,
+                       )
                )
 
-               # Fetch all dead domains
-               dead_names = await self.backend.db.fetch_as_set(
+               # Fetch all whitelisted domains
+               whitelisted = await self.backend.db.fetch_as_set(
                        sqlmodel
                        .select(
                                domains.Domain.name,
                        )
+                       .distinct()
                        .where(
                                # Select only domains from this list
                                domains.Domain.list == self,
 
-                               # Only select domains that should be blocked
-                               domains.Domain.block == True,
+                               # Only select domains that should not be blocked
+                               domains.Domain.block == False,
 
                                # Ignore domains that have been removed
                                domains.Domain.removed_at == None,
-
-                               # Only select domains confirmed dead
-                               domains.Domain.dead == True,
                        ),
                )
 
@@ -846,6 +872,9 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True):
 
                                # Ignore domains that have been removed
                                domains.Domain.removed_at == None,
+
+                               # Select everything that is still being listed
+                               domains.Domain.listed == True,
                        ),
                )
 
@@ -862,35 +891,11 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True):
                                if util.is_name_in(name, whitelisted):
                                        delisted_names.add(name)
 
-                       # Delist everything that is dead
-                       if name in dead_names:
-                               delisted_names.add(name)
-
                # Find any redundant domains
                for name in names:
                        if util.is_parent_in(name, names):
                                        redundant_names.add(name)
 
-               # Reset the status for all domains
-               await self.backend.db.execute(
-                       sqlmodel
-                       .update(
-                               domains.Domain,
-                       )
-                       .values(
-                               subsumed = False,
-                               listed = True,
-                       )
-                       .where(
-                               domains.Domain.list == self,
-                               domains.Domain.removed_at == None,
-                               sqlmodel.or_(
-                                       domains.Domain.subsumed == True,
-                                       domains.Domain.listed == False,
-                               ),
-                       )
-               )
-
                # Mark any redundant domains
                for batch in itertools.batched(redundant_names, 1000):
                        await self.backend.db.execute(