return canary
- @functools.cached_property
- def __domains(self):
+ async def get_domains(self):
"""
- A CTE to access all (active) domains on this list
+ Returns all domains that are on this list
"""
- # Fetch all domains that should be blocked
- blocked_domains = (
+ names = self.backend.db.fetch(
sqlmodel
.select(
- domains.Domain,
- )
- .where(
- # Select only domains from this list
- domains.Domain.list == self,
-
- # Only select domains that should be blocked
- domains.Domain.block == True,
-
- # Ignore domains that have been removed
- domains.Domain.removed_at == None,
-
- # Ignore any domains that are subsumed by another domain
- domains.Domain.subsumed == False,
-
- # Only select domains that are not dead
- # or have not been checked, yet.
- sqlmodel.or_(
- domains.Domain.dead == None,
- domains.Domain.dead == False,
- ),
- )
- .cte("blocked_domains")
- )
-
- # Fetch all whitelisted domains
- whitelisted_domains = (
- sqlmodel
- .select(
- domains.Domain,
+ domains.Domain.name,
)
+ .distinct()
.where(
- # Select only domains from this list
+ # Only fetch domains from this list
domains.Domain.list == self,
- # Only select domains that should not be blocked
- domains.Domain.block == False,
-
- # Ignore domains that have been removed
+ # Ignore domains that have been removed
domains.Domain.removed_at == None,
- )
- .cte("whitelisted_domains")
- )
- # Remove any whitelisted and subdomains of any whitelisted domains
- # from the list of blocked domains
- listed_domains = (
- sqlmodel
- .select(
- blocked_domains.c.name,
- )
- .distinct(
- blocked_domains.c.name,
- )
- .where(
- ~sqlmodel.exists(
- sqlmodel
- .select(
- whitelisted_domains.c.name,
- )
- .where(
- (blocked_domains.c.name == whitelisted_domains.c.name) |
- (blocked_domains.c.name.like("%." + whitelisted_domains.c.name))
- )
- )
- )
- .cte("listed_domains")
- )
-
- return listed_domains
-
- async def get_domains(self):
- """
- Returns all domains that are on this list
- """
- domains = self.backend.db.fetch(
- sqlmodel
- .select(
- self.__domains.c.name,
+ # Only fetch listed domains
+ domains.Domain.listed == True,
)
+ .order_by(
+ domains.Domain.name,
+ ),
)
canary_inserted = False
# Walk through all domains and insert the canary
- async for domain in domains:
+ async for name in names:
# If we have not inserted the canary, yet, we will do
# it whenever it alphabetically fits
- if not canary_inserted and domain > self.canary:
+ if not canary_inserted and name > self.canary:
yield self.canary
canary_inserted = True
# Add the domain, too
- yield domain
+ yield name
# If we have added all domains but not the canary, we will add the canary anyways
if not canary_inserted:
await self.update_stats()
async def update_stats(self):
- stmt = (
+ # Store the number of total domains
+ self.total_domains = await self.backend.db.fetch_one(
sqlmodel
.select(
- sqlmodel.func.count(
- self.__domains.c.name,
- ),
+ sqlmodel.func.count(),
)
- )
+ .where(
+ domains.Domain.list == self,
- # Store the number of total domains
- self.total_domains = await self.backend.db.fetch_one(stmt)
+ # Select only domains from this list
+ domains.Domain.list == self,
+
+ # Only select domains that should be blocked
+ domains.Domain.block == True,
+
+ # Ignore domains that have been removed
+ domains.Domain.removed_at == None,
+
+ # Only count listed domains
+ domains.Domain.listed == True,
+ )
+ )
# Store the number of subsumed domains
self.subsumed_domains = await self.backend.db.fetch_one(
"""
log.info("Optimizing %s..." % self)
- # Fetch all domains on this list
- names = await self.backend.db.fetch_as_set(
+ # Fetch all whitelisted domains
+ whitelisted = await self.backend.db.fetch_as_set(
sqlmodel
.select(
- domains.Domain.name
+ domains.Domain.name,
)
.distinct()
.where(
+ # Select only domains from this list
domains.Domain.list == self,
+
+ # Only select domains that should not be blocked
+ domains.Domain.block == False,
+
+ # Ignore domains that have been removed
domains.Domain.removed_at == None,
+ ),
+ )
+
+ # Fetch all potentially blocked domains
+ names = await self.backend.db.fetch_as_set(
+ sqlmodel
+ .select(
+ domains.Domain.name,
)
+ .where(
+ # Select only domains from this list
+ domains.Domain.list == self,
+
+ # Only select domains that should be blocked
+ domains.Domain.block == True,
+
+ # Ignore domains that have been removed
+ domains.Domain.removed_at == None,
+
+ # Only select domains that are not dead
+ # or have not been checked, yet.
+ sqlmodel.or_(
+ domains.Domain.dead == None,
+ domains.Domain.dead == False,
+ ),
+ ),
)
+ # Collect all names that must be de-listed because they match the whitelist
+ delisted_names = set()
+
# Collect all names that are redundant
redundant_names = set()
- # Walk through all domains
+ # De-list every name that matches the whitelist
for name in names:
- parent = name
+ if util.is_name_in(name, whitelisted):
+ delisted_names.add(name)
- # Check if any parent domain is also listed
- while "." in parent:
- *garbage, parent = parent.partition(".")
-
- # If the domain is already listed, we ignore it
- if parent in names:
+ # Walk through all domains
+ for name in names:
+ if util.is_parent_in(name, names):
redundant_names.add(name)
- break
-
- log.info(_("Identified %s redunduant domain(s)") % len(redundant_names))
# Reset the status for all domains
await self.backend.db.execute(
)
.values(
subsumed = False,
+ listed = True,
)
.where(
domains.Domain.list == self,
domains.Domain.removed_at == None,
- domains.Domain.subsumed == True,
+ sqlmodel.or_(
+ domains.Domain.subsumed == True,
+ domains.Domain.listed == False,
+ ),
)
)
- # De-list the redundant domains
+ # Mark any redundant domains
for batch in itertools.batched(redundant_names, 1000):
await self.backend.db.execute(
sqlmodel
)
.values(
subsumed = True,
+ listed = False,
)
.where(
domains.Domain.list == self,
)
)
+ # De-list anything that has been whitelisted
+ for batch in itertools.batched(delisted_names, 1000):
+ await self.backend.db.execute(
+ sqlmodel
+ .update(
+ domains.Domain,
+ )
+ .values(
+ listed = False,
+ )
+ .where(
+ domains.Domain.list == self,
+ domains.Domain.removed_at == None,
+ domains.Domain.name.in_(
+ batch,
+ )
+ )
+ )
+
+ log.info(_("Identified %s redunduant domain(s)") % len(redundant_names))
+ log.info(_("De-listed %s domain(s)") % len(delisted_names))
+
# Update all stats afterwards
if update_stats:
await self.update_stats()