git.ipfire.org Git - dbl.git/commitdiff
checker: Be more efficient when fetching domains
author Michael Tremer <michael.tremer@ipfire.org>
Thu, 8 Jan 2026 11:05:46 +0000 (11:05 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Thu, 8 Jan 2026 11:05:46 +0000 (11:05 +0000)
We used to fetch domains that need to be checked in batches, but that
causes a lot of database load and does not keep the resolver busy.

This patch changes this so that we fetch a single large iterator from the
database once and feed its domains into the pool incrementally.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/dnsbl/checker.py

index fb2e475fc81ac17e313b4551d24dcd0f2da62c94..927796dfd22930e31d6052261eb1bda5f8203902 100644 (file)
@@ -40,35 +40,34 @@ class Checker(object):
                self.backend = backend
 
                # Initialize the executor
-               self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=64)
+               self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1024)
 
                # Initialize the resolver
                self.resolver = dns.resolver.Resolver()
 
                self.results = {}
 
-       def check(self, *domains, batch_size=1024):
+       def check(self, *domains, batch_size=10240):
                """
                        Checks all domains that need checking.
                """
-               threshold = 64
+               threshold = 1024
 
-               with self.executor:
-                       # Submit any passed domains
-                       for domain in domains:
-                               self.submit(domain)
+               # If no domains have been passed, fetch some from the database
+               if not domains:
+                       domains = self.get_domains()
 
+               with self.executor:
                        # Main Loop
                        while True:
                                # Submit some more domains for checking unless we have been given
                                # some by the caller.
-                               if not domains:
-                                       if len(self.results) < threshold:
-                                               for domain in self.get_domains(batch_size):
-                                                       self.submit(domain)
+                               for domain in domains:
+                                       self.submit(domain)
 
-                                               # Manually commit after a batch has been processed
-                                               self.backend.db.commit()
+                                       # Break when we have submitted enough domains
+                                       if len(self.results) >= threshold:
+                                               break
 
                                # Terminate if we have no domains left to check
                                if not self.results:
@@ -82,6 +81,9 @@ class Checker(object):
                                except TimeoutError:
                                        pass
 
+                               # Manually commit after a batch has been processed
+                               self.backend.db.commit()
+
                # Update all stats after we checked all domains
                for source in self.backend.sources:
                        source.update_stats()