From: Michael Tremer Date: Thu, 8 Jan 2026 11:05:46 +0000 (+0000) Subject: checker: Be more efficient when fetching domains X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f29b6d2537e4054088b133b9834b62b0bdbbff77;p=dbl.git checker: Be more efficient when fetching domains We used to fetch domains that need to be checked in batches, but that causes a lot of database load and does not keep the resolver busy. This patch changes this so that we fetch a single large iterator from the database once, which we then feed into the pool iteratively. Signed-off-by: Michael Tremer --- diff --git a/src/dnsbl/checker.py b/src/dnsbl/checker.py index fb2e475..927796d 100644 --- a/src/dnsbl/checker.py +++ b/src/dnsbl/checker.py @@ -40,35 +40,34 @@ class Checker(object): self.backend = backend # Initialize the executor - self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=64) + self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1024) # Initialize the resolver self.resolver = dns.resolver.Resolver() self.results = {} - def check(self, *domains, batch_size=1024): + def check(self, *domains, batch_size=10240): """ Checks all domains that need checking. """ - threshold = 64 + threshold = 1024 - with self.executor: - # Submit any passed domains - for domain in domains: - self.submit(domain) + # If no domains have been passed, fetch some from the database + if not domains: + domains = self.get_domains() + with self.executor: # Main Loop while True: # Submit some more domains for checking unless we have been given # some by the caller. 
- if not domains: - if len(self.results) < threshold: - for domain in self.get_domains(batch_size): - self.submit(domain) + for domain in domains: + self.submit(domain) - # Manually commit after a batch has been processed - self.backend.db.commit() + # Break when we have submitted enough domains + if len(self.results) >= threshold: + break # Terminate if we have no domains left to check if not self.results: @@ -82,6 +81,9 @@ class Checker(object): except TimeoutError: pass + # Manually commit after a batch has been processed + self.backend.db.commit() + # Update all stats after we checked all domains for source in self.backend.sources: source.update_stats()