-- PostgreSQL database dump
--
-\restrict 6nJVr9P5tK3JEHqsf1GqOxn2ArIH0TeeAQGlXEqO2KxOZaX0g6qxzMEPFvsyQVJ
+\restrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr
-- Dumped from database version 17.6 (Debian 17.6-0+deb13u1)
-- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1)
SET default_table_access_method = heap;
+--
+-- Name: checker_domains; Type: TABLE; Schema: public; Owner: -
+--
+
+-- Stores the outcome of the liveness check for each domain.
+CREATE TABLE public.checker_domains (
+ -- Domain name that has been checked
+ name text NOT NULL,
+ -- Time of the check; defaults to the time the row is inserted
+ checked_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
+ -- Whether the domain resolved when it was checked (true if the lookup returned data)
+ status boolean NOT NULL
+);
+
+
--
-- Name: lists; Type: TABLE; Schema: public; Owner: -
--
ALTER TABLE ONLY public.sources ALTER COLUMN id SET DEFAULT nextval('public.sources_id_seq'::regclass);
+--
+-- Name: checker_domains checker_domains_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+-- One row per domain name; the checker's upsert uses name as its conflict target.
+ALTER TABLE ONLY public.checker_domains
+ ADD CONSTRAINT checker_domains_pkey PRIMARY KEY (name);
+
+
--
-- Name: lists lists_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
-- PostgreSQL database dump complete
--
-\unrestrict 6nJVr9P5tK3JEHqsf1GqOxn2ArIH0TeeAQGlXEqO2KxOZaX0g6qxzMEPFvsyQVJ
+\unrestrict Xf18Fi8Ow9mJBKm7A0YHydhg05f2dQMA1HMDnTkGw1D1d51sv6ya9FmQD2QjJyr
--- /dev/null
+###############################################################################
+# #
+# dnsbl - A DNS Blocklist Compositor For IPFire #
+# Copyright (C) 2025 IPFire Development Team #
+# #
+# This program is free software: you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or #
+# (at your option) any later version. #
+# #
+# This program is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
+# GNU General Public License for more details. #
+# #
+# You should have received a copy of the GNU General Public License #
+# along with this program. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+
+import concurrent.futures
+import datetime
+import dns.rdatatype
+import dns.resolver
+import logging
+import sqlalchemy.dialects.postgresql
+import sqlmodel
+
+from . import database
+from . import sources
+
+# Setup logging
+log = logging.getLogger(__name__)
+
+class Checker(object):
+    """
+    Checks whether domains are still alive, i.e. whether they still resolve.
+
+    Lookups run on a thread pool. Every pending lookup is tracked in
+    self.results, which maps the future to the domain it was submitted for.
+    The outcome of each lookup is written to the checker_domains table.
+    """
+    def __init__(self, backend):
+        """
+        backend: the application backend. Its "db" attribute is used to
+        fetch domains, to store results and to commit.
+        """
+        self.backend = backend
+
+        # Initialize the executor
+        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=64)
+
+        # Initialize the resolver
+        self.resolver = dns.resolver.Resolver()
+
+        # Maps each pending future to the domain it has been submitted for
+        self.results = {}
+
+    def check(self, batch_size=1024):
+        """
+        Checks all domains that need checking.
+
+        batch_size: the maximum number of domains that is fetched from the
+        database whenever the queue runs low.
+
+        The executor is shut down when this method returns, so it can only
+        be called once per Checker instance.
+        """
+        threshold = 64
+
+        with self.executor:
+            while True:
+                # Submit some more tasks whenever we run low
+                if len(self.results) < threshold:
+                    # Lookups that are still running have not been stored yet, so
+                    # the database returns their domains again. Skip those, and
+                    # skip names that show up more than once in the same batch.
+                    pending = set(self.results.values())
+
+                    for domain in self.get_domains(batch_size):
+                        if domain in pending:
+                            continue
+
+                        self.submit(domain)
+                        pending.add(domain)
+
+                    # Manually commit after a batch has been processed
+                    self.backend.db.commit()
+
+                # Terminate if we have no domains left to check
+                if not self.results:
+                    break
+
+                try:
+                    for result in concurrent.futures.as_completed(self.results, timeout=1):
+                        self._store(result)
+
+                # If not everything has completed in time, we just start a new
+                # iteration. concurrent.futures.TimeoutError only became an alias
+                # of the built-in TimeoutError in Python 3.11, so it is caught by
+                # its own name here.
+                except concurrent.futures.TimeoutError:
+                    pass
+
+    def get_domains(self, limit=None):
+        """
+        Returns the names of all domains that need checking.
+
+        A domain needs checking if it has not been removed from its source and
+        has either never been checked or was last checked at least four weeks
+        ago. Domains that have never been checked come first.
+
+        limit: the maximum number of names to return. A falsy value (None or 0)
+        means no limit.
+        """
+        # NOTE(review): this is a naive local timestamp that is compared against
+        # a "timestamp with time zone" column - confirm that the database
+        # session time zone matches the local time zone.
+        cutoff = datetime.datetime.now() - datetime.timedelta(weeks=4)
+
+        stmt = (
+            sqlmodel
+            .select(
+                sources.SourceDomain.name,
+            )
+            # Outer join, so that domains without any check result are kept
+            .join(
+                CheckerDomain,
+                sources.SourceDomain.name == CheckerDomain.name,
+                isouter=True,
+            )
+            .where(
+                sources.SourceDomain.removed_at == None,
+
+                # Only return domains that have not been checked or where the last check
+                # was at least 4 weeks ago
+                sqlmodel.or_(
+                    CheckerDomain.checked_at == None,
+                    CheckerDomain.checked_at <= cutoff,
+                ),
+            )
+            .order_by(
+                sqlmodel.nullsfirst(CheckerDomain.checked_at),
+                sources.SourceDomain.name,
+            )
+        )
+
+        # Apply the limit (if any)
+        if limit:
+            stmt = stmt.limit(limit)
+
+        return self.backend.db.fetch(stmt)
+
+    def submit(self, domain, hostname=None):
+        """
+        Submits a new job to the queue.
+
+        domain: the domain the result will be stored for.
+        hostname: the name that is actually resolved. It defaults to the domain
+        itself and only differs when a CNAME is being followed.
+        """
+        if hostname is None:
+            hostname = domain
+
+        result = self.executor.submit(self.resolve, hostname)
+
+        # Remember which domain this lookup belongs to
+        self.results[result] = domain
+
+    def resolve(self, domain):
+        """
+        Queries the SOA record of the given name and returns the answer.
+
+        This runs in a worker thread. Any exception raised by the resolver is
+        kept in the future and handled in _store().
+        """
+        log.debug("Resolving %s...", domain)
+
+        # Fetch the result
+        return self.resolver.resolve(domain, "SOA", search=False, lifetime=60)
+
+    def _store(self, result):
+        """
+        Called after we have received a result for the queried domain.
+
+        result: a completed future that is tracked in self.results.
+
+        If the lookup ended in a CNAME without an answer, the CNAME target is
+        submitted as a new lookup for the same domain and nothing is stored
+        yet. Otherwise the status is written to the database. Any exception
+        other than NoAnswer, NXDOMAIN and NoNameservers is propagated to the
+        caller.
+        """
+        # Fetch the domain name
+        domain = self.results.pop(result)
+
+        # Fetch the result or raise any exceptions
+        try:
+            result.result()
+
+        # The response did not contain an answer to our question
+        except dns.resolver.NoAnswer as e:
+            response = e.response()
+
+            # If we have received a CNAME, we will resolve again
+            if response:
+                for rrset in response.answer:
+                    if rrset.rdtype == dns.rdatatype.CNAME:
+                        for record in rrset:
+                            hostname = record.target.to_text(omit_final_dot=True)
+
+                            return self.submit(domain, hostname=hostname)
+
+            # If there has been no response, we assume that the domain does not exist
+            status = False
+
+        # NXDOMAIN or SERVFAIL
+        except (dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
+            status = False
+
+        # There has been no exception, the query returned some data
+        else:
+            status = True
+
+        log.debug("Storing result for %s...", domain)
+
+        stmt = (
+            sqlalchemy.dialects.postgresql
+            .insert(
+                CheckerDomain,
+            )
+            .values({
+                "name" : domain,
+                "status" : status,
+            })
+            .on_conflict_do_update(
+                index_elements = [
+                    CheckerDomain.name,
+                ],
+                # A re-check has to replace the old status as well, not only
+                # the timestamp
+                set_ = {
+                    "checked_at" : sqlmodel.func.current_timestamp(),
+                    "status" : status,
+                }
+            )
+        )
+
+        # Store the result
+        self.backend.db.execute(stmt)
+
+
+class CheckerDomain(sqlmodel.SQLModel, database.BackendMixin, table=True):
+    """
+    The result of a domain check, mapped to the checker_domains table.
+    """
+    __tablename__ = "checker_domains"
+
+    # The domain name, which is also the primary key
+    name: str = sqlmodel.Field(primary_key=True)
+
+    # When the domain was checked; the database fills this in by default
+    checked_at: datetime.datetime = sqlmodel.Field(
+        sa_column_kwargs=dict(
+            server_default=sqlmodel.text("CURRENT_TIMESTAMP"),
+        ),
+    )
+
+    # Whether the domain resolved
+    status: bool