From: Michael Tremer Date: Tue, 30 Dec 2025 17:29:40 +0000 (+0000) Subject: domains: Create a unified table X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=66d3fc3c4d0a05d91e767e5fd78042d8f6a4a516;p=dbl.git domains: Create a unified table Signed-off-by: Michael Tremer --- diff --git a/Makefile.am b/Makefile.am index 7ab311d..ce0ecd2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -54,6 +54,7 @@ dist_pkgpython_PYTHON = \ src/dnsbl/auth.py \ src/dnsbl/checker.py \ src/dnsbl/database.py \ + src/dnsbl/domains.py \ src/dnsbl/exporters.py \ src/dnsbl/i18n.py \ src/dnsbl/lists.py \ diff --git a/src/database.sql b/src/database.sql index 7db7a75..599ab9b 100644 --- a/src/database.sql +++ b/src/database.sql @@ -2,7 +2,7 @@ -- PostgreSQL database dump -- -\restrict yRgVASc7gN0f8tKEAi5kP5eoBt9MCpuKcneTEiATwCoxABuwGL5zUJdhirU6taT +\restrict 9n7trKLmkNUVbvGuw4jSXAOGEnNoIEkdn1vooVDVC0KJpHkgAgdEvezLD9YkXew -- Dumped from database version 17.6 (Debian 17.6-0+deb13u1) -- Dumped by pg_dump version 17.6 (Debian 17.6-0+deb13u1) @@ -69,6 +69,43 @@ CREATE TABLE public.checker_domains ( ); +-- +-- Name: domains; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.domains ( + id integer NOT NULL, + name text NOT NULL, + source_id integer, + added_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + added_by text, + removed_at timestamp with time zone, + removed_by text, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + report_id uuid +); + + +-- +-- Name: domains_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.domains_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: domains_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.domains_id_seq OWNED BY public.domains.id; + + -- -- Name: lists; Type: TABLE; Schema: public; Owner: - -- @@ -158,40 +195,6 @@ CREATE TABLE public.reports ( ); --- --- Name: source_domains; Type: TABLE; Schema: public; Owner: - --- - -CREATE TABLE public.source_domains ( - id integer NOT NULL, - source_id integer NOT NULL, - name text NOT NULL, - added_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - removed_at timestamp with time zone, - updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - --- --- Name: source_domains_id_seq; Type: SEQUENCE; Schema: public; Owner: - --- - -CREATE SEQUENCE public.source_domains_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - --- --- Name: source_domains_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - --- - -ALTER SEQUENCE public.source_domains_id_seq OWNED BY public.source_domains.id; - - -- -- Name: sources; Type: TABLE; Schema: public; Owner: - -- @@ -242,24 +245,24 @@ ALTER TABLE ONLY public.api_keys ALTER COLUMN id SET DEFAULT nextval('public.api -- --- Name: lists id; Type: DEFAULT; Schema: public; Owner: - +-- Name: domains id; Type: DEFAULT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.lists ALTER COLUMN id SET DEFAULT nextval('public.lists_id_seq'::regclass); +ALTER TABLE ONLY public.domains ALTER COLUMN id SET DEFAULT nextval('public.domains_id_seq'::regclass); -- --- Name: nameservers id; Type: DEFAULT; Schema: public; Owner: - +-- Name: lists id; Type: DEFAULT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.nameservers ALTER COLUMN id SET DEFAULT nextval('public.nameservers_id_seq'::regclass); +ALTER TABLE ONLY public.lists ALTER COLUMN id SET DEFAULT nextval('public.lists_id_seq'::regclass); -- --- Name: source_domains id; Type: DEFAULT; Schema: public; Owner: - +-- Name: nameservers id; Type: DEFAULT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.source_domains ALTER COLUMN id SET DEFAULT nextval('public.source_domains_id_seq'::regclass); +ALTER TABLE ONLY public.nameservers ALTER COLUMN id SET DEFAULT nextval('public.nameservers_id_seq'::regclass); -- @@ -285,6 +288,14 @@ ALTER TABLE ONLY public.checker_domains ADD CONSTRAINT checker_domains_pkey PRIMARY KEY (name); +-- +-- Name: domains domains_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.domains + ADD CONSTRAINT domains_pkey PRIMARY KEY (id); + + -- -- Name: lists lists_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -309,14 +320,6 @@ ALTER TABLE ONLY public.reports ADD CONSTRAINT reports_pkey PRIMARY KEY (id); --- --- Name: source_domains source_domains_pkey; Type: CONSTRAINT; Schema: public; Owner: - --- - -ALTER TABLE ONLY public.source_domains - ADD CONSTRAINT source_domains_pkey PRIMARY KEY (id); - - -- -- Name: sources sources_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -333,31 +336,38 @@ CREATE INDEX api_keys_prefix ON public.api_keys USING btree (prefix) WHERE (dele -- --- Name: lists_unique; Type: INDEX; Schema: public; Owner: - +-- Name: domains_search; Type: INDEX; Schema: public; Owner: - -- -CREATE UNIQUE INDEX lists_unique ON public.lists USING btree (slug) WHERE (deleted_at IS NULL); +CREATE INDEX domains_search ON public.domains USING btree (name) WHERE (removed_at IS NULL); -- --- Name: reports_open; Type: INDEX; Schema: public; Owner: - +-- Name: domains_sources; Type: INDEX; Schema: public; Owner: - -- -CREATE INDEX reports_open ON public.reports USING btree (name) WHERE (closed_at IS NULL); +CREATE UNIQUE INDEX domains_sources ON public.domains USING btree (source_id, name) WHERE (removed_at IS NULL); + + +-- +-- Name: domains_updated_at; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX domains_updated_at ON public.domains USING btree (source_id, updated_at) WHERE (removed_at IS NULL); -- --- Name: source_domains_unique; Type: INDEX; Schema: public; Owner: - +-- Name: lists_unique; Type: INDEX; Schema: public; Owner: - -- -CREATE UNIQUE INDEX source_domains_unique ON public.source_domains USING btree (source_id, name) WHERE (removed_at IS NULL); +CREATE UNIQUE INDEX lists_unique ON public.lists USING btree (slug) WHERE (deleted_at IS NULL); -- --- Name: source_domains_updated_at; Type: INDEX; Schema: public; Owner: - +-- Name: reports_open; Type: INDEX; Schema: public; Owner: - -- -CREATE INDEX source_domains_updated_at ON public.source_domains USING btree (source_id, updated_at) WHERE (removed_at IS NULL); +CREATE INDEX reports_open ON public.reports USING btree (name) WHERE (closed_at IS NULL); -- @@ -368,19 +378,27 @@ CREATE UNIQUE INDEX sources_unique ON public.sources USING btree (list_id, url) -- --- Name: reports reports_list_id; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: domains domains_report_id; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.reports - ADD CONSTRAINT reports_list_id FOREIGN KEY (list_id) REFERENCES public.lists(id); +ALTER TABLE ONLY public.domains + ADD CONSTRAINT domains_report_id FOREIGN KEY (report_id) REFERENCES public.reports(id); -- --- Name: source_domains source_domains_source_id; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: domains domains_source_id; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.source_domains - ADD CONSTRAINT source_domains_source_id FOREIGN KEY (source_id) REFERENCES public.sources(id); +ALTER TABLE ONLY public.domains + ADD CONSTRAINT domains_source_id FOREIGN KEY (source_id) REFERENCES public.sources(id); + + +-- +-- Name: reports reports_list_id; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.reports + ADD CONSTRAINT reports_list_id FOREIGN KEY (list_id) REFERENCES public.lists(id); -- @@ -395,5 +413,5 @@ ALTER TABLE ONLY public.sources -- PostgreSQL database dump complete -- -\unrestrict yRgVASc7gN0f8tKEAi5kP5eoBt9MCpuKcneTEiATwCoxABuwGL5zUJdhirU6taT +\unrestrict 9n7trKLmkNUVbvGuw4jSXAOGEnNoIEkdn1vooVDVC0KJpHkgAgdEvezLD9YkXew diff --git a/src/dnsbl/__init__.py b/src/dnsbl/__init__.py index 4e218a2..af1f72f 100644 --- a/src/dnsbl/__init__.py +++ b/src/dnsbl/__init__.py @@ -34,6 +34,7 @@ log = logging.getLogger(__name__) # Import sub-modules from . import auth from . import database +from . import domains from . import lists from . import reports from . import sources @@ -107,20 +108,22 @@ class Backend(object): stmt = ( sqlmodel .select( - sources.SourceDomain, + domains.Domain, ) .join( - sources.Source, sources.SourceDomain.source_id == sources.Source.id, + sources.Source, + domains.Domain.source_id == sources.Source.id, ) .join( - lists.List, sources.Source.list_id == lists.List.id, + lists.List, + sources.Source.list_id == lists.List.id, ) .where( sqlmodel.or_( - sources.SourceDomain.name == name, - sqlmodel.literal(name).like("%." + sources.SourceDomain.name), + domains.Domain.name == name, + sqlmodel.literal(name).like("%." + domains.Domain.name), ), - sources.SourceDomain.removed_at == None, + domains.Domain.removed_at == None, sources.Source.deleted_at == None, lists.List.deleted_at == None, ) diff --git a/src/dnsbl/checker.py b/src/dnsbl/checker.py index 856d728..c5d1e18 100644 --- a/src/dnsbl/checker.py +++ b/src/dnsbl/checker.py @@ -27,7 +27,7 @@ import sqlalchemy.dialects.postgresql import sqlmodel from . import database -from . import sources +from . import domains # Setup logging log = logging.getLogger(__name__) @@ -88,15 +88,15 @@ class Checker(object): stmt = ( sqlmodel .select( - sources.SourceDomain.name, + domains.Domain.name, ) .join( CheckerDomain, - sources.SourceDomain.name == CheckerDomain.name, + domains.Domain.name == CheckerDomain.name, isouter=True, ) .where( - sources.SourceDomain.removed_at == None, + domains.Domain.removed_at == None, # Only return domains that have not been checked or where the last check # was at least 4 weeks ago @@ -107,7 +107,7 @@ class Checker(object): ) .order_by( sqlmodel.nullsfirst(CheckerDomain.checked_at), - sources.SourceDomain.name, + domains.Domain.name, ) ) diff --git a/src/dnsbl/domains.py b/src/dnsbl/domains.py new file mode 100644 index 0000000..5feeb3b --- /dev/null +++ b/src/dnsbl/domains.py @@ -0,0 +1,70 @@ +############################################################################### +# # +# dnsbl - A DNS Blocklist Compositor For IPFire # +# Copyright (C) 2025 IPFire Development Team # +# # +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program. If not, see . # +# # +############################################################################### + +import datetime +import sqlmodel +import uuid + +from . import database + +class Domain(sqlmodel.SQLModel, database.BackendMixin, table=True): + __tablename__ = "domains" + + def __str__(self): + return self.name + + # ID + id: int = sqlmodel.Field(primary_key=True) + + # Name + name: str + + # Source ID + source_id: int = sqlmodel.Field(foreign_key="sources.id") + + # Source + source: "Source" = sqlmodel.Relationship(back_populates="domains") + + # Added At + added_at: datetime.datetime = sqlmodel.Field( + sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")} + ) + + # Added By + added_by: str | None = None + + # Removed At + removed_at: datetime.datetime | None + + # Removed By + removed_by: str | None = None + + # Updated At + updated_at: datetime.datetime = sqlmodel.Field( + sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")} + ) + + # Report ID + + report_id: uuid.UUID | None = sqlmodel.Field(foreign_key="reports.id", default=None) + + # Report + + report: "Report" = sqlmodel.Relationship() diff --git a/src/dnsbl/lists.py b/src/dnsbl/lists.py index 99e7a8b..8071c31 100644 --- a/src/dnsbl/lists.py +++ b/src/dnsbl/lists.py @@ -27,6 +27,7 @@ import typing from . import checker from . import database +from . import domains from . import exporters from . import reports from . import sources @@ -200,21 +201,21 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): cte = ( sqlmodel .select( - sources.SourceDomain.name.label("name"), + domains.Domain.name.label("name"), ) .distinct( - sources.SourceDomain.name, + domains.Domain.name, ) .select_from( sources.Source, ) .join( - sources.SourceDomain, - sources.SourceDomain.source_id == sources.Source.id, + domains.Domain, + domains.Domain.source_id == sources.Source.id, ) .join( checker.CheckerDomain, - checker.CheckerDomain.name == sources.SourceDomain.name, + checker.CheckerDomain.name == domains.Domain.name, isouter=True, ) .where( @@ -225,7 +226,7 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): sources.Source.deleted_at == None, # Ignore domains that have been removed - sources.SourceDomain.removed_at == None, + domains.Domain.removed_at == None, # Only select domains that have been checked positive # or have not been checked, yet. @@ -235,7 +236,7 @@ class List(sqlmodel.SQLModel, database.BackendMixin, table=True): ), ) .order_by( - sources.SourceDomain.name, + domains.Domain.name, ) .cte("domains") ) diff --git a/src/dnsbl/sources.py b/src/dnsbl/sources.py index 3e59391..43bea5f 100644 --- a/src/dnsbl/sources.py +++ b/src/dnsbl/sources.py @@ -32,6 +32,7 @@ import sqlmodel from . import checker from . import database +from . import domains from . import util from .i18n import _ @@ -159,7 +160,7 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): updated_at : datetime.datetime | None # Domains - domains : "SourceDomain" = sqlmodel.Relationship(back_populates="source") + domains : "Domain" = sqlmodel.Relationship(back_populates="source") # Delete! @@ -411,32 +412,32 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): return domain - def add_domains(self, domains): + def add_domains(self, _domains): """ Adds or updates a domain. """ # Create a generator to format the values - domains = ( + _domains = ( { "source_id" : self.id, "name" : domain, } - for domain in domains + for domain in _domains ) # Submit domains in batches of 1000 values - for values in itertools.batched(domains, 1000): + for values in itertools.batched(_domains, 1000): stmt = ( sqlalchemy.dialects.postgresql .insert( - SourceDomain, + domains.Domain, ) .values(values) .on_conflict_do_update( index_elements = [ - SourceDomain.source_id, SourceDomain.name, + domains.Domain.source_id, domains.Domain.name, ], - index_where = SourceDomain.removed_at == None, + index_where = domains.Domain.removed_at == None, set_ = { "updated_at" : sqlmodel.func.current_timestamp(), } @@ -455,15 +456,15 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): stmt = ( sqlmodel .update( - SourceDomain, + domains.Domain, ) .values({ "removed_at" : sqlmodel.func.current_timestamp(), }) .where( - SourceDomain.source == self, - SourceDomain.updated_at < sqlmodel.func.current_timestamp(), - SourceDomain.removed_at == None, + domains.Domain.source == self, + domains.Domain.updated_at < sqlmodel.func.current_timestamp(), + domains.Domain.removed_at == None, ) ) self.backend.db.execute(stmt) @@ -479,8 +480,8 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): if source == self: continue - domains_self = sqlalchemy.orm.aliased(SourceDomain) - domains_other = sqlalchemy.orm.aliased(SourceDomain) + domains_self = sqlalchemy.orm.aliased(domains.Domain) + domains_other = sqlalchemy.orm.aliased(domains.Domain) stmt = ( sqlmodel @@ -526,11 +527,11 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): sqlmodel.func.count(), ) .select_from( - SourceDomain, + domains.Domain, ) .where( - SourceDomain.source == self, - SourceDomain.removed_at == None, + domains.Domain.source == self, + domains.Domain.removed_at == None, ) ) @@ -543,15 +544,15 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): sqlmodel.func.count(), ) .select_from( - SourceDomain, + domains.Domain, ) .join( checker.CheckerDomain, - checker.CheckerDomain.name == SourceDomain.name, + checker.CheckerDomain.name == domains.Domain.name, ) .where( - SourceDomain.source == self, - SourceDomain.removed_at == None, + domains.Domain.source == self, + domains.Domain.removed_at == None, # Only check dead domains checker.CheckerDomain.status == False, @@ -560,35 +561,3 @@ class Source(sqlmodel.SQLModel, database.BackendMixin, table=True): # Store the total number of dead domains self.dead_domains = self.backend.db.fetch_one(stmt) - - -class SourceDomain(sqlmodel.SQLModel, database.BackendMixin, table=True): - __tablename__ = "source_domains" - - def __str__(self): - return self.name - - # ID - id : int = sqlmodel.Field(primary_key=True) - - # Source ID - source_id : int = sqlmodel.Field(foreign_key="sources.id") - - # Source - source : "Source" = sqlmodel.Relationship(back_populates="domains") - - # Name - name : str - - # Added At - added_at : datetime.datetime = sqlmodel.Field( - sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")} - ) - - # Removed At - removed_at : datetime.datetime | None - - # Updated At - updated_at : datetime.datetime = sqlmodel.Field( - sa_column_kwargs = {"server_default" : sqlmodel.text("CURRENT_TIMESTAMP")} - )