From: Michael Tremer Date: Wed, 7 Jun 2023 10:27:02 +0000 (+0000) Subject: packages: Refactor search X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5f08ee3673abe365fa1ea80a7219e2b3bbb7687f;p=pbs.git packages: Refactor search This patch drops the materialized view as it becomes painfully slow to update. Instead we store the search data as an auto-generated row and put a GIN index over it. This should be sufficiently fast even with a large number of packages in the database. Signed-off-by: Michael Tremer --- diff --git a/src/buildservice/packages.py b/src/buildservice/packages.py index 390030b9..21fae5b3 100644 --- a/src/buildservice/packages.py +++ b/src/buildservice/packages.py @@ -34,6 +34,53 @@ class Packages(base.Object): for row in res: yield Package(self.backend, row.id, data=row) + def init(self): + self.db.execute(""" + CREATE TEMPORARY VIEW package_search_index AS ( + -- Source packages + SELECT + packages.id AS package_id, + packages._search AS document + FROM + builds + LEFT JOIN + packages ON builds.pkg_id = packages.id + WHERE + builds.deleted_at IS NULL + AND + builds.test IS FALSE + AND + packages.deleted_at IS NULL + + UNION + + -- Binary Packages + SELECT + source_packages.id AS package_id, + packages._search AS document + FROM + builds + LEFT JOIN + jobs ON builds.id = jobs.build_id + LEFT JOIN + job_packages ON jobs.id = job_packages.job_id + LEFT JOIN + packages ON job_packages.pkg_id = packages.id + LEFT JOIN + packages source_packages ON builds.pkg_id = source_packages.id + WHERE + builds.deleted_at IS NULL + AND + builds.test IS FALSE + AND + jobs.deleted_at IS NULL + AND + packages.deleted_at IS NULL + AND + source_packages.deleted_at IS NULL + ) + """) + def get_list(self): """ Returns a list with all package names and the summary line @@ -177,9 +224,6 @@ class Packages(base.Object): # Import package data await pkg._import_archive(archive) - # Refresh the search index - self.refresh_search_index() - return pkg def search(self, q, limit=None): diff --git a/src/database.sql b/src/database.sql index 87164139..92976bff 100644 --- a/src/database.sql +++ b/src/database.sql @@ -292,7 +292,8 @@ CREATE TABLE public.packages ( digest bytea NOT NULL, deleted_at timestamp without time zone, deleted_by integer, - distro_id integer NOT NULL + distro_id integer NOT NULL, + _search tsvector GENERATED ALWAYS AS (((setweight(to_tsvector('simple'::regconfig, name), 'A'::"char") || setweight(to_tsvector('english'::regconfig, summary), 'B'::"char")) || setweight(to_tsvector('english'::regconfig, description), 'C'::"char"))) STORED ); @@ -684,36 +685,6 @@ CREATE TABLE public.package_files ( ); --- --- Name: package_search_index_generator; Type: VIEW; Schema: public; Owner: - --- - -CREATE VIEW public.package_search_index_generator AS - WITH p AS ( - SELECT DISTINCT ON (packages.name) packages.id, - packages.name, - packages.summary, - packages.description - FROM public.packages - WHERE ((packages.deleted_at IS NULL) AND (packages.arch = 'src'::text)) - ORDER BY packages.name, packages.created_at DESC - ) - SELECT p.id AS package_id, - ((setweight(to_tsvector('simple'::regconfig, p.name), 'A'::"char") || setweight(to_tsvector('english'::regconfig, p.summary), 'B'::"char")) || setweight(to_tsvector('english'::regconfig, p.description), 'C'::"char")) AS document - FROM p; - - --- --- Name: package_search_index; Type: MATERIALIZED VIEW; Schema: public; Owner: - --- - -CREATE MATERIALIZED VIEW public.package_search_index AS - SELECT package_search_index_generator.package_id, - package_search_index_generator.document - FROM public.package_search_index_generator - WITH NO DATA; - - -- -- Name: packages_id_seq; Type: SEQUENCE; Schema: public; Owner: - -- @@ -1571,6 +1542,13 @@ CREATE INDEX builds_deleted ON public.builds USING btree (deleted_at) WHERE (del CREATE INDEX builds_deprecating_build_id ON public.builds USING btree (deprecating_build_id) WHERE ((deleted_at IS NULL) AND (deprecated_at IS NOT NULL)); +-- +-- Name: builds_not_tests; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX builds_not_tests ON public.builds USING btree (id) WHERE ((test IS FALSE) AND (deleted_at IS NULL)); + + -- -- Name: builds_pkg_id; Type: INDEX; Schema: public; Owner: - -- @@ -1705,17 +1683,17 @@ CREATE INDEX package_files_pkg_id ON public.package_files USING btree (pkg_id); -- --- Name: package_search_index_unique; Type: INDEX; Schema: public; Owner: - +-- Name: packages_name; Type: INDEX; Schema: public; Owner: - -- -CREATE UNIQUE INDEX package_search_index_unique ON public.package_search_index USING btree (package_id); +CREATE INDEX packages_name ON public.packages USING btree (name); -- --- Name: packages_name; Type: INDEX; Schema: public; Owner: - +-- Name: packages_search; Type: INDEX; Schema: public; Owner: - -- -CREATE INDEX packages_name ON public.packages USING btree (name); +CREATE INDEX packages_search ON public.packages USING gin (_search) WHERE (deleted_at IS NULL); --