]> git.ipfire.org Git - pbs.git/commitdiff
packages: Refactor search
authorMichael Tremer <michael.tremer@ipfire.org>
Wed, 7 Jun 2023 10:27:02 +0000 (10:27 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Wed, 7 Jun 2023 10:27:02 +0000 (10:27 +0000)
This patch drops the materialized view as it becomes painfully slow to
update. Instead we store the search data as an auto-generated row and
put a GIN index over it.

This should be sufficiently fast even with a large number of packages in
the database.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/buildservice/packages.py
src/database.sql

index 390030b901a9e20a3c48b2dfbb45a4c3e3d1beac..21fae5b3ca50fbaf426b6ab301f403405fc9e61c 100644 (file)
@@ -34,6 +34,53 @@ class Packages(base.Object):
                for row in res:
                        yield Package(self.backend, row.id, data=row)
 
+       def init(self):
+               self.db.execute("""
+                       CREATE TEMPORARY VIEW package_search_index AS (
+                               -- Source packages
+                               SELECT
+                                       packages.id AS package_id,
+                                       packages._search AS document
+                               FROM
+                                       builds
+                               LEFT JOIN
+                                       packages ON builds.pkg_id = packages.id
+                               WHERE
+                                       builds.deleted_at IS NULL
+                               AND
+                                       builds.test IS FALSE
+                               AND
+                                       packages.deleted_at IS NULL
+
+                               UNION
+
+                               -- Binary Packages
+                               SELECT
+                                       source_packages.id AS package_id,
+                                       packages._search AS document
+                               FROM
+                                       builds
+                               LEFT JOIN
+                                       jobs ON builds.id = jobs.build_id
+                               LEFT JOIN
+                                       job_packages ON jobs.id = job_packages.job_id
+                               LEFT JOIN
+                                       packages ON job_packages.pkg_id = packages.id
+                               LEFT JOIN
+                                       packages source_packages ON builds.pkg_id = source_packages.id
+                               WHERE
+                                       builds.deleted_at IS NULL
+                               AND
+                                       builds.test IS FALSE
+                               AND
+                                       jobs.deleted_at IS NULL
+                               AND
+                                       packages.deleted_at IS NULL
+                               AND
+                                       source_packages.deleted_at IS NULL
+                       )
+               """)
+
        def get_list(self):
                """
                        Returns a list with all package names and the summary line
@@ -177,9 +224,6 @@ class Packages(base.Object):
                # Import package data
                await pkg._import_archive(archive)
 
-               # Refresh the search index
-               self.refresh_search_index()
-
                return pkg
 
        def search(self, q, limit=None):
index 871641394212af7449bb2b989b551959a69d03c0..92976bfff9cd91d245de3fca71cf7accf337a250 100644 (file)
@@ -292,7 +292,8 @@ CREATE TABLE public.packages (
     digest bytea NOT NULL,
     deleted_at timestamp without time zone,
     deleted_by integer,
-    distro_id integer NOT NULL
+    distro_id integer NOT NULL,
+    _search tsvector GENERATED ALWAYS AS (((setweight(to_tsvector('simple'::regconfig, name), 'A'::"char") || setweight(to_tsvector('english'::regconfig, summary), 'B'::"char")) || setweight(to_tsvector('english'::regconfig, description), 'C'::"char"))) STORED
 );
 
 
@@ -684,36 +685,6 @@ CREATE TABLE public.package_files (
 );
 
 
---
--- Name: package_search_index_generator; Type: VIEW; Schema: public; Owner: -
---
-
-CREATE VIEW public.package_search_index_generator AS
- WITH p AS (
-         SELECT DISTINCT ON (packages.name) packages.id,
-            packages.name,
-            packages.summary,
-            packages.description
-           FROM public.packages
-          WHERE ((packages.deleted_at IS NULL) AND (packages.arch = 'src'::text))
-          ORDER BY packages.name, packages.created_at DESC
-        )
- SELECT p.id AS package_id,
-    ((setweight(to_tsvector('simple'::regconfig, p.name), 'A'::"char") || setweight(to_tsvector('english'::regconfig, p.summary), 'B'::"char")) || setweight(to_tsvector('english'::regconfig, p.description), 'C'::"char")) AS document
-   FROM p;
-
-
---
--- Name: package_search_index; Type: MATERIALIZED VIEW; Schema: public; Owner: -
---
-
-CREATE MATERIALIZED VIEW public.package_search_index AS
- SELECT package_search_index_generator.package_id,
-    package_search_index_generator.document
-   FROM public.package_search_index_generator
-  WITH NO DATA;
-
-
 --
 -- Name: packages_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1571,6 +1542,13 @@ CREATE INDEX builds_deleted ON public.builds USING btree (deleted_at) WHERE (del
 CREATE INDEX builds_deprecating_build_id ON public.builds USING btree (deprecating_build_id) WHERE ((deleted_at IS NULL) AND (deprecated_at IS NOT NULL));
 
 
+--
+-- Name: builds_not_tests; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE UNIQUE INDEX builds_not_tests ON public.builds USING btree (id) WHERE ((test IS FALSE) AND (deleted_at IS NULL));
+
+
 --
 -- Name: builds_pkg_id; Type: INDEX; Schema: public; Owner: -
 --
@@ -1705,17 +1683,17 @@ CREATE INDEX package_files_pkg_id ON public.package_files USING btree (pkg_id);
 
 
 --
--- Name: package_search_index_unique; Type: INDEX; Schema: public; Owner: -
+-- Name: packages_name; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE UNIQUE INDEX package_search_index_unique ON public.package_search_index USING btree (package_id);
+CREATE INDEX packages_name ON public.packages USING btree (name);
 
 
 --
--- Name: packages_name; Type: INDEX; Schema: public; Owner: -
+-- Name: packages_search; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX packages_name ON public.packages USING btree (name);
+CREATE INDEX packages_search ON public.packages USING gin (_search) WHERE (deleted_at IS NULL);
 
 
 --