Fix some performance issues in GIN query startup.

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c

index 02decb0adb93447b678af6aaeb74064cacc1a203..d23100be1072b37dd4410f234090919769359b42 100644 (file)
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -557,16 +557,18 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
                 qsort_arg(entryIndexes, key->nentries, sizeof(int),
                                   entryIndexByFrequencyCmp, key);
  
+               for (i = 1; i < key->nentries; i++)
+                       key->entryRes[entryIndexes[i]] = GIN_MAYBE;
                 for (i = 0; i < key->nentries - 1; i++)
                 {
                         /* Pass all entries <= i as FALSE, and the rest as MAYBE */
-                       for (j = 0; j <= i; j++)
-                               key->entryRes[entryIndexes[j]] = GIN_FALSE;
-                       for (j = i + 1; j < key->nentries; j++)
-                               key->entryRes[entryIndexes[j]] = GIN_MAYBE;
+                       key->entryRes[entryIndexes[i]] = GIN_FALSE;
  
                         if (key->triConsistentFn(key) == GIN_FALSE)
                                 break;
+
+                       /* Make this loop interruptible in case there are many keys */
+                       CHECK_FOR_INTERRUPTS();
                 }
                 /* i is now the last required entry. */
  
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c

index 55e2d49fd72244f826d5c14e2055ba4f37b8dabe..35706937f87e591c3d2baf5325fed544f187be2a 100644 (file)
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -68,8 +68,13 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
          *
          * Entries with non-null extra_data are never considered identical, since
          * we can't know exactly what the opclass might be doing with that.
+        *
+        * Also, give up de-duplication once we have 100 entries.  That avoids
+        * spending O(N^2) time on probably-fruitless de-duplication of large
+        * search-key sets.  The threshold of 100 is arbitrary but matches
+        * predtest.c's threshold for what's a large array.
          */
-       if (extra_data == NULL)
+       if (extra_data == NULL && so->totalentries < 100)
         {
                 for (i = 0; i < so->totalentries; i++)
                 {
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 6 Mar 2025 16:54:27 +0000 (11:54 -0500)
src/backend/access/gin/ginget.c		patch | blob | blame | history
src/backend/access/gin/ginscan.c		patch | blob | blame | history