Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans

author David Rowley <drowley@postgresql.org>

Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)

committer David Rowley <drowley@postgresql.org>

Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
author David Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
committer David Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c

index 661971e7070b7cf50813d2fd35bc9c080506eafb..bb1fd5d15da1a55a092fdb659178eb62ca28886c 100644 (file)
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -35,19 +35,21 @@
  #include "access/tableam.h"
  #include "access/tupdesc.h"
  #include "access/visibilitymap.h"
+#include "catalog/pg_type.h"
  #include "executor/execdebug.h"
  #include "executor/nodeIndexonlyscan.h"
  #include "executor/nodeIndexscan.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
  #include "storage/predicate.h"
+#include "utils/builtins.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
  
  
  static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
-static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
-                                                       TupleDesc itupdesc);
+static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                                                       IndexTuple itup, TupleDesc itupdesc);
  
  
  /* ----------------------------------------------------------------
@@ -208,7 +210,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
                         ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
                 }
                 else if (scandesc->xs_itup)
-                       StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
+                       StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
                 else
                         elog(ERROR, "no data returned for index-only scan");
  
@@ -266,7 +268,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
   * right now we don't need it elsewhere.
   */
  static void
-StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
+StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                               IndexTuple itup, TupleDesc itupdesc)
  {
         /*
          * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -279,6 +282,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
  
         ExecClearTuple(slot);
         index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
+
+       /*
+        * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
+        * sized allocation.  We mark this branch as unlikely as generally "name"
+        * is used only for the system catalogs and this would have to be a user
+        * query running on those or some other user table with an index on a name
+        * column.
+        */
+       if (unlikely(node->ioss_NameCStringAttNums != NULL))
+       {
+               int                     attcount = node->ioss_NameCStringCount;
+
+               for (int idx = 0; idx < attcount; idx++)
+               {
+                       int                     attnum = node->ioss_NameCStringAttNums[idx];
+                       Name            name;
+
+                       /* skip null Datums */
+                       if (slot->tts_isnull[attnum])
+                               continue;
+
+                       /* allocate the NAMEDATALEN and copy the datum into that memory */
+                       name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
+                                                                                        NAMEDATALEN);
+
+                       /* use namestrcpy to zero-pad all trailing bytes */
+                       namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
+                       slot->tts_values[attnum] = NameGetDatum(name);
+               }
+       }
+
         ExecStoreVirtualTuple(slot);
  }
  
@@ -492,8 +526,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
  {
         IndexOnlyScanState *indexstate;
         Relation        currentRelation;
+       Relation        indexRelation;
         LOCKMODE        lockmode;
         TupleDesc       tupDesc;
+       int                     indnkeyatts;
+       int                     namecount;
  
         /*
          * create state structure
@@ -566,7 +603,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
  
         /* Open the index relation. */
         lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
-       indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
+       indexRelation = index_open(node->indexid, lockmode);
+       indexstate->ioss_RelationDesc = indexRelation;
  
         /*
          * Initialize index-specific scan state
@@ -579,7 +617,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
          * build the index scan keys from the index qualification
          */
         ExecIndexBuildScanKeys((PlanState *) indexstate,
-                                                  indexstate->ioss_RelationDesc,
+                                                  indexRelation,
                                                    node->indexqual,
                                                    false,
                                                    &indexstate->ioss_ScanKeys,
@@ -593,7 +631,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
          * any ORDER BY exprs have to be turned into scankeys in the same way
          */
         ExecIndexBuildScanKeys((PlanState *) indexstate,
-                                                  indexstate->ioss_RelationDesc,
+                                                  indexRelation,
                                                    node->indexorderby,
                                                    true,
                                                    &indexstate->ioss_OrderByKeys,
@@ -622,6 +660,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
                 indexstate->ioss_RuntimeContext = NULL;
         }
  
+       indexstate->ioss_NameCStringAttNums = NULL;
+       indnkeyatts = indexRelation->rd_index->indnkeyatts;
+       namecount = 0;
+
+       /*
+        * The "name" type for btree uses text_ops which results in storing
+        * cstrings in the indexed keys rather than names.  Here we detect that in
+        * a generic way in case other index AMs want to do the same optimization.
+        * Check for opclasses with an opcintype of NAMEOID and an index tuple
+        * descriptor with CSTRINGOID.  If any of these are found, create an array
+        * marking the index attribute number of each of them.  StoreIndexTuple()
+        * handles copying the name Datums into a NAMEDATALEN-byte allocation.
+        */
+
+       /* First, count the number of such index keys */
+       for (int attnum = 0; attnum < indnkeyatts; attnum++)
+       {
+               if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+                       indexRelation->rd_opcintype[attnum] == NAMEOID)
+                       namecount++;
+       }
+
+       if (namecount > 0)
+       {
+               int                     idx = 0;
+
+               /*
+                * Now create an array to mark the attribute numbers of the keys that
+                * need to be converted from cstring to name.
+                */
+               indexstate->ioss_NameCStringAttNums = (AttrNumber *)
+                                                                       palloc(sizeof(AttrNumber) * namecount);
+
+               for (int attnum = 0; attnum < indnkeyatts; attnum++)
+               {
+                       if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+                               indexRelation->rd_opcintype[attnum] == NAMEOID)
+                               indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
+               }
+       }
+
+       indexstate->ioss_NameCStringCount = namecount;
+
         /*
          * all done.
          */
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat

index f2342bb328cb7b167111e473b0991cc3a360808f..8e5a259684ed0cd2fb5ba5c269b3f7bfe73c675b 100644 (file)
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -91,8 +91,11 @@
  # Here's an ugly little hack to save space in the system catalog indexes.
  # btree doesn't ordinarily allow a storage type different from input type;
  # but cstring and name are the same thing except for trailing padding,
-# and we can safely omit that within an index entry.  So we declare the
-# btree opclass for name as using cstring storage type.
+# so we choose to omit that within an index entry.  Here we declare the
+# btree opclass for name as using cstring storage type.  This does require
+# that we pad the cstring out with the full NAMEDATALEN bytes when performing
+# index-only scans.  See corresponding hacks in ExecInitIndexOnlyScan() and
+# StoreIndexTuple().
  { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',
    opcintype => 'name', opckeytype => 'cstring' },
  
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index caf0d41f0bb3b470b543f689c0b044d6d4505010..d05ec67231fd9a4926942729b3813981fc48584c 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1484,6 +1484,8 @@ typedef struct IndexScanState
   *             TableSlot                  slot for holding tuples fetched from the table
   *             VMBuffer                   buffer in use for visibility map testing, if any
   *             PscanLen                   size of parallel index-only scan descriptor
+ *             NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN
+ *             NameCStringCount   number of elements in the NameCStringAttNums array
   * ----------------
   */
  typedef struct IndexOnlyScanState
@@ -1503,6 +1505,8 @@ typedef struct IndexOnlyScanState
         TupleTableSlot *ioss_TableSlot;
         Buffer          ioss_VMBuffer;
         Size            ioss_PscanLen;
+       AttrNumber *ioss_NameCStringAttNums;
+       int                     ioss_NameCStringCount;
  } IndexOnlyScanState;
  
  /* ----------------
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out

index 8e5d53e712acec68479a269c81a3f8dd25d140e3..4aacf355821eeafe216363b295c41d61591d6f57 100644 (file)
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -399,3 +399,28 @@ Indexes:
      "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)
  
  DROP TABLE tbl;
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using nametbl_c1_c2_idx on nametbl
+   Index Cond: ((c2 = 'two'::name) AND (c1 = 1))
+(2 rows)
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+ c2  | c1 | c3 
+-----+----+----
+ two |  1 |  3
+(1 row)
+
+RESET enable_seqscan;
+DROP TABLE nametbl;
diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql

index 7e517483adf49b8e691b660dac294d7a9c8b051f..880f4bbc421bd5b2fbad401020616f993941ab4f 100644 (file)
--- a/src/test/regress/sql/index_including.sql
+++ b/src/test/regress/sql/index_including.sql
@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;
  ALTER TABLE tbl ALTER c3 TYPE bigint;
  \d tbl
  DROP TABLE tbl;
+
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+RESET enable_seqscan;
+
+DROP TABLE nametbl;
+\ No newline at end of file
author	David Rowley <drowley@postgresql.org>
	Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
committer	David Rowley <drowley@postgresql.org>
	Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
src/backend/executor/nodeIndexonlyscan.c		patch \| blob \| blame \| history
src/include/catalog/pg_opclass.dat		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/test/regress/expected/index_including.out		patch \| blob \| blame \| history
src/test/regress/sql/index_including.sql		patch \| blob \| blame \| history