]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans
authorDavid Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
committerDavid Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:23:05 +0000 (13:23 +1200)
As an optimization, we store "name" columns as cstrings in btree
indexes.

Here we modify it so that Index Only Scans convert these cstrings back
to names with NAMEDATALEN bytes rather than storing the cstring in the
tuple slot, as was happening previously.

Bug: #17855
Reported-by: Alexander Lakhin
Reviewed-by: Alexander Lakhin, Tom Lane
Discussion: https://postgr.es/m/17855-5f523e0f9769a566@postgresql.org
Backpatch-through: 12, all supported versions

src/backend/executor/nodeIndexonlyscan.c
src/include/catalog/pg_opclass.dat
src/include/nodes/execnodes.h
src/test/regress/expected/index_including.out
src/test/regress/sql/index_including.sql

index 661971e7070b7cf50813d2fd35bc9c080506eafb..bb1fd5d15da1a55a092fdb659178eb62ca28886c 100644 (file)
 #include "access/tableam.h"
 #include "access/tupdesc.h"
 #include "access/visibilitymap.h"
+#include "catalog/pg_type.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexonlyscan.h"
 #include "executor/nodeIndexscan.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
 
 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
-static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
-                                                       TupleDesc itupdesc);
+static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                                                       IndexTuple itup, TupleDesc itupdesc);
 
 
 /* ----------------------------------------------------------------
@@ -208,7 +210,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
                        ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
                }
                else if (scandesc->xs_itup)
-                       StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
+                       StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
                else
                        elog(ERROR, "no data returned for index-only scan");
 
@@ -266,7 +268,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
  * right now we don't need it elsewhere.
  */
 static void
-StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
+StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                               IndexTuple itup, TupleDesc itupdesc)
 {
        /*
         * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -279,6 +282,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
 
        ExecClearTuple(slot);
        index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
+
+       /*
+        * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
+        * sized allocation.  We mark this branch as unlikely as generally "name"
+        * is used only for the system catalogs and this would have to be a user
+        * query running on those or some other user table with an index on a name
+        * column.
+        */
+       if (unlikely(node->ioss_NameCStringAttNums != NULL))
+       {
+               int                     attcount = node->ioss_NameCStringCount;
+
+               for (int idx = 0; idx < attcount; idx++)
+               {
+                       int                     attnum = node->ioss_NameCStringAttNums[idx];
+                       Name            name;
+
+                       /* skip null Datums */
+                       if (slot->tts_isnull[attnum])
+                               continue;
+
+                       /* allocate the NAMEDATALEN and copy the datum into that memory */
+                       name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
+                                                                                        NAMEDATALEN);
+
+                       /* use namestrcpy to zero-pad all trailing bytes */
+                       namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
+                       slot->tts_values[attnum] = NameGetDatum(name);
+               }
+       }
+
        ExecStoreVirtualTuple(slot);
 }
 
@@ -492,8 +526,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 {
        IndexOnlyScanState *indexstate;
        Relation        currentRelation;
+       Relation        indexRelation;
        LOCKMODE        lockmode;
        TupleDesc       tupDesc;
+       int                     indnkeyatts;
+       int                     namecount;
 
        /*
         * create state structure
@@ -566,7 +603,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 
        /* Open the index relation. */
        lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
-       indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
+       indexRelation = index_open(node->indexid, lockmode);
+       indexstate->ioss_RelationDesc = indexRelation;
 
        /*
         * Initialize index-specific scan state
@@ -579,7 +617,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
         * build the index scan keys from the index qualification
         */
        ExecIndexBuildScanKeys((PlanState *) indexstate,
-                                                  indexstate->ioss_RelationDesc,
+                                                  indexRelation,
                                                   node->indexqual,
                                                   false,
                                                   &indexstate->ioss_ScanKeys,
@@ -593,7 +631,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
         * any ORDER BY exprs have to be turned into scankeys in the same way
         */
        ExecIndexBuildScanKeys((PlanState *) indexstate,
-                                                  indexstate->ioss_RelationDesc,
+                                                  indexRelation,
                                                   node->indexorderby,
                                                   true,
                                                   &indexstate->ioss_OrderByKeys,
@@ -622,6 +660,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
                indexstate->ioss_RuntimeContext = NULL;
        }
 
+       indexstate->ioss_NameCStringAttNums = NULL;
+       indnkeyatts = indexRelation->rd_index->indnkeyatts;
+       namecount = 0;
+
+       /*
+        * The "name" type for btree uses text_ops which results in storing
+        * cstrings in the indexed keys rather than names.  Here we detect that in
+        * a generic way in case other index AMs want to do the same optimization.
+        * Check for opclasses with an opcintype of NAMEOID and an index tuple
+        * descriptor with CSTRINGOID.  If any of these are found, create an array
+        * marking the index attribute number of each of them.  StoreIndexTuple()
+        * handles copying the name Datums into a NAMEDATALEN-byte allocation.
+        */
+
+       /* First, count the number of such index keys */
+       for (int attnum = 0; attnum < indnkeyatts; attnum++)
+       {
+               if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+                       indexRelation->rd_opcintype[attnum] == NAMEOID)
+                       namecount++;
+       }
+
+       if (namecount > 0)
+       {
+               int                     idx = 0;
+
+               /*
+                * Now create an array to mark the attribute numbers of the keys that
+                * need to be converted from cstring to name.
+                */
+               indexstate->ioss_NameCStringAttNums = (AttrNumber *)
+                                                                       palloc(sizeof(AttrNumber) * namecount);
+
+               for (int attnum = 0; attnum < indnkeyatts; attnum++)
+               {
+                       if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+                               indexRelation->rd_opcintype[attnum] == NAMEOID)
+                               indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
+               }
+       }
+
+       indexstate->ioss_NameCStringCount = namecount;
+
        /*
         * all done.
         */
index f2342bb328cb7b167111e473b0991cc3a360808f..8e5a259684ed0cd2fb5ba5c269b3f7bfe73c675b 100644 (file)
 # Here's an ugly little hack to save space in the system catalog indexes.
 # btree doesn't ordinarily allow a storage type different from input type;
 # but cstring and name are the same thing except for trailing padding,
-# and we can safely omit that within an index entry.  So we declare the
-# btree opclass for name as using cstring storage type.
+# so we choose to omit that within an index entry.  Here we declare the
+# btree opclass for name as using cstring storage type.  This does require
+# that we pad the cstring out with the full NAMEDATALEN bytes when performing
+# index-only scans.  See corresponding hacks in ExecInitIndexOnlyScan() and
+# StoreIndexTuple().
 { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',
   opcintype => 'name', opckeytype => 'cstring' },
 
index caf0d41f0bb3b470b543f689c0b044d6d4505010..d05ec67231fd9a4926942729b3813981fc48584c 100644 (file)
@@ -1484,6 +1484,8 @@ typedef struct IndexScanState
  *             TableSlot                  slot for holding tuples fetched from the table
  *             VMBuffer                   buffer in use for visibility map testing, if any
  *             PscanLen                   size of parallel index-only scan descriptor
+ *             NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN
+ *             NameCStringCount   number of elements in the NameCStringAttNums array
  * ----------------
  */
 typedef struct IndexOnlyScanState
@@ -1503,6 +1505,8 @@ typedef struct IndexOnlyScanState
        TupleTableSlot *ioss_TableSlot;
        Buffer          ioss_VMBuffer;
        Size            ioss_PscanLen;
+       AttrNumber *ioss_NameCStringAttNums;
+       int                     ioss_NameCStringCount;
 } IndexOnlyScanState;
 
 /* ----------------
index 8e5d53e712acec68479a269c81a3f8dd25d140e3..4aacf355821eeafe216363b295c41d61591d6f57 100644 (file)
@@ -399,3 +399,28 @@ Indexes:
     "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)
 
 DROP TABLE tbl;
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using nametbl_c1_c2_idx on nametbl
+   Index Cond: ((c2 = 'two'::name) AND (c1 = 1))
+(2 rows)
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+ c2  | c1 | c3 
+-----+----+----
+ two |  1 |  3
+(1 row)
+
+RESET enable_seqscan;
+DROP TABLE nametbl;
index 7e517483adf49b8e691b660dac294d7a9c8b051f..880f4bbc421bd5b2fbad401020616f993941ab4f 100644 (file)
@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;
 ALTER TABLE tbl ALTER c3 TYPE bigint;
 \d tbl
 DROP TABLE tbl;
+
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+RESET enable_seqscan;
+
+DROP TABLE nametbl;
\ No newline at end of file