From: Alexander Korotkov Date: Thu, 19 Sep 2019 18:30:19 +0000 (+0300) Subject: Improve handling of NULLs in KNN-GiST and KNN-SP-GiST X-Git-Tag: REL9_4_25~45 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=332eda5bd3d275fc14b721905b93c2b983a000d3;p=thirdparty%2Fpostgresql.git Improve handling of NULLs in KNN-GiST and KNN-SP-GiST This commit improves subject in two ways: * It removes ugliness of 02f90879e7, which stores distance values and null flags in two separate arrays after GISTSearchItem struct. Instead we pack both distance value and null flag in IndexOrderByDistance struct. Alignment overhead should be negligible, because we typically deal with at most few "col op const" expressions in ORDER BY clause. * It fixes handling of "col op NULL" expressions in KNN-SP-GiST. Now, these expressions are not passed to support functions, which can't deal with them. Instead, NULL result is implicitly assumed. In the future we may decide to teach support functions to deal with NULL arguments, but current solution is bugfix suitable for backpatch. Reported-by: Nikita Glukhov Discussion: https://postgr.es/m/826f57ee-afc7-8977-c44c-6111d18b02ec%40postgrespro.ru Author: Nikita Glukhov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4 --- diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 8cf22048321..68f74f99e96 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -34,9 +34,8 @@ * request it. recheck is not interesting when examining a non-leaf entry, * since we must visit the lower index page if there's any doubt. * - * If we are doing an ordered scan, so->distancesValues[] and - * so->distancesNulls[] is filled with distance data from the distance() - * functions before returning success. + * If we are doing an ordered scan, so->distances[] is filled with distance + * data from the distance() functions before returning success. 
* * We must decompress the key in the IndexTuple before passing it to the * sk_funcs (which actually are the opclass Consistent or Distance methods). @@ -56,8 +55,7 @@ gistindex_keytest(IndexScanDesc scan, GISTSTATE *giststate = so->giststate; ScanKey key = scan->keyData; int keySize = scan->numberOfKeys; - double *distance_value_p; - bool *distance_null_p; + IndexOrderByDistance *distance_p; Relation r = scan->indexRelation; *recheck_p = false; @@ -75,8 +73,8 @@ gistindex_keytest(IndexScanDesc scan, elog(ERROR, "invalid GiST tuple found on leaf page"); for (i = 0; i < scan->numberOfOrderBys; i++) { - so->distanceValues[i] = -get_float8_infinity(); - so->distanceNulls[i] = false; + so->distances[i].value = -get_float8_infinity(); + so->distances[i].isnull = false; } return true; } @@ -160,8 +158,7 @@ gistindex_keytest(IndexScanDesc scan, /* OK, it passes --- now let's compute the distances */ key = scan->orderByData; - distance_value_p = so->distanceValues; - distance_null_p = so->distanceNulls; + distance_p = so->distances; keySize = scan->numberOfOrderBys; while (keySize > 0) { @@ -176,8 +173,8 @@ gistindex_keytest(IndexScanDesc scan, if ((key->sk_flags & SK_ISNULL) || isNull) { /* Assume distance computes as null */ - *distance_value_p = 0.0; - *distance_null_p = true; + distance_p->value = 0.0; + distance_p->isnull = true; } else { @@ -208,13 +205,12 @@ gistindex_keytest(IndexScanDesc scan, key->sk_argument, Int16GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype)); - *distance_value_p = DatumGetFloat8(dist); - *distance_null_p = false; + distance_p->value = DatumGetFloat8(dist); + distance_p->isnull = false; } key++; - distance_value_p++; - distance_null_p++; + distance_p++; keySize--; } @@ -227,8 +223,7 @@ gistindex_keytest(IndexScanDesc scan, * * scan: index scan we are executing * pageItem: search queue item identifying an index page to scan - * myDistanceValues: distances array associated with pageItem, or NULL at the root - * myDistanceNulls: 
null flags for myDistanceValues array, or NULL at the root + * myDistances: distances array associated with pageItem, or NULL at the root * tbm: if not NULL, gistgetbitmap's output bitmap * ntids: if not NULL, gistgetbitmap's output tuple counter * @@ -244,8 +239,7 @@ gistindex_keytest(IndexScanDesc scan, */ static void gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, - double *myDistanceValues, bool *myDistanceNulls, - TIDBitmap *tbm, int64 *ntids) + IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; Buffer buffer; @@ -280,7 +274,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, GISTSearchItem *item; /* This can't happen when starting at the root */ - Assert(myDistanceValues != NULL && myDistanceNulls != NULL); + Assert(myDistances != NULL); oldcxt = MemoryContextSwitchTo(so->queueCxt); @@ -293,10 +287,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, /* Insert it into the queue using same distances as for this page */ tmpItem->head = item; tmpItem->lastHeap = NULL; - memcpy(GISTSearchTreeItemDistanceValues(tmpItem, scan->numberOfOrderBys), - myDistanceValues, sizeof(double) * scan->numberOfOrderBys); - memcpy(GISTSearchTreeItemDistanceNulls(tmpItem, scan->numberOfOrderBys), - myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys); + memcpy(tmpItem->distances, myDistances, + sizeof(tmpItem->distances[0]) * scan->numberOfOrderBys); (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); @@ -387,10 +379,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, /* Insert it into the queue using new distance data */ tmpItem->head = item; tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? 
item : NULL; - memcpy(GISTSearchTreeItemDistanceValues(tmpItem, nOrderBys), - so->distanceValues, sizeof(double) * nOrderBys); - memcpy(GISTSearchTreeItemDistanceNulls(tmpItem, nOrderBys), - so->distanceNulls, sizeof(bool) * nOrderBys); + memcpy(tmpItem->distances, so->distances, + sizeof(tmpItem->distances[0]) * nOrderBys); (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); @@ -473,10 +463,7 @@ getNextNearest(IndexScanDesc scan) /* visit an index page, extract its items into queue */ CHECK_FOR_INTERRUPTS(); - gistScanPage(scan, item, - GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys), - GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys), - NULL, NULL); + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); } pfree(item); @@ -514,7 +501,7 @@ gistgettuple(PG_FUNCTION_ARGS) fakeItem.blkno = GIST_ROOT_BLKNO; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); - gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL); + gistScanPage(scan, &fakeItem, NULL, NULL, NULL); } if (scan->numberOfOrderBys > 0) @@ -552,10 +539,7 @@ gistgettuple(PG_FUNCTION_ARGS) * this page, we fall out of the inner "do" and loop around to * return them. 
*/ - gistScanPage(scan, item, - GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys), - GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys), - NULL, NULL); + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); pfree(item); } while (so->nPageData == 0); @@ -586,7 +570,7 @@ gistgetbitmap(PG_FUNCTION_ARGS) fakeItem.blkno = GIST_ROOT_BLKNO; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); - gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids); + gistScanPage(scan, &fakeItem, NULL, tbm, &ntids); /* * While scanning a leaf page, ItemPointers of matching heap tuples will @@ -601,10 +585,7 @@ gistgetbitmap(PG_FUNCTION_ARGS) CHECK_FOR_INTERRUPTS(); - gistScanPage(scan, item, - GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys), - GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys), - tbm, &ntids); + gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids); pfree(item); } diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 15a5339f91f..3a38b79e18a 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -33,26 +33,23 @@ GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg) const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b; IndexScanDesc scan = (IndexScanDesc) arg; int i; - double *da = GISTSearchTreeItemDistanceValues(sa, scan->numberOfOrderBys), - *db = GISTSearchTreeItemDistanceValues(sb, scan->numberOfOrderBys); - bool *na = GISTSearchTreeItemDistanceNulls(sa, scan->numberOfOrderBys), - *nb = GISTSearchTreeItemDistanceNulls(sb, scan->numberOfOrderBys); /* Order according to distance comparison */ for (i = 0; i < scan->numberOfOrderBys; i++) { - if (na[i]) + if (sa->distances[i].isnull) { - if (!nb[i]) + if (!sb->distances[i].isnull) return 1; } - else if (nb[i]) + else if (sb->distances[i].isnull) { return -1; } else { - int cmp = 
float8_cmp_internal(da[i], db[i]); + int cmp = float8_cmp_internal(sa->distances[i].value, + sb->distances[i].value); if (cmp != 0) return cmp; @@ -147,8 +144,7 @@ gistbeginscan(PG_FUNCTION_ARGS) /* workspaces with size dependent on numberOfOrderBys: */ so->tmpTreeItem = palloc(SizeOfGISTSearchTreeItem(scan->numberOfOrderBys)); - so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys); - so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys); + so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys); so->qual_ok = true; /* in case there are zero keys */ scan->opaque = so; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index d99158fb39b..943e128a94d 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -112,6 +112,13 @@ typedef enum IndexUniqueCheck } IndexUniqueCheck; +/* Nullable "ORDER BY col op const" distance */ +typedef struct IndexOrderByDistance +{ + double value; + bool isnull; +} IndexOrderByDistance; + /* * generalized index_ interface routines (in indexam.c) */ diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 878e296816f..257c2150dcf 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -14,6 +14,7 @@ #ifndef GIST_PRIVATE_H #define GIST_PRIVATE_H +#include "access/genam.h" #include "access/gist.h" #include "access/itup.h" #include "fmgr.h" @@ -145,27 +146,13 @@ typedef struct GISTSearchTreeItem GISTSearchItem *head; /* first chain member */ GISTSearchItem *lastHeap; /* last heap-tuple member, if any */ - /* - * This data structure is followed by arrays of distance values and - * distance null flags. Size of both arrays is - * IndexScanDesc->numberOfOrderBys. See macros below for accessing those - * arrays. 
- */ + /* numberOfOrderBys entries */ + IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER]; } GISTSearchTreeItem; -#define SizeOfGISTSearchTreeItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchTreeItem)) + \ - (sizeof(double) + sizeof(bool)) * (n_distances)) - -/* - * We actually don't need n_distances compute pointer to distance values. - * Nevertheless take n_distances as argument to have same arguments list for - * GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls(). - */ -#define GISTSearchTreeItemDistanceValues(item, n_distances) \ - ((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchTreeItem)))) - -#define GISTSearchTreeItemDistanceNulls(item, n_distances) \ - ((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchTreeItem)) + sizeof(double) * (n_distances))) +#define SizeOfGISTSearchTreeItem(n_distances) \ + (offsetof(GISTSearchTreeItem, distances) + \ + sizeof(IndexOrderByDistance) * (n_distances)) /* * GISTScanOpaqueData: private state for a scan of a GiST index @@ -182,8 +169,7 @@ typedef struct GISTScanOpaqueData /* pre-allocated workspace arrays */ GISTSearchTreeItem *tmpTreeItem; /* workspace to pass to rb_insert */ - double *distanceValues; /* output area for gistindex_keytest */ - bool *distanceNulls; + IndexOrderByDistance *distances; /* output area for gistindex_keytest */ /* In a non-ordered search, returnable heap items are stored here: */ GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)]; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ab36aa3acb6..a346c1293be 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -824,6 +824,7 @@ IndexList IndexOnlyScan IndexOnlyScanState IndexOptInfo +IndexOrderByDistance IndexPath IndexRuntimeKeyInfo IndexScan