From: Michael Paquier Date: Thu, 25 Dec 2025 06:13:39 +0000 (+0900) Subject: Move attribute statistics functions to stat_utils.c X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;p=thirdparty%2Fpostgresql.git Move attribute statistics functions to stat_utils.c Many of the operations done for attribute stats in attribute_stats.c share the same logic as extended stats, as done by a patch under discussion to add support for extended stats import and export. All the pieces necessary for extended statistics are moved to stat_utils.c, which is the file where common facilities are shared for stats files. The following renames are done: * get_attr_stat_type() -> statatt_get_type() * init_empty_stats_tuple() -> statatt_init_empty_tuple() * set_stats_slot() -> statatt_set_slot() * get_elem_stat_type() -> statatt_get_elem_type() * text_to_stavalues() -> statatt_build_stavalues() * get_attr_expr() -> statatt_get_index_expr() While on it, this commit adds more documentation for all these functions, describing in more detail their internals and the dependencies that have been implied for attribute statistics. The same concepts apply to extended statistics, to some degree. 
Author: Corey Huinker Reviewed-by: Chao Li Reviewed-by: Yu Wang Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com --- diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index ef4d768feab..06bc1a05fc1 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -20,10 +20,8 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/namespace.h" -#include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" #include "statistics/statistics.h" #include "statistics/stat_utils.h" #include "utils/array.h" @@ -32,10 +30,6 @@ #include "utils/lsyscache.h" #include "utils/syscache.h" -#define DEFAULT_NULL_FRAC Float4GetDatum(0.0) -#define DEFAULT_AVG_WIDTH Int32GetDatum(0) /* unknown */ -#define DEFAULT_N_DISTINCT Float4GetDatum(0.0) /* unknown */ - /* * Positional argument numbers, names, and types for * attribute_statistics_update() and pg_restore_attribute_stats(). 
@@ -111,24 +105,9 @@ static struct StatsArgInfo cleararginfo[] = }; static bool attribute_statistics_update(FunctionCallInfo fcinfo); -static Node *get_attr_expr(Relation rel, int attnum); -static void get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr); -static bool get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr); -static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, - Oid typid, int32 typmod, bool *ok); -static void set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull); static void upsert_pg_statistic(Relation starel, HeapTuple oldtup, const Datum *values, const bool *nulls, const bool *replaces); static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit); -static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - Datum *values, bool *nulls, bool *replaces); /* * Insert or Update Attribute Statistics @@ -298,16 +277,16 @@ attribute_statistics_update(FunctionCallInfo fcinfo) } /* derive information from attribute */ - get_attr_stat_type(reloid, attnum, - &atttypid, &atttypmod, - &atttyptype, &atttypcoll, - &eq_opr, &lt_opr); + statatt_get_type(reloid, attnum, + &atttypid, &atttypmod, + &atttyptype, &atttypcoll, + &eq_opr, &lt_opr); /* if needed, derive element type */ if (do_mcelem || do_dechist) { - if (!get_elem_stat_type(atttypid, atttyptype, - &elemtypid, &elem_eq_opr)) + if (!statatt_get_elem_type(atttypid, atttyptype, + &elemtypid, &elem_eq_opr)) { ereport(WARNING, (errmsg("could not determine element type of column \"%s\"", attname), @@ -361,8 +340,8 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (HeapTupleIsValid(statup)) heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls); else - 
init_empty_stats_tuple(reloid, attnum, inherited, values, nulls, - replaces); + statatt_init_empty_tuple(reloid, attnum, inherited, values, nulls, + replaces); /* if specified, set to argument values */ if (!PG_ARGISNULL(NULL_FRAC_ARG)) @@ -386,18 +365,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { bool converted; Datum stanumbers = PG_GETARG_DATUM(MOST_COMMON_FREQS_ARG); - Datum stavalues = text_to_stavalues("most_common_vals", - &array_in_fn, - PG_GETARG_DATUM(MOST_COMMON_VALS_ARG), - atttypid, atttypmod, - &converted); + Datum stavalues = statatt_build_stavalues("most_common_vals", + &array_in_fn, + PG_GETARG_DATUM(MOST_COMMON_VALS_ARG), + atttypid, atttypmod, + &converted); if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_MCV, - eq_opr, atttypcoll, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_MCV, + eq_opr, atttypcoll, + stanumbers, false, stavalues, false); } else result = false; @@ -409,18 +388,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo) Datum stavalues; bool converted = false; - stavalues = text_to_stavalues("histogram_bounds", - &array_in_fn, - PG_GETARG_DATUM(HISTOGRAM_BOUNDS_ARG), - atttypid, atttypmod, - &converted); + stavalues = statatt_build_stavalues("histogram_bounds", + &array_in_fn, + PG_GETARG_DATUM(HISTOGRAM_BOUNDS_ARG), + atttypid, atttypmod, + &converted); if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_HISTOGRAM, - lt_opr, atttypcoll, - 0, true, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_HISTOGRAM, + lt_opr, atttypcoll, + 0, true, stavalues, false); } else result = false; @@ -433,10 +412,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) ArrayType *arry = construct_array_builtin(elems, 1, FLOAT4OID); Datum stanumbers = PointerGetDatum(arry); - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_CORRELATION, - lt_opr, atttypcoll, - stanumbers, false, 0, 
true); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_CORRELATION, + lt_opr, atttypcoll, + stanumbers, false, 0, true); } /* STATISTIC_KIND_MCELEM */ @@ -446,18 +425,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo) bool converted = false; Datum stavalues; - stavalues = text_to_stavalues("most_common_elems", - &array_in_fn, - PG_GETARG_DATUM(MOST_COMMON_ELEMS_ARG), - elemtypid, atttypmod, - &converted); + stavalues = statatt_build_stavalues("most_common_elems", + &array_in_fn, + PG_GETARG_DATUM(MOST_COMMON_ELEMS_ARG), + elemtypid, atttypmod, + &converted); if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_MCELEM, - elem_eq_opr, atttypcoll, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_MCELEM, + elem_eq_opr, atttypcoll, + stanumbers, false, stavalues, false); } else result = false; @@ -468,10 +447,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { Datum stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG); - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_DECHIST, - elem_eq_opr, atttypcoll, - stanumbers, false, 0, true); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_DECHIST, + elem_eq_opr, atttypcoll, + stanumbers, false, 0, true); } /* @@ -486,18 +465,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo) bool converted = false; Datum stavalues; - stavalues = text_to_stavalues("range_bounds_histogram", - &array_in_fn, - PG_GETARG_DATUM(RANGE_BOUNDS_HISTOGRAM_ARG), - atttypid, atttypmod, - &converted); + stavalues = statatt_build_stavalues("range_bounds_histogram", + &array_in_fn, + PG_GETARG_DATUM(RANGE_BOUNDS_HISTOGRAM_ARG), + atttypid, atttypmod, + &converted); if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_BOUNDS_HISTOGRAM, - InvalidOid, InvalidOid, - 0, true, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_BOUNDS_HISTOGRAM, + InvalidOid, InvalidOid, + 0, true, 
stavalues, false); } else result = false; @@ -514,17 +493,17 @@ attribute_statistics_update(FunctionCallInfo fcinfo) bool converted = false; Datum stavalues; - stavalues = text_to_stavalues("range_length_histogram", - &array_in_fn, - PG_GETARG_DATUM(RANGE_LENGTH_HISTOGRAM_ARG), - FLOAT8OID, 0, &converted); + stavalues = statatt_build_stavalues("range_length_histogram", + &array_in_fn, + PG_GETARG_DATUM(RANGE_LENGTH_HISTOGRAM_ARG), + FLOAT8OID, 0, &converted); if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, - Float8LessOperator, InvalidOid, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + Float8LessOperator, InvalidOid, + stanumbers, false, stavalues, false); } else result = false; @@ -539,291 +518,6 @@ attribute_statistics_update(FunctionCallInfo fcinfo) return result; } -/* - * If this relation is an index and that index has expressions in it, and - * the attnum specified is known to be an expression, then we must walk - * the list attributes up to the specified attnum to get the right - * expression. - */ -static Node * -get_attr_expr(Relation rel, int attnum) -{ - List *index_exprs; - ListCell *indexpr_item; - - /* relation is not an index */ - if (rel->rd_rel->relkind != RELKIND_INDEX && - rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) - return NULL; - - index_exprs = RelationGetIndexExpressions(rel); - - /* index has no expressions to give */ - if (index_exprs == NIL) - return NULL; - - /* - * The index attnum points directly to a relation attnum, then it's not an - * expression attribute. 
- */ - if (rel->rd_index->indkey.values[attnum - 1] != 0) - return NULL; - - indexpr_item = list_head(rel->rd_indexprs); - - for (int i = 0; i < attnum - 1; i++) - if (rel->rd_index->indkey.values[i] == 0) - indexpr_item = lnext(rel->rd_indexprs, indexpr_item); - - if (indexpr_item == NULL) /* shouldn't happen */ - elog(ERROR, "too few entries in indexprs list"); - - return (Node *) lfirst(indexpr_item); -} - -/* - * Derive type information from the attribute. - */ -static void -get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr) -{ - Relation rel = relation_open(reloid, AccessShareLock); - Form_pg_attribute attr; - HeapTuple atup; - Node *expr; - TypeCacheEntry *typcache; - - atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid), - Int16GetDatum(attnum)); - - /* Attribute not found */ - if (!HeapTupleIsValid(atup)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column %d of relation \"%s\" does not exist", - attnum, RelationGetRelationName(rel)))); - - attr = (Form_pg_attribute) GETSTRUCT(atup); - - if (attr->attisdropped) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column %d of relation \"%s\" does not exist", - attnum, RelationGetRelationName(rel)))); - - expr = get_attr_expr(rel, attr->attnum); - - /* - * When analyzing an expression index, believe the expression tree's type - * not the column datatype --- the latter might be the opckeytype storage - * type of the opclass, which is not interesting for our purposes. This - * mimics the behavior of examine_attribute(). 
- */ - if (expr == NULL) - { - *atttypid = attr->atttypid; - *atttypmod = attr->atttypmod; - *atttypcoll = attr->attcollation; - } - else - { - *atttypid = exprType(expr); - *atttypmod = exprTypmod(expr); - - if (OidIsValid(attr->attcollation)) - *atttypcoll = attr->attcollation; - else - *atttypcoll = exprCollation(expr); - } - ReleaseSysCache(atup); - - /* - * If it's a multirange, step down to the range type, as is done by - * multirange_typanalyze(). - */ - if (type_is_multirange(*atttypid)) - *atttypid = get_multirange_range(*atttypid); - - /* finds the right operators even if atttypid is a domain */ - typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR); - *atttyptype = typcache->typtype; - *eq_opr = typcache->eq_opr; - *lt_opr = typcache->lt_opr; - - /* - * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See - * compute_tsvector_stats(). - */ - if (*atttypid == TSVECTOROID) - *atttypcoll = DEFAULT_COLLATION_OID; - - relation_close(rel, NoLock); -} - -/* - * Derive element type information from the attribute type. - */ -static bool -get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr) -{ - TypeCacheEntry *elemtypcache; - - if (atttypid == TSVECTOROID) - { - /* - * Special case: element type for tsvector is text. See - * compute_tsvector_stats(). - */ - *elemtypid = TEXTOID; - } - else - { - /* find underlying element type through any domain */ - *elemtypid = get_base_element_type(atttypid); - } - - if (!OidIsValid(*elemtypid)) - return false; - - /* finds the right operator even if elemtypid is a domain */ - elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR); - if (!OidIsValid(elemtypcache->eq_opr)) - return false; - - *elem_eq_opr = elemtypcache->eq_opr; - - return true; -} - -/* - * Cast a text datum into an array with element type elemtypid. - * - * If an error is encountered, capture it and re-throw a WARNING, and set ok - * to false. 
If the resulting array contains NULLs, raise a WARNING and set ok - * to false. Otherwise, set ok to true. - */ -static Datum -text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, - int32 typmod, bool *ok) -{ - LOCAL_FCINFO(fcinfo, 8); - char *s; - Datum result; - ErrorSaveContext escontext = {T_ErrorSaveContext}; - - escontext.details_wanted = true; - - s = TextDatumGetCString(d); - - InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid, - (Node *) &escontext, NULL); - - fcinfo->args[0].value = CStringGetDatum(s); - fcinfo->args[0].isnull = false; - fcinfo->args[1].value = ObjectIdGetDatum(typid); - fcinfo->args[1].isnull = false; - fcinfo->args[2].value = Int32GetDatum(typmod); - fcinfo->args[2].isnull = false; - - result = FunctionCallInvoke(fcinfo); - - pfree(s); - - if (escontext.error_occurred) - { - escontext.error_data->elevel = WARNING; - ThrowErrorData(escontext.error_data); - *ok = false; - return (Datum) 0; - } - - if (array_contains_nulls(DatumGetArrayTypeP(result))) - { - ereport(WARNING, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" array must not contain null values", staname))); - *ok = false; - return (Datum) 0; - } - - *ok = true; - - return result; -} - -/* - * Find and update the slot with the given stakind, or use the first empty - * slot. 
- */ -static void -set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull) -{ - int slotidx; - int first_empty = -1; - AttrNumber stakind_attnum; - AttrNumber staop_attnum; - AttrNumber stacoll_attnum; - - /* find existing slot with given stakind */ - for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++) - { - stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx; - - if (first_empty < 0 && - DatumGetInt16(values[stakind_attnum]) == 0) - first_empty = slotidx; - if (DatumGetInt16(values[stakind_attnum]) == stakind) - break; - } - - if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0) - slotidx = first_empty; - - if (slotidx >= STATISTIC_NUM_SLOTS) - ereport(ERROR, - (errmsg("maximum number of statistics slots exceeded: %d", - slotidx + 1))); - - stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx; - staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx; - stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx; - - if (DatumGetInt16(values[stakind_attnum]) != stakind) - { - values[stakind_attnum] = Int16GetDatum(stakind); - replaces[stakind_attnum] = true; - } - if (DatumGetObjectId(values[staop_attnum]) != staop) - { - values[staop_attnum] = ObjectIdGetDatum(staop); - replaces[staop_attnum] = true; - } - if (DatumGetObjectId(values[stacoll_attnum]) != stacoll) - { - values[stacoll_attnum] = ObjectIdGetDatum(stacoll); - replaces[stacoll_attnum] = true; - } - if (!stanumbers_isnull) - { - values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers; - nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false; - replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true; - } - if (!stavalues_isnull) - { - values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = stavalues; - nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false; - replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true; - } -} - /* * Upsert 
the pg_statistic record. */ @@ -880,44 +574,6 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit) return result; } -/* - * Initialize values and nulls for a new stats tuple. - */ -static void -init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - Datum *values, bool *nulls, bool *replaces) -{ - memset(nulls, true, sizeof(bool) * Natts_pg_statistic); - memset(replaces, true, sizeof(bool) * Natts_pg_statistic); - - /* must initialize non-NULL attributes */ - - values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid); - nulls[Anum_pg_statistic_starelid - 1] = false; - values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum); - nulls[Anum_pg_statistic_staattnum - 1] = false; - values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited); - nulls[Anum_pg_statistic_stainherit - 1] = false; - - values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_NULL_FRAC; - nulls[Anum_pg_statistic_stanullfrac - 1] = false; - values[Anum_pg_statistic_stawidth - 1] = DEFAULT_AVG_WIDTH; - nulls[Anum_pg_statistic_stawidth - 1] = false; - values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_N_DISTINCT; - nulls[Anum_pg_statistic_stadistinct - 1] = false; - - /* initialize stakind, staop, and stacoll slots */ - for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++) - { - values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0; - nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false; - values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); - nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false; - values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); - nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false; - } -} - /* * Delete statistics for the given attribute. 
*/ diff --git a/src/backend/statistics/stat_utils.c b/src/backend/statistics/stat_utils.c index 0c139bf43a7..dd6c1216b9a 100644 --- a/src/backend/statistics/stat_utils.c +++ b/src/backend/statistics/stat_utils.c @@ -21,9 +21,12 @@ #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/pg_class.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" +#include "catalog/pg_statistic.h" #include "funcapi.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "statistics/stat_utils.h" #include "storage/lmgr.h" #include "utils/acl.h" @@ -33,6 +36,15 @@ #include "utils/rel.h" #include "utils/syscache.h" +/* Default values assigned to new pg_statistic tuples. */ +#define DEFAULT_STATATT_NULL_FRAC Float4GetDatum(0.0) /* stanullfrac */ +#define DEFAULT_STATATT_AVG_WIDTH Int32GetDatum(0) /* stawidth, same as + * unknown */ +#define DEFAULT_STATATT_N_DISTINCT Float4GetDatum(0.0) /* stadistinct, same as + * unknown */ + +static Node *statatt_get_index_expr(Relation rel, int attnum); + /* * Ensure that a given argument is not null. */ @@ -280,6 +292,50 @@ stats_check_arg_type(const char *argname, Oid argtype, Oid expectedtype) return true; } +/* + * Check if an attribute of an index is an expression, and retrieve the + * expression if that is the case. + * + * If the attnum specified is known to be an expression, then we must + * walk the list of attributes up to the specified attnum to get the right + * expression. + */ +static Node * +statatt_get_index_expr(Relation rel, int attnum) +{ + List *index_exprs; + ListCell *indexpr_item; + + /* relation is not an index */ + if (rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + return NULL; + + index_exprs = RelationGetIndexExpressions(rel); + + /* index has no expressions to give */ + if (index_exprs == NIL) + return NULL; + + /* + * The index's attnum points directly to a relation attnum, hence it is + * not an expression attribute. 
+ */ + if (rel->rd_index->indkey.values[attnum - 1] != 0) + return NULL; + + indexpr_item = list_head(rel->rd_indexprs); + + for (int i = 0; i < attnum - 1; i++) + if (rel->rd_index->indkey.values[i] == 0) + indexpr_item = lnext(rel->rd_indexprs, indexpr_item); + + if (indexpr_item == NULL) /* shouldn't happen */ + elog(ERROR, "too few entries in indexprs list"); + + return (Node *) lfirst(indexpr_item); +} + /* * Translate variadic argument pairs from 'pairs_fcinfo' into a * 'positional_fcinfo' appropriate for calling relation_statistics_update() or @@ -365,3 +421,325 @@ stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo, return result; } + +/* + * Derive type information from a relation attribute. + * + * This is needed for setting most slot statistics for all data types. + * + * This duplicates the logic in examine_attribute() but it will not skip the + * attribute if the attstattarget is 0. + * + * This information, retrieved from pg_attribute and pg_type with some + * specific handling for index expressions, is a prerequisite to calling + * any of the other statatt_*() functions. 
+ */ +void +statatt_get_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr) +{ + Relation rel = relation_open(reloid, AccessShareLock); + Form_pg_attribute attr; + HeapTuple atup; + Node *expr; + TypeCacheEntry *typcache; + + atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid), + Int16GetDatum(attnum)); + + /* Attribute not found */ + if (!HeapTupleIsValid(atup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column %d of relation \"%s\" does not exist", + attnum, RelationGetRelationName(rel)))); + + attr = (Form_pg_attribute) GETSTRUCT(atup); + + if (attr->attisdropped) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column %d of relation \"%s\" does not exist", + attnum, RelationGetRelationName(rel)))); + + expr = statatt_get_index_expr(rel, attr->attnum); + + /* + * When analyzing an expression index, believe the expression tree's type + * not the column datatype --- the latter might be the opckeytype storage + * type of the opclass, which is not interesting for our purposes. This + * mimics the behavior of examine_attribute(). + */ + if (expr == NULL) + { + *atttypid = attr->atttypid; + *atttypmod = attr->atttypmod; + *atttypcoll = attr->attcollation; + } + else + { + *atttypid = exprType(expr); + *atttypmod = exprTypmod(expr); + + if (OidIsValid(attr->attcollation)) + *atttypcoll = attr->attcollation; + else + *atttypcoll = exprCollation(expr); + } + ReleaseSysCache(atup); + + /* + * If it's a multirange, step down to the range type, as is done by + * multirange_typanalyze(). 
+ */ + if (type_is_multirange(*atttypid)) + *atttypid = get_multirange_range(*atttypid); + + /* finds the right operators even if atttypid is a domain */ + typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR); + *atttyptype = typcache->typtype; + *eq_opr = typcache->eq_opr; + *lt_opr = typcache->lt_opr; + + /* + * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See + * compute_tsvector_stats(). + */ + if (*atttypid == TSVECTOROID) + *atttypcoll = DEFAULT_COLLATION_OID; + + relation_close(rel, NoLock); +} + +/* + * Derive element type information from the attribute type. This information + * is needed when the given type is one that contains elements of other types. + * + * The atttypid and atttyptype should be derived from a previous call to + * statatt_get_type(). + */ +bool +statatt_get_elem_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr) +{ + TypeCacheEntry *elemtypcache; + + if (atttypid == TSVECTOROID) + { + /* + * Special case: element type for tsvector is text. See + * compute_tsvector_stats(). + */ + *elemtypid = TEXTOID; + } + else + { + /* find underlying element type through any domain */ + *elemtypid = get_base_element_type(atttypid); + } + + if (!OidIsValid(*elemtypid)) + return false; + + /* finds the right operator even if elemtypid is a domain */ + elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR); + if (!OidIsValid(elemtypcache->eq_opr)) + return false; + + *elem_eq_opr = elemtypcache->eq_opr; + + return true; +} + +/* + * Build an array with element type elemtypid from a text datum, used as + * value of an attribute in a tuple to-be-inserted into pg_statistic. + * + * The typid and typmod should be derived from a previous call to + * statatt_get_type(). + * + * If an error is encountered, capture it and throw a WARNING, with "ok" set + * to false. If the resulting array contains NULLs, raise a WARNING and + * set "ok" to false. 
When the operation succeeds, set "ok" to true. + */ +Datum +statatt_build_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, + int32 typmod, bool *ok) +{ + LOCAL_FCINFO(fcinfo, 8); + char *s; + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + escontext.details_wanted = true; + + s = TextDatumGetCString(d); + + InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid, + (Node *) &escontext, NULL); + + fcinfo->args[0].value = CStringGetDatum(s); + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = ObjectIdGetDatum(typid); + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = Int32GetDatum(typmod); + fcinfo->args[2].isnull = false; + + result = FunctionCallInvoke(fcinfo); + + pfree(s); + + if (escontext.error_occurred) + { + escontext.error_data->elevel = WARNING; + ThrowErrorData(escontext.error_data); + *ok = false; + return (Datum) 0; + } + + if (array_contains_nulls(DatumGetArrayTypeP(result))) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" array must not contain null values", staname))); + *ok = false; + return (Datum) 0; + } + + *ok = true; + + return result; +} + +/* + * Find and update the slot of a stakind, or use the first empty slot. + * + * Core statistics types expect the stakind value to be one of the + * STATISTIC_KIND_* constants defined in pg_statistic.h, but types defined + * by extensions are not restricted to those values. + * + * In the case of core statistics, the required staop is determined by the + * stakind given and will either be a hardcoded oid, or the eq/lt operator + * derived from statatt_get_type(). Likewise, types defined by extensions + * have no such restriction. + * + * The stacoll value should be either the atttypcoll derived from + * statatt_get_type(), or a hardcoded value required by that particular + * stakind. 
+ * + * The value/null pairs for stanumbers and stavalues should be calculated + * based on the stakind, using statatt_build_stavalues() or constructed arrays. + */ +void +statatt_set_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull) +{ + int slotidx; + int first_empty = -1; + AttrNumber stakind_attnum; + AttrNumber staop_attnum; + AttrNumber stacoll_attnum; + + /* find existing slot with given stakind */ + for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++) + { + stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx; + + if (first_empty < 0 && + DatumGetInt16(values[stakind_attnum]) == 0) + first_empty = slotidx; + if (DatumGetInt16(values[stakind_attnum]) == stakind) + break; + } + + if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0) + slotidx = first_empty; + + if (slotidx >= STATISTIC_NUM_SLOTS) + ereport(ERROR, + (errmsg("maximum number of statistics slots exceeded: %d", + slotidx + 1))); + + stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx; + staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx; + stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx; + + if (DatumGetInt16(values[stakind_attnum]) != stakind) + { + values[stakind_attnum] = Int16GetDatum(stakind); + replaces[stakind_attnum] = true; + } + if (DatumGetObjectId(values[staop_attnum]) != staop) + { + values[staop_attnum] = ObjectIdGetDatum(staop); + replaces[staop_attnum] = true; + } + if (DatumGetObjectId(values[stacoll_attnum]) != stacoll) + { + values[stacoll_attnum] = ObjectIdGetDatum(stacoll); + replaces[stacoll_attnum] = true; + } + if (!stanumbers_isnull) + { + values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers; + nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false; + replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true; + } + if (!stavalues_isnull) + { + values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = 
stavalues; + nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false; + replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true; + } +} + +/* + * Initialize values and nulls for a new pg_statistic tuple. + * + * The caller is responsible for allocating the arrays where the results are + * stored, which should be of size Natts_pg_statistic. + * + * When using this routine for a tuple inserted into pg_statistic, reloid, + * attnum and inherited flags should all be set. + * + * When using this routine for a tuple that is an element of a stxdexpr + * array inserted into pg_statistic_ext_data, reloid, attnum and inherited + * should be respectively set to InvalidOid, InvalidAttrNumber and false. + */ +void +statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces) +{ + memset(nulls, true, sizeof(bool) * Natts_pg_statistic); + memset(replaces, true, sizeof(bool) * Natts_pg_statistic); + + /* This must initialize non-NULL attributes */ + values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid); + nulls[Anum_pg_statistic_starelid - 1] = false; + values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum); + nulls[Anum_pg_statistic_staattnum - 1] = false; + values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited); + nulls[Anum_pg_statistic_stainherit - 1] = false; + + values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_STATATT_NULL_FRAC; + nulls[Anum_pg_statistic_stanullfrac - 1] = false; + values[Anum_pg_statistic_stawidth - 1] = DEFAULT_STATATT_AVG_WIDTH; + nulls[Anum_pg_statistic_stawidth - 1] = false; + values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_STATATT_N_DISTINCT; + nulls[Anum_pg_statistic_stadistinct - 1] = false; + + /* initialize stakind, staop, and stacoll slots */ + for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++) + { + values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0; + nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false; + 
values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); + nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false; + values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); + nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false; + } +} diff --git a/src/include/statistics/stat_utils.h b/src/include/statistics/stat_utils.h index f41b181d4d3..e2bc62a5614 100644 --- a/src/include/statistics/stat_utils.h +++ b/src/include/statistics/stat_utils.h @@ -13,6 +13,7 @@ #ifndef STATS_UTILS_H #define STATS_UTILS_H +#include "access/attnum.h" #include "fmgr.h" /* avoid including primnodes.h here */ @@ -40,4 +41,21 @@ extern bool stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo, FunctionCallInfo positional_fcinfo, struct StatsArgInfo *arginfo); +extern void statatt_get_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr); +extern void statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces); + +extern void statatt_set_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull); + +extern Datum statatt_build_stavalues(const char *staname, FmgrInfo *array_in, Datum d, + Oid typid, int32 typmod, bool *ok); +extern bool statatt_get_elem_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr); + #endif /* STATS_UTILS_H */