From: Michael Paquier Date: Thu, 29 Jan 2026 03:14:08 +0000 (+0900) Subject: Add support for "mcv" in pg_restore_extended_stats() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=efbebb4e85872b1c4d6bc19c4550e67850b83aab;p=thirdparty%2Fpostgresql.git Add support for "mcv" in pg_restore_extended_stats() This commit adds support for the restore of extended statistics of the kind "mcv", aka most-common values. This format is different from the n_distinct and dependencies stat types in that it is the combination of three of the four different arrays from the pg_stats_ext view, which in turn requires three different input parameters on pg_restore_extended_stats(). These are translated into three input arguments for the function: - "most_common_vals", acting as a leader of the others. It is a two-dimensional array that holds the common values. - "most_common_freqs", a one-dimensional float8[] array, with a number of elements that has to match "most_common_vals". - "most_common_base_freqs", a one-dimensional float8[] array, with a number of elements that has to match "most_common_vals". All three arrays are required to achieve the restore of this type of extended statistics (if "most_common_vals" happens to be NULL in the catalogs, the rest is NULL by design). Note that "most_common_val_nulls" is not required in input; its data is rebuilt from the decomposition of the "most_common_vals" array based on its text[] representation. The initial versions of the patch provided this option in input, but we do not require it, and dropping it simplifies the result considerably. Support in pg_dump is added down to v13, which is where the support for this type of extended statistics has been added, when --statistics is used. This means that upgrades and dumps can restore extended statistics data transparently, like "dependencies", "ndistinct", attribute and relation statistics. For MCV, the values are directly queried from the relevant catalogs. 
Author: Corey Huinker Co-authored-by: Chao Li Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com --- diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index ea42056bbc9..3ac81905d1f 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -2222,8 +2222,10 @@ SELECT pg_restore_attribute_stats( Other arguments are the names and values of statistics corresponding to columns in pg_stats_ext . - This function currently supports n_distinct and - dependencies. + This function currently supports n_distinct, + dependencies, most_common_vals, + most_common_freqs, + and most_common_base_freqs. Additionally, this function accepts argument name diff --git a/src/backend/statistics/extended_stats_funcs.c b/src/backend/statistics/extended_stats_funcs.c index 80247cbac21..6fff31330e6 100644 --- a/src/backend/statistics/extended_stats_funcs.c +++ b/src/backend/statistics/extended_stats_funcs.c @@ -48,6 +48,9 @@ enum extended_stats_argnum INHERITED_ARG, NDISTINCT_ARG, DEPENDENCIES_ARG, + MOST_COMMON_VALS_ARG, + MOST_COMMON_FREQS_ARG, + MOST_COMMON_BASE_FREQS_ARG, NUM_EXTENDED_STATS_ARGS, }; @@ -64,6 +67,9 @@ static struct StatsArgInfo extarginfo[] = [INHERITED_ARG] = {"inherited", BOOLOID}, [NDISTINCT_ARG] = {"n_distinct", PG_NDISTINCTOID}, [DEPENDENCIES_ARG] = {"dependencies", PG_DEPENDENCIESOID}, + [MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTARRAYOID}, + [MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT8ARRAYOID}, + [MOST_COMMON_BASE_FREQS_ARG] = {"most_common_base_freqs", FLOAT8ARRAYOID}, [NUM_EXTENDED_STATS_ARGS] = {0}, }; @@ -90,6 +96,16 @@ static void upsert_pg_statistic_ext_data(const Datum *values, const bool *nulls, const bool *replaces); +static bool check_mcvlist_array(const ArrayType *arr, int argindex, + int required_ndims, int mcv_length); +static Datum import_mcv(const ArrayType *mcv_arr, + const ArrayType *freqs_arr, 
+ const ArrayType *base_freqs_arr, + Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, int numattrs, + bool *ok); + + /* * Fetch a pg_statistic_ext row by name and namespace OID. */ @@ -252,16 +268,32 @@ extended_statistics_update(FunctionCallInfo fcinfo) bool success = true; Datum exprdatum; bool isnull; + List *exprs = NIL; + int numattnums = 0; int numexprs = 0; + int numattrs = 0; /* arrays of type info, if we need them */ + Oid *atttypids = NULL; + int32 *atttypmods = NULL; + Oid *atttypcolls = NULL; Oid relid; Oid locked_table = InvalidOid; /* * Fill out the StakindFlags "has" structure based on which parameters * were provided to the function. + * + * The MCV stats composite value is an array of record type, but this is + * externally represented as three arrays that must be interleaved into + * the array of records (pg_stats_ext stores four arrays, + * most_common_val_nulls is built from the contents of most_common_vals). + * Therefore, none of the three array values is meaningful unless the + * other two are also present and in sync in terms of array length. */ + has.mcv = (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) && + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) && + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)); has.ndistinct = !PG_ARGISNULL(NDISTINCT_ARG); has.dependencies = !PG_ARGISNULL(DEPENDENCIES_ARG); @@ -344,6 +376,7 @@ extended_statistics_update(FunctionCallInfo fcinfo) /* Find out what extended statistics kinds we should expect. 
*/ expand_stxkind(tup, &enabled); + numattnums = stxform->stxkeys.dim1; /* decode expression (if any) */ exprdatum = SysCacheGetAttr(STATEXTOID, @@ -353,7 +386,6 @@ extended_statistics_update(FunctionCallInfo fcinfo) if (!isnull) { char *s; - List *exprs; s = TextDatumGetCString(exprdatum); exprs = (List *) stringToNode(s); @@ -377,6 +409,8 @@ extended_statistics_update(FunctionCallInfo fcinfo) numexprs = list_length(exprs); } + numattrs = numattnums + numexprs; + /* * If the object cannot support ndistinct, we should not have data for it. */ @@ -411,6 +445,115 @@ extended_statistics_update(FunctionCallInfo fcinfo) success = false; } + /* + * If the object cannot hold an MCV value, but any of the MCV parameters + * are set, then issue a WARNING and ensure that we do not try to load MCV + * stats later. In pg_stats_ext, most_common_val_nulls, most_common_freqs + * and most_common_base_freqs are NULL if most_common_vals is NULL. + */ + if (!enabled.mcv) + { + if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) || + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) || + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot specify parameters \"%s\", \"%s\" or \"%s\"", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname), + errhint("Extended statistics object \"%s\".\"%s\" does not support statistics of this type.", + quote_identifier(nspname), + quote_identifier(stxname))); + + has.mcv = false; + success = false; + } + } + else if (!has.mcv) + { + /* + * If we do not have all of the MCV arrays set while the extended + * statistics object expects something, something is wrong. This + * issues a WARNING if a partial input has been provided. 
+ */ + if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) || + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) || + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not use \"%s\", \"%s\" and \"%s\": missing one or more parameters", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname)); + success = false; + } + } + + /* + * Either of these statistic types requires that we supply a semi-filled + * VacAttrStatP array. + * + * It is not possible to use the existing lookup_var_attr_stats() and + * examine_attribute() because these functions will skip attributes where + * attstattarget is 0, and we may have statistics data to import for those + * attributes. + */ + if (has.mcv) + { + atttypids = palloc0_array(Oid, numattrs); + atttypmods = palloc0_array(int32, numattrs); + atttypcolls = palloc0_array(Oid, numattrs); + + /* + * The leading stxkeys are attribute numbers up through numattnums. + * These keys must be in ascending AttNumber order, but we do not rely + * on that. + */ + for (int i = 0; i < numattnums; i++) + { + AttrNumber attnum = stxform->stxkeys.values[i]; + HeapTuple atup = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + + Form_pg_attribute attr; + + /* Attribute not found */ + if (!HeapTupleIsValid(atup)) + elog(ERROR, "stxkeys references nonexistent attnum %d", attnum); + + attr = (Form_pg_attribute) GETSTRUCT(atup); + + if (attr->attisdropped) + elog(ERROR, "stxkeys references dropped attnum %d", attnum); + + atttypids[i] = attr->atttypid; + atttypmods[i] = attr->atttypmod; + atttypcolls[i] = attr->attcollation; + ReleaseSysCache(atup); + } + + /* + * After all the positive number attnums in stxkeys come the negative + * numbers (if any) which represent expressions in the order that they + * appear in stxdexprs. 
Because the expressions are always + * monotonically decreasing from -1, there is no point in looking at + * the values in stxkeys, it's enough to know how many of them there + * are. + */ + for (int i = numattnums; i < numattrs; i++) + { + Node *expr = list_nth(exprs, i - numattnums); + + atttypids[i] = exprType(expr); + atttypmods[i] = exprTypmod(expr); + atttypcolls[i] = exprCollation(expr); + } + } + /* * Populate the pg_statistic_ext_data result tuple. */ @@ -471,6 +614,28 @@ extended_statistics_update(FunctionCallInfo fcinfo) statext_dependencies_free(dependencies); } + if (has.mcv) + { + Datum datum; + bool val_ok = false; + + datum = import_mcv(PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VALS_ARG), + PG_GETARG_ARRAYTYPE_P(MOST_COMMON_FREQS_ARG), + PG_GETARG_ARRAYTYPE_P(MOST_COMMON_BASE_FREQS_ARG), + atttypids, atttypmods, atttypcolls, numattrs, + &val_ok); + + if (val_ok) + { + Assert(datum != (Datum) 0); + values[Anum_pg_statistic_ext_data_stxdmcv - 1] = datum; + nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = false; + replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true; + } + else + success = false; + } + upsert_pg_statistic_ext_data(values, nulls, replaces); cleanup: @@ -478,9 +643,127 @@ cleanup: heap_freetuple(tup); if (pg_stext != NULL) table_close(pg_stext, RowExclusiveLock); + if (atttypids != NULL) + pfree(atttypids); + if (atttypmods != NULL) + pfree(atttypmods); + if (atttypcolls != NULL) + pfree(atttypcolls); return success; } +/* + * Consistency checks to ensure that other mcvlist arrays are in alignment + * with the mcv array. 
+ */ +static bool +check_mcvlist_array(const ArrayType *arr, int argindex, int required_ndims, + int mcv_length) +{ + if (ARR_NDIM(arr) != required_ndims) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse array \"%s\": incorrect number of dimensions (%d required)", + extarginfo[argindex].argname, required_ndims)); + return false; + } + + if (array_contains_nulls(arr)) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse array \"%s\": NULL value found", + extarginfo[argindex].argname)); + return false; + } + + if (ARR_DIMS(arr)[0] != mcv_length) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse array \"%s\": incorrect number of elements (same as \"%s\" required)", + extarginfo[argindex].argname, + extarginfo[MOST_COMMON_VALS_ARG].argname)); + return false; + } + + return true; +} + +/* + * Create the stxdmcv datum from the equal-sized arrays of most common values, + * their null flags, and the frequency and base frequency associated with + * each value. + */ +static Datum +import_mcv(const ArrayType *mcv_arr, const ArrayType *freqs_arr, + const ArrayType *base_freqs_arr, Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, int numattrs, bool *ok) +{ + int nitems; + Datum *mcv_elems; + bool *mcv_nulls; + int check_nummcv; + Datum mcv = (Datum) 0; + + *ok = false; + + /* + * mcv_arr is an array of arrays. Each inner array must have the same + * number of elements "numattrs". 
+ */ + if (ARR_NDIM(mcv_arr) != 2) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse array \"%s\": incorrect number of dimensions (%d required)", + extarginfo[MOST_COMMON_VALS_ARG].argname, 2)); + goto mcv_error; + } + + if (ARR_DIMS(mcv_arr)[1] != numattrs) + { + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse array \"%s\": found %d attributes but expected %d", + extarginfo[MOST_COMMON_VALS_ARG].argname, + ARR_DIMS(mcv_arr)[1], numattrs)); + goto mcv_error; + } + + /* + * "most_common_freqs" and "most_common_base_freqs" arrays must be of the + * same length, one-dimension and cannot contain NULLs. We use mcv_arr as + * the reference array for determining their length. + */ + nitems = ARR_DIMS(mcv_arr)[0]; + if (!check_mcvlist_array(freqs_arr, MOST_COMMON_FREQS_ARG, 1, nitems) || + !check_mcvlist_array(base_freqs_arr, MOST_COMMON_BASE_FREQS_ARG, 1, nitems)) + { + /* inconsistent input arrays found */ + goto mcv_error; + } + + /* + * This part builds the contents for "most_common_val_nulls", based on the + * values from "most_common_vals". + */ + deconstruct_array_builtin(mcv_arr, TEXTOID, &mcv_elems, + &mcv_nulls, &check_nummcv); + + mcv = statext_mcv_import(WARNING, numattrs, + atttypids, atttypmods, atttypcolls, + nitems, mcv_elems, mcv_nulls, + (float8 *) ARR_DATA_PTR(freqs_arr), + (float8 *) ARR_DATA_PTR(base_freqs_arr)); + + *ok = (mcv != (Datum) 0); + +mcv_error: + return mcv; +} + /* * Remove an existing pg_statistic_ext_data row for a given pg_statistic_ext * row and "inherited" pair. diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index de5a544b390..0b7da605a4c 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -2187,3 +2187,148 @@ statext_mcv_free(MCVList *mcvlist) } pfree(mcvlist); } + +/* + * Create the MCV composite datum, which is a serialization of an array of + * MCVItems. 
+ * + * The inputs consist of four separate arrays of equal length "numitems" + * (mcv_elems, mcv_nulls, freqs and base_freqs) that form the basics of + * what is stored in the catalogs. These form an array of composite + * records defined by the three atttypX arrays of equal length "numattrs". + * + * If any data element fails to convert to the input type specified for that + * attribute, then function will return a NULL Datum if elevel < ERROR. + */ +Datum +statext_mcv_import(int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + float8 *freqs, float8 *base_freqs) +{ + MCVList *mcvlist; + bytea *bytes; + VacAttrStats **vastats; + + /* + * Allocate the MCV list structure, set the global parameters. + */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + (sizeof(MCVItem) * nitems)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */ + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + item->frequency = freqs[i]; + item->base_frequency = base_freqs[i]; + item->values = (Datum *) palloc0_array(Datum, numattrs); + item->isnull = (bool *) palloc0_array(bool, numattrs); + } + + /* + * Walk through each dimension, determine the input function for that + * type, and then attempt to convert all values in that column via that + * function. We approach this column-wise because it is simpler to deal + * with one input function at time, and possibly more cache-friendly. 
+ */ + for (int j = 0; j < numattrs; j++) + { + FmgrInfo finfo; + Oid ioparam; + Oid infunc; + int index = j; + + getTypeInputInfo(atttypids[j], &infunc, &ioparam); + fmgr_info(infunc, &finfo); + + /* store info about data type OIDs */ + mcvlist->types[j] = atttypids[j]; + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + if (mcv_nulls[index]) + { + /* NULL value detected, hence no input to process */ + item->values[j] = (Datum) 0; + item->isnull[j] = true; + } + else + { + char *s = TextDatumGetCString(mcv_elems[index]); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j], + (Node *) &escontext, &item->values[j])) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse MCV element \"%s\": incorrect value", s))); + pfree(s); + goto error; + } + + pfree(s); + } + + index += numattrs; + } + } + + /* + * The function statext_mcv_serialize() requires an array of pointers to + * VacAttrStats records, but only a few fields within those records have + * to be filled out. 
+ */ + vastats = (VacAttrStats **) palloc0_array(VacAttrStats *, numattrs); + + for (int i = 0; i < numattrs; i++) + { + Oid typid = atttypids[i]; + HeapTuple typtuple; + + typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid)); + + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + + vastats[i] = palloc0_object(VacAttrStats); + + vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + vastats[i]->attrtypid = typid; + vastats[i]->attrcollid = atttypcolls[i]; + } + + bytes = statext_mcv_serialize(mcvlist, vastats); + + for (int i = 0; i < numattrs; i++) + { + pfree(vastats[i]); + } + pfree((void *) vastats); + + pfree(mcv_elems); + pfree(mcv_nulls); + + if (bytes == NULL) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not import MCV list"))); + goto error; + } + + return PointerGetDatum(bytes); + +error: + statext_mcv_free(mcvlist); + return (Datum) 0; +} diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 078ee8500ad..2bebefd0ba2 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -18622,7 +18622,7 @@ dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) *-------- */ if (fout->remoteVersion >= 190000) - appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies "); + appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, "); else appendPQExpBufferStr(pq, "( " @@ -18646,7 +18646,17 @@ dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) " '" PG_DEPENDENCIES_KEY_DEGREE "', " " kv.value::double precision )) " "FROM json_each_text(e.dependencies::text::json) AS kv " - ") AS dependencies "); + ") AS dependencies, "); + + /* MCV was introduced v13 */ + if (fout->remoteVersion >= 130000) + appendPQExpBufferStr(pq, + "e.most_common_vals, e.most_common_freqs, " + "e.most_common_base_freqs "); + else + appendPQExpBufferStr(pq, + "NULL AS most_common_vals, NULL AS most_common_freqs, " + "NULL AS 
most_common_base_freqs "); /* pg_stats_ext introduced in v12 */ if (fout->remoteVersion >= 120000) @@ -18697,6 +18707,9 @@ dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) int i_inherited = PQfnumber(res, "inherited"); int i_ndistinct = PQfnumber(res, "n_distinct"); int i_dependencies = PQfnumber(res, "dependencies"); + int i_mcv = PQfnumber(res, "most_common_vals"); + int i_mcf = PQfnumber(res, "most_common_freqs"); + int i_mcbf = PQfnumber(res, "most_common_base_freqs"); for (int i = 0; i < nstats; i++) { @@ -18732,6 +18745,18 @@ dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) appendNamedArgument(out, fout, "dependencies", "pg_dependencies", PQgetvalue(res, i, i_dependencies)); + if (!PQgetisnull(res, i, i_mcv)) + appendNamedArgument(out, fout, "most_common_vals", "text[]", + PQgetvalue(res, i, i_mcv)); + + if (!PQgetisnull(res, i, i_mcf)) + appendNamedArgument(out, fout, "most_common_freqs", "double precision[]", + PQgetvalue(res, i, i_mcf)); + + if (!PQgetisnull(res, i, i_mcbf)) + appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]", + PQgetvalue(res, i, i_mcbf)); + appendPQExpBufferStr(out, "\n);\n"); } diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 54b4a26273d..c775442f2ee 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -90,6 +90,11 @@ extern MCVList *statext_mcv_build(StatsBuildData *data, extern bytea *statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats); extern MCVList *statext_mcv_deserialize(bytea *data); extern void statext_mcv_free(MCVList *mcvlist); +extern Datum statext_mcv_import(int elevel, int numattrs, Oid *atttypids, + int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, + bool *mcv_nulls, float8 *freqs, + float8 *base_freqs); extern MultiSortSupport multi_sort_init(int ndims); extern void 
multi_sort_add_dimension(MultiSortSupport mss, int sortdim, diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index 60b4c37cd6f..05279dda3f8 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -1147,12 +1147,18 @@ CREATE STATISTICS stats_import.test_stat_ndistinct (ndistinct) CREATE STATISTICS stats_import.test_stat_dependencies (dependencies) ON name, comp FROM stats_import.test; +CREATE STATISTICS stats_import.test_stat_mcv (mcv) + ON name, comp + FROM stats_import.test; CREATE STATISTICS stats_import.test_stat_ndistinct_exprs (ndistinct) ON lower(name), upper(name) FROM stats_import.test; CREATE STATISTICS stats_import.test_stat_dependencies_exprs (dependencies) ON lower(name), upper(name) FROM stats_import.test; +CREATE STATISTICS stats_import.test_stat_mcv_exprs (mcv) + ON lower(name), upper(name) + FROM stats_import.test; -- Generate statistics on table with data ANALYZE stats_import.test; CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) @@ -1876,6 +1882,21 @@ SELECT pg_catalog.pg_restore_extended_stats( t (1 row) +-- ok: MCV with expressions +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv_exprs', + 'inherited', false, + 'most_common_vals', '{{four,FOUR},{one,NULL},{NULL,TRE},{two,TWO}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.99}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.023,0.087}'::double precision[]); + pg_restore_extended_stats +--------------------------- + t +(1 row) + -- Check the presence of the restored stats, for each object. 
SELECT replace(e.n_distinct, '}, ', E'},\n') AS n_distinct FROM pg_stats_ext AS e @@ -1919,6 +1940,195 @@ WHERE e.statistics_schemaname = 'stats_import' AND {"attributes": [-2], "dependency": -1, "degree": 1.000000}] (1 row) +SELECT e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' AND + e.statistics_name = 'test_stat_mcv_exprs' AND + e.inherited = false \gx +-[ RECORD 1 ]----------+---------------------------------------------- +most_common_vals | {{four,FOUR},{one,NULL},{NULL,TRE},{two,TWO}} +most_common_val_nulls | {{f,f},{f,t},{t,f},{f,f}} +most_common_freqs | {0.25,0.25,0.25,0.99} +most_common_base_freqs | {0.0625,0.0625,0.023,0.087} + +-- Incorrect extended stats kind, mcv not supported +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_dependencies', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +WARNING: cannot specify parameters "most_common_vals", "most_common_freqs" or "most_common_base_freqs" +HINT: Extended statistics object "stats_import"."test_stat_dependencies" does not support statistics of this type. 
+ pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- MCV requires all three parameters +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +WARNING: could not use "most_common_vals", "most_common_freqs" and "most_common_base_freqs": missing one or more parameters + pg_restore_extended_stats +--------------------------- + f +(1 row) + +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +WARNING: could not use "most_common_vals", "most_common_freqs" and "most_common_base_freqs": missing one or more parameters + pg_restore_extended_stats +--------------------------- + f +(1 row) + +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[]); +WARNING: could not use "most_common_vals", "most_common_freqs" and "most_common_base_freqs": missing one or more parameters + pg_restore_extended_stats 
+--------------------------- + f +(1 row) + +-- most_common_vals that is not 2-D +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{four,NULL}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +WARNING: could not parse array "most_common_vals": incorrect number of dimensions (2 required) + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- most_common_freqs with length not matching with most_common_vals. +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +WARNING: could not parse array "most_common_freqs": incorrect number of elements (same as "most_common_vals" required) + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- most_common_base_freqs with length not matching most_common_vals. 
+SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625}'::double precision[]); +WARNING: could not parse array "most_common_base_freqs": incorrect number of elements (same as "most_common_vals" required) + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- mcv attributes not matching object definition +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2}, + {tre,"(3,3.3,TRE,03-03-2003,)",-1,3}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[]); +WARNING: could not parse array "most_common_vals": found 4 attributes but expected 2 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok: mcv +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 
'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT replace(e.most_common_vals::text, '},', E'},\n ') AS mcvs, + e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' AND + e.statistics_name = 'test_stat_mcv' AND + e.inherited = false +\gx +-[ RECORD 1 ]----------+------------------------------------------------------------------ +mcvs | {{four,NULL}, + + | {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"},+ + | {tre,"(3,3.3,TRE,03-03-2003,)"}, + + | {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}} +most_common_val_nulls | {{f,t},{f,f},{f,f},{f,f}} +most_common_freqs | {0.25,0.25,0.25,0.25} +most_common_base_freqs | {0.0625,0.0625,0.0625,0.0625} + DROP SCHEMA stats_import CASCADE; NOTICE: drop cascades to 7 other objects DETAIL: drop cascades to type stats_import.complex_type diff --git a/src/test/regress/sql/stats_import.sql b/src/test/regress/sql/stats_import.sql index 2a16d502e1c..ea41c41acb8 100644 --- a/src/test/regress/sql/stats_import.sql +++ b/src/test/regress/sql/stats_import.sql @@ -819,6 +819,10 @@ CREATE STATISTICS stats_import.test_stat_dependencies (dependencies) ON name, comp FROM stats_import.test; +CREATE STATISTICS stats_import.test_stat_mcv (mcv) + ON name, comp + FROM stats_import.test; + CREATE STATISTICS stats_import.test_stat_ndistinct_exprs (ndistinct) ON lower(name), upper(name) FROM stats_import.test; @@ -827,6 +831,10 @@ CREATE STATISTICS stats_import.test_stat_dependencies_exprs (dependencies) ON lower(name), upper(name) FROM stats_import.test; +CREATE STATISTICS stats_import.test_stat_mcv_exprs (mcv) + ON lower(name), upper(name) + FROM stats_import.test; + -- Generate statistics on table with data ANALYZE stats_import.test; @@ -1343,6 +1351,17 @@ 
SELECT pg_catalog.pg_restore_extended_stats( 'dependencies', '[{"attributes": [-1], "dependency": -2, "degree": 1.000000}, {"attributes": [-2], "dependency": -1, "degree": 1.000000}]'::pg_dependencies); +-- ok: MCV with expressions +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv_exprs', + 'inherited', false, + 'most_common_vals', '{{four,FOUR},{one,NULL},{NULL,TRE},{two,TWO}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.99}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.023,0.087}'::double precision[]); + -- Check the presence of the restored stats, for each object. SELECT replace(e.n_distinct, '}, ', E'},\n') AS n_distinct FROM pg_stats_ext AS e @@ -1368,4 +1387,133 @@ WHERE e.statistics_schemaname = 'stats_import' AND e.statistics_name = 'test_stat_dependencies_exprs' AND e.inherited = false; +SELECT e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' AND + e.statistics_name = 'test_stat_mcv_exprs' AND + e.inherited = false \gx + +-- Incorrect extended stats kind, mcv not supported +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_dependencies', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); + +-- MCV requires all three parameters +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 
'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[]); + +-- most_common_vals that is not 2-D +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{four,NULL}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); + +-- most_common_freqs with length not matching with most_common_vals. 
+SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); + +-- most_common_base_freqs with length not matching most_common_vals. +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625}'::double precision[]); + +-- mcv attributes not matching object definition +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2}, + {tre,"(3,3.3,TRE,03-03-2003,)",-1,3}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[]); + +-- ok: mcv +SELECT pg_catalog.pg_restore_extended_stats( + 'schemaname', 'stats_import', + 'relname', 'test', + 'statistics_schemaname', 'stats_import', + 
'statistics_name', 'test_stat_mcv', + 'inherited', false, + 'most_common_vals', '{{four,NULL}, + {one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")"}, + {tre,"(3,3.3,TRE,03-03-2003,)"}, + {two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")"}}'::text[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]); + +SELECT replace(e.most_common_vals::text, '},', E'},\n ') AS mcvs, + e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' AND + e.statistics_name = 'test_stat_mcv' AND + e.inherited = false +\gx + DROP SCHEMA stats_import CASCADE;