git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Teach planner about the idea that a mergejoin won't necessarily read
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 1 Mar 2002 04:09:28 +0000 (04:09 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 1 Mar 2002 04:09:28 +0000 (04:09 +0000)
Teach planner about the idea that a mergejoin won't necessarily read
both input streams to the end.  If one variable's range is much less
than the other, an indexscan-based merge can win by not scanning all
of the other table.  Per example from Reinhard Max.

src/backend/executor/nodeMergejoin.c
src/backend/optimizer/path/costsize.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c
src/include/utils/lsyscache.h
src/include/utils/selfuncs.h

index b0c1eb90a7f8e99730ca58675a7f4a72095ffcdf..41f0e99693f4bd7a51fb93dc590dc8cd690ffe67 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.47 2001/10/28 06:25:43 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.48 2002/03/01 04:09:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -88,97 +88,62 @@ static bool MergeCompare(List *eqQual, List *compareQual, ExprContext *econtext)
 
 
 /* ----------------------------------------------------------------
- *             MJFormSkipQual
+ *             MJFormSkipQuals
  *
  *             This takes the mergeclause which is a qualification of the
- *             form ((= expr expr) (= expr expr) ...) and forms a new
- *             qualification like ((> expr expr) (> expr expr) ...) which
- *             is used by ExecMergeJoin() in order to determine if we should
- *             skip tuples.  The replacement operators are named either ">"
- *             or "<" according to the replaceopname parameter, and have the
- *             same operand data types as the "=" operators they replace.
- *             (We expect there to be such operators because the "=" operators
+ *             form ((= expr expr) (= expr expr) ...) and forms new lists
+ *             of the forms ((< expr expr) (< expr expr) ...) and
+ *             ((> expr expr) (> expr expr) ...).  These lists will be used
+ *             by ExecMergeJoin() to determine if we should skip tuples.
+ *             (We expect there to be suitable operators because the "=" operators
  *             were marked mergejoinable; however, there might be a different
  *             one needed in each qual clause.)
  * ----------------------------------------------------------------
  */
-static List *
-MJFormSkipQual(List *qualList, char *replaceopname)
+static void
+MJFormSkipQuals(List *qualList, List **ltQuals, List **gtQuals)
 {
-       List       *qualCopy;
-       List       *qualcdr;
-       Expr       *qual;
-       Oper       *op;
-       HeapTuple       optup;
-       Form_pg_operator opform;
-       Oid                     oprleft,
-                               oprright;
+       List       *ltcdr,
+                          *gtcdr;
 
        /*
-        * qualList is a list: ((op .. ..) ...)
-        *
-        * first we make a copy of it.  copyObject() makes a deep copy so let's
-        * use it instead of the old fashoned lispCopy()...
+        * Make modifiable copies of the qualList.
         */
-       qualCopy = (List *) copyObject((Node *) qualList);
+       *ltQuals = (List *) copyObject((Node *) qualList);
+       *gtQuals = (List *) copyObject((Node *) qualList);
 
-       foreach(qualcdr, qualCopy)
+       /*
+        * Scan both lists in parallel, so that we can update the operators
+        * with the minimum number of syscache searches.
+        */
+       ltcdr = *ltQuals;
+       foreach(gtcdr, *gtQuals)
        {
-               /*
-                * first get the current (op .. ..) list
-                */
-               qual = lfirst(qualcdr);
+               Expr       *ltqual = (Expr *) lfirst(ltcdr);
+               Expr       *gtqual = (Expr *) lfirst(gtcdr);
+               Oper       *ltop = (Oper *) ltqual->oper;
+               Oper       *gtop = (Oper *) gtqual->oper;
 
                /*
-                * now get at the op
+                * The two ops should be identical, so use either one for lookup.
                 */
-               op = (Oper *) qual->oper;
-               if (!IsA(op, Oper))
-                       elog(ERROR, "MJFormSkipQual: op not an Oper!");
+               if (!IsA(ltop, Oper))
+                       elog(ERROR, "MJFormSkipQuals: op not an Oper!");
 
                /*
-                * Get the declared left and right operand types of the operator.
-                * Note we do *not* use the actual operand types, since those
-                * might be different in scenarios with binary-compatible data
-                * types. There should be "<" and ">" operators matching a
-                * mergejoinable "=" operator's declared operand types, but we
-                * might not find them if we search with the actual operand types.
+                * Lookup the operators, and replace the data in the copied
+                * operator nodes.
                 */
-               optup = SearchSysCache(OPEROID,
-                                                          ObjectIdGetDatum(op->opno),
-                                                          0, 0, 0);
-               if (!HeapTupleIsValid(optup))   /* shouldn't happen */
-                       elog(ERROR, "MJFormSkipQual: operator %u not found", op->opno);
-               opform = (Form_pg_operator) GETSTRUCT(optup);
-               oprleft = opform->oprleft;
-               oprright = opform->oprright;
-               ReleaseSysCache(optup);
-
-               /*
-                * Now look up the matching "<" or ">" operator.  If there isn't
-                * one, whoever marked the "=" operator mergejoinable was a loser.
-                */
-               optup = SearchSysCache(OPERNAME,
-                                                          PointerGetDatum(replaceopname),
-                                                          ObjectIdGetDatum(oprleft),
-                                                          ObjectIdGetDatum(oprright),
-                                                          CharGetDatum('b'));
-               if (!HeapTupleIsValid(optup))
-                       elog(ERROR,
-                       "MJFormSkipQual: mergejoin operator %u has no matching %s op",
-                                op->opno, replaceopname);
-               opform = (Form_pg_operator) GETSTRUCT(optup);
-
-               /*
-                * And replace the data in the copied operator node.
-                */
-               op->opno = optup->t_data->t_oid;
-               op->opid = opform->oprcode;
-               op->op_fcache = NULL;
-               ReleaseSysCache(optup);
+               op_mergejoin_crossops(ltop->opno,
+                                                         &ltop->opno,
+                                                         &gtop->opno,
+                                                         &ltop->opid,
+                                                         &gtop->opid);
+               ltop->op_fcache = NULL;
+               gtop->op_fcache = NULL;
+
+               ltcdr = lnext(ltcdr);
        }
-
-       return qualCopy;
 }
 
 /* ----------------------------------------------------------------
@@ -1430,7 +1395,6 @@ bool
 ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
 {
        MergeJoinState *mergestate;
-       List       *joinclauses;
 
        MJ1_printf("ExecInitMergeJoin: %s\n",
                           "initializing node");
@@ -1522,9 +1486,9 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
        /*
         * form merge skip qualifications
         */
-       joinclauses = node->mergeclauses;
-       mergestate->mj_OuterSkipQual = MJFormSkipQual(joinclauses, "<");
-       mergestate->mj_InnerSkipQual = MJFormSkipQual(joinclauses, ">");
+       MJFormSkipQuals(node->mergeclauses,
+                                       &mergestate->mj_OuterSkipQual,
+                                       &mergestate->mj_InnerSkipQual);
 
        MJ_printf("\nExecInitMergeJoin: OuterSkipQual is ");
        MJ_nodeDisplay(mergestate->mj_OuterSkipQual);
index 4cfddf9e6ce700451acdcb5dbeaeb4254e698c6f..e50664cf66be4d709da9e97a819209de232983d1 100644 (file)
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.79 2001/10/25 05:49:32 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.80 2002/03/01 04:09:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,6 +58,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "parser/parsetree.h"
+#include "utils/selfuncs.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -565,12 +566,29 @@ cost_mergejoin(Path *path, Query *root,
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
+       double          outer_rows,
+                               inner_rows;
        double          ntuples;
+       Selectivity     leftscan,
+                               rightscan;
        Path            sort_path;              /* dummy for result of cost_sort */
 
        if (!enable_mergejoin)
                startup_cost += disable_cost;
 
+       /*
+        * A merge join will stop as soon as it exhausts either input stream.
+        * Estimate fraction of the left and right inputs that will actually
+        * need to be scanned.  We use only the first (most significant)
+        * merge clause for this purpose.
+        */
+       mergejoinscansel(root,
+                                        (Node *) ((RestrictInfo *) lfirst(mergeclauses))->clause,
+                                        &leftscan, &rightscan);
+
+       outer_rows = outer_path->parent->rows * leftscan;
+       inner_rows = inner_path->parent->rows * rightscan;
+
        /* cost of source data */
 
        /*
@@ -588,12 +606,14 @@ cost_mergejoin(Path *path, Query *root,
                                  outer_path->parent->rows,
                                  outer_path->parent->width);
                startup_cost += sort_path.startup_cost;
-               run_cost += sort_path.total_cost - sort_path.startup_cost;
+               run_cost += (sort_path.total_cost - sort_path.startup_cost)
+                       * leftscan;
        }
        else
        {
                startup_cost += outer_path->startup_cost;
-               run_cost += outer_path->total_cost - outer_path->startup_cost;
+               run_cost += (outer_path->total_cost - outer_path->startup_cost)
+                       * leftscan;
        }
 
        if (innersortkeys)                      /* do we need to sort inner? */
@@ -605,30 +625,33 @@ cost_mergejoin(Path *path, Query *root,
                                  inner_path->parent->rows,
                                  inner_path->parent->width);
                startup_cost += sort_path.startup_cost;
-               run_cost += sort_path.total_cost - sort_path.startup_cost;
+               run_cost += (sort_path.total_cost - sort_path.startup_cost)
+                       * rightscan;
        }
        else
        {
                startup_cost += inner_path->startup_cost;
-               run_cost += inner_path->total_cost - inner_path->startup_cost;
+               run_cost += (inner_path->total_cost - inner_path->startup_cost)
+                       * rightscan;
        }
 
        /*
         * The number of tuple comparisons needed depends drastically on the
         * number of equal keys in the two source relations, which we have no
-        * good way of estimating.      Somewhat arbitrarily, we charge one tuple
+        * good way of estimating.  (XXX could the MCV statistics help?)
+        * Somewhat arbitrarily, we charge one tuple
         * comparison (one cpu_operator_cost) for each tuple in the two source
         * relations.  This is probably a lower bound.
         */
-       run_cost += cpu_operator_cost *
-               (outer_path->parent->rows + inner_path->parent->rows);
+       run_cost += cpu_operator_cost * (outer_rows + inner_rows);
 
        /*
         * For each tuple that gets through the mergejoin proper, we charge
         * cpu_tuple_cost plus the cost of evaluating additional restriction
         * clauses that are to be applied at the join.  It's OK to use an
         * approximate selectivity here, since in most cases this is a minor
-        * component of the cost.
+        * component of the cost.  NOTE: it's correct to use the unscaled rows
+        * counts here, not the scaled-down counts we obtained above.
         */
        ntuples = approx_selectivity(root, mergeclauses) *
                outer_path->parent->rows * inner_path->parent->rows;
index edcfe91a47dc701dff53839eeaec298e1e29136d..6f08deec5a1dea05897bb39dee72f4f4e4a23383 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.103 2002/01/03 04:02:34 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.104 2002/03/01 04:09:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "parser/parsetree.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
+#include "utils/datum.h"
 #include "utils/int8.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
 
+
 /*
  * Note: the default selectivity estimates are not chosen entirely at random.
  * We want them to be small enough to ensure that indexscans will be used if
        } while (0)
 
 
+static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
@@ -419,7 +422,9 @@ neqsel(PG_FUNCTION_ARGS)
  *
  * This is the guts of both scalarltsel and scalargtsel.  The caller has
  * commuted the clause, if necessary, so that we can treat the Var as
- * being on the left.
+ * being on the left.  The caller must also make sure that the other side
+ * of the clause is a non-null Const, and dissect same into a value and
+ * datatype.
  *
  * This routine works for any datatype (or pair of datatypes) known to
  * convert_to_scalar().  If it is applied to some other datatype,
@@ -427,11 +432,9 @@ neqsel(PG_FUNCTION_ARGS)
  */
 static double
 scalarineqsel(Query *root, Oid operator, bool isgt,
-                         Var *var, Node *other)
+                         Var *var, Datum constval, Oid consttype)
 {
        Oid                     relid;
-       Datum           constval;
-       Oid                     consttype;
        HeapTuple       statsTuple;
        Form_pg_statistic stats;
        FmgrInfo        opproc;
@@ -454,22 +457,6 @@ scalarineqsel(Query *root, Oid operator, bool isgt,
        if (relid == InvalidOid)
                return DEFAULT_INEQ_SEL;
 
-       /*
-        * Can't do anything useful if the something is not a constant,
-        * either.
-        */
-       if (!IsA(other, Const))
-               return DEFAULT_INEQ_SEL;
-
-       /*
-        * If the constant is NULL, assume operator is strict and return zero,
-        * ie, operator will never return TRUE.
-        */
-       if (((Const *) other)->constisnull)
-               return 0.0;
-       constval = ((Const *) other)->constvalue;
-       consttype = ((Const *) other)->consttype;
-
        /* get stats for the attribute */
        statsTuple = SearchSysCache(STATRELATT,
                                                                ObjectIdGetDatum(relid),
@@ -697,6 +684,8 @@ scalarltsel(PG_FUNCTION_ARGS)
        int                     varRelid = PG_GETARG_INT32(3);
        Var                *var;
        Node       *other;
+       Datum           constval;
+       Oid                     consttype;
        bool            varonleft;
        bool            isgt;
        double          selec;
@@ -710,6 +699,22 @@ scalarltsel(PG_FUNCTION_ARGS)
                                                         &var, &other, &varonleft))
                PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
+       /*
+        * Can't do anything useful if the something is not a constant,
+        * either.
+        */
+       if (!IsA(other, Const))
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+       /*
+        * If the constant is NULL, assume operator is strict and return zero,
+        * ie, operator will never return TRUE.
+        */
+       if (((Const *) other)->constisnull)
+               PG_RETURN_FLOAT8(0.0);
+       constval = ((Const *) other)->constvalue;
+       consttype = ((Const *) other)->consttype;
+
        /*
         * Force the var to be on the left to simplify logic in scalarineqsel.
         */
@@ -730,7 +735,7 @@ scalarltsel(PG_FUNCTION_ARGS)
                isgt = true;
        }
 
-       selec = scalarineqsel(root, operator, isgt, var, other);
+       selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
 
        PG_RETURN_FLOAT8((float8) selec);
 }
@@ -747,6 +752,8 @@ scalargtsel(PG_FUNCTION_ARGS)
        int                     varRelid = PG_GETARG_INT32(3);
        Var                *var;
        Node       *other;
+       Datum           constval;
+       Oid                     consttype;
        bool            varonleft;
        bool            isgt;
        double          selec;
@@ -760,6 +767,22 @@ scalargtsel(PG_FUNCTION_ARGS)
                                                         &var, &other, &varonleft))
                PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
+       /*
+        * Can't do anything useful if the something is not a constant,
+        * either.
+        */
+       if (!IsA(other, Const))
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+       /*
+        * If the constant is NULL, assume operator is strict and return zero,
+        * ie, operator will never return TRUE.
+        */
+       if (((Const *) other)->constisnull)
+               PG_RETURN_FLOAT8(0.0);
+       constval = ((Const *) other)->constvalue;
+       consttype = ((Const *) other)->consttype;
+
        /*
         * Force the var to be on the left to simplify logic in scalarineqsel.
         */
@@ -780,7 +803,7 @@ scalargtsel(PG_FUNCTION_ARGS)
                isgt = false;
        }
 
-       selec = scalarineqsel(root, operator, isgt, var, other);
+       selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
 
        PG_RETURN_FLOAT8((float8) selec);
 }
@@ -1696,6 +1719,229 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
        PG_RETURN_FLOAT8(result);
 }
 
+/*
+ * mergejoinscansel                    - Scan selectivity of merge join.
+ *
+ * A merge join will stop as soon as it exhausts either input stream.
+ * Therefore, if we can estimate the ranges of both input variables,
+ * we can estimate how much of the input will actually be read.  This
+ * can have a considerable impact on the cost when using indexscans.
+ *
+ * clause should be a clause already known to be mergejoinable.
+ *
+ * *leftscan is set to the fraction of the left-hand variable expected
+ * to be scanned (0 to 1), and similarly *rightscan for the right-hand
+ * variable.
+ */
+void
+mergejoinscansel(Query *root, Node *clause,
+                                Selectivity *leftscan,
+                                Selectivity *rightscan)
+{
+       Var                *left,
+                          *right;
+       Oid                     opno,
+                               lsortop,
+                               rsortop,
+                               ltop,
+                               gtop,
+                               revltop;
+       Datum           leftmax,
+                               rightmax;
+       double          selec;
+
+       /* Set default results if we can't figure anything out. */
+       *leftscan = *rightscan = 1.0;
+
+       /* Deconstruct the merge clause */
+       if (!is_opclause(clause))
+               return;                                 /* shouldn't happen */
+       opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+       left = get_leftop((Expr *) clause);
+       right = get_rightop((Expr *) clause);
+       if (!right)
+               return;                                 /* shouldn't happen */
+
+       /* Can't do anything if inputs are not Vars */
+       if (!IsA(left, Var) ||!IsA(right, Var))
+               return;
+
+       /* Verify mergejoinability and get left and right "<" operators */
+       if (!op_mergejoinable(opno,
+                                                 left->vartype,
+                                                 right->vartype,
+                                                 &lsortop,
+                                                 &rsortop))
+               return;                                 /* shouldn't happen */
+
+       /* Try to get maximum values of both vars */
+       if (!get_var_maximum(root, left, lsortop, &leftmax))
+               return;                                 /* no max available from stats */
+
+       if (!get_var_maximum(root, right, rsortop, &rightmax))
+               return;                                 /* no max available from stats */
+
+       /* Look up the "left < right" and "left > right" operators */
+       op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);
+
+       /* Look up the "right < left" operator */
+       revltop = get_commutator(gtop);
+       if (!OidIsValid(revltop))
+               return;                                 /* shouldn't happen */
+
+       /*
+        * Now, the fraction of the left variable that will be scanned is the
+        * fraction that's <= the right-side maximum value.  But only believe
+        * non-default estimates, else stick with our 1.0.
+        */
+       selec = scalarineqsel(root, ltop, false, left,
+                                                 rightmax, right->vartype);
+       if (selec != DEFAULT_INEQ_SEL)
+               *leftscan = selec;
+
+       /* And similarly for the right variable. */
+       selec = scalarineqsel(root, revltop, false, right,
+                                                 leftmax, left->vartype);
+       if (selec != DEFAULT_INEQ_SEL)
+               *rightscan = selec;
+
+       /*
+        * Only one of the two fractions can really be less than 1.0; believe
+        * the smaller estimate and reset the other one to exactly 1.0.
+        */
+       if (*leftscan > *rightscan)
+               *leftscan = 1.0;
+       else
+               *rightscan = 1.0;
+}
+
+/*
+ * get_var_maximum
+ *             Estimate the maximum value of the specified variable.
+ *             If successful, store value in *max and return TRUE.
+ *             If no data available, return FALSE.
+ *
+ * sortop is the "<" comparison operator to use.  (To extract the
+ * minimum instead of the maximum, just pass the ">" operator instead.)
+ */
+static bool
+get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max)
+{
+       Datum           tmax = 0;
+       bool            have_max = false;
+       Oid                     relid;
+       HeapTuple       statsTuple;
+       Form_pg_statistic stats;
+       int16           typLen;
+       bool            typByVal;
+       Datum      *values;
+       int                     nvalues;
+       int                     i;
+
+       relid = getrelid(var->varno, root->rtable);
+       if (relid == InvalidOid)
+               return false;
+
+       /* get stats for the attribute */
+       statsTuple = SearchSysCache(STATRELATT,
+                                                               ObjectIdGetDatum(relid),
+                                                               Int16GetDatum(var->varattno),
+                                                               0, 0);
+       if (!HeapTupleIsValid(statsTuple))
+       {
+               /* no stats available, so default result */
+               return false;
+       }
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
+
+       get_typlenbyval(var->vartype, &typLen, &typByVal);
+
+       /*
+        * If there is a histogram, grab the last or first value as appropriate.
+        *
+        * If there is a histogram that is sorted with some other operator
+        * than the one we want, fail --- this suggests that there is data
+        * we can't use.
+        */
+       if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
+                                                STATISTIC_KIND_HISTOGRAM, sortop,
+                                                &values, &nvalues,
+                                                NULL, NULL))
+       {
+               if (nvalues > 0)
+               {
+                       tmax = datumCopy(values[nvalues-1], typByVal, typLen);
+                       have_max = true;
+               }
+               free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
+       }
+       else
+       {
+               Oid             rsortop = get_commutator(sortop);
+
+               if (OidIsValid(rsortop) &&
+                       get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
+                                                        STATISTIC_KIND_HISTOGRAM, rsortop,
+                                                        &values, &nvalues,
+                                                        NULL, NULL))
+               {
+                       if (nvalues > 0)
+                       {
+                               tmax = datumCopy(values[0], typByVal, typLen);
+                               have_max = true;
+                       }
+                       free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
+               }
+               else if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
+                                                                 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                                                                 &values, &nvalues,
+                                                                 NULL, NULL))
+               {
+                       free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
+                       ReleaseSysCache(statsTuple);
+                       return false;
+               }
+       }
+
+       /*
+        * If we have most-common-values info, look for a large MCV.  This
+        * is needed even if we also have a histogram, since the histogram
+        * excludes the MCVs.  However, usually the MCVs will not be the
+        * extreme values, so avoid unnecessary data copying.
+        */
+       if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
+                                                STATISTIC_KIND_MCV, InvalidOid,
+                                                &values, &nvalues,
+                                                NULL, NULL))
+       {
+               bool    large_mcv = false;
+               FmgrInfo        opproc;
+
+               fmgr_info(get_opcode(sortop), &opproc);
+
+               for (i = 0; i < nvalues; i++)
+               {
+                       if (!have_max)
+                       {
+                               tmax = values[i];
+                               large_mcv = have_max = true;
+                       }
+                       else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
+                       {
+                               tmax = values[i];
+                               large_mcv = true;
+                       }
+               }
+               if (large_mcv)
+                       tmax = datumCopy(tmax, typByVal, typLen);
+               free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
+       }
+
+       ReleaseSysCache(statsTuple);
+
+       *max = tmax;
+       return have_max;
+}
 
 /*
  * convert_to_scalar
index b359651b9ba5d5c9b23f6b1484f0725fe87a744a..706397d28c6a25aa328a463e0eb6db9e4c96c5d2 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.59 2001/10/25 05:49:46 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.60 2002/03/01 04:09:26 tgl Exp $
  *
  * NOTES
  *       Eventually, the index information should go through here, too.
@@ -369,6 +369,76 @@ op_mergejoinable(Oid opno, Oid ltype, Oid rtype, Oid *leftOp, Oid *rightOp)
        return result;
 }
 
+/*
+ * op_mergejoin_crossops
+ *
+ *             Returns the cross-type comparison operators (ltype "<" rtype and
+ *             ltype ">" rtype) for an operator previously determined to be
+ *             mergejoinable.  Optionally, fetches the regproc ids of these
+ *             operators, as well as their operator OIDs.
+ *
+ * Raises error if operators cannot be found.  Assuming that the operator
+ * had indeed been marked mergejoinable, this indicates that whoever marked
+ * it so was mistaken.
+ */
+void
+op_mergejoin_crossops(Oid opno, Oid *ltop, Oid *gtop,
+                                         RegProcedure *ltproc, RegProcedure *gtproc)
+{
+       HeapTuple       tp;
+       Form_pg_operator optup;
+       Oid                     oprleft,
+                               oprright;
+
+       /*
+        * Get the declared left and right operand types of the operator.
+        */
+       tp = SearchSysCache(OPEROID,
+                                               ObjectIdGetDatum(opno),
+                                               0, 0, 0);
+       if (!HeapTupleIsValid(tp))      /* shouldn't happen */
+               elog(ERROR, "op_mergejoin_crossops: operator %u not found", opno);
+       optup = (Form_pg_operator) GETSTRUCT(tp);
+       oprleft = optup->oprleft;
+       oprright = optup->oprright;
+       ReleaseSysCache(tp);
+
+       /*
+        * Look up the "<" operator with the same input types.  If there isn't
+        * one, whoever marked the "=" operator mergejoinable was a loser.
+        */
+       tp = SearchSysCache(OPERNAME,
+                                               PointerGetDatum("<"),
+                                               ObjectIdGetDatum(oprleft),
+                                               ObjectIdGetDatum(oprright),
+                                               CharGetDatum('b'));
+       if (!HeapTupleIsValid(tp))
+               elog(ERROR, "op_mergejoin_crossops: mergejoin operator %u has no matching < operator",
+                        opno);
+       optup = (Form_pg_operator) GETSTRUCT(tp);
+       *ltop = tp->t_data->t_oid;
+       if (ltproc)
+               *ltproc = optup->oprcode;
+       ReleaseSysCache(tp);
+
+       /*
+        * And the same for the ">" operator.
+        */
+       tp = SearchSysCache(OPERNAME,
+                                               PointerGetDatum(">"),
+                                               ObjectIdGetDatum(oprleft),
+                                               ObjectIdGetDatum(oprright),
+                                               CharGetDatum('b'));
+       if (!HeapTupleIsValid(tp))
+               elog(ERROR, "op_mergejoin_crossops: mergejoin operator %u has no matching > operator",
+                        opno);
+       optup = (Form_pg_operator) GETSTRUCT(tp);
+       *gtop = tp->t_data->t_oid;
+       if (gtproc)
+               *gtproc = optup->oprcode;
+       ReleaseSysCache(tp);
+}
+
 /*
  * op_hashjoinable
  *
index 97de45594647e05ecda10ece768f970ee41f20f7..c76c8d53d88030dcd2216c73213bb01c3e932a1b 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.39 2001/11/05 17:46:36 momjian Exp $
+ * $Id: lsyscache.h,v 1.40 2002/03/01 04:09:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,6 +29,8 @@ extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
                                 Oid *leftOp, Oid *rightOp);
+extern void op_mergejoin_crossops(Oid opno, Oid *ltop, Oid *gtop,
+                                RegProcedure *ltproc, RegProcedure *gtproc);
 extern Oid     op_hashjoinable(Oid opno, Oid ltype, Oid rtype);
 extern bool op_iscachable(Oid opno);
 extern Oid     get_commutator(Oid opno);
index da5d4d08286bed58f5db2e0b80c115a966b5d556..87978ade799dbd5e307295b24a442de3b5ee7ecd 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.4 2001/11/05 17:46:36 momjian Exp $
+ * $Id: selfuncs.h,v 1.5 2002/03/01 04:09:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,8 +66,12 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
 extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
 
-Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
-Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
+extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
+extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
+
+extern void mergejoinscansel(Query *root, Node *clause,
+                                                        Selectivity *leftscan,
+                                                        Selectivity *rightscan);
 
 extern Datum btcostestimate(PG_FUNCTION_ARGS);
 extern Datum rtcostestimate(PG_FUNCTION_ARGS);