]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Improving various checks by Heikki Linnakangas <heikki@enterprisedb.com>
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 7 Sep 2007 15:35:11 +0000 (15:35 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 7 Sep 2007 15:35:11 +0000 (15:35 +0000)
- add code to check that the query tree is well-formed. It was indeed
  possible to send malformed queries in binary mode, which produced all
  kinds of strange results.

- make the left-field a uint32. There's no reason to
  arbitrarily limit it to 16-bits, and it won't increase the disk/memory
  footprint either now that QueryOperator and QueryOperand are separate
  structs.

- add check_stack_depth() call to all recursive functions I found.
  Some of them might have a natural limit so that you can't force
  arbitrarily deep recursions, but check_stack_depth() is cheap enough
  that seems best to just stick it into anything that might be a problem.

src/backend/utils/adt/tsquery.c
src/backend/utils/adt/tsquery_cleanup.c
src/backend/utils/adt/tsquery_rewrite.c
src/backend/utils/adt/tsquery_util.c
src/backend/utils/adt/tsrank.c
src/include/tsearch/ts_type.h

index 27b93eb64d7725fa0ec9795a31fd576e188ae13d..83f2c602a81e17b5a8e3ff8dd7b878295617b359 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.4 2007/09/07 15:35:10 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/memutils.h"
 #include "utils/pg_crc.h"
+#include "nodes/bitmapset.h"
 
 
 struct TSQueryParserStateData
@@ -388,14 +389,14 @@ makepol(TSQueryParserState state,
  * QueryItems must be in polish (prefix) notation. 
  */
 static void
-findoprnd(QueryItem *ptr, int *pos)
+findoprnd(QueryItem *ptr, uint32 *pos)
 {
        /* since this function recurses, it could be driven to stack overflow. */
        check_stack_depth();
 
        if (ptr[*pos].type == QI_VAL ||
                ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
-                                                                          * they haven't been cleansed
+                                                                          * they haven't been cleaned
                                                                           * away yet.
                                                                           */
        {
@@ -451,7 +452,7 @@ parse_tsquery(char *buf,
        TSQuery         query;
        int                     commonlen;
        QueryItem  *ptr;
-       int                     pos = 0;
+       uint32          pos = 0;
        ListCell   *cell;
 
        /* init state */
@@ -792,6 +793,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
        QueryItem  *item;
        int                     datalen = 0;
        char       *ptr;
+       Bitmapset  *parentset = NULL;
 
        size = pq_getmsgint(buf, sizeof(uint32));
        if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
@@ -799,7 +801,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 
        len = HDRSIZETQ + sizeof(QueryItem) * size;
 
-       query = (TSQuery) palloc(len);
+       query = (TSQuery) palloc0(len);
        query->size = size;
        item = GETQUERY(query);
 
@@ -814,6 +816,15 @@ tsqueryrecv(PG_FUNCTION_ARGS)
                                item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
                                item->operand.length = pq_getmsgint(buf, sizeof(int16));
 
+                               /* Check that the weight bitmap is valid */
+                               if (item->operand.weight < 0 || item->operand.weight > 0xF)
+                                       elog(ERROR, "invalid weight bitmap");
+
+                               /* XXX: We don't check that the CRC is valid. Actually, if we
+                                * bothered to calculate it to verify, there would be no need
+                                * to transfer it.
+                                */
+
                                /*
                                 * Check that datalen doesn't grow too large. Without the
                                 * check, a malicious client could induce a buffer overflow
@@ -828,7 +839,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
                                        elog(ERROR, "invalid tsquery; total operand length exceeded");
 
                                /* We can calculate distance from datalen, no need to send it
-                                * through the wire. If we did, we would have to check that
+                                * across the wire. If we did, we would have to check that
                                 * it's valid anyway.
                                 */
                                item->operand.distance = datalen;
@@ -842,24 +853,41 @@ tsqueryrecv(PG_FUNCTION_ARGS)
                                        item->operator.oper != OP_OR &&
                                        item->operator.oper != OP_AND)
                                        elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+
+                               /*
+                                * Check that no previous operator node points to the right
+                                * operand. That would mean that the operand node
+                                * has two parents.
+                                */
+                               if (bms_is_member(i + 1, parentset))
+                                       elog(ERROR, "malformed query tree");
+
+                               parentset = bms_add_member(parentset, i + 1);
+
                                if(item->operator.oper != OP_NOT)
                                {
-                                       item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+                                       uint32 left = (uint32) pq_getmsgint(buf, sizeof(uint32));
+
                                        /*
-                                        * Sanity checks
+                                        * Right operand is implicitly at "this+1". Don't allow
+                                        * left to point to the right operand, or to self.
                                         */
-                                       if (item->operator.left <= 0 || i + item->operator.left >= size)
+                                       if (left <= 1 || i + left >= size)
                                                elog(ERROR, "invalid pointer to left operand");
 
-                                       /* XXX: Though there's no way to construct a TSQuery that's
-                                        * not in polish notation, we don't enforce that for
-                                        * queries received from client in binary mode. Is there
-                                        * anything that relies on it?
-                                        *
-                                        * XXX: The tree could be malformed in other ways too,
-                                        * a node could have two parents, for example.
+                                       /*
+                                        * Check that no previous operator node points to the left
+                                        * operand.
                                         */
+                                       if (bms_is_member(i + left, parentset))
+                                               elog(ERROR, "malformed query tree");
+
+                                       parentset = bms_add_member(parentset, i + left);
+
+                                       item->operator.left = left;
                                }
+                               else
+                                       item->operator.left = 1; /* do not leave uninitialized fields */
 
                                if (i == size - 1)
                                        elog(ERROR, "invalid pointer to right operand");
@@ -871,6 +899,13 @@ tsqueryrecv(PG_FUNCTION_ARGS)
                item++;
        }
 
+       /* Now check that each node, except the root, has a parent. We
+        * already checked above that no node has more than one parent. */
+       if (bms_num_members(parentset) != size - 1 && size != 0)
+               elog(ERROR, "malformed query tree");
+
+       bms_free( parentset );
+
        query = (TSQuery) repalloc(query, len + datalen);
 
        item = GETQUERY(query);
index 22e6f7c8198918aac94ef73273fc9f90ef2bea77..6ab58228b59244eeed2f260eb1f3efa46cfe4678 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,6 +57,9 @@ typedef struct
 static void
 plainnode(PLAINTREE * state, NODE * node)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (state->cur == state->len)
        {
                state->len *= 2;
@@ -107,6 +110,9 @@ plaintree(NODE * root, int *len)
 static void
 freetree(NODE * node)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (!node)
                return;
        if (node->left)
@@ -125,6 +131,9 @@ freetree(NODE * node)
 static NODE *
 clean_NOT_intree(NODE * node)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (node->valnode->type == QI_VAL)
                return node;
 
@@ -203,6 +212,9 @@ clean_fakeval_intree(NODE * node, char *result)
        char            lresult = V_UNKNOWN,
                                rresult = V_UNKNOWN;
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (node->valnode->type == QI_VAL)
                return node;
        else 
index db2fe6c53ef91681bc9d39b98c1249d80ef3e103..d733a51d8e378dc9154d6916678f8df0f4b81ad4 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,9 @@
 static int
 addone(int *counters, int last, int total)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        counters[last]++;
        if (counters[last] >= total)
        {
@@ -173,6 +176,9 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
 static QTNode *
 dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        root = findeq(root, ex, subs, isfind);
 
        if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
index e378661488bd8604958c9802b18b4c51333ad6f5..60de44cc6f68d67efc2dc073099c7f4181d87740 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,9 @@ QT2QTN(QueryItem * in, char *operand)
 {
        QTNode     *node = (QTNode *) palloc0(sizeof(QTNode));
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        node->valnode = in;
 
        if (in->type == QI_OPR)
@@ -53,6 +56,9 @@ QTNFree(QTNode * in)
        if (!in)
                return;
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
                pfree(in->word);
 
@@ -79,6 +85,9 @@ QTNFree(QTNode * in)
 int
 QTNodeCompare(QTNode * an, QTNode * bn)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (an->valnode->type != bn->valnode->type)
                return (an->valnode->type > bn->valnode->type) ? -1 : 1;
        
@@ -133,6 +142,9 @@ QTNSort(QTNode * in)
 {
        int                     i;
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (in->valnode->type != QI_OPR)
                return;
 
@@ -165,6 +177,9 @@ QTNTernary(QTNode * in)
 {
        int                     i;
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (in->valnode->type != QI_OPR)
                return;
 
@@ -205,6 +220,9 @@ QTNBinary(QTNode * in)
 {
        int                     i;
 
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (in->valnode->type != QI_OPR)
                return;
 
@@ -244,6 +262,9 @@ QTNBinary(QTNode * in)
 static void
 cntsize(QTNode * in, int *sumlen, int *nnode)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        *nnode += 1;
        if (in->valnode->type == QI_OPR)
        {
@@ -268,6 +289,9 @@ typedef struct
 static void
 fillQT(QTN2QTState *state, QTNode *in)
 {
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
        if (in->valnode->type == QI_VAL)
        {
                memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
@@ -325,7 +349,12 @@ QTN2QT(QTNode *in)
 QTNode *
 QTNCopy(QTNode *in)
 {
-       QTNode     *out = (QTNode *) palloc(sizeof(QTNode));
+       QTNode     *out;
+
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       out = (QTNode *) palloc(sizeof(QTNode));
 
        *out = *in;
        out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
index d48e9b4a470be27d2843789fe27487c978e4e796..30ccfa3f6170ccc9236cb1c5095e8fbf3200a7e1 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -508,6 +508,10 @@ Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
        int                     i;
        bool            found = false;
 
+       /* since this function recurses, it could be driven to stack overflow.
+        * (though any decent compiler will optimize away the tail-recursion.   */
+       check_stack_depth();
+
        reset_istrue_flag(query);
 
        ext->p = 0x7fffffff;
index 91d724ef1c67752e313e24fe22fbbbe16774e407..15aa9f5c6e171cf24f9a9f4ea5529f3d3f1bd65d 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.3 2007/09/07 15:35:11 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -160,7 +160,13 @@ typedef struct
 {
        QueryItemType           type;   /* operand or kind of operator (ts_tokentype) */
        int8            weight;                 /* weights of operand to search. It's a bitmask of allowed weights.
-                                                                * if it =0 then any weight are allowed */
+                                                                * if it =0 then any weight are allowed.
+                                                                * Weights and bit map:
+                                                                * A: 1<<3
+                                                                * B: 1<<2
+                                                                * C: 1<<1
+                                                                * D: 1<<0
+                                                                */
        int32   valcrc;                         /* XXX: pg_crc32 would be a more appropriate data type, 
                                                                 * but we use comparisons to signed integers in the code. 
                                                                 * They would need to be changed as well. */
@@ -182,7 +188,7 @@ typedef struct
 {
        QueryItemType   type;
        int8            oper;           /* see above */
-       int16           left;           /* pointer to left operand. Right operand is
+       uint32          left;           /* pointer to left operand. Right operand is
                                                         * item + 1, left operand is placed
                                                         * item+item->left */
 } QueryOperator;