git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Tighten array dimensionality checks in Python -> SQL array conversion.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 4 May 2023 15:00:33 +0000 (11:00 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 4 May 2023 15:00:33 +0000 (11:00 -0400)
Like plperl before f47004add, plpython wasn't being sufficiently
careful about checking that list-of-list structures represent
rectangular arrays, so that it would accept some cases in which
different parts of the "array" are nested to different depths.
This was exacerbated by Python's weak distinction between
sequences and lists, so that in some cases strings could get
treated as though they are lists (and burst into individual
characters) even though a different ordering of the upper-level
list would give a different result.

Some of this behavior was unreachable (without risking a crash)
before 81eaaf65e.  It seems like a good idea to clean it all up
in the same releases, rather than shipping a non-crashing but
nonetheless visibly buggy behavior in the name of minimal change.
Hence, back-patch.

Per bug #17912 and further testing by Alexander Lakhin.

Discussion: https://postgr.es/m/17912-82ceed78731d9cdc@postgresql.org

src/pl/plpython/expected/plpython_types.out
src/pl/plpython/expected/plpython_types_3.out
src/pl/plpython/plpy_typeio.c
src/pl/plpython/sql/plpython_types.sql

diff --git a/src/pl/plpython/expected/plpython_types.out b/src/pl/plpython/expected/plpython_types.out
index bae02e9c04ed5b0970a12f6bf5688a70a51e7a21..dec6963225e4c73f89119038bd05f84ad2c85f5c 100644
@@ -687,23 +687,74 @@ SELECT * FROM test_type_conversion_array_mixed2();
 ERROR:  invalid input syntax for integer: "abc"
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_array_mixed2"
-CREATE FUNCTION test_type_conversion_array_mixed3() RETURNS text[] AS $$
-return [[], 'a']
+-- check output of multi-dimensional arrays
+CREATE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [['a'], ['b'], ['c']]
 $$ LANGUAGE plpythonu;
-SELECT * FROM test_type_conversion_array_mixed3();
- test_type_conversion_array_mixed3 
+select test_type_conversion_md_array_out();
+ test_type_conversion_md_array_out 
 -----------------------------------
- {[],a}
+ {{a},{b},{c}}
 (1 row)
 
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], []]
+$$ LANGUAGE plpythonu;
+select test_type_conversion_md_array_out();
+ test_type_conversion_md_array_out 
+-----------------------------------
+ {}
+(1 row)
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], [1]]
+$$ LANGUAGE plpythonu;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], 1]
+$$ LANGUAGE plpythonu;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [1, []]
+$$ LANGUAGE plpythonu;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[1], [[]]]
+$$ LANGUAGE plpythonu;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
 CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
 return [[1,2,3],[4,5]]
 $$ LANGUAGE plpythonu;
 SELECT * FROM test_type_conversion_mdarray_malformed();
-ERROR:  wrong length of inner sequence: has length 2, but 3 was expected
-DETAIL:  To construct a multidimensional array, the inner sequences must all have the same length.
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_mdarray_malformed"
+CREATE FUNCTION test_type_conversion_mdarray_malformed2() RETURNS text[] AS $$
+return [[1,2,3], "abc"]
+$$ LANGUAGE plpythonu;
+SELECT * FROM test_type_conversion_mdarray_malformed2();
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed2"
+CREATE FUNCTION test_type_conversion_mdarray_malformed3() RETURNS text[] AS $$
+return ["abc", [1,2,3]]
+$$ LANGUAGE plpythonu;
+SELECT * FROM test_type_conversion_mdarray_malformed3();
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed3"
 CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
 return [[[[[[[1]]]]]]]
 $$ LANGUAGE plpythonu;
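diff --git a/src/pl/plpython/expected/plpython_types_3.out b/src/pl/plpython/expected/plpython_types_3.out
index 9049faaaf960e48ba0c253c8c4d4e0cb87141dcd..958a6a54b5092da40525110e6d8a2313f0936d8d 100644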
@@ -687,23 +687,74 @@ SELECT * FROM test_type_conversion_array_mixed2();
 ERROR:  invalid input syntax for integer: "abc"
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_array_mixed2"
-CREATE FUNCTION test_type_conversion_array_mixed3() RETURNS text[] AS $$
-return [[], 'a']
+-- check output of multi-dimensional arrays
+CREATE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [['a'], ['b'], ['c']]
 $$ LANGUAGE plpython3u;
-SELECT * FROM test_type_conversion_array_mixed3();
- test_type_conversion_array_mixed3 
+select test_type_conversion_md_array_out();
+ test_type_conversion_md_array_out 
 -----------------------------------
- {[],a}
+ {{a},{b},{c}}
 (1 row)
 
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], []]
+$$ LANGUAGE plpython3u;
+select test_type_conversion_md_array_out();
+ test_type_conversion_md_array_out 
+-----------------------------------
+ {}
+(1 row)
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], [1]]
+$$ LANGUAGE plpython3u;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], 1]
+$$ LANGUAGE plpython3u;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [1, []]
+$$ LANGUAGE plpython3u;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[1], [[]]]
+$$ LANGUAGE plpython3u;
+select test_type_conversion_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_md_array_out"
 CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
 return [[1,2,3],[4,5]]
 $$ LANGUAGE plpython3u;
 SELECT * FROM test_type_conversion_mdarray_malformed();
-ERROR:  wrong length of inner sequence: has length 2, but 3 was expected
-DETAIL:  To construct a multidimensional array, the inner sequences must all have the same length.
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_mdarray_malformed"
+CREATE FUNCTION test_type_conversion_mdarray_malformed2() RETURNS text[] AS $$
+return [[1,2,3], "abc"]
+$$ LANGUAGE plpython3u;
+SELECT * FROM test_type_conversion_mdarray_malformed2();
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed2"
+CREATE FUNCTION test_type_conversion_mdarray_malformed3() RETURNS text[] AS $$
+return ["abc", [1,2,3]]
+$$ LANGUAGE plpython3u;
+SELECT * FROM test_type_conversion_mdarray_malformed3();
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed3"
 CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
 return [[[[[[[1]]]]]]]
 $$ LANGUAGE plpython3u;
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index 6edef990652ae604dc7ba4d46f93cc0b3d911984..9b2afb82be098bb8bef8be18d32aeaf53511db6e 100644
@@ -58,9 +58,10 @@ static Datum PLyObject_ToTransform(PLyObToDatum *arg, PyObject *plrv,
                                          bool *isnull, bool inarray);
 static Datum PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
                                        bool *isnull, bool inarray);
-static void PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
-                                                       int *dims, int ndim, int dim,
-                                                       Datum *elems, bool *nulls, int *currelem);
+static void PLySequence_ToArray_recurse(PyObject *obj,
+                                                                               ArrayBuildState **astatep,
+                                                                               int *ndims, int *dims, int cur_depth,
+                                                                               PLyObToDatum *elm, Oid elmbasetype);
 
 /* conversion from Python objects to composite Datums */
 static Datum PLyString_ToComposite(PLyObToDatum *arg, PyObject *string, bool inarray);
@@ -1134,23 +1135,17 @@ PLyObject_ToTransform(PLyObToDatum *arg, PyObject *plrv,
 
 
 /*
- * Convert Python sequence to SQL array.
+ * Convert Python sequence (or list of lists) to SQL array.
  */
 static Datum
 PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
                                        bool *isnull, bool inarray)
 {
-       ArrayType  *array;
-       int                     i;
-       Datum      *elems;
-       bool       *nulls;
-       int                     len;
-       int                     ndim;
+       ArrayBuildState *astate = NULL;
+       int                     ndims = 1;
        int                     dims[MAXDIM];
        int                     lbs[MAXDIM];
-       int                     currelem;
-       PyObject   *pyptr = plrv;
-       PyObject   *next;
+       int                     i;
 
        if (plrv == Py_None)
        {
@@ -1160,122 +1155,133 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
        *isnull = false;
 
        /*
-        * Determine the number of dimensions, and their sizes.
-        */
-       ndim = 0;
-
-       Py_INCREF(plrv);
-
-       for (;;)
-       {
-               if (!PyList_Check(pyptr))
-                       break;
-
-               if (ndim == MAXDIM)
-                       PLy_elog(ERROR, "number of array dimensions exceeds the maximum allowed (%d)", MAXDIM);
-
-               dims[ndim] = PySequence_Length(pyptr);
-               if (dims[ndim] < 0)
-                       PLy_elog(ERROR, "could not determine sequence length for function return value");
-
-               if (dims[ndim] == 0)
-               {
-                       /* empty sequence */
-                       break;
-               }
-
-               ndim++;
-
-               next = PySequence_GetItem(pyptr, 0);
-               Py_XDECREF(pyptr);
-               pyptr = next;
-       }
-       Py_XDECREF(pyptr);
-
-       /*
-        * Check for zero dimensions. This happens if the object is a tuple or a
-        * string, rather than a list, or is not a sequence at all. We don't map
-        * tuples or strings to arrays in general, but in the first level, be
-        * lenient, for historical reasons. So if the object is a sequence of any
-        * kind, treat it as a one-dimensional array.
+        * For historical reasons, we allow any sequence (not only a list) at the
+        * top level when converting a Python object to a SQL array.  However, a
+        * multi-dimensional array is recognized only when the object contains
+        * true lists.
         */
-       if (ndim == 0)
-       {
-               if (!PySequence_Check(plrv))
-                       PLy_elog(ERROR, "return value of function with array return type is not a Python sequence");
-
-               ndim = 1;
-               dims[0] = PySequence_Length(plrv);
-       }
+       if (!PySequence_Check(plrv))
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATATYPE_MISMATCH),
+                                errmsg("return value of function with array return type is not a Python sequence")));
 
-       /* Allocate space for work arrays, after detecting array size overflow */
-       len = ArrayGetNItems(ndim, dims);
-       elems = palloc(sizeof(Datum) * len);
-       nulls = palloc(sizeof(bool) * len);
+       /* Initialize dimensionality info with first-level dimension */
+       memset(dims, 0, sizeof(dims));
+       dims[0] = PySequence_Length(plrv);
 
        /*
         * Traverse the Python lists, in depth-first order, and collect all the
-        * elements at the bottom level into 'elems'/'nulls' arrays.
+        * elements at the bottom level into an ArrayBuildState.
         */
-       currelem = 0;
-       PLySequence_ToArray_recurse(arg->u.array.elm, plrv,
-                                                               dims, ndim, 0,
-                                                               elems, nulls, &currelem);
+       PLySequence_ToArray_recurse(plrv, &astate,
+                                                               &ndims, dims, 1,
+                                                               arg->u.array.elm,
+                                                               arg->u.array.elmbasetype);
+
+       /* ensure we get zero-D array for no inputs, as per PG convention */
+       if (astate == NULL)
+               return PointerGetDatum(construct_empty_array(arg->u.array.elmbasetype));
 
-       for (i = 0; i < ndim; i++)
+       for (i = 0; i < ndims; i++)
                lbs[i] = 1;
 
-       array = construct_md_array(elems,
-                                                          nulls,
-                                                          ndim,
-                                                          dims,
-                                                          lbs,
-                                                          arg->u.array.elmbasetype,
-                                                          arg->u.array.elm->typlen,
-                                                          arg->u.array.elm->typbyval,
-                                                          arg->u.array.elm->typalign);
-
-       return PointerGetDatum(array);
+       return makeMdArrayResult(astate, ndims, dims, lbs,
+                                                        CurrentMemoryContext, true);
 }
 
 /*
  * Helper function for PLySequence_ToArray. Traverse a Python list of lists in
- * depth-first order, storing the elements in 'elems'.
+ * depth-first order, storing the elements in *astatep.
+ *
+ * The ArrayBuildState is created only when we first find a scalar element;
+ * if we didn't do it like that, we'd need some other convention for knowing
+ * whether we'd already found any scalars (and thus the number of dimensions
+ * is frozen).
  */
 static void
-PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
-                                                       int *dims, int ndim, int dim,
-                                                       Datum *elems, bool *nulls, int *currelem)
+PLySequence_ToArray_recurse(PyObject *obj, ArrayBuildState **astatep,
+                                                       int *ndims, int *dims, int cur_depth,
+                                                       PLyObToDatum *elm, Oid elmbasetype)
 {
        int                     i;
+       int                     len = PySequence_Length(obj);
 
-       if (PySequence_Length(list) != dims[dim])
-               ereport(ERROR,
-                               (errmsg("wrong length of inner sequence: has length %d, but %d was expected",
-                                               (int) PySequence_Length(list), dims[dim]),
-                                (errdetail("To construct a multidimensional array, the inner sequences must all have the same length."))));
+       /* We should not get here with a non-sequence object */
+       if (len < 0)
+               PLy_elog(ERROR, "could not determine sequence length for function return value");
 
-       if (dim < ndim - 1)
+       for (i = 0; i < len; i++)
        {
-               for (i = 0; i < dims[dim]; i++)
-               {
-                       PyObject   *sublist = PySequence_GetItem(list, i);
+               /* fetch the array element */
+               PyObject   *subobj = PySequence_GetItem(obj, i);
 
-                       PLySequence_ToArray_recurse(elm, sublist, dims, ndim, dim + 1,
-                                                                               elems, nulls, currelem);
-                       Py_XDECREF(sublist);
+               /* need PG_TRY to ensure we release the subobj's refcount */
+               PG_TRY();
+               {
+                       /* multi-dimensional array? */
+                       if (PyList_Check(subobj))
+                       {
+                               /* set size when at first element in this level, else compare */
+                               if (i == 0 && *ndims == cur_depth)
+                               {
+                                       /* array after some scalars at same level? */
+                                       if (*astatep != NULL)
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                                errmsg("multidimensional arrays must have array expressions with matching dimensions")));
+                                       /* too many dimensions? */
+                                       if (cur_depth >= MAXDIM)
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                                                errmsg("number of array dimensions exceeds the maximum allowed (%d)",
+                                                                               MAXDIM)));
+                                       /* OK, add a dimension */
+                                       dims[*ndims] = PySequence_Length(subobj);
+                                       (*ndims)++;
+                               }
+                               else if (cur_depth >= *ndims ||
+                                                PySequence_Length(subobj) != dims[cur_depth])
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                        errmsg("multidimensional arrays must have array expressions with matching dimensions")));
+
+                               /* recurse to fetch elements of this sub-array */
+                               PLySequence_ToArray_recurse(subobj, astatep,
+                                                                                       ndims, dims, cur_depth + 1,
+                                                                                       elm, elmbasetype);
+                       }
+                       else
+                       {
+                               Datum           dat;
+                               bool            isnull;
+
+                               /* scalar after some sub-arrays at same level? */
+                               if (*ndims != cur_depth)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                        errmsg("multidimensional arrays must have array expressions with matching dimensions")));
+
+                               /* convert non-list object to Datum */
+                               dat = elm->func(elm, subobj, &isnull, true);
+
+                               /* create ArrayBuildState if we didn't already */
+                               if (*astatep == NULL)
+                                       *astatep = initArrayResult(elmbasetype,
+                                                                                          CurrentMemoryContext, true);
+
+                               /* ... and save the element value in it */
+                               (void) accumArrayResult(*astatep, dat, isnull,
+                                                                               elmbasetype, CurrentMemoryContext);
+                       }
                }
-       }
-       else
-       {
-               for (i = 0; i < dims[dim]; i++)
+               PG_CATCH();
                {
-                       PyObject   *obj = PySequence_GetItem(list, i);
-
-                       elems[*currelem] = elm->func(elm, obj, &nulls[*currelem], true);
-                       Py_XDECREF(obj);
-                       (*currelem)++;
+                       Py_XDECREF(subobj);
+                       PG_RE_THROW();
                }
+               PG_END_TRY();
+
+               Py_XDECREF(subobj);
        }
 }
 
diff --git a/src/pl/plpython/sql/plpython_types.sql b/src/pl/plpython/sql/plpython_types.sql
index 8fa8f6bee7f3b700d73be3f973beb11eb5a6615c..c3c9c01866a201dd330eae077f00924e50fc8c0a 100644
@@ -328,11 +328,43 @@ $$ LANGUAGE plpythonu;
 
 SELECT * FROM test_type_conversion_array_mixed2();
 
-CREATE FUNCTION test_type_conversion_array_mixed3() RETURNS text[] AS $$
-return [[], 'a']
+
+-- check output of multi-dimensional arrays
+CREATE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [['a'], ['b'], ['c']]
+$$ LANGUAGE plpythonu;
+
+select test_type_conversion_md_array_out();
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], []]
+$$ LANGUAGE plpythonu;
+
+select test_type_conversion_md_array_out();
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], [1]]
+$$ LANGUAGE plpythonu;
+
+select test_type_conversion_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[], 1]
+$$ LANGUAGE plpythonu;
+
+select test_type_conversion_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [1, []]
+$$ LANGUAGE plpythonu;
+
+select test_type_conversion_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION test_type_conversion_md_array_out() RETURNS text[] AS $$
+return [[1], [[]]]
 $$ LANGUAGE plpythonu;
 
-SELECT * FROM test_type_conversion_array_mixed3();
+select test_type_conversion_md_array_out();  -- fail
 
 
 CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
@@ -341,6 +373,18 @@ $$ LANGUAGE plpythonu;
 
 SELECT * FROM test_type_conversion_mdarray_malformed();
 
+CREATE FUNCTION test_type_conversion_mdarray_malformed2() RETURNS text[] AS $$
+return [[1,2,3], "abc"]
+$$ LANGUAGE plpythonu;
+
+SELECT * FROM test_type_conversion_mdarray_malformed2();
+
+CREATE FUNCTION test_type_conversion_mdarray_malformed3() RETURNS text[] AS $$
+return ["abc", [1,2,3]]
+$$ LANGUAGE plpythonu;
+
+SELECT * FROM test_type_conversion_mdarray_malformed3();
+
 CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
 return [[[[[[[1]]]]]]]
 $$ LANGUAGE plpythonu;