]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
Fix issues with JSON and float/numeric
authorGord Thompson <gord@gordthompson.com>
Sun, 20 Dec 2020 17:20:10 +0000 (10:20 -0700)
committerMike Bayer <mike_mp@zzzcomputing.com>
Mon, 21 Dec 2020 04:18:04 +0000 (23:18 -0500)
Decimal accuracy and behavior has been improved when extracting floating
point and/or decimal values from JSON strings using the
:meth:`_sql.sqltypes.JSON.Comparator.as_float` method, when the numeric
value inside of the JSON string has many significant digits; previously,
MySQL backends would truncate values with many significant digits and SQL
Server backends would raise an exception due to a DECIMAL cast with
insufficient significant digits.   Both backends now use a FLOAT-compatible
approach that does not hardcode significant digits for floating point
values. For precision numerics, a new method
:meth:`_sql.sqltypes.JSON.Comparator.as_numeric` has been added which
accepts arguments for precision and scale, and will return values as Python
``Decimal`` objects with no floating point conversion assuming the DBAPI
supports it (all but pysqlite).

Fixes: #5788
Change-Id: I6eb51fe172a389548dd6e3c65efec9f1f538012e

doc/build/changelog/unreleased_14/5788.rst [new file with mode: 0644]
lib/sqlalchemy/dialects/mssql/base.py
lib/sqlalchemy/dialects/mysql/base.py
lib/sqlalchemy/sql/sqltypes.py
lib/sqlalchemy/testing/requirements.py
lib/sqlalchemy/testing/suite/test_types.py
test/requirements.py

diff --git a/doc/build/changelog/unreleased_14/5788.rst b/doc/build/changelog/unreleased_14/5788.rst
new file mode 100644 (file)
index 0000000..e3bf7ea
--- /dev/null
@@ -0,0 +1,18 @@
+.. change::
+    :tags: bug, mssql, mysql, datatypes
+    :tickets: 5788
+    :versions: 1.4.0b2
+
+    Decimal accuracy and behavior has been improved when extracting floating
+    point and/or decimal values from JSON strings using the
+    :meth:`_sql.sqltypes.JSON.Comparator.as_float` method, when the numeric
+    value inside of the JSON string has many significant digits; previously,
+    MySQL backends would truncate values with many significant digits and SQL
+    Server backends would raise an exception due to a DECIMAL cast with
+    insufficient significant digits.   Both backends now use a FLOAT-compatible
+    approach that does not hardcode significant digits for floating point
+    values. For precision numerics, a new method
+    :meth:`_sql.sqltypes.JSON.Comparator.as_numeric` has been added which
+    accepts arguments for precision and scale, and will return values as Python
+    ``Decimal`` objects with no floating point conversion assuming the DBAPI
+    supports it (all but pysqlite).
\ No newline at end of file
index 911e1791aea85691d90522d9e86bf54819a465e4..bc5480e2c7ffbc08b8844010469d473c6e99fc3d 100644 (file)
@@ -2108,12 +2108,13 @@ class MSSQLCompiler(compiler.SQLCompiler):
                 self.process(binary.right, **kw),
             )
         elif binary.type._type_affinity is sqltypes.Numeric:
-            type_expression = (
-                "ELSE CAST(JSON_VALUE(%s, %s) AS DECIMAL(10, 6))"
-                % (
-                    self.process(binary.left, **kw),
-                    self.process(binary.right, **kw),
-                )
+            type_expression = "ELSE CAST(JSON_VALUE(%s, %s) AS %s)" % (
+                self.process(binary.left, **kw),
+                self.process(binary.right, **kw),
+                "FLOAT"
+                if isinstance(binary.type, sqltypes.Float)
+                else "NUMERIC(%s, %s)"
+                % (binary.type.precision, binary.type.scale),
             )
         elif binary.type._type_affinity is sqltypes.Boolean:
             # the NULL handling is particularly weird with boolean, so
index 911c0d522a2c40ab7c829aed31e68b6afb44b917..f90d961d8c6c9c7649767de314c129b90c84cea9 100644 (file)
@@ -1455,14 +1455,29 @@ class MySQLCompiler(compiler.SQLCompiler):
                 )
             )
         elif binary.type._type_affinity is sqltypes.Numeric:
-            # FLOAT / REAL not added in MySQL til 8.0.17
-            type_expression = (
-                "ELSE CAST(JSON_EXTRACT(%s, %s) AS DECIMAL(10, 6))"
-                % (
-                    self.process(binary.left, **kw),
-                    self.process(binary.right, **kw),
+            if (
+                binary.type.scale is not None
+                and binary.type.precision is not None
+            ):
+                # using DECIMAL here because MySQL does not recognize NUMERIC
+                type_expression = (
+                    "ELSE CAST(JSON_EXTRACT(%s, %s) AS DECIMAL(%s, %s))"
+                    % (
+                        self.process(binary.left, **kw),
+                        self.process(binary.right, **kw),
+                        binary.type.precision,
+                        binary.type.scale,
+                    )
+                )
+            else:
+                # FLOAT / REAL not added in MySQL til 8.0.17
+                type_expression = (
+                    "ELSE JSON_EXTRACT(%s, %s)+0.0000000000000000000000"
+                    % (
+                        self.process(binary.left, **kw),
+                        self.process(binary.right, **kw),
+                    )
                 )
-            )
         elif binary.type._type_affinity is sqltypes.Boolean:
             # the NULL handling is particularly weird with boolean, so
             # explicitly return true/false constants
index 09c7388abe7845935552f25da9e21d319cf98246..072afe46ee2773ae665b619cd8d34319c9ac3bdd 100644 (file)
@@ -2467,9 +2467,27 @@ class JSON(Indexable, TypeEngine):
             .. versionadded:: 1.3.11
 
             """
-            # note there's no Numeric or Decimal support here yet
             return self._binary_w_type(Float(), "as_float")
 
+        def as_numeric(self, precision, scale, asdecimal=True):
+            """Cast an indexed value as numeric/decimal.
+
+            e.g.::
+
+                stmt = select(
+                    mytable.c.json_column['some_data'].as_numeric(10, 6)
+                ).where(
+                    mytable.c.
+                    json_column['some_data'].as_numeric(10, 6) == 29.75
+                )
+
+            .. versionadded:: 1.4.0b2
+
+            """
+            return self._binary_w_type(
+                Numeric(precision, scale, asdecimal=asdecimal), "as_numeric"
+            )
+
         def as_json(self):
             """Cast an indexed value as JSON.
 
index 7ff3fcf388b4205500626518989a636623c61fcf..30b42cbf3cb51a950b5b2764e8040206d39579b7 100644 (file)
@@ -870,6 +870,14 @@ class SuiteRequirements(Requirements):
         """
         return exclusions.closed()
 
+    @property
+    def cast_precision_numerics_many_significant_digits(self):
+        """same as precision_numerics_many_significant_digits but within the
+        context of a CAST statement (hello MySQL)
+
+        """
+        return self.precision_numerics_many_significant_digits
+
     @property
     def implicit_decimal_binds(self):
         """target backend will return a selected Decimal as a Decimal, not
index 21d2e8942d1d972d6d23f2448db3f2243b7f83d9..749e83de436086178b63915a233bca943f8187cd 100644 (file)
@@ -3,6 +3,7 @@
 import datetime
 import decimal
 import json
+import re
 
 from .. import config
 from .. import engines
@@ -804,6 +805,26 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest):
             ("integer", None),
             ("float", 28.5),
             ("float", None),
+            (
+                "float",
+                1234567.89,
+            ),
+            ("numeric", 1234567.89),
+            # this one "works" because the float value you see here is
+            # lost immediately to floating point stuff
+            ("numeric", 99998969694839.983485848, requirements.python3),
+            ("numeric", 99939.983485848, requirements.python3),
+            ("_decimal", decimal.Decimal("1234567.89")),
+            (
+                "_decimal",
+                decimal.Decimal("99998969694839.983485848"),
+                # fails on SQLite and MySQL (non-mariadb)
+                requirements.cast_precision_numerics_many_significant_digits,
+            ),
+            (
+                "_decimal",
+                decimal.Decimal("99939.983485848"),
+            ),
         ] + json_elements
 
         def decorate(fn):
@@ -813,12 +834,52 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest):
 
         return decorate
 
-    @_index_fixtures(False)
-    def test_index_typed_access(self, datatype, value):
+    def _json_value_insert(self, connection, datatype, value, data_element):
         data_table = self.tables.data_table
-        data_element = {"key1": value}
-        with config.db.begin() as conn:
-            conn.execute(
+        if datatype == "_decimal":
+
+            # Python's builtin json serializer basically doesn't support
+            # Decimal objects without implicit float conversion period.
+            # users can otherwise use simplejson which supports
+            # precision decimals
+
+            # https://bugs.python.org/issue16535
+
+            # inserting as strings to avoid a new fixture around the
+            # dialect which would have idiosyncrasies for different
+            # backends.
+
+            class DecimalEncoder(json.JSONEncoder):
+                def default(self, o):
+                    if isinstance(o, decimal.Decimal):
+                        return str(o)
+                    return super(DecimalEncoder, self).default(o)
+
+            json_data = json.dumps(data_element, cls=DecimalEncoder)
+
+            # take the quotes out.  yup, there is *literally* no other
+            # way to get Python's json.dumps() to put all the digits in
+            # the string
+            json_data = re.sub(r'"(%s)"' % str(value), str(value), json_data)
+
+            datatype = "numeric"
+
+            connection.execute(
+                data_table.insert().values(
+                    name="row1",
+                    # to pass the string directly to every backend, including
+                    # PostgreSQL which needs the value to be CAST as JSON
+                    # both in the SQL as well as at the prepared statement
+                    # level for asyncpg, while at the same time MySQL
+                    # doesn't even support CAST for JSON, here we are
+                    # sending the string embedded in the SQL without using
+                    # a parameter.
+                    data=bindparam(None, json_data, literal_execute=True),
+                    nulldata=bindparam(None, json_data, literal_execute=True),
+                ),
+            )
+        else:
+            connection.execute(
                 data_table.insert(),
                 {
                     "name": "row1",
@@ -827,62 +888,101 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest):
                 },
             )
 
-            expr = data_table.c.data["key1"]
+        p_s = None
+
+        if datatype:
+            if datatype == "numeric":
+                a, b = str(value).split(".")
+                s = len(b)
+                p = len(a) + s
+
+                if isinstance(value, decimal.Decimal):
+                    compare_value = value
+                else:
+                    compare_value = decimal.Decimal(str(value))
+
+                p_s = (p, s)
+            else:
+                compare_value = value
+        else:
+            compare_value = value
+
+        return datatype, compare_value, p_s
+
+    @_index_fixtures(False)
+    @testing.emits_warning(r".*does \*not\* support Decimal objects natively")
+    def test_index_typed_access(self, datatype, value):
+        data_table = self.tables.data_table
+        data_element = {"key1": value}
 
+        with config.db.begin() as conn:
+
+            datatype, compare_value, p_s = self._json_value_insert(
+                conn, datatype, value, data_element
+            )
+
+            expr = data_table.c.data["key1"]
             if datatype:
-                expr = getattr(expr, "as_%s" % datatype)()
+                if datatype == "numeric" and p_s:
+                    expr = expr.as_numeric(*p_s)
+                else:
+                    expr = getattr(expr, "as_%s" % datatype)()
 
             roundtrip = conn.scalar(select(expr))
-            eq_(roundtrip, value)
+            eq_(roundtrip, compare_value)
             if util.py3k:  # skip py2k to avoid comparing unicode to str etc.
-                is_(type(roundtrip), type(value))
+                is_(type(roundtrip), type(compare_value))
 
     @_index_fixtures(True)
+    @testing.emits_warning(r".*does \*not\* support Decimal objects natively")
     def test_index_typed_comparison(self, datatype, value):
         data_table = self.tables.data_table
         data_element = {"key1": value}
+
         with config.db.begin() as conn:
-            conn.execute(
-                data_table.insert(),
-                {
-                    "name": "row1",
-                    "data": data_element,
-                    "nulldata": data_element,
-                },
+            datatype, compare_value, p_s = self._json_value_insert(
+                conn, datatype, value, data_element
             )
 
             expr = data_table.c.data["key1"]
             if datatype:
-                expr = getattr(expr, "as_%s" % datatype)()
+                if datatype == "numeric" and p_s:
+                    expr = expr.as_numeric(*p_s)
+                else:
+                    expr = getattr(expr, "as_%s" % datatype)()
 
-            row = conn.execute(select(expr).where(expr == value)).first()
+            row = conn.execute(
+                select(expr).where(expr == compare_value)
+            ).first()
 
             # make sure we get a row even if value is None
-            eq_(row, (value,))
+            eq_(row, (compare_value,))
 
     @_index_fixtures(True)
+    @testing.emits_warning(r".*does \*not\* support Decimal objects natively")
     def test_path_typed_comparison(self, datatype, value):
         data_table = self.tables.data_table
         data_element = {"key1": {"subkey1": value}}
         with config.db.begin() as conn:
-            conn.execute(
-                data_table.insert(),
-                {
-                    "name": "row1",
-                    "data": data_element,
-                    "nulldata": data_element,
-                },
+
+            datatype, compare_value, p_s = self._json_value_insert(
+                conn, datatype, value, data_element
             )
 
             expr = data_table.c.data[("key1", "subkey1")]
 
             if datatype:
-                expr = getattr(expr, "as_%s" % datatype)()
+                if datatype == "numeric" and p_s:
+                    expr = expr.as_numeric(*p_s)
+                else:
+                    expr = getattr(expr, "as_%s" % datatype)()
 
-            row = conn.execute(select(expr).where(expr == value)).first()
+            row = conn.execute(
+                select(expr).where(expr == compare_value)
+            ).first()
 
             # make sure we get a row even if value is None
-            eq_(row, (value,))
+            eq_(row, (compare_value,))
 
     @testing.combinations(
         (True,),
index 7cb09c309c6fbdc2dbdd9f4d48d19dfd297642cb..cb2f4840f6d558624dfd17b57055b0d949fab49e 100644 (file)
@@ -1183,6 +1183,16 @@ class DefaultRequirements(SuiteRequirements):
             ]
         )
 
+    @property
+    def cast_precision_numerics_many_significant_digits(self):
+        """same as precision_numerics_many_significant_digits but within the
+        context of a CAST statement (hello MySQL)
+
+        """
+        return self.precision_numerics_many_significant_digits + fails_if(
+            "mysql"
+        )
+
     @property
     def precision_numerics_retains_significant_digits(self):
         """A precision numeric type will return empty significant digits,