From 3b8a14153da9e7b6694571fa10f6d30c4012ee82 Mon Sep 17 00:00:00 2001 From: Gord Thompson Date: Sun, 20 Dec 2020 10:20:10 -0700 Subject: [PATCH] Fix issues with JSON and float/numeric Decimal accuracy and behavior has been improved when extracting floating point and/or decimal values from JSON strings using the :meth:`_sql.sqltypes.JSON.Comparator.as_float` method, when the numeric value inside of the JSON string has many significant digits; previously, MySQL backends would truncate values with many significant digits and SQL Server backends would raise an exception due to a DECIMAL cast with insufficient significant digits. Both backends now use a FLOAT-compatible approach that does not hardcode significant digits for floating point values. For precision numerics, a new method :meth:`_sql.sqltypes.JSON.Comparator.as_numeric` has been added which accepts arguments for precision and scale, and will return values as Python ``Decimal`` objects with no floating point conversion assuming the DBAPI supports it (all but pysqlite). Fixes: #5788 Change-Id: I6eb51fe172a389548dd6e3c65efec9f1f538012e --- doc/build/changelog/unreleased_14/5788.rst | 18 +++ lib/sqlalchemy/dialects/mssql/base.py | 13 +- lib/sqlalchemy/dialects/mysql/base.py | 29 +++- lib/sqlalchemy/sql/sqltypes.py | 20 ++- lib/sqlalchemy/testing/requirements.py | 8 ++ lib/sqlalchemy/testing/suite/test_types.py | 158 +++++++++++++++++---- test/requirements.py | 10 ++ 7 files changed, 213 insertions(+), 43 deletions(-) create mode 100644 doc/build/changelog/unreleased_14/5788.rst diff --git a/doc/build/changelog/unreleased_14/5788.rst b/doc/build/changelog/unreleased_14/5788.rst new file mode 100644 index 0000000000..e3bf7ea484 --- /dev/null +++ b/doc/build/changelog/unreleased_14/5788.rst @@ -0,0 +1,18 @@ +.. change:: + :tags: bug, mssql, mysql, datatypes + :tickets: 5788 + :versions: 1.4.0b2 + + Decimal accuracy and behavior has been improved when extracting floating + point and/or decimal values from JSON strings using the + :meth:`_sql.sqltypes.JSON.Comparator.as_float` method, when the numeric + value inside of the JSON string has many significant digits; previously, + MySQL backends would truncate values with many significant digits and SQL + Server backends would raise an exception due to a DECIMAL cast with + insufficient significant digits. Both backends now use a FLOAT-compatible + approach that does not hardcode significant digits for floating point + values. For precision numerics, a new method + :meth:`_sql.sqltypes.JSON.Comparator.as_numeric` has been added which + accepts arguments for precision and scale, and will return values as Python + ``Decimal`` objects with no floating point conversion assuming the DBAPI + supports it (all but pysqlite). \ No newline at end of file diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py index 911e1791ae..bc5480e2c7 100644 --- a/lib/sqlalchemy/dialects/mssql/base.py +++ b/lib/sqlalchemy/dialects/mssql/base.py @@ -2108,12 +2108,13 @@ class MSSQLCompiler(compiler.SQLCompiler): self.process(binary.right, **kw), ) elif binary.type._type_affinity is sqltypes.Numeric: - type_expression = ( - "ELSE CAST(JSON_VALUE(%s, %s) AS DECIMAL(10, 6))" - % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw), - ) + type_expression = "ELSE CAST(JSON_VALUE(%s, %s) AS %s)" % ( + self.process(binary.left, **kw), + self.process(binary.right, **kw), + "FLOAT" + if isinstance(binary.type, sqltypes.Float) + else "NUMERIC(%s, %s)" + % (binary.type.precision, binary.type.scale), ) elif binary.type._type_affinity is sqltypes.Boolean: # the NULL handling is particularly weird with boolean, so diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index 911c0d522a..f90d961d8c 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -1455,14 +1455,29 @@ class MySQLCompiler(compiler.SQLCompiler): ) ) elif binary.type._type_affinity is sqltypes.Numeric: - # FLOAT / REAL not added in MySQL til 8.0.17 - type_expression = ( - "ELSE CAST(JSON_EXTRACT(%s, %s) AS DECIMAL(10, 6))" - % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw), + if ( + binary.type.scale is not None + and binary.type.precision is not None + ): + # using DECIMAL here because MySQL does not recognize NUMERIC + type_expression = ( + "ELSE CAST(JSON_EXTRACT(%s, %s) AS DECIMAL(%s, %s))" + % ( + self.process(binary.left, **kw), + self.process(binary.right, **kw), + binary.type.precision, + binary.type.scale, + ) + ) + else: + # FLOAT / REAL not added in MySQL til 8.0.17 + type_expression = ( + "ELSE JSON_EXTRACT(%s, %s)+0.0000000000000000000000" + % ( + self.process(binary.left, **kw), + self.process(binary.right, **kw), + ) ) - ) elif binary.type._type_affinity is sqltypes.Boolean: # the NULL handling is particularly weird with boolean, so # explicitly return true/false constants diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py index 09c7388abe..072afe46ee 100644 --- a/lib/sqlalchemy/sql/sqltypes.py +++ b/lib/sqlalchemy/sql/sqltypes.py @@ -2467,9 +2467,27 @@ class JSON(Indexable, TypeEngine): .. versionadded:: 1.3.11 """ - # note there's no Numeric or Decimal support here yet return self._binary_w_type(Float(), "as_float") + def as_numeric(self, precision, scale, asdecimal=True): + """Cast an indexed value as numeric/decimal. + + e.g.:: + + stmt = select( + mytable.c.json_column['some_data'].as_numeric(10, 6) + ).where( + mytable.c. + json_column['some_data'].as_numeric(10, 6) == 29.75 + ) + + .. versionadded:: 1.4.0b2 + + """ + return self._binary_w_type( + Numeric(precision, scale, asdecimal=asdecimal), "as_numeric" + ) + def as_json(self): """Cast an indexed value as JSON. diff --git a/lib/sqlalchemy/testing/requirements.py b/lib/sqlalchemy/testing/requirements.py index 7ff3fcf388..30b42cbf3c 100644 --- a/lib/sqlalchemy/testing/requirements.py +++ b/lib/sqlalchemy/testing/requirements.py @@ -870,6 +870,14 @@ class SuiteRequirements(Requirements): """ return exclusions.closed() + @property + def cast_precision_numerics_many_significant_digits(self): + """same as precision_numerics_many_significant_digits but within the + context of a CAST statement (hello MySQL) + + """ + return self.precision_numerics_many_significant_digits + @property def implicit_decimal_binds(self): """target backend will return a selected Decimal as a Decimal, not diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 21d2e8942d..749e83de43 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -3,6 +3,7 @@ import datetime import decimal import json +import re from .. import config from .. import engines @@ -804,6 +805,26 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest): ("integer", None), ("float", 28.5), ("float", None), + ( + "float", + 1234567.89, + ), + ("numeric", 1234567.89), + # this one "works" because the float value you see here is + # lost immediately to floating point stuff + ("numeric", 99998969694839.983485848, requirements.python3), + ("numeric", 99939.983485848, requirements.python3), + ("_decimal", decimal.Decimal("1234567.89")), + ( + "_decimal", + decimal.Decimal("99998969694839.983485848"), + # fails on SQLite and MySQL (non-mariadb) + requirements.cast_precision_numerics_many_significant_digits, + ), + ( + "_decimal", + decimal.Decimal("99939.983485848"), + ), ] + json_elements def decorate(fn): @@ -813,12 +834,52 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest): return decorate - @_index_fixtures(False) - def test_index_typed_access(self, datatype, value): + def _json_value_insert(self, connection, datatype, value, data_element): data_table = self.tables.data_table - data_element = {"key1": value} - with config.db.begin() as conn: - conn.execute( + if datatype == "_decimal": + + # Python's builtin json serializer basically doesn't support + # Decimal objects without implicit float conversion period. + # users can otherwise use simplejson which supports + # precision decimals + + # https://bugs.python.org/issue16535 + + # inserting as strings to avoid a new fixture around the + # dialect which would have idiosyncrasies for different + # backends. + + class DecimalEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, decimal.Decimal): + return str(o) + return super(DecimalEncoder, self).default(o) + + json_data = json.dumps(data_element, cls=DecimalEncoder) + + # take the quotes out. yup, there is *literally* no other + # way to get Python's json.dumps() to put all the digits in + # the string + json_data = re.sub(r'"(%s)"' % str(value), str(value), json_data) + + datatype = "numeric" + + connection.execute( + data_table.insert().values( + name="row1", + # to pass the string directly to every backend, including + # PostgreSQL which needs the value to be CAST as JSON + # both in the SQL as well as at the prepared statement + # level for asyncpg, while at the same time MySQL + # doesn't even support CAST for JSON, here we are + # sending the string embedded in the SQL without using + # a parameter. + data=bindparam(None, json_data, literal_execute=True), + nulldata=bindparam(None, json_data, literal_execute=True), + ), + ) + else: + connection.execute( data_table.insert(), { "name": "row1", @@ -827,62 +888,101 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest): }, ) - expr = data_table.c.data["key1"] + p_s = None + + if datatype: + if datatype == "numeric": + a, b = str(value).split(".") + s = len(b) + p = len(a) + s + + if isinstance(value, decimal.Decimal): + compare_value = value + else: + compare_value = decimal.Decimal(str(value)) + + p_s = (p, s) + else: + compare_value = value + else: + compare_value = value + + return datatype, compare_value, p_s + + @_index_fixtures(False) + @testing.emits_warning(r".*does \*not\* support Decimal objects natively") + def test_index_typed_access(self, datatype, value): + data_table = self.tables.data_table + data_element = {"key1": value} + with config.db.begin() as conn: + + datatype, compare_value, p_s = self._json_value_insert( + conn, datatype, value, data_element + ) + + expr = data_table.c.data["key1"] if datatype: - expr = getattr(expr, "as_%s" % datatype)() + if datatype == "numeric" and p_s: + expr = expr.as_numeric(*p_s) + else: + expr = getattr(expr, "as_%s" % datatype)() roundtrip = conn.scalar(select(expr)) - eq_(roundtrip, value) + eq_(roundtrip, compare_value) if util.py3k: # skip py2k to avoid comparing unicode to str etc. - is_(type(roundtrip), type(value)) + is_(type(roundtrip), type(compare_value)) @_index_fixtures(True) + @testing.emits_warning(r".*does \*not\* support Decimal objects natively") def test_index_typed_comparison(self, datatype, value): data_table = self.tables.data_table data_element = {"key1": value} + with config.db.begin() as conn: - conn.execute( - data_table.insert(), - { - "name": "row1", - "data": data_element, - "nulldata": data_element, - }, + datatype, compare_value, p_s = self._json_value_insert( + conn, datatype, value, data_element ) expr = data_table.c.data["key1"] if datatype: - expr = getattr(expr, "as_%s" % datatype)() + if datatype == "numeric" and p_s: + expr = expr.as_numeric(*p_s) + else: + expr = getattr(expr, "as_%s" % datatype)() - row = conn.execute(select(expr).where(expr == value)).first() + row = conn.execute( + select(expr).where(expr == compare_value) + ).first() # make sure we get a row even if value is None - eq_(row, (value,)) + eq_(row, (compare_value,)) @_index_fixtures(True) + @testing.emits_warning(r".*does \*not\* support Decimal objects natively") def test_path_typed_comparison(self, datatype, value): data_table = self.tables.data_table data_element = {"key1": {"subkey1": value}} with config.db.begin() as conn: - conn.execute( - data_table.insert(), - { - "name": "row1", - "data": data_element, - "nulldata": data_element, - }, + + datatype, compare_value, p_s = self._json_value_insert( + conn, datatype, value, data_element ) expr = data_table.c.data[("key1", "subkey1")] if datatype: - expr = getattr(expr, "as_%s" % datatype)() + if datatype == "numeric" and p_s: + expr = expr.as_numeric(*p_s) + else: + expr = getattr(expr, "as_%s" % datatype)() - row = conn.execute(select(expr).where(expr == value)).first() + row = conn.execute( + select(expr).where(expr == compare_value) + ).first() # make sure we get a row even if value is None - eq_(row, (value,)) + eq_(row, (compare_value,)) @testing.combinations( (True,), diff --git a/test/requirements.py b/test/requirements.py index 7cb09c309c..cb2f4840f6 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -1183,6 +1183,16 @@ class DefaultRequirements(SuiteRequirements): ] ) + @property + def cast_precision_numerics_many_significant_digits(self): + """same as precision_numerics_many_significant_digits but within the + context of a CAST statement (hello MySQL) + + """ + return self.precision_numerics_many_significant_digits + fails_if( + "mysql" + ) + @property def precision_numerics_retains_significant_digits(self): """A precision numeric type will return empty significant digits, -- 2.47.2