From: Mike Bayer Date: Sun, 3 Apr 2022 17:44:57 +0000 (-0400) Subject: use .fromisoformat() for sqlite datetime, date, time parsing X-Git-Tag: rel_2_0_0b1~377^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccadbec82555c53eefa889160510f5af1e224709;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git use .fromisoformat() for sqlite datetime, date, time parsing SQLite datetime, date, and time datatypes now use Python standard lib ``fromisoformat()`` methods in order to parse incoming datetime, date, and time string values. This improves performance vs. the previous regular expression-based approach, and also automatically accommodates for datetime and time formats that contain either a six-digit "microseconds" format or a three-digit "milliseconds" format. Fixes: #7029 Change-Id: I67aab4fe5ee3055e5996050cf4564981413cc221 --- diff --git a/doc/build/changelog/unreleased_20/7029.rst b/doc/build/changelog/unreleased_20/7029.rst new file mode 100644 index 0000000000..98a261f778 --- /dev/null +++ b/doc/build/changelog/unreleased_20/7029.rst @@ -0,0 +1,10 @@ +.. change:: + :tags: usecase, sqlite, performance + :tickets: 7029 + + SQLite datetime, date, and time datatypes now use Python standard lib + ``fromisoformat()`` methods in order to parse incoming datetime, date, and + time string values. This improves performance vs. the previous regular + expression-based approach, and also automatically accommodates for datetime + and time formats that contain either a six-digit "microseconds" format or a + three-digit "milliseconds" format. 
diff --git a/lib/sqlalchemy/cyextension/processors.pyx b/lib/sqlalchemy/cyextension/processors.pyx index 9f23e73b1a..b0ad865c54 100644 --- a/lib/sqlalchemy/cyextension/processors.pyx +++ b/lib/sqlalchemy/cyextension/processors.pyx @@ -1,7 +1,9 @@ import datetime +from datetime import datetime as datetime_cls +from datetime import time as time_cls +from datetime import date as date_cls import re -from cpython.datetime cimport date_new, datetime_new, import_datetime, time_new from cpython.object cimport PyObject_Str from cpython.unicode cimport PyUnicode_AsASCIIString, PyUnicode_Check, PyUnicode_Decode from libc.stdio cimport sscanf @@ -27,53 +29,22 @@ cdef inline bytes to_bytes(object value, str type_name): "- value is not a string." ) from e -import_datetime() # required to call datetime_new/date_new/time_new - def str_to_datetime(value): - if value is None: - return None - cdef int numparsed - cdef unsigned int year, month, day, hour, minute, second, microsecond = 0 - cdef bytes value_b = to_bytes(value, 'datetime') - cdef const char * string = value_b - - numparsed = sscanf(string, "%4u-%2u-%2u %2u:%2u:%2u.%6u", - &year, &month, &day, &hour, &minute, &second, &microsecond) - if numparsed < 6: - raise ValueError( - "Couldn't parse datetime string: '%s'" % (value) - ) - return datetime_new(year, month, day, hour, minute, second, microsecond, None) + if value is not None: + value = datetime_cls.fromisoformat(value) + return value -def str_to_date(value): - if value is None: - return None - cdef int numparsed - cdef unsigned int year, month, day - cdef bytes value_b = to_bytes(value, 'date') - cdef const char * string = value_b +def str_to_time(value): + if value is not None: + value = time_cls.fromisoformat(value) + return value - numparsed = sscanf(string, "%4u-%2u-%2u", &year, &month, &day) - if numparsed != 3: - raise ValueError( - "Couldn't parse date string: '%s'" % (value) - ) - return date_new(year, month, day) -def str_to_time(value): - if value is None: - return 
None - cdef int numparsed - cdef unsigned int hour, minute, second, microsecond = 0 - cdef bytes value_b = to_bytes(value, 'time') - cdef const char * string = value_b +def str_to_date(value): + if value is not None: + value = date_cls.fromisoformat(value) + return value - numparsed = sscanf(string, "%2u:%2u:%2u.%6u", &hour, &minute, &second, &microsecond) - if numparsed < 3: - raise ValueError( - "Couldn't parse time string: '%s'" % (value) - ) - return time_new(hour, minute, second, microsecond, None) cdef class DecimalResultProcessor: diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py index b1ac20383e..f21ab90836 100644 --- a/lib/sqlalchemy/dialects/sqlite/base.py +++ b/lib/sqlalchemy/dialects/sqlite/base.py @@ -926,6 +926,12 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime): 2021-03-15 12:05:57.105542 + The incoming storage format is by default parsed using the + Python ``datetime.fromisoformat()`` function. + + .. versionchanged:: 2.0 ``datetime.fromisoformat()`` is used for default + datetime string parsing. + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -941,7 +947,8 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime): with keys year, month, day, hour, minute, second, and microsecond. :param regexp: regular expression which will be applied to incoming result - rows. If the regexp contains named groups, the resulting match dict is + rows, replacing the use of ``datetime.fromisoformat()`` to parse incoming + strings. If the regexp contains named groups, the resulting match dict is applied to the Python datetime() constructor as keyword arguments. Otherwise, if positional groups are used, the datetime() constructor is called with positional arguments via @@ -1027,6 +1034,13 @@ class DATE(_DateTimeMixin, sqltypes.Date): 2011-03-15 + The incoming storage format is by default parsed using the + Python ``date.fromisoformat()`` function. + + .. 
versionchanged:: 2.0 ``date.fromisoformat()`` is used for default + date string parsing. + + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -1042,11 +1056,13 @@ class DATE(_DateTimeMixin, sqltypes.Date): dict with keys year, month, and day. :param regexp: regular expression which will be applied to - incoming result rows. If the regexp contains named groups, the - resulting match dict is applied to the Python date() constructor - as keyword arguments. Otherwise, if positional groups are used, the - date() constructor is called with positional arguments via + incoming result rows, replacing the use of ``date.fromisoformat()`` to + parse incoming strings. If the regexp contains named groups, the resulting + match dict is applied to the Python date() constructor as keyword + arguments. Otherwise, if positional groups are used, the date() + constructor is called with positional arguments via ``*map(int, match_obj.groups(0))``. + """ _storage_format = "%(year)04d-%(month)02d-%(day)02d" @@ -1092,6 +1108,12 @@ class TIME(_DateTimeMixin, sqltypes.Time): 12:05:57.10558 + The incoming storage format is by default parsed using the + Python ``time.fromisoformat()`` function. + + .. versionchanged:: 2.0 ``time.fromisoformat()`` is used for default + time string parsing. + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -1107,10 +1129,12 @@ class TIME(_DateTimeMixin, sqltypes.Time): with keys hour, minute, second, and microsecond. :param regexp: regular expression which will be applied to incoming result - rows. If the regexp contains named groups, the resulting match dict is + rows, replacing the use of ``time.fromisoformat()`` to parse incoming + strings. If the regexp contains named groups, the resulting match dict is applied to the Python time() constructor as keyword arguments. 
Otherwise, if positional groups are used, the time() constructor is called with positional arguments via ``*map(int, match_obj.groups(0))``. + """ _storage_format = "%(hour)02d:%(minute)02d:%(second)02d.%(microsecond)06d" diff --git a/lib/sqlalchemy/engine/_py_processors.py b/lib/sqlalchemy/engine/_py_processors.py index 27cb9e9395..63f03466a5 100644 --- a/lib/sqlalchemy/engine/_py_processors.py +++ b/lib/sqlalchemy/engine/_py_processors.py @@ -16,8 +16,10 @@ They all share one common characteristic: None is passed through unchanged. from __future__ import annotations import datetime +from datetime import date as date_cls +from datetime import datetime as datetime_cls +from datetime import time as time_cls from decimal import Decimal -import re import typing from typing import Any from typing import Callable @@ -26,6 +28,7 @@ from typing import Type from typing import TypeVar from typing import Union + _DT = TypeVar( "_DT", bound=Union[datetime.datetime, datetime.time, datetime.date] ) @@ -50,6 +53,7 @@ def str_to_datetime_processor_factory( "Couldn't parse %s string '%r' " "- value is not a string." 
% (type_.__name__, value) ) from err + if m is None: raise ValueError( "Couldn't parse %s string: " @@ -108,12 +112,25 @@ def int_to_boolean(value: Optional[int]) -> Optional[bool]: return bool(value) -DATETIME_RE = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?") -TIME_RE = re.compile(r"(\d+):(\d+):(\d+)(?:\.(\d+))?") -DATE_RE = re.compile(r"(\d+)-(\d+)-(\d+)") +def str_to_datetime(value: Optional[str]) -> Optional[datetime.datetime]: + if value is not None: + dt_value = datetime_cls.fromisoformat(value) + else: + dt_value = None + return dt_value -str_to_datetime = str_to_datetime_processor_factory( - DATETIME_RE, datetime.datetime -) -str_to_time = str_to_datetime_processor_factory(TIME_RE, datetime.time) -str_to_date = str_to_datetime_processor_factory(DATE_RE, datetime.date) + +def str_to_time(value: Optional[str]) -> Optional[datetime.time]: + if value is not None: + dt_value = time_cls.fromisoformat(value) + else: + dt_value = None + return dt_value + + +def str_to_date(value: Optional[str]) -> Optional[datetime.date]: + if value is not None: + dt_value = date_cls.fromisoformat(value) + else: + dt_value = None + return dt_value diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 0940eab9b9..cc14dd9c4f 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -432,7 +432,7 @@ class DateTimeMicrosecondsTest(_DateFixture, fixtures.TablesTest): __requires__ = ("datetime_microseconds",) __backend__ = True datatype = DateTime - data = datetime.datetime(2012, 10, 15, 12, 57, 18, 396) + data = datetime.datetime(2012, 10, 15, 12, 57, 18, 39642) class TimestampMicrosecondsTest(_DateFixture, fixtures.TablesTest): diff --git a/test/dialect/test_sqlite.py b/test/dialect/test_sqlite.py index 9658fec832..8e7632c906 100644 --- a/test/dialect/test_sqlite.py +++ b/test/dialect/test_sqlite.py @@ -117,7 +117,7 @@ class TestTypes(fixtures.TestBase, 
AssertsExecutionResults): ]: assert_raises_message( ValueError, - "Couldn't parse %s string." % disp, + "Invalid isoformat string:", lambda: connection.execute( text("select 'ASDF' as value").columns(value=typ) ).scalar(), @@ -166,7 +166,7 @@ class TestTypes(fixtures.TestBase, AssertsExecutionResults): # 2004-05-21T00:00:00 storage_format="%(year)04d-%(month)02d-%(day)02d" "T%(hour)02d:%(minute)02d:%(second)02d", - regexp=r"(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)", + regexp=r"^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)$", ) t = Table("t", self.metadata, Column("d", sqlite_date)) self.metadata.create_all(connection) @@ -195,7 +195,7 @@ class TestTypes(fixtures.TestBase, AssertsExecutionResults): sqlite_date = sqlite.DATETIME( storage_format="%(year)04d%(month)02d%(day)02d" "%(hour)02d%(minute)02d%(second)02d", - regexp=r"(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})", + regexp=r"^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$", ) t = Table("t", self.metadata, Column("d", sqlite_date)) self.metadata.create_all(connection) diff --git a/test/engine/test_processors.py b/test/engine/test_processors.py index 392632327a..5f28e3ea0e 100644 --- a/test/engine/test_processors.py +++ b/test/engine/test_processors.py @@ -1,6 +1,9 @@ +import datetime +import re from types import MappingProxyType from sqlalchemy import exc +from sqlalchemy.engine import processors from sqlalchemy.testing import assert_raises_message from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_raises_message @@ -36,34 +39,72 @@ class CyBooleanProcessorTest(_BooleanProcessorTest): class _DateProcessorTest(fixtures.TestBase): + def test_iso_datetime(self): + eq_( + self.module.str_to_datetime("2022-04-03 17:12:34.353"), + datetime.datetime(2022, 4, 3, 17, 12, 34, 353000), + ) + + eq_( + self.module.str_to_datetime("2022-04-03 17:12:34.353123"), + datetime.datetime(2022, 4, 3, 17, 12, 34, 353123), + ) + + eq_( + self.module.str_to_datetime("2022-04-03 17:12:34"), + datetime.datetime(2022, 4, 3, 17, 12, 
34), + ) + + eq_( + self.module.str_to_time("17:12:34.353123"), + datetime.time(17, 12, 34, 353123), + ) + + eq_( + self.module.str_to_time("17:12:34.353"), + datetime.time(17, 12, 34, 353000), + ) + + eq_( + self.module.str_to_time("17:12:34"), + datetime.time(17, 12, 34), + ) + + eq_(self.module.str_to_date("2022-04-03"), datetime.date(2022, 4, 3)) + def test_date_no_string(self): assert_raises_message( - ValueError, - "Couldn't parse date string '2012' - value is not a string", + TypeError, + "fromisoformat: argument must be str", self.module.str_to_date, 2012, ) - def test_datetime_no_string(self): + def test_datetime_no_string_custom_reg(self): assert_raises_message( ValueError, "Couldn't parse datetime string '2012' - value is not a string", - self.module.str_to_datetime, + processors.str_to_datetime_processor_factory( + re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?"), + datetime.datetime, + ), 2012, ) - def test_time_no_string(self): + def test_time_no_string_custom_reg(self): assert_raises_message( ValueError, "Couldn't parse time string '2012' - value is not a string", - self.module.str_to_time, + processors.str_to_datetime_processor_factory( + re.compile(r"^(\d+):(\d+):(\d+)(?:\.(\d{6}))?$"), datetime.time + ), 2012, ) def test_date_invalid_string(self): assert_raises_message( ValueError, - "Couldn't parse date string: '5:a'", + "Invalid isoformat string: '5:a'", self.module.str_to_date, "5:a", ) @@ -71,7 +112,7 @@ class _DateProcessorTest(fixtures.TestBase): def test_datetime_invalid_string(self): assert_raises_message( ValueError, - "Couldn't parse datetime string: '5:a'", + "Invalid isoformat string: '5:a'", self.module.str_to_datetime, "5:a", ) @@ -79,7 +120,7 @@ class _DateProcessorTest(fixtures.TestBase): def test_time_invalid_string(self): assert_raises_message( ValueError, - "Couldn't parse time string: '5:a'", + "Invalid isoformat string: '5:a'", self.module.str_to_time, "5:a", )