]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
use .fromisoformat() for sqlite datetime, date, time parsing
author Mike Bayer <mike_mp@zzzcomputing.com>
Sun, 3 Apr 2022 17:44:57 +0000 (13:44 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Sun, 3 Apr 2022 18:47:52 +0000 (14:47 -0400)
SQLite datetime, date, and time datatypes now use the Python standard library
``fromisoformat()`` methods to parse incoming datetime, date, and time string
values. This improves performance over the previous regular expression-based
approach, and also automatically accommodates datetime and time strings whose
fractional seconds use either a six-digit "microseconds" format or a
three-digit "milliseconds" format.

Fixes: #7029
Change-Id: I67aab4fe5ee3055e5996050cf4564981413cc221

doc/build/changelog/unreleased_20/7029.rst [new file with mode: 0644]
lib/sqlalchemy/cyextension/processors.pyx
lib/sqlalchemy/dialects/sqlite/base.py
lib/sqlalchemy/engine/_py_processors.py
lib/sqlalchemy/testing/suite/test_types.py
test/dialect/test_sqlite.py
test/engine/test_processors.py

diff --git a/doc/build/changelog/unreleased_20/7029.rst b/doc/build/changelog/unreleased_20/7029.rst
new file mode 100644 (file)
index 0000000..98a261f
--- /dev/null
@@ -0,0 +1,10 @@
+.. change::
+    :tags: usecase, sqlite, performance
+    :tickets: 7029
+
+    SQLite datetime, date, and time datatypes now use the Python standard
+    library ``fromisoformat()`` methods to parse incoming datetime, date, and
+    time string values. This improves performance over the previous regular
+    expression-based approach, and also automatically accommodates datetime
+    and time strings whose fractional seconds use either a six-digit
+    "microseconds" format or a three-digit "milliseconds" format.
index 9f23e73b1ac9af718608e14c384a9ee24282efe8..b0ad865c54ad6366b6f858655f338661de6ae731 100644 (file)
@@ -1,7 +1,9 @@
 import datetime
+from datetime import datetime as datetime_cls
+from datetime import time as time_cls
+from datetime import date as date_cls
 import re
 
-from cpython.datetime cimport date_new, datetime_new, import_datetime, time_new
 from cpython.object cimport PyObject_Str
 from cpython.unicode cimport PyUnicode_AsASCIIString, PyUnicode_Check, PyUnicode_Decode
 from libc.stdio cimport sscanf
@@ -27,53 +29,22 @@ cdef inline bytes to_bytes(object value, str type_name):
             "- value is not a string."
         ) from e
 
-import_datetime()  # required to call datetime_new/date_new/time_new
-
 def str_to_datetime(value):
-    if value is None:
-        return None
-    cdef int numparsed
-    cdef unsigned int year, month, day, hour, minute, second, microsecond = 0
-    cdef bytes value_b = to_bytes(value, 'datetime')
-    cdef const char * string = value_b
-
-    numparsed = sscanf(string, "%4u-%2u-%2u %2u:%2u:%2u.%6u",
-        &year, &month, &day, &hour, &minute, &second, &microsecond)
-    if numparsed < 6:
-        raise ValueError(
-                "Couldn't parse datetime string: '%s'" % (value)
-            )
-    return datetime_new(year, month, day, hour, minute, second, microsecond, None)
+    if value is not None:
+        value = datetime_cls.fromisoformat(value)
+    return value
 
-def str_to_date(value):
-    if value is None:
-        return None
-    cdef int numparsed
-    cdef unsigned int year, month, day
-    cdef bytes value_b = to_bytes(value, 'date')
-    cdef const char * string = value_b
+def str_to_time(value):
+    if value is not None:
+        value = time_cls.fromisoformat(value)
+    return value
 
-    numparsed = sscanf(string, "%4u-%2u-%2u", &year, &month, &day)
-    if numparsed != 3:
-        raise ValueError(
-                "Couldn't parse date string: '%s'" % (value)
-            )
-    return date_new(year, month, day)
 
-def str_to_time(value):
-    if value is None:
-        return None
-    cdef int numparsed
-    cdef unsigned int hour, minute, second, microsecond = 0
-    cdef bytes value_b = to_bytes(value, 'time')
-    cdef const char * string = value_b
+def str_to_date(value):
+    if value is not None:
+        value = date_cls.fromisoformat(value)
+    return value
 
-    numparsed = sscanf(string, "%2u:%2u:%2u.%6u", &hour, &minute, &second, &microsecond)
-    if numparsed < 3:
-        raise ValueError(
-                "Couldn't parse time string: '%s'" % (value)
-            )
-    return time_new(hour, minute, second, microsecond, None)
 
 
 cdef class DecimalResultProcessor:
index b1ac20383e9e59a194c67bd4cf0565ec127c7943..f21ab9083628cac7045c1e9bb8ff8644570b1dad 100644 (file)
@@ -926,6 +926,12 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime):
 
         2021-03-15 12:05:57.105542
 
+    The incoming storage format is by default parsed using the
+    Python ``datetime.fromisoformat()`` function.
+
+    .. versionchanged:: 2.0  ``datetime.fromisoformat()`` is used for default
+       datetime string parsing.
+
     The storage format can be customized to some degree using the
     ``storage_format`` and ``regexp`` parameters, such as::
 
@@ -941,7 +947,8 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime):
      with keys year, month, day, hour, minute, second, and microsecond.
 
     :param regexp: regular expression which will be applied to incoming result
-     rows. If the regexp contains named groups, the resulting match dict is
+     rows, replacing the use of ``datetime.fromisoformat()`` to parse incoming
+     strings. If the regexp contains named groups, the resulting match dict is
      applied to the Python datetime() constructor as keyword arguments.
      Otherwise, if positional groups are used, the datetime() constructor
      is called with positional arguments via
@@ -1027,6 +1034,13 @@ class DATE(_DateTimeMixin, sqltypes.Date):
 
         2011-03-15
 
+    The incoming storage format is by default parsed using the
+    Python ``date.fromisoformat()`` function.
+
+    .. versionchanged:: 2.0  ``date.fromisoformat()`` is used for default
+       date string parsing.
+
+
     The storage format can be customized to some degree using the
     ``storage_format`` and ``regexp`` parameters, such as::
 
@@ -1042,11 +1056,13 @@ class DATE(_DateTimeMixin, sqltypes.Date):
      dict with keys year, month, and day.
 
     :param regexp: regular expression which will be applied to
-     incoming result rows. If the regexp contains named groups, the
-     resulting match dict is applied to the Python date() constructor
-     as keyword arguments. Otherwise, if positional groups are used, the
-     date() constructor is called with positional arguments via
+     incoming result rows, replacing the use of ``date.fromisoformat()`` to
+     parse incoming strings. If the regexp contains named groups, the resulting
+     match dict is applied to the Python date() constructor as keyword
+     arguments. Otherwise, if positional groups are used, the date()
+     constructor is called with positional arguments via
      ``*map(int, match_obj.groups(0))``.
+
     """
 
     _storage_format = "%(year)04d-%(month)02d-%(day)02d"
@@ -1092,6 +1108,12 @@ class TIME(_DateTimeMixin, sqltypes.Time):
 
         12:05:57.10558
 
+    The incoming storage format is by default parsed using the
+    Python ``time.fromisoformat()`` function.
+
+    .. versionchanged:: 2.0  ``time.fromisoformat()`` is used for default
+       time string parsing.
+
     The storage format can be customized to some degree using the
     ``storage_format`` and ``regexp`` parameters, such as::
 
@@ -1107,10 +1129,12 @@ class TIME(_DateTimeMixin, sqltypes.Time):
      with keys hour, minute, second, and microsecond.
 
     :param regexp: regular expression which will be applied to incoming result
-     rows. If the regexp contains named groups, the resulting match dict is
+     rows, replacing the use of ``time.fromisoformat()`` to parse incoming
+     strings. If the regexp contains named groups, the resulting match dict is
      applied to the Python time() constructor as keyword arguments. Otherwise,
      if positional groups are used, the time() constructor is called with
      positional arguments via ``*map(int, match_obj.groups(0))``.
+
     """
 
     _storage_format = "%(hour)02d:%(minute)02d:%(second)02d.%(microsecond)06d"
index 27cb9e9395dc19c80cc433352a174dc01ca3d02a..63f03466a5ae5df67f0cbbc4225048263906808f 100644 (file)
@@ -16,8 +16,10 @@ They all share one common characteristic: None is passed through unchanged.
 from __future__ import annotations
 
 import datetime
+from datetime import date as date_cls
+from datetime import datetime as datetime_cls
+from datetime import time as time_cls
 from decimal import Decimal
-import re
 import typing
 from typing import Any
 from typing import Callable
@@ -26,6 +28,7 @@ from typing import Type
 from typing import TypeVar
 from typing import Union
 
+
 _DT = TypeVar(
     "_DT", bound=Union[datetime.datetime, datetime.time, datetime.date]
 )
@@ -50,6 +53,7 @@ def str_to_datetime_processor_factory(
                     "Couldn't parse %s string '%r' "
                     "- value is not a string." % (type_.__name__, value)
                 ) from err
+
             if m is None:
                 raise ValueError(
                     "Couldn't parse %s string: "
@@ -108,12 +112,25 @@ def int_to_boolean(value: Optional[int]) -> Optional[bool]:
         return bool(value)
 
 
-DATETIME_RE = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?")
-TIME_RE = re.compile(r"(\d+):(\d+):(\d+)(?:\.(\d+))?")
-DATE_RE = re.compile(r"(\d+)-(\d+)-(\d+)")
+def str_to_datetime(value: Optional[str]) -> Optional[datetime.datetime]:
+    if value is not None:
+        dt_value = datetime_cls.fromisoformat(value)
+    else:
+        dt_value = None
+    return dt_value
 
-str_to_datetime = str_to_datetime_processor_factory(
-    DATETIME_RE, datetime.datetime
-)
-str_to_time = str_to_datetime_processor_factory(TIME_RE, datetime.time)
-str_to_date = str_to_datetime_processor_factory(DATE_RE, datetime.date)
+
+def str_to_time(value: Optional[str]) -> Optional[datetime.time]:
+    if value is not None:
+        dt_value = time_cls.fromisoformat(value)
+    else:
+        dt_value = None
+    return dt_value
+
+
+def str_to_date(value: Optional[str]) -> Optional[datetime.date]:
+    if value is not None:
+        dt_value = date_cls.fromisoformat(value)
+    else:
+        dt_value = None
+    return dt_value
index 0940eab9b96b873c4eec90a6c71988318506ce16..cc14dd9c4ffc8bf30995f9ec2a0802f2136dfc0d 100644 (file)
@@ -432,7 +432,7 @@ class DateTimeMicrosecondsTest(_DateFixture, fixtures.TablesTest):
     __requires__ = ("datetime_microseconds",)
     __backend__ = True
     datatype = DateTime
-    data = datetime.datetime(2012, 10, 15, 12, 57, 18, 396)
+    data = datetime.datetime(2012, 10, 15, 12, 57, 18, 39642)
 
 
 class TimestampMicrosecondsTest(_DateFixture, fixtures.TablesTest):
index 9658fec8329f7a8aada70848ada8989a226b1fb0..8e7632c90667230baf3960f4b184d85db28d2010 100644 (file)
@@ -117,7 +117,7 @@ class TestTypes(fixtures.TestBase, AssertsExecutionResults):
         ]:
             assert_raises_message(
                 ValueError,
-                "Couldn't parse %s string." % disp,
+                "Invalid isoformat string:",
                 lambda: connection.execute(
                     text("select 'ASDF' as value").columns(value=typ)
                 ).scalar(),
@@ -166,7 +166,7 @@ class TestTypes(fixtures.TestBase, AssertsExecutionResults):
             # 2004-05-21T00:00:00
             storage_format="%(year)04d-%(month)02d-%(day)02d"
             "T%(hour)02d:%(minute)02d:%(second)02d",
-            regexp=r"(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)",
+            regexp=r"^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)$",
         )
         t = Table("t", self.metadata, Column("d", sqlite_date))
         self.metadata.create_all(connection)
@@ -195,7 +195,7 @@ class TestTypes(fixtures.TestBase, AssertsExecutionResults):
         sqlite_date = sqlite.DATETIME(
             storage_format="%(year)04d%(month)02d%(day)02d"
             "%(hour)02d%(minute)02d%(second)02d",
-            regexp=r"(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})",
+            regexp=r"^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$",
         )
         t = Table("t", self.metadata, Column("d", sqlite_date))
         self.metadata.create_all(connection)
index 392632327afffcc148ad61c26caec28d58849cac..5f28e3ea0ef59ee36ec8ee2fd47cee46713d8f0d 100644 (file)
@@ -1,6 +1,9 @@
+import datetime
+import re
 from types import MappingProxyType
 
 from sqlalchemy import exc
+from sqlalchemy.engine import processors
 from sqlalchemy.testing import assert_raises_message
 from sqlalchemy.testing import eq_
 from sqlalchemy.testing import expect_raises_message
@@ -36,34 +39,72 @@ class CyBooleanProcessorTest(_BooleanProcessorTest):
 
 
 class _DateProcessorTest(fixtures.TestBase):
+    def test_iso_datetime(self):
+        eq_(
+            self.module.str_to_datetime("2022-04-03 17:12:34.353"),
+            datetime.datetime(2022, 4, 3, 17, 12, 34, 353000),
+        )
+
+        eq_(
+            self.module.str_to_datetime("2022-04-03 17:12:34.353123"),
+            datetime.datetime(2022, 4, 3, 17, 12, 34, 353123),
+        )
+
+        eq_(
+            self.module.str_to_datetime("2022-04-03 17:12:34"),
+            datetime.datetime(2022, 4, 3, 17, 12, 34),
+        )
+
+        eq_(
+            self.module.str_to_time("17:12:34.353123"),
+            datetime.time(17, 12, 34, 353123),
+        )
+
+        eq_(
+            self.module.str_to_time("17:12:34.353"),
+            datetime.time(17, 12, 34, 353000),
+        )
+
+        eq_(
+            self.module.str_to_time("17:12:34"),
+            datetime.time(17, 12, 34),
+        )
+
+        eq_(self.module.str_to_date("2022-04-03"), datetime.date(2022, 4, 3))
+
     def test_date_no_string(self):
         assert_raises_message(
-            ValueError,
-            "Couldn't parse date string '2012' - value is not a string",
+            TypeError,
+            "fromisoformat: argument must be str",
             self.module.str_to_date,
             2012,
         )
 
-    def test_datetime_no_string(self):
+    def test_datetime_no_string_custom_reg(self):
         assert_raises_message(
             ValueError,
             "Couldn't parse datetime string '2012' - value is not a string",
-            self.module.str_to_datetime,
+            processors.str_to_datetime_processor_factory(
+                re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?"),
+                datetime.datetime,
+            ),
             2012,
         )
 
-    def test_time_no_string(self):
+    def test_time_no_string_custom_reg(self):
         assert_raises_message(
             ValueError,
             "Couldn't parse time string '2012' - value is not a string",
-            self.module.str_to_time,
+            processors.str_to_datetime_processor_factory(
+                re.compile(r"^(\d+):(\d+):(\d+)(?:\.(\d{6}))?$"), datetime.time
+            ),
             2012,
         )
 
     def test_date_invalid_string(self):
         assert_raises_message(
             ValueError,
-            "Couldn't parse date string: '5:a'",
+            "Invalid isoformat string: '5:a'",
             self.module.str_to_date,
             "5:a",
         )
@@ -71,7 +112,7 @@ class _DateProcessorTest(fixtures.TestBase):
     def test_datetime_invalid_string(self):
         assert_raises_message(
             ValueError,
-            "Couldn't parse datetime string: '5:a'",
+            "Invalid isoformat string: '5:a'",
             self.module.str_to_datetime,
             "5:a",
         )
@@ -79,7 +120,7 @@ class _DateProcessorTest(fixtures.TestBase):
     def test_time_invalid_string(self):
         assert_raises_message(
             ValueError,
-            "Couldn't parse time string: '5:a'",
+            "Invalid isoformat string: '5:a'",
             self.module.str_to_time,
             "5:a",
         )