--- /dev/null
+.. change::
+ :tags: usecase, sqlite, performance
+ :tickets: 7029
+
+ SQLite datetime, date, and time datatypes now use the Python standard
+ library ``fromisoformat()`` methods to parse incoming datetime, date, and
+ time string values. This improves performance vs. the previous regular
+ expression-based approach, and also automatically accommodates datetime
+ and time strings whose fractional seconds are written as either six-digit
+ "microseconds" or three-digit "milliseconds".
import datetime
+from datetime import datetime as datetime_cls
+from datetime import time as time_cls
+from datetime import date as date_cls
import re
-from cpython.datetime cimport date_new, datetime_new, import_datetime, time_new
from cpython.object cimport PyObject_Str
from cpython.unicode cimport PyUnicode_AsASCIIString, PyUnicode_Check, PyUnicode_Decode
from libc.stdio cimport sscanf
"- value is not a string."
) from e
-import_datetime() # required to call datetime_new/date_new/time_new
-
def str_to_datetime(value):
- if value is None:
- return None
- cdef int numparsed
- cdef unsigned int year, month, day, hour, minute, second, microsecond = 0
- cdef bytes value_b = to_bytes(value, 'datetime')
- cdef const char * string = value_b
-
- numparsed = sscanf(string, "%4u-%2u-%2u %2u:%2u:%2u.%6u",
- &year, &month, &day, &hour, &minute, &second, &microsecond)
- if numparsed < 6:
- raise ValueError(
- "Couldn't parse datetime string: '%s'" % (value)
- )
- return datetime_new(year, month, day, hour, minute, second, microsecond, None)
+ if value is not None:
+ value = datetime_cls.fromisoformat(value)
+ return value
-def str_to_date(value):
- if value is None:
- return None
- cdef int numparsed
- cdef unsigned int year, month, day
- cdef bytes value_b = to_bytes(value, 'date')
- cdef const char * string = value_b
+def str_to_time(value):
+ if value is not None:
+ value = time_cls.fromisoformat(value)
+ return value
- numparsed = sscanf(string, "%4u-%2u-%2u", &year, &month, &day)
- if numparsed != 3:
- raise ValueError(
- "Couldn't parse date string: '%s'" % (value)
- )
- return date_new(year, month, day)
-def str_to_time(value):
- if value is None:
- return None
- cdef int numparsed
- cdef unsigned int hour, minute, second, microsecond = 0
- cdef bytes value_b = to_bytes(value, 'time')
- cdef const char * string = value_b
+def str_to_date(value):
+ if value is not None:
+ value = date_cls.fromisoformat(value)
+ return value
- numparsed = sscanf(string, "%2u:%2u:%2u.%6u", &hour, &minute, &second, &microsecond)
- if numparsed < 3:
- raise ValueError(
- "Couldn't parse time string: '%s'" % (value)
- )
- return time_new(hour, minute, second, microsecond, None)
cdef class DecimalResultProcessor:
2021-03-15 12:05:57.105542
+ The incoming storage format is by default parsed using the
+ Python ``datetime.fromisoformat()`` function.
+
+ .. versionchanged:: 2.0 ``datetime.fromisoformat()`` is used for default
+ datetime string parsing.
+
The storage format can be customized to some degree using the
``storage_format`` and ``regexp`` parameters, such as::
with keys year, month, day, hour, minute, second, and microsecond.
:param regexp: regular expression which will be applied to incoming result
- rows. If the regexp contains named groups, the resulting match dict is
+ rows, replacing the use of ``datetime.fromisoformat()`` to parse incoming
+ strings. If the regexp contains named groups, the resulting match dict is
applied to the Python datetime() constructor as keyword arguments.
Otherwise, if positional groups are used, the datetime() constructor
is called with positional arguments via
2011-03-15
+ The incoming storage format is by default parsed using the
+ Python ``date.fromisoformat()`` function.
+
+ .. versionchanged:: 2.0 ``date.fromisoformat()`` is used for default
+ date string parsing.
+
+
The storage format can be customized to some degree using the
``storage_format`` and ``regexp`` parameters, such as::
dict with keys year, month, and day.
:param regexp: regular expression which will be applied to
- incoming result rows. If the regexp contains named groups, the
- resulting match dict is applied to the Python date() constructor
- as keyword arguments. Otherwise, if positional groups are used, the
- date() constructor is called with positional arguments via
+ incoming result rows, replacing the use of ``date.fromisoformat()`` to
+ parse incoming strings. If the regexp contains named groups, the resulting
+ match dict is applied to the Python date() constructor as keyword
+ arguments. Otherwise, if positional groups are used, the date()
+ constructor is called with positional arguments via
``*map(int, match_obj.groups(0))``.
+
"""
_storage_format = "%(year)04d-%(month)02d-%(day)02d"
12:05:57.10558
+ The incoming storage format is by default parsed using the
+ Python ``time.fromisoformat()`` function.
+
+ .. versionchanged:: 2.0 ``time.fromisoformat()`` is used for default
+ time string parsing.
+
The storage format can be customized to some degree using the
``storage_format`` and ``regexp`` parameters, such as::
with keys hour, minute, second, and microsecond.
:param regexp: regular expression which will be applied to incoming result
- rows. If the regexp contains named groups, the resulting match dict is
+ rows, replacing the use of ``time.fromisoformat()`` to parse incoming
+ strings. If the regexp contains named groups, the resulting match dict is
applied to the Python time() constructor as keyword arguments. Otherwise,
if positional groups are used, the time() constructor is called with
positional arguments via ``*map(int, match_obj.groups(0))``.
+
"""
_storage_format = "%(hour)02d:%(minute)02d:%(second)02d.%(microsecond)06d"
from __future__ import annotations
import datetime
+from datetime import date as date_cls
+from datetime import datetime as datetime_cls
+from datetime import time as time_cls
from decimal import Decimal
-import re
import typing
from typing import Any
from typing import Callable
from typing import TypeVar
from typing import Union
+
_DT = TypeVar(
"_DT", bound=Union[datetime.datetime, datetime.time, datetime.date]
)
"Couldn't parse %s string '%r' "
"- value is not a string." % (type_.__name__, value)
) from err
+
if m is None:
raise ValueError(
"Couldn't parse %s string: "
return bool(value)
-DATETIME_RE = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?")
-TIME_RE = re.compile(r"(\d+):(\d+):(\d+)(?:\.(\d+))?")
-DATE_RE = re.compile(r"(\d+)-(\d+)-(\d+)")
+def str_to_datetime(value: Optional[str]) -> Optional[datetime.datetime]:
+ if value is not None:
+ dt_value = datetime_cls.fromisoformat(value)
+ else:
+ dt_value = None
+ return dt_value
-str_to_datetime = str_to_datetime_processor_factory(
- DATETIME_RE, datetime.datetime
-)
-str_to_time = str_to_datetime_processor_factory(TIME_RE, datetime.time)
-str_to_date = str_to_datetime_processor_factory(DATE_RE, datetime.date)
+
+def str_to_time(value: Optional[str]) -> Optional[datetime.time]:
+ if value is not None:
+ dt_value = time_cls.fromisoformat(value)
+ else:
+ dt_value = None
+ return dt_value
+
+
+def str_to_date(value: Optional[str]) -> Optional[datetime.date]:
+ if value is not None:
+ dt_value = date_cls.fromisoformat(value)
+ else:
+ dt_value = None
+ return dt_value
__requires__ = ("datetime_microseconds",)
__backend__ = True
datatype = DateTime
- data = datetime.datetime(2012, 10, 15, 12, 57, 18, 396)
+ data = datetime.datetime(2012, 10, 15, 12, 57, 18, 39642)
class TimestampMicrosecondsTest(_DateFixture, fixtures.TablesTest):
]:
assert_raises_message(
ValueError,
- "Couldn't parse %s string." % disp,
+ "Invalid isoformat string:",
lambda: connection.execute(
text("select 'ASDF' as value").columns(value=typ)
).scalar(),
# 2004-05-21T00:00:00
storage_format="%(year)04d-%(month)02d-%(day)02d"
"T%(hour)02d:%(minute)02d:%(second)02d",
- regexp=r"(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)",
+ regexp=r"^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)$",
)
t = Table("t", self.metadata, Column("d", sqlite_date))
self.metadata.create_all(connection)
sqlite_date = sqlite.DATETIME(
storage_format="%(year)04d%(month)02d%(day)02d"
"%(hour)02d%(minute)02d%(second)02d",
- regexp=r"(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})",
+ regexp=r"^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$",
)
t = Table("t", self.metadata, Column("d", sqlite_date))
self.metadata.create_all(connection)
+import datetime
+import re
from types import MappingProxyType
from sqlalchemy import exc
+from sqlalchemy.engine import processors
from sqlalchemy.testing import assert_raises_message
from sqlalchemy.testing import eq_
from sqlalchemy.testing import expect_raises_message
class _DateProcessorTest(fixtures.TestBase):
+ def test_iso_datetime(self):
+ eq_(
+ self.module.str_to_datetime("2022-04-03 17:12:34.353"),
+ datetime.datetime(2022, 4, 3, 17, 12, 34, 353000),
+ )
+
+ eq_(
+ self.module.str_to_datetime("2022-04-03 17:12:34.353123"),
+ datetime.datetime(2022, 4, 3, 17, 12, 34, 353123),
+ )
+
+ eq_(
+ self.module.str_to_datetime("2022-04-03 17:12:34"),
+ datetime.datetime(2022, 4, 3, 17, 12, 34),
+ )
+
+ eq_(
+ self.module.str_to_time("17:12:34.353123"),
+ datetime.time(17, 12, 34, 353123),
+ )
+
+ eq_(
+ self.module.str_to_time("17:12:34.353"),
+ datetime.time(17, 12, 34, 353000),
+ )
+
+ eq_(
+ self.module.str_to_time("17:12:34"),
+ datetime.time(17, 12, 34),
+ )
+
+ eq_(self.module.str_to_date("2022-04-03"), datetime.date(2022, 4, 3))
+
def test_date_no_string(self):
assert_raises_message(
- ValueError,
- "Couldn't parse date string '2012' - value is not a string",
+ TypeError,
+ "fromisoformat: argument must be str",
self.module.str_to_date,
2012,
)
- def test_datetime_no_string(self):
+ def test_datetime_no_string_custom_reg(self):
assert_raises_message(
ValueError,
"Couldn't parse datetime string '2012' - value is not a string",
- self.module.str_to_datetime,
+ processors.str_to_datetime_processor_factory(
+ re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?"),
+ datetime.datetime,
+ ),
2012,
)
- def test_time_no_string(self):
+ def test_time_no_string_custom_reg(self):
assert_raises_message(
ValueError,
"Couldn't parse time string '2012' - value is not a string",
- self.module.str_to_time,
+ processors.str_to_datetime_processor_factory(
+ re.compile(r"^(\d+):(\d+):(\d+)(?:\.(\d{6}))?$"), datetime.time
+ ),
2012,
)
def test_date_invalid_string(self):
assert_raises_message(
ValueError,
- "Couldn't parse date string: '5:a'",
+ "Invalid isoformat string: '5:a'",
self.module.str_to_date,
"5:a",
)
def test_datetime_invalid_string(self):
assert_raises_message(
ValueError,
- "Couldn't parse datetime string: '5:a'",
+ "Invalid isoformat string: '5:a'",
self.module.str_to_datetime,
"5:a",
)
def test_time_invalid_string(self):
assert_raises_message(
ValueError,
- "Couldn't parse time string: '5:a'",
+ "Invalid isoformat string: '5:a'",
self.module.str_to_time,
"5:a",
)