fix: produce consistent error messages on timestamp overflow

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Wed, 8 Jun 2022 21:26:41 +0000 (23:26 +0200)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Wed, 8 Jun 2022 23:03:01 +0000 (01:03 +0200)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Wed, 8 Jun 2022 21:26:41 +0000 (23:26 +0200)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Wed, 8 Jun 2022 23:03:01 +0000 (01:03 +0200)
diff --git a/psycopg/psycopg/types/datetime.py b/psycopg/psycopg/types/datetime.py

index ac9217aaac93d109ab4409aa4b9a4db3e738306d..3a0394b8e0c88da7a0156008854b1fbc60705039 100644 (file)
--- a/psycopg/psycopg/types/datetime.py
+++ b/psycopg/psycopg/types/datetime.py
@@ -435,10 +435,7 @@ class TimestampLoader(Loader):
      def load(self, data: Buffer) -> datetime:
          m = self._re_format.match(data)
          if not m:
-            s = bytes(data).decode("utf8", "replace")
-            if s.endswith("BC"):
-                raise DataError(f"BC timestamps not supported, got {s!r}")
-            raise DataError(f"can't parse timestamp {s!r}")
+            raise _get_timestamp_load_error(self.connection, data) from None
  
          if self._order == self._ORDER_YMD:
              ye, mo, da, ho, mi, se, fr = m.groups()
@@ -470,9 +467,8 @@ class TimestampLoader(Loader):
  
          try:
              return datetime(int(ye), imo, int(da), int(ho), int(mi), int(se), us)
-        except ValueError as e:
-            s = bytes(data).decode("utf8", "replace")
-            raise DataError(f"can't parse timestamp {s!r}: {e}") from None
+        except ValueError as ex:
+            raise _get_timestamp_load_error(self.connection, data, ex) from None
  
  
  class TimestampBinaryLoader(Loader):
@@ -515,10 +511,7 @@ class TimestamptzLoader(Loader):
      def load(self, data: Buffer) -> datetime:
          m = self._re_format.match(data)
          if not m:
-            s = bytes(data).decode("utf8", "replace")
-            if s.endswith("BC"):
-                raise DataError(f"BC timestamps not supported, got {s!r}")
-            raise DataError(f"can't parse timestamp {s!r}")
+            raise _get_timestamp_load_error(self.connection, data) from None
  
          ye, mo, da, ho, mi, se, fr, sgn, oh, om, os = m.groups()
  
@@ -559,8 +552,7 @@ class TimestamptzLoader(Loader):
          except ValueError as e:
              ex = e
  
-        s = bytes(data).decode("utf8", "replace")
-        raise DataError(f"can't parse timestamptz {s!r}: {ex}") from None
+        raise _get_timestamp_load_error(self.connection, data, ex) from None
  
      def _load_notimpl(self, data: Buffer) -> datetime:
          s = bytes(data).decode("utf8", "replace")
@@ -691,6 +683,29 @@ def _get_datestyle(conn: Optional["BaseConnection[Any]"]) -> bytes:
      return b"ISO, DMY"
  
  
+def _get_timestamp_load_error(
+    conn: Optional["BaseConnection[Any]"], data: Buffer, ex: Optional[Exception] = None
+) -> Exception:
+    s = bytes(data).decode("utf8", "replace")
+
+    def is_overflow(s: str) -> bool:
+        if not s:
+            return False
+
+        ds = _get_datestyle(conn)
+        if not ds.startswith(b"P"):  # Postgres
+            return len(s.split()[0]) > 10  # date is first token
+        else:
+            return len(s.split()[-1]) > 4  # year is last token
+
+    if s == "-infinity" or s.endswith("BC"):
+        return DataError("timestamp too small (before year 1): {s!r}")
+    elif s == "infinity" or is_overflow(s):
+        return DataError(f"timestamp too large (after year 10K): {s!r}")
+    else:
+        return DataError(f"can't parse timestamp {s!r}: {ex or '(unknown)'}")
+
+
  _month_abbr = {
      n: i
      for i, n in enumerate(b"Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(), 1)
diff --git a/psycopg_c/psycopg_c/types/datetime.pyx b/psycopg_c/psycopg_c/types/datetime.pyx

index d84dc06990e7f5662ac3c3623bc4065da6e94c4c..68580204a1fd04f267d70e1e7cfe6a569f0aa0a0 100644 (file)
--- a/psycopg_c/psycopg_c/types/datetime.pyx
+++ b/psycopg_c/psycopg_c/types/datetime.pyx
@@ -566,8 +566,7 @@ cdef class TimestampLoader(CLoader):
      cdef object cload(self, const char *data, size_t length):
          cdef const char *end = data + length
          if end[-1] == b'C':  # ends with BC
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"BC timestamp not supported, got {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          if self._order == ORDER_PGDM or self._order == ORDER_PGMD:
              return self._cload_pg(data, end)
@@ -585,8 +584,7 @@ cdef class TimestampLoader(CLoader):
          # Parse the first 6 groups of digits (date and time)
          ptr = _parse_date_values(data, end, vals, 6)
          if ptr == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timetz {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          # Parse the microseconds
          cdef int us = 0
@@ -606,8 +604,7 @@ cdef class TimestampLoader(CLoader):
              return cdt.datetime_new(
                  y, m, d, vals[HO], vals[MI], vals[SE], us, None)
          except ValueError as ex:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp {s!r}: {ex}") from None
+            raise _get_timestamp_load_error(self._pgconn, data, ex) from None
  
      cdef object _cload_pg(self, const char *data, const char *end):
          DEF HO = 0
@@ -625,14 +622,12 @@ cdef class TimestampLoader(CLoader):
          seps[1] = strchr(seps[0] + 1, b' ') if seps[0] != NULL else NULL
          seps[2] = strchr(seps[1] + 1, b' ') if seps[1] != NULL else NULL
          if seps[2] == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          # Parse the following 3 groups of digits (time)
          ptr = _parse_date_values(seps[2] + 1, end, vals, 3)
          if ptr == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          # Parse the microseconds
          cdef int us = 0
@@ -642,8 +637,7 @@ cdef class TimestampLoader(CLoader):
          # Parse the year
          ptr = _parse_date_values(ptr + 1, end, vals + 3, 1)
          if ptr == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          # Resolve the MD order
          cdef int m, d
@@ -654,16 +648,14 @@ cdef class TimestampLoader(CLoader):
              else: # self._order == ORDER_PGMD
                  m = _month_abbr[seps[0][1 : seps[1] - seps[0]]]
                  d = int(seps[1][1 : seps[2] - seps[1]])
-        except (KeyError, ValueError):
-            s = data.decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp: {s!r}") from None
+        except (KeyError, ValueError) as ex:
+            raise _get_timestamp_load_error(self._pgconn, data, ex) from None
  
          try:
              return cdt.datetime_new(
                  vals[YE], m, d, vals[HO], vals[MI], vals[SE], us, None)
          except ValueError as ex:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamp {s!r}: {ex}") from None
+            raise _get_timestamp_load_error(self._pgconn, data, ex) from None
  
  
  @cython.final
@@ -696,13 +688,9 @@ cdef class TimestampBinaryLoader(CLoader):
  
          except OverflowError:
              if val <= 0:
-                raise e.DataError(
-                    "timestamp too small (before year 1)"
-                ) from None
+                raise e.DataError("timestamp too small (before year 1)") from None
              else:
-                raise e.DataError(
-                    "timestamp too large (after year 10K)"
-                ) from None
+                raise e.DataError("timestamp too large (after year 10K)") from None
  
  
  cdef class _BaseTimestamptzLoader(CLoader):
@@ -729,14 +717,13 @@ cdef class TimestamptzLoader(_BaseTimestamptzLoader):
              self._order = ORDER_DMY
  
      cdef object cload(self, const char *data, size_t length):
-        cdef const char *end = data + length
-        if end[-1] == b'C':  # ends with BC
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"BC timestamptz not supported, got {s!r}")
-
          if self._order != ORDER_YMD:
              return self._cload_notimpl(data, length)
  
+        cdef const char *end = data + length
+        if end[-1] == b'C':  # ends with BC
+            raise _get_timestamp_load_error(self._pgconn, data) from None
+
          DEF D1 = 0
          DEF D2 = 1
          DEF D3 = 2
@@ -750,8 +737,7 @@ cdef class TimestamptzLoader(_BaseTimestamptzLoader):
          cdef const char *ptr
          ptr = _parse_date_values(data, end, vals, 6)
          if ptr == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamptz {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          # Parse the microseconds
          cdef int us = 0
@@ -770,8 +756,7 @@ cdef class TimestamptzLoader(_BaseTimestamptzLoader):
          # Parse the timezone
          cdef int offsecs = _parse_timezone_to_seconds(&ptr, end)
          if ptr == NULL:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamptz {s!r}")
+            raise _get_timestamp_load_error(self._pgconn, data) from None
  
          tzoff = cdt.timedelta_new(0, offsecs, 0)
  
@@ -798,8 +783,7 @@ cdef class TimestamptzLoader(_BaseTimestamptzLoader):
          except ValueError as ex:
              ex1 = ex
  
-        s = bytes(data).decode("utf8", "replace")
-        raise e.DataError(f"can't parse timestamptz {s!r}: {ex1}") from None
+        raise _get_timestamp_load_error(self._pgconn, data, ex1) from None
  
      cdef object _cload_notimpl(self, const char *data, size_t length):
          s = bytes(data)[:length].decode("utf8", "replace")
@@ -1097,10 +1081,34 @@ cdef object _timezone_from_seconds(int sec, __cache={}):
      return tz
  
  
+cdef object _get_timestamp_load_error(
+    pq.PGconn pgconn, const char *data, ex: Optional[Exception] = None
+):
+    s = bytes(data).decode("utf8", "replace")
+
+    def is_overflow(s):
+        if not s:
+            return False
+
+        ds = _get_datestyle(pgconn)
+        if not ds.startswith(b"P"):  # Postgres
+            return len(s.split()[0]) > 10  # date is first token
+        else:
+            return len(s.split()[-1]) > 4  # year is last token
+
+    if s == "-infinity" or s.endswith("BC"):
+        return e.DataError("timestamp too small (before year 1): {s!r}")
+    elif s == "infinity" or is_overflow(s):
+        return e.DataError(f"timestamp too large (after year 10K): {s!r}")
+    else:
+        return e.DataError(f"can't parse timestamp {s!r}: {ex or '(unknown)'}")
+
+
  cdef _timezones = {}
  _timezones[None] = timezone_utc
  _timezones[b"UTC"] = timezone_utc
  
+
  cdef object _timezone_from_connection(pq.PGconn pgconn):
      """Return the Python timezone info of the connection's timezone."""
      if pgconn is None:
diff --git a/tests/types/test_datetime.py b/tests/types/test_datetime.py

index 977ad67b6fac94d154b47b4bd68f4afc775099f3..4a8c56f2839d327532a8488e458a62814155bb99 100644 (file)
--- a/tests/types/test_datetime.py
+++ b/tests/types/test_datetime.py
@@ -233,6 +233,31 @@ class TestDatetime:
          with pytest.raises(DataError):
              cur.fetchone()[0]
  
+    overflow_samples = [
+        ("-infinity", "timestamp too small"),
+        ("1000-01-01 12:00 BC", "timestamp too small"),
+        ("10000-01-01 12:00", "timestamp too large"),
+        ("infinity", "timestamp too large"),
+    ]
+
+    @pytest.mark.parametrize("datestyle_out", ["ISO", "Postgres", "SQL", "German"])
+    @pytest.mark.parametrize("val, msg", overflow_samples)
+    def test_overflow_message(self, conn, datestyle_out, val, msg):
+        cur = conn.cursor()
+        cur.execute(f"set datestyle = {datestyle_out}, YMD")
+        cur.execute("select %s::timestamp", (val,))
+        with pytest.raises(DataError) as excinfo:
+            cur.fetchone()[0]
+        assert msg in str(excinfo.value)
+
+    @pytest.mark.parametrize("val, msg", overflow_samples)
+    def test_overflow_message_binary(self, conn, val, msg):
+        cur = conn.cursor(binary=True)
+        cur.execute("select %s::timestamp", (val,))
+        with pytest.raises(DataError) as excinfo:
+            cur.fetchone()[0]
+        assert msg in str(excinfo.value)
+
      def test_load_all_month_names(self, conn):
          cur = conn.cursor(binary=False)
          cur.execute("set datestyle = 'Postgres'")
@@ -390,6 +415,35 @@ class TestDateTimeTz:
          assert rec[0] == want
          assert rec[1] == 11111111
  
+    overflow_samples = [
+        ("-infinity", "timestamp too small"),
+        ("1000-01-01 12:00+00 BC", "timestamp too small"),
+        ("10000-01-01 12:00+00", "timestamp too large"),
+        ("infinity", "timestamp too large"),
+    ]
+
+    @pytest.mark.parametrize("datestyle_out", ["ISO", "Postgres", "SQL", "German"])
+    @pytest.mark.parametrize("val, msg", overflow_samples)
+    def test_overflow_message(self, conn, datestyle_out, val, msg):
+        cur = conn.cursor()
+        cur.execute(f"set datestyle = {datestyle_out}, YMD")
+        cur.execute("select %s::timestamptz", (val,))
+        if datestyle_out == "ISO":
+            with pytest.raises(DataError) as excinfo:
+                cur.fetchone()[0]
+            assert msg in str(excinfo.value)
+        else:
+            with pytest.raises(NotImplementedError):
+                cur.fetchone()[0]
+
+    @pytest.mark.parametrize("val, msg", overflow_samples)
+    def test_overflow_message_binary(self, conn, val, msg):
+        cur = conn.cursor(binary=True)
+        cur.execute("select %s::timestamptz", (val,))
+        with pytest.raises(DataError) as excinfo:
+            cur.fetchone()[0]
+        assert msg in str(excinfo.value)
+
      @pytest.mark.parametrize(
          "valname, tzval, tzname",
          [
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Wed, 8 Jun 2022 21:26:41 +0000 (23:26 +0200)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Wed, 8 Jun 2022 23:03:01 +0000 (01:03 +0200)
psycopg/psycopg/types/datetime.py		patch \| blob \| blame \| history
psycopg_c/psycopg_c/types/datetime.pyx		patch \| blob \| blame \| history
tests/types/test_datetime.py		patch \| blob \| blame \| history