Handle timestamps which would overflow in UTC but not in the wanted timezone

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Thu, 22 Jul 2021 21:17:53 +0000 (23:17 +0200)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Fri, 23 Jul 2021 14:39:28 +0000 (16:39 +0200)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Thu, 22 Jul 2021 21:17:53 +0000 (23:17 +0200)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Fri, 23 Jul 2021 14:39:28 +0000 (16:39 +0200)
diff --git a/psycopg/psycopg/types/datetime.py b/psycopg/psycopg/types/datetime.py

index e73af2c4309d660f8c47425468d1015095737b7c..b938c2351e301d75d5c6ceb399708b8ddd7c5eb4 100644 (file)
--- a/psycopg/psycopg/types/datetime.py
+++ b/psycopg/psycopg/types/datetime.py
@@ -595,14 +595,26 @@ class TimestamptzLoader(Loader):
          # thing it can return). So create a temporary datetime object, in utc,
          # shift it by the offset parsed from the timestamp, and then move it to
          # the connection timezone.
+        dt = None
+        ex: Exception
          try:
              dt = datetime(
                  int(ye), int(mo), int(da), int(ho), int(mi), int(se), us, utc
              )
              return (dt - tzoff).astimezone(self._timezone)
+        except OverflowError as e:
+            # If we have created the temporary 'dt', the datetime is close to
+            # min/max and the shift pushed it out of range, overflowing.
+            # In this case return the datetime in a fixed offset timezone.
+            if dt is not None:
+                return dt.replace(tzinfo=timezone(tzoff))
+            else:
+                ex = e
          except ValueError as e:
-            s = bytes(data).decode("utf8", "replace")
-            raise DataError(f"can't parse timestamptz {s!r}: {e}") from None
+            ex = e
+
+        s = bytes(data).decode("utf8", "replace")
+        raise DataError(f"can't parse timestamptz {s!r}: {ex}") from None
  
      def _load_notimpl(self, data: Buffer) -> datetime:
          s = bytes(data).decode("utf8", "replace")
@@ -628,6 +640,25 @@ class TimestamptzBinaryLoader(Loader):
              ts = _pg_datetimetz_epoch + timedelta(microseconds=micros)
              return ts.astimezone(self._timezone)
          except OverflowError:
+            # If we were asked about a timestamp which would overflow in UTC,
+            # but not in the desired timezone (e.g. datetime.max at Chicago
+            # timezone) we can still save the day by shifting the value by the
+            # timezone offset and then replacing the timezone.
+            if self._timezone:
+                utcoff = self._timezone.utcoffset(
+                    datetime.min if micros < 0 else datetime.max
+                )
+                if utcoff:
+                    usoff = 1_000_000 * int(utcoff.total_seconds())
+                    try:
+                        ts = _pg_datetime_epoch + timedelta(
+                            microseconds=micros + usoff
+                        )
+                    except OverflowError:
+                        pass  # will raise downstream
+                    else:
+                        return ts.replace(tzinfo=self._timezone)
+
              if micros <= 0:
                  raise DataError(
                      "timestamp too small (before year 1)"
diff --git a/psycopg_c/psycopg_c/types/datetime.pyx b/psycopg_c/psycopg_c/types/datetime.pyx

index a917e29ac84a77c6109a182a94ed146d4a26d2e7..01c73f70f2c6b3bac5c82f9bec71408e2ce39a48 100644 (file)
--- a/psycopg_c/psycopg_c/types/datetime.pyx
+++ b/psycopg_c/psycopg_c/types/datetime.pyx
@@ -805,15 +805,26 @@ cdef class TimestamptzLoader(_BaseTimestamptzLoader):
          # thing it can return). So create a temporary datetime object, in utc,
          # shift it by the offset parsed from the timestamp, and then move it to
          # the connection timezone.
+        dt = None
          try:
              dt = cdt.datetime_new(
                  y, m, d, vals[HO], vals[MI], vals[SE], us, timezone_utc)
              dt -= tzoff
              return PyObject_CallFunctionObjArgs(datetime_astimezone,
                  <PyObject *>dt, <PyObject *>self._time_zone, NULL)
+        except OverflowError as ex:
+            # If we have created the temporary 'dt', the datetime is close to
+            # min/max and the shift pushed it out of range, overflowing.
+            # In this case return the datetime in a fixed offset timezone.
+            if dt is not None:
+                return dt.replace(tzinfo=timezone(tzoff))
+            else:
+                ex1 = ex
          except ValueError as ex:
-            s = bytes(data).decode("utf8", "replace")
-            raise e.DataError(f"can't parse timestamptz {s!r}: {ex}") from None
+            ex1 = ex
+
+        s = bytes(data).decode("utf8", "replace")
+        raise e.DataError(f"can't parse timestamptz {s!r}: {ex1}") from None
  
      cdef object _cload_notimpl(self, const char *data, size_t length):
          s = bytes(data)[:length].decode("utf8", "replace")
@@ -854,6 +865,25 @@ cdef class TimestamptzBinaryLoader(_BaseTimestamptzLoader):
                  <PyObject *>dt, <PyObject *>self._time_zone, NULL)
  
          except OverflowError:
+            # If we were asked about a timestamp which would overflow in UTC,
+            # but not in the desired timezone (e.g. datetime.max at Chicago
+            # timezone) we can still save the day by shifting the value by the
+            # timezone offset and then replacing the timezone.
+            if self._time_zone is not None:
+                utcoff = self._time_zone.utcoffset(
+                    datetime.min if val < 0 else datetime.max
+                )
+                if utcoff:
+                    usoff = 1_000_000 * int(utcoff.total_seconds())
+                    try:
+                        ts = pg_datetime_epoch + timedelta(
+                            microseconds=val + usoff
+                        )
+                    except OverflowError:
+                        pass  # will raise downstream
+                    else:
+                        return ts.replace(tzinfo=self._time_zone)
+
              if val <= 0:
                  raise e.DataError(
                      "timestamp too small (before year 1)"
diff --git a/tests/types/test_datetime.py b/tests/types/test_datetime.py

index 0e979094b8ef620ff1758772e6b4fe6e1704ef34..6a10bcc7e1e4f34257fbfcceb93f6980e4306c2a 100644 (file)
--- a/tests/types/test_datetime.py
+++ b/tests/types/test_datetime.py
@@ -359,6 +359,45 @@ class TestDateTimeTz:
          assert rec[0] == want
          assert rec[1] == 11111111
  
+    mark_tz_sec = (
+        pytest.mark.skipif(
+            sys.version_info < (3, 7), reason="no seconds in tz offset"
+        ),
+    )
+
+    @pytest.mark.xfail(
+        sys.platform == "win32", reason="TODO why? Missing tzdata?"
+    )
+    @pytest.mark.parametrize(
+        "valname, tzval, tzname",
+        [
+            ("max", "-06", "America/Chicago"),
+            pytest.param("min", "+09:18:59", "Asia/Tokyo", marks=mark_tz_sec),
+        ],
+    )
+    @pytest.mark.parametrize("fmt_out", [pq.Format.TEXT, pq.Format.BINARY])
+    def test_max_with_timezone(self, conn, fmt_out, valname, tzval, tzname):
+        # This happens e.g. in Django when it caches forever.
+        # e.g. see Django test cache.tests.DBCacheTests.test_forever_timeout
+        val = getattr(dt.datetime, valname).replace(microsecond=0)
+        tz = dt.timezone(as_tzoffset(tzval))
+        want = val.replace(tzinfo=tz)
+
+        conn.execute("set timezone to '%s'" % tzname)
+        cur = conn.cursor(binary=fmt_out)
+        cur.execute("select %s::timestamptz", [str(val) + tzval])
+        got = cur.fetchone()[0]
+
+        assert got == want
+
+        extra = "1 day" if valname == "max" else "-1 day"
+        with pytest.raises(DataError):
+            cur.execute(
+                "select %s::timestamptz + %s::interval",
+                [str(val) + tzval, extra],
+            )
+            got = cur.fetchone()[0]
+
  
  class TestTime:
      @pytest.mark.parametrize(
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Thu, 22 Jul 2021 21:17:53 +0000 (23:17 +0200)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Fri, 23 Jul 2021 14:39:28 +0000 (16:39 +0200)
psycopg/psycopg/types/datetime.py		patch \| blob \| blame \| history
psycopg_c/psycopg_c/types/datetime.pyx		patch \| blob \| blame \| history
tests/types/test_datetime.py		patch \| blob \| blame \| history