From: Daniele Varrazzo Date: Wed, 7 May 2025 15:33:10 +0000 (+0200) Subject: fix: make sure that intervals with days and months have the same PostgreSQL epoch X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Ffix-interval-epoch;p=thirdparty%2Fpsycopg.git fix: make sure that intervals with days and months have the same PostgreSQL epoch While fixing #1071 I noticed that Postgres returns an approximation of leap years in intervals with specified months and years. More precisely: - Postgres' internal representation keeps separate fields (micros, days, months) (as can be seen in the binary adapter). - Years are converted to months. When pressed for an equivalence with days, this shows with: =# select '1 year'::interval = '360 days'::interval; ?column? ---------- t - When converting the interval to seconds, Postgres adds 1/4 of a day every 12 months (rounding to an integer number of years towards 0). =# select extract('epoch' from '23 months'::interval) / 60. / 60 / 24 - (365 + 11 * 30); ?column? -------------------- 0.2500000000000000 (1 row) =# select extract('epoch' from '24 months'::interval) / 60. / 60 / 24 - (2 * 365); ?column? -------------------- 0.5000000000000000 (1 row) This MR implements a conversion from Postgres interval to Python following the same rule. As a consequence, the `extract('epoch' from interval)` function now returns the same number of seconds returned by the `datetime.timedelta.total_seconds()` of the value returned. The difference, though, is that the added leap-year hours show up in the `seconds` attribute of the timedelta: >>> conn.execute("select '1 year'::interval").fetchone()[0] datetime.timedelta(days=365, seconds=21600) >>> conn.execute("select '4 year'::interval").fetchone()[0].days, 365 * 4 (1461, 1460) This changeset only changes the Python implementation, not the C one. 
--- diff --git a/docs/news.rst b/docs/news.rst index 1238607fa..e6231cfda 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -21,6 +21,8 @@ Psycopg 3.2.8 (unreleased) `AsyncServerCursor` (:ticket:`#1066`). - Fix interval parsing with days or other parts and negative time in C module (:ticket:`#1071`). +- Make sure that intervals with days and months have the same epoch as + PostgreSQL (:ticket:`#1073`). Current release diff --git a/psycopg/psycopg/types/datetime.py b/psycopg/psycopg/types/datetime.py index 1fcab4b97..725d23efb 100644 --- a/psycopg/psycopg/types/datetime.py +++ b/psycopg/psycopg/types/datetime.py @@ -618,14 +618,21 @@ class IntervalLoader(Loader): s = bytes(data).decode("utf8", "replace") raise DataError(f"can't parse interval {s!r}") + ye: int | bytes + mo: int | bytes ye, mo, da, sgn, ho, mi, se = m.groups() days = 0 seconds = 0.0 + all_months = 0 if ye: - days += 365 * int(ye) + ye = int(ye) + days += 365 * ye + all_months += 12 * ye if mo: - days += 30 * int(mo) + mo = int(mo) + days += 30 * mo + all_months += mo if da: days += int(da) @@ -634,6 +641,12 @@ class IntervalLoader(Loader): if sgn == b"-": seconds = -seconds + # Postgres adds 0.25 days every 12 months to approximate leap years + if all_months >= 12: + seconds += (6 * 60 * 60) * (all_months // 12) + elif all_months <= -12: + seconds -= (6 * 60 * 60) * (all_months // -12) + try: return timedelta(days=days, seconds=seconds) except OverflowError as e: @@ -654,15 +667,19 @@ class IntervalBinaryLoader(Loader): def load(self, data: Buffer) -> timedelta: micros, days, months = _unpack_interval(data) + hours = 0 if months > 0: years, months = divmod(months, 12) days = days + 30 * months + 365 * years + # Postgres adds 0.25 days every 12 months to approximate leap years + hours = 6 * years elif months < 0: years, months = divmod(-months, 12) days = days - 30 * months - 365 * years + hours = -6 * years try: - return timedelta(days=days, microseconds=micros) + return timedelta(days=days, 
hours=hours, microseconds=micros) except OverflowError as e: raise DataError(f"can't parse interval: {e}") from None diff --git a/tests/types/test_datetime.py b/tests/types/test_datetime.py index 424543741..4e2372d32 100644 --- a/tests/types/test_datetime.py +++ b/tests/types/test_datetime.py @@ -704,18 +704,18 @@ class TestInterval: ("3723s,400000m", "1:2:3.4"), ("86399s,999999m", "23:59:59.999999"), ("30d", "30 day"), - ("365d", "1 year"), - ("-365d", "-1 year"), - ("-730d", "-2 years"), - ("1460d", "4 year"), + ("365d,6h", "1 year"), + ("-365d,-6h", "-1 year"), + ("-730d,-12h", "-2 years"), + ("1461d", "4 year"), ("30d", "1 month"), ("-30d", "-1 month"), ("60d", "2 month"), ("-90d", "-3 month"), ("186d", "6 mons 6 days"), ("174d", "6 mons -6 days"), - ("736d", "2 years 6 days"), - ("724d", "2 years -6 days"), + ("736d,12h", "2 years 6 days"), + ("724d,12h", "2 years -6 days"), ("330d", "1 years -1 month"), ("83063d,81640s,447000m", "1993534:40:40.447"), ("-1d,64800s", "41 days -990:00:00"), @@ -728,8 +728,38 @@ class TestInterval: @pytest.mark.parametrize("fmt_out", pq.Format) def test_load_interval(self, conn, val, expr, fmt_out): cur = conn.cursor(binary=fmt_out) - cur.execute(f"select '{expr}'::interval") - assert cur.fetchone()[0] == as_td(val) + cur.execute( + "select %(i)s::interval, extract('epoch' from %(i)s::interval)::float8", + {"i": expr}, + ) + got, nsecs = cur.fetchone() + assert got == as_td(val) + assert nsecs == as_td(val).total_seconds() + + @pytest.mark.parametrize("fmt_out", pq.Format) + def test_load_interval_leap_fraction(self, conn, fmt_out): + cur = conn.cursor(binary=fmt_out) + for y in (-5, -4, -3, 3, 4, 5): + for m in [-13, -12, -11, 11, 12, 13]: + cur.execute( + "select extract('epoch' from %s::interval)::float8", + [f"{y} year {m} month"], + ) + got = cur.fetchone()[0] + + m = y * 12 + m + if m >= 0: + y, m = divmod(m, 12) + d = 365 * y + 30 * m + h = 6 * y + want = dt.timedelta(days=d, hours=h) + else: + y, m = divmod(-m, 12) + d = 
365 * y + 30 * m + h = 6 * y + want = -dt.timedelta(days=d, hours=h) + + assert got == want.total_seconds() @crdb_skip_datestyle @pytest.mark.xfail # weird interval outputs @@ -842,9 +872,12 @@ def as_td(s): if s in ("min", "max"): return getattr(dt.timedelta, s) - suffixes = {"d": "days", "s": "seconds", "m": "microseconds"} + suffixes = {"d": "days", "s": "seconds", "h": "hours", "m": "microseconds"} kwargs = {} for part in s.split(","): kwargs[suffixes[part[-1]]] = int(part[:-1]) + if "hours" in kwargs: + kwargs["seconds"] = kwargs.get("seconds", 0) + kwargs.pop("hours") * 60 * 60 + return dt.timedelta(**kwargs)