Improve overflow/underflow handling in regr_intercept().

author Dean Rasheed <dean.a.rasheed@gmail.com>

Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)

committer Dean Rasheed <dean.a.rasheed@gmail.com>

Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)
author Dean Rasheed <dean.a.rasheed@gmail.com>
Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)
committer Dean Rasheed <dean.a.rasheed@gmail.com>
Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c

index cc00c10c0d4bbff5331f3c60e441c8375dd7d0b4..262ea2b73ba8c05357791a9200a0882b3945a5b0 100644 (file)
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4010,7 +4010,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
                                 Sx,
                                 Sxx,
                                 Sy,
-                               Sxy;
+                               Sxy,
+                               dy;
  
         transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
         N = transvalues[0];
@@ -4029,7 +4030,41 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
         if (Sxx == 0)
                 PG_RETURN_NULL();
  
-       PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
+       /*
+        * The intercept is given by (Sy - dy) / N, where dy = Sx * Sxy / Sxx.
+        * However, when computing dy, the intermediate product Sx * Sxy might
+        * underflow or overflow.  If so, we can recover by decomposing Sx, Sxy,
+        * and Sxx into normalized mantissa and integer power-of-two components,
+        * computing the corresponding components of dy, and then recomposing dy.
+        * We avoid doing this if Sx, Sxy, or Sxx are infinite or NaN, since the
+        * exponent returned by frexp() is unspecified in those cases (and the
+        * final result would be the same in any case).
+        */
+       dy = Sx * Sxy / Sxx;
+       if ((dy == 0 || isinf(dy)) &&
+               !(isinf(Sx) || isinf(Sxy) || isinf(Sxx) ||
+                 isnan(Sx) || isnan(Sxy) || isnan(Sxx)))
+       {
+               float8          m_Sx,
+                                       m_Sxy,
+                                       m_Sxx,
+                                       m_dy;
+               int                     n_Sx,
+                                       n_Sxy,
+                                       n_Sxx,
+                                       n_dy;
+
+               m_Sx = frexp(Sx, &n_Sx);
+               m_Sxy = frexp(Sxy, &n_Sxy);
+               m_Sxx = frexp(Sxx, &n_Sxx);
+
+               m_dy = m_Sx * m_Sxy / m_Sxx;
+               n_dy = n_Sx + n_Sxy - n_Sxx;
+
+               dy = ldexp(m_dy, n_dy);
+       }
+
+       PG_RETURN_FLOAT8((Sy - dy) / N);
  }
  
  
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 1ccdf7dfdd7ce6d950ac4345bba5379e554051a5..89e051ee824023cf87b1817e90efa18a3fcfd68a 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -563,6 +563,18 @@ SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
      1 |       1
  (1 row)
  
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+ regr_intercept 
+----------------
+         1e+150
+(1 row)
+
+SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y);
+ regr_intercept 
+----------------
+         1e-131
+(1 row)
+
  -- these examples pose definitional questions for NaN inputs,
  -- which we resolve by saying that an all-NaN input column is not all equal
  SELECT corr(g, 'NaN'), regr_r2(g, 'NaN') FROM generate_series(1, 30) g;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index a310b39e7b822c65e61a61d9d2631f8e37cd3b03..916383db927350c1cd6cfda801d83abeba7fbb62 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -159,6 +159,8 @@ SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105),
  SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
         regr_r2(1e100 + g * 1e95, 1e100 + g * 1e95)
    FROM generate_series(1, 2) g;
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y);
  
  -- these examples pose definitional questions for NaN inputs,
  -- which we resolve by saying that an all-NaN input column is not all equal
author	Dean Rasheed <dean.a.rasheed@gmail.com>
	Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)
committer	Dean Rasheed <dean.a.rasheed@gmail.com>
	Wed, 3 Jun 2026 08:20:21 +0000 (09:20 +0100)
src/backend/utils/adt/float.c		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history