/* Math module -- standard C math library functions, pi and e */

/* Here are some comments from Tim Peters, extracted from the
   discussion attached to http://bugs.python.org/issue1640.  They
   describe the general aims of the math module with respect to
   special values, IEEE-754 floating-point exceptions, and Python
   exceptions.

These are the "spirit of 754" rules:

1. If the mathematical result is a real number, but of magnitude too
large to approximate by a machine float, overflow is signaled and the
result is an infinity (with the appropriate sign).

2. If the mathematical result is a real number, but of magnitude too
small to approximate by a machine float, underflow is signaled and the
result is a zero (with the appropriate sign).

3. At a singularity (a value x such that the limit of f(y) as y
approaches x exists and is an infinity), "divide by zero" is signaled
and the result is an infinity (with the appropriate sign).  This is
complicated a little by that the left-side and right-side limits may
not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
from the positive or negative directions.  In that specific case, the
sign of the zero determines the result of 1/0.

4. At a point where a function has no defined result in the extended
reals (i.e., the reals plus an infinity or two), invalid operation is
signaled and a NaN is returned.

And these are what Python has historically /tried/ to do (but not
always successfully, as platform libm behavior varies a lot):

For #1, raise OverflowError.

For #2, return a zero (with the appropriate sign if that happens by
accident ;-)).

For #3 and #4, raise ValueError.  It may have made sense to raise
Python's ZeroDivisionError in #3, but historically that's only been
raised for division by zero and mod by zero.
*/

/*
   In general, on an IEEE-754 platform the aim is to follow the C99
   standard, including Annex 'F', whenever possible.  Where the
   standard recommends raising the 'divide-by-zero' or 'invalid'
   floating-point exceptions, Python should raise a ValueError.  Where
   the standard recommends raising 'overflow', Python should raise an
   OverflowError.  In all other circumstances a value should be
   returned.
 */
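/*
   Illustrative sketch (hypothetical, not part of the build; it assumes the
   Python.h declarations included below): one way the policy above can be
   applied when wrapping a single libm call.  The real wrappers in this file
   (math_1(), math_2(), is_error()) implement the policy in full.
*/
#if 0
static PyObject *
demo_wrap_libm(double (*func)(double), double x, int can_overflow)
{
    errno = 0;
    double r = func(x);
    if (isnan(r) && !isnan(x)) {
        /* 'invalid' -> ValueError */
        PyErr_SetString(PyExc_ValueError, "math domain error");
        return NULL;
    }
    if (isinf(r) && isfinite(x)) {
        /* 'overflow' -> OverflowError; 'divide-by-zero' -> ValueError */
        PyErr_SetString(can_overflow ? PyExc_OverflowError : PyExc_ValueError,
                        can_overflow ? "math range error" : "math domain error");
        return NULL;
    }
    return PyFloat_FromDouble(r);
}
#endif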
#ifndef Py_BUILD_CORE_BUILTIN
#  define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "pycore_abstract.h"      // _PyNumber_Index()
#include "pycore_bitutils.h"      // _Py_bit_length()
#include "pycore_call.h"          // _PyObject_CallNoArgs()
#include "pycore_long.h"          // _PyLong_GetZero()
#include "pycore_moduleobject.h"  // _PyModule_GetState()
#include "pycore_object.h"        // _PyObject_LookupSpecial()
#include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
/* For DBL_EPSILON in _math.h */
#include <float.h>
/* For _Py_log1p with workarounds for buggy handling of zeros. */
#include "_math.h"

#include "clinic/mathmodule.c.h"

/*[clinic input]
module math
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
/*
Double and triple length extended precision algorithms from:

  Accurate Sum and Dot Product
  by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
  https://doi.org/10.1137/030601818
  https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf

*/

typedef struct{ double hi; double lo; } DoubleLength;

static DoubleLength
dl_fast_sum(double a, double b)
{
    /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
    assert(fabs(a) >= fabs(b));
    double x = a + b;
    double y = (a - x) + b;
    return (DoubleLength) {x, y};
}

static DoubleLength
dl_sum(double a, double b)
{
    /* Algorithm 3.1 Error-free transformation of the sum */
    double x = a + b;
    double z = x - a;
    double y = (a - (x - z)) + (b - z);
    return (DoubleLength) {x, y};
}

#ifndef UNRELIABLE_FMA

static DoubleLength
dl_mul(double x, double y)
{
    /* Algorithm 3.5. Error-free transformation of a product */
    double z = x * y;
    double zz = fma(x, y, -z);
    return (DoubleLength) {z, zz};
}
#else

/*
   The default implementation of dl_mul() depends on the C math library
   having an accurate fma() function as required by § 7.12.13.1 of the
   C99 standard.

   The UNRELIABLE_FMA option is provided as a slower but accurate
   alternative for builds where the fma() function is found wanting.
   The speed penalty may be modest (17% slower on an Apple M1 Max),
   so don't hesitate to enable this build option.

   The algorithms are from the T. J. Dekker paper:
   A Floating-Point Technique for Extending the Available Precision
   https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
*/
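/*
   Illustrative sketch (hypothetical, not part of the build): whichever
   dl_mul() variant is compiled, the pair it returns satisfies
   hi + lo == x*y exactly.  A quick check, assuming the names above:
*/
#if 0
static void
demo_dl_mul_roundoff(void)
{
    double x = 1.0 + 0x1p-30, y = 1.0 + 0x1p-29;
    DoubleLength d = dl_mul(x, y);
    /* d.hi is the rounded product and d.lo the rounding error, so
       d.hi + d.lo reproduces x*y exactly (here d.lo == 0x1p-59). */
    assert(d.hi == x * y);
    assert(d.lo == fma(x, y, -d.hi));
}
#endif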
static DoubleLength
dl_split(double x) {
    // Dekker (5.5) and (5.6).
    double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
    double hi = t - (t - x);
    double lo = x - hi;
    return (DoubleLength) {hi, lo};
}

static DoubleLength
dl_mul(double x, double y)
{
    // Dekker (5.12) and mul12()
    DoubleLength xx = dl_split(x);
    DoubleLength yy = dl_split(y);
    double p = xx.hi * yy.hi;
    double q = xx.hi * yy.lo + xx.lo * yy.hi;
    double z = p + q;
    double zz = p - z + q + xx.lo * yy.lo;
    return (DoubleLength) {z, zz};
}

#endif
typedef struct { double hi; double lo; double tiny; } TripleLength;

static const TripleLength tl_zero = {0.0, 0.0, 0.0};

static inline TripleLength
tl_fma(double x, double y, TripleLength total)
{
    /* Algorithm 5.10 with SumKVert for K=3 */
    DoubleLength pr = dl_mul(x, y);
    DoubleLength sm = dl_sum(total.hi, pr.hi);
    DoubleLength r1 = dl_sum(total.lo, pr.lo);
    DoubleLength r2 = dl_sum(r1.hi, sm.lo);
    return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
}

static inline double
tl_to_d(TripleLength total)
{
    DoubleLength last = dl_sum(total.lo, total.hi);
    return total.tiny + last.lo + last.hi;
}
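/*
   Illustrative sketch (hypothetical, not part of the build): the intended
   usage pattern for the helpers above is to fold products into a
   TripleLength accumulator with tl_fma() and round to double only once,
   at the very end, with tl_to_d().
*/
#if 0
static double
demo_dot_product(const double *p, const double *q, Py_ssize_t n)
{
    TripleLength total = tl_zero;
    for (Py_ssize_t i = 0; i < n; i++) {
        /* each step folds p[i]*q[i] into the extended-precision total */
        total = tl_fma(p[i], q[i], total);
    }
    /* a single rounding to double at the end */
    return tl_to_d(total);
}
#endif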
/*
   sin(pi*x), giving accurate results for all finite x (especially x
   integral or close to an integer).  This is here for use in the
   reflection formula for the gamma function.  It conforms to IEEE
   754-2008 for finite arguments, but not for infinities or nans.
*/

static const double pi = 3.141592653589793238462643383279502884197;
static const double logpi = 1.144729885849400174143427351353058711647;
/* Version of PyFloat_AsDouble() with in-line fast paths
   for exact floats and integers.  Gives a substantial
   speed improvement for extracting float arguments.
*/
#define ASSIGN_DOUBLE(target_var, obj, error_label)        \
    if (PyFloat_CheckExact(obj)) {                          \
        target_var = PyFloat_AS_DOUBLE(obj);                \
    }                                                       \
    else if (PyLong_CheckExact(obj)) {                      \
        target_var = PyLong_AsDouble(obj);                  \
        if (target_var == -1.0 && PyErr_Occurred()) {       \
            goto error_label;                               \
        }                                                   \
    }                                                       \
    else {                                                  \
        target_var = PyFloat_AsDouble(obj);                 \
        if (target_var == -1.0 && PyErr_Occurred()) {       \
            goto error_label;                               \
        }                                                   \
    }
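/*
   Illustrative sketch (hypothetical, not part of the build): ASSIGN_DOUBLE()
   expects an error label in the enclosing function, e.g.
*/
#if 0
static PyObject *
demo_use_assign_double(PyObject *obj)
{
    double x;
    ASSIGN_DOUBLE(x, obj, error);   /* jumps to 'error' on conversion failure */
    return PyFloat_FromDouble(x * 2.0);
  error:
    return NULL;
}
#endif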
static double
m_sinpi(double x)
{
    double y, r;
    int n;
    /* this function should only ever be called for finite arguments */
    assert(isfinite(x));
    y = fmod(fabs(x), 2.0);
    n = (int)round(2.0*y);
    assert(0 <= n && n <= 4);
    switch (n) {
    case 0:
        r = sin(pi*y);
        break;
    case 1:
        r = cos(pi*(y-0.5));
        break;
    case 2:
        /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
           -0.0 instead of 0.0 when y == 1.0. */
        r = sin(pi*(1.0-y));
        break;
    case 3:
        r = -cos(pi*(y-1.5));
        break;
    case 4:
        r = sin(pi*(y-2.0));
        break;
    default:
        Py_UNREACHABLE();
    }
    return copysign(1.0, x)*r;
}
/* Implementation of the real gamma function.  Kept here to work around
   issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
   on various platforms (Windows, MacOS).  In extensive but non-exhaustive
   random tests, this function proved accurate to within <= 10 ulps across the
   entire float domain.  Note that accuracy may depend on the quality of the
   system math functions, the pow function in particular.  Special cases
   follow C99 annex F.  The parameters and method are tailored to platforms
   whose double format is the IEEE 754 binary64 format.

   Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
   and g=6.024680040776729583740234375; these parameters are amongst those
   used by the Boost library.  Following Boost (again), we re-express the
   Lanczos sum as a rational function, and compute it that way.  The
   coefficients below were computed independently using MPFR, and have been
   double-checked against the coefficients in the Boost source code.

   For x < 0.0 we use the reflection formula.

   There's one minor tweak that deserves explanation: Lanczos' formula for
   Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
   values, x+g-0.5 can be represented exactly.  However, in cases where it
   can't be represented exactly the small error in x+g-0.5 can be magnified
   significantly by the pow and exp calls, especially for large x.  A cheap
   correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
   involved in the computation of x+g-0.5 (that is, e = computed value of
   x+g-0.5 - exact value of x+g-0.5).  Here's the proof:

   Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
   double, and e is tiny.  Then:

     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
                                     = pow(y, x-0.5)/exp(y) * C,

   where the correction_factor C is given by

     C = pow(1-e/y, x-0.5) * exp(e)

   Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(e) ~ 1+e, so:

     C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y

   But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and

     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),

   Note that for accuracy, when computing r*C it's better to do

     r + e*g/y*r;

   than

     r * (1 + e*g/y);

   since the addition in the latter throws away most of the bits of
   information in e*g/y.
*/
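/*
   Illustrative sketch (hypothetical, not part of the build): computing the
   error term e and applying the correction described above, using the
   lanczos_g / lanczos_g_minus_half constants defined just below.  The real
   gamma implementation further down does the equivalent inline, for both
   branches of the reflection formula.
*/
#if 0
static double
demo_lanczos_correction(double absx)
{
    double y = absx + lanczos_g_minus_half;   /* computed value of x+g-0.5 */
    /* e approximates (computed x+g-0.5) - (exact x+g-0.5); this form is
       for absx > lanczos_g_minus_half. */
    double e = (y - absx) - lanczos_g_minus_half;
    double r = pow(y, absx - 0.5) / exp(y);
    /* apply the correction as r += e*g/y * r rather than r *= (1 + e*g/y) */
    r += e * lanczos_g / y * r;
    return r;
}
#endif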
#define LANCZOS_N 13
static const double lanczos_g = 6.024680040776729583740234375;
static const double lanczos_g_minus_half = 5.524680040776729583740234375;
static const double lanczos_num_coeffs[LANCZOS_N] = {
    23531376880.410759688572007674451636754734846804940,
    42919803642.649098768957899047001988850926355848959,
    35711959237.355668049440185451547166705960488635843,
    17921034426.037209699919755754458931112671403265390,
    6039542586.3520280050642916443072979210699388420708,
    1439720407.3117216736632230727949123939715485786772,
    248874557.86205415651146038641322942321632125127801,
    31426415.585400194380614231628318205362874684987640,
    2876370.6289353724412254090516208496135991145378768,
    186056.26539522349504029498971604569928220784236328,
    8071.6720023658162106380029022722506138218516325024,
    210.82427775157934587250973392071336271166969580291,
    2.5066282746310002701649081771338373386264310793408
};

/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
static const double lanczos_den_coeffs[LANCZOS_N] = {
    0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
    13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};

/* gamma values for small positive integers, 1 through NGAMMA_INTEGRAL */
#define NGAMMA_INTEGRAL 23
static const double gamma_integral[NGAMMA_INTEGRAL] = {
    1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
    3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
    1307674368000.0, 20922789888000.0, 355687428096000.0,
    6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
    51090942171709440000.0, 1124000727777607680000.0,
};
/* Lanczos' sum L_g(x), for positive x */

static double
lanczos_sum(double x)
{
    double num = 0.0, den = 0.0;
    int i;
    assert(x > 0.0);
    /* evaluate the rational function lanczos_sum(x).  For large
       x, the obvious algorithm risks overflow, so we instead
       rescale the denominator and numerator of the rational
       function by x**(1-LANCZOS_N) and treat this as a
       rational function in 1/x.  This also reduces the error for
       larger x values.  The choice of cutoff point (5.0 below) is
       somewhat arbitrary; in tests, smaller cutoff values than
       this resulted in lower accuracy. */
    if (x < 5.0) {
        for (i = LANCZOS_N; --i >= 0; ) {
            num = num * x + lanczos_num_coeffs[i];
            den = den * x + lanczos_den_coeffs[i];
        }
    }
    else {
        for (i = 0; i < LANCZOS_N; i++) {
            num = num / x + lanczos_num_coeffs[i];
            den = den / x + lanczos_den_coeffs[i];
        }
    }
    return num/den;
}
static double
m_tgamma(double x)
{
    double absx, r, y, z, sqrtpow;

    /* special cases */
    if (!isfinite(x)) {
        if (isnan(x) || x > 0.0)
            return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
        else {
            errno = EDOM;
            return Py_NAN;  /* tgamma(-inf) = nan, invalid */
        }
    }
    if (x == 0.0) {
        errno = EDOM;
        /* tgamma(+-0.0) = +-inf, divide-by-zero */
        return copysign(Py_INFINITY, x);
    }

    /* integer arguments */
    if (x == floor(x)) {
        if (x < 0.0) {
            errno = EDOM;   /* tgamma(n) = nan, invalid for */
            return Py_NAN;  /* negative integers n */
        }
        if (x <= NGAMMA_INTEGRAL)
            return gamma_integral[(int)x - 1];
    }
    absx = fabs(x);

    /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
    if (absx < 1e-20) {
        r = 1.0/x;
        if (isinf(r))
            errno = ERANGE;
        return r;
    }

    /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
       x > 200, and underflows to +-0.0 for x < -200, not a negative
       integer. */
    if (absx > 200.0) {
        if (x < 0.0) {
            return 0.0/m_sinpi(x);
        }
        else {
            errno = ERANGE;
            return Py_INFINITY;
        }
    }

    y = absx + lanczos_g_minus_half;
    /* compute error in sum */
    if (absx > lanczos_g_minus_half) {
        /* note: the correction can be foiled by an optimizing
           compiler that (incorrectly) thinks that an expression like
           a + b - a - b can be optimized to 0.0.  This shouldn't
           happen in a standards-conforming compiler. */
        double q = y - absx;
        z = q - lanczos_g_minus_half;
    }
    else {
        double q = y - lanczos_g_minus_half;
        z = q - absx;
    }
    z = z * lanczos_g / y;
    if (x < 0.0) {
        r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
        r -= z * r;
        if (absx < 140.0) {
            r /= pow(y, absx - 0.5);
        }
        else {
            sqrtpow = pow(y, absx / 2.0 - 0.25);
            r /= sqrtpow;
            r /= sqrtpow;
        }
    }
    else {
        r = lanczos_sum(absx) / exp(y);
        r += z * r;
        if (absx < 140.0) {
            r *= pow(y, absx - 0.5);
        }
        else {
            sqrtpow = pow(y, absx / 2.0 - 0.25);
            r *= sqrtpow;
            r *= sqrtpow;
        }
    }
    if (isinf(r))
        errno = ERANGE;
    return r;
}
/*
   lgamma: natural log of the absolute value of the Gamma function.
   For large arguments, Lanczos' formula works extremely well here.
*/

static double
m_lgamma(double x)
{
    double r;
    double absx;

    /* special cases */
    if (!isfinite(x)) {
        if (isnan(x))
            return x;  /* lgamma(nan) = nan */
        else
            return Py_INFINITY;  /* lgamma(+-inf) = +inf */
    }

    /* integer arguments */
    if (x == floor(x) && x <= 2.0) {
        if (x <= 0.0) {
            errno = EDOM;        /* lgamma(n) = inf, divide-by-zero for */
            return Py_INFINITY;  /* integers n <= 0 */
        }
        else {
            return 0.0;  /* lgamma(1) = lgamma(2) = 0.0 */
        }
    }

    absx = fabs(x);
    /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
    if (absx < 1e-20)
        return -log(absx);

    /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
       having a second set of numerator coefficients for lanczos_sum that
       absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
       subtraction below; it's probably not worth it. */
    r = log(lanczos_sum(absx)) - lanczos_g;
    r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);

    if (x < 0.0)
        /* Use reflection formula to get value for negative x. */
        r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;

    if (isinf(r))
        errno = ERANGE;
    return r;
}
/* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
binary floating-point format, the result is always exact. */
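/*
   Illustrative check (hypothetical, not part of the build): the ties-to-even
   rule described above means remainder(7.0, 2.0) == -1.0 (the nearest
   multiples of 2 are 6 and 8; the tie picks n == 4), while
   remainder(5.0, 2.0) == 1.0 (the tie picks n == 2).  m_remainder() is
   defined just below.
*/
#if 0
static void
demo_remainder_ties(void)
{
    assert(m_remainder(7.0, 2.0) == -1.0);
    assert(m_remainder(5.0, 2.0) == 1.0);
}
#endif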
static double
m_remainder(double x, double y)
{
    /* Deal with most common case first. */
    if (isfinite(x) && isfinite(y)) {
        double absx, absy, c, m, r;

        if (y == 0.0) {
            return Py_NAN;
        }

        absx = fabs(x);
        absy = fabs(y);
        m = fmod(absx, absy);

        /*
           Warning: some subtlety here. What we *want* to know at this point is
           whether the remainder m is less than, equal to, or greater than half
           of absy. However, we can't do that comparison directly because we
           can't be sure that 0.5*absy is representable (the multiplication
           might incur precision loss due to underflow). So instead we compare
           m with the complement c = absy - m: m < 0.5*absy if and only if m <
           c, and so on. The catch is that absy - m might also not be
           representable, but it turns out that it doesn't matter:

           - if m > 0.5*absy then absy - m is exactly representable, by
             Sterbenz's lemma, so m > c
           - if m == 0.5*absy then again absy - m is exactly representable
             and m == c
           - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
             in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
             c, or (ii) absy is tiny, either subnormal or in the lowest normal
             binade. Then absy - m is exactly representable and again m < c.
        */

        c = absy - m;
        if (m < c) {
            r = m;
        }
        else if (m > c) {
            r = -c;
        }
        else {
            /*
               Here absx is exactly halfway between two multiples of absy,
               and we need to choose the even multiple. x now has the form

                   absx = n * absy + m

               for some integer n (recalling that m = 0.5*absy at this point).
               If n is even we want to return m; if n is odd, we need to
               return -m.

               So

                   0.5 * (absx - m) = (n/2) * absy

               and now reducing modulo absy gives us:

                                                  | m, if n is odd
                   fmod(0.5 * (absx - m), absy) = |
                                                  | 0, if n is even

               Now m - 2.0 * fmod(...) gives the desired result: m
               if n is even, -m if n is odd.

               Note that all steps in fmod(0.5 * (absx - m), absy)
               will be computed exactly, with no rounding error
               introduced.
            */
            assert(m == c);
            r = m - 2.0 * fmod(0.5 * (absx - m), absy);
        }
        return copysign(1.0, x) * r;
    }

    /* Special values. */
    if (isnan(x)) {
        return x;
    }
    if (isnan(y)) {
        return y;
    }
    if (isinf(x)) {
        return Py_NAN;
    }
    assert(isinf(y));
    return x;
}
/*
   Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
   log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
   special values directly, passing positive non-special values through to
   the system log/log10.
*/

static double
m_log(double x)
{
    if (isfinite(x)) {
        if (x > 0.0)
            return log(x);
        errno = EDOM;
        if (x == 0.0)
            return -Py_INFINITY;  /* log(0) = -inf */
        else
            return Py_NAN;  /* log(-ve) = nan */
    }
    else if (isnan(x))
        return x;  /* log(nan) = nan */
    else if (x > 0.0)
        return x;  /* log(inf) = inf */
    else {
        errno = EDOM;
        return Py_NAN;  /* log(-inf) = nan */
    }
}
/*
   log2: log to base 2.

   Uses an algorithm that should:

     (a) produce exact results for powers of 2, and
     (b) give a monotonic log2 (for positive finite floats),
         assuming that the system log is monotonic.
*/

static double
m_log2(double x)
{
    if (!isfinite(x)) {
        if (isnan(x))
            return x;  /* log2(nan) = nan */
        else if (x > 0.0)
            return x;  /* log2(+inf) = +inf */
        else {
            errno = EDOM;
            return Py_NAN;  /* log2(-inf) = nan, invalid-operation */
        }
    }

    if (x > 0.0) {
        return log2(x);
    }
    else if (x == 0.0) {
        errno = EDOM;
        return -Py_INFINITY;  /* log2(0) = -inf, divide-by-zero */
    }
    else {
        errno = EDOM;
        return Py_NAN;  /* log2(-ve) = nan, invalid-operation */
    }
}
static double
m_log10(double x)
{
    if (isfinite(x)) {
        if (x > 0.0)
            return log10(x);
        errno = EDOM;
        if (x == 0.0)
            return -Py_INFINITY;  /* log10(0) = -inf */
        else
            return Py_NAN;  /* log10(-ve) = nan */
    }
    else if (isnan(x))
        return x;  /* log10(nan) = nan */
    else if (x > 0.0)
        return x;  /* log10(inf) = inf */
    else {
        errno = EDOM;
        return Py_NAN;  /* log10(-inf) = nan */
    }
}
/*[clinic input]
math.gcd

    *integers as args: array

Greatest Common Divisor.
[clinic start generated code]*/

static PyObject *
math_gcd_impl(PyObject *module, PyObject * const *args,
              Py_ssize_t args_length)
/*[clinic end generated code: output=a26c95907374ffb4 input=ded7f0ea3850c05c]*/
{
    // Fast-path for the common case: gcd(int, int)
    if (args_length == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
    {
        return _PyLong_GCD(args[0], args[1]);
    }

    if (args_length == 0) {
        return PyLong_FromLong(0);
    }

    PyObject *res = PyNumber_Index(args[0]);
    if (res == NULL) {
        return NULL;
    }
    if (args_length == 1) {
        Py_SETREF(res, PyNumber_Absolute(res));
        return res;
    }

    PyObject *one = _PyLong_GetOne();  // borrowed ref
    for (Py_ssize_t i = 1; i < args_length; i++) {
        PyObject *x = _PyNumber_Index(args[i]);
        if (x == NULL) {
            Py_DECREF(res);
            return NULL;
        }
        if (res == one) {
            /* Fast path: just check arguments.
               It is okay to use identity comparison here. */
            Py_DECREF(x);
            continue;
        }
        Py_SETREF(res, _PyLong_GCD(res, x));
        Py_DECREF(x);
        if (res == NULL) {
            return NULL;
        }
    }
    return res;
}
static PyObject *
long_lcm(PyObject *a, PyObject *b)
{
    PyObject *g, *m, *f, *ab;

    if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
        return PyLong_FromLong(0);
    }
    g = _PyLong_GCD(a, b);
    if (g == NULL) {
        return NULL;
    }
    f = PyNumber_FloorDivide(a, g);
    Py_DECREF(g);
    if (f == NULL) {
        return NULL;
    }
    m = PyNumber_Multiply(f, b);
    Py_DECREF(f);
    if (m == NULL) {
        return NULL;
    }
    ab = PyNumber_Absolute(m);
    Py_DECREF(m);
    return ab;
}
/*[clinic input]
math.lcm

    *integers as args: array

Least Common Multiple.
[clinic start generated code]*/

static PyObject *
math_lcm_impl(PyObject *module, PyObject * const *args,
              Py_ssize_t args_length)
/*[clinic end generated code: output=c8a59a5c2e55c816 input=3e4f4b7cdf948a98]*/
{
    PyObject *res, *x;
    Py_ssize_t i;

    if (args_length == 0) {
        return PyLong_FromLong(1);
    }
    res = PyNumber_Index(args[0]);
    if (res == NULL) {
        return NULL;
    }
    if (args_length == 1) {
        Py_SETREF(res, PyNumber_Absolute(res));
        return res;
    }

    PyObject *zero = _PyLong_GetZero();  // borrowed ref
    for (i = 1; i < args_length; i++) {
        x = PyNumber_Index(args[i]);
        if (x == NULL) {
            Py_DECREF(res);
            return NULL;
        }
        if (res == zero) {
            /* Fast path: just check arguments.
               It is okay to use identity comparison here. */
            Py_DECREF(x);
            continue;
        }
        Py_SETREF(res, long_lcm(res, x));
        Py_DECREF(x);
        if (res == NULL) {
            return NULL;
        }
    }
    return res;
}
/* Call is_error when errno != 0, and where x is the result libm
 * returned.  is_error will usually set up an exception and return
 * true (1), but may return false (0) without setting up an exception.
 */
static int
is_error(double x, int raise_edom)
{
    int result = 1;     /* presumption of guilt */
    assert(errno);      /* non-zero errno is a precondition for calling */
    if (errno == EDOM) {
        if (raise_edom) {
            PyErr_SetString(PyExc_ValueError, "math domain error");
        }
    }
    else if (errno == ERANGE) {
        /* ANSI C generally requires libm functions to set ERANGE
         * on overflow, but also generally *allows* them to set
         * ERANGE on underflow too.  There's no consistency about
         * the latter across platforms.
         * Alas, C99 never requires that errno be set.
         * Here we suppress the underflow errors (libm functions
         * should return a zero on underflow, and +- HUGE_VAL on
         * overflow, so testing the result for zero suffices to
         * distinguish the cases).
         *
         * On some platforms (Ubuntu/ia64) it seems that errno can be
         * set to ERANGE for subnormal results that do *not* underflow
         * to zero.  So to be safe, we'll ignore ERANGE whenever the
         * function result is less than 1.5 in absolute value.
         *
         * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
         * are correctly detected, since the function may underflow
         * toward -1.0 rather than 0.0.
         */
        if (fabs(x) < 1.5)
            result = 0;
        else {
            PyErr_SetString(PyExc_OverflowError,
                            "math range error");
        }
    }
    else {
        /* Unexpected math error */
        PyErr_SetFromErrno(PyExc_ValueError);
    }
    return result;
}
/*
   math_1 is used to wrap a libm function f that takes a double
   argument and returns a double.

   The error reporting follows these rules, which are designed to do
   the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
   platforms.

   - a NaN result from non-NaN inputs causes ValueError to be raised
   - an infinite result from finite inputs causes OverflowError to be
     raised if can_overflow is 1, or raises ValueError if can_overflow
     is 0.
   - if the result is finite and errno == EDOM then ValueError is
     raised
   - if the result is finite and nonzero and errno == ERANGE then
     OverflowError is raised

   The last rule is used to catch overflow on platforms which follow
   C89 but for which HUGE_VAL is not an infinity.

   For the majority of one-argument functions these rules are enough
   to ensure that Python's functions behave as specified in 'Annex F'
   of the C99 standard, with the 'invalid' and 'divide-by-zero'
   floating-point exceptions mapping to Python's ValueError and the
   'overflow' floating-point exception mapping to OverflowError.
   math_1 only works for functions that don't have singularities *and*
   the possibility of overflow; fortunately, that covers everything we
   care about right now.
*/

static PyObject *
math_1(PyObject *arg, double (*func) (double), int can_overflow,
       const char *err_msg)
{
    double x, r;
    x = PyFloat_AsDouble(arg);
    if (x == -1.0 && PyErr_Occurred())
        return NULL;
    errno = 0;
    r = (*func)(x);
    if (isnan(r) && !isnan(x))
        goto domain_err;  /* domain error */
    if (isinf(r) && isfinite(x)) {
        if (can_overflow)
            PyErr_SetString(PyExc_OverflowError,
                            "math range error");  /* overflow */
        else
            goto domain_err;  /* singularity */
        return NULL;
    }
    if (isfinite(r) && errno && is_error(r, 1))
        /* this branch unnecessary on most platforms */
        return NULL;

    return PyFloat_FromDouble(r);

domain_err:
    if (err_msg) {
        char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
        if (buf) {
            PyErr_Format(PyExc_ValueError, err_msg, buf);
            PyMem_Free(buf);
        }
    }
    else {
        PyErr_SetString(PyExc_ValueError, "math domain error");
    }
    return NULL;
}
/* variant of math_1, to be used when the function being wrapped is known to
   set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
   errno = ERANGE for overflow). */

static PyObject *
math_1a(PyObject *arg, double (*func) (double), const char *err_msg)
{
    double x, r;
    x = PyFloat_AsDouble(arg);
    if (x == -1.0 && PyErr_Occurred())
        return NULL;
    errno = 0;
    r = (*func)(x);
    if (errno && is_error(r, err_msg ? 0 : 1)) {
        if (err_msg && errno == EDOM) {
            assert(!PyErr_Occurred());  /* exception is not set by is_error() */
            char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
            if (buf) {
                PyErr_Format(PyExc_ValueError, err_msg, buf);
                PyMem_Free(buf);
            }
        }
        return NULL;
    }
    return PyFloat_FromDouble(r);
}
/*
   math_2 is used to wrap a libm function f that takes two double
   arguments and returns a double.

   The error reporting follows these rules, which are designed to do
   the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
   platforms.

   - a NaN result from non-NaN inputs causes ValueError to be raised
   - an infinite result from finite inputs causes OverflowError to be
     raised.
   - if the result is finite and errno == EDOM then ValueError is
     raised
   - if the result is finite and nonzero and errno == ERANGE then
     OverflowError is raised

   The last rule is used to catch overflow on platforms which follow
   C89 but for which HUGE_VAL is not an infinity.

   For most two-argument functions (copysign, fmod, hypot, atan2)
   these rules are enough to ensure that Python's functions behave as
   specified in 'Annex F' of the C99 standard, with the 'invalid' and
   'divide-by-zero' floating-point exceptions mapping to Python's
   ValueError and the 'overflow' floating-point exception mapping to
   OverflowError.
*/

static PyObject *
math_2(PyObject *const *args, Py_ssize_t nargs,
       double (*func) (double, double), const char *funcname)
{
    double x, y, r;
    if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
        return NULL;
    x = PyFloat_AsDouble(args[0]);
    if (x == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    y = PyFloat_AsDouble(args[1]);
    if (y == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    errno = 0;
    r = (*func)(x, y);
    if (isnan(r)) {
        if (!isnan(x) && !isnan(y))
            errno = EDOM;
        else
            errno = 0;
    }
    else if (isinf(r)) {
        if (isfinite(x) && isfinite(y))
            errno = ERANGE;
        else
            errno = 0;
    }
    if (errno && is_error(r, 1))
        return NULL;
    else
        return PyFloat_FromDouble(r);
}
#define FUNC1(funcname, func, can_overflow, docstring)                  \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1(args, func, can_overflow, NULL);                  \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1D(funcname, func, can_overflow, docstring, err_msg)        \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1(args, func, can_overflow, err_msg);               \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1A(funcname, func, docstring)                               \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1a(args, func, NULL);                               \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1AD(funcname, func, docstring, err_msg)                     \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1a(args, func, err_msg);                            \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC2(funcname, func, docstring)                                \
    static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
        return math_2(args, nargs, func, #funcname); \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);
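/*
   Illustrative note (not from the original source): an invocation such as
   FUNC1(atan, atan, 0, "...docstring...") expands to roughly

       static PyObject * math_atan(PyObject *self, PyObject *args) {
           return math_1(args, atan, 0, NULL);
       }
       PyDoc_STRVAR(math_atan_doc, "...docstring...");

   i.e. one module-level wrapper plus its docstring per libm function.
*/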
FUNC1D(acos, acos, 0,
      "acos($module, x, /)\n--\n\n"
      "Return the arc cosine (measured in radians) of x.\n\n"
      "The result is between 0 and pi.",
      "expected a number in range from -1 up to 1, got %s")
FUNC1D(acosh, acosh, 0,
      "acosh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic cosine of x.",
      "expected argument value not less than 1, got %s")
FUNC1D(asin, asin, 0,
      "asin($module, x, /)\n--\n\n"
      "Return the arc sine (measured in radians) of x.\n\n"
      "The result is between -pi/2 and pi/2.",
      "expected a number in range from -1 up to 1, got %s")
FUNC1(asinh, asinh, 0,
      "asinh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic sine of x.")
FUNC1(atan, atan, 0,
      "atan($module, x, /)\n--\n\n"
      "Return the arc tangent (measured in radians) of x.\n\n"
      "The result is between -pi/2 and pi/2.")
FUNC2(atan2, atan2,
      "atan2($module, y, x, /)\n--\n\n"
      "Return the arc tangent (measured in radians) of y/x.\n\n"
      "Unlike atan(y/x), the signs of both x and y are considered.")
FUNC1D(atanh, atanh, 0,
      "atanh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic tangent of x.",
      "expected a number between -1 and 1, got %s")
FUNC1(cbrt, cbrt, 0,
      "cbrt($module, x, /)\n--\n\n"
      "Return the cube root of x.")
/*[clinic input]
math.ceil

    x as number: object
    /

Return the ceiling of x as an Integral.

This is the smallest integer >= x.
[clinic start generated code]*/

static PyObject *
math_ceil(PyObject *module, PyObject *number)
/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
{
    double x;

    if (PyFloat_CheckExact(number)) {
        x = PyFloat_AS_DOUBLE(number);
    }
    else {
        PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__ceil__));
        if (result != NULL) {
            return result;
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }
        x = PyFloat_AsDouble(number);
        if (x == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
    }
    return PyLong_FromDouble(ceil(x));
}
FUNC2(copysign, copysign,
      "copysign($module, x, y, /)\n--\n\n"
      "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
      "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
      "returns -1.0.\n")
FUNC1D(cos, cos, 0,
      "cos($module, x, /)\n--\n\n"
      "Return the cosine of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(cosh, cosh, 1,
      "cosh($module, x, /)\n--\n\n"
      "Return the hyperbolic cosine of x.")
FUNC1A(erf, erf,
       "erf($module, x, /)\n--\n\n"
       "Error function at x.")
FUNC1A(erfc, erfc,
       "erfc($module, x, /)\n--\n\n"
       "Complementary error function at x.")
FUNC1(exp, exp, 1,
      "exp($module, x, /)\n--\n\n"
      "Return e raised to the power of x.")
FUNC1(exp2, exp2, 1,
      "exp2($module, x, /)\n--\n\n"
      "Return 2 raised to the power of x.")
FUNC1(expm1, expm1, 1,
      "expm1($module, x, /)\n--\n\n"
      "Return exp(x)-1.\n\n"
      "This function avoids the loss of precision involved in the direct "
      "evaluation of exp(x)-1 for small x.")
FUNC1(fabs, fabs, 0,
      "fabs($module, x, /)\n--\n\n"
      "Return the absolute value of the float x.")
/*[clinic input]
math.floor

    x as number: object
    /

Return the floor of x as an Integral.

This is the largest integer <= x.
[clinic start generated code]*/

static PyObject *
math_floor(PyObject *module, PyObject *number)
/*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
{
    double x;

    if (PyFloat_CheckExact(number)) {
        x = PyFloat_AS_DOUBLE(number);
    }
    else {
        PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__floor__));
        if (result != NULL) {
            return result;
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }
        x = PyFloat_AsDouble(number);
        if (x == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
    }
    return PyLong_FromDouble(floor(x));
}
/*[clinic input]
math.fmax -> double

    x: double
    y: double
    /

Return the larger of two floating-point arguments.
[clinic start generated code]*/

static double
math_fmax_impl(PyObject *module, double x, double y)
/*[clinic end generated code: output=00692358d312fee2 input=021596c027336ffe]*/
{
    return fmax(x, y);
}

/*[clinic input]
math.fmin -> double

    x: double
    y: double
    /

Return the smaller of two floating-point arguments.
[clinic start generated code]*/

static double
math_fmin_impl(PyObject *module, double x, double y)
/*[clinic end generated code: output=3d5b7826bd292dd9 input=d12e64ccc33f878a]*/
{
    return fmin(x, y);
}
FUNC1AD(gamma, m_tgamma,
      "gamma($module, x, /)\n--\n\n"
      "Gamma function at x.",
      "expected a noninteger or positive integer, got %s")
FUNC1AD(lgamma, m_lgamma,
      "lgamma($module, x, /)\n--\n\n"
      "Natural logarithm of absolute value of Gamma function at x.",
      "expected a noninteger or positive integer, got %s")
FUNC1D(log1p, m_log1p, 0,
      "log1p($module, x, /)\n--\n\n"
      "Return the natural logarithm of 1+x (base e).\n\n"
      "The result is computed in a way which is accurate for x near zero.",
      "expected argument value > -1, got %s")
FUNC2(remainder, m_remainder,
      "remainder($module, x, y, /)\n--\n\n"
      "Difference between x and the closest integer multiple of y.\n\n"
      "Return x - n*y where n*y is the closest integer multiple of y.\n"
      "In the case where x is exactly halfway between two multiples of\n"
      "y, the nearest even value of n is used. The result is always exact.")

/*[clinic input]
math.signbit

    x: double
    /

Return True if the sign of x is negative and False otherwise.
[clinic start generated code]*/

static PyObject *
math_signbit_impl(PyObject *module, double x)
/*[clinic end generated code: output=20c5f20156a9b871 input=3d3493fbcb5bdb3e]*/
{
    return PyBool_FromLong(signbit(x));
}

FUNC1D(sin, sin, 0,
      "sin($module, x, /)\n--\n\n"
      "Return the sine of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(sinh, sinh, 1,
      "sinh($module, x, /)\n--\n\n"
      "Return the hyperbolic sine of x.")
FUNC1D(sqrt, sqrt, 0,
      "sqrt($module, x, /)\n--\n\n"
      "Return the square root of x.",
      "expected a nonnegative input, got %s")
FUNC1D(tan, tan, 0,
      "tan($module, x, /)\n--\n\n"
      "Return the tangent of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(tanh, tanh, 0,
      "tanh($module, x, /)\n--\n\n"
      "Return the hyperbolic tangent of x.")
/* Precision summation function as msum() by Raymond Hettinger in
   <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
   enhanced with the exact partials sum and roundoff from Mark
   Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
   See those links for more details, proofs and other references.

   Note 1: IEEE 754 floating-point semantics with a rounding mode of
   roundTiesToEven are assumed.

   Note 2: No provision is made for intermediate overflow handling;
   therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
   fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
   overflow of the first partial sum.

   Note 3: The algorithm has two potential sources of fragility.  First, C
   permits arithmetic operations on `double`s to be performed in an
   intermediate format whose range and precision may be greater than those of
   `double` (see for example C99 §5.2.4.2.2, paragraph 8).  This can happen for
   example on machines using the now largely historical x87 FPUs.  In this case,
   `fsum` can produce incorrect results.  If `FLT_EVAL_METHOD` is `0` or `1`, or
   `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
   should be safe from this source of errors.  Second, an aggressively
   optimizing compiler can re-associate operations so that (for example) the
   statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
   re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`.  That
   re-association would be in violation of the C standard, and should not occur
   except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
   -fassociative-math).  Such optimizations should be avoided for this module.

   Note 4: The signature of math.fsum() differs from builtins.sum()
   because the start argument doesn't make sense in the context of
   accurate summation.  Since the partials table is collapsed before
   returning a result, sum(seq2, start=sum(seq1)) may not equal the
   accurate result returned by sum(itertools.chain(seq1, seq2)).
*/
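/*
   Illustrative sketch (hypothetical, not part of the build): the exact
   "hi/lo" step that the fsum() inner loop relies on.  Under IEEE 754
   round-to-nearest, hi is the rounded sum and lo the roundoff, so that
   hi + lo == x + y exactly.  It is the same transformation as
   dl_fast_sum() above, with the swap making |x| >= |y| first.
*/
#if 0
static void
demo_hi_lo_step(double x, double y, double *hi, double *lo)
{
    if (fabs(x) < fabs(y)) {
        double t = x; x = y; y = t;   /* ensure |x| >= |y| */
    }
    *hi = x + y;
    *lo = y - (*hi - x);              /* exact roundoff of x + y */
}
#endif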
#define NUM_PARTIALS  32  /* initial partials array size, on stack */

/* Extend the partials array p[] by doubling its size. */
static int                          /* non-zero on error */
_fsum_realloc(double **p_ptr, Py_ssize_t  n,
              double  *ps, Py_ssize_t *m_ptr)
{
    void *v = NULL;
    Py_ssize_t m = *m_ptr;

    m += m;  /* double */
    if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
        double *p = *p_ptr;
        if (p == ps) {
            v = PyMem_Malloc(sizeof(double) * m);
            if (v != NULL)
                memcpy(v, ps, sizeof(double) * n);
        }
        else
            v = PyMem_Realloc(p, sizeof(double) * m);
    }
    if (v == NULL) {        /* size overflow or no memory */
        PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
        return 1;
    }
    *p_ptr = (double*) v;
    *m_ptr = m;
    return 0;
}
/* Full precision summation of a sequence of floats.

   def msum(iterable):
       partials = []               # sorted, non-overlapping partial sums
       for x in iterable:
           i = 0
           for y in partials:
               if abs(x) < abs(y):
                   x, y = y, x
               hi = x + y
               lo = y - (hi - x)
               if lo:
                   partials[i] = lo
                   i += 1
               x = hi
           partials[i:] = [x]
       return sum_exact(partials)

   Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
   are exactly equal to x+y.  The inner loop applies hi/lo summation to each
   partial so that the list of partial sums remains exact.

   Sum_exact() adds the partial sums exactly and correctly rounds the final
   result (using the round-half-to-even rule).  The items in partials remain
   non-zero, non-special, non-overlapping and strictly increasing in
   magnitude, but possibly not all having the same sign.

   Depends on IEEE 754 arithmetic guarantees and half-even rounding.
*/

/*[clinic input]
math.fsum

    seq: object
    /

Return an accurate floating-point sum of values in the iterable seq.

Assumes IEEE-754 floating-point arithmetic.
[clinic start generated code]*/
static PyObject *
math_fsum(PyObject *module, PyObject *seq)
/*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
{
    PyObject *item, *iter, *sum = NULL;
    Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
    double x, y, t, ps[NUM_PARTIALS], *p = ps;
    double xsave, special_sum = 0.0, inf_sum = 0.0;
    double hi, yr, lo = 0.0;

    iter = PyObject_GetIter(seq);
    if (iter == NULL)
        return NULL;

    for(;;) {           /* for x in iterable */
        assert(0 <= n && n <= m);
        assert((m == NUM_PARTIALS && p == ps) ||
               (m >  NUM_PARTIALS && p != NULL));

        item = PyIter_Next(iter);
        if (item == NULL) {
            if (PyErr_Occurred())
                goto _fsum_error;
            break;
        }
        ASSIGN_DOUBLE(x, item, error_with_item);
        Py_DECREF(item);

        xsave = x;
        for (i = j = 0; j < n; j++) {       /* for y in partials */
            y = p[j];
            if (fabs(x) < fabs(y)) {
                t = x; x = y; y = t;
            }
            hi = x + y;
            yr = hi - x;
            lo = y - yr;
            if (lo != 0.0)
                p[i++] = lo;
            x = hi;
        }

        n = i;                              /* ps[i:] = [x] */
        if (x != 0.0) {
            if (! isfinite(x)) {
                /* a nonfinite x could arise either as
                   a result of intermediate overflow, or
                   as a result of a nan or inf in the
                   summands */
                if (isfinite(xsave)) {
                    PyErr_SetString(PyExc_OverflowError,
                                    "intermediate overflow in fsum");
                    goto _fsum_error;
                }
                if (isinf(xsave))
                    inf_sum += xsave;
                special_sum += xsave;
                /* reset partials */
                n = 0;
            }
            else if (n >= m && _fsum_realloc(&p, n, ps, &m))
                goto _fsum_error;
            else
                p[n++] = x;
        }
    }

    if (special_sum != 0.0) {
        if (isnan(inf_sum))
            PyErr_SetString(PyExc_ValueError,
                            "-inf + inf in fsum");
        else
            sum = PyFloat_FromDouble(special_sum);
        goto _fsum_error;
    }

    hi = 0.0;
    if (n > 0) {
        hi = p[--n];
        /* sum_exact(ps, hi) from the top, stop when the sum becomes
           inexact. */
        while (n > 0) {
            x = hi;
            y = p[--n];
            assert(fabs(y) < fabs(x));
            hi = x + y;
            yr = hi - x;
            lo = y - yr;
            if (lo != 0.0)
                break;
        }
        /* Make half-even rounding work across multiple partials.
           Needed so that sum([1e-16, 1, 1e16]) will round-up the last
           digit to two instead of down to zero (the 1e-16 makes the 1
           slightly closer to two).  With a potential 1 ULP rounding
           error fixed-up, math.fsum() can guarantee commutativity. */
        if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
                      (lo > 0.0 && p[n-1] > 0.0))) {
            y = lo * 2.0;
            x = hi + y;
            yr = x - hi;
            if (y == yr)
                hi = x;
        }
    }
    sum = PyFloat_FromDouble(hi);

  _fsum_error:
    Py_DECREF(iter);
    if (p != ps)
        PyMem_Free(p);
    return sum;

  error_with_item:
    Py_DECREF(item);
    goto _fsum_error;
}
static unsigned long
count_set_bits(unsigned long n)
{
    unsigned long count = 0;
    while (n != 0) {
        ++count;
        n &= n - 1;  /* clear least significant bit */
    }
    return count;
}
/* Integer square root

   Given a nonnegative integer `n`, we want to compute the largest integer
   `a` for which `a * a <= n`, or equivalently the integer part of the exact
   square root of `n`.

   We use an adaptive-precision pure-integer version of Newton's iteration. Given
   a positive integer `n`, the algorithm produces at each iteration an integer
   approximation `a` to the square root of `n >> s` for some even integer `s`,
   with `s` decreasing as the iterations progress. On the final iteration, `s` is
   zero and we have an approximation to the square root of `n` itself.

   At every step, the approximation `a` is strictly within 1.0 of the true square
   root, so we have

       (a - 1)**2 < (n >> s) < (a + 1)**2

   After the final iteration, a check-and-correct step is needed to determine
   whether `a` or `a - 1` gives the desired integer square root of `n`.

   The algorithm is remarkable in its simplicity. There's no need for a
   per-iteration check-and-correct step, and termination is straightforward: the
   number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
   for `n > 1`). The only tricky part of the correctness proof is in establishing
   that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
   iteration to the next. A sketch of the proof of this is given below.

   In addition to the proof sketch, a formal, computer-verified proof
   of correctness (using Lean) of an equivalent recursive algorithm can be found
   here:

       https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean

   Here's Python code equivalent to the C implementation below:

       import operator

       def isqrt(n):
           """
           Return the integer part of the square root of the input.
           """
           n = operator.index(n)

           if n < 0:
               raise ValueError("isqrt() argument must be nonnegative")
           if n == 0:
               return 0

           c = (n.bit_length() - 1) // 2
           a = 1
           d = 0
           for s in reversed(range(c.bit_length())):
               # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
               e = d
               d = c >> s
               a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

           return a - (a*a > n)

   Sketch of proof of correctness
   ------------------------------

   The delicate part of the correctness proof is showing that the loop invariant
   is preserved from one iteration to the next. That is, just before the line

       a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

   is executed in the above code, we know that

       (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.

   (since `e` is always the value of `d` from the previous iteration). We must
   prove that after that line is executed, we have

       (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2

   To facilitate the proof, we make some changes of notation. Write `m` for
   `n >> 2*(c-d)`, and write `b` for the new value of `a`, so

       b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

   or equivalently:

       (2) b = (a << d - e - 1) + (m >> d - e + 1) // a

   Then we can rewrite (1) as:

       (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2

   and we must show that (b - 1)**2 < m < (b + 1)**2.

   From this point on, we switch to mathematical notation, so `/` means exact
   division rather than integer division and `^` is used for exponentiation. We
   use the `√` symbol for the exact square root. In (3), we can remove the
   implicit floor operation to give:

       (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2

   Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives

       (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)

   Squaring and dividing through by `2^(d-e+1) a` gives

       (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a

   We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
   right-hand side of (6) with `1`, and now replacing the central
   term `m / (2^(d-e+1) a)` with its floor in (6) gives

       (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1

   Or equivalently, from (2):

       (8) -1 < b - √m < 1

   and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
   to prove.

   We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
   a` that was used to get line (7) above. From the definition of `c`, we have
   `4^c <= n`, which implies

       4^d <= m

   also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
   that `2d - 2e - 1 <= d` and hence that

       (9) 4^(2d - 2e - 1) <= m

   Dividing both sides by `4^(d - e)` gives

       (10) 4^(d - e - 1) <= m / 4^(d - e)

   But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence

       (11) 4^(d - e - 1) < (a + 1)^2

   Now taking square roots of both sides and observing that both `2^(d-e-1)` and
   `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
   completes the proof sketch.
*/
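/*
   Illustrative sketch (hypothetical, not part of the build): a direct C
   translation of the reference Python code above, restricted to unsigned
   64-bit inputs and using GCC/Clang bit-count builtins for bit_length().
   The module's real implementation below uses a lookup table plus a fixed
   number of Newton steps instead.
*/
#if 0
static uint64_t
demo_isqrt_u64(uint64_t n)
{
    if (n == 0) {
        return 0;
    }
    /* c = (bit_length(n) - 1) // 2 */
    int c = (63 - __builtin_clzll(n)) / 2;
    uint64_t a = 1;
    int d = 0;
    /* bit_length(c); c <= 31, so the 32-bit builtin suffices */
    int c_bits = (c == 0) ? 0 : 32 - __builtin_clz((unsigned)c);
    for (int s = c_bits - 1; s >= 0; s--) {
        /* Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2 */
        int e = d;
        d = c >> s;
        a = (a << (d - e - 1)) + (n >> (2*c - e - d + 1)) / a;
    }
    /* a*a > n  <=>  a > n/a; phrased this way to avoid 64-bit overflow */
    return a - (a > n / a);
}
#endif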
/*
   The _approximate_isqrt_tab table provides approximate square roots for
   16-bit integers. For any n in the range 2**14 <= n < 2**16, the value

       a = _approximate_isqrt_tab[(n >> 8) - 64]

   is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.

   The table was computed in Python using the expression:

       [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
*/

static const uint8_t _approximate_isqrt_tab[192] = {
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
    140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
    151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
    160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
    170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
    179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
    187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
    195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
    203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
    210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
    217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
    224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
    231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
    238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
    244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
    250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
};
/* Approximate square root of a large 64-bit integer.

   Given `n` satisfying `2**62 <= n < 2**64`, return `a`
   satisfying `(a - 1)**2 < n < (a + 1)**2`. */

static inline uint32_t
_approximate_isqrt(uint64_t n)
{
    uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
    u = (u << 7) + (uint32_t)(n >> 41) / u;
    return (u << 15) + (uint32_t)((n >> 17) / u);
}
/*[clinic input]
math.isqrt

    n: object
    /

Return the integer part of the square root of the input.
[clinic start generated code]*/

static PyObject *
math_isqrt(PyObject *module, PyObject *n)
/*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
{
    int a_too_large, c_bit_length;
    int64_t c, d;
    uint64_t m;
    uint32_t u;
    PyObject *a = NULL, *b;

    n = _PyNumber_Index(n);
    if (n == NULL) {
        return NULL;
    }

    if (_PyLong_IsNegative((PyLongObject *)n)) {
        PyErr_SetString(
            PyExc_ValueError,
            "isqrt() argument must be nonnegative");
        goto error;
    }
    if (_PyLong_IsZero((PyLongObject *)n)) {
        Py_DECREF(n);
        return PyLong_FromLong(0);
    }

    /* c = (n.bit_length() - 1) // 2 */
    c = _PyLong_NumBits(n);
    assert(c > 0);
    assert(!PyErr_Occurred());
    c = (c - 1) / 2;

    /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
       fast, almost branch-free algorithm. */
    if (c <= 31) {
        int shift = 31 - (int)c;
        m = (uint64_t)PyLong_AsUnsignedLongLong(n);
        Py_DECREF(n);
        if (m == (uint64_t)(-1) && PyErr_Occurred()) {
            return NULL;
        }
        u = _approximate_isqrt(m << 2*shift) >> shift;
        u -= (uint64_t)u * u > m;
        return PyLong_FromUnsignedLong(u);
    }

    /* Slow path: n >= 2**64. We perform the first five iterations in C integer
       arithmetic, then switch to using Python long integers. */

    /* From n >= 2**64 it follows that c.bit_length() >= 6. */
    c_bit_length = 6;
    while ((c >> c_bit_length) > 0) {
        ++c_bit_length;
    }

    /* Initialise d and a. */
    d = c >> (c_bit_length - 5);
    b = _PyLong_Rshift(n, 2*c - 62);
    if (b == NULL) {
        goto error;
    }
    m = (uint64_t)PyLong_AsUnsignedLongLong(b);
    Py_DECREF(b);
    if (m == (uint64_t)(-1) && PyErr_Occurred()) {
        goto error;
    }
    u = _approximate_isqrt(m) >> (31U - d);
    a = PyLong_FromUnsignedLong(u);
    if (a == NULL) {
        goto error;
    }

    for (int s = c_bit_length - 6; s >= 0; --s) {
        PyObject *q;
        int64_t e = d;

        d = c >> s;

        /* q = (n >> 2*c - e - d + 1) // a */
        q = _PyLong_Rshift(n, 2*c - d - e + 1);
        if (q == NULL) {
            goto error;
        }
        Py_SETREF(q, PyNumber_FloorDivide(q, a));
        if (q == NULL) {
            goto error;
        }

        /* a = (a << d - 1 - e) + q */
        Py_SETREF(a, _PyLong_Lshift(a, d - 1 - e));
        if (a == NULL) {
            Py_DECREF(q);
            goto error;
        }
        Py_SETREF(a, PyNumber_Add(a, q));
        Py_DECREF(q);
        if (a == NULL) {
            goto error;
        }
    }

    /* The correct result is either a or a - 1. Figure out which, and
       decrement a if necessary. */

    /* a_too_large = n < a * a */
    b = PyNumber_Multiply(a, a);
    if (b == NULL) {
        goto error;
    }
    a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
    Py_DECREF(b);
    if (a_too_large == -1) {
        goto error;
    }
    if (a_too_large) {
        Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
    }
    Py_DECREF(n);
    return a;

  error:
    Py_XDECREF(a);
    Py_DECREF(n);
    return NULL;
}
/* Divide-and-conquer factorial algorithm
 *
 * Based on the formula and pseudo-code provided at:
 * http://www.luschny.de/math/factorial/binarysplitfact.html
 *
 * Faster algorithms exist, but they're more complicated and depend on
 * a fast prime factorization algorithm.
 *
 * Notes on the algorithm
 * ----------------------
 *
 * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
 * computed separately, and then combined using a left shift.
 *
 * The function factorial_odd_part computes the odd part m (i.e., the greatest
 * odd divisor) of factorial(n), using the formula:
 *
 *   factorial_odd_part(n) =
 *
 *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
 *
 * Example: factorial_odd_part(20) =
 *
 *        (1) *
 *        (1) *
 *        (1 * 3 * 5) *
 *        (1 * 3 * 5 * 7 * 9) *
 *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
 *
 * Here i goes from large to small: the first term corresponds to i=4 (any
 * larger i gives an empty product), and the last term corresponds to i=0.
 * Each term can be computed from the last by multiplying by the extra odd
 * numbers required: e.g., to get from the penultimate term to the last one,
 * we multiply by (11 * 13 * 15 * 17 * 19).
 *
 * To see a hint of why this formula works, here are the same numbers as above
 * but with the even parts (i.e., the appropriate powers of 2) included.  For
 * each subterm in the product for i, we multiply that subterm by 2**i:
 *
 *   factorial(20) =
 *
 *        (16) *
 *        (8) *
 *        (4 * 12 * 20) *
 *        (2 * 6 * 10 * 14 * 18) *
 *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
 *
 * The factorial_partial_product function computes the product of all odd j in
 * range(start, stop) for given start and stop.  It's used to compute the
 * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
 * operates recursively, repeatedly splitting the range into two roughly equal
 * pieces until the subranges are small enough to be computed using only C
 * integer arithmetic.
 *
 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
 * the factorial) is computed independently in the main math_factorial
 * function.  By standard results, its value is:
 *
 *    two_valuation = n//2 + n//4 + n//8 + ....
 *
 * It can be shown (e.g., by complete induction on n) that two_valuation is
 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
 * '1'-bits in the binary expansion of n.
 */
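/*
   Illustrative check (hypothetical, not part of the build): for n = 20 the
   two-valuation is 20//2 + 20//4 + 20//8 + 20//16 = 10 + 5 + 2 + 1 = 18,
   which indeed equals n - count_set_bits(n) = 20 - 2.
*/
#if 0
static void
demo_two_valuation(void)
{
    unsigned long n = 20;
    unsigned long by_division = n/2 + n/4 + n/8 + n/16;
    assert(by_division == n - count_set_bits(n));   /* 18 == 18 */
}
#endif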
/* factorial_partial_product: Compute product(range(start, stop, 2)) using
 * divide and conquer.  Assumes start and stop are odd and stop > start.
 * max_bits must be >= bit_length(stop - 2). */

static PyObject *
factorial_partial_product(unsigned long start, unsigned long stop,
                          unsigned long max_bits)
{
    unsigned long midpoint, num_operands;
    PyObject *left = NULL, *right = NULL, *result = NULL;

    /* If the return value will fit an unsigned long, then we can
     * multiply in a tight, fast loop where each multiply is O(1).
     * Compute an upper bound on the number of bits required to store
     * the answer.
     *
     * Storing some integer z requires floor(lg(z))+1 bits, which is
     * conveniently the value returned by bit_length(z).  The
     * product x*y will require at most
     * bit_length(x) + bit_length(y) bits to store, based
     * on the idea that lg product = lg x + lg y.
     *
     * We know that stop - 2 is the largest number to be multiplied.  From
     * there, we have: bit_length(answer) <= num_operands *
     * bit_length(stop - 2)
     */

    num_operands = (stop - start) / 2;
    /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
     * unlikely case of an overflow in num_operands * max_bits. */
    if (num_operands <= 8 * SIZEOF_LONG &&
        num_operands * max_bits <= 8 * SIZEOF_LONG) {
        unsigned long j, total;
        for (total = start, j = start + 2; j < stop; j += 2)
            total *= j;
        return PyLong_FromUnsignedLong(total);
    }

    /* find midpoint of range(start, stop), rounded up to next odd number. */
    midpoint = (start + num_operands) | 1;
    left = factorial_partial_product(start, midpoint,
                                     _Py_bit_length(midpoint - 2));
    if (left == NULL)
        goto error;
    right = factorial_partial_product(midpoint, stop, max_bits);
    if (right == NULL)
        goto error;
    result = PyNumber_Multiply(left, right);

  error:
    Py_XDECREF(left);
    Py_XDECREF(right);
    return result;
}
/* factorial_odd_part: compute the odd part of factorial(n). */

static PyObject *
factorial_odd_part(unsigned long n)
{
    long i;
    unsigned long v, lower, upper;
    PyObject *partial, *tmp, *inner, *outer;

    inner = PyLong_FromLong(1);
    if (inner == NULL)
        return NULL;
    outer = Py_NewRef(inner);

    upper = 3;
    for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
        v = n >> i;
        if (v <= 2)
            continue;
        lower = upper;
        /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
        upper = (v + 1) | 1;
        /* Here inner is the product of all odd integers j in the range (0,
           n/2**(i+1)].  The factorial_partial_product call below gives the
           product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
        partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
        /* inner *= partial */
        if (partial == NULL)
            goto error;
        tmp = PyNumber_Multiply(inner, partial);
        Py_DECREF(partial);
        if (tmp == NULL)
            goto error;
        Py_SETREF(inner, tmp);
        /* Now inner is the product of all odd integers j in the range (0,
           n/2**i], giving the inner product in the formula above. */

        /* outer *= inner; */
        tmp = PyNumber_Multiply(outer, inner);
        if (tmp == NULL)
            goto error;
        Py_SETREF(outer, tmp);
    }
    Py_DECREF(inner);
    return outer;

  error:
    Py_DECREF(outer);
    Py_DECREF(inner);
    return NULL;
}
/* Lookup table for small factorial values */

static const unsigned long SmallFactorials[] = {
    1, 1, 2, 6, 24, 120, 720, 5040, 40320,
    362880, 3628800, 39916800, 479001600,
#if SIZEOF_LONG >= 8
    6227020800, 87178291200, 1307674368000,
    20922789888000, 355687428096000, 6402373705728000,
    121645100408832000, 2432902008176640000
#endif
};
2065 math_factorial(PyObject
*module
, PyObject
*arg
)
2066 /*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/
2068 long x
, two_valuation
;
2070 PyObject
*result
, *odd_part
;
2072 x
= PyLong_AsLongAndOverflow(arg
, &overflow
);
2073 if (x
== -1 && PyErr_Occurred()) {
2076 else if (overflow
== 1) {
2077 PyErr_Format(PyExc_OverflowError
,
2078 "factorial() argument should not exceed %ld",
2082 else if (overflow
== -1 || x
< 0) {
2083 PyErr_SetString(PyExc_ValueError
,
2084 "factorial() not defined for negative values");
2088 /* use lookup table if x is small */
2089 if (x
< (long)Py_ARRAY_LENGTH(SmallFactorials
))
2090 return PyLong_FromUnsignedLong(SmallFactorials
[x
]);
2092 /* else express in the form odd_part * 2**two_valuation, and compute as
2093 odd_part << two_valuation. */
2094 odd_part
= factorial_odd_part(x
);
2095 if (odd_part
== NULL
)
2097 two_valuation
= x
- count_set_bits(x
);
2098 result
= _PyLong_Lshift(odd_part
, two_valuation
);
2099 Py_DECREF(odd_part
);
/*[clinic input]
math.trunc

    x: object
    /

Truncates the Real x to the nearest Integral toward 0.

Uses the __trunc__ magic method.
[clinic start generated code]*/

static PyObject *
math_trunc(PyObject *module, PyObject *x)
/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
{
    if (PyFloat_CheckExact(x)) {
        return PyFloat_Type.tp_as_number->nb_int(x);
    }

    PyObject *result = _PyObject_MaybeCallSpecialNoArgs(x, &_Py_ID(__trunc__));
    if (result != NULL) {
        return result;
    }
    else if (!PyErr_Occurred()) {
        PyErr_Format(PyExc_TypeError,
                     "type %.100s doesn't define __trunc__ method",
                     Py_TYPE(x)->tp_name);
    }
    return NULL;
}
/*[clinic input]
math.frexp

    x: double
    /

Return the mantissa and exponent of x, as pair (m, e).

m is a float and e is an int, such that x = m * 2.**e.
If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
[clinic start generated code]*/

static PyObject *
math_frexp_impl(PyObject *module, double x)
/*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
{
    int i;
    /* deal with special cases directly, to sidestep platform
       differences */
    if (isnan(x) || isinf(x) || !x) {
        i = 0;
    }
    else {
        x = frexp(x, &i);
    }
    return Py_BuildValue("(di)", x, i);
}
/*[clinic input]
math.ldexp

    x: double
    i: object
    /

Return x * (2**i).

This is essentially the inverse of frexp().
[clinic start generated code]*/

static PyObject *
math_ldexp_impl(PyObject *module, double x, PyObject *i)
/*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
{
    double r;
    long exp;
    int overflow;

    if (PyLong_Check(i)) {
        /* on overflow, replace exponent with either LONG_MAX
           or LONG_MIN, depending on the sign. */
        exp = PyLong_AsLongAndOverflow(i, &overflow);
        if (exp == -1 && PyErr_Occurred())
            return NULL;
        if (overflow)
            exp = overflow < 0 ? LONG_MIN : LONG_MAX;
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "Expected an int as second argument to ldexp.");
        return NULL;
    }

    if (x == 0. || !isfinite(x)) {
        /* NaNs, zeros and infinities are returned unchanged */
        r = x;
        errno = 0;
    } else if (exp > INT_MAX) {
        /* overflow */
        r = copysign(Py_INFINITY, x);
        errno = ERANGE;
    } else if (exp < INT_MIN) {
        /* underflow to +-0 */
        r = copysign(0., x);
        errno = 0;
    } else {
        errno = 0;
        r = ldexp(x, (int)exp);
#ifdef _MSC_VER
        if (DBL_MIN > r && r > -DBL_MIN) {
            /* Denormal (or zero) results can be incorrectly rounded here (rather,
               truncated).  Fixed in newer versions of the C runtime, included
               with Windows 11. */
            int original_exp;
            frexp(x, &original_exp);
            if (original_exp > DBL_MIN_EXP) {
                /* Shift down to the smallest normal binade.  No bits lost. */
                int shift = DBL_MIN_EXP - original_exp;
                x = ldexp(x, shift);
                exp -= shift;
            }
            /* Multiplying by 2**exp finishes the job, and the HW will round as
               appropriate.  Note: if exp < -DBL_MANT_DIG, all of x is shifted
               to be < 0.5ULP of smallest denorm, so should be thrown away.  If
               exp is so very negative that ldexp underflows to 0, that's fine;
               no need to check in advance. */
            r = x*ldexp(1.0, (int)exp);
        }
#endif
        if (isinf(r))
            errno = ERANGE;
    }

    if (errno && is_error(r, 1))
        return NULL;
    return PyFloat_FromDouble(r);
}
/*[clinic input]
math.modf

    x: double
    /

Return the fractional and integer parts of x.

Both results carry the sign of x and are floats.
[clinic start generated code]*/

static PyObject *
math_modf_impl(PyObject *module, double x)
/*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
{
    double y;
    /* some platforms don't do the right thing for NaNs and
       infinities, so we take care of special cases directly. */
    if (isinf(x))
        return Py_BuildValue("(dd)", copysign(0., x), x);
    else if (isnan(x))
        return Py_BuildValue("(dd)", x, x);

    errno = 0;
    x = modf(x, &y);
    return Py_BuildValue("(dd)", x, y);
}
/* A decent logarithm is easy to compute even for huge ints, but libm can't
   do that by itself -- loghelper can.  func is log or log10, and name is
   "log" or "log10".  Note that overflow of the result isn't possible: an int
   can contain no more than INT_MAX * SHIFT bits, so has value certainly less
   than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
   small enough to fit in an IEEE single.  log and log10 are even smaller.
   However, intermediate overflow is possible for an int if the number of bits
   in that int is larger than PY_SSIZE_T_MAX. */
2285 loghelper(PyObject
* arg
, double (*func
)(double))
2287 /* If it is int, do it ourselves. */
2288 if (PyLong_Check(arg
)) {
2292 /* Negative or zero inputs give a ValueError. */
2293 if (!_PyLong_IsPositive((PyLongObject
*)arg
)) {
2294 /* The input can be an arbitrary large integer, so we
2295 don't include it's value in the error message. */
2296 PyErr_SetString(PyExc_ValueError
,
2297 "expected a positive input");
2301 x
= PyLong_AsDouble(arg
);
2302 if (x
== -1.0 && PyErr_Occurred()) {
2303 if (!PyErr_ExceptionMatches(PyExc_OverflowError
))
2305 /* Here the conversion to double overflowed, but it's possible
2306 to compute the log anyway. Clear the exception and continue. */
2308 x
= _PyLong_Frexp((PyLongObject
*)arg
, &e
);
2310 assert(!PyErr_Occurred());
2311 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2312 result
= func(x
) + func(2.0) * e
;
2315 /* Successfully converted x to a double. */
2317 return PyFloat_FromDouble(result
);
2320 /* Else let libm handle it by itself. */
2321 return math_1(arg
, func
, 0, "expected a positive input, got %s");
2325 /* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2326 for support of multiple signatures */
2328 math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
2330 PyObject *num, *den;
2333 if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2336 num = loghelper(args[0], m_log);
2337 if (num == NULL || nargs == 1)
2340 den = loghelper(args[1], m_log);
2346 ans = PyNumber_TrueDivide(num, den);
2352 PyDoc_STRVAR(math_log_doc,
2353 "log(x, [base=math.e])\n\
2354 Return the logarithm of x to the given base.\n\n\
2355 If the base is not specified, returns the natural logarithm (base e) of x.");
2363 Return the base 2 logarithm of x.
2364 [clinic start generated code]*/
2367 math_log2(PyObject *module, PyObject *x)
2368 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2370 return loghelper(x, m_log2);
2380 Return the base 10 logarithm of x.
2381 [clinic start generated code]*/
2384 math_log10(PyObject *module, PyObject *x)
2385 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2387 return loghelper(x, m_log10);
2399 Fused multiply-add operation.
2401 Compute (x * y) + z with a single round.
2402 [clinic start generated code]*/
2405 math_fma_impl(PyObject *module, double x, double y, double z)
2406 /*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2408 double r = fma(x, y, z);
2410 /* Fast path: if we got a finite result, we're done. */
2412 return PyFloat_FromDouble(r);
2415 /* Non-finite result. Raise an exception if appropriate, else return r. */
2417 if (!isnan(x) && !isnan(y) && !isnan(z)) {
2418 /* NaN result from non-NaN inputs. */
2419 PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2423 else if (isfinite(x) && isfinite(y) && isfinite(z)) {
2424 /* Infinite result from finite inputs. */
2425 PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2429 return PyFloat_FromDouble(r);
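/* Besides being a single correctly rounded operation, fma() also makes it
   easy to recover the exact rounding error of a double product -- the same
   error-free transformation the accurate hypot()/dist()/sumprod() paths rely
   on.  A minimal sketch with illustrative names:

       #include <math.h>

       static void
       exact_product(double x, double y, double *hi, double *lo)
       {
           *hi = x * y;             // rounded product
           *lo = fma(x, y, -*hi);   // exact remainder: x*y == *hi + *lo
       }

   For example, with x == y == 1e16 the rounded product *hi is 1e32 and *lo
   recovers the nonzero part lost to rounding. */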
2440 Return fmod(x, y), according to platform C.
2443 [clinic start generated code]*/
2446 math_fmod_impl(PyObject *module, double x, double y)
2447 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2450 /* fmod(x, +/-Inf) returns x for finite x. */
2451 if (isinf(y) && isfinite(x))
2452 return PyFloat_FromDouble(x);
2456 /* Windows (e.g. Windows 10 with MSC v.1916) loses the sign
2457 of a zero result. But C99+ says: "if y is nonzero, the result
2458 has the same sign as x". */
2460 if (r == 0.0 && y != 0.0) {
2465 if (!isnan(x) && !isnan(y))
2470 if (errno && is_error(r, 1))
2473 return PyFloat_FromDouble(r);
2477 Given a *vec* of values, compute the vector norm:
2479 sqrt(sum(x ** 2 for x in vec))
2481 The *max* variable should be equal to the largest fabs(x).
2482 The *n* variable is the length of *vec*.
2483 If n==0, then *max* should be 0.0.
2484 If an infinity is present in the vec, *max* should be INF.
2485 The *found_nan* variable indicates whether some member of the *vec* is a NaN.
2488 To avoid overflow/underflow and to achieve high accuracy giving results
2489 that are almost always correctly rounded, four techniques are used:
2491 * lossless scaling using a power-of-two scaling factor
2492 * accurate squaring using Veltkamp-Dekker splitting [1]
2493 or an equivalent with an fma() call
2494 * compensated summation using a variant of the Neumaier algorithm [2]
2495 * differential correction of the square root [3]
2497 The usual presentation of the Neumaier summation algorithm has an
2498 expensive branch depending on which operand has the larger
2499 magnitude. We avoid this cost by arranging the calculation so that
2500 fabs(csum) is always as large as fabs(x).
2502 To establish the invariant, *csum* is initialized to 1.0 which is
2503 always larger than x**2 after scaling or after division by *max*.
2504 After the loop is finished, the initial 1.0 is subtracted out for a
2505 net zero effect on the final sum. Since *csum* will be greater than
2506 1.0, the subtraction of 1.0 will not cause fractional digits to be
2507 dropped from *csum*.
2509 To get the full benefit from compensated summation, the largest
2510 addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2511 scaling or division by *max* should not be skipped even if not
2512 otherwise needed to prevent overflow or loss of precision.
2514 The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2515 gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2516 algorithm gives a *hi* value that is correctly rounded to half
2517 precision. When a value at or below 1.0 is correctly rounded, it
2518 never goes above 1.0. And when values at or below 1.0 are squared,
2519 they remain at or below 1.0, thus preserving the summation invariant.
2521 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2522 each scaled vector element has a magnitude less than 1.0. After the
2523 Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2524 value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2525 Given that csum >= 1.0, we have:
2526 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2527 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2529 To minimize loss of information during the accumulation of fractional
2530 values, each term has a separate accumulator. This also breaks up
2531 sequential dependencies in the inner loop so the CPU can maximize
2532 floating-point throughput. [4] On an Apple M1 Max, hypot(*vec)
2533 takes only 3.33 µsec when len(vec) == 1000.
2535 The square root differential correction is needed because a
2536 correctly rounded square root of a correctly rounded sum of
2537 squares can still be off by as much as one ulp.
2539 The differential correction starts with a value *x* that is
2540 the difference between the square of *h*, the possibly inaccurately
2541 rounded square root, and the accurately computed sum of squares.
2542 The correction is the first order term of the Maclaurin series
2543 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2545 Essentially, this differential correction is equivalent to one
2546 refinement step in Newton's divide-and-average square root
2547 algorithm, effectively doubling the number of accurate bits.
2548 This technique is used in Dekker's SQRT2 algorithm and again in
2549 Borges' ALGORITHM 4 and 5.
2551 The hypot() function is faithfully rounded (less than 1 ulp error)
2552 and usually correctly rounded (within 1/2 ulp). The squaring
2553 step is exact. The Neumaier summation computes as if in doubled
2554 precision (106 bits) and has the advantage that its input squares
2555 are non-negative so that the condition number of the sum is one.
2556 The square root with a differential correction is likewise computed
2557 as if in doubled precision.
2559 For n <= 1000, prior to the final addition that rounds the overall
2560 result, the internal accuracy of "h" together with its correction of
2561 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2562 against a Decimal implementation with prec=300. After 100 million
2563 trials, no incorrectly rounded examples were found. In addition,
2564 perfect commutativity (all permutations are exactly equal) was
2565 verified for 1 billion random inputs with n=5. [7]
2569 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2570 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2571 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
2572 4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
2573 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2574 6. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
2575 7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
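/* A minimal, self-contained sketch (not the module's implementation) of the
   first three techniques listed above: scale by a power of two, square
   losslessly with fma(), and accumulate with a branch-free compensated sum
   whose running total stays >= 1.0 so that the fast two-sum precondition
   fabs(csum) >= fabs(x*x) always holds.  It assumes max is the largest
   fabs() of the inputs and is normal and nonzero; names are illustrative.

       #include <math.h>

       static double
       scaled_sum_of_squares(const double *v, int n, double max)
       {
           int max_e;
           (void)frexp(max, &max_e);            // max == m * 2**max_e
           double scale = ldexp(1.0, -max_e);   // power-of-two factor: lossless
           double csum = 1.0, frac1 = 0.0, frac2 = 0.0;
           for (int i = 0; i < n; i++) {
               double x = v[i] * scale;         // now fabs(x) < 1.0
               double hi = x * x;
               double lo = fma(x, x, -hi);      // exact squaring error
               double t = csum + hi;            // fast two-sum, csum >= hi
               frac2 += (csum - t) + hi;        // rounding error of the sum
               frac1 += lo;
               csum = t;
           }
           return csum - 1.0 + (frac1 + frac2); // sum of squares of scaled values
       }

   A caller would take sqrt() of this and divide by scale to undo the scaling,
   plus the differential correction described above for the last half ulp. */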
2579 static inline double
2580 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2582 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2583 DoubleLength pr, sm;
2593 if (max == 0.0 || n <= 1) {
2597 if (max_e < -1023) {
2598 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
2599 for (i=0 ; i < n ; i++) {
2600 vec[i] /= DBL_MIN; // convert subnormals to normals
2602 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2604 scale = ldexp(1.0, -max_e);
2605 assert(max * scale >= 0.5);
2606 assert(max * scale < 1.0);
2607 for (i=0 ; i < n ; i++) {
2609 assert(isfinite(x) && fabs(x) <= max);
2610 x *= scale; // lossless scaling
2611 assert(fabs(x) < 1.0);
2612 pr = dl_mul(x, x); // lossless squaring
2613 assert(pr.hi <= 1.0);
2614 sm = dl_fast_sum(csum, pr.hi); // lossless addition
2616 frac1 += pr.lo; // lossy addition
2617 frac2 += sm.lo; // lossy addition
2619 h = sqrt(csum - 1.0 + (frac1 + frac2));
2621 sm = dl_fast_sum(csum, pr.hi);
2625 x = csum - 1.0 + (frac1 + frac2);
2626 h += x / (2.0 * h); // differential correction
2630 #define NUM_STACK_ELEMS 16
2639 Return the Euclidean distance between two points p and q.
2641 The points should be specified as sequences (or iterables) of
2642 coordinates. Both inputs must have the same dimension.
2644 Roughly equivalent to:
2645 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2646 [clinic start generated code]*/
2649 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2650 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2654 double x, px, qx, result;
2656 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2657 double diffs_on_stack[NUM_STACK_ELEMS];
2658 double *diffs = diffs_on_stack;
2660 if (!PyTuple_Check(p)) {
2661 p = PySequence_Tuple(p);
2667 if (!PyTuple_Check(q)) {
2668 q = PySequence_Tuple(q);
2678 m = PyTuple_GET_SIZE(p);
2679 n = PyTuple_GET_SIZE(q);
2681 PyErr_SetString(PyExc_ValueError,
2682 "both points must have the same number of dimensions");
2685 if (n > NUM_STACK_ELEMS) {
2686 diffs = (double *) PyMem_Malloc(n * sizeof(double));
2687 if (diffs == NULL) {
2692 for (i=0 ; i<n ; i++) {
2693 item = PyTuple_GET_ITEM(p, i);
2694 ASSIGN_DOUBLE(px, item, error_exit);
2695 item = PyTuple_GET_ITEM(q, i);
2696 ASSIGN_DOUBLE(qx, item, error_exit);
2699 found_nan |= isnan(x);
2704 result = vector_norm(n, diffs, max, found_nan);
2705 if (diffs != diffs_on_stack) {
2714 return PyFloat_FromDouble(result);
2717 if (diffs != diffs_on_stack) {
2732 *coordinates as args: array
2734 Multidimensional Euclidean distance from the origin to a point.
2736 Roughly equivalent to:
2737 sqrt(sum(x**2 for x in coordinates))
2739 For a two dimensional point (x, y), gives the hypotenuse
2740 using the Pythagorean theorem: sqrt(x*x + y*y).
2742 For example, the hypotenuse of a 3/4/5 right triangle is:
2746 [clinic start generated code]*/
2749 math_hypot_impl(PyObject *module, PyObject * const *args,
2750 Py_ssize_t args_length)
2751 /*[clinic end generated code: output=c9de404e24370068 input=1bceaf7d4fdcd9c2]*/
2758 double coord_on_stack[NUM_STACK_ELEMS];
2759 double *coordinates = coord_on_stack;
2761 if (args_length > NUM_STACK_ELEMS) {
2762 coordinates = (double *) PyMem_Malloc(args_length * sizeof(double));
2763 if (coordinates == NULL) {
2764 return PyErr_NoMemory();
2767 for (i = 0; i < args_length; i++) {
2769 ASSIGN_DOUBLE(x, item, error_exit);
2772 found_nan |= isnan(x);
2777 result = vector_norm(args_length, coordinates, max, found_nan);
2778 if (coordinates != coord_on_stack) {
2779 PyMem_Free(coordinates);
2781 return PyFloat_FromDouble(result);
2784 if (coordinates != coord_on_stack) {
2785 PyMem_Free(coordinates);
2790 #undef NUM_STACK_ELEMS
2792 /** sumprod() ***************************************************************/
2794 /* Forward declaration */
2795 static inline int _check_long_mult_overflow(long a, long b);
2798 long_add_would_overflow(long a, long b)
2800 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2810 Return the sum of products of values from two iterables p and q.
2812 Roughly equivalent to:
2814 sum(map(operator.mul, p, q, strict=True))
2816 For float and mixed int/float inputs, the intermediate products
2817 and sums are computed with extended precision.
2818 [clinic start generated code]*/
2821 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2822 /*[clinic end generated code: output=6722dbfe60664554 input=a2880317828c61d2]*/
2824 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2825 PyObject *p_it, *q_it, *total;
2826 iternextfunc p_next, q_next;
2827 bool p_stopped = false, q_stopped = false;
2828 bool int_path_enabled = true, int_total_in_use = false;
2829 bool flt_path_enabled = true, flt_total_in_use = false;
2831 TripleLength flt_total = tl_zero;
2833 p_it = PyObject_GetIter(p);
2837 q_it = PyObject_GetIter(q);
2842 total = PyLong_FromLong(0);
2843 if (total == NULL) {
2848 p_next = *Py_TYPE(p_it)->tp_iternext;
2849 q_next = *Py_TYPE(q_it)->tp_iternext;
2853 assert (p_i == NULL);
2854 assert (q_i == NULL);
2855 assert (term_i == NULL);
2856 assert (new_total == NULL);
2858 assert (p_it != NULL);
2859 assert (q_it != NULL);
2860 assert (total != NULL);
2864 if (PyErr_Occurred()) {
2865 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2874 if (PyErr_Occurred()) {
2875 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2882 if (p_stopped != q_stopped) {
2883 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2886 finished = p_stopped & q_stopped;
2888 if (int_path_enabled) {
2890 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2892 long int_p, int_q, int_prod;
2894 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2896 goto finalize_int_path;
2898 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2900 goto finalize_int_path;
2902 if (_check_long_mult_overflow(int_p, int_q)) {
2903 goto finalize_int_path;
2905 int_prod = int_p * int_q;
2906 if (long_add_would_overflow(int_total, int_prod)) {
2907 goto finalize_int_path;
2909 int_total += int_prod;
2910 int_total_in_use = true;
2917 // We're finished, overflowed, or have a non-int
2918 int_path_enabled = false;
2919 if (int_total_in_use) {
2920 term_i = PyLong_FromLong(int_total);
2921 if (term_i == NULL) {
2924 new_total = PyNumber_Add(total, term_i);
2925 if (new_total == NULL) {
2928 Py_SETREF(total, new_total);
2931 int_total = 0; // An ounce of prevention, ...
2932 int_total_in_use = false;
2936 if (flt_path_enabled) {
2939 double flt_p, flt_q;
2940 bool p_type_float = PyFloat_CheckExact(p_i);
2941 bool q_type_float = PyFloat_CheckExact(q_i);
2942 if (p_type_float && q_type_float) {
2943 flt_p = PyFloat_AS_DOUBLE(p_i);
2944 flt_q = PyFloat_AS_DOUBLE(q_i);
2945 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2946 /* We care about float/int pairs and int/float pairs because
2947 they arise naturally in several use cases such as price
2948 times quantity, measurements with integer weights, or
2949 data selected by a vector of bools. */
2950 flt_p = PyFloat_AS_DOUBLE(p_i);
2951 flt_q = PyLong_AsDouble(q_i);
2952 if (flt_q == -1.0 && PyErr_Occurred()) {
2954 goto finalize_flt_path;
2956 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
2957 flt_q = PyFloat_AS_DOUBLE(q_i);
2958 flt_p = PyLong_AsDouble(p_i);
2959 if (flt_p == -1.0 && PyErr_Occurred()) {
2961 goto finalize_flt_path;
2964 goto finalize_flt_path;
2966 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2967 if (isfinite(new_flt_total.hi)) {
2968 flt_total = new_flt_total;
2969 flt_total_in_use = true;
2977 // We're finished, overflowed, have a non-float, or got a non-finite value
2978 flt_path_enabled = false;
2979 if (flt_total_in_use) {
2980 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2981 if (term_i == NULL) {
2984 new_total = PyNumber_Add(total, term_i);
2985 if (new_total == NULL) {
2988 Py_SETREF(total, new_total);
2991 flt_total = tl_zero;
2992 flt_total_in_use = false;
2996 assert(!int_total_in_use);
2997 assert(!flt_total_in_use);
3001 term_i = PyNumber_Multiply(p_i, q_i);
3002 if (term_i == NULL) {
3005 new_total = PyNumber_Add(total, term_i);
3006 if (new_total == NULL) {
3009 Py_SETREF(total, new_total);
3028 Py_XDECREF(new_total);
3033 /* pow can't use math_2, but needs its own wrapper: the problem is
3034 that an infinite result can arise either as a result of overflow
3035 (in which case OverflowError should be raised) or as a result of
3036 e.g. 0.**-5. (for which ValueError needs to be raised.)
3046 Return x**y (x to the power of y).
3047 [clinic start generated code]*/
3050 math_pow_impl(PyObject *module, double x, double y)
3051 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
3056 /* deal directly with IEEE specials, to cope with problems on various
3057 platforms whose semantics don't exactly match C99 */
3058 r = 0.; /* silence compiler warning */
3059 if (!isfinite(x) || !isfinite(y)) {
3062 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
3064 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
3065 else if (isinf(x)) {
3066 odd_y = isfinite(y) && fmod(fabs(y), 2.0) == 1.0;
3068 r = odd_y ? x : fabs(x);
3072 r = odd_y ? copysign(0., x) : 0.;
3078 else if (y > 0. && fabs(x) > 1.0)
3080 else if (y < 0. && fabs(x) < 1.0) {
3081 r = -y; /* result is +inf */
3088 /* let libm handle finite**finite */
3091 /* a NaN result should arise only from (-ve)**(finite
3092 non-integer); in this case we want to raise ValueError. */
3098 an infinite result here arises either from:
3099 (A) (+/-0.)**negative (-> divide-by-zero)
3100 (B) overflow of x**y with x and y finite
3102 else if (isinf(r)) {
3111 if (errno && is_error(r, 1))
3114 return PyFloat_FromDouble(r);
3118 static const double degToRad = Py_MATH_PI / 180.0;
3119 static const double radToDeg = 180.0 / Py_MATH_PI;
3127 Convert angle x from radians to degrees.
3128 [clinic start generated code]*/
3131 math_degrees_impl(PyObject *module, double x)
3132 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3134 return PyFloat_FromDouble(x * radToDeg);
3144 Convert angle x from degrees to radians.
3145 [clinic start generated code]*/
3148 math_radians_impl(PyObject *module, double x)
3149 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3151 return PyFloat_FromDouble(x * degToRad);
3161 Return True if x is neither an infinity nor a NaN, and False otherwise.
3162 [clinic start generated code]*/
3165 math_isfinite_impl(PyObject *module, double x)
3166 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3168 return PyBool_FromLong((long)isfinite(x));
3178 Return True if x is normal, and False otherwise.
3179 [clinic start generated code]*/
3182 math_isnormal_impl(PyObject *module, double x)
3183 /*[clinic end generated code: output=c7b302b5b89c3541 input=fdaa00c58aa7bc17]*/
3185 return PyBool_FromLong(isnormal(x));
3195 Return True if x is subnormal, and False otherwise.
3196 [clinic start generated code]*/
3199 math_issubnormal_impl(PyObject *module, double x)
3200 /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/
3202 #if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
3203 return PyBool_FromLong(issubnormal(x));
3205 return PyBool_FromLong(isfinite(x) && x && !isnormal(x));
3216 Return True if x is a NaN (not a number), and False otherwise.
3217 [clinic start generated code]*/
3220 math_isnan_impl(PyObject *module, double x)
3221 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3223 return PyBool_FromLong((long)isnan(x));
3233 Return True if x is a positive or negative infinity, and False otherwise.
3234 [clinic start generated code]*/
3237 math_isinf_impl(PyObject *module, double x)
3238 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3240 return PyBool_FromLong((long)isinf(x));
3245 math.isclose -> bool
3250 rel_tol: double = 1e-09
3251 maximum difference for being considered "close", relative to the
3252 magnitude of the input values
3253 abs_tol: double = 0.0
3254 maximum difference for being considered "close", regardless of the
3255 magnitude of the input values
3257 Determine whether two floating-point numbers are close in value.
3259 Return True if a is close in value to b, and False otherwise.
3261 For the values to be considered close, the difference between them
3262 must be smaller than at least one of the tolerances.
3264 -inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3265 is, NaN is not close to anything, even itself. inf and -inf are
3266 only close to themselves.
3267 [clinic start generated code]*/
3270 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3271 double abs_tol)
3272 /*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
3276 /* sanity check on the inputs */
3277 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "tolerances must be non-negative");
3284 /* short circuit exact equality -- needed to catch two infinities of
3285 the same sign. And perhaps speeds things up a bit sometimes.
3290 /* This catches the case of two infinities of opposite sign, or
3291 one infinity and one finite number. Two infinities of opposite
3292 sign would otherwise have an infinite relative tolerance.
3293 Two infinities of the same sign are caught by the equality check
3297 if (isinf(a) || isinf(b)) {
3301 /* now do the regular computation
3302 this is essentially the "weak" test from the Boost library
3307 return (((diff <= fabs(rel_tol * b)) ||
3308 (diff <= fabs(rel_tol * a))) ||
3309 (diff <= abs_tol));
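/* The three-way test above is equivalent to the symmetric form
       fabs(a-b) <= MAX(rel_tol * MAX(fabs(a), fabs(b)), abs_tol)
   For example, with the default rel_tol of 1e-09, isclose(1.0, 1.0 + 1e-10)
   is true while isclose(1.0, 1.0 + 1e-06) is not. */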
3313 _check_long_mult_overflow(long a, long b) {
3315 /* From Python2's int_mul code:
3317 Integer overflow checking for * is painful: Python tried a couple ways, but
3318 they didn't work on all platforms, or failed in endcases (a product of
3319 -sys.maxint-1 has been a particular pain).
3323 The native long product x*y is either exactly right or *way* off, being
3324 just the last n bits of the true product, where n is the number of bits
3325 in a long (the delivered product is the true product plus i*2**n for
3328 The native double product (double)x * (double)y is subject to three
3329 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3330 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3331 But, unlike the native long product, it's not in *range* trouble: even
3332 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3333 dynamic range of a double. So the leading 50 (or so) bits of the double
3334 product are correct.
3336 We check these two ways against each other, and declare victory if they're
3337 approximately the same. Else, because the native long product is the only
3338 one that can lose catastrophic amounts of information, it's the native long
3339 product that must have overflowed.
3343 long longprod = (long)((unsigned long)a * b);
3344 double doubleprod = (double)a * (double)b;
3345 double doubled_longprod = (double)longprod;
3347 if (doubled_longprod == doubleprod) {
3351 const double diff = doubled_longprod - doubleprod;
3352 const double absdiff = diff >= 0.0 ? diff : -diff;
3353 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3355 if (32.0 * absdiff <= absprod) {
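/* Worked example of the check above (64-bit long assumed): for a == 3 and
   b == 5, longprod == 15 and doubleprod == 15.0 agree exactly, so no overflow
   is reported.  For a == b == 2**32, the wrapped long product is 0 while the
   double product is about 1.84e19; the difference dwarfs absprod/32, so the
   multiplication is reported as having overflowed. */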
3368 start: object(c_default="NULL") = 1
3370 Calculate the product of all the elements in the input iterable.
3372 The default start value for the product is 1.
3374 When the iterable is empty, return the start value. This function is
3375 intended specifically for use with numeric values and may reject non-numeric types.
3377 [clinic start generated code]*/
3380 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3381 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3383 PyObject *result = start;
3384 PyObject *temp, *item, *iter;
3386 iter = PyObject_GetIter(iterable);
3391 if (result == NULL) {
3392 result = _PyLong_GetOne();
3396 /* Fast paths for integers keeping temporary products in C.
3397 * Assumes all inputs are the same type.
3398 * If the assumption fails, default to use PyObjects instead.
3400 if (PyLong_CheckExact(result)) {
3402 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3403 /* If this already overflowed, don't even enter the loop. */
3404 if (overflow == 0) {
3405 Py_SETREF(result, NULL);
3407 /* Loop over all the items in the iterable until we finish, we overflow
3408 * or we find a non-integer element */
3409 while (result == NULL) {
3410 item = PyIter_Next(iter);
3413 if (PyErr_Occurred()) {
3416 return PyLong_FromLong(i_result);
3418 if (PyLong_CheckExact(item)) {
3419 long b = PyLong_AsLongAndOverflow(item, &overflow);
3420 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3421 long x = i_result * b;
3427 /* Either overflowed or is not an int.
3428 * Restore real objects and process normally */
3429 result = PyLong_FromLong(i_result);
3430 if (result == NULL) {
3435 temp = PyNumber_Multiply(result, item);
3439 if (result == NULL) {
3446 /* Fast paths for floats keeping temporary products in C.
3447 * Assumes all inputs are the same type.
3448 * If the assumption fails, default to use PyObjects instead.
3450 if (PyFloat_CheckExact(result)) {
3451 double f_result = PyFloat_AS_DOUBLE(result);
3452 Py_SETREF(result, NULL);
3453 while (result == NULL) {
3454 item = PyIter_Next(iter);
3457 if (PyErr_Occurred()) {
3460 return PyFloat_FromDouble(f_result);
3462 if (PyFloat_CheckExact(item)) {
3463 f_result *= PyFloat_AS_DOUBLE(item);
3467 if (PyLong_CheckExact(item)) {
3470 value = PyLong_AsLongAndOverflow(item, &overflow);
3472 f_result *= (double)value;
3477 result = PyFloat_FromDouble(f_result);
3478 if (result == NULL) {
3483 temp = PyNumber_Multiply(result, item);
3487 if (result == NULL) {
3494 /* Consume rest of the iterable (if any) that could not be handled
3495 * by specialized functions above. */
3497 item = PyIter_Next(iter);
3499 /* error, or end-of-sequence */
3500 if (PyErr_Occurred()) {
3501 Py_SETREF(result, NULL);
3505 temp = PyNumber_Multiply(result, item);
3517 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3519 Python code to generate the values:
3521 import math
3523 for n in range(128):
3524 fac = math.factorial(n)
3525 fac_odd_part = fac // (fac & -fac)
3526 reduced_fac_odd_part = fac_odd_part % (2**64)
3527 print(f"{reduced_fac_odd_part:#018x}u")
3529 static const uint64_t reduced_factorial_odd_part[] = {
3530 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3531 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3532 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3533 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3534 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3535 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3536 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3537 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3538 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3539 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3540 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3541 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3542 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3543 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3544 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3545 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3546 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3547 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3548 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3549 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3550 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3551 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3552 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3553 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3554 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3555 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3556 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3557 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3558 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3559 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3560 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3561 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3564 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3566 Python code to generate the values:
3568 import math
3570 for n in range(128):
3571 fac = math.factorial(n)
3572 fac_odd_part = fac // (fac & -fac)
3573 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3574 print(f"{inverted_fac_odd_part:#018x}u")
3576 static const uint64_t inverted_factorial_odd_part[] = {
3577 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3578 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3579 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3580 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3581 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3582 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3583 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3584 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3585 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3586 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3587 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3588 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3589 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3590 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3591 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3592 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3593 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3594 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3595 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3596 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3597 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3598 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3599 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3600 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3601 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3602 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3603 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3604 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3605 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3606 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3607 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3608 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3611 /* exponent of the largest power of 2 dividing factorial(n), for n in range(128)
3613 Python code to generate the values:
3615 import math
3617 for n in range(128):
3618 fac = math.factorial(n)
3619 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3620 print(fac_trailing_zeros)
3623 static const uint8_t factorial_trailing_zeros[] = {
3624 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3625 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3626 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3627 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3628 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3629 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3630 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3631 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3634 /* Number of permutations and combinations.
3635 * P(n, k) = n! / (n-k)!
3636 * C(n, k) = P(n, k) / k!
3639 /* Calculate C(n, k) for n in the 63-bit range. */
3641 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3645 /* For small enough n and k the result fits in the 64-bit range and can
3646 * be calculated without allocating intermediate PyLong objects. */
3648 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3649 * fits into a uint64_t. Exclude k = 1, because the second fast
3650 * path is faster for this case.*/
3651 static const unsigned char fast_comb_limits1[] = {
3652 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3653 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3654 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3655 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3656 67, 67, 67, // 32-34
3658 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3660 comb(n, k) fits into a uint64_t. We compute it as
3662 comb_odd_part << shift
3664 where 2**shift is the largest power of two dividing comb(n, k)
3665 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3666 calculated efficiently via arithmetic modulo 2**64, using three
3667 lookups and two uint64_t multiplications.
3669 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3670 * inverted_factorial_odd_part[k]
3671 * inverted_factorial_odd_part[n - k];
3672 int shift = factorial_trailing_zeros[n]
3673 - factorial_trailing_zeros[k]
3674 - factorial_trailing_zeros[n - k];
3675 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
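/* Worked example of this fast path: comb(10, 3) == 120 == 15 << 3.  The
   shift is factorial_trailing_zeros[10] - factorial_trailing_zeros[3]
   - factorial_trailing_zeros[7] == 8 - 1 - 4 == 3, and
   reduced_factorial_odd_part[10] * inverted_factorial_odd_part[3]
   * inverted_factorial_odd_part[7] is congruent to 15 modulo 2**64,
   which is the odd part of 120. */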
3678 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3679 * fits into a long long (which is at least 64 bit). Only contains
3680 * items larger than in fast_comb_limits1. */
3681 static const unsigned long long fast_comb_limits2[] = {
3682 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3683 746, 453, 308, 227, 178, 147, // 8-13
3685 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3686 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3687 unsigned long long result = n;
3688 for (unsigned long long i = 1; i < k;) {
3692 return PyLong_FromUnsignedLongLong(result);
3696 /* Maps k to the maximal n so that k <= n and P(n, k)
3697 * fits into a long long (which is at least 64 bit). */
3698 static const unsigned long long fast_perm_limits[] = {
3699 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3700 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3701 24, 22, 21, 20, 20, // 16-20
3703 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3705 /* P(n, k) fits into a uint64_t. */
3706 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3707 * inverted_factorial_odd_part[n - k];
3708 int shift = factorial_trailing_zeros[n]
3709 - factorial_trailing_zeros[n - k];
3710 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3713 /* P(n, k) = P(n, k-1) * (n-k+1) */
3714 unsigned long long result = n;
3715 for (unsigned long long i = 1; i < k;) {
3719 return PyLong_FromUnsignedLongLong(result);
3723 /* For larger n use recursive formulas:
3725 * P(n, k) = P(n, j) * P(n-j, k-j)
3726 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3728 unsigned long long j = k / 2;
3730 a = perm_comb_small(n, j, iscomb);
3734 b = perm_comb_small(n - j, k - j, iscomb);
3738 Py_SETREF(a, PyNumber_Multiply(a, b));
3740 if (iscomb && a != NULL) {
3741 b = perm_comb_small(k, j, 1);
3745 Py_SETREF(a, PyNumber_FloorDivide(a, b));
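/* Worked example of the recursive formulas with n = 10, k = 4, j = k/2 = 2:
   C(10, 4) == C(10, 2) * C(8, 2) // C(4, 2) == 45 * 28 // 6 == 210, and
   P(10, 4) == P(10, 2) * P(8, 2) == 90 * 56 == 5040. */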
3755 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3756 * It is more efficient than sequential multiplication thanks to
3757 * Karatsuba multiplication.
3760 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3763 return PyLong_FromLong(1);
3766 return Py_NewRef(n);
3769 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3770 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3771 unsigned long long j = k / 2;
3773 a = perm_comb(n, j, iscomb);
3777 PyObject *t = PyLong_FromUnsignedLongLong(j);
3781 n = PyNumber_Subtract(n, t);
3786 b = perm_comb(n, k - j, iscomb);
3791 Py_SETREF(a, PyNumber_Multiply(a, b));
3793 if (iscomb && a != NULL) {
3794 b = perm_comb_small(k, j, 1);
3798 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3815 Number of ways to choose k items from n items without repetition and with order.
3817 Evaluates to n! / (n - k)! when k <= n and evaluates to zero when k > n.
3820 If k is not specified or is None, then k defaults to n
3821 and the function returns n!.
3823 Raises TypeError if either of the arguments are not integers.
3824 Raises ValueError if either of the arguments are negative.
3825 [clinic start generated code]*/
3828 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3829 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3831 PyObject *result = NULL;
3836 return math_factorial(module, n);
3838 n = PyNumber_Index(n);
3842 k = PyNumber_Index(k);
3847 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3849 if (_PyLong_IsNegative((PyLongObject *)n)) {
3850 PyErr_SetString(PyExc_ValueError,
3851 "n must be a non-negative integer");
3854 if (_PyLong_IsNegative((PyLongObject *)k)) {
3855 PyErr_SetString(PyExc_ValueError,
3856 "k must be a non-negative integer");
3860 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3863 result = PyLong_FromLong(0);
3869 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3870 assert(overflow >= 0 && !PyErr_Occurred());
3872 PyErr_Format(PyExc_OverflowError,
3873 "k must not exceed %lld",
3879 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3880 assert(overflow >= 0 && !PyErr_Occurred());
3881 if (!overflow && ki > 1) {
3883 result = perm_comb_small((unsigned long long)ni,
3884 (unsigned long long)ki, 0);
3887 result = perm_comb(n, (unsigned long long)ki, 0);
3908 Number of ways to choose k items from n items without repetition and without order.
3910 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates to zero when k > n.
3913 Also called the binomial coefficient because it is equivalent
3914 to the coefficient of k-th term in polynomial expansion of the
3915 expression (1 + x)**n.
3917 Raises TypeError if either of the arguments are not integers.
3918 Raises ValueError if either of the arguments are negative.
3920 [clinic start generated code]*/
3923 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3924 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3926 PyObject *result = NULL, *temp;
3930 n = PyNumber_Index(n);
3934 k = PyNumber_Index(k);
3939 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3941 if (_PyLong_IsNegative((PyLongObject *)n)) {
3942 PyErr_SetString(PyExc_ValueError,
3943 "n must be a non-negative integer");
3946 if (_PyLong_IsNegative((PyLongObject *)k)) {
3947 PyErr_SetString(PyExc_ValueError,
3948 "k must be a non-negative integer");
3952 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3953 assert(overflow >= 0 && !PyErr_Occurred());
3956 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3957 assert(overflow >= 0 && !PyErr_Occurred());
3958 if (overflow || ki > ni) {
3959 result = PyLong_FromLong(0);
3964 ki = Py_MIN(ki, ni - ki);
3966 result = perm_comb_small((unsigned long long)ni,
3967 (unsigned long long)ki, 1);
3970 /* For k == 1 just return the original n in perm_comb(). */
3973 /* k = min(k, n - k) */
3974 temp = PyNumber_Subtract(n, k);
3978 assert(PyLong_Check(temp));
3979 if (_PyLong_IsNegative((PyLongObject *)temp)) {
3981 result = PyLong_FromLong(0);
3984 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3995 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3996 assert(overflow >= 0 && !PyErr_Occurred());
3998 PyErr_Format(PyExc_OverflowError,
3999 "min(n - k, k) must not exceed %lld",
4006 result = perm_comb(n, (unsigned long long)ki, 1);
4027 steps: object = None
4029 Return the floating-point value the given number of steps after x towards y.
4031 If steps is not specified or is None, it defaults to 1.
4033 Raises a TypeError, if x or y is not a double, or if steps is not an integer.
4034 Raises ValueError if steps is negative.
4035 [clinic start generated code]*/
4038 math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
4039 /*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
4043 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
4044 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
4045 return PyFloat_FromDouble(y);
4048 return PyFloat_FromDouble(x);
4051 return PyFloat_FromDouble(y);
4054 if (steps == Py_None) {
4055 // fast path: we default to one step.
4056 return PyFloat_FromDouble(nextafter(x, y));
4058 steps = PyNumber_Index(steps);
4059 if (steps == NULL) {
4062 assert(PyLong_CheckExact(steps));
4063 if (_PyLong_IsNegative((PyLongObject *)steps)) {
4064 PyErr_SetString(PyExc_ValueError,
4065 "steps must be a non-negative integer");
4070 unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
4071 // Conveniently, uint64_t and double have the same number of bits
4072 // on all the platforms we care about.
4073 // So if an overflow occurs, we can just use UINT64_MAX.
4075 if (usteps_ull >= UINT64_MAX) {
4076 // This branch includes the case where an error occurred, since
4077 // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
4078 // usteps_ull can be strictly larger than UINT64_MAX on a machine
4079 // where unsigned long long has width > 64 bits.
4080 if (PyErr_Occurred()) {
4081 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
4088 usteps_ull = UINT64_MAX;
4090 assert(usteps_ull <= UINT64_MAX);
4091 uint64_t usteps = (uint64_t)usteps_ull;
4094 return PyFloat_FromDouble(x);
4097 return PyFloat_FromDouble(x);
4100 return PyFloat_FromDouble(y);
4103 // We assume that double and uint64_t have the same endianness.
4104 // This is not guaranteed by the C-standard, but it is true for
4105 // all platforms we care about. (The most likely form of violation
4106 // would be a "mixed-endian" double.)
4107 union pun {double f; uint64_t i;};
4108 union pun ux = {x}, uy = {y};
4110 return PyFloat_FromDouble(x);
4113 const uint64_t sign_bit = 1ULL<<63;
4115 uint64_t ax = ux.i & ~sign_bit;
4116 uint64_t ay = uy.i & ~sign_bit;
4119 if (((ux.i ^ uy.i) & sign_bit)) {
4120 // NOTE: ax + ay can never overflow, because their most significant bit is always zero.
4122 if (ax + ay <= usteps) {
4123 return PyFloat_FromDouble(uy.f);
4124 // This comparison has to use <, because <= would get +0.0 vs -0.0 wrong.
4126 } else if (ax < usteps) {
4127 union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4128 return PyFloat_FromDouble(result.f);
4131 return PyFloat_FromDouble(ux.f);
4134 } else if (ax > ay) {
4135 if (ax - ay >= usteps) {
4137 return PyFloat_FromDouble(ux.f);
4139 return PyFloat_FromDouble(uy.f);
4142 if (ay - ax >= usteps) {
4144 return PyFloat_FromDouble(ux.f);
4146 return PyFloat_FromDouble(uy.f);
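/* A minimal standalone illustration (not the module's code path) of the
   ordered-integer view of IEEE doubles used above: for a finite positive x,
   reinterpreting the bits as a uint64_t and adding 1 yields the next
   representable double, i.e. one step towards +infinity.

       #include <stdint.h>

       static double
       next_up_positive(double x)   // assumes x is finite and > 0
       {
           union {double f; uint64_t i;} u = {x};
           u.i += 1;                // adjacent bit pattern == adjacent double
           return u.f;
       }

   For example, next_up_positive(1.0) returns 1.0 + 2**-52, the same value as
   nextafter(1.0, INFINITY). */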
4158 Return the value of the least significant bit of the float x.
4159 [clinic start generated code]*/
4162 math_ulp_impl(PyObject *module, double x)
4163 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4172 double inf = Py_INFINITY;
4173 double x2 = nextafter(x, inf);
4175 /* special case: x is the largest positive representable float */
4176 x2 = nextafter(x, -inf);
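/* For example, ulp(1.0) is 2**-52 (DBL_EPSILON, about 2.22e-16) and ulp(0.0)
   is the smallest subnormal double, about 5e-324. */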
4183 math_exec(PyObject *module)
4186 if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
4189 if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
4193 if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
4196 if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
4199 if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
4205 static PyMethodDef math_methods[] = {
4206 {"acos", math_acos, METH_O, math_acos_doc},
4207 {"acosh", math_acosh, METH_O, math_acosh_doc},
4208 {"asin", math_asin, METH_O, math_asin_doc},
4209 {"asinh", math_asinh, METH_O, math_asinh_doc},
4210 {"atan", math_atan, METH_O, math_atan_doc},
4211 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
4212 {"atanh", math_atanh, METH_O, math_atanh_doc},
4213 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
4215 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
4216 {"cos", math_cos, METH_O, math_cos_doc},
4217 {"cosh", math_cosh, METH_O, math_cosh_doc},
4218 MATH_DEGREES_METHODDEF
4220 {"erf", math_erf, METH_O, math_erf_doc},
4221 {"erfc", math_erfc, METH_O, math_erfc_doc},
4222 {"exp", math_exp, METH_O, math_exp_doc},
4223 {"exp2", math_exp2, METH_O, math_exp2_doc},
4224 {"expm1", math_expm1, METH_O, math_expm1_doc},
4225 {"fabs", math_fabs, METH_O, math_fabs_doc},
4226 MATH_FACTORIAL_METHODDEF
4227 MATH_FLOOR_METHODDEF
4232 MATH_FREXP_METHODDEF
4234 {"gamma", math_gamma, METH_O, math_gamma_doc},
4236 MATH_HYPOT_METHODDEF
4237 MATH_ISCLOSE_METHODDEF
4238 MATH_ISFINITE_METHODDEF
4239 MATH_ISNORMAL_METHODDEF
4240 MATH_ISSUBNORMAL_METHODDEF
4241 MATH_ISINF_METHODDEF
4242 MATH_ISNAN_METHODDEF
4243 MATH_ISQRT_METHODDEF
4245 MATH_LDEXP_METHODDEF
4246 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
4247 {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
4248 {"log1p", math_log1p, METH_O, math_log1p_doc},
4249 MATH_LOG10_METHODDEF
4253 MATH_RADIANS_METHODDEF
4254 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
4255 MATH_SIGNBIT_METHODDEF
4256 {"sin", math_sin, METH_O, math_sin_doc},
4257 {"sinh", math_sinh, METH_O, math_sinh_doc},
4258 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4259 {"tan", math_tan, METH_O, math_tan_doc},
4260 {"tanh", math_tanh, METH_O, math_tanh_doc},
4261 MATH_SUMPROD_METHODDEF
4262 MATH_TRUNC_METHODDEF
4266 MATH_NEXTAFTER_METHODDEF
4268 {NULL, NULL} /* sentinel */
4271 static PyModuleDef_Slot math_slots[] = {
4272 {Py_mod_exec, math_exec},
4273 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
4274 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
4278 PyDoc_STRVAR(module_doc,
4279 "This module provides access to the mathematical functions\n"
4280 "defined by the C standard.");
4282 static struct PyModuleDef mathmodule = {
4283 PyModuleDef_HEAD_INIT,
4285 .m_doc = module_doc,
4287 .m_methods = math_methods,
4288 .m_slots = math_slots,
4294 return PyModuleDef_Init(&mathmodule);