Modules/mathmodule.c

   1 /* Math module -- standard C math library functions, pi and e */
   2
   3 /* Here are some comments from Tim Peters, extracted from the
   4    discussion attached to http://bugs.python.org/issue1640.  They
   5    describe the general aims of the math module with respect to
   6    special values, IEEE-754 floating-point exceptions, and Python
   7    exceptions.
   8
   9 These are the "spirit of 754" rules:
  10
  11 1. If the mathematical result is a real number, but of magnitude too
  12 large to approximate by a machine float, overflow is signaled and the
  13 result is an infinity (with the appropriate sign).
  14
  15 2. If the mathematical result is a real number, but of magnitude too
  16 small to approximate by a machine float, underflow is signaled and the
  17 result is a zero (with the appropriate sign).
  18
  19 3. At a singularity (a value x such that the limit of f(y) as y
  20 approaches x exists and is an infinity), "divide by zero" is signaled
  21 and the result is an infinity (with the appropriate sign).  This is
  22 complicated a little by that the left-side and right-side limits may
  23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
  24 from the positive or negative directions.  In that specific case, the
  25 sign of the zero determines the result of 1/0.
  26
  27 4. At a point where a function has no defined result in the extended
  28 reals (i.e., the reals plus an infinity or two), invalid operation is
  29 signaled and a NaN is returned.
  30
  31 And these are what Python has historically /tried/ to do (but not
  32 always successfully, as platform libm behavior varies a lot):
  33
  34 For #1, raise OverflowError.
  35
  36 For #2, return a zero (with the appropriate sign if that happens by
  37 accident ;-)).
  38
  39 For #3 and #4, raise ValueError.  It may have made sense to raise
  40 Python's ZeroDivisionError in #3, but historically that's only been
  41 raised for division by zero and mod by zero.
  42
  43 */
  44
  45 /*
  46    In general, on an IEEE-754 platform the aim is to follow the C99
  47    standard, including Annex 'F', whenever possible.  Where the
  48    standard recommends raising the 'divide-by-zero' or 'invalid'
  49    floating-point exceptions, Python should raise a ValueError.  Where
  50    the standard recommends raising 'overflow', Python should raise an
  51    OverflowError.  In all other circumstances a value should be
  52    returned.
  53  */
  54
  55 #ifndef Py_BUILD_CORE_BUILTIN
  56 #  define Py_BUILD_CORE_MODULE 1
  57 #endif
  58
  59 #include "Python.h"
  60 #include "pycore_bitutils.h"      // _Py_bit_length()
  61 #include "pycore_call.h"          // _PyObject_CallNoArgs()
  62 #include "pycore_dtoa.h"          // _Py_dg_infinity()
  63 #include "pycore_long.h"          // _PyLong_GetZero()
  64 #include "pycore_moduleobject.h"  // _PyModule_GetState()
  65 #include "pycore_object.h"        // _PyObject_LookupSpecial()
  66 #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
  67 /* For DBL_EPSILON in _math.h */
  68 #include <float.h>
  69 /* For _Py_log1p with workarounds for buggy handling of zeros. */
  70 #include "_math.h"
  71 #include <stdbool.h>
  72
  73 #include "clinic/mathmodule.c.h"
  74
  75 /*[clinic input]
  76 module math
  77 [clinic start generated code]*/
  78 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
  79
  80
  81 typedef struct {
  82     PyObject *str___ceil__;
  83     PyObject *str___floor__;
  84     PyObject *str___trunc__;
  85 } math_module_state;
  86
  87 static inline math_module_state*
  88 get_math_module_state(PyObject *module)
  89 {
  90     void *state = _PyModule_GetState(module);
  91     assert(state != NULL);
  92     return (math_module_state *)state;
  93 }
  94
  95 /*
  96 Double and triple length extended precision algorithms from:
  97
  98   Accurate Sum and Dot Product
  99   by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
 100   https://doi.org/10.1137/030601818
 101   https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
 102
 103 */
 104
 105 typedef struct{ double hi; double lo; } DoubleLength;
 106
 107 static DoubleLength
 108 dl_fast_sum(double a, double b)
 109 {
 110     /* Algorithm 1.1. Compensated summation of two floating point numbers. */
 111     assert(fabs(a) >= fabs(b));
 112     double x = a + b;
 113     double y = (a - x) + b;
 114     return (DoubleLength) {x, y};
 115 }
 116
 117 static DoubleLength
 118 dl_sum(double a, double b)
 119 {
 120     /* Algorithm 3.1 Error-free transformation of the sum */
 121     double x = a + b;
 122     double z = x - a;
 123     double y = (a - (x - z)) + (b - z);
 124     return (DoubleLength) {x, y};
 125 }
 126
 127 #ifndef UNRELIABLE_FMA
 128
 129 static DoubleLength
 130 dl_mul(double x, double y)
 131 {
 132     /* Algorithm 3.5. Error-free transformation of a product */
 133     double z = x * y;
 134     double zz = fma(x, y, -z);
 135     return (DoubleLength) {z, zz};
 136 }
 137
 138 #else
 139
 140 /*
 141    The default implementation of dl_mul() depends on the C math library
 142    having an accurate fma() function as required by § 7.12.13.1 of the
 143    C99 standard.
 144
 145    The UNRELIABLE_FMA option is provided as a slower but accurate
 146    alternative for builds where the fma() function is found wanting.
 147    The speed penalty may be modest (17% slower on an Apple M1 Max),
 148    so don't hesitate to enable this build option.
 149
 150    The algorithms are from the T. J. Dekker paper:
 151    A Floating-Point Technique for Extending the Available Precision
 152    https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
 153 */
 154
 155 static DoubleLength
 156 dl_split(double x) {
 157     // Dekker (5.5) and (5.6).
 158     double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
 159     double hi = t - (t - x);
 160     double lo = x - hi;
 161     return (DoubleLength) {hi, lo};
 162 }
 163
 164 static DoubleLength
 165 dl_mul(double x, double y)
 166 {
 167     // Dekker (5.12) and mul12()
 168     DoubleLength xx = dl_split(x);
 169     DoubleLength yy = dl_split(y);
 170     double p = xx.hi * yy.hi;
 171     double q = xx.hi * yy.lo + xx.lo * yy.hi;
 172     double z = p + q;
 173     double zz = p - z + q + xx.lo * yy.lo;
 174     return (DoubleLength) {z, zz};
 175 }
 176
 177 #endif
 178
 179 typedef struct { double hi; double lo; double tiny; } TripleLength;
 180
 181 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
 182
 183 static TripleLength
 184 tl_fma(double x, double y, TripleLength total)
 185 {
 186     /* Algorithm 5.10 with SumKVert for K=3 */
 187     DoubleLength pr = dl_mul(x, y);
 188     DoubleLength sm = dl_sum(total.hi, pr.hi);
 189     DoubleLength r1 = dl_sum(total.lo, pr.lo);
 190     DoubleLength r2 = dl_sum(r1.hi, sm.lo);
 191     return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
 192 }
 193
 194 static double
 195 tl_to_d(TripleLength total)
 196 {
 197     DoubleLength last = dl_sum(total.lo, total.hi);
 198     return total.tiny + last.lo + last.hi;
 199 }
 200
 201
 202 /*
 203    sin(pi*x), giving accurate results for all finite x (especially x
 204    integral or close to an integer).  This is here for use in the
 205    reflection formula for the gamma function.  It conforms to IEEE
 206    754-2008 for finite arguments, but not for infinities or nans.
 207 */
 208
 209 static const double pi = 3.141592653589793238462643383279502884197;
 210 static const double logpi = 1.144729885849400174143427351353058711647;
 211
 212 /* Version of PyFloat_AsDouble() with in-line fast paths
 213    for exact floats and integers.  Gives a substantial
 214    speed improvement for extracting float arguments.
 215 */
 216
 217 #define ASSIGN_DOUBLE(target_var, obj, error_label)        \
 218     if (PyFloat_CheckExact(obj)) {                         \
 219         target_var = PyFloat_AS_DOUBLE(obj);               \
 220     }                                                      \
 221     else if (PyLong_CheckExact(obj)) {                     \
 222         target_var = PyLong_AsDouble(obj);                 \
 223         if (target_var == -1.0 && PyErr_Occurred()) {      \
 224             goto error_label;                              \
 225         }                                                  \
 226     }                                                      \
 227     else {                                                 \
 228         target_var = PyFloat_AsDouble(obj);                \
 229         if (target_var == -1.0 && PyErr_Occurred()) {      \
 230             goto error_label;                              \
 231         }                                                  \
 232     }
 233
 234 static double
 235 m_sinpi(double x)
 236 {
 237     double y, r;
 238     int n;
 239     /* this function should only ever be called for finite arguments */
 240     assert(Py_IS_FINITE(x));
 241     y = fmod(fabs(x), 2.0);
 242     n = (int)round(2.0*y);
 243     assert(0 <= n && n <= 4);
 244     switch (n) {
 245     case 0:
 246         r = sin(pi*y);
 247         break;
 248     case 1:
 249         r = cos(pi*(y-0.5));
 250         break;
 251     case 2:
 252         /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
 253            -0.0 instead of 0.0 when y == 1.0. */
 254         r = sin(pi*(1.0-y));
 255         break;
 256     case 3:
 257         r = -cos(pi*(y-1.5));
 258         break;
 259     case 4:
 260         r = sin(pi*(y-2.0));
 261         break;
 262     default:
 263         Py_UNREACHABLE();
 264     }
 265     return copysign(1.0, x)*r;
 266 }
 267
 268 /* Implementation of the real gamma function.  Kept here to work around
 269    issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
 270    on various platforms (Windows, MacOS).  In extensive but non-exhaustive
 271    random tests, this function proved accurate to within <= 10 ulps across the
 272    entire float domain.  Note that accuracy may depend on the quality of the
 273    system math functions, the pow function in particular.  Special cases
 274    follow C99 annex F.  The parameters and method are tailored to platforms
 275    whose double format is the IEEE 754 binary64 format.
 276
 277    Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
 278    and g=6.024680040776729583740234375; these parameters are amongst those
 279    used by the Boost library.  Following Boost (again), we re-express the
 280    Lanczos sum as a rational function, and compute it that way.  The
 281    coefficients below were computed independently using MPFR, and have been
 282    double-checked against the coefficients in the Boost source code.
 283
 284    For x < 0.0 we use the reflection formula.
 285
 286    There's one minor tweak that deserves explanation: Lanczos' formula for
 287    Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
 288    values, x+g-0.5 can be represented exactly.  However, in cases where it
 289    can't be represented exactly the small error in x+g-0.5 can be magnified
 290    significantly by the pow and exp calls, especially for large x.  A cheap
 291    correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
 292    involved in the computation of x+g-0.5 (that is, e = computed value of
 293    x+g-0.5 - exact value of x+g-0.5).  Here's the proof:
 294
 295    Correction factor
 296    -----------------
 297    Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
 298    double, and e is tiny.  Then:
 299
 300      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
 301      = pow(y, x-0.5)/exp(y) * C,
 302
 303    where the correction_factor C is given by
 304
 305      C = pow(1-e/y, x-0.5) * exp(e)
 306
 307    Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
 308
 309      C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
 310
 311    But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and
 312
 313      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
 314
 315    Note that for accuracy, when computing r*C it's better to do
 316
 317      r + e*g/y*r;
 318
 319    than
 320
 321      r * (1 + e*g/y);
 322
 323    since the addition in the latter throws away most of the bits of
 324    information in e*g/y.
 325 */
 326
 327 #define LANCZOS_N 13
 328 static const double lanczos_g = 6.024680040776729583740234375;
 329 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
 330 static const double lanczos_num_coeffs[LANCZOS_N] = {
 331     23531376880.410759688572007674451636754734846804940,
 332     42919803642.649098768957899047001988850926355848959,
 333     35711959237.355668049440185451547166705960488635843,
 334     17921034426.037209699919755754458931112671403265390,
 335     6039542586.3520280050642916443072979210699388420708,
 336     1439720407.3117216736632230727949123939715485786772,
 337     248874557.86205415651146038641322942321632125127801,
 338     31426415.585400194380614231628318205362874684987640,
 339     2876370.6289353724412254090516208496135991145378768,
 340     186056.26539522349504029498971604569928220784236328,
 341     8071.6720023658162106380029022722506138218516325024,
 342     210.82427775157934587250973392071336271166969580291,
 343     2.5066282746310002701649081771338373386264310793408
 344 };
 345
 346 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
 347 static const double lanczos_den_coeffs[LANCZOS_N] = {
 348     0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
 349     13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
 350
 351 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
 352 #define NGAMMA_INTEGRAL 23
 353 static const double gamma_integral[NGAMMA_INTEGRAL] = {
 354     1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
 355     3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
 356     1307674368000.0, 20922789888000.0, 355687428096000.0,
 357     6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
 358     51090942171709440000.0, 1124000727777607680000.0,
 359 };
 360
 361 /* Lanczos' sum L_g(x), for positive x */
 362
 363 static double
 364 lanczos_sum(double x)
 365 {
 366     double num = 0.0, den = 0.0;
 367     int i;
 368     assert(x > 0.0);
 369     /* evaluate the rational function lanczos_sum(x).  For large
 370        x, the obvious algorithm risks overflow, so we instead
 371        rescale the denominator and numerator of the rational
 372        function by x**(1-LANCZOS_N) and treat this as a
 373        rational function in 1/x.  This also reduces the error for
 374        larger x values.  The choice of cutoff point (5.0 below) is
 375        somewhat arbitrary; in tests, smaller cutoff values than
 376        this resulted in lower accuracy. */
 377     if (x < 5.0) {
 378         for (i = LANCZOS_N; --i >= 0; ) {
 379             num = num * x + lanczos_num_coeffs[i];
 380             den = den * x + lanczos_den_coeffs[i];
 381         }
 382     }
 383     else {
 384         for (i = 0; i < LANCZOS_N; i++) {
 385             num = num / x + lanczos_num_coeffs[i];
 386             den = den / x + lanczos_den_coeffs[i];
 387         }
 388     }
 389     return num/den;
 390 }
 391
 392 /* Constant for +infinity, generated in the same way as float('inf'). */
 393
 394 static double
 395 m_inf(void)
 396 {
 397 #if _PY_SHORT_FLOAT_REPR == 1
 398     return _Py_dg_infinity(0);
 399 #else
 400     return Py_HUGE_VAL;
 401 #endif
 402 }
 403
 404 /* Constant nan value, generated in the same way as float('nan'). */
 405 /* We don't currently assume that Py_NAN is defined everywhere. */
 406
 407 #if _PY_SHORT_FLOAT_REPR == 1
 408
 409 static double
 410 m_nan(void)
 411 {
 412 #if _PY_SHORT_FLOAT_REPR == 1
 413     return _Py_dg_stdnan(0);
 414 #else
 415     return Py_NAN;
 416 #endif
 417 }
 418
 419 #endif
 420
 421 static double
 422 m_tgamma(double x)
 423 {
 424     double absx, r, y, z, sqrtpow;
 425
 426     /* special cases */
 427     if (!Py_IS_FINITE(x)) {
 428         if (Py_IS_NAN(x) || x > 0.0)
 429             return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
 430         else {
 431             errno = EDOM;
 432             return Py_NAN;  /* tgamma(-inf) = nan, invalid */
 433         }
 434     }
 435     if (x == 0.0) {
 436         errno = EDOM;
 437         /* tgamma(+-0.0) = +-inf, divide-by-zero */
 438         return copysign(Py_HUGE_VAL, x);
 439     }
 440
 441     /* integer arguments */
 442     if (x == floor(x)) {
 443         if (x < 0.0) {
 444             errno = EDOM;  /* tgamma(n) = nan, invalid for */
 445             return Py_NAN; /* negative integers n */
 446         }
 447         if (x <= NGAMMA_INTEGRAL)
 448             return gamma_integral[(int)x - 1];
 449     }
 450     absx = fabs(x);
 451
 452     /* tiny arguments:  tgamma(x) ~ 1/x for x near 0 */
 453     if (absx < 1e-20) {
 454         r = 1.0/x;
 455         if (Py_IS_INFINITY(r))
 456             errno = ERANGE;
 457         return r;
 458     }
 459
 460     /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
 461        x > 200, and underflows to +-0.0 for x < -200, not a negative
 462        integer. */
 463     if (absx > 200.0) {
 464         if (x < 0.0) {
 465             return 0.0/m_sinpi(x);
 466         }
 467         else {
 468             errno = ERANGE;
 469             return Py_HUGE_VAL;
 470         }
 471     }
 472
 473     y = absx + lanczos_g_minus_half;
 474     /* compute error in sum */
 475     if (absx > lanczos_g_minus_half) {
 476         /* note: the correction can be foiled by an optimizing
 477            compiler that (incorrectly) thinks that an expression like
 478            a + b - a - b can be optimized to 0.0.  This shouldn't
 479            happen in a standards-conforming compiler. */
 480         double q = y - absx;
 481         z = q - lanczos_g_minus_half;
 482     }
 483     else {
 484         double q = y - lanczos_g_minus_half;
 485         z = q - absx;
 486     }
 487     z = z * lanczos_g / y;
 488     if (x < 0.0) {
 489         r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
 490         r -= z * r;
 491         if (absx < 140.0) {
 492             r /= pow(y, absx - 0.5);
 493         }
 494         else {
 495             sqrtpow = pow(y, absx / 2.0 - 0.25);
 496             r /= sqrtpow;
 497             r /= sqrtpow;
 498         }
 499     }
 500     else {
 501         r = lanczos_sum(absx) / exp(y);
 502         r += z * r;
 503         if (absx < 140.0) {
 504             r *= pow(y, absx - 0.5);
 505         }
 506         else {
 507             sqrtpow = pow(y, absx / 2.0 - 0.25);
 508             r *= sqrtpow;
 509             r *= sqrtpow;
 510         }
 511     }
 512     if (Py_IS_INFINITY(r))
 513         errno = ERANGE;
 514     return r;
 515 }
 516
 517 /*
 518    lgamma:  natural log of the absolute value of the Gamma function.
 519    For large arguments, Lanczos' formula works extremely well here.
 520 */
 521
 522 static double
 523 m_lgamma(double x)
 524 {
 525     double r;
 526     double absx;
 527
 528     /* special cases */
 529     if (!Py_IS_FINITE(x)) {
 530         if (Py_IS_NAN(x))
 531             return x;  /* lgamma(nan) = nan */
 532         else
 533             return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
 534     }
 535
 536     /* integer arguments */
 537     if (x == floor(x) && x <= 2.0) {
 538         if (x <= 0.0) {
 539             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
 540             return Py_HUGE_VAL; /* integers n <= 0 */
 541         }
 542         else {
 543             return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
 544         }
 545     }
 546
 547     absx = fabs(x);
 548     /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
 549     if (absx < 1e-20)
 550         return -log(absx);
 551
 552     /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
 553        having a second set of numerator coefficients for lanczos_sum that
 554        absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
 555        subtraction below; it's probably not worth it. */
 556     r = log(lanczos_sum(absx)) - lanczos_g;
 557     r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
 558     if (x < 0.0)
 559         /* Use reflection formula to get value for negative x. */
 560         r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
 561     if (Py_IS_INFINITY(r))
 562         errno = ERANGE;
 563     return r;
 564 }
 565
 566 /*
 567    wrapper for atan2 that deals directly with special cases before
 568    delegating to the platform libm for the remaining cases.  This
 569    is necessary to get consistent behaviour across platforms.
 570    Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
 571    always follow C99.
 572 */
 573
 574 static double
 575 m_atan2(double y, double x)
 576 {
 577     if (Py_IS_NAN(x) || Py_IS_NAN(y))
 578         return Py_NAN;
 579     if (Py_IS_INFINITY(y)) {
 580         if (Py_IS_INFINITY(x)) {
 581             if (copysign(1., x) == 1.)
 582                 /* atan2(+-inf, +inf) == +-pi/4 */
 583                 return copysign(0.25*Py_MATH_PI, y);
 584             else
 585                 /* atan2(+-inf, -inf) == +-pi*3/4 */
 586                 return copysign(0.75*Py_MATH_PI, y);
 587         }
 588         /* atan2(+-inf, x) == +-pi/2 for finite x */
 589         return copysign(0.5*Py_MATH_PI, y);
 590     }
 591     if (Py_IS_INFINITY(x) || y == 0.) {
 592         if (copysign(1., x) == 1.)
 593             /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
 594             return copysign(0., y);
 595         else
 596             /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
 597             return copysign(Py_MATH_PI, y);
 598     }
 599     return atan2(y, x);
 600 }
 601
 602
 603 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
 604    multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
 605    binary floating-point format, the result is always exact. */
 606
 607 static double
 608 m_remainder(double x, double y)
 609 {
 610     /* Deal with most common case first. */
 611     if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
 612         double absx, absy, c, m, r;
 613
 614         if (y == 0.0) {
 615             return Py_NAN;
 616         }
 617
 618         absx = fabs(x);
 619         absy = fabs(y);
 620         m = fmod(absx, absy);
 621
 622         /*
 623            Warning: some subtlety here. What we *want* to know at this point is
 624            whether the remainder m is less than, equal to, or greater than half
 625            of absy. However, we can't do that comparison directly because we
 626            can't be sure that 0.5*absy is representable (the multiplication
 627            might incur precision loss due to underflow). So instead we compare
 628            m with the complement c = absy - m: m < 0.5*absy if and only if m <
 629            c, and so on. The catch is that absy - m might also not be
 630            representable, but it turns out that it doesn't matter:
 631
 632            - if m > 0.5*absy then absy - m is exactly representable, by
 633              Sterbenz's lemma, so m > c
 634            - if m == 0.5*absy then again absy - m is exactly representable
 635              and m == c
 636            - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
 637              in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
 638              c, or (ii) absy is tiny, either subnormal or in the lowest normal
 639              binade. Then absy - m is exactly representable and again m < c.
 640         */
 641
 642         c = absy - m;
 643         if (m < c) {
 644             r = m;
 645         }
 646         else if (m > c) {
 647             r = -c;
 648         }
 649         else {
 650             /*
 651                Here absx is exactly halfway between two multiples of absy,
 652                and we need to choose the even multiple. x now has the form
 653
 654                    absx = n * absy + m
 655
 656                for some integer n (recalling that m = 0.5*absy at this point).
 657                If n is even we want to return m; if n is odd, we need to
 658                return -m.
 659
 660                So
 661
 662                    0.5 * (absx - m) = (n/2) * absy
 663
 664                and now reducing modulo absy gives us:
 665
 666                                                   | m, if n is odd
 667                    fmod(0.5 * (absx - m), absy) = |
 668                                                   | 0, if n is even
 669
 670                Now m - 2.0 * fmod(...) gives the desired result: m
 671                if n is even, -m if m is odd.
 672
 673                Note that all steps in fmod(0.5 * (absx - m), absy)
 674                will be computed exactly, with no rounding error
 675                introduced.
 676             */
 677             assert(m == c);
 678             r = m - 2.0 * fmod(0.5 * (absx - m), absy);
 679         }
 680         return copysign(1.0, x) * r;
 681     }
 682
 683     /* Special values. */
 684     if (Py_IS_NAN(x)) {
 685         return x;
 686     }
 687     if (Py_IS_NAN(y)) {
 688         return y;
 689     }
 690     if (Py_IS_INFINITY(x)) {
 691         return Py_NAN;
 692     }
 693     assert(Py_IS_INFINITY(y));
 694     return x;
 695 }
 696
 697
 698 /*
 699     Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
 700     log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
 701     special values directly, passing positive non-special values through to
 702     the system log/log10.
 703  */
 704
 705 static double
 706 m_log(double x)
 707 {
 708     if (Py_IS_FINITE(x)) {
 709         if (x > 0.0)
 710             return log(x);
 711         errno = EDOM;
 712         if (x == 0.0)
 713             return -Py_HUGE_VAL; /* log(0) = -inf */
 714         else
 715             return Py_NAN; /* log(-ve) = nan */
 716     }
 717     else if (Py_IS_NAN(x))
 718         return x; /* log(nan) = nan */
 719     else if (x > 0.0)
 720         return x; /* log(inf) = inf */
 721     else {
 722         errno = EDOM;
 723         return Py_NAN; /* log(-inf) = nan */
 724     }
 725 }
 726
 727 /*
 728    log2: log to base 2.
 729
 730    Uses an algorithm that should:
 731
 732      (a) produce exact results for powers of 2, and
 733      (b) give a monotonic log2 (for positive finite floats),
 734          assuming that the system log is monotonic.
 735 */
 736
 737 static double
 738 m_log2(double x)
 739 {
 740     if (!Py_IS_FINITE(x)) {
 741         if (Py_IS_NAN(x))
 742             return x; /* log2(nan) = nan */
 743         else if (x > 0.0)
 744             return x; /* log2(+inf) = +inf */
 745         else {
 746             errno = EDOM;
 747             return Py_NAN; /* log2(-inf) = nan, invalid-operation */
 748         }
 749     }
 750
 751     if (x > 0.0) {
 752         return log2(x);
 753     }
 754     else if (x == 0.0) {
 755         errno = EDOM;
 756         return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
 757     }
 758     else {
 759         errno = EDOM;
 760         return Py_NAN; /* log2(-inf) = nan, invalid-operation */
 761     }
 762 }
 763
 764 static double
 765 m_log10(double x)
 766 {
 767     if (Py_IS_FINITE(x)) {
 768         if (x > 0.0)
 769             return log10(x);
 770         errno = EDOM;
 771         if (x == 0.0)
 772             return -Py_HUGE_VAL; /* log10(0) = -inf */
 773         else
 774             return Py_NAN; /* log10(-ve) = nan */
 775     }
 776     else if (Py_IS_NAN(x))
 777         return x; /* log10(nan) = nan */
 778     else if (x > 0.0)
 779         return x; /* log10(inf) = inf */
 780     else {
 781         errno = EDOM;
 782         return Py_NAN; /* log10(-inf) = nan */
 783     }
 784 }
 785
 786
 787 static PyObject *
 788 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
 789 {
 790     PyObject *res, *x;
 791     Py_ssize_t i;
 792
 793     if (nargs == 0) {
 794         return PyLong_FromLong(0);
 795     }
 796     res = PyNumber_Index(args[0]);
 797     if (res == NULL) {
 798         return NULL;
 799     }
 800     if (nargs == 1) {
 801         Py_SETREF(res, PyNumber_Absolute(res));
 802         return res;
 803     }
 804
 805     PyObject *one = _PyLong_GetOne();  // borrowed ref
 806     for (i = 1; i < nargs; i++) {
 807         x = _PyNumber_Index(args[i]);
 808         if (x == NULL) {
 809             Py_DECREF(res);
 810             return NULL;
 811         }
 812         if (res == one) {
 813             /* Fast path: just check arguments.
 814                It is okay to use identity comparison here. */
 815             Py_DECREF(x);
 816             continue;
 817         }
 818         Py_SETREF(res, _PyLong_GCD(res, x));
 819         Py_DECREF(x);
 820         if (res == NULL) {
 821             return NULL;
 822         }
 823     }
 824     return res;
 825 }
 826
 827 PyDoc_STRVAR(math_gcd_doc,
 828 "gcd($module, *integers)\n"
 829 "--\n"
 830 "\n"
 831 "Greatest Common Divisor.");
 832
 833
 834 static PyObject *
 835 long_lcm(PyObject *a, PyObject *b)
 836 {
 837     PyObject *g, *m, *f, *ab;
 838
 839     if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
 840         return PyLong_FromLong(0);
 841     }
 842     g = _PyLong_GCD(a, b);
 843     if (g == NULL) {
 844         return NULL;
 845     }
 846     f = PyNumber_FloorDivide(a, g);
 847     Py_DECREF(g);
 848     if (f == NULL) {
 849         return NULL;
 850     }
 851     m = PyNumber_Multiply(f, b);
 852     Py_DECREF(f);
 853     if (m == NULL) {
 854         return NULL;
 855     }
 856     ab = PyNumber_Absolute(m);
 857     Py_DECREF(m);
 858     return ab;
 859 }
 860
 861
 862 static PyObject *
 863 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
 864 {
 865     PyObject *res, *x;
 866     Py_ssize_t i;
 867
 868     if (nargs == 0) {
 869         return PyLong_FromLong(1);
 870     }
 871     res = PyNumber_Index(args[0]);
 872     if (res == NULL) {
 873         return NULL;
 874     }
 875     if (nargs == 1) {
 876         Py_SETREF(res, PyNumber_Absolute(res));
 877         return res;
 878     }
 879
 880     PyObject *zero = _PyLong_GetZero();  // borrowed ref
 881     for (i = 1; i < nargs; i++) {
 882         x = PyNumber_Index(args[i]);
 883         if (x == NULL) {
 884             Py_DECREF(res);
 885             return NULL;
 886         }
 887         if (res == zero) {
 888             /* Fast path: just check arguments.
 889                It is okay to use identity comparison here. */
 890             Py_DECREF(x);
 891             continue;
 892         }
 893         Py_SETREF(res, long_lcm(res, x));
 894         Py_DECREF(x);
 895         if (res == NULL) {
 896             return NULL;
 897         }
 898     }
 899     return res;
 900 }
 901
 902
 903 PyDoc_STRVAR(math_lcm_doc,
 904 "lcm($module, *integers)\n"
 905 "--\n"
 906 "\n"
 907 "Least Common Multiple.");
 908
 909
 910 /* Call is_error when errno != 0, and where x is the result libm
 911  * returned.  is_error will usually set up an exception and return
 912  * true (1), but may return false (0) without setting up an exception.
 913  */
 914 static int
 915 is_error(double x)
 916 {
 917     int result = 1;     /* presumption of guilt */
 918     assert(errno);      /* non-zero errno is a precondition for calling */
 919     if (errno == EDOM)
 920         PyErr_SetString(PyExc_ValueError, "math domain error");
 921
 922     else if (errno == ERANGE) {
 923         /* ANSI C generally requires libm functions to set ERANGE
 924          * on overflow, but also generally *allows* them to set
 925          * ERANGE on underflow too.  There's no consistency about
 926          * the latter across platforms.
 927          * Alas, C99 never requires that errno be set.
 928          * Here we suppress the underflow errors (libm functions
 929          * should return a zero on underflow, and +- HUGE_VAL on
 930          * overflow, so testing the result for zero suffices to
 931          * distinguish the cases).
 932          *
 933          * On some platforms (Ubuntu/ia64) it seems that errno can be
 934          * set to ERANGE for subnormal results that do *not* underflow
 935          * to zero.  So to be safe, we'll ignore ERANGE whenever the
 936          * function result is less than 1.5 in absolute value.
 937          *
 938          * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
 939          * are correctly detected, since the function may underflow
 940          * toward -1.0 rather than 0.0.
 941          */
 942         if (fabs(x) < 1.5)
 943             result = 0;
 944         else
 945             PyErr_SetString(PyExc_OverflowError,
 946                             "math range error");
 947     }
 948     else
 949         /* Unexpected math error */
 950         PyErr_SetFromErrno(PyExc_ValueError);
 951     return result;
 952 }
 953
 954 /*
 955    math_1 is used to wrap a libm function f that takes a double
 956    argument and returns a double.
 957
 958    The error reporting follows these rules, which are designed to do
 959    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
 960    platforms.
 961
 962    - a NaN result from non-NaN inputs causes ValueError to be raised
 963    - an infinite result from finite inputs causes OverflowError to be
 964      raised if can_overflow is 1, or raises ValueError if can_overflow
 965      is 0.
 966    - if the result is finite and errno == EDOM then ValueError is
 967      raised
 968    - if the result is finite and nonzero and errno == ERANGE then
 969      OverflowError is raised
 970
 971    The last rule is used to catch overflow on platforms which follow
 972    C89 but for which HUGE_VAL is not an infinity.
 973
 974    For the majority of one-argument functions these rules are enough
 975    to ensure that Python's functions behave as specified in 'Annex F'
 976    of the C99 standard, with the 'invalid' and 'divide-by-zero'
 977    floating-point exceptions mapping to Python's ValueError and the
 978    'overflow' floating-point exception mapping to OverflowError.
 979    math_1 only works for functions that don't have singularities *and*
 980    the possibility of overflow; fortunately, that covers everything we
 981    care about right now.
 982 */
 983
 984 static PyObject *
 985 math_1(PyObject *arg, double (*func) (double), int can_overflow)
 986 {
 987     double x, r;
 988     x = PyFloat_AsDouble(arg);
 989     if (x == -1.0 && PyErr_Occurred())
 990         return NULL;
 991     errno = 0;
 992     r = (*func)(x);
 993     if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
 994         PyErr_SetString(PyExc_ValueError,
 995                         "math domain error"); /* invalid arg */
 996         return NULL;
 997     }
 998     if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
 999         if (can_overflow)
1000             PyErr_SetString(PyExc_OverflowError,
1001                             "math range error"); /* overflow */
1002         else
1003             PyErr_SetString(PyExc_ValueError,
1004                             "math domain error"); /* singularity */
1005         return NULL;
1006     }
1007     if (Py_IS_FINITE(r) && errno && is_error(r))
1008         /* this branch unnecessary on most platforms */
1009         return NULL;
1010
1011     return PyFloat_FromDouble(r);
1012 }
1013
1014 /* variant of math_1, to be used when the function being wrapped is known to
1015    set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1016    errno = ERANGE for overflow). */
1017
1018 static PyObject *
1019 math_1a(PyObject *arg, double (*func) (double))
1020 {
1021     double x, r;
1022     x = PyFloat_AsDouble(arg);
1023     if (x == -1.0 && PyErr_Occurred())
1024         return NULL;
1025     errno = 0;
1026     r = (*func)(x);
1027     if (errno && is_error(r))
1028         return NULL;
1029     return PyFloat_FromDouble(r);
1030 }
1031
1032 /*
1033    math_2 is used to wrap a libm function f that takes two double
1034    arguments and returns a double.
1035
1036    The error reporting follows these rules, which are designed to do
1037    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1038    platforms.
1039
1040    - a NaN result from non-NaN inputs causes ValueError to be raised
1041    - an infinite result from finite inputs causes OverflowError to be
1042      raised.
1043    - if the result is finite and errno == EDOM then ValueError is
1044      raised
1045    - if the result is finite and nonzero and errno == ERANGE then
1046      OverflowError is raised
1047
1048    The last rule is used to catch overflow on platforms which follow
1049    C89 but for which HUGE_VAL is not an infinity.
1050
1051    For most two-argument functions (copysign, fmod, hypot, atan2)
1052    these rules are enough to ensure that Python's functions behave as
1053    specified in 'Annex F' of the C99 standard, with the 'invalid' and
1054    'divide-by-zero' floating-point exceptions mapping to Python's
1055    ValueError and the 'overflow' floating-point exception mapping to
1056    OverflowError.
1057 */
1058
1059 static PyObject *
1060 math_2(PyObject *const *args, Py_ssize_t nargs,
1061        double (*func) (double, double), const char *funcname)
1062 {
1063     double x, y, r;
1064     if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1065         return NULL;
1066     x = PyFloat_AsDouble(args[0]);
1067     if (x == -1.0 && PyErr_Occurred()) {
1068         return NULL;
1069     }
1070     y = PyFloat_AsDouble(args[1]);
1071     if (y == -1.0 && PyErr_Occurred()) {
1072         return NULL;
1073     }
1074     errno = 0;
1075     r = (*func)(x, y);
1076     if (Py_IS_NAN(r)) {
1077         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1078             errno = EDOM;
1079         else
1080             errno = 0;
1081     }
1082     else if (Py_IS_INFINITY(r)) {
1083         if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1084             errno = ERANGE;
1085         else
1086             errno = 0;
1087     }
1088     if (errno && is_error(r))
1089         return NULL;
1090     else
1091         return PyFloat_FromDouble(r);
1092 }
1093
1094 #define FUNC1(funcname, func, can_overflow, docstring)                  \
1095     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1096         return math_1(args, func, can_overflow);                            \
1097     }\
1098     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1099
1100 #define FUNC1A(funcname, func, docstring)                               \
1101     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1102         return math_1a(args, func);                                     \
1103     }\
1104     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1105
1106 #define FUNC2(funcname, func, docstring) \
1107     static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1108         return math_2(args, nargs, func, #funcname); \
1109     }\
1110     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1111
1112 FUNC1(acos, acos, 0,
1113       "acos($module, x, /)\n--\n\n"
1114       "Return the arc cosine (measured in radians) of x.\n\n"
1115       "The result is between 0 and pi.")
1116 FUNC1(acosh, acosh, 0,
1117       "acosh($module, x, /)\n--\n\n"
1118       "Return the inverse hyperbolic cosine of x.")
1119 FUNC1(asin, asin, 0,
1120       "asin($module, x, /)\n--\n\n"
1121       "Return the arc sine (measured in radians) of x.\n\n"
1122       "The result is between -pi/2 and pi/2.")
1123 FUNC1(asinh, asinh, 0,
1124       "asinh($module, x, /)\n--\n\n"
1125       "Return the inverse hyperbolic sine of x.")
1126 FUNC1(atan, atan, 0,
1127       "atan($module, x, /)\n--\n\n"
1128       "Return the arc tangent (measured in radians) of x.\n\n"
1129       "The result is between -pi/2 and pi/2.")
1130 FUNC2(atan2, m_atan2,
1131       "atan2($module, y, x, /)\n--\n\n"
1132       "Return the arc tangent (measured in radians) of y/x.\n\n"
1133       "Unlike atan(y/x), the signs of both x and y are considered.")
1134 FUNC1(atanh, atanh, 0,
1135       "atanh($module, x, /)\n--\n\n"
1136       "Return the inverse hyperbolic tangent of x.")
1137 FUNC1(cbrt, cbrt, 0,
1138       "cbrt($module, x, /)\n--\n\n"
1139       "Return the cube root of x.")
1140
1141 /*[clinic input]
1142 math.ceil
1143
1144     x as number: object
1145     /
1146
1147 Return the ceiling of x as an Integral.
1148
1149 This is the smallest integer >= x.
1150 [clinic start generated code]*/
1151
1152 static PyObject *
1153 math_ceil(PyObject *module, PyObject *number)
1154 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1155 {
1156
1157     if (!PyFloat_CheckExact(number)) {
1158         math_module_state *state = get_math_module_state(module);
1159         PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
1160         if (method != NULL) {
1161             PyObject *result = _PyObject_CallNoArgs(method);
1162             Py_DECREF(method);
1163             return result;
1164         }
1165         if (PyErr_Occurred())
1166             return NULL;
1167     }
1168     double x = PyFloat_AsDouble(number);
1169     if (x == -1.0 && PyErr_Occurred())
1170         return NULL;
1171
1172     return PyLong_FromDouble(ceil(x));
1173 }
1174
1175 FUNC2(copysign, copysign,
1176       "copysign($module, x, y, /)\n--\n\n"
1177        "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1178       "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1179       "returns -1.0.\n")
1180 FUNC1(cos, cos, 0,
1181       "cos($module, x, /)\n--\n\n"
1182       "Return the cosine of x (measured in radians).")
1183 FUNC1(cosh, cosh, 1,
1184       "cosh($module, x, /)\n--\n\n"
1185       "Return the hyperbolic cosine of x.")
1186 FUNC1A(erf, erf,
1187        "erf($module, x, /)\n--\n\n"
1188        "Error function at x.")
1189 FUNC1A(erfc, erfc,
1190        "erfc($module, x, /)\n--\n\n"
1191        "Complementary error function at x.")
1192 FUNC1(exp, exp, 1,
1193       "exp($module, x, /)\n--\n\n"
1194       "Return e raised to the power of x.")
1195 FUNC1(exp2, exp2, 1,
1196       "exp2($module, x, /)\n--\n\n"
1197       "Return 2 raised to the power of x.")
1198 FUNC1(expm1, expm1, 1,
1199       "expm1($module, x, /)\n--\n\n"
1200       "Return exp(x)-1.\n\n"
1201       "This function avoids the loss of precision involved in the direct "
1202       "evaluation of exp(x)-1 for small x.")
1203 FUNC1(fabs, fabs, 0,
1204       "fabs($module, x, /)\n--\n\n"
1205       "Return the absolute value of the float x.")
1206
1207 /*[clinic input]
1208 math.floor
1209
1210     x as number: object
1211     /
1212
1213 Return the floor of x as an Integral.
1214
1215 This is the largest integer <= x.
1216 [clinic start generated code]*/
1217
1218 static PyObject *
1219 math_floor(PyObject *module, PyObject *number)
1220 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1221 {
1222     double x;
1223
1224     if (PyFloat_CheckExact(number)) {
1225         x = PyFloat_AS_DOUBLE(number);
1226     }
1227     else
1228     {
1229         math_module_state *state = get_math_module_state(module);
1230         PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
1231         if (method != NULL) {
1232             PyObject *result = _PyObject_CallNoArgs(method);
1233             Py_DECREF(method);
1234             return result;
1235         }
1236         if (PyErr_Occurred())
1237             return NULL;
1238         x = PyFloat_AsDouble(number);
1239         if (x == -1.0 && PyErr_Occurred())
1240             return NULL;
1241     }
1242     return PyLong_FromDouble(floor(x));
1243 }
1244
1245 FUNC1A(gamma, m_tgamma,
1246       "gamma($module, x, /)\n--\n\n"
1247       "Gamma function at x.")
1248 FUNC1A(lgamma, m_lgamma,
1249       "lgamma($module, x, /)\n--\n\n"
1250       "Natural logarithm of absolute value of Gamma function at x.")
1251 FUNC1(log1p, m_log1p, 0,
1252       "log1p($module, x, /)\n--\n\n"
1253       "Return the natural logarithm of 1+x (base e).\n\n"
1254       "The result is computed in a way which is accurate for x near zero.")
1255 FUNC2(remainder, m_remainder,
1256       "remainder($module, x, y, /)\n--\n\n"
1257       "Difference between x and the closest integer multiple of y.\n\n"
1258       "Return x - n*y where n*y is the closest integer multiple of y.\n"
1259       "In the case where x is exactly halfway between two multiples of\n"
1260       "y, the nearest even value of n is used. The result is always exact.")
1261 FUNC1(sin, sin, 0,
1262       "sin($module, x, /)\n--\n\n"
1263       "Return the sine of x (measured in radians).")
1264 FUNC1(sinh, sinh, 1,
1265       "sinh($module, x, /)\n--\n\n"
1266       "Return the hyperbolic sine of x.")
1267 FUNC1(sqrt, sqrt, 0,
1268       "sqrt($module, x, /)\n--\n\n"
1269       "Return the square root of x.")
1270 FUNC1(tan, tan, 0,
1271       "tan($module, x, /)\n--\n\n"
1272       "Return the tangent of x (measured in radians).")
1273 FUNC1(tanh, tanh, 0,
1274       "tanh($module, x, /)\n--\n\n"
1275       "Return the hyperbolic tangent of x.")
1276
1277 /* Precision summation function as msum() by Raymond Hettinger in
1278    <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1279    enhanced with the exact partials sum and roundoff from Mark
1280    Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1281    See those links for more details, proofs and other references.
1282
1283    Note 1: IEEE 754 floating-point semantics with a rounding mode of
1284    roundTiesToEven are assumed.
1285
1286    Note 2: No provision is made for intermediate overflow handling;
1287    therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1288    fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
1289    overflow of the first partial sum.
1290
1291    Note 3: The algorithm has two potential sources of fragility. First, C
1292    permits arithmetic operations on `double`s to be performed in an
1293    intermediate format whose range and precision may be greater than those of
1294    `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1295    example on machines using the now largely historical x87 FPUs. In this case,
1296    `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1297    `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1298    should be safe from this source of errors. Second, an aggressively
1299    optimizing compiler can re-associate operations so that (for example) the
1300    statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1301    re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1302    re-association would be in violation of the C standard, and should not occur
1303    except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1304    -fassociative-math). Such optimizations should be avoided for this module.
1305
1306    Note 4: The signature of math.fsum() differs from builtins.sum()
1307    because the start argument doesn't make sense in the context of
1308    accurate summation.  Since the partials table is collapsed before
1309    returning a result, sum(seq2, start=sum(seq1)) may not equal the
1310    accurate result returned by sum(itertools.chain(seq1, seq2)).
1311 */
1312
1313 #define NUM_PARTIALS  32  /* initial partials array size, on stack */
1314
1315 /* Extend the partials array p[] by doubling its size. */
1316 static int                          /* non-zero on error */
1317 _fsum_realloc(double **p_ptr, Py_ssize_t  n,
1318              double  *ps,    Py_ssize_t *m_ptr)
1319 {
1320     void *v = NULL;
1321     Py_ssize_t m = *m_ptr;
1322
1323     m += m;  /* double */
1324     if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1325         double *p = *p_ptr;
1326         if (p == ps) {
1327             v = PyMem_Malloc(sizeof(double) * m);
1328             if (v != NULL)
1329                 memcpy(v, ps, sizeof(double) * n);
1330         }
1331         else
1332             v = PyMem_Realloc(p, sizeof(double) * m);
1333     }
1334     if (v == NULL) {        /* size overflow or no memory */
1335         PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1336         return 1;
1337     }
1338     *p_ptr = (double*) v;
1339     *m_ptr = m;
1340     return 0;
1341 }
1342
1343 /* Full precision summation of a sequence of floats.
1344
1345    def msum(iterable):
1346        partials = []  # sorted, non-overlapping partial sums
1347        for x in iterable:
1348            i = 0
1349            for y in partials:
1350                if abs(x) < abs(y):
1351                    x, y = y, x
1352                hi = x + y
1353                lo = y - (hi - x)
1354                if lo:
1355                    partials[i] = lo
1356                    i += 1
1357                x = hi
1358            partials[i:] = [x]
1359        return sum_exact(partials)
1360
1361    Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
1362    are exactly equal to x+y.  The inner loop applies hi/lo summation to each
1363    partial so that the list of partial sums remains exact.
1364
1365    Sum_exact() adds the partial sums exactly and correctly rounds the final
1366    result (using the round-half-to-even rule).  The items in partials remain
1367    non-zero, non-special, non-overlapping and strictly increasing in
1368    magnitude, but possibly not all having the same sign.
1369
1370    Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1371 */
1372
1373 /*[clinic input]
1374 math.fsum
1375
1376     seq: object
1377     /
1378
1379 Return an accurate floating point sum of values in the iterable seq.
1380
1381 Assumes IEEE-754 floating point arithmetic.
1382 [clinic start generated code]*/
1383
1384 static PyObject *
1385 math_fsum(PyObject *module, PyObject *seq)
1386 /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
1387 {
1388     PyObject *item, *iter, *sum = NULL;
1389     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1390     double x, y, t, ps[NUM_PARTIALS], *p = ps;
1391     double xsave, special_sum = 0.0, inf_sum = 0.0;
1392     double hi, yr, lo = 0.0;
1393
1394     iter = PyObject_GetIter(seq);
1395     if (iter == NULL)
1396         return NULL;
1397
1398     for(;;) {           /* for x in iterable */
1399         assert(0 <= n && n <= m);
1400         assert((m == NUM_PARTIALS && p == ps) ||
1401                (m >  NUM_PARTIALS && p != NULL));
1402
1403         item = PyIter_Next(iter);
1404         if (item == NULL) {
1405             if (PyErr_Occurred())
1406                 goto _fsum_error;
1407             break;
1408         }
1409         ASSIGN_DOUBLE(x, item, error_with_item);
1410         Py_DECREF(item);
1411
1412         xsave = x;
1413         for (i = j = 0; j < n; j++) {       /* for y in partials */
1414             y = p[j];
1415             if (fabs(x) < fabs(y)) {
1416                 t = x; x = y; y = t;
1417             }
1418             hi = x + y;
1419             yr = hi - x;
1420             lo = y - yr;
1421             if (lo != 0.0)
1422                 p[i++] = lo;
1423             x = hi;
1424         }
1425
1426         n = i;                              /* ps[i:] = [x] */
1427         if (x != 0.0) {
1428             if (! Py_IS_FINITE(x)) {
1429                 /* a nonfinite x could arise either as
1430                    a result of intermediate overflow, or
1431                    as a result of a nan or inf in the
1432                    summands */
1433                 if (Py_IS_FINITE(xsave)) {
1434                     PyErr_SetString(PyExc_OverflowError,
1435                           "intermediate overflow in fsum");
1436                     goto _fsum_error;
1437                 }
1438                 if (Py_IS_INFINITY(xsave))
1439                     inf_sum += xsave;
1440                 special_sum += xsave;
1441                 /* reset partials */
1442                 n = 0;
1443             }
1444             else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1445                 goto _fsum_error;
1446             else
1447                 p[n++] = x;
1448         }
1449     }
1450
1451     if (special_sum != 0.0) {
1452         if (Py_IS_NAN(inf_sum))
1453             PyErr_SetString(PyExc_ValueError,
1454                             "-inf + inf in fsum");
1455         else
1456             sum = PyFloat_FromDouble(special_sum);
1457         goto _fsum_error;
1458     }
1459
1460     hi = 0.0;
1461     if (n > 0) {
1462         hi = p[--n];
1463         /* sum_exact(ps, hi) from the top, stop when the sum becomes
1464            inexact. */
1465         while (n > 0) {
1466             x = hi;
1467             y = p[--n];
1468             assert(fabs(y) < fabs(x));
1469             hi = x + y;
1470             yr = hi - x;
1471             lo = y - yr;
1472             if (lo != 0.0)
1473                 break;
1474         }
1475         /* Make half-even rounding work across multiple partials.
1476            Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1477            digit to two instead of down to zero (the 1e-16 makes the 1
1478            slightly closer to two).  With a potential 1 ULP rounding
1479            error fixed-up, math.fsum() can guarantee commutativity. */
1480         if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1481                       (lo > 0.0 && p[n-1] > 0.0))) {
1482             y = lo * 2.0;
1483             x = hi + y;
1484             yr = x - hi;
1485             if (y == yr)
1486                 hi = x;
1487         }
1488     }
1489     sum = PyFloat_FromDouble(hi);
1490
1491   _fsum_error:
1492     Py_DECREF(iter);
1493     if (p != ps)
1494         PyMem_Free(p);
1495     return sum;
1496
1497   error_with_item:
1498     Py_DECREF(item);
1499     goto _fsum_error;
1500 }
1501
1502 #undef NUM_PARTIALS
1503
1504
1505 static unsigned long
1506 count_set_bits(unsigned long n)
1507 {
1508     unsigned long count = 0;
1509     while (n != 0) {
1510         ++count;
1511         n &= n - 1; /* clear least significant bit */
1512     }
1513     return count;
1514 }
1515
1516 /* Integer square root
1517
1518 Given a nonnegative integer `n`, we want to compute the largest integer
1519 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1520 square root of `n`.
1521
1522 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1523 a positive integer `n`, the algorithm produces at each iteration an integer
1524 approximation `a` to the square root of `n >> s` for some even integer `s`,
1525 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1526 zero and we have an approximation to the square root of `n` itself.
1527
1528 At every step, the approximation `a` is strictly within 1.0 of the true square
1529 root, so we have
1530
1531     (a - 1)**2 < (n >> s) < (a + 1)**2
1532
1533 After the final iteration, a check-and-correct step is needed to determine
1534 whether `a` or `a - 1` gives the desired integer square root of `n`.
1535
1536 The algorithm is remarkable in its simplicity. There's no need for a
1537 per-iteration check-and-correct step, and termination is straightforward: the
1538 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1539 for `n > 1`). The only tricky part of the correctness proof is in establishing
1540 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1541 iteration to the next. A sketch of the proof of this is given below.
1542
1543 In addition to the proof sketch, a formal, computer-verified proof
1544 of correctness (using Lean) of an equivalent recursive algorithm can be found
1545 here:
1546
1547     https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1548
1549
1550 Here's Python code equivalent to the C implementation below:
1551
1552     def isqrt(n):
1553         """
1554         Return the integer part of the square root of the input.
1555         """
1556         n = operator.index(n)
1557
1558         if n < 0:
1559             raise ValueError("isqrt() argument must be nonnegative")
1560         if n == 0:
1561             return 0
1562
1563         c = (n.bit_length() - 1) // 2
1564         a = 1
1565         d = 0
1566         for s in reversed(range(c.bit_length())):
1567             # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1568             e = d
1569             d = c >> s
1570             a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1571
1572         return a - (a*a > n)
1573
1574
1575 Sketch of proof of correctness
1576 ------------------------------
1577
1578 The delicate part of the correctness proof is showing that the loop invariant
1579 is preserved from one iteration to the next. That is, just before the line
1580
1581     a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1582
1583 is executed in the above code, we know that
1584
1585     (1)  (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1586
1587 (since `e` is always the value of `d` from the previous iteration). We must
1588 prove that after that line is executed, we have
1589
1590     (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1591
1592 To facilitate the proof, we make some changes of notation. Write `m` for
1593 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1594
1595     b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1596
1597 or equivalently:
1598
1599     (2)  b = (a << d - e - 1) + (m >> d - e + 1) // a
1600
1601 Then we can rewrite (1) as:
1602
1603     (3)  (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1604
1605 and we must show that (b - 1)**2 < m < (b + 1)**2.
1606
1607 From this point on, we switch to mathematical notation, so `/` means exact
1608 division rather than integer division and `^` is used for exponentiation. We
1609 use the `√` symbol for the exact square root. In (3), we can remove the
1610 implicit floor operation to give:
1611
1612     (4)  (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1613
1614 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1615
1616     (5)  0 <= | 2^(d-e)a - √m | < 2^(d-e)
1617
1618 Squaring and dividing through by `2^(d-e+1) a` gives
1619
1620     (6)  0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1621
1622 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1623 right-hand side of (6) with `1`, and now replacing the central
1624 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1625
1626     (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1627
1628 Or equivalently, from (2):
1629
1630     (7) -1 < b - √m < 1
1631
1632 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1633 to prove.
1634
1635 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1636 a` that was used to get line (7) above. From the definition of `c`, we have
1637 `4^c <= n`, which implies
1638
1639     (8)  4^d <= m
1640
1641 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1642 that `2d - 2e - 1 <= d` and hence that
1643
1644     (9)  4^(2d - 2e - 1) <= m
1645
1646 Dividing both sides by `4^(d - e)` gives
1647
1648     (10)  4^(d - e - 1) <= m / 4^(d - e)
1649
1650 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1651
1652     (11)  4^(d - e - 1) < (a + 1)^2
1653
1654 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1655 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1656 completes the proof sketch.
1657
1658 */
1659
1660 /*
1661     The _approximate_isqrt_tab table provides approximate square roots for
1662     16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1663
1664         a = _approximate_isqrt_tab[(n >> 8) - 64]
1665
1666     is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1667
1668     The table was computed in Python using the expression:
1669
1670         [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1671 */
1672
1673 static const uint8_t _approximate_isqrt_tab[192] = {
1674     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1675     140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1676     151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1677     160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1678     170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1679     179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1680     187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1681     195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1682     203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1683     210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1684     217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1685     224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1686     231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1687     238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1688     244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1689     250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1690 };
1691
1692 /* Approximate square root of a large 64-bit integer.
1693
1694    Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1695    satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1696
1697 static inline uint32_t
1698 _approximate_isqrt(uint64_t n)
1699 {
1700     uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1701     u = (u << 7) + (uint32_t)(n >> 41) / u;
1702     return (u << 15) + (uint32_t)((n >> 17) / u);
1703 }
1704
1705 /*[clinic input]
1706 math.isqrt
1707
1708     n: object
1709     /
1710
1711 Return the integer part of the square root of the input.
1712 [clinic start generated code]*/
1713
1714 static PyObject *
1715 math_isqrt(PyObject *module, PyObject *n)
1716 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1717 {
1718     int a_too_large, c_bit_length;
1719     size_t c, d;
1720     uint64_t m;
1721     uint32_t u;
1722     PyObject *a = NULL, *b;
1723
1724     n = _PyNumber_Index(n);
1725     if (n == NULL) {
1726         return NULL;
1727     }
1728
1729     if (_PyLong_Sign(n) < 0) {
1730         PyErr_SetString(
1731             PyExc_ValueError,
1732             "isqrt() argument must be nonnegative");
1733         goto error;
1734     }
1735     if (_PyLong_Sign(n) == 0) {
1736         Py_DECREF(n);
1737         return PyLong_FromLong(0);
1738     }
1739
1740     /* c = (n.bit_length() - 1) // 2 */
1741     c = _PyLong_NumBits(n);
1742     if (c == (size_t)(-1)) {
1743         goto error;
1744     }
1745     c = (c - 1U) / 2U;
1746
1747     /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1748        fast, almost branch-free algorithm. */
1749     if (c <= 31U) {
1750         int shift = 31 - (int)c;
1751         m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1752         Py_DECREF(n);
1753         if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1754             return NULL;
1755         }
1756         u = _approximate_isqrt(m << 2*shift) >> shift;
1757         u -= (uint64_t)u * u > m;
1758         return PyLong_FromUnsignedLong(u);
1759     }
1760
1761     /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1762        arithmetic, then switch to using Python long integers. */
1763
1764     /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1765     c_bit_length = 6;
1766     while ((c >> c_bit_length) > 0U) {
1767         ++c_bit_length;
1768     }
1769
1770     /* Initialise d and a. */
1771     d = c >> (c_bit_length - 5);
1772     b = _PyLong_Rshift(n, 2U*c - 62U);
1773     if (b == NULL) {
1774         goto error;
1775     }
1776     m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1777     Py_DECREF(b);
1778     if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1779         goto error;
1780     }
1781     u = _approximate_isqrt(m) >> (31U - d);
1782     a = PyLong_FromUnsignedLong(u);
1783     if (a == NULL) {
1784         goto error;
1785     }
1786
1787     for (int s = c_bit_length - 6; s >= 0; --s) {
1788         PyObject *q;
1789         size_t e = d;
1790
1791         d = c >> s;
1792
1793         /* q = (n >> 2*c - e - d + 1) // a */
1794         q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1795         if (q == NULL) {
1796             goto error;
1797         }
1798         Py_SETREF(q, PyNumber_FloorDivide(q, a));
1799         if (q == NULL) {
1800             goto error;
1801         }
1802
1803         /* a = (a << d - 1 - e) + q */
1804         Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1805         if (a == NULL) {
1806             Py_DECREF(q);
1807             goto error;
1808         }
1809         Py_SETREF(a, PyNumber_Add(a, q));
1810         Py_DECREF(q);
1811         if (a == NULL) {
1812             goto error;
1813         }
1814     }
1815
1816     /* The correct result is either a or a - 1. Figure out which, and
1817        decrement a if necessary. */
1818
1819     /* a_too_large = n < a * a */
1820     b = PyNumber_Multiply(a, a);
1821     if (b == NULL) {
1822         goto error;
1823     }
1824     a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1825     Py_DECREF(b);
1826     if (a_too_large == -1) {
1827         goto error;
1828     }
1829
1830     if (a_too_large) {
1831         Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1832     }
1833     Py_DECREF(n);
1834     return a;
1835
1836   error:
1837     Py_XDECREF(a);
1838     Py_DECREF(n);
1839     return NULL;
1840 }
1841
1842 /* Divide-and-conquer factorial algorithm
1843  *
1844  * Based on the formula and pseudo-code provided at:
1845  * http://www.luschny.de/math/factorial/binarysplitfact.html
1846  *
1847  * Faster algorithms exist, but they're more complicated and depend on
1848  * a fast prime factorization algorithm.
1849  *
1850  * Notes on the algorithm
1851  * ----------------------
1852  *
1853  * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
1854  * computed separately, and then combined using a left shift.
1855  *
1856  * The function factorial_odd_part computes the odd part m (i.e., the greatest
1857  * odd divisor) of factorial(n), using the formula:
1858  *
1859  *   factorial_odd_part(n) =
1860  *
1861  *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1862  *
1863  * Example: factorial_odd_part(20) =
1864  *
1865  *        (1) *
1866  *        (1) *
1867  *        (1 * 3 * 5) *
1868  *        (1 * 3 * 5 * 7 * 9) *
1869  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1870  *
1871  * Here i goes from large to small: the first term corresponds to i=4 (any
1872  * larger i gives an empty product), and the last term corresponds to i=0.
1873  * Each term can be computed from the last by multiplying by the extra odd
1874  * numbers required: e.g., to get from the penultimate term to the last one,
1875  * we multiply by (11 * 13 * 15 * 17 * 19).
1876  *
1877  * To see a hint of why this formula works, here are the same numbers as above
1878  * but with the even parts (i.e., the appropriate powers of 2) included.  For
1879  * each subterm in the product for i, we multiply that subterm by 2**i:
1880  *
1881  *   factorial(20) =
1882  *
1883  *        (16) *
1884  *        (8) *
1885  *        (4 * 12 * 20) *
1886  *        (2 * 6 * 10 * 14 * 18) *
1887  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1888  *
1889  * The factorial_partial_product function computes the product of all odd j in
1890  * range(start, stop) for given start and stop.  It's used to compute the
1891  * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
1892  * operates recursively, repeatedly splitting the range into two roughly equal
1893  * pieces until the subranges are small enough to be computed using only C
1894  * integer arithmetic.
1895  *
1896  * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1897  * the factorial) is computed independently in the main math_factorial
1898  * function.  By standard results, its value is:
1899  *
1900  *    two_valuation = n//2 + n//4 + n//8 + ....
1901  *
1902  * It can be shown (e.g., by complete induction on n) that two_valuation is
1903  * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1904  * '1'-bits in the binary expansion of n.
1905  */
1906
1907 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1908  * divide and conquer.  Assumes start and stop are odd and stop > start.
1909  * max_bits must be >= bit_length(stop - 2). */
1910
1911 static PyObject *
1912 factorial_partial_product(unsigned long start, unsigned long stop,
1913                           unsigned long max_bits)
1914 {
1915     unsigned long midpoint, num_operands;
1916     PyObject *left = NULL, *right = NULL, *result = NULL;
1917
1918     /* If the return value will fit an unsigned long, then we can
1919      * multiply in a tight, fast loop where each multiply is O(1).
1920      * Compute an upper bound on the number of bits required to store
1921      * the answer.
1922      *
1923      * Storing some integer z requires floor(lg(z))+1 bits, which is
1924      * conveniently the value returned by bit_length(z).  The
1925      * product x*y will require at most
1926      * bit_length(x) + bit_length(y) bits to store, based
1927      * on the idea that lg product = lg x + lg y.
1928      *
1929      * We know that stop - 2 is the largest number to be multiplied.  From
1930      * there, we have: bit_length(answer) <= num_operands *
1931      * bit_length(stop - 2)
1932      */
1933
1934     num_operands = (stop - start) / 2;
1935     /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1936      * unlikely case of an overflow in num_operands * max_bits. */
1937     if (num_operands <= 8 * SIZEOF_LONG &&
1938         num_operands * max_bits <= 8 * SIZEOF_LONG) {
1939         unsigned long j, total;
1940         for (total = start, j = start + 2; j < stop; j += 2)
1941             total *= j;
1942         return PyLong_FromUnsignedLong(total);
1943     }
1944
1945     /* find midpoint of range(start, stop), rounded up to next odd number. */
1946     midpoint = (start + num_operands) | 1;
1947     left = factorial_partial_product(start, midpoint,
1948                                      _Py_bit_length(midpoint - 2));
1949     if (left == NULL)
1950         goto error;
1951     right = factorial_partial_product(midpoint, stop, max_bits);
1952     if (right == NULL)
1953         goto error;
1954     result = PyNumber_Multiply(left, right);
1955
1956   error:
1957     Py_XDECREF(left);
1958     Py_XDECREF(right);
1959     return result;
1960 }
1961
1962 /* factorial_odd_part:  compute the odd part of factorial(n). */
1963
1964 static PyObject *
1965 factorial_odd_part(unsigned long n)
1966 {
1967     long i;
1968     unsigned long v, lower, upper;
1969     PyObject *partial, *tmp, *inner, *outer;
1970
1971     inner = PyLong_FromLong(1);
1972     if (inner == NULL)
1973         return NULL;
1974     outer = Py_NewRef(inner);
1975
1976     upper = 3;
1977     for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
1978         v = n >> i;
1979         if (v <= 2)
1980             continue;
1981         lower = upper;
1982         /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
1983         upper = (v + 1) | 1;
1984         /* Here inner is the product of all odd integers j in the range (0,
1985            n/2**(i+1)].  The factorial_partial_product call below gives the
1986            product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
1987         partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
1988         /* inner *= partial */
1989         if (partial == NULL)
1990             goto error;
1991         tmp = PyNumber_Multiply(inner, partial);
1992         Py_DECREF(partial);
1993         if (tmp == NULL)
1994             goto error;
1995         Py_SETREF(inner, tmp);
1996         /* Now inner is the product of all odd integers j in the range (0,
1997            n/2**i], giving the inner product in the formula above. */
1998
1999         /* outer *= inner; */
2000         tmp = PyNumber_Multiply(outer, inner);
2001         if (tmp == NULL)
2002             goto error;
2003         Py_SETREF(outer, tmp);
2004     }
2005     Py_DECREF(inner);
2006     return outer;
2007
2008   error:
2009     Py_DECREF(outer);
2010     Py_DECREF(inner);
2011     return NULL;
2012 }
2013
2014
2015 /* Lookup table for small factorial values */
2016
2017 static const unsigned long SmallFactorials[] = {
2018     1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2019     362880, 3628800, 39916800, 479001600,
2020 #if SIZEOF_LONG >= 8
2021     6227020800, 87178291200, 1307674368000,
2022     20922789888000, 355687428096000, 6402373705728000,
2023     121645100408832000, 2432902008176640000
2024 #endif
2025 };
2026
2027 /*[clinic input]
2028 math.factorial
2029
2030     n as arg: object
2031     /
2032
2033 Find n!.
2034
2035 Raise a ValueError if x is negative or non-integral.
2036 [clinic start generated code]*/
2037
2038 static PyObject *
2039 math_factorial(PyObject *module, PyObject *arg)
2040 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2041 {
2042     long x, two_valuation;
2043     int overflow;
2044     PyObject *result, *odd_part;
2045
2046     x = PyLong_AsLongAndOverflow(arg, &overflow);
2047     if (x == -1 && PyErr_Occurred()) {
2048         return NULL;
2049     }
2050     else if (overflow == 1) {
2051         PyErr_Format(PyExc_OverflowError,
2052                      "factorial() argument should not exceed %ld",
2053                      LONG_MAX);
2054         return NULL;
2055     }
2056     else if (overflow == -1 || x < 0) {
2057         PyErr_SetString(PyExc_ValueError,
2058                         "factorial() not defined for negative values");
2059         return NULL;
2060     }
2061
2062     /* use lookup table if x is small */
2063     if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2064         return PyLong_FromUnsignedLong(SmallFactorials[x]);
2065
2066     /* else express in the form odd_part * 2**two_valuation, and compute as
2067        odd_part << two_valuation. */
2068     odd_part = factorial_odd_part(x);
2069     if (odd_part == NULL)
2070         return NULL;
2071     two_valuation = x - count_set_bits(x);
2072     result = _PyLong_Lshift(odd_part, two_valuation);
2073     Py_DECREF(odd_part);
2074     return result;
2075 }
2076
2077
2078 /*[clinic input]
2079 math.trunc
2080
2081     x: object
2082     /
2083
2084 Truncates the Real x to the nearest Integral toward 0.
2085
2086 Uses the __trunc__ magic method.
2087 [clinic start generated code]*/
2088
2089 static PyObject *
2090 math_trunc(PyObject *module, PyObject *x)
2091 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2092 {
2093     PyObject *trunc, *result;
2094
2095     if (PyFloat_CheckExact(x)) {
2096         return PyFloat_Type.tp_as_number->nb_int(x);
2097     }
2098
2099     if (Py_TYPE(x)->tp_dict == NULL) {
2100         if (PyType_Ready(Py_TYPE(x)) < 0)
2101             return NULL;
2102     }
2103
2104     math_module_state *state = get_math_module_state(module);
2105     trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
2106     if (trunc == NULL) {
2107         if (!PyErr_Occurred())
2108             PyErr_Format(PyExc_TypeError,
2109                          "type %.100s doesn't define __trunc__ method",
2110                          Py_TYPE(x)->tp_name);
2111         return NULL;
2112     }
2113     result = _PyObject_CallNoArgs(trunc);
2114     Py_DECREF(trunc);
2115     return result;
2116 }
2117
2118
2119 /*[clinic input]
2120 math.frexp
2121
2122     x: double
2123     /
2124
2125 Return the mantissa and exponent of x, as pair (m, e).
2126
2127 m is a float and e is an int, such that x = m * 2.**e.
2128 If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
2129 [clinic start generated code]*/
2130
2131 static PyObject *
2132 math_frexp_impl(PyObject *module, double x)
2133 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2134 {
2135     int i;
2136     /* deal with special cases directly, to sidestep platform
2137        differences */
2138     if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2139         i = 0;
2140     }
2141     else {
2142         x = frexp(x, &i);
2143     }
2144     return Py_BuildValue("(di)", x, i);
2145 }
2146
2147
2148 /*[clinic input]
2149 math.ldexp
2150
2151     x: double
2152     i: object
2153     /
2154
2155 Return x * (2**i).
2156
2157 This is essentially the inverse of frexp().
2158 [clinic start generated code]*/
2159
2160 static PyObject *
2161 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2162 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2163 {
2164     double r;
2165     long exp;
2166     int overflow;
2167
2168     if (PyLong_Check(i)) {
2169         /* on overflow, replace exponent with either LONG_MAX
2170            or LONG_MIN, depending on the sign. */
2171         exp = PyLong_AsLongAndOverflow(i, &overflow);
2172         if (exp == -1 && PyErr_Occurred())
2173             return NULL;
2174         if (overflow)
2175             exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2176     }
2177     else {
2178         PyErr_SetString(PyExc_TypeError,
2179                         "Expected an int as second argument to ldexp.");
2180         return NULL;
2181     }
2182
2183     if (x == 0. || !Py_IS_FINITE(x)) {
2184         /* NaNs, zeros and infinities are returned unchanged */
2185         r = x;
2186         errno = 0;
2187     } else if (exp > INT_MAX) {
2188         /* overflow */
2189         r = copysign(Py_HUGE_VAL, x);
2190         errno = ERANGE;
2191     } else if (exp < INT_MIN) {
2192         /* underflow to +-0 */
2193         r = copysign(0., x);
2194         errno = 0;
2195     } else {
2196         errno = 0;
2197         r = ldexp(x, (int)exp);
2198         if (Py_IS_INFINITY(r))
2199             errno = ERANGE;
2200     }
2201
2202     if (errno && is_error(r))
2203         return NULL;
2204     return PyFloat_FromDouble(r);
2205 }
2206
2207
2208 /*[clinic input]
2209 math.modf
2210
2211     x: double
2212     /
2213
2214 Return the fractional and integer parts of x.
2215
2216 Both results carry the sign of x and are floats.
2217 [clinic start generated code]*/
2218
2219 static PyObject *
2220 math_modf_impl(PyObject *module, double x)
2221 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2222 {
2223     double y;
2224     /* some platforms don't do the right thing for NaNs and
2225        infinities, so we take care of special cases directly. */
2226     if (!Py_IS_FINITE(x)) {
2227         if (Py_IS_INFINITY(x))
2228             return Py_BuildValue("(dd)", copysign(0., x), x);
2229         else if (Py_IS_NAN(x))
2230             return Py_BuildValue("(dd)", x, x);
2231     }
2232
2233     errno = 0;
2234     x = modf(x, &y);
2235     return Py_BuildValue("(dd)", x, y);
2236 }
2237
2238
2239 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2240    do that by itself -- loghelper can.  func is log or log10, and name is
2241    "log" or "log10".  Note that overflow of the result isn't possible: an int
2242    can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2243    than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2244    small enough to fit in an IEEE single.  log and log10 are even smaller.
2245    However, intermediate overflow is possible for an int if the number of bits
2246    in that int is larger than PY_SSIZE_T_MAX. */
2247
2248 static PyObject*
2249 loghelper(PyObject* arg, double (*func)(double))
2250 {
2251     /* If it is int, do it ourselves. */
2252     if (PyLong_Check(arg)) {
2253         double x, result;
2254         Py_ssize_t e;
2255
2256         /* Negative or zero inputs give a ValueError. */
2257         if (Py_SIZE(arg) <= 0) {
2258             PyErr_SetString(PyExc_ValueError,
2259                             "math domain error");
2260             return NULL;
2261         }
2262
2263         x = PyLong_AsDouble(arg);
2264         if (x == -1.0 && PyErr_Occurred()) {
2265             if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2266                 return NULL;
2267             /* Here the conversion to double overflowed, but it's possible
2268                to compute the log anyway.  Clear the exception and continue. */
2269             PyErr_Clear();
2270             x = _PyLong_Frexp((PyLongObject *)arg, &e);
2271             if (x == -1.0 && PyErr_Occurred())
2272                 return NULL;
2273             /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2274             result = func(x) + func(2.0) * e;
2275         }
2276         else
2277             /* Successfully converted x to a double. */
2278             result = func(x);
2279         return PyFloat_FromDouble(result);
2280     }
2281
2282     /* Else let libm handle it by itself. */
2283     return math_1(arg, func, 0);
2284 }
2285
2286
2287 /*[clinic input]
2288 math.log
2289
2290     x:    object
2291     [
2292     base: object(c_default="NULL") = math.e
2293     ]
2294     /
2295
2296 Return the logarithm of x to the given base.
2297
2298 If the base not specified, returns the natural logarithm (base e) of x.
2299 [clinic start generated code]*/
2300
2301 static PyObject *
2302 math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2303               PyObject *base)
2304 /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
2305 {
2306     PyObject *num, *den;
2307     PyObject *ans;
2308
2309     num = loghelper(x, m_log);
2310     if (num == NULL || base == NULL)
2311         return num;
2312
2313     den = loghelper(base, m_log);
2314     if (den == NULL) {
2315         Py_DECREF(num);
2316         return NULL;
2317     }
2318
2319     ans = PyNumber_TrueDivide(num, den);
2320     Py_DECREF(num);
2321     Py_DECREF(den);
2322     return ans;
2323 }
2324
2325
2326 /*[clinic input]
2327 math.log2
2328
2329     x: object
2330     /
2331
2332 Return the base 2 logarithm of x.
2333 [clinic start generated code]*/
2334
2335 static PyObject *
2336 math_log2(PyObject *module, PyObject *x)
2337 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2338 {
2339     return loghelper(x, m_log2);
2340 }
2341
2342
2343 /*[clinic input]
2344 math.log10
2345
2346     x: object
2347     /
2348
2349 Return the base 10 logarithm of x.
2350 [clinic start generated code]*/
2351
2352 static PyObject *
2353 math_log10(PyObject *module, PyObject *x)
2354 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2355 {
2356     return loghelper(x, m_log10);
2357 }
2358
2359
2360 /*[clinic input]
2361 math.fmod
2362
2363     x: double
2364     y: double
2365     /
2366
2367 Return fmod(x, y), according to platform C.
2368
2369 x % y may differ.
2370 [clinic start generated code]*/
2371
2372 static PyObject *
2373 math_fmod_impl(PyObject *module, double x, double y)
2374 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2375 {
2376     double r;
2377     /* fmod(x, +/-Inf) returns x for finite x. */
2378     if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2379         return PyFloat_FromDouble(x);
2380     errno = 0;
2381     r = fmod(x, y);
2382     if (Py_IS_NAN(r)) {
2383         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2384             errno = EDOM;
2385         else
2386             errno = 0;
2387     }
2388     if (errno && is_error(r))
2389         return NULL;
2390     else
2391         return PyFloat_FromDouble(r);
2392 }
2393
2394 /*
2395 Given a *vec* of values, compute the vector norm:
2396
2397     sqrt(sum(x ** 2 for x in vec))
2398
2399 The *max* variable should be equal to the largest fabs(x).
2400 The *n* variable is the length of *vec*.
2401 If n==0, then *max* should be 0.0.
2402 If an infinity is present in the vec, *max* should be INF.
2403 The *found_nan* variable indicates whether some member of
2404 the *vec* is a NaN.
2405
2406 To avoid overflow/underflow and to achieve high accuracy giving results
2407 that are almost always correctly rounded, four techniques are used:
2408
2409 * lossless scaling using a power-of-two scaling factor
2410 * accurate squaring using Veltkamp-Dekker splitting [1]
2411   or an equivalent with an fma() call
2412 * compensated summation using a variant of the Neumaier algorithm [2]
2413 * differential correction of the square root [3]
2414
2415 The usual presentation of the Neumaier summation algorithm has an
2416 expensive branch depending on which operand has the larger
2417 magnitude.  We avoid this cost by arranging the calculation so that
2418 fabs(csum) is always as large as fabs(x).
2419
2420 To establish the invariant, *csum* is initialized to 1.0 which is
2421 always larger than x**2 after scaling or after division by *max*.
2422 After the loop is finished, the initial 1.0 is subtracted out for a
2423 net zero effect on the final sum.  Since *csum* will be greater than
2424 1.0, the subtraction of 1.0 will not cause fractional digits to be
2425 dropped from *csum*.
2426
2427 To get the full benefit from compensated summation, the largest
2428 addend should be in the range: 0.5 <= |x| <= 1.0.  Accordingly,
2429 scaling or division by *max* should not be skipped even if not
2430 otherwise needed to prevent overflow or loss of precision.
2431
2432 The assertion that hi*hi <= 1.0 is a bit subtle.  Each vector element
2433 gets scaled to a magnitude below 1.0.  The Veltkamp-Dekker splitting
2434 algorithm gives a *hi* value that is correctly rounded to half
2435 precision.  When a value at or below 1.0 is correctly rounded, it
2436 never goes above 1.0.  And when values at or below 1.0 are squared,
2437 they remain at or below 1.0, thus preserving the summation invariant.
2438
2439 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2440 each scaled vector element has a magnitude less than 1.0.  After the
2441 Veltkamp split, *lo* has a maximum value of 2**-27.  So the maximum
2442 value of *lo* squared is 2**-54.  The value of ulp(1.0)/2.0 is 2**-53.
2443 Given that csum >= 1.0, we have:
2444     lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2445 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2446
2447 To minimize loss of information during the accumulation of fractional
2448 values, each term has a separate accumulator.  This also breaks up
2449 sequential dependencies in the inner loop so the CPU can maximize
2450 floating point throughput. [4]  On a 2.6 GHz Haswell, adding one
2451 dimension has an incremental cost of only 5ns -- for example when
2452 moving from hypot(x,y) to hypot(x,y,z).
2453
2454 The square root differential correction is needed because a
2455 correctly rounded square root of a correctly rounded sum of
2456 squares can still be off by as much as one ulp.
2457
2458 The differential correction starts with a value *x* that is
2459 the difference between the square of *h*, the possibly inaccurately
2460 rounded square root, and the accurately computed sum of squares.
2461 The correction is the first order term of the Maclaurin series
2462 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2463
2464 Essentially, this differential correction is equivalent to one
2465 refinement step in Newton's divide-and-average square root
2466 algorithm, effectively doubling the number of accurate bits.
2467 This technique is used in Dekker's SQRT2 algorithm and again in
2468 Borges' ALGORITHM 4 and 5.
2469
2470 The hypot() function is faithfully rounded (less than 1 ulp error)
2471 and usually correctly rounded (within 1/2 ulp).  The squaring
2472 step is exact.  The Neumaier summation computes as if in doubled
2473 precision (106 bits) and has the advantage that its input squares
2474 are non-negative so that the condition number of the sum is one.
2475 The square root with a differential correction is likewise computed
2476 as if in double precision.
2477
2478 For n <= 1000, prior to the final addition that rounds the overall
2479 result, the internal accuracy of "h" together with its correction of
2480 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2481 against a Decimal implementation with prec=300.  After 100 million
2482 trials, no incorrectly rounded examples were found.  In addition,
2483 perfect commutativity (all permutations are exactly equal) was
2484 verified for 1 billion random inputs with n=5. [7]
2485
2486 References:
2487
2488 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2489 2. Compensated summation:  http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2490 3. Square root differential correction:  https://arxiv.org/pdf/1904.09481.pdf
2491 4. Data dependency graph:  https://bugs.python.org/file49439/hypot.png
2492 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2493 6. Analysis of internal accuracy:  https://bugs.python.org/file49484/best_frac.py
2494 7. Commutativity test:  https://bugs.python.org/file49448/test_hypot_commutativity.py
2495
2496 */
2497
2498 static inline double
2499 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2500 {
2501     double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2502     DoubleLength pr, sm;
2503     int max_e;
2504     Py_ssize_t i;
2505
2506     if (Py_IS_INFINITY(max)) {
2507         return max;
2508     }
2509     if (found_nan) {
2510         return Py_NAN;
2511     }
2512     if (max == 0.0 || n <= 1) {
2513         return max;
2514     }
2515     frexp(max, &max_e);
2516     if (max_e < -1023) {
2517         /* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
2518            So we first perform lossless scaling from subnormals back to normals,
2519            then recurse back to vector_norm(), and then finally undo the scaling.
2520         */
2521         for (i=0 ; i < n ; i++) {
2522             vec[i] /= DBL_MIN;
2523         }
2524         return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2525     }
2526     scale = ldexp(1.0, -max_e);
2527     assert(max * scale >= 0.5);
2528     assert(max * scale < 1.0);
2529     for (i=0 ; i < n ; i++) {
2530         x = vec[i];
2531         assert(Py_IS_FINITE(x) && fabs(x) <= max);
2532
2533         x *= scale;
2534         assert(fabs(x) < 1.0);
2535
2536         pr = dl_mul(x, x);
2537         assert(pr.hi <= 1.0);
2538
2539         sm = dl_fast_sum(csum, pr.hi);
2540         csum = sm.hi;
2541         frac1 += pr.lo;
2542         frac2 += sm.lo;
2543     }
2544     h = sqrt(csum - 1.0 + (frac1 + frac2));
2545     pr = dl_mul(-h, h);
2546     sm = dl_fast_sum(csum, pr.hi);
2547     csum = sm.hi;
2548     frac1 += pr.lo;
2549     frac2 += sm.lo;
2550     x = csum - 1.0 + (frac1 + frac2);
2551     return (h + x / (2.0 * h)) / scale;
2552 }
2553
2554 #define NUM_STACK_ELEMS 16
2555
2556 /*[clinic input]
2557 math.dist
2558
2559     p: object
2560     q: object
2561     /
2562
2563 Return the Euclidean distance between two points p and q.
2564
2565 The points should be specified as sequences (or iterables) of
2566 coordinates.  Both inputs must have the same dimension.
2567
2568 Roughly equivalent to:
2569     sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2570 [clinic start generated code]*/
2571
2572 static PyObject *
2573 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2574 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2575 {
2576     PyObject *item;
2577     double max = 0.0;
2578     double x, px, qx, result;
2579     Py_ssize_t i, m, n;
2580     int found_nan = 0, p_allocated = 0, q_allocated = 0;
2581     double diffs_on_stack[NUM_STACK_ELEMS];
2582     double *diffs = diffs_on_stack;
2583
2584     if (!PyTuple_Check(p)) {
2585         p = PySequence_Tuple(p);
2586         if (p == NULL) {
2587             return NULL;
2588         }
2589         p_allocated = 1;
2590     }
2591     if (!PyTuple_Check(q)) {
2592         q = PySequence_Tuple(q);
2593         if (q == NULL) {
2594             if (p_allocated) {
2595                 Py_DECREF(p);
2596             }
2597             return NULL;
2598         }
2599         q_allocated = 1;
2600     }
2601
2602     m = PyTuple_GET_SIZE(p);
2603     n = PyTuple_GET_SIZE(q);
2604     if (m != n) {
2605         PyErr_SetString(PyExc_ValueError,
2606                         "both points must have the same number of dimensions");
2607         goto error_exit;
2608     }
2609     if (n > NUM_STACK_ELEMS) {
2610         diffs = (double *) PyObject_Malloc(n * sizeof(double));
2611         if (diffs == NULL) {
2612             PyErr_NoMemory();
2613             goto error_exit;
2614         }
2615     }
2616     for (i=0 ; i<n ; i++) {
2617         item = PyTuple_GET_ITEM(p, i);
2618         ASSIGN_DOUBLE(px, item, error_exit);
2619         item = PyTuple_GET_ITEM(q, i);
2620         ASSIGN_DOUBLE(qx, item, error_exit);
2621         x = fabs(px - qx);
2622         diffs[i] = x;
2623         found_nan |= Py_IS_NAN(x);
2624         if (x > max) {
2625             max = x;
2626         }
2627     }
2628     result = vector_norm(n, diffs, max, found_nan);
2629     if (diffs != diffs_on_stack) {
2630         PyObject_Free(diffs);
2631     }
2632     if (p_allocated) {
2633         Py_DECREF(p);
2634     }
2635     if (q_allocated) {
2636         Py_DECREF(q);
2637     }
2638     return PyFloat_FromDouble(result);
2639
2640   error_exit:
2641     if (diffs != diffs_on_stack) {
2642         PyObject_Free(diffs);
2643     }
2644     if (p_allocated) {
2645         Py_DECREF(p);
2646     }
2647     if (q_allocated) {
2648         Py_DECREF(q);
2649     }
2650     return NULL;
2651 }
2652
2653 /* AC: cannot convert yet, waiting for *args support */
2654 static PyObject *
2655 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2656 {
2657     Py_ssize_t i;
2658     PyObject *item;
2659     double max = 0.0;
2660     double x, result;
2661     int found_nan = 0;
2662     double coord_on_stack[NUM_STACK_ELEMS];
2663     double *coordinates = coord_on_stack;
2664
2665     if (nargs > NUM_STACK_ELEMS) {
2666         coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
2667         if (coordinates == NULL) {
2668             return PyErr_NoMemory();
2669         }
2670     }
2671     for (i = 0; i < nargs; i++) {
2672         item = args[i];
2673         ASSIGN_DOUBLE(x, item, error_exit);
2674         x = fabs(x);
2675         coordinates[i] = x;
2676         found_nan |= Py_IS_NAN(x);
2677         if (x > max) {
2678             max = x;
2679         }
2680     }
2681     result = vector_norm(nargs, coordinates, max, found_nan);
2682     if (coordinates != coord_on_stack) {
2683         PyObject_Free(coordinates);
2684     }
2685     return PyFloat_FromDouble(result);
2686
2687   error_exit:
2688     if (coordinates != coord_on_stack) {
2689         PyObject_Free(coordinates);
2690     }
2691     return NULL;
2692 }
2693
2694 #undef NUM_STACK_ELEMS
2695
2696 PyDoc_STRVAR(math_hypot_doc,
2697              "hypot(*coordinates) -> value\n\n\
2698 Multidimensional Euclidean distance from the origin to a point.\n\
2699 \n\
2700 Roughly equivalent to:\n\
2701     sqrt(sum(x**2 for x in coordinates))\n\
2702 \n\
2703 For a two dimensional point (x, y), gives the hypotenuse\n\
2704 using the Pythagorean theorem:  sqrt(x*x + y*y).\n\
2705 \n\
2706 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2707 \n\
2708     >>> hypot(3.0, 4.0)\n\
2709     5.0\n\
2710 ");
2711
2712 /** sumprod() ***************************************************************/
2713
2714 /* Forward declaration */
2715 static inline int _check_long_mult_overflow(long a, long b);
2716
2717 static inline bool
2718 long_add_would_overflow(long a, long b)
2719 {
2720     return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2721 }
2722
2723 /*[clinic input]
2724 math.sumprod
2725
2726     p: object
2727     q: object
2728     /
2729
2730 Return the sum of products of values from two iterables p and q.
2731
2732 Roughly equivalent to:
2733
2734     sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
2735
2736 For float and mixed int/float inputs, the intermediate products
2737 and sums are computed with extended precision.
2738 [clinic start generated code]*/
2739
2740 static PyObject *
2741 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2742 /*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
2743 {
2744     PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2745     PyObject *p_it, *q_it, *total;
2746     iternextfunc p_next, q_next;
2747     bool p_stopped = false, q_stopped = false;
2748     bool int_path_enabled = true, int_total_in_use = false;
2749     bool flt_path_enabled = true, flt_total_in_use = false;
2750     long int_total = 0;
2751     TripleLength flt_total = tl_zero;
2752
2753     p_it = PyObject_GetIter(p);
2754     if (p_it == NULL) {
2755         return NULL;
2756     }
2757     q_it = PyObject_GetIter(q);
2758     if (q_it == NULL) {
2759         Py_DECREF(p_it);
2760         return NULL;
2761     }
2762     total = PyLong_FromLong(0);
2763     if (total == NULL) {
2764         Py_DECREF(p_it);
2765         Py_DECREF(q_it);
2766         return NULL;
2767     }
2768     p_next = *Py_TYPE(p_it)->tp_iternext;
2769     q_next = *Py_TYPE(q_it)->tp_iternext;
2770     while (1) {
2771         bool finished;
2772
2773         assert (p_i == NULL);
2774         assert (q_i == NULL);
2775         assert (term_i == NULL);
2776         assert (new_total == NULL);
2777
2778         assert (p_it != NULL);
2779         assert (q_it != NULL);
2780         assert (total != NULL);
2781
2782         p_i = p_next(p_it);
2783         if (p_i == NULL) {
2784             if (PyErr_Occurred()) {
2785                 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2786                     goto err_exit;
2787                 }
2788                 PyErr_Clear();
2789             }
2790             p_stopped = true;
2791         }
2792         q_i = q_next(q_it);
2793         if (q_i == NULL) {
2794             if (PyErr_Occurred()) {
2795                 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2796                     goto err_exit;
2797                 }
2798                 PyErr_Clear();
2799             }
2800             q_stopped = true;
2801         }
2802         if (p_stopped != q_stopped) {
2803             PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2804             goto err_exit;
2805         }
2806         finished = p_stopped & q_stopped;
2807
2808         if (int_path_enabled) {
2809
2810             if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2811                 int overflow;
2812                 long int_p, int_q, int_prod;
2813
2814                 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2815                 if (overflow) {
2816                     goto finalize_int_path;
2817                 }
2818                 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2819                 if (overflow) {
2820                     goto finalize_int_path;
2821                 }
2822                 if (_check_long_mult_overflow(int_p, int_q)) {
2823                     goto finalize_int_path;
2824                 }
2825                 int_prod = int_p * int_q;
2826                 if (long_add_would_overflow(int_total, int_prod)) {
2827                     goto finalize_int_path;
2828                 }
2829                 int_total += int_prod;
2830                 int_total_in_use = true;
2831                 Py_CLEAR(p_i);
2832                 Py_CLEAR(q_i);
2833                 continue;
2834             }
2835
2836           finalize_int_path:
2837             // We're finished, overflowed, or have a non-int
2838             int_path_enabled = false;
2839             if (int_total_in_use) {
2840                 term_i = PyLong_FromLong(int_total);
2841                 if (term_i == NULL) {
2842                     goto err_exit;
2843                 }
2844                 new_total = PyNumber_Add(total, term_i);
2845                 if (new_total == NULL) {
2846                     goto err_exit;
2847                 }
2848                 Py_SETREF(total, new_total);
2849                 new_total = NULL;
2850                 Py_CLEAR(term_i);
2851                 int_total = 0;   // An ounce of prevention, ...
2852                 int_total_in_use = false;
2853             }
2854         }
2855
2856         if (flt_path_enabled) {
2857
2858             if (!finished) {
2859                 double flt_p, flt_q;
2860                 bool p_type_float = PyFloat_CheckExact(p_i);
2861                 bool q_type_float = PyFloat_CheckExact(q_i);
2862                 if (p_type_float && q_type_float) {
2863                     flt_p = PyFloat_AS_DOUBLE(p_i);
2864                     flt_q = PyFloat_AS_DOUBLE(q_i);
2865                 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2866                     /* We care about float/int pairs and int/float pairs because
2867                        they arise naturally in several use cases such as price
2868                        times quantity, measurements with integer weights, or
2869                        data selected by a vector of bools. */
2870                     flt_p = PyFloat_AS_DOUBLE(p_i);
2871                     flt_q = PyLong_AsDouble(q_i);
2872                     if (flt_q == -1.0 && PyErr_Occurred()) {
2873                         PyErr_Clear();
2874                         goto finalize_flt_path;
2875                     }
2876                 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(q_i))) {
2877                     flt_q = PyFloat_AS_DOUBLE(q_i);
2878                     flt_p = PyLong_AsDouble(p_i);
2879                     if (flt_p == -1.0 && PyErr_Occurred()) {
2880                         PyErr_Clear();
2881                         goto finalize_flt_path;
2882                     }
2883                 } else {
2884                     goto finalize_flt_path;
2885                 }
2886                 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2887                 if (isfinite(new_flt_total.hi)) {
2888                     flt_total = new_flt_total;
2889                     flt_total_in_use = true;
2890                     Py_CLEAR(p_i);
2891                     Py_CLEAR(q_i);
2892                     continue;
2893                 }
2894             }
2895
2896           finalize_flt_path:
2897             // We're finished, overflowed, have a non-float, or got a non-finite value
2898             flt_path_enabled = false;
2899             if (flt_total_in_use) {
2900                 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2901                 if (term_i == NULL) {
2902                     goto err_exit;
2903                 }
2904                 new_total = PyNumber_Add(total, term_i);
2905                 if (new_total == NULL) {
2906                     goto err_exit;
2907                 }
2908                 Py_SETREF(total, new_total);
2909                 new_total = NULL;
2910                 Py_CLEAR(term_i);
2911                 flt_total = tl_zero;
2912                 flt_total_in_use = false;
2913             }
2914         }
2915
2916         assert(!int_total_in_use);
2917         assert(!flt_total_in_use);
2918         if (finished) {
2919             goto normal_exit;
2920         }
2921         term_i = PyNumber_Multiply(p_i, q_i);
2922         if (term_i == NULL) {
2923             goto err_exit;
2924         }
2925         new_total = PyNumber_Add(total, term_i);
2926         if (new_total == NULL) {
2927             goto err_exit;
2928         }
2929         Py_SETREF(total, new_total);
2930         new_total = NULL;
2931         Py_CLEAR(p_i);
2932         Py_CLEAR(q_i);
2933         Py_CLEAR(term_i);
2934     }
2935
2936  normal_exit:
2937     Py_DECREF(p_it);
2938     Py_DECREF(q_it);
2939     return total;
2940
2941  err_exit:
2942     Py_DECREF(p_it);
2943     Py_DECREF(q_it);
2944     Py_DECREF(total);
2945     Py_XDECREF(p_i);
2946     Py_XDECREF(q_i);
2947     Py_XDECREF(term_i);
2948     Py_XDECREF(new_total);
2949     return NULL;
2950 }
2951
2952
2953 /* pow can't use math_2, but needs its own wrapper: the problem is
2954    that an infinite result can arise either as a result of overflow
2955    (in which case OverflowError should be raised) or as a result of
2956    e.g. 0.**-5. (for which ValueError needs to be raised.)
2957 */
2958
2959 /*[clinic input]
2960 math.pow
2961
2962     x: double
2963     y: double
2964     /
2965
2966 Return x**y (x to the power of y).
2967 [clinic start generated code]*/
2968
2969 static PyObject *
2970 math_pow_impl(PyObject *module, double x, double y)
2971 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2972 {
2973     double r;
2974     int odd_y;
2975
2976     /* deal directly with IEEE specials, to cope with problems on various
2977        platforms whose semantics don't exactly match C99 */
2978     r = 0.; /* silence compiler warning */
2979     if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2980         errno = 0;
2981         if (Py_IS_NAN(x))
2982             r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2983         else if (Py_IS_NAN(y))
2984             r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2985         else if (Py_IS_INFINITY(x)) {
2986             odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2987             if (y > 0.)
2988                 r = odd_y ? x : fabs(x);
2989             else if (y == 0.)
2990                 r = 1.;
2991             else /* y < 0. */
2992                 r = odd_y ? copysign(0., x) : 0.;
2993         }
2994         else if (Py_IS_INFINITY(y)) {
2995             if (fabs(x) == 1.0)
2996                 r = 1.;
2997             else if (y > 0. && fabs(x) > 1.0)
2998                 r = y;
2999             else if (y < 0. && fabs(x) < 1.0) {
3000                 r = -y; /* result is +inf */
3001             }
3002             else
3003                 r = 0.;
3004         }
3005     }
3006     else {
3007         /* let libm handle finite**finite */
3008         errno = 0;
3009         r = pow(x, y);
3010         /* a NaN result should arise only from (-ve)**(finite
3011            non-integer); in this case we want to raise ValueError. */
3012         if (!Py_IS_FINITE(r)) {
3013             if (Py_IS_NAN(r)) {
3014                 errno = EDOM;
3015             }
3016             /*
3017                an infinite result here arises either from:
3018                (A) (+/-0.)**negative (-> divide-by-zero)
3019                (B) overflow of x**y with x and y finite
3020             */
3021             else if (Py_IS_INFINITY(r)) {
3022                 if (x == 0.)
3023                     errno = EDOM;
3024                 else
3025                     errno = ERANGE;
3026             }
3027         }
3028     }
3029
3030     if (errno && is_error(r))
3031         return NULL;
3032     else
3033         return PyFloat_FromDouble(r);
3034 }
3035
3036
3037 static const double degToRad = Py_MATH_PI / 180.0;
3038 static const double radToDeg = 180.0 / Py_MATH_PI;
3039
3040 /*[clinic input]
3041 math.degrees
3042
3043     x: double
3044     /
3045
3046 Convert angle x from radians to degrees.
3047 [clinic start generated code]*/
3048
3049 static PyObject *
3050 math_degrees_impl(PyObject *module, double x)
3051 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3052 {
3053     return PyFloat_FromDouble(x * radToDeg);
3054 }
3055
3056
3057 /*[clinic input]
3058 math.radians
3059
3060     x: double
3061     /
3062
3063 Convert angle x from degrees to radians.
3064 [clinic start generated code]*/
3065
3066 static PyObject *
3067 math_radians_impl(PyObject *module, double x)
3068 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3069 {
3070     return PyFloat_FromDouble(x * degToRad);
3071 }
3072
3073
3074 /*[clinic input]
3075 math.isfinite
3076
3077     x: double
3078     /
3079
3080 Return True if x is neither an infinity nor a NaN, and False otherwise.
3081 [clinic start generated code]*/
3082
3083 static PyObject *
3084 math_isfinite_impl(PyObject *module, double x)
3085 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3086 {
3087     return PyBool_FromLong((long)Py_IS_FINITE(x));
3088 }
3089
3090
3091 /*[clinic input]
3092 math.isnan
3093
3094     x: double
3095     /
3096
3097 Return True if x is a NaN (not a number), and False otherwise.
3098 [clinic start generated code]*/
3099
3100 static PyObject *
3101 math_isnan_impl(PyObject *module, double x)
3102 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3103 {
3104     return PyBool_FromLong((long)Py_IS_NAN(x));
3105 }
3106
3107
3108 /*[clinic input]
3109 math.isinf
3110
3111     x: double
3112     /
3113
3114 Return True if x is a positive or negative infinity, and False otherwise.
3115 [clinic start generated code]*/
3116
3117 static PyObject *
3118 math_isinf_impl(PyObject *module, double x)
3119 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3120 {
3121     return PyBool_FromLong((long)Py_IS_INFINITY(x));
3122 }
3123
3124
3125 /*[clinic input]
3126 math.isclose -> bool
3127
3128     a: double
3129     b: double
3130     *
3131     rel_tol: double = 1e-09
3132         maximum difference for being considered "close", relative to the
3133         magnitude of the input values
3134     abs_tol: double = 0.0
3135         maximum difference for being considered "close", regardless of the
3136         magnitude of the input values
3137
3138 Determine whether two floating point numbers are close in value.
3139
3140 Return True if a is close in value to b, and False otherwise.
3141
3142 For the values to be considered close, the difference between them
3143 must be smaller than at least one of the tolerances.
3144
3145 -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
3146 is, NaN is not close to anything, even itself.  inf and -inf are
3147 only close to themselves.
3148 [clinic start generated code]*/
3149
3150 static int
3151 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3152                   double abs_tol)
3153 /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
3154 {
3155     double diff = 0.0;
3156
3157     /* sanity check on the inputs */
3158     if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3159         PyErr_SetString(PyExc_ValueError,
3160                         "tolerances must be non-negative");
3161         return -1;
3162     }
3163
3164     if ( a == b ) {
3165         /* short circuit exact equality -- needed to catch two infinities of
3166            the same sign. And perhaps speeds things up a bit sometimes.
3167         */
3168         return 1;
3169     }
3170
3171     /* This catches the case of two infinities of opposite sign, or
3172        one infinity and one finite number. Two infinities of opposite
3173        sign would otherwise have an infinite relative tolerance.
3174        Two infinities of the same sign are caught by the equality check
3175        above.
3176     */
3177
3178     if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3179         return 0;
3180     }
3181
3182     /* now do the regular computation
3183        this is essentially the "weak" test from the Boost library
3184     */
3185
3186     diff = fabs(b - a);
3187
3188     return (((diff <= fabs(rel_tol * b)) ||
3189              (diff <= fabs(rel_tol * a))) ||
3190             (diff <= abs_tol));
3191 }
3192
3193 static inline int
3194 _check_long_mult_overflow(long a, long b) {
3195
3196     /* From Python2's int_mul code:
3197
3198     Integer overflow checking for * is painful:  Python tried a couple ways, but
3199     they didn't work on all platforms, or failed in endcases (a product of
3200     -sys.maxint-1 has been a particular pain).
3201
3202     Here's another way:
3203
3204     The native long product x*y is either exactly right or *way* off, being
3205     just the last n bits of the true product, where n is the number of bits
3206     in a long (the delivered product is the true product plus i*2**n for
3207     some integer i).
3208
3209     The native double product (double)x * (double)y is subject to three
3210     rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
3211     info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3212     But, unlike the native long product, it's not in *range* trouble:  even
3213     if sizeof(long)==32 (256-bit longs), the product easily fits in the
3214     dynamic range of a double.  So the leading 50 (or so) bits of the double
3215     product are correct.
3216
3217     We check these two ways against each other, and declare victory if they're
3218     approximately the same.  Else, because the native long product is the only
3219     one that can lose catastrophic amounts of information, it's the native long
3220     product that must have overflowed.
3221
3222     */
3223
3224     long longprod = (long)((unsigned long)a * b);
3225     double doubleprod = (double)a * (double)b;
3226     double doubled_longprod = (double)longprod;
3227
3228     if (doubled_longprod == doubleprod) {
3229         return 0;
3230     }
3231
3232     const double diff = doubled_longprod - doubleprod;
3233     const double absdiff = diff >= 0.0 ? diff : -diff;
3234     const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3235
3236     if (32.0 * absdiff <= absprod) {
3237         return 0;
3238     }
3239
3240     return 1;
3241 }
3242
3243 /*[clinic input]
3244 math.prod
3245
3246     iterable: object
3247     /
3248     *
3249     start: object(c_default="NULL") = 1
3250
3251 Calculate the product of all the elements in the input iterable.
3252
3253 The default start value for the product is 1.
3254
3255 When the iterable is empty, return the start value.  This function is
3256 intended specifically for use with numeric values and may reject
3257 non-numeric types.
3258 [clinic start generated code]*/
3259
3260 static PyObject *
3261 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3262 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3263 {
3264     PyObject *result = start;
3265     PyObject *temp, *item, *iter;
3266
3267     iter = PyObject_GetIter(iterable);
3268     if (iter == NULL) {
3269         return NULL;
3270     }
3271
3272     if (result == NULL) {
3273         result = _PyLong_GetOne();
3274     }
3275     Py_INCREF(result);
3276 #ifndef SLOW_PROD
3277     /* Fast paths for integers keeping temporary products in C.
3278      * Assumes all inputs are the same type.
3279      * If the assumption fails, default to use PyObjects instead.
3280     */
3281     if (PyLong_CheckExact(result)) {
3282         int overflow;
3283         long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3284         /* If this already overflowed, don't even enter the loop. */
3285         if (overflow == 0) {
3286             Py_SETREF(result, NULL);
3287         }
3288         /* Loop over all the items in the iterable until we finish, we overflow
3289          * or we found a non integer element */
3290         while (result == NULL) {
3291             item = PyIter_Next(iter);
3292             if (item == NULL) {
3293                 Py_DECREF(iter);
3294                 if (PyErr_Occurred()) {
3295                     return NULL;
3296                 }
3297                 return PyLong_FromLong(i_result);
3298             }
3299             if (PyLong_CheckExact(item)) {
3300                 long b = PyLong_AsLongAndOverflow(item, &overflow);
3301                 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3302                     long x = i_result * b;
3303                     i_result = x;
3304                     Py_DECREF(item);
3305                     continue;
3306                 }
3307             }
3308             /* Either overflowed or is not an int.
3309              * Restore real objects and process normally */
3310             result = PyLong_FromLong(i_result);
3311             if (result == NULL) {
3312                 Py_DECREF(item);
3313                 Py_DECREF(iter);
3314                 return NULL;
3315             }
3316             temp = PyNumber_Multiply(result, item);
3317             Py_DECREF(result);
3318             Py_DECREF(item);
3319             result = temp;
3320             if (result == NULL) {
3321                 Py_DECREF(iter);
3322                 return NULL;
3323             }
3324         }
3325     }
3326
3327     /* Fast paths for floats keeping temporary products in C.
3328      * Assumes all inputs are the same type.
3329      * If the assumption fails, default to use PyObjects instead.
3330     */
3331     if (PyFloat_CheckExact(result)) {
3332         double f_result = PyFloat_AS_DOUBLE(result);
3333         Py_SETREF(result, NULL);
3334         while(result == NULL) {
3335             item = PyIter_Next(iter);
3336             if (item == NULL) {
3337                 Py_DECREF(iter);
3338                 if (PyErr_Occurred()) {
3339                     return NULL;
3340                 }
3341                 return PyFloat_FromDouble(f_result);
3342             }
3343             if (PyFloat_CheckExact(item)) {
3344                 f_result *= PyFloat_AS_DOUBLE(item);
3345                 Py_DECREF(item);
3346                 continue;
3347             }
3348             if (PyLong_CheckExact(item)) {
3349                 long value;
3350                 int overflow;
3351                 value = PyLong_AsLongAndOverflow(item, &overflow);
3352                 if (!overflow) {
3353                     f_result *= (double)value;
3354                     Py_DECREF(item);
3355                     continue;
3356                 }
3357             }
3358             result = PyFloat_FromDouble(f_result);
3359             if (result == NULL) {
3360                 Py_DECREF(item);
3361                 Py_DECREF(iter);
3362                 return NULL;
3363             }
3364             temp = PyNumber_Multiply(result, item);
3365             Py_DECREF(result);
3366             Py_DECREF(item);
3367             result = temp;
3368             if (result == NULL) {
3369                 Py_DECREF(iter);
3370                 return NULL;
3371             }
3372         }
3373     }
3374 #endif
3375     /* Consume rest of the iterable (if any) that could not be handled
3376      * by specialized functions above.*/
3377     for(;;) {
3378         item = PyIter_Next(iter);
3379         if (item == NULL) {
3380             /* error, or end-of-sequence */
3381             if (PyErr_Occurred()) {
3382                 Py_SETREF(result, NULL);
3383             }
3384             break;
3385         }
3386         temp = PyNumber_Multiply(result, item);
3387         Py_DECREF(result);
3388         Py_DECREF(item);
3389         result = temp;
3390         if (result == NULL)
3391             break;
3392     }
3393     Py_DECREF(iter);
3394     return result;
3395 }
3396
3397
3398 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3399
3400 Python code to generate the values:
3401
3402     import math
3403
3404     for n in range(128):
3405         fac = math.factorial(n)
3406         fac_odd_part = fac // (fac & -fac)
3407         reduced_fac_odd_part = fac_odd_part % (2**64)
3408         print(f"{reduced_fac_odd_part:#018x}u")
3409 */
3410 static const uint64_t reduced_factorial_odd_part[] = {
3411     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3412     0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3413     0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3414     0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3415     0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3416     0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3417     0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3418     0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3419     0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3420     0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3421     0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3422     0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3423     0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3424     0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3425     0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3426     0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3427     0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3428     0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3429     0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3430     0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3431     0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3432     0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3433     0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3434     0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3435     0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3436     0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3437     0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3438     0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3439     0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3440     0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3441     0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3442     0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3443 };
3444
3445 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3446
3447 Python code to generate the values:
3448
3449     import math
3450
3451     for n in range(128):
3452         fac = math.factorial(n)
3453         fac_odd_part = fac // (fac & -fac)
3454         inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3455         print(f"{inverted_fac_odd_part:#018x}u")
3456 */
3457 static const uint64_t inverted_factorial_odd_part[] = {
3458     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3459     0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3460     0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3461     0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3462     0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3463     0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3464     0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3465     0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3466     0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3467     0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3468     0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3469     0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3470     0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3471     0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3472     0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3473     0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3474     0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3475     0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3476     0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3477     0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3478     0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3479     0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3480     0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3481     0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3482     0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3483     0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3484     0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3485     0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3486     0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3487     0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3488     0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3489     0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3490 };
3491
3492 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3493
3494 Python code to generate the values:
3495
3496 import math
3497
3498 for n in range(128):
3499     fac = math.factorial(n)
3500     fac_trailing_zeros = (fac & -fac).bit_length() - 1
3501     print(fac_trailing_zeros)
3502 */
3503
3504 static const uint8_t factorial_trailing_zeros[] = {
3505      0,  0,  1,  1,  3,  3,  4,  4,  7,  7,  8,  8, 10, 10, 11, 11,  //  0-15
3506     15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26,  // 16-31
3507     31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42,  // 32-47
3508     46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57,  // 48-63
3509     63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74,  // 64-79
3510     78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89,  // 80-95
3511     94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105,  // 96-111
3512     109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120,  // 112-127
3513 };
3514
3515 /* Number of permutations and combinations.
3516  * P(n, k) = n! / (n-k)!
3517  * C(n, k) = P(n, k) / k!
3518  */
3519
3520 /* Calculate C(n, k) for n in the 63-bit range. */
3521 static PyObject *
3522 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3523 {
3524     if (k == 0) {
3525         return PyLong_FromLong(1);
3526     }
3527
3528     /* For small enough n and k the result fits in the 64-bit range and can
3529      * be calculated without allocating intermediate PyLong objects. */
3530     if (iscomb) {
3531         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3532          * fits into a uint64_t.  Exclude k = 1, because the second fast
3533          * path is faster for this case.*/
3534         static const unsigned char fast_comb_limits1[] = {
3535             0, 0, 127, 127, 127, 127, 127, 127,  // 0-7
3536             127, 127, 127, 127, 127, 127, 127, 127,  // 8-15
3537             116, 105, 97, 91, 86, 82, 78, 76,  // 16-23
3538             74, 72, 71, 70, 69, 68, 68, 67,  // 24-31
3539             67, 67, 67,  // 32-34
3540         };
3541         if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3542             /*
3543                 comb(n, k) fits into a uint64_t. We compute it as
3544
3545                     comb_odd_part << shift
3546
3547                 where 2**shift is the largest power of two dividing comb(n, k)
3548                 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3549                 calculated efficiently via arithmetic modulo 2**64, using three
3550                 lookups and two uint64_t multiplications.
3551             */
3552             uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3553                                    * inverted_factorial_odd_part[k]
3554                                    * inverted_factorial_odd_part[n - k];
3555             int shift = factorial_trailing_zeros[n]
3556                       - factorial_trailing_zeros[k]
3557                       - factorial_trailing_zeros[n - k];
3558             return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3559         }
3560
3561         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3562          * fits into a long long (which is at least 64 bit).  Only contains
3563          * items larger than in fast_comb_limits1. */
3564         static const unsigned long long fast_comb_limits2[] = {
3565             0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449,  // 0-7
3566             746, 453, 308, 227, 178, 147,  // 8-13
3567         };
3568         if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3569             /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3570             unsigned long long result = n;
3571             for (unsigned long long i = 1; i < k;) {
3572                 result *= --n;
3573                 result /= ++i;
3574             }
3575             return PyLong_FromUnsignedLongLong(result);
3576         }
3577     }
3578     else {
3579         /* Maps k to the maximal n so that k <= n and P(n, k)
3580          * fits into a long long (which is at least 64 bit). */
3581         static const unsigned long long fast_perm_limits[] = {
3582             0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568,  // 0-7
3583             259, 142, 88, 61, 45, 36, 30, 26,  // 8-15
3584             24, 22, 21, 20, 20,  // 16-20
3585         };
3586         if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3587             if (n <= 127) {
3588                 /* P(n, k) fits into a uint64_t. */
3589                 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3590                                        * inverted_factorial_odd_part[n - k];
3591                 int shift = factorial_trailing_zeros[n]
3592                           - factorial_trailing_zeros[n - k];
3593                 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3594             }
3595
3596             /* P(n, k) = P(n, k-1) * (n-k+1) */
3597             unsigned long long result = n;
3598             for (unsigned long long i = 1; i < k;) {
3599                 result *= --n;
3600                 ++i;
3601             }
3602             return PyLong_FromUnsignedLongLong(result);
3603         }
3604     }
3605
3606     /* For larger n use recursive formulas:
3607      *
3608      *   P(n, k) = P(n, j) * P(n-j, k-j)
3609      *   C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3610      */
3611     unsigned long long j = k / 2;
3612     PyObject *a, *b;
3613     a = perm_comb_small(n, j, iscomb);
3614     if (a == NULL) {
3615         return NULL;
3616     }
3617     b = perm_comb_small(n - j, k - j, iscomb);
3618     if (b == NULL) {
3619         goto error;
3620     }
3621     Py_SETREF(a, PyNumber_Multiply(a, b));
3622     Py_DECREF(b);
3623     if (iscomb && a != NULL) {
3624         b = perm_comb_small(k, j, 1);
3625         if (b == NULL) {
3626             goto error;
3627         }
3628         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3629         Py_DECREF(b);
3630     }
3631     return a;
3632
3633 error:
3634     Py_DECREF(a);
3635     return NULL;
3636 }
3637
3638 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3639  * It is more efficient than sequential multiplication thanks to
3640  * Karatsuba multiplication.
3641  */
3642 static PyObject *
3643 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3644 {
3645     if (k == 0) {
3646         return PyLong_FromLong(1);
3647     }
3648     if (k == 1) {
3649         return Py_NewRef(n);
3650     }
3651
3652     /* P(n, k) = P(n, j) * P(n-j, k-j) */
3653     /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3654     unsigned long long j = k / 2;
3655     PyObject *a, *b;
3656     a = perm_comb(n, j, iscomb);
3657     if (a == NULL) {
3658         return NULL;
3659     }
3660     PyObject *t = PyLong_FromUnsignedLongLong(j);
3661     if (t == NULL) {
3662         goto error;
3663     }
3664     n = PyNumber_Subtract(n, t);
3665     Py_DECREF(t);
3666     if (n == NULL) {
3667         goto error;
3668     }
3669     b = perm_comb(n, k - j, iscomb);
3670     Py_DECREF(n);
3671     if (b == NULL) {
3672         goto error;
3673     }
3674     Py_SETREF(a, PyNumber_Multiply(a, b));
3675     Py_DECREF(b);
3676     if (iscomb && a != NULL) {
3677         b = perm_comb_small(k, j, 1);
3678         if (b == NULL) {
3679             goto error;
3680         }
3681         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3682         Py_DECREF(b);
3683     }
3684     return a;
3685
3686 error:
3687     Py_DECREF(a);
3688     return NULL;
3689 }
3690
3691 /*[clinic input]
3692 math.perm
3693
3694     n: object
3695     k: object = None
3696     /
3697
3698 Number of ways to choose k items from n items without repetition and with order.
3699
3700 Evaluates to n! / (n - k)! when k <= n and evaluates
3701 to zero when k > n.
3702
3703 If k is not specified or is None, then k defaults to n
3704 and the function returns n!.
3705
3706 Raises TypeError if either of the arguments are not integers.
3707 Raises ValueError if either of the arguments are negative.
3708 [clinic start generated code]*/
3709
3710 static PyObject *
3711 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3712 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3713 {
3714     PyObject *result = NULL;
3715     int overflow, cmp;
3716     long long ki, ni;
3717
3718     if (k == Py_None) {
3719         return math_factorial(module, n);
3720     }
3721     n = PyNumber_Index(n);
3722     if (n == NULL) {
3723         return NULL;
3724     }
3725     k = PyNumber_Index(k);
3726     if (k == NULL) {
3727         Py_DECREF(n);
3728         return NULL;
3729     }
3730     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3731
3732     if (Py_SIZE(n) < 0) {
3733         PyErr_SetString(PyExc_ValueError,
3734                         "n must be a non-negative integer");
3735         goto error;
3736     }
3737     if (Py_SIZE(k) < 0) {
3738         PyErr_SetString(PyExc_ValueError,
3739                         "k must be a non-negative integer");
3740         goto error;
3741     }
3742
3743     cmp = PyObject_RichCompareBool(n, k, Py_LT);
3744     if (cmp != 0) {
3745         if (cmp > 0) {
3746             result = PyLong_FromLong(0);
3747             goto done;
3748         }
3749         goto error;
3750     }
3751
3752     ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3753     assert(overflow >= 0 && !PyErr_Occurred());
3754     if (overflow > 0) {
3755         PyErr_Format(PyExc_OverflowError,
3756                      "k must not exceed %lld",
3757                      LLONG_MAX);
3758         goto error;
3759     }
3760     assert(ki >= 0);
3761
3762     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3763     assert(overflow >= 0 && !PyErr_Occurred());
3764     if (!overflow && ki > 1) {
3765         assert(ni >= 0);
3766         result = perm_comb_small((unsigned long long)ni,
3767                                  (unsigned long long)ki, 0);
3768     }
3769     else {
3770         result = perm_comb(n, (unsigned long long)ki, 0);
3771     }
3772
3773 done:
3774     Py_DECREF(n);
3775     Py_DECREF(k);
3776     return result;
3777
3778 error:
3779     Py_DECREF(n);
3780     Py_DECREF(k);
3781     return NULL;
3782 }
3783
3784 /*[clinic input]
3785 math.comb
3786
3787     n: object
3788     k: object
3789     /
3790
3791 Number of ways to choose k items from n items without repetition and without order.
3792
3793 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3794 to zero when k > n.
3795
3796 Also called the binomial coefficient because it is equivalent
3797 to the coefficient of k-th term in polynomial expansion of the
3798 expression (1 + x)**n.
3799
3800 Raises TypeError if either of the arguments are not integers.
3801 Raises ValueError if either of the arguments are negative.
3802
3803 [clinic start generated code]*/
3804
3805 static PyObject *
3806 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3807 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3808 {
3809     PyObject *result = NULL, *temp;
3810     int overflow, cmp;
3811     long long ki, ni;
3812
3813     n = PyNumber_Index(n);
3814     if (n == NULL) {
3815         return NULL;
3816     }
3817     k = PyNumber_Index(k);
3818     if (k == NULL) {
3819         Py_DECREF(n);
3820         return NULL;
3821     }
3822     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3823
3824     if (Py_SIZE(n) < 0) {
3825         PyErr_SetString(PyExc_ValueError,
3826                         "n must be a non-negative integer");
3827         goto error;
3828     }
3829     if (Py_SIZE(k) < 0) {
3830         PyErr_SetString(PyExc_ValueError,
3831                         "k must be a non-negative integer");
3832         goto error;
3833     }
3834
3835     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3836     assert(overflow >= 0 && !PyErr_Occurred());
3837     if (!overflow) {
3838         assert(ni >= 0);
3839         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3840         assert(overflow >= 0 && !PyErr_Occurred());
3841         if (overflow || ki > ni) {
3842             result = PyLong_FromLong(0);
3843             goto done;
3844         }
3845         assert(ki >= 0);
3846
3847         ki = Py_MIN(ki, ni - ki);
3848         if (ki > 1) {
3849             result = perm_comb_small((unsigned long long)ni,
3850                                      (unsigned long long)ki, 1);
3851             goto done;
3852         }
3853         /* For k == 1 just return the original n in perm_comb(). */
3854     }
3855     else {
3856         /* k = min(k, n - k) */
3857         temp = PyNumber_Subtract(n, k);
3858         if (temp == NULL) {
3859             goto error;
3860         }
3861         if (Py_SIZE(temp) < 0) {
3862             Py_DECREF(temp);
3863             result = PyLong_FromLong(0);
3864             goto done;
3865         }
3866         cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3867         if (cmp > 0) {
3868             Py_SETREF(k, temp);
3869         }
3870         else {
3871             Py_DECREF(temp);
3872             if (cmp < 0) {
3873                 goto error;
3874             }
3875         }
3876
3877         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3878         assert(overflow >= 0 && !PyErr_Occurred());
3879         if (overflow) {
3880             PyErr_Format(PyExc_OverflowError,
3881                          "min(n - k, k) must not exceed %lld",
3882                          LLONG_MAX);
3883             goto error;
3884         }
3885         assert(ki >= 0);
3886     }
3887
3888     result = perm_comb(n, (unsigned long long)ki, 1);
3889
3890 done:
3891     Py_DECREF(n);
3892     Py_DECREF(k);
3893     return result;
3894
3895 error:
3896     Py_DECREF(n);
3897     Py_DECREF(k);
3898     return NULL;
3899 }
3900
3901
3902 /*[clinic input]
3903 math.nextafter
3904
3905     x: double
3906     y: double
3907     /
3908
3909 Return the next floating-point value after x towards y.
3910 [clinic start generated code]*/
3911
3912 static PyObject *
3913 math_nextafter_impl(PyObject *module, double x, double y)
3914 /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3915 {
3916 #if defined(_AIX)
3917     if (x == y) {
3918         /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3919            Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3920         return PyFloat_FromDouble(y);
3921     }
3922     if (Py_IS_NAN(x)) {
3923         return PyFloat_FromDouble(x);
3924     }
3925     if (Py_IS_NAN(y)) {
3926         return PyFloat_FromDouble(y);
3927     }
3928 #endif
3929     return PyFloat_FromDouble(nextafter(x, y));
3930 }
3931
3932
3933 /*[clinic input]
3934 math.ulp -> double
3935
3936     x: double
3937     /
3938
3939 Return the value of the least significant bit of the float x.
3940 [clinic start generated code]*/
3941
3942 static double
3943 math_ulp_impl(PyObject *module, double x)
3944 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3945 {
3946     if (Py_IS_NAN(x)) {
3947         return x;
3948     }
3949     x = fabs(x);
3950     if (Py_IS_INFINITY(x)) {
3951         return x;
3952     }
3953     double inf = m_inf();
3954     double x2 = nextafter(x, inf);
3955     if (Py_IS_INFINITY(x2)) {
3956         /* special case: x is the largest positive representable float */
3957         x2 = nextafter(x, -inf);
3958         return x - x2;
3959     }
3960     return x2 - x;
3961 }
3962
3963 static int
3964 math_exec(PyObject *module)
3965 {
3966
3967     math_module_state *state = get_math_module_state(module);
3968     state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
3969     if (state->str___ceil__ == NULL) {
3970         return -1;
3971     }
3972     state->str___floor__ = PyUnicode_InternFromString("__floor__");
3973     if (state->str___floor__ == NULL) {
3974         return -1;
3975     }
3976     state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
3977     if (state->str___trunc__ == NULL) {
3978         return -1;
3979     }
3980     if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3981         return -1;
3982     }
3983     if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3984         return -1;
3985     }
3986     // 2pi
3987     if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3988         return -1;
3989     }
3990     if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3991         return -1;
3992     }
3993 #if _PY_SHORT_FLOAT_REPR == 1
3994     if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3995         return -1;
3996     }
3997 #endif
3998     return 0;
3999 }
4000
4001 static int
4002 math_clear(PyObject *module)
4003 {
4004     math_module_state *state = get_math_module_state(module);
4005     Py_CLEAR(state->str___ceil__);
4006     Py_CLEAR(state->str___floor__);
4007     Py_CLEAR(state->str___trunc__);
4008     return 0;
4009 }
4010
4011 static void
4012 math_free(void *module)
4013 {
4014     math_clear((PyObject *)module);
4015 }
4016
4017 static PyMethodDef math_methods[] = {
4018     {"acos",            math_acos,      METH_O,         math_acos_doc},
4019     {"acosh",           math_acosh,     METH_O,         math_acosh_doc},
4020     {"asin",            math_asin,      METH_O,         math_asin_doc},
4021     {"asinh",           math_asinh,     METH_O,         math_asinh_doc},
4022     {"atan",            math_atan,      METH_O,         math_atan_doc},
4023     {"atan2",           _PyCFunction_CAST(math_atan2),     METH_FASTCALL,  math_atan2_doc},
4024     {"atanh",           math_atanh,     METH_O,         math_atanh_doc},
4025     {"cbrt",            math_cbrt,      METH_O,         math_cbrt_doc},
4026     MATH_CEIL_METHODDEF
4027     {"copysign",        _PyCFunction_CAST(math_copysign),  METH_FASTCALL,  math_copysign_doc},
4028     {"cos",             math_cos,       METH_O,         math_cos_doc},
4029     {"cosh",            math_cosh,      METH_O,         math_cosh_doc},
4030     MATH_DEGREES_METHODDEF
4031     MATH_DIST_METHODDEF
4032     {"erf",             math_erf,       METH_O,         math_erf_doc},
4033     {"erfc",            math_erfc,      METH_O,         math_erfc_doc},
4034     {"exp",             math_exp,       METH_O,         math_exp_doc},
4035     {"exp2",            math_exp2,      METH_O,         math_exp2_doc},
4036     {"expm1",           math_expm1,     METH_O,         math_expm1_doc},
4037     {"fabs",            math_fabs,      METH_O,         math_fabs_doc},
4038     MATH_FACTORIAL_METHODDEF
4039     MATH_FLOOR_METHODDEF
4040     MATH_FMOD_METHODDEF
4041     MATH_FREXP_METHODDEF
4042     MATH_FSUM_METHODDEF
4043     {"gamma",           math_gamma,     METH_O,         math_gamma_doc},
4044     {"gcd",             _PyCFunction_CAST(math_gcd),       METH_FASTCALL,  math_gcd_doc},
4045     {"hypot",           _PyCFunction_CAST(math_hypot),     METH_FASTCALL,  math_hypot_doc},
4046     MATH_ISCLOSE_METHODDEF
4047     MATH_ISFINITE_METHODDEF
4048     MATH_ISINF_METHODDEF
4049     MATH_ISNAN_METHODDEF
4050     MATH_ISQRT_METHODDEF
4051     {"lcm",             _PyCFunction_CAST(math_lcm),       METH_FASTCALL,  math_lcm_doc},
4052     MATH_LDEXP_METHODDEF
4053     {"lgamma",          math_lgamma,    METH_O,         math_lgamma_doc},
4054     MATH_LOG_METHODDEF
4055     {"log1p",           math_log1p,     METH_O,         math_log1p_doc},
4056     MATH_LOG10_METHODDEF
4057     MATH_LOG2_METHODDEF
4058     MATH_MODF_METHODDEF
4059     MATH_POW_METHODDEF
4060     MATH_RADIANS_METHODDEF
4061     {"remainder",       _PyCFunction_CAST(math_remainder), METH_FASTCALL,  math_remainder_doc},
4062     {"sin",             math_sin,       METH_O,         math_sin_doc},
4063     {"sinh",            math_sinh,      METH_O,         math_sinh_doc},
4064     {"sqrt",            math_sqrt,      METH_O,         math_sqrt_doc},
4065     {"tan",             math_tan,       METH_O,         math_tan_doc},
4066     {"tanh",            math_tanh,      METH_O,         math_tanh_doc},
4067     MATH_SUMPROD_METHODDEF
4068     MATH_TRUNC_METHODDEF
4069     MATH_PROD_METHODDEF
4070     MATH_PERM_METHODDEF
4071     MATH_COMB_METHODDEF
4072     MATH_NEXTAFTER_METHODDEF
4073     MATH_ULP_METHODDEF
4074     {NULL,              NULL}           /* sentinel */
4075 };
4076
4077 static PyModuleDef_Slot math_slots[] = {
4078     {Py_mod_exec, math_exec},
4079     {0, NULL}
4080 };
4081
4082 PyDoc_STRVAR(module_doc,
4083 "This module provides access to the mathematical functions\n"
4084 "defined by the C standard.");
4085
4086 static struct PyModuleDef mathmodule = {
4087     PyModuleDef_HEAD_INIT,
4088     .m_name = "math",
4089     .m_doc = module_doc,
4090     .m_size = sizeof(math_module_state),
4091     .m_methods = math_methods,
4092     .m_slots = math_slots,
4093     .m_clear = math_clear,
4094     .m_free = math_free,
4095 };
4096
4097 PyMODINIT_FUNC
4098 PyInit_math(void)
4099 {
4100     return PyModuleDef_Init(&mathmodule);
4101 }