]> git.ipfire.org Git - thirdparty/Python/cpython.git/blame - Modules/mathmodule.c
gh-121914: Change the names of the symbol tables for lambda and genexpr (GH-135288)
[thirdparty/Python/cpython.git] / Modules / mathmodule.c
CommitLineData
85a5fbbd
GR
1/* Math module -- standard C math library functions, pi and e */
2
53876d9c
CH
3/* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9These are the "spirit of 754" rules:
10
111. If the mathematical result is a real number, but of magnitude too
12large to approximate by a machine float, overflow is signaled and the
13result is an infinity (with the appropriate sign).
14
152. If the mathematical result is a real number, but of magnitude too
16small to approximate by a machine float, underflow is signaled and the
17result is a zero (with the appropriate sign).
18
193. At a singularity (a value x such that the limit of f(y) as y
20approaches x exists and is an infinity), "divide by zero" is signaled
21and the result is an infinity (with the appropriate sign). This is
22complicated a little by that the left-side and right-side limits may
23not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24from the positive or negative directions. In that specific case, the
25sign of the zero determines the result of 1/0.
26
274. At a point where a function has no defined result in the extended
28reals (i.e., the reals plus an infinity or two), invalid operation is
29signaled and a NaN is returned.
30
31And these are what Python has historically /tried/ to do (but not
32always successfully, as platform libm behavior varies a lot):
33
34For #1, raise OverflowError.
35
36For #2, return a zero (with the appropriate sign if that happens by
37accident ;-)).
38
39For #3 and #4, raise ValueError. It may have made sense to raise
40Python's ZeroDivisionError in #3, but historically that's only been
41raised for division by zero and mod by zero.
42
43*/
44
45/*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
03e9f5dc
CH
55#ifndef Py_BUILD_CORE_BUILTIN
56# define Py_BUILD_CORE_MODULE 1
57#endif
58
8b43b19e 59#include "Python.h"
8ba47146 60#include "pycore_abstract.h" // _PyNumber_Index()
794e7d1a 61#include "pycore_bitutils.h" // _Py_bit_length()
d943d191 62#include "pycore_call.h" // _PyObject_CallNoArgs()
37834136 63#include "pycore_long.h" // _PyLong_GetZero()
23c9febd
DN
64#include "pycore_moduleobject.h" // _PyModule_GetState()
65#include "pycore_object.h" // _PyObject_LookupSpecial()
9bbdde21 66#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
fa26245a
CH
67/* For DBL_EPSILON in _math.h */
68#include <float.h>
69/* For _Py_log1p with workarounds for buggy handling of zeros. */
664b511c 70#include "_math.h"
47b9f83a 71#include <stdbool.h>
85a5fbbd 72
c9ea9335
SS
73#include "clinic/mathmodule.c.h"
74
75/*[clinic input]
76module math
77[clinic start generated code]*/
78/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79
80
23c9febd 81
0a22aa05
RH
82/*
83Double and triple length extended precision algorithms from:
84
85 Accurate Sum and Dot Product
86 by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
87 https://doi.org/10.1137/030601818
88 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
89
90*/
91
92typedef struct{ double hi; double lo; } DoubleLength;
93
94static DoubleLength
95dl_fast_sum(double a, double b)
96{
1a0c7b9b 97 /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
0a22aa05
RH
98 assert(fabs(a) >= fabs(b));
99 double x = a + b;
100 double y = (a - x) + b;
101 return (DoubleLength) {x, y};
102}
103
104static DoubleLength
105dl_sum(double a, double b)
106{
107 /* Algorithm 3.1 Error-free transformation of the sum */
108 double x = a + b;
109 double z = x - a;
110 double y = (a - (x - z)) + (b - z);
111 return (DoubleLength) {x, y};
112}
113
114#ifndef UNRELIABLE_FMA
115
116static DoubleLength
117dl_mul(double x, double y)
118{
119 /* Algorithm 3.5. Error-free transformation of a product */
120 double z = x * y;
121 double zz = fma(x, y, -z);
122 return (DoubleLength) {z, zz};
123}
124
125#else
126
127/*
128 The default implementation of dl_mul() depends on the C math library
129 having an accurate fma() function as required by § 7.12.13.1 of the
130 C99 standard.
131
132 The UNRELIABLE_FMA option is provided as a slower but accurate
133 alternative for builds where the fma() function is found wanting.
134 The speed penalty may be modest (17% slower on an Apple M1 Max),
135 so don't hesitate to enable this build option.
136
137 The algorithms are from the T. J. Dekker paper:
138 A Floating-Point Technique for Extending the Available Precision
139 https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
140*/
141
142static DoubleLength
143dl_split(double x) {
144 // Dekker (5.5) and (5.6).
145 double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
146 double hi = t - (t - x);
147 double lo = x - hi;
148 return (DoubleLength) {hi, lo};
149}
150
151static DoubleLength
152dl_mul(double x, double y)
153{
154 // Dekker (5.12) and mul12()
155 DoubleLength xx = dl_split(x);
156 DoubleLength yy = dl_split(y);
157 double p = xx.hi * yy.hi;
158 double q = xx.hi * yy.lo + xx.lo * yy.hi;
159 double z = p + q;
160 double zz = p - z + q + xx.lo * yy.lo;
161 return (DoubleLength) {z, zz};
162}
163
164#endif
165
166typedef struct { double hi; double lo; double tiny; } TripleLength;
167
168static const TripleLength tl_zero = {0.0, 0.0, 0.0};
169
170static TripleLength
171tl_fma(double x, double y, TripleLength total)
172{
173 /* Algorithm 5.10 with SumKVert for K=3 */
174 DoubleLength pr = dl_mul(x, y);
175 DoubleLength sm = dl_sum(total.hi, pr.hi);
176 DoubleLength r1 = dl_sum(total.lo, pr.lo);
177 DoubleLength r2 = dl_sum(r1.hi, sm.lo);
178 return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
179}
180
181static double
182tl_to_d(TripleLength total)
183{
184 DoubleLength last = dl_sum(total.lo, total.hi);
185 return total.tiny + last.lo + last.hi;
186}
187
188
12c4bdb0
MD
189/*
190 sin(pi*x), giving accurate results for all finite x (especially x
191 integral or close to an integer). This is here for use in the
192 reflection formula for the gamma function. It conforms to IEEE
193 754-2008 for finite arguments, but not for infinities or nans.
194*/
195
196static const double pi = 3.141592653589793238462643383279502884197;
9c91eb84 197static const double logpi = 1.144729885849400174143427351353058711647;
cfd735ea
RH
198
199/* Version of PyFloat_AsDouble() with in-line fast paths
200 for exact floats and integers. Gives a substantial
201 speed improvement for extracting float arguments.
202*/
203
204#define ASSIGN_DOUBLE(target_var, obj, error_label) \
205 if (PyFloat_CheckExact(obj)) { \
206 target_var = PyFloat_AS_DOUBLE(obj); \
207 } \
208 else if (PyLong_CheckExact(obj)) { \
209 target_var = PyLong_AsDouble(obj); \
210 if (target_var == -1.0 && PyErr_Occurred()) { \
211 goto error_label; \
212 } \
213 } \
214 else { \
215 target_var = PyFloat_AsDouble(obj); \
216 if (target_var == -1.0 && PyErr_Occurred()) { \
217 goto error_label; \
218 } \
219 }
220
12c4bdb0 221static double
f57cd828 222m_sinpi(double x)
8832b621 223{
f95a1b3c
AP
224 double y, r;
225 int n;
226 /* this function should only ever be called for finite arguments */
cd11ff12 227 assert(isfinite(x));
f95a1b3c
AP
228 y = fmod(fabs(x), 2.0);
229 n = (int)round(2.0*y);
230 assert(0 <= n && n <= 4);
231 switch (n) {
232 case 0:
233 r = sin(pi*y);
234 break;
235 case 1:
236 r = cos(pi*(y-0.5));
237 break;
238 case 2:
239 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
240 -0.0 instead of 0.0 when y == 1.0. */
241 r = sin(pi*(1.0-y));
242 break;
243 case 3:
244 r = -cos(pi*(y-1.5));
245 break;
246 case 4:
247 r = sin(pi*(y-2.0));
248 break;
249 default:
b2e57948 250 Py_UNREACHABLE();
f95a1b3c
AP
251 }
252 return copysign(1.0, x)*r;
12c4bdb0 253}
a40c793d 254
58395759
SK
255/* Implementation of the real gamma function. Kept here to work around
256 issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
257 on various platforms (Windows, MacOS). In extensive but non-exhaustive
12c4bdb0
MD
258 random tests, this function proved accurate to within <= 10 ulps across the
259 entire float domain. Note that accuracy may depend on the quality of the
260 system math functions, the pow function in particular. Special cases
261 follow C99 annex F. The parameters and method are tailored to platforms
262 whose double format is the IEEE 754 binary64 format.
263
264 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
265 and g=6.024680040776729583740234375; these parameters are amongst those
266 used by the Boost library. Following Boost (again), we re-express the
267 Lanczos sum as a rational function, and compute it that way. The
268 coefficients below were computed independently using MPFR, and have been
269 double-checked against the coefficients in the Boost source code.
270
271 For x < 0.0 we use the reflection formula.
272
273 There's one minor tweak that deserves explanation: Lanczos' formula for
274 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
275 values, x+g-0.5 can be represented exactly. However, in cases where it
276 can't be represented exactly the small error in x+g-0.5 can be magnified
277 significantly by the pow and exp calls, especially for large x. A cheap
278 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
279 involved in the computation of x+g-0.5 (that is, e = computed value of
280 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
281
282 Correction factor
283 -----------------
284 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
285 double, and e is tiny. Then:
286
287 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
288 = pow(y, x-0.5)/exp(y) * C,
289
290 where the correction_factor C is given by
291
292 C = pow(1-e/y, x-0.5) * exp(e)
293
294 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
295
296 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
297
298 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
299
300 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
301
302 Note that for accuracy, when computing r*C it's better to do
303
304 r + e*g/y*r;
305
306 than
307
308 r * (1 + e*g/y);
309
310 since the addition in the latter throws away most of the bits of
311 information in e*g/y.
312*/
313
314#define LANCZOS_N 13
315static const double lanczos_g = 6.024680040776729583740234375;
316static const double lanczos_g_minus_half = 5.524680040776729583740234375;
317static const double lanczos_num_coeffs[LANCZOS_N] = {
f95a1b3c
AP
318 23531376880.410759688572007674451636754734846804940,
319 42919803642.649098768957899047001988850926355848959,
320 35711959237.355668049440185451547166705960488635843,
321 17921034426.037209699919755754458931112671403265390,
322 6039542586.3520280050642916443072979210699388420708,
323 1439720407.3117216736632230727949123939715485786772,
324 248874557.86205415651146038641322942321632125127801,
325 31426415.585400194380614231628318205362874684987640,
326 2876370.6289353724412254090516208496135991145378768,
327 186056.26539522349504029498971604569928220784236328,
328 8071.6720023658162106380029022722506138218516325024,
329 210.82427775157934587250973392071336271166969580291,
330 2.5066282746310002701649081771338373386264310793408
12c4bdb0
MD
331};
332
333/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
334static const double lanczos_den_coeffs[LANCZOS_N] = {
f95a1b3c
AP
335 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
336 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
12c4bdb0
MD
337
338/* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
339#define NGAMMA_INTEGRAL 23
340static const double gamma_integral[NGAMMA_INTEGRAL] = {
f95a1b3c
AP
341 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
342 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
343 1307674368000.0, 20922789888000.0, 355687428096000.0,
344 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
345 51090942171709440000.0, 1124000727777607680000.0,
12c4bdb0
MD
346};
347
348/* Lanczos' sum L_g(x), for positive x */
349
350static double
351lanczos_sum(double x)
352{
f95a1b3c
AP
353 double num = 0.0, den = 0.0;
354 int i;
355 assert(x > 0.0);
356 /* evaluate the rational function lanczos_sum(x). For large
357 x, the obvious algorithm risks overflow, so we instead
358 rescale the denominator and numerator of the rational
359 function by x**(1-LANCZOS_N) and treat this as a
360 rational function in 1/x. This also reduces the error for
361 larger x values. The choice of cutoff point (5.0 below) is
362 somewhat arbitrary; in tests, smaller cutoff values than
363 this resulted in lower accuracy. */
364 if (x < 5.0) {
365 for (i = LANCZOS_N; --i >= 0; ) {
366 num = num * x + lanczos_num_coeffs[i];
367 den = den * x + lanczos_den_coeffs[i];
368 }
369 }
370 else {
371 for (i = 0; i < LANCZOS_N; i++) {
372 num = num / x + lanczos_num_coeffs[i];
373 den = den / x + lanczos_den_coeffs[i];
374 }
375 }
376 return num/den;
12c4bdb0
MD
377}
378
a5d0c7c2 379
12c4bdb0
MD
380static double
381m_tgamma(double x)
382{
f95a1b3c
AP
383 double absx, r, y, z, sqrtpow;
384
385 /* special cases */
cd11ff12
SK
386 if (!isfinite(x)) {
387 if (isnan(x) || x > 0.0)
f95a1b3c
AP
388 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
389 else {
390 errno = EDOM;
391 return Py_NAN; /* tgamma(-inf) = nan, invalid */
392 }
393 }
394 if (x == 0.0) {
395 errno = EDOM;
50203a69 396 /* tgamma(+-0.0) = +-inf, divide-by-zero */
7a3b0350 397 return copysign(Py_INFINITY, x);
f95a1b3c
AP
398 }
399
400 /* integer arguments */
401 if (x == floor(x)) {
402 if (x < 0.0) {
403 errno = EDOM; /* tgamma(n) = nan, invalid for */
404 return Py_NAN; /* negative integers n */
405 }
406 if (x <= NGAMMA_INTEGRAL)
407 return gamma_integral[(int)x - 1];
408 }
409 absx = fabs(x);
410
411 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
412 if (absx < 1e-20) {
413 r = 1.0/x;
cd11ff12 414 if (isinf(r))
f95a1b3c
AP
415 errno = ERANGE;
416 return r;
417 }
418
419 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
420 x > 200, and underflows to +-0.0 for x < -200, not a negative
421 integer. */
422 if (absx > 200.0) {
423 if (x < 0.0) {
f57cd828 424 return 0.0/m_sinpi(x);
f95a1b3c
AP
425 }
426 else {
427 errno = ERANGE;
8477951a 428 return Py_INFINITY;
f95a1b3c
AP
429 }
430 }
431
432 y = absx + lanczos_g_minus_half;
433 /* compute error in sum */
434 if (absx > lanczos_g_minus_half) {
435 /* note: the correction can be foiled by an optimizing
436 compiler that (incorrectly) thinks that an expression like
437 a + b - a - b can be optimized to 0.0. This shouldn't
438 happen in a standards-conforming compiler. */
439 double q = y - absx;
440 z = q - lanczos_g_minus_half;
441 }
442 else {
443 double q = y - lanczos_g_minus_half;
444 z = q - absx;
445 }
446 z = z * lanczos_g / y;
447 if (x < 0.0) {
f57cd828 448 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
f95a1b3c
AP
449 r -= z * r;
450 if (absx < 140.0) {
451 r /= pow(y, absx - 0.5);
452 }
453 else {
454 sqrtpow = pow(y, absx / 2.0 - 0.25);
455 r /= sqrtpow;
456 r /= sqrtpow;
457 }
458 }
459 else {
460 r = lanczos_sum(absx) / exp(y);
461 r += z * r;
462 if (absx < 140.0) {
463 r *= pow(y, absx - 0.5);
464 }
465 else {
466 sqrtpow = pow(y, absx / 2.0 - 0.25);
467 r *= sqrtpow;
468 r *= sqrtpow;
469 }
470 }
cd11ff12 471 if (isinf(r))
f95a1b3c
AP
472 errno = ERANGE;
473 return r;
8832b621
GR
474}
475
05d2e084
MD
476/*
477 lgamma: natural log of the absolute value of the Gamma function.
478 For large arguments, Lanczos' formula works extremely well here.
479*/
480
481static double
482m_lgamma(double x)
483{
97553fdf 484 double r;
97553fdf 485 double absx;
f95a1b3c
AP
486
487 /* special cases */
cd11ff12
SK
488 if (!isfinite(x)) {
489 if (isnan(x))
f95a1b3c
AP
490 return x; /* lgamma(nan) = nan */
491 else
8477951a 492 return Py_INFINITY; /* lgamma(+-inf) = +inf */
f95a1b3c
AP
493 }
494
495 /* integer arguments */
496 if (x == floor(x) && x <= 2.0) {
497 if (x <= 0.0) {
498 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
8477951a 499 return Py_INFINITY; /* integers n <= 0 */
f95a1b3c
AP
500 }
501 else {
502 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
503 }
504 }
505
506 absx = fabs(x);
507 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
508 if (absx < 1e-20)
509 return -log(absx);
510
9c91eb84
MD
511 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
512 having a second set of numerator coefficients for lanczos_sum that
513 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
514 subtraction below; it's probably not worth it. */
515 r = log(lanczos_sum(absx)) - lanczos_g;
516 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
517 if (x < 0.0)
518 /* Use reflection formula to get value for negative x. */
f57cd828 519 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
cd11ff12 520 if (isinf(r))
f95a1b3c
AP
521 errno = ERANGE;
522 return r;
05d2e084
MD
523}
524
a0ce375e
MD
525/* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
526 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
527 binary floating-point format, the result is always exact. */
528
529static double
530m_remainder(double x, double y)
531{
532 /* Deal with most common case first. */
cd11ff12 533 if (isfinite(x) && isfinite(y)) {
a0ce375e
MD
534 double absx, absy, c, m, r;
535
536 if (y == 0.0) {
537 return Py_NAN;
538 }
539
540 absx = fabs(x);
541 absy = fabs(y);
542 m = fmod(absx, absy);
543
544 /*
545 Warning: some subtlety here. What we *want* to know at this point is
546 whether the remainder m is less than, equal to, or greater than half
547 of absy. However, we can't do that comparison directly because we
01484703 548 can't be sure that 0.5*absy is representable (the multiplication
a0ce375e
MD
549 might incur precision loss due to underflow). So instead we compare
550 m with the complement c = absy - m: m < 0.5*absy if and only if m <
551 c, and so on. The catch is that absy - m might also not be
552 representable, but it turns out that it doesn't matter:
553
554 - if m > 0.5*absy then absy - m is exactly representable, by
555 Sterbenz's lemma, so m > c
556 - if m == 0.5*absy then again absy - m is exactly representable
557 and m == c
558 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
559 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
560 c, or (ii) absy is tiny, either subnormal or in the lowest normal
561 binade. Then absy - m is exactly representable and again m < c.
562 */
563
564 c = absy - m;
565 if (m < c) {
566 r = m;
567 }
568 else if (m > c) {
569 r = -c;
570 }
571 else {
572 /*
573 Here absx is exactly halfway between two multiples of absy,
574 and we need to choose the even multiple. x now has the form
575
576 absx = n * absy + m
577
578 for some integer n (recalling that m = 0.5*absy at this point).
579 If n is even we want to return m; if n is odd, we need to
580 return -m.
581
582 So
583
584 0.5 * (absx - m) = (n/2) * absy
585
586 and now reducing modulo absy gives us:
587
588 | m, if n is odd
589 fmod(0.5 * (absx - m), absy) = |
590 | 0, if n is even
591
592 Now m - 2.0 * fmod(...) gives the desired result: m
593 if n is even, -m if m is odd.
594
595 Note that all steps in fmod(0.5 * (absx - m), absy)
596 will be computed exactly, with no rounding error
597 introduced.
598 */
599 assert(m == c);
600 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
601 }
602 return copysign(1.0, x) * r;
603 }
604
605 /* Special values. */
cd11ff12 606 if (isnan(x)) {
a0ce375e
MD
607 return x;
608 }
cd11ff12 609 if (isnan(y)) {
a0ce375e
MD
610 return y;
611 }
cd11ff12 612 if (isinf(x)) {
a0ce375e
MD
613 return Py_NAN;
614 }
cd11ff12 615 assert(isinf(y));
a0ce375e
MD
616 return x;
617}
618
619
e675f08e
MD
620/*
621 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
622 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
623 special values directly, passing positive non-special values through to
624 the system log/log10.
625 */
626
627static double
628m_log(double x)
629{
cd11ff12 630 if (isfinite(x)) {
f95a1b3c
AP
631 if (x > 0.0)
632 return log(x);
633 errno = EDOM;
634 if (x == 0.0)
8477951a 635 return -Py_INFINITY; /* log(0) = -inf */
f95a1b3c
AP
636 else
637 return Py_NAN; /* log(-ve) = nan */
638 }
cd11ff12 639 else if (isnan(x))
f95a1b3c
AP
640 return x; /* log(nan) = nan */
641 else if (x > 0.0)
642 return x; /* log(inf) = inf */
643 else {
644 errno = EDOM;
645 return Py_NAN; /* log(-inf) = nan */
646 }
e675f08e
MD
647}
648
fa0e3d52
VS
649/*
650 log2: log to base 2.
651
652 Uses an algorithm that should:
83b8c0be 653
fa0e3d52 654 (a) produce exact results for powers of 2, and
83b8c0be
MD
655 (b) give a monotonic log2 (for positive finite floats),
656 assuming that the system log is monotonic.
fa0e3d52
VS
657*/
658
659static double
660m_log2(double x)
661{
cd11ff12
SK
662 if (!isfinite(x)) {
663 if (isnan(x))
fa0e3d52
VS
664 return x; /* log2(nan) = nan */
665 else if (x > 0.0)
666 return x; /* log2(+inf) = +inf */
667 else {
668 errno = EDOM;
669 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
670 }
671 }
672
673 if (x > 0.0) {
8f9f8d61 674 return log2(x);
fa0e3d52
VS
675 }
676 else if (x == 0.0) {
677 errno = EDOM;
8477951a 678 return -Py_INFINITY; /* log2(0) = -inf, divide-by-zero */
fa0e3d52
VS
679 }
680 else {
681 errno = EDOM;
23442584 682 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
fa0e3d52
VS
683 }
684}
685
e675f08e
MD
686static double
687m_log10(double x)
688{
cd11ff12 689 if (isfinite(x)) {
f95a1b3c
AP
690 if (x > 0.0)
691 return log10(x);
692 errno = EDOM;
693 if (x == 0.0)
8477951a 694 return -Py_INFINITY; /* log10(0) = -inf */
f95a1b3c
AP
695 else
696 return Py_NAN; /* log10(-ve) = nan */
697 }
cd11ff12 698 else if (isnan(x))
f95a1b3c
AP
699 return x; /* log10(nan) = nan */
700 else if (x > 0.0)
701 return x; /* log10(inf) = inf */
702 else {
703 errno = EDOM;
704 return Py_NAN; /* log10(-inf) = nan */
705 }
e675f08e
MD
706}
707
708
3275cb19
SK
709/*[clinic input]
710math.gcd
711
1f777396 712 *integers as args: array
3275cb19
SK
713
714Greatest Common Divisor.
715[clinic start generated code]*/
716
559e7f16 717static PyObject *
1f777396
SS
718math_gcd_impl(PyObject *module, PyObject * const *args,
719 Py_ssize_t args_length)
720/*[clinic end generated code: output=a26c95907374ffb4 input=ded7f0ea3850c05c]*/
559e7f16 721{
93930eaf 722 // Fast-path for the common case: gcd(int, int)
1f777396 723 if (args_length == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
93930eaf
VS
724 {
725 return _PyLong_GCD(args[0], args[1]);
726 }
c9ea9335 727
1f777396 728 if (args_length == 0) {
559e7f16
SS
729 return PyLong_FromLong(0);
730 }
93930eaf
VS
731
732 PyObject *res = PyNumber_Index(args[0]);
559e7f16
SS
733 if (res == NULL) {
734 return NULL;
735 }
1f777396 736 if (args_length == 1) {
559e7f16
SS
737 Py_SETREF(res, PyNumber_Absolute(res));
738 return res;
739 }
3e7ee023
VS
740
741 PyObject *one = _PyLong_GetOne(); // borrowed ref
1f777396 742 for (Py_ssize_t i = 1; i < args_length; i++) {
93930eaf 743 PyObject *x = _PyNumber_Index(args[i]);
559e7f16
SS
744 if (x == NULL) {
745 Py_DECREF(res);
746 return NULL;
747 }
3e7ee023 748 if (res == one) {
559e7f16
SS
749 /* Fast path: just check arguments.
750 It is okay to use identity comparison here. */
751 Py_DECREF(x);
752 continue;
753 }
754 Py_SETREF(res, _PyLong_GCD(res, x));
755 Py_DECREF(x);
756 if (res == NULL) {
757 return NULL;
758 }
759 }
760 return res;
761}
762
c9ea9335 763
48e47aaa 764static PyObject *
559e7f16 765long_lcm(PyObject *a, PyObject *b)
48e47aaa 766{
559e7f16 767 PyObject *g, *m, *f, *ab;
48e47aaa 768
7559f5fd 769 if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
559e7f16
SS
770 return PyLong_FromLong(0);
771 }
772 g = _PyLong_GCD(a, b);
773 if (g == NULL) {
48e47aaa 774 return NULL;
559e7f16
SS
775 }
776 f = PyNumber_FloorDivide(a, g);
777 Py_DECREF(g);
778 if (f == NULL) {
48e47aaa
SS
779 return NULL;
780 }
559e7f16
SS
781 m = PyNumber_Multiply(f, b);
782 Py_DECREF(f);
783 if (m == NULL) {
784 return NULL;
785 }
786 ab = PyNumber_Absolute(m);
787 Py_DECREF(m);
788 return ab;
48e47aaa
SS
789}
790
48e47aaa 791
3275cb19
SK
792/*[clinic input]
793math.lcm
794
1f777396 795 *integers as args: array
3275cb19
SK
796
797Least Common Multiple.
798[clinic start generated code]*/
799
559e7f16 800static PyObject *
1f777396
SS
801math_lcm_impl(PyObject *module, PyObject * const *args,
802 Py_ssize_t args_length)
803/*[clinic end generated code: output=c8a59a5c2e55c816 input=3e4f4b7cdf948a98]*/
559e7f16
SS
804{
805 PyObject *res, *x;
806 Py_ssize_t i;
807
1f777396 808 if (args_length == 0) {
559e7f16
SS
809 return PyLong_FromLong(1);
810 }
811 res = PyNumber_Index(args[0]);
812 if (res == NULL) {
813 return NULL;
814 }
1f777396 815 if (args_length == 1) {
559e7f16
SS
816 Py_SETREF(res, PyNumber_Absolute(res));
817 return res;
818 }
3e7ee023
VS
819
820 PyObject *zero = _PyLong_GetZero(); // borrowed ref
1f777396 821 for (i = 1; i < args_length; i++) {
559e7f16
SS
822 x = PyNumber_Index(args[i]);
823 if (x == NULL) {
824 Py_DECREF(res);
825 return NULL;
826 }
3e7ee023 827 if (res == zero) {
559e7f16
SS
828 /* Fast path: just check arguments.
829 It is okay to use identity comparison here. */
830 Py_DECREF(x);
831 continue;
832 }
833 Py_SETREF(res, long_lcm(res, x));
834 Py_DECREF(x);
835 if (res == NULL) {
836 return NULL;
837 }
838 }
839 return res;
840}
841
842
12c4bdb0
MD
843/* Call is_error when errno != 0, and where x is the result libm
844 * returned. is_error will usually set up an exception and return
845 * true (1), but may return false (0) without setting up an exception.
846 */
847static int
75f59bb6 848is_error(double x, int raise_edom)
12c4bdb0 849{
f95a1b3c
AP
850 int result = 1; /* presumption of guilt */
851 assert(errno); /* non-zero errno is a precondition for calling */
75f59bb6
SK
852 if (errno == EDOM) {
853 if (raise_edom) {
854 PyErr_SetString(PyExc_ValueError, "math domain error");
855 }
856 }
f95a1b3c
AP
857
858 else if (errno == ERANGE) {
859 /* ANSI C generally requires libm functions to set ERANGE
860 * on overflow, but also generally *allows* them to set
861 * ERANGE on underflow too. There's no consistency about
862 * the latter across platforms.
863 * Alas, C99 never requires that errno be set.
864 * Here we suppress the underflow errors (libm functions
865 * should return a zero on underflow, and +- HUGE_VAL on
866 * overflow, so testing the result for zero suffices to
867 * distinguish the cases).
868 *
869 * On some platforms (Ubuntu/ia64) it seems that errno can be
870 * set to ERANGE for subnormal results that do *not* underflow
871 * to zero. So to be safe, we'll ignore ERANGE whenever the
3363e1cb
SD
872 * function result is less than 1.5 in absolute value.
873 *
874 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
875 * are correctly detected, since the function may underflow
876 * toward -1.0 rather than 0.0.
f95a1b3c 877 */
3363e1cb 878 if (fabs(x) < 1.5)
f95a1b3c
AP
879 result = 0;
880 else
881 PyErr_SetString(PyExc_OverflowError,
882 "math range error");
883 }
884 else
885 /* Unexpected math error */
886 PyErr_SetFromErrno(PyExc_ValueError);
887 return result;
12c4bdb0
MD
888}
889
53876d9c
CH
890/*
891 math_1 is used to wrap a libm function f that takes a double
c9ea9335 892 argument and returns a double.
53876d9c
CH
893
894 The error reporting follows these rules, which are designed to do
895 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
896 platforms.
897
898 - a NaN result from non-NaN inputs causes ValueError to be raised
899 - an infinite result from finite inputs causes OverflowError to be
900 raised if can_overflow is 1, or raises ValueError if can_overflow
901 is 0.
902 - if the result is finite and errno == EDOM then ValueError is
903 raised
904 - if the result is finite and nonzero and errno == ERANGE then
905 OverflowError is raised
906
907 The last rule is used to catch overflow on platforms which follow
908 C89 but for which HUGE_VAL is not an infinity.
909
910 For the majority of one-argument functions these rules are enough
911 to ensure that Python's functions behave as specified in 'Annex F'
912 of the C99 standard, with the 'invalid' and 'divide-by-zero'
913 floating-point exceptions mapping to Python's ValueError and the
914 'overflow' floating-point exception mapping to OverflowError.
915 math_1 only works for functions that don't have singularities *and*
916 the possibility of overflow; fortunately, that covers everything we
917 care about right now.
918*/
919
8b43b19e 920static PyObject *
75f59bb6
SK
921math_1(PyObject *arg, double (*func) (double), int can_overflow,
922 const char *err_msg)
85a5fbbd 923{
f95a1b3c
AP
924 double x, r;
925 x = PyFloat_AsDouble(arg);
926 if (x == -1.0 && PyErr_Occurred())
927 return NULL;
928 errno = 0;
f95a1b3c 929 r = (*func)(x);
75f59bb6
SK
930 if (isnan(r) && !isnan(x))
931 goto domain_err; /* domain error */
cd11ff12 932 if (isinf(r) && isfinite(x)) {
2354a759
BP
933 if (can_overflow)
934 PyErr_SetString(PyExc_OverflowError,
935 "math range error"); /* overflow */
936 else
75f59bb6 937 goto domain_err; /* singularity */
2354a759 938 return NULL;
f95a1b3c 939 }
75f59bb6 940 if (isfinite(r) && errno && is_error(r, 1))
f95a1b3c
AP
941 /* this branch unnecessary on most platforms */
942 return NULL;
943
45fa12ae 944 return PyFloat_FromDouble(r);
75f59bb6
SK
945
946domain_err:
947 if (err_msg) {
948 char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
949 if (buf) {
950 PyErr_Format(PyExc_ValueError, err_msg, buf);
951 PyMem_Free(buf);
952 }
953 }
954 else {
955 PyErr_SetString(PyExc_ValueError, "math domain error");
956 }
957 return NULL;
c2155835
JY
958}
959
12c4bdb0
MD
960/* variant of math_1, to be used when the function being wrapped is known to
961 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
962 errno = ERANGE for overflow). */
963
964static PyObject *
75f59bb6 965math_1a(PyObject *arg, double (*func) (double), const char *err_msg)
12c4bdb0 966{
f95a1b3c
AP
967 double x, r;
968 x = PyFloat_AsDouble(arg);
969 if (x == -1.0 && PyErr_Occurred())
970 return NULL;
971 errno = 0;
f95a1b3c 972 r = (*func)(x);
75f59bb6
SK
973 if (errno && is_error(r, err_msg ? 0 : 1)) {
974 if (err_msg && errno == EDOM) {
975 assert(!PyErr_Occurred()); /* exception is not set by is_error() */
976 char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
977 if (buf) {
978 PyErr_Format(PyExc_ValueError, err_msg, buf);
979 PyMem_Free(buf);
980 }
981 }
f95a1b3c 982 return NULL;
75f59bb6 983 }
f95a1b3c 984 return PyFloat_FromDouble(r);
12c4bdb0
MD
985}
986
53876d9c
CH
987/*
988 math_2 is used to wrap a libm function f that takes two double
989 arguments and returns a double.
990
991 The error reporting follows these rules, which are designed to do
992 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
993 platforms.
994
995 - a NaN result from non-NaN inputs causes ValueError to be raised
996 - an infinite result from finite inputs causes OverflowError to be
997 raised.
998 - if the result is finite and errno == EDOM then ValueError is
999 raised
1000 - if the result is finite and nonzero and errno == ERANGE then
1001 OverflowError is raised
1002
1003 The last rule is used to catch overflow on platforms which follow
1004 C89 but for which HUGE_VAL is not an infinity.
1005
1006 For most two-argument functions (copysign, fmod, hypot, atan2)
1007 these rules are enough to ensure that Python's functions behave as
1008 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1009 'divide-by-zero' floating-point exceptions mapping to Python's
1010 ValueError and the 'overflow' floating-point exception mapping to
1011 OverflowError.
1012*/
1013
8b43b19e 1014static PyObject *
d0d3e991
SS
1015math_2(PyObject *const *args, Py_ssize_t nargs,
1016 double (*func) (double, double), const char *funcname)
85a5fbbd 1017{
f95a1b3c 1018 double x, y, r;
d0d3e991 1019 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
f95a1b3c 1020 return NULL;
d0d3e991 1021 x = PyFloat_AsDouble(args[0]);
5208b4b3
ZS
1022 if (x == -1.0 && PyErr_Occurred()) {
1023 return NULL;
1024 }
d0d3e991 1025 y = PyFloat_AsDouble(args[1]);
5208b4b3 1026 if (y == -1.0 && PyErr_Occurred()) {
f95a1b3c 1027 return NULL;
5208b4b3 1028 }
f95a1b3c 1029 errno = 0;
f95a1b3c 1030 r = (*func)(x, y);
cd11ff12
SK
1031 if (isnan(r)) {
1032 if (!isnan(x) && !isnan(y))
f95a1b3c
AP
1033 errno = EDOM;
1034 else
1035 errno = 0;
1036 }
cd11ff12
SK
1037 else if (isinf(r)) {
1038 if (isfinite(x) && isfinite(y))
f95a1b3c
AP
1039 errno = ERANGE;
1040 else
1041 errno = 0;
1042 }
75f59bb6 1043 if (errno && is_error(r, 1))
f95a1b3c
AP
1044 return NULL;
1045 else
1046 return PyFloat_FromDouble(r);
85a5fbbd
GR
1047}
1048
f95a1b3c
AP
1049#define FUNC1(funcname, func, can_overflow, docstring) \
1050 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
75f59bb6
SK
1051 return math_1(args, func, can_overflow, NULL); \
1052 }\
1053 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1054
1055#define FUNC1D(funcname, func, can_overflow, docstring, err_msg) \
1056 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1057 return math_1(args, func, can_overflow, err_msg); \
f95a1b3c
AP
1058 }\
1059 PyDoc_STRVAR(math_##funcname##_doc, docstring);
85a5fbbd 1060
f95a1b3c
AP
1061#define FUNC1A(funcname, func, docstring) \
1062 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
75f59bb6
SK
1063 return math_1a(args, func, NULL); \
1064 }\
1065 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1066
1067#define FUNC1AD(funcname, func, docstring, err_msg) \
1068 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1069 return math_1a(args, func, err_msg); \
f95a1b3c
AP
1070 }\
1071 PyDoc_STRVAR(math_##funcname##_doc, docstring);
12c4bdb0 1072
40c48685 1073#define FUNC2(funcname, func, docstring) \
d0d3e991
SS
1074 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1075 return math_2(args, nargs, func, #funcname); \
f95a1b3c
AP
1076 }\
1077 PyDoc_STRVAR(math_##funcname##_doc, docstring);
c6e22902 1078
d0660a9a 1079FUNC1D(acos, acos, 0,
c9ea9335 1080 "acos($module, x, /)\n--\n\n"
dc3f99fa 1081 "Return the arc cosine (measured in radians) of x.\n\n"
d0660a9a
SK
1082 "The result is between 0 and pi.",
1083 "expected a number in range from -1 up to 1, got %s")
1084FUNC1D(acosh, acosh, 0,
c9ea9335 1085 "acosh($module, x, /)\n--\n\n"
d0660a9a
SK
1086 "Return the inverse hyperbolic cosine of x.",
1087 "expected argument value not less than 1, got %s")
1088FUNC1D(asin, asin, 0,
c9ea9335 1089 "asin($module, x, /)\n--\n\n"
dc3f99fa 1090 "Return the arc sine (measured in radians) of x.\n\n"
d0660a9a
SK
1091 "The result is between -pi/2 and pi/2.",
1092 "expected a number in range from -1 up to 1, got %s")
fa26245a 1093FUNC1(asinh, asinh, 0,
c9ea9335
SS
1094 "asinh($module, x, /)\n--\n\n"
1095 "Return the inverse hyperbolic sine of x.")
53876d9c 1096FUNC1(atan, atan, 0,
c9ea9335 1097 "atan($module, x, /)\n--\n\n"
dc3f99fa
GC
1098 "Return the arc tangent (measured in radians) of x.\n\n"
1099 "The result is between -pi/2 and pi/2.")
19be0ee9 1100FUNC2(atan2, atan2,
c9ea9335
SS
1101 "atan2($module, y, x, /)\n--\n\n"
1102 "Return the arc tangent (measured in radians) of y/x.\n\n"
fe71f813 1103 "Unlike atan(y/x), the signs of both x and y are considered.")
75f59bb6 1104FUNC1D(atanh, atanh, 0,
c9ea9335 1105 "atanh($module, x, /)\n--\n\n"
75f59bb6
SK
1106 "Return the inverse hyperbolic tangent of x.",
1107 "expected a number between -1 and 1, got %s")
ac867f10
AR
1108FUNC1(cbrt, cbrt, 0,
1109 "cbrt($module, x, /)\n--\n\n"
1110 "Return the cube root of x.")
c9ea9335
SS
1111
1112/*[clinic input]
1113math.ceil
1114
1115 x as number: object
1116 /
1117
1118Return the ceiling of x as an Integral.
1119
1120This is the smallest integer >= x.
1121[clinic start generated code]*/
13e05de9 1122
c9ea9335
SS
1123static PyObject *
1124math_ceil(PyObject *module, PyObject *number)
1125/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1126{
f013b475 1127 double x;
f95a1b3c 1128
f013b475
S
1129 if (PyFloat_CheckExact(number)) {
1130 x = PyFloat_AS_DOUBLE(number);
1131 }
1132 else {
67fbfb42
SG
1133 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__ceil__));
1134 if (result != NULL) {
5fd5cb8d
SS
1135 return result;
1136 }
67fbfb42 1137 else if (PyErr_Occurred()) {
f95a1b3c 1138 return NULL;
67fbfb42 1139 }
f013b475 1140 x = PyFloat_AsDouble(number);
67fbfb42 1141 if (x == -1.0 && PyErr_Occurred()) {
f013b475 1142 return NULL;
67fbfb42 1143 }
f751bc9c 1144 }
5fd5cb8d 1145 return PyLong_FromDouble(ceil(x));
13e05de9
GR
1146}
1147
53876d9c 1148FUNC2(copysign, copysign,
c9ea9335
SS
1149 "copysign($module, x, y, /)\n--\n\n"
1150 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1151 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1152 "returns -1.0.\n")
d0660a9a 1153FUNC1D(cos, cos, 0,
c9ea9335 1154 "cos($module, x, /)\n--\n\n"
d0660a9a
SK
1155 "Return the cosine of x (measured in radians).",
1156 "expected a finite input, got %s")
53876d9c 1157FUNC1(cosh, cosh, 1,
c9ea9335
SS
1158 "cosh($module, x, /)\n--\n\n"
1159 "Return the hyperbolic cosine of x.")
58395759 1160FUNC1A(erf, erf,
c9ea9335
SS
1161 "erf($module, x, /)\n--\n\n"
1162 "Error function at x.")
58395759 1163FUNC1A(erfc, erfc,
c9ea9335
SS
1164 "erfc($module, x, /)\n--\n\n"
1165 "Complementary error function at x.")
53876d9c 1166FUNC1(exp, exp, 1,
c9ea9335
SS
1167 "exp($module, x, /)\n--\n\n"
1168 "Return e raised to the power of x.")
6266e4af
G
1169FUNC1(exp2, exp2, 1,
1170 "exp2($module, x, /)\n--\n\n"
1171 "Return 2 raised to the power of x.")
fa26245a 1172FUNC1(expm1, expm1, 1,
c9ea9335
SS
1173 "expm1($module, x, /)\n--\n\n"
1174 "Return exp(x)-1.\n\n"
664b511c
MD
1175 "This function avoids the loss of precision involved in the direct "
1176 "evaluation of exp(x)-1 for small x.")
53876d9c 1177FUNC1(fabs, fabs, 0,
c9ea9335
SS
1178 "fabs($module, x, /)\n--\n\n"
1179 "Return the absolute value of the float x.")
1180
1181/*[clinic input]
1182math.floor
13e05de9 1183
c9ea9335
SS
1184 x as number: object
1185 /
1186
1187Return the floor of x as an Integral.
1188
1189This is the largest integer <= x.
1190[clinic start generated code]*/
1191
1192static PyObject *
1193math_floor(PyObject *module, PyObject *number)
1194/*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1195{
930f4518
RH
1196 double x;
1197
930f4518
RH
1198 if (PyFloat_CheckExact(number)) {
1199 x = PyFloat_AS_DOUBLE(number);
1200 }
f013b475 1201 else {
67fbfb42
SG
1202 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__floor__));
1203 if (result != NULL) {
5fd5cb8d
SS
1204 return result;
1205 }
67fbfb42 1206 else if (PyErr_Occurred()) {
f95a1b3c 1207 return NULL;
67fbfb42 1208 }
930f4518 1209 x = PyFloat_AsDouble(number);
67fbfb42 1210 if (x == -1.0 && PyErr_Occurred()) {
930f4518 1211 return NULL;
67fbfb42 1212 }
8bb9cde6 1213 }
5fd5cb8d 1214 return PyLong_FromDouble(floor(x));
13e05de9
GR
1215}
1216
2301cdb5
BT
1217/*[clinic input]
1218math.fmax -> double
1219
1220 x: double
1221 y: double
1222 /
1223
1224Return the larger of two floating-point arguments.
1225[clinic start generated code]*/
1226
1227static double
1228math_fmax_impl(PyObject *module, double x, double y)
1229/*[clinic end generated code: output=00692358d312fee2 input=021596c027336ffe]*/
1230{
1231 return fmax(x, y);
1232}
1233
1234/*[clinic input]
1235math.fmin -> double
1236
1237 x: double
1238 y: double
1239 /
1240
1241Return the smaller of two floating-point arguments.
1242[clinic start generated code]*/
1243
1244static double
1245math_fmin_impl(PyObject *module, double x, double y)
1246/*[clinic end generated code: output=3d5b7826bd292dd9 input=d12e64ccc33f878a]*/
1247{
1248 return fmin(x, y);
1249}
1250
75f59bb6 1251FUNC1AD(gamma, m_tgamma,
c9ea9335 1252 "gamma($module, x, /)\n--\n\n"
75f59bb6 1253 "Gamma function at x.",
d0660a9a
SK
1254 "expected a noninteger or positive integer, got %s")
1255FUNC1AD(lgamma, m_lgamma,
c9ea9335 1256 "lgamma($module, x, /)\n--\n\n"
d0660a9a
SK
1257 "Natural logarithm of absolute value of Gamma function at x.",
1258 "expected a noninteger or positive integer, got %s")
1259FUNC1D(log1p, m_log1p, 0,
c9ea9335
SS
1260 "log1p($module, x, /)\n--\n\n"
1261 "Return the natural logarithm of 1+x (base e).\n\n"
d0660a9a
SK
1262 "The result is computed in a way which is accurate for x near zero.",
1263 "expected argument value > -1, got %s")
a0ce375e
MD
1264FUNC2(remainder, m_remainder,
1265 "remainder($module, x, y, /)\n--\n\n"
1266 "Difference between x and the closest integer multiple of y.\n\n"
1267 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1268 "In the case where x is exactly halfway between two multiples of\n"
1269 "y, the nearest even value of n is used. The result is always exact.")
42ccac2d
BT
1270
1271/*[clinic input]
1272math.signbit
1273
1274 x: double
1275 /
1276
1277Return True if the sign of x is negative and False otherwise.
1278[clinic start generated code]*/
1279
1280static PyObject *
1281math_signbit_impl(PyObject *module, double x)
1282/*[clinic end generated code: output=20c5f20156a9b871 input=3d3493fbcb5bdb3e]*/
1283{
1284 return PyBool_FromLong(signbit(x));
1285}
1286
d0660a9a 1287FUNC1D(sin, sin, 0,
c9ea9335 1288 "sin($module, x, /)\n--\n\n"
d0660a9a
SK
1289 "Return the sine of x (measured in radians).",
1290 "expected a finite input, got %s")
53876d9c 1291FUNC1(sinh, sinh, 1,
c9ea9335
SS
1292 "sinh($module, x, /)\n--\n\n"
1293 "Return the hyperbolic sine of x.")
75f59bb6 1294FUNC1D(sqrt, sqrt, 0,
c9ea9335 1295 "sqrt($module, x, /)\n--\n\n"
75f59bb6
SK
1296 "Return the square root of x.",
1297 "expected a nonnegative input, got %s")
d0660a9a 1298FUNC1D(tan, tan, 0,
c9ea9335 1299 "tan($module, x, /)\n--\n\n"
d0660a9a
SK
1300 "Return the tangent of x (measured in radians).",
1301 "expected a finite input, got %s")
53876d9c 1302FUNC1(tanh, tanh, 0,
c9ea9335
SS
1303 "tanh($module, x, /)\n--\n\n"
1304 "Return the hyperbolic tangent of x.")
85a5fbbd 1305
2b7411df 1306/* Precision summation function as msum() by Raymond Hettinger in
c4f9823b 1307 <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
2b7411df
BP
1308 enhanced with the exact partials sum and roundoff from Mark
1309 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1310 See those links for more details, proofs and other references.
1311
87d3bd0e
MD
1312 Note 1: IEEE 754 floating-point semantics with a rounding mode of
1313 roundTiesToEven are assumed.
2b7411df 1314
87d3bd0e
MD
1315 Note 2: No provision is made for intermediate overflow handling;
1316 therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1317 fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
2b7411df
BP
1318 overflow of the first partial sum.
1319
87d3bd0e
MD
1320 Note 3: The algorithm has two potential sources of fragility. First, C
1321 permits arithmetic operations on `double`s to be performed in an
1322 intermediate format whose range and precision may be greater than those of
1323 `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1324 example on machines using the now largely historical x87 FPUs. In this case,
1325 `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1326 `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1327 should be safe from this source of errors. Second, an aggressively
1328 optimizing compiler can re-associate operations so that (for example) the
1329 statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1330 re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1331 re-association would be in violation of the C standard, and should not occur
1332 except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1333 -fassociative-math). Such optimizations should be avoided for this module.
1334
1335 Note 4: The signature of math.fsum() differs from builtins.sum()
2b7411df
BP
1336 because the start argument doesn't make sense in the context of
1337 accurate summation. Since the partials table is collapsed before
1338 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1339 accurate result returned by sum(itertools.chain(seq1, seq2)).
1340*/
1341
1342#define NUM_PARTIALS 32 /* initial partials array size, on stack */
1343
1344/* Extend the partials array p[] by doubling its size. */
1345static int /* non-zero on error */
aa7633ab 1346_fsum_realloc(double **p_ptr, Py_ssize_t n,
2b7411df
BP
1347 double *ps, Py_ssize_t *m_ptr)
1348{
f95a1b3c
AP
1349 void *v = NULL;
1350 Py_ssize_t m = *m_ptr;
1351
1352 m += m; /* double */
049e509a 1353 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
f95a1b3c
AP
1354 double *p = *p_ptr;
1355 if (p == ps) {
1356 v = PyMem_Malloc(sizeof(double) * m);
1357 if (v != NULL)
1358 memcpy(v, ps, sizeof(double) * n);
1359 }
1360 else
1361 v = PyMem_Realloc(p, sizeof(double) * m);
1362 }
1363 if (v == NULL) { /* size overflow or no memory */
1364 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1365 return 1;
1366 }
1367 *p_ptr = (double*) v;
1368 *m_ptr = m;
1369 return 0;
2b7411df
BP
1370}
1371
1372/* Full precision summation of a sequence of floats.
1373
1374 def msum(iterable):
1375 partials = [] # sorted, non-overlapping partial sums
1376 for x in iterable:
fdb0accc
MD
1377 i = 0
1378 for y in partials:
1379 if abs(x) < abs(y):
1380 x, y = y, x
1381 hi = x + y
1382 lo = y - (hi - x)
1383 if lo:
1384 partials[i] = lo
1385 i += 1
1386 x = hi
1387 partials[i:] = [x]
2b7411df
BP
1388 return sum_exact(partials)
1389
1390 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1391 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1392 partial so that the list of partial sums remains exact.
1393
1394 Sum_exact() adds the partial sums exactly and correctly rounds the final
1395 result (using the round-half-to-even rule). The items in partials remain
1396 non-zero, non-special, non-overlapping and strictly increasing in
1397 magnitude, but possibly not all having the same sign.
1398
1399 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1400*/
1401
c9ea9335
SS
1402/*[clinic input]
1403math.fsum
1404
1405 seq: object
1406 /
1407
1a0c7b9b 1408Return an accurate floating-point sum of values in the iterable seq.
c9ea9335 1409
1a0c7b9b 1410Assumes IEEE-754 floating-point arithmetic.
c9ea9335
SS
1411[clinic start generated code]*/
1412
1413static PyObject *
1414math_fsum(PyObject *module, PyObject *seq)
1a0c7b9b 1415/*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
2b7411df 1416{
f95a1b3c
AP
1417 PyObject *item, *iter, *sum = NULL;
1418 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1419 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1420 double xsave, special_sum = 0.0, inf_sum = 0.0;
36f23293 1421 double hi, yr, lo = 0.0;
f95a1b3c
AP
1422
1423 iter = PyObject_GetIter(seq);
1424 if (iter == NULL)
1425 return NULL;
1426
f95a1b3c
AP
1427 for(;;) { /* for x in iterable */
1428 assert(0 <= n && n <= m);
1429 assert((m == NUM_PARTIALS && p == ps) ||
1430 (m > NUM_PARTIALS && p != NULL));
1431
1432 item = PyIter_Next(iter);
1433 if (item == NULL) {
1434 if (PyErr_Occurred())
1435 goto _fsum_error;
1436 break;
1437 }
cfd735ea 1438 ASSIGN_DOUBLE(x, item, error_with_item);
f95a1b3c 1439 Py_DECREF(item);
f95a1b3c
AP
1440
1441 xsave = x;
1442 for (i = j = 0; j < n; j++) { /* for y in partials */
1443 y = p[j];
1444 if (fabs(x) < fabs(y)) {
1445 t = x; x = y; y = t;
1446 }
1447 hi = x + y;
1448 yr = hi - x;
1449 lo = y - yr;
1450 if (lo != 0.0)
1451 p[i++] = lo;
1452 x = hi;
1453 }
1454
1455 n = i; /* ps[i:] = [x] */
1456 if (x != 0.0) {
cd11ff12 1457 if (! isfinite(x)) {
f95a1b3c
AP
1458 /* a nonfinite x could arise either as
1459 a result of intermediate overflow, or
1460 as a result of a nan or inf in the
1461 summands */
cd11ff12 1462 if (isfinite(xsave)) {
f95a1b3c
AP
1463 PyErr_SetString(PyExc_OverflowError,
1464 "intermediate overflow in fsum");
1465 goto _fsum_error;
1466 }
cd11ff12 1467 if (isinf(xsave))
f95a1b3c
AP
1468 inf_sum += xsave;
1469 special_sum += xsave;
1470 /* reset partials */
1471 n = 0;
1472 }
1473 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1474 goto _fsum_error;
1475 else
1476 p[n++] = x;
1477 }
1478 }
1479
1480 if (special_sum != 0.0) {
cd11ff12 1481 if (isnan(inf_sum))
f95a1b3c
AP
1482 PyErr_SetString(PyExc_ValueError,
1483 "-inf + inf in fsum");
1484 else
1485 sum = PyFloat_FromDouble(special_sum);
1486 goto _fsum_error;
1487 }
1488
1489 hi = 0.0;
1490 if (n > 0) {
1491 hi = p[--n];
1492 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1493 inexact. */
1494 while (n > 0) {
1495 x = hi;
1496 y = p[--n];
1497 assert(fabs(y) < fabs(x));
1498 hi = x + y;
1499 yr = hi - x;
1500 lo = y - yr;
1501 if (lo != 0.0)
1502 break;
1503 }
1504 /* Make half-even rounding work across multiple partials.
1505 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1506 digit to two instead of down to zero (the 1e-16 makes the 1
1507 slightly closer to two). With a potential 1 ULP rounding
1508 error fixed-up, math.fsum() can guarantee commutativity. */
1509 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1510 (lo > 0.0 && p[n-1] > 0.0))) {
1511 y = lo * 2.0;
1512 x = hi + y;
1513 yr = x - hi;
1514 if (y == yr)
1515 hi = x;
1516 }
1517 }
1518 sum = PyFloat_FromDouble(hi);
2b7411df 1519
cfd735ea 1520 _fsum_error:
f95a1b3c
AP
1521 Py_DECREF(iter);
1522 if (p != ps)
1523 PyMem_Free(p);
1524 return sum;
cfd735ea
RH
1525
1526 error_with_item:
1527 Py_DECREF(item);
1528 goto _fsum_error;
2b7411df
BP
1529}
1530
1531#undef NUM_PARTIALS
1532
2b7411df 1533
4c8a9a2d
MD
1534static unsigned long
1535count_set_bits(unsigned long n)
1536{
1537 unsigned long count = 0;
1538 while (n != 0) {
1539 ++count;
1540 n &= n - 1; /* clear least significant bit */
1541 }
1542 return count;
1543}
1544
73934b9d
MD
1545/* Integer square root
1546
1547Given a nonnegative integer `n`, we want to compute the largest integer
1548`a` for which `a * a <= n`, or equivalently the integer part of the exact
1549square root of `n`.
1550
1551We use an adaptive-precision pure-integer version of Newton's iteration. Given
1552a positive integer `n`, the algorithm produces at each iteration an integer
1553approximation `a` to the square root of `n >> s` for some even integer `s`,
1554with `s` decreasing as the iterations progress. On the final iteration, `s` is
1555zero and we have an approximation to the square root of `n` itself.
1556
1557At every step, the approximation `a` is strictly within 1.0 of the true square
1558root, so we have
1559
1560 (a - 1)**2 < (n >> s) < (a + 1)**2
1561
1562After the final iteration, a check-and-correct step is needed to determine
1563whether `a` or `a - 1` gives the desired integer square root of `n`.
1564
1565The algorithm is remarkable in its simplicity. There's no need for a
1566per-iteration check-and-correct step, and termination is straightforward: the
1567number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1568for `n > 1`). The only tricky part of the correctness proof is in establishing
1569that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1570iteration to the next. A sketch of the proof of this is given below.
1571
1572In addition to the proof sketch, a formal, computer-verified proof
1573of correctness (using Lean) of an equivalent recursive algorithm can be found
1574here:
1575
1576 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1577
1578
1579Here's Python code equivalent to the C implementation below:
1580
1581 def isqrt(n):
1582 """
1583 Return the integer part of the square root of the input.
1584 """
1585 n = operator.index(n)
1586
1587 if n < 0:
1588 raise ValueError("isqrt() argument must be nonnegative")
1589 if n == 0:
1590 return 0
1591
1592 c = (n.bit_length() - 1) // 2
1593 a = 1
1594 d = 0
1595 for s in reversed(range(c.bit_length())):
2dfeaa92 1596 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
73934b9d
MD
1597 e = d
1598 d = c >> s
1599 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
73934b9d
MD
1600
1601 return a - (a*a > n)
1602
1603
1604Sketch of proof of correctness
1605------------------------------
1606
1607The delicate part of the correctness proof is showing that the loop invariant
1608is preserved from one iteration to the next. That is, just before the line
1609
1610 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1611
1612is executed in the above code, we know that
1613
1614 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1615
1616(since `e` is always the value of `d` from the previous iteration). We must
1617prove that after that line is executed, we have
1618
1619 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1620
f7d72e48 1621To facilitate the proof, we make some changes of notation. Write `m` for
73934b9d
MD
1622`n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1623
1624 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1625
1626or equivalently:
1627
1628 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1629
1630Then we can rewrite (1) as:
1631
1632 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1633
1634and we must show that (b - 1)**2 < m < (b + 1)**2.
1635
1636From this point on, we switch to mathematical notation, so `/` means exact
1637division rather than integer division and `^` is used for exponentiation. We
1638use the `√` symbol for the exact square root. In (3), we can remove the
1639implicit floor operation to give:
1640
1641 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1642
1643Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1644
1645 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1646
1647Squaring and dividing through by `2^(d-e+1) a` gives
1648
1649 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1650
1651We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1652right-hand side of (6) with `1`, and now replacing the central
1653term `m / (2^(d-e+1) a)` with its floor in (6) gives
1654
1655 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1656
1657Or equivalently, from (2):
1658
1659 (7) -1 < b - √m < 1
1660
1661and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1662to prove.
1663
1664We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1665a` that was used to get line (7) above. From the definition of `c`, we have
1666`4^c <= n`, which implies
1667
1668 (8) 4^d <= m
1669
1670also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1671that `2d - 2e - 1 <= d` and hence that
1672
1673 (9) 4^(2d - 2e - 1) <= m
1674
1675Dividing both sides by `4^(d - e)` gives
1676
1677 (10) 4^(d - e - 1) <= m / 4^(d - e)
1678
1679But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1680
1681 (11) 4^(d - e - 1) < (a + 1)^2
1682
1683Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1684`a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1685completes the proof sketch.
1686
1687*/
1688
d02c5e9b
MD
1689/*
1690 The _approximate_isqrt_tab table provides approximate square roots for
1691 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1692
1693 a = _approximate_isqrt_tab[(n >> 8) - 64]
1694
1695 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1696
1697 The table was computed in Python using the expression:
1698
1699 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1700*/
1701
1702static const uint8_t _approximate_isqrt_tab[192] = {
1703 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1704 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1705 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1706 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1707 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1708 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1709 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1710 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1711 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1712 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1713 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1714 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1715 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1716 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1717 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1718 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1719};
5c08ce9b
MD
1720
1721/* Approximate square root of a large 64-bit integer.
1722
1723 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1724 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1725
d02c5e9b 1726static inline uint32_t
5c08ce9b
MD
1727_approximate_isqrt(uint64_t n)
1728{
d02c5e9b
MD
1729 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1730 u = (u << 7) + (uint32_t)(n >> 41) / u;
1731 return (u << 15) + (uint32_t)((n >> 17) / u);
5c08ce9b
MD
1732}
1733
73934b9d
MD
1734/*[clinic input]
1735math.isqrt
1736
1737 n: object
1738 /
1739
1740Return the integer part of the square root of the input.
1741[clinic start generated code]*/
1742
1743static PyObject *
1744math_isqrt(PyObject *module, PyObject *n)
1745/*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1746{
5c08ce9b 1747 int a_too_large, c_bit_length;
d08c7888 1748 int64_t c, d;
d02c5e9b
MD
1749 uint64_t m;
1750 uint32_t u;
73934b9d
MD
1751 PyObject *a = NULL, *b;
1752
5f4b229d 1753 n = _PyNumber_Index(n);
73934b9d
MD
1754 if (n == NULL) {
1755 return NULL;
1756 }
1757
7559f5fd 1758 if (_PyLong_IsNegative((PyLongObject *)n)) {
73934b9d
MD
1759 PyErr_SetString(
1760 PyExc_ValueError,
1761 "isqrt() argument must be nonnegative");
1762 goto error;
1763 }
7559f5fd 1764 if (_PyLong_IsZero((PyLongObject *)n)) {
73934b9d
MD
1765 Py_DECREF(n);
1766 return PyLong_FromLong(0);
1767 }
1768
5c08ce9b 1769 /* c = (n.bit_length() - 1) // 2 */
73934b9d 1770 c = _PyLong_NumBits(n);
d08c7888
SS
1771 assert(c > 0);
1772 assert(!PyErr_Occurred());
1773 c = (c - 1) / 2;
73934b9d 1774
5c08ce9b 1775 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
d02c5e9b 1776 fast, almost branch-free algorithm. */
d08c7888 1777 if (c <= 31) {
d02c5e9b 1778 int shift = 31 - (int)c;
5c08ce9b
MD
1779 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1780 Py_DECREF(n);
1781 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1782 return NULL;
1783 }
d02c5e9b
MD
1784 u = _approximate_isqrt(m << 2*shift) >> shift;
1785 u -= (uint64_t)u * u > m;
1786 return PyLong_FromUnsignedLong(u);
73934b9d
MD
1787 }
1788
5c08ce9b
MD
1789 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1790 arithmetic, then switch to using Python long integers. */
1791
1792 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1793 c_bit_length = 6;
d08c7888 1794 while ((c >> c_bit_length) > 0) {
5c08ce9b
MD
1795 ++c_bit_length;
1796 }
1797
1798 /* Initialise d and a. */
1799 d = c >> (c_bit_length - 5);
d08c7888 1800 b = _PyLong_Rshift(n, 2*c - 62);
5c08ce9b
MD
1801 if (b == NULL) {
1802 goto error;
1803 }
1804 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1805 Py_DECREF(b);
1806 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1807 goto error;
1808 }
1809 u = _approximate_isqrt(m) >> (31U - d);
d02c5e9b 1810 a = PyLong_FromUnsignedLong(u);
73934b9d
MD
1811 if (a == NULL) {
1812 goto error;
1813 }
5c08ce9b
MD
1814
1815 for (int s = c_bit_length - 6; s >= 0; --s) {
a5119e7d 1816 PyObject *q;
d08c7888 1817 int64_t e = d;
73934b9d
MD
1818
1819 d = c >> s;
1820
1821 /* q = (n >> 2*c - e - d + 1) // a */
d08c7888 1822 q = _PyLong_Rshift(n, 2*c - d - e + 1);
73934b9d
MD
1823 if (q == NULL) {
1824 goto error;
1825 }
1826 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1827 if (q == NULL) {
1828 goto error;
1829 }
1830
1831 /* a = (a << d - 1 - e) + q */
d08c7888 1832 Py_SETREF(a, _PyLong_Lshift(a, d - 1 - e));
73934b9d
MD
1833 if (a == NULL) {
1834 Py_DECREF(q);
1835 goto error;
1836 }
1837 Py_SETREF(a, PyNumber_Add(a, q));
1838 Py_DECREF(q);
1839 if (a == NULL) {
1840 goto error;
1841 }
1842 }
1843
1844 /* The correct result is either a or a - 1. Figure out which, and
1845 decrement a if necessary. */
1846
1847 /* a_too_large = n < a * a */
1848 b = PyNumber_Multiply(a, a);
1849 if (b == NULL) {
1850 goto error;
1851 }
1852 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1853 Py_DECREF(b);
1854 if (a_too_large == -1) {
1855 goto error;
1856 }
1857
1858 if (a_too_large) {
37834136 1859 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
73934b9d
MD
1860 }
1861 Py_DECREF(n);
1862 return a;
1863
1864 error:
1865 Py_XDECREF(a);
1866 Py_DECREF(n);
1867 return NULL;
1868}
1869
4c8a9a2d
MD
1870/* Divide-and-conquer factorial algorithm
1871 *
15f44ab0 1872 * Based on the formula and pseudo-code provided at:
4c8a9a2d
MD
1873 * http://www.luschny.de/math/factorial/binarysplitfact.html
1874 *
1875 * Faster algorithms exist, but they're more complicated and depend on
9527afd0 1876 * a fast prime factorization algorithm.
4c8a9a2d
MD
1877 *
1878 * Notes on the algorithm
1879 * ----------------------
1880 *
1881 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1882 * computed separately, and then combined using a left shift.
1883 *
1884 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1885 * odd divisor) of factorial(n), using the formula:
1886 *
1887 * factorial_odd_part(n) =
1888 *
1889 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1890 *
1891 * Example: factorial_odd_part(20) =
1892 *
1893 * (1) *
1894 * (1) *
1895 * (1 * 3 * 5) *
09605ad7 1896 * (1 * 3 * 5 * 7 * 9) *
4c8a9a2d
MD
1897 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1898 *
1899 * Here i goes from large to small: the first term corresponds to i=4 (any
1900 * larger i gives an empty product), and the last term corresponds to i=0.
1901 * Each term can be computed from the last by multiplying by the extra odd
1902 * numbers required: e.g., to get from the penultimate term to the last one,
1903 * we multiply by (11 * 13 * 15 * 17 * 19).
1904 *
1905 * To see a hint of why this formula works, here are the same numbers as above
1906 * but with the even parts (i.e., the appropriate powers of 2) included. For
1907 * each subterm in the product for i, we multiply that subterm by 2**i:
1908 *
1909 * factorial(20) =
1910 *
1911 * (16) *
1912 * (8) *
1913 * (4 * 12 * 20) *
1914 * (2 * 6 * 10 * 14 * 18) *
1915 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1916 *
1917 * The factorial_partial_product function computes the product of all odd j in
1918 * range(start, stop) for given start and stop. It's used to compute the
1919 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1920 * operates recursively, repeatedly splitting the range into two roughly equal
1921 * pieces until the subranges are small enough to be computed using only C
1922 * integer arithmetic.
1923 *
1924 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1925 * the factorial) is computed independently in the main math_factorial
1926 * function. By standard results, its value is:
1927 *
1928 * two_valuation = n//2 + n//4 + n//8 + ....
1929 *
1930 * It can be shown (e.g., by complete induction on n) that two_valuation is
1931 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1932 * '1'-bits in the binary expansion of n.
1933 */
1934
1935/* factorial_partial_product: Compute product(range(start, stop, 2)) using
1936 * divide and conquer. Assumes start and stop are odd and stop > start.
1937 * max_bits must be >= bit_length(stop - 2). */
1938
1939static PyObject *
1940factorial_partial_product(unsigned long start, unsigned long stop,
1941 unsigned long max_bits)
1942{
1943 unsigned long midpoint, num_operands;
1944 PyObject *left = NULL, *right = NULL, *result = NULL;
1945
1946 /* If the return value will fit an unsigned long, then we can
1947 * multiply in a tight, fast loop where each multiply is O(1).
1948 * Compute an upper bound on the number of bits required to store
1949 * the answer.
1950 *
1951 * Storing some integer z requires floor(lg(z))+1 bits, which is
1952 * conveniently the value returned by bit_length(z). The
1953 * product x*y will require at most
1954 * bit_length(x) + bit_length(y) bits to store, based
1955 * on the idea that lg product = lg x + lg y.
1956 *
1957 * We know that stop - 2 is the largest number to be multiplied. From
1958 * there, we have: bit_length(answer) <= num_operands *
1959 * bit_length(stop - 2)
1960 */
1961
1962 num_operands = (stop - start) / 2;
1963 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1964 * unlikely case of an overflow in num_operands * max_bits. */
1965 if (num_operands <= 8 * SIZEOF_LONG &&
1966 num_operands * max_bits <= 8 * SIZEOF_LONG) {
1967 unsigned long j, total;
1968 for (total = start, j = start + 2; j < stop; j += 2)
1969 total *= j;
1970 return PyLong_FromUnsignedLong(total);
1971 }
1972
1973 /* find midpoint of range(start, stop), rounded up to next odd number. */
1974 midpoint = (start + num_operands) | 1;
1975 left = factorial_partial_product(start, midpoint,
c5b79003 1976 _Py_bit_length(midpoint - 2));
4c8a9a2d
MD
1977 if (left == NULL)
1978 goto error;
1979 right = factorial_partial_product(midpoint, stop, max_bits);
1980 if (right == NULL)
1981 goto error;
1982 result = PyNumber_Multiply(left, right);
1983
1984 error:
1985 Py_XDECREF(left);
1986 Py_XDECREF(right);
1987 return result;
1988}
1989
1990/* factorial_odd_part: compute the odd part of factorial(n). */
1991
1992static PyObject *
1993factorial_odd_part(unsigned long n)
1994{
1995 long i;
1996 unsigned long v, lower, upper;
1997 PyObject *partial, *tmp, *inner, *outer;
1998
1999 inner = PyLong_FromLong(1);
2000 if (inner == NULL)
2001 return NULL;
3e2f7135 2002 outer = Py_NewRef(inner);
4c8a9a2d
MD
2003
2004 upper = 3;
c5b79003 2005 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
4c8a9a2d
MD
2006 v = n >> i;
2007 if (v <= 2)
2008 continue;
2009 lower = upper;
2010 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
2011 upper = (v + 1) | 1;
2012 /* Here inner is the product of all odd integers j in the range (0,
2013 n/2**(i+1)]. The factorial_partial_product call below gives the
2014 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
c5b79003 2015 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
4c8a9a2d
MD
2016 /* inner *= partial */
2017 if (partial == NULL)
2018 goto error;
2019 tmp = PyNumber_Multiply(inner, partial);
2020 Py_DECREF(partial);
2021 if (tmp == NULL)
2022 goto error;
7e3f09ca 2023 Py_SETREF(inner, tmp);
4c8a9a2d
MD
2024 /* Now inner is the product of all odd integers j in the range (0,
2025 n/2**i], giving the inner product in the formula above. */
2026
2027 /* outer *= inner; */
2028 tmp = PyNumber_Multiply(outer, inner);
2029 if (tmp == NULL)
2030 goto error;
7e3f09ca 2031 Py_SETREF(outer, tmp);
4c8a9a2d 2032 }
76464494
MD
2033 Py_DECREF(inner);
2034 return outer;
4c8a9a2d
MD
2035
2036 error:
2037 Py_DECREF(outer);
4c8a9a2d 2038 Py_DECREF(inner);
76464494 2039 return NULL;
4c8a9a2d
MD
2040}
2041
c9ea9335 2042
4c8a9a2d
MD
2043/* Lookup table for small factorial values */
2044
2045static const unsigned long SmallFactorials[] = {
2046 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2047 362880, 3628800, 39916800, 479001600,
2048#if SIZEOF_LONG >= 8
2049 6227020800, 87178291200, 1307674368000,
2050 20922789888000, 355687428096000, 6402373705728000,
2051 121645100408832000, 2432902008176640000
2052#endif
2053};
2054
c9ea9335
SS
2055/*[clinic input]
2056math.factorial
2057
1ba82d44 2058 n as arg: object
c9ea9335
SS
2059 /
2060
1ba82d44 2061Find n!.
c9ea9335
SS
2062[clinic start generated code]*/
2063
c28e1fa7 2064static PyObject *
c9ea9335 2065math_factorial(PyObject *module, PyObject *arg)
27ed6457 2066/*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/
c28e1fa7 2067{
a5119e7d 2068 long x, two_valuation;
5990d286 2069 int overflow;
578c3955 2070 PyObject *result, *odd_part;
f95a1b3c 2071
578c3955 2072 x = PyLong_AsLongAndOverflow(arg, &overflow);
5990d286 2073 if (x == -1 && PyErr_Occurred()) {
f95a1b3c 2074 return NULL;
5990d286
MD
2075 }
2076 else if (overflow == 1) {
2077 PyErr_Format(PyExc_OverflowError,
2078 "factorial() argument should not exceed %ld",
2079 LONG_MAX);
2080 return NULL;
2081 }
2082 else if (overflow == -1 || x < 0) {
f95a1b3c 2083 PyErr_SetString(PyExc_ValueError,
4c8a9a2d 2084 "factorial() not defined for negative values");
f95a1b3c
AP
2085 return NULL;
2086 }
2087
4c8a9a2d 2088 /* use lookup table if x is small */
63941881 2089 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
4c8a9a2d
MD
2090 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2091
2092 /* else express in the form odd_part * 2**two_valuation, and compute as
2093 odd_part << two_valuation. */
2094 odd_part = factorial_odd_part(x);
2095 if (odd_part == NULL)
2096 return NULL;
a5119e7d
SS
2097 two_valuation = x - count_set_bits(x);
2098 result = _PyLong_Lshift(odd_part, two_valuation);
4c8a9a2d 2099 Py_DECREF(odd_part);
f95a1b3c 2100 return result;
c28e1fa7
GB
2101}
2102
c9ea9335
SS
2103
2104/*[clinic input]
2105math.trunc
2106
2107 x: object
2108 /
2109
2110Truncates the Real x to the nearest Integral toward 0.
2111
2112Uses the __trunc__ magic method.
2113[clinic start generated code]*/
c28e1fa7 2114
400adb03 2115static PyObject *
c9ea9335
SS
2116math_trunc(PyObject *module, PyObject *x)
2117/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
400adb03 2118{
5fd5cb8d
SS
2119 if (PyFloat_CheckExact(x)) {
2120 return PyFloat_Type.tp_as_number->nb_int(x);
2121 }
2122
67fbfb42
SG
2123 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(x, &_Py_ID(__trunc__));
2124 if (result != NULL) {
2125 return result;
f95a1b3c 2126 }
67fbfb42
SG
2127 else if (!PyErr_Occurred()) {
2128 PyErr_Format(PyExc_TypeError,
2129 "type %.100s doesn't define __trunc__ method",
2130 Py_TYPE(x)->tp_name);
2131 }
2132 return NULL;
400adb03
CH
2133}
2134
c9ea9335
SS
2135
2136/*[clinic input]
2137math.frexp
2138
2139 x: double
2140 /
2141
2142Return the mantissa and exponent of x, as pair (m, e).
2143
2144m is a float and e is an int, such that x = m * 2.**e.
2145If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2146[clinic start generated code]*/
400adb03 2147
8b43b19e 2148static PyObject *
c9ea9335
SS
2149math_frexp_impl(PyObject *module, double x)
2150/*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
d18ad583 2151{
f95a1b3c 2152 int i;
f95a1b3c
AP
2153 /* deal with special cases directly, to sidestep platform
2154 differences */
cd11ff12 2155 if (isnan(x) || isinf(x) || !x) {
f95a1b3c
AP
2156 i = 0;
2157 }
2158 else {
f95a1b3c 2159 x = frexp(x, &i);
f95a1b3c
AP
2160 }
2161 return Py_BuildValue("(di)", x, i);
d18ad583
GR
2162}
2163
c9ea9335
SS
2164
2165/*[clinic input]
2166math.ldexp
2167
2168 x: double
2169 i: object
2170 /
2171
2172Return x * (2**i).
2173
2174This is essentially the inverse of frexp().
2175[clinic start generated code]*/
c6e22902 2176
8b43b19e 2177static PyObject *
c9ea9335
SS
2178math_ldexp_impl(PyObject *module, double x, PyObject *i)
2179/*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
d18ad583 2180{
c9ea9335 2181 double r;
f95a1b3c
AP
2182 long exp;
2183 int overflow;
f95a1b3c 2184
c9ea9335 2185 if (PyLong_Check(i)) {
f95a1b3c
AP
2186 /* on overflow, replace exponent with either LONG_MAX
2187 or LONG_MIN, depending on the sign. */
c9ea9335 2188 exp = PyLong_AsLongAndOverflow(i, &overflow);
f95a1b3c
AP
2189 if (exp == -1 && PyErr_Occurred())
2190 return NULL;
2191 if (overflow)
2192 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2193 }
2194 else {
2195 PyErr_SetString(PyExc_TypeError,
95949427 2196 "Expected an int as second argument to ldexp.");
f95a1b3c
AP
2197 return NULL;
2198 }
2199
cd11ff12 2200 if (x == 0. || !isfinite(x)) {
f95a1b3c
AP
2201 /* NaNs, zeros and infinities are returned unchanged */
2202 r = x;
2203 errno = 0;
2204 } else if (exp > INT_MAX) {
2205 /* overflow */
8477951a 2206 r = copysign(Py_INFINITY, x);
f95a1b3c
AP
2207 errno = ERANGE;
2208 } else if (exp < INT_MIN) {
2209 /* underflow to +-0 */
2210 r = copysign(0., x);
2211 errno = 0;
2212 } else {
2213 errno = 0;
f95a1b3c 2214 r = ldexp(x, (int)exp);
cf8941c6
SK
2215#ifdef _MSC_VER
2216 if (DBL_MIN > r && r > -DBL_MIN) {
2217 /* Denormal (or zero) results can be incorrectly rounded here (rather,
2218 truncated). Fixed in newer versions of the C runtime, included
2219 with Windows 11. */
2220 int original_exp;
2221 frexp(x, &original_exp);
2222 if (original_exp > DBL_MIN_EXP) {
2223 /* Shift down to the smallest normal binade. No bits lost. */
2224 int shift = DBL_MIN_EXP - original_exp;
2225 x = ldexp(x, shift);
2226 exp -= shift;
2227 }
2228 /* Multiplying by 2**exp finishes the job, and the HW will round as
2229 appropriate. Note: if exp < -DBL_MANT_DIG, all of x is shifted
2230 to be < 0.5ULP of smallest denorm, so should be thrown away. If
2231 exp is so very negative that ldexp underflows to 0, that's fine;
2232 no need to check in advance. */
2233 r = x*ldexp(1.0, (int)exp);
2234 }
2235#endif
cd11ff12 2236 if (isinf(r))
f95a1b3c
AP
2237 errno = ERANGE;
2238 }
2239
75f59bb6 2240 if (errno && is_error(r, 1))
f95a1b3c
AP
2241 return NULL;
2242 return PyFloat_FromDouble(r);
d18ad583
GR
2243}
2244
c9ea9335
SS
2245
2246/*[clinic input]
2247math.modf
2248
2249 x: double
2250 /
2251
2252Return the fractional and integer parts of x.
2253
2254Both results carry the sign of x and are floats.
2255[clinic start generated code]*/
c6e22902 2256
8b43b19e 2257static PyObject *
c9ea9335
SS
2258math_modf_impl(PyObject *module, double x)
2259/*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
d18ad583 2260{
c9ea9335 2261 double y;
f95a1b3c
AP
2262 /* some platforms don't do the right thing for NaNs and
2263 infinities, so we take care of special cases directly. */
cd11ff12 2264 if (isinf(x))
9c995abd 2265 return Py_BuildValue("(dd)", copysign(0., x), x);
cd11ff12 2266 else if (isnan(x))
9c995abd 2267 return Py_BuildValue("(dd)", x, x);
f95a1b3c
AP
2268
2269 errno = 0;
f95a1b3c 2270 x = modf(x, &y);
f95a1b3c 2271 return Py_BuildValue("(dd)", x, y);
d18ad583 2272}
85a5fbbd 2273
c6e22902 2274
95949427 2275/* A decent logarithm is easy to compute even for huge ints, but libm can't
78526168 2276 do that by itself -- loghelper can. func is log or log10, and name is
95949427 2277 "log" or "log10". Note that overflow of the result isn't possible: an int
6ecd9e53
MD
2278 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2279 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
78526168 2280 small enough to fit in an IEEE single. log and log10 are even smaller.
95949427
SS
2281 However, intermediate overflow is possible for an int if the number of bits
2282 in that int is larger than PY_SSIZE_T_MAX. */
78526168
TP
2283
2284static PyObject*
5a80e858 2285loghelper(PyObject* arg, double (*func)(double))
78526168 2286{
95949427 2287 /* If it is int, do it ourselves. */
f95a1b3c 2288 if (PyLong_Check(arg)) {
c6037174 2289 double x, result;
32c7dbb2 2290 int64_t e;
c6037174
MD
2291
2292 /* Negative or zero inputs give a ValueError. */
7559f5fd 2293 if (!_PyLong_IsPositive((PyLongObject *)arg)) {
0c356c86
SK
2294 /* The input can be an arbitrary large integer, so we
2295 don't include it's value in the error message. */
2296 PyErr_SetString(PyExc_ValueError,
2297 "expected a positive input");
f95a1b3c
AP
2298 return NULL;
2299 }
c6037174
MD
2300
2301 x = PyLong_AsDouble(arg);
2302 if (x == -1.0 && PyErr_Occurred()) {
2303 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2304 return NULL;
2305 /* Here the conversion to double overflowed, but it's possible
2306 to compute the log anyway. Clear the exception and continue. */
2307 PyErr_Clear();
2308 x = _PyLong_Frexp((PyLongObject *)arg, &e);
d08c7888
SS
2309 assert(e >= 0);
2310 assert(!PyErr_Occurred());
c6037174
MD
2311 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2312 result = func(x) + func(2.0) * e;
2313 }
2314 else
2315 /* Successfully converted x to a double. */
2316 result = func(x);
2317 return PyFloat_FromDouble(result);
f95a1b3c
AP
2318 }
2319
2320 /* Else let libm handle it by itself. */
75f59bb6 2321 return math_1(arg, func, 0, "expected a positive input, got %s");
78526168
TP
2322}
2323
c9ea9335 2324
d1a89ce5
SK
2325/* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2326 for support of multiple signatures */
78526168 2327static PyObject *
d1a89ce5 2328math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
78526168 2329{
f95a1b3c
AP
2330 PyObject *num, *den;
2331 PyObject *ans;
2332
d1a89ce5
SK
2333 if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2334 return NULL;
2335
2336 num = loghelper(args[0], m_log);
2337 if (num == NULL || nargs == 1)
f95a1b3c
AP
2338 return num;
2339
d1a89ce5 2340 den = loghelper(args[1], m_log);
f95a1b3c
AP
2341 if (den == NULL) {
2342 Py_DECREF(num);
2343 return NULL;
2344 }
2345
2346 ans = PyNumber_TrueDivide(num, den);
2347 Py_DECREF(num);
2348 Py_DECREF(den);
2349 return ans;
78526168
TP
2350}
2351
d1a89ce5
SK
2352PyDoc_STRVAR(math_log_doc,
2353"log(x, [base=math.e])\n\
2354Return the logarithm of x to the given base.\n\n\
52cedc5c 2355If the base is not specified, returns the natural logarithm (base e) of x.");
c9ea9335
SS
2356
2357/*[clinic input]
2358math.log2
2359
2360 x: object
2361 /
2362
2363Return the base 2 logarithm of x.
2364[clinic start generated code]*/
78526168 2365
fa0e3d52 2366static PyObject *
c9ea9335
SS
2367math_log2(PyObject *module, PyObject *x)
2368/*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
fa0e3d52 2369{
5a80e858 2370 return loghelper(x, m_log2);
fa0e3d52
VS
2371}
2372
c9ea9335
SS
2373
2374/*[clinic input]
2375math.log10
2376
2377 x: object
2378 /
2379
2380Return the base 10 logarithm of x.
2381[clinic start generated code]*/
fa0e3d52 2382
78526168 2383static PyObject *
c9ea9335
SS
2384math_log10(PyObject *module, PyObject *x)
2385/*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
78526168 2386{
5a80e858 2387 return loghelper(x, m_log10);
78526168
TP
2388}
2389
c9ea9335 2390
8e3c953b
VS
2391/*[clinic input]
2392math.fma
2393
2394 x: double
2395 y: double
2396 z: double
2397 /
2398
2399Fused multiply-add operation.
2400
2401Compute (x * y) + z with a single round.
2402[clinic start generated code]*/
2403
2404static PyObject *
2405math_fma_impl(PyObject *module, double x, double y, double z)
2406/*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2407{
2408 double r = fma(x, y, z);
2409
2410 /* Fast path: if we got a finite result, we're done. */
cd11ff12 2411 if (isfinite(r)) {
8e3c953b
VS
2412 return PyFloat_FromDouble(r);
2413 }
2414
2415 /* Non-finite result. Raise an exception if appropriate, else return r. */
cd11ff12
SK
2416 if (isnan(r)) {
2417 if (!isnan(x) && !isnan(y) && !isnan(z)) {
8e3c953b
VS
2418 /* NaN result from non-NaN inputs. */
2419 PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2420 return NULL;
2421 }
2422 }
cd11ff12 2423 else if (isfinite(x) && isfinite(y) && isfinite(z)) {
8e3c953b
VS
2424 /* Infinite result from finite inputs. */
2425 PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2426 return NULL;
2427 }
2428
2429 return PyFloat_FromDouble(r);
2430}
2431
2432
c9ea9335
SS
2433/*[clinic input]
2434math.fmod
2435
2436 x: double
2437 y: double
2438 /
2439
2440Return fmod(x, y), according to platform C.
2441
2442x % y may differ.
2443[clinic start generated code]*/
78526168 2444
53876d9c 2445static PyObject *
c9ea9335
SS
2446math_fmod_impl(PyObject *module, double x, double y)
2447/*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
53876d9c 2448{
c9ea9335 2449 double r;
f95a1b3c 2450 /* fmod(x, +/-Inf) returns x for finite x. */
cd11ff12 2451 if (isinf(y) && isfinite(x))
f95a1b3c
AP
2452 return PyFloat_FromDouble(x);
2453 errno = 0;
f95a1b3c 2454 r = fmod(x, y);
f4dd4402
SK
2455#ifdef _MSC_VER
2456 /* Windows (e.g. Windows 10 with MSC v.1916) loose sign
2457 for zero result. But C99+ says: "if y is nonzero, the result
2458 has the same sign as x".
2459 */
2460 if (r == 0.0 && y != 0.0) {
2461 r = copysign(r, x);
2462 }
2463#endif
cd11ff12
SK
2464 if (isnan(r)) {
2465 if (!isnan(x) && !isnan(y))
f95a1b3c
AP
2466 errno = EDOM;
2467 else
2468 errno = 0;
2469 }
75f59bb6 2470 if (errno && is_error(r, 1))
f95a1b3c
AP
2471 return NULL;
2472 else
2473 return PyFloat_FromDouble(r);
53876d9c
CH
2474}
2475
13990745 2476/*
8e19c8be 2477Given a *vec* of values, compute the vector norm:
13990745 2478
8e19c8be 2479 sqrt(sum(x ** 2 for x in vec))
fff3c280 2480
8e19c8be
RH
2481The *max* variable should be equal to the largest fabs(x).
2482The *n* variable is the length of *vec*.
2483If n==0, then *max* should be 0.0.
745c0f39 2484If an infinity is present in the vec, *max* should be INF.
c630e104
RH
2485The *found_nan* variable indicates whether some member of
2486the *vec* is a NaN.
21786f51 2487
8e19c8be
RH
2488To avoid overflow/underflow and to achieve high accuracy giving results
2489that are almost always correctly rounded, four techniques are used:
2490
2491* lossless scaling using a power-of-two scaling factor
67c998de 2492* accurate squaring using Veltkamp-Dekker splitting [1]
0a22aa05 2493 or an equivalent with an fma() call
67c998de
RH
2494* compensated summation using a variant of the Neumaier algorithm [2]
2495* differential correction of the square root [3]
8e19c8be
RH
2496
2497The usual presentation of the Neumaier summation algorithm has an
2498expensive branch depending on which operand has the larger
2499magnitude. We avoid this cost by arranging the calculation so that
2500fabs(csum) is always as large as fabs(x).
2501
2502To establish the invariant, *csum* is initialized to 1.0 which is
457d4e97 2503always larger than x**2 after scaling or after division by *max*.
8e19c8be
RH
2504After the loop is finished, the initial 1.0 is subtracted out for a
2505net zero effect on the final sum. Since *csum* will be greater than
25061.0, the subtraction of 1.0 will not cause fractional digits to be
2507dropped from *csum*.
2508
2509To get the full benefit from compensated summation, the largest
2510addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2511scaling or division by *max* should not be skipped even if not
2512otherwise needed to prevent overflow or loss of precision.
2513
82e79480 2514The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
8e19c8be
RH
2515gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2516algorithm gives a *hi* value that is correctly rounded to half
2517precision. When a value at or below 1.0 is correctly rounded, it
2518never goes above 1.0. And when values at or below 1.0 are squared,
2519they remain at or below 1.0, thus preserving the summation invariant.
2520
27de2860
RH
2521Another interesting assertion is that csum+lo*lo == csum. In the loop,
2522each scaled vector element has a magnitude less than 1.0. After the
2523Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2524value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2525Given that csum >= 1.0, we have:
2526 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2527Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2528
92c38164 2529To minimize loss of information during the accumulation of fractional
67c998de
RH
2530values, each term has a separate accumulator. This also breaks up
2531sequential dependencies in the inner loop so the CPU can maximize
1a0c7b9b 2532floating-point throughput. [4] On an Apple M1 Max, hypot(*vec)
3adb23a1 2533takes only 3.33 µsec when len(vec) == 1000.
92c38164 2534
8e19c8be
RH
2535The square root differential correction is needed because a
2536correctly rounded square root of a correctly rounded sum of
2537squares can still be off by as much as one ulp.
2538
2539The differential correction starts with a value *x* that is
2540the difference between the square of *h*, the possibly inaccurately
2541rounded square root, and the accurately computed sum of squares.
2542The correction is the first order term of the Maclaurin series
457d4e97 2543expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
8e19c8be
RH
2544
2545Essentially, this differential correction is equivalent to one
82e79480 2546refinement step in Newton's divide-and-average square root
8e19c8be
RH
2547algorithm, effectively doubling the number of accurate bits.
2548This technique is used in Dekker's SQRT2 algorithm and again in
2549Borges' ALGORITHM 4 and 5.
2550
0a22aa05
RH
2551The hypot() function is faithfully rounded (less than 1 ulp error)
2552and usually correctly rounded (within 1/2 ulp). The squaring
2553step is exact. The Neumaier summation computes as if in doubled
2554precision (106 bits) and has the advantage that its input squares
2555are non-negative so that the condition number of the sum is one.
2556The square root with a differential correction is likewise computed
3adb23a1 2557as if in doubled precision.
0a22aa05
RH
2558
2559For n <= 1000, prior to the final addition that rounds the overall
2560result, the internal accuracy of "h" together with its correction of
2561"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2562against a Decimal implementation with prec=300. After 100 million
2563trials, no incorrectly rounded examples were found. In addition,
2564perfect commutativity (all permutations are exactly equal) was
2565verified for 1 billion random inputs with n=5. [7]
67c998de 2566
8e19c8be
RH
2567References:
2568
25691. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
25702. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
92c38164 25713. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
457d4e97
RH
25724. Data dependency graph: https://bugs.python.org/file49439/hypot.png
25735. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
497126f7 25746. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
457d4e97 25757. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
fff3c280 2576
13990745
RH
2577*/
2578
2579static inline double
c630e104 2580vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
13990745 2581{
72186aa6 2582 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
0a22aa05 2583 DoubleLength pr, sm;
fff3c280 2584 int max_e;
13990745
RH
2585 Py_ssize_t i;
2586
cd11ff12 2587 if (isinf(max)) {
c630e104
RH
2588 return max;
2589 }
2590 if (found_nan) {
2591 return Py_NAN;
2592 }
f3267144 2593 if (max == 0.0 || n <= 1) {
745c0f39 2594 return max;
13990745 2595 }
fff3c280 2596 frexp(max, &max_e);
72186aa6 2597 if (max_e < -1023) {
3adb23a1 2598 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
fff3c280 2599 for (i=0 ; i < n ; i++) {
3adb23a1 2600 vec[i] /= DBL_MIN; // convert subnormals to normals
72186aa6
RH
2601 }
2602 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2603 }
2604 scale = ldexp(1.0, -max_e);
2605 assert(max * scale >= 0.5);
2606 assert(max * scale < 1.0);
2607 for (i=0 ; i < n ; i++) {
2608 x = vec[i];
cd11ff12 2609 assert(isfinite(x) && fabs(x) <= max);
3adb23a1 2610 x *= scale; // lossless scaling
72186aa6 2611 assert(fabs(x) < 1.0);
3adb23a1 2612 pr = dl_mul(x, x); // lossless squaring
72186aa6 2613 assert(pr.hi <= 1.0);
3adb23a1 2614 sm = dl_fast_sum(csum, pr.hi); // lossless addition
0a22aa05 2615 csum = sm.hi;
3adb23a1
RH
2616 frac1 += pr.lo; // lossy addition
2617 frac2 += sm.lo; // lossy addition
fff3c280 2618 }
72186aa6
RH
2619 h = sqrt(csum - 1.0 + (frac1 + frac2));
2620 pr = dl_mul(-h, h);
2621 sm = dl_fast_sum(csum, pr.hi);
2622 csum = sm.hi;
2623 frac1 += pr.lo;
2624 frac2 += sm.lo;
2625 x = csum - 1.0 + (frac1 + frac2);
3adb23a1
RH
2626 h += x / (2.0 * h); // differential correction
2627 return h / scale;
13990745
RH
2628}
2629
c630e104
RH
2630#define NUM_STACK_ELEMS 16
2631
9c18b1ae
RH
2632/*[clinic input]
2633math.dist
2634
6b5f1b49
RH
2635 p: object
2636 q: object
9c18b1ae
RH
2637 /
2638
2639Return the Euclidean distance between two points p and q.
2640
6b5f1b49
RH
2641The points should be specified as sequences (or iterables) of
2642coordinates. Both inputs must have the same dimension.
9c18b1ae
RH
2643
2644Roughly equivalent to:
2645 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2646[clinic start generated code]*/
2647
2648static PyObject *
2649math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
6b5f1b49 2650/*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
9c18b1ae
RH
2651{
2652 PyObject *item;
9c18b1ae 2653 double max = 0.0;
9c18b1ae
RH
2654 double x, px, qx, result;
2655 Py_ssize_t i, m, n;
6b5f1b49 2656 int found_nan = 0, p_allocated = 0, q_allocated = 0;
c630e104
RH
2657 double diffs_on_stack[NUM_STACK_ELEMS];
2658 double *diffs = diffs_on_stack;
9c18b1ae 2659
6b5f1b49
RH
2660 if (!PyTuple_Check(p)) {
2661 p = PySequence_Tuple(p);
2662 if (p == NULL) {
2663 return NULL;
2664 }
2665 p_allocated = 1;
2666 }
2667 if (!PyTuple_Check(q)) {
2668 q = PySequence_Tuple(q);
2669 if (q == NULL) {
2670 if (p_allocated) {
2671 Py_DECREF(p);
2672 }
2673 return NULL;
2674 }
2675 q_allocated = 1;
2676 }
2677
9c18b1ae
RH
2678 m = PyTuple_GET_SIZE(p);
2679 n = PyTuple_GET_SIZE(q);
2680 if (m != n) {
2681 PyErr_SetString(PyExc_ValueError,
2682 "both points must have the same number of dimensions");
ab575050 2683 goto error_exit;
9c18b1ae 2684 }
c630e104 2685 if (n > NUM_STACK_ELEMS) {
dcd28b5c 2686 diffs = (double *) PyMem_Malloc(n * sizeof(double));
c630e104 2687 if (diffs == NULL) {
ab575050
KA
2688 PyErr_NoMemory();
2689 goto error_exit;
c630e104 2690 }
9c18b1ae
RH
2691 }
2692 for (i=0 ; i<n ; i++) {
2693 item = PyTuple_GET_ITEM(p, i);
cfd735ea 2694 ASSIGN_DOUBLE(px, item, error_exit);
9c18b1ae 2695 item = PyTuple_GET_ITEM(q, i);
cfd735ea 2696 ASSIGN_DOUBLE(qx, item, error_exit);
9c18b1ae
RH
2697 x = fabs(px - qx);
2698 diffs[i] = x;
cd11ff12 2699 found_nan |= isnan(x);
9c18b1ae
RH
2700 if (x > max) {
2701 max = x;
2702 }
2703 }
c630e104
RH
2704 result = vector_norm(n, diffs, max, found_nan);
2705 if (diffs != diffs_on_stack) {
dcd28b5c 2706 PyMem_Free(diffs);
9c18b1ae 2707 }
6b5f1b49
RH
2708 if (p_allocated) {
2709 Py_DECREF(p);
2710 }
2711 if (q_allocated) {
2712 Py_DECREF(q);
2713 }
9c18b1ae 2714 return PyFloat_FromDouble(result);
c630e104
RH
2715
2716 error_exit:
2717 if (diffs != diffs_on_stack) {
dcd28b5c 2718 PyMem_Free(diffs);
c630e104 2719 }
6b5f1b49
RH
2720 if (p_allocated) {
2721 Py_DECREF(p);
2722 }
2723 if (q_allocated) {
2724 Py_DECREF(q);
2725 }
c630e104 2726 return NULL;
9c18b1ae
RH
2727}
2728
3275cb19
SK
2729/*[clinic input]
2730math.hypot
2731
1f777396 2732 *coordinates as args: array
3275cb19
SK
2733
2734Multidimensional Euclidean distance from the origin to a point.
2735
2736Roughly equivalent to:
2737 sqrt(sum(x**2 for x in coordinates))
2738
2739For a two dimensional point (x, y), gives the hypotenuse
2740using the Pythagorean theorem: sqrt(x*x + y*y).
2741
2742For example, the hypotenuse of a 3/4/5 right triangle is:
2743
2744 >>> hypot(3.0, 4.0)
2745 5.0
2746[clinic start generated code]*/
2747
53876d9c 2748static PyObject *
1f777396
SS
2749math_hypot_impl(PyObject *module, PyObject * const *args,
2750 Py_ssize_t args_length)
2751/*[clinic end generated code: output=c9de404e24370068 input=1bceaf7d4fdcd9c2]*/
53876d9c 2752{
d0d3e991 2753 Py_ssize_t i;
c6dabe37 2754 PyObject *item;
c6dabe37 2755 double max = 0.0;
c6dabe37
RH
2756 double x, result;
2757 int found_nan = 0;
c630e104
RH
2758 double coord_on_stack[NUM_STACK_ELEMS];
2759 double *coordinates = coord_on_stack;
c6dabe37 2760
1f777396
SS
2761 if (args_length > NUM_STACK_ELEMS) {
2762 coordinates = (double *) PyMem_Malloc(args_length * sizeof(double));
4c49da0c
ZS
2763 if (coordinates == NULL) {
2764 return PyErr_NoMemory();
2765 }
c630e104 2766 }
1f777396 2767 for (i = 0; i < args_length; i++) {
d0d3e991 2768 item = args[i];
cfd735ea 2769 ASSIGN_DOUBLE(x, item, error_exit);
c6dabe37
RH
2770 x = fabs(x);
2771 coordinates[i] = x;
cd11ff12 2772 found_nan |= isnan(x);
c6dabe37
RH
2773 if (x > max) {
2774 max = x;
2775 }
f95a1b3c 2776 }
1f777396 2777 result = vector_norm(args_length, coordinates, max, found_nan);
c630e104 2778 if (coordinates != coord_on_stack) {
dcd28b5c 2779 PyMem_Free(coordinates);
f95a1b3c 2780 }
c6dabe37 2781 return PyFloat_FromDouble(result);
c630e104
RH
2782
2783 error_exit:
2784 if (coordinates != coord_on_stack) {
dcd28b5c 2785 PyMem_Free(coordinates);
c630e104
RH
2786 }
2787 return NULL;
53876d9c
CH
2788}
2789
c630e104
RH
2790#undef NUM_STACK_ELEMS
2791
47b9f83a
RH
2792/** sumprod() ***************************************************************/
2793
2794/* Forward declaration */
2795static inline int _check_long_mult_overflow(long a, long b);
2796
2797static inline bool
2798long_add_would_overflow(long a, long b)
2799{
2800 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2801}
2802
47b9f83a
RH
2803/*[clinic input]
2804math.sumprod
2805
2806 p: object
2807 q: object
2808 /
2809
2810Return the sum of products of values from two iterables p and q.
2811
2812Roughly equivalent to:
2813
3d4fda21 2814 sum(map(operator.mul, p, q, strict=True))
47b9f83a
RH
2815
2816For float and mixed int/float inputs, the intermediate products
2817and sums are computed with extended precision.
2818[clinic start generated code]*/
2819
2820static PyObject *
2821math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
3d4fda21 2822/*[clinic end generated code: output=6722dbfe60664554 input=a2880317828c61d2]*/
47b9f83a
RH
2823{
2824 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2825 PyObject *p_it, *q_it, *total;
2826 iternextfunc p_next, q_next;
2827 bool p_stopped = false, q_stopped = false;
2828 bool int_path_enabled = true, int_total_in_use = false;
2829 bool flt_path_enabled = true, flt_total_in_use = false;
2830 long int_total = 0;
b139bcd8 2831 TripleLength flt_total = tl_zero;
47b9f83a
RH
2832
2833 p_it = PyObject_GetIter(p);
2834 if (p_it == NULL) {
2835 return NULL;
2836 }
2837 q_it = PyObject_GetIter(q);
2838 if (q_it == NULL) {
2839 Py_DECREF(p_it);
2840 return NULL;
2841 }
2842 total = PyLong_FromLong(0);
2843 if (total == NULL) {
2844 Py_DECREF(p_it);
2845 Py_DECREF(q_it);
2846 return NULL;
2847 }
2848 p_next = *Py_TYPE(p_it)->tp_iternext;
2849 q_next = *Py_TYPE(q_it)->tp_iternext;
2850 while (1) {
2851 bool finished;
2852
2853 assert (p_i == NULL);
2854 assert (q_i == NULL);
2855 assert (term_i == NULL);
2856 assert (new_total == NULL);
2857
2858 assert (p_it != NULL);
2859 assert (q_it != NULL);
2860 assert (total != NULL);
2861
2862 p_i = p_next(p_it);
2863 if (p_i == NULL) {
2864 if (PyErr_Occurred()) {
2865 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2866 goto err_exit;
2867 }
2868 PyErr_Clear();
2869 }
2870 p_stopped = true;
2871 }
2872 q_i = q_next(q_it);
2873 if (q_i == NULL) {
2874 if (PyErr_Occurred()) {
2875 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2876 goto err_exit;
2877 }
2878 PyErr_Clear();
2879 }
2880 q_stopped = true;
2881 }
2882 if (p_stopped != q_stopped) {
2883 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2884 goto err_exit;
2885 }
2886 finished = p_stopped & q_stopped;
2887
2888 if (int_path_enabled) {
2889
2890 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2891 int overflow;
2892 long int_p, int_q, int_prod;
2893
2894 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2895 if (overflow) {
2896 goto finalize_int_path;
2897 }
2898 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2899 if (overflow) {
2900 goto finalize_int_path;
2901 }
2902 if (_check_long_mult_overflow(int_p, int_q)) {
2903 goto finalize_int_path;
2904 }
2905 int_prod = int_p * int_q;
2906 if (long_add_would_overflow(int_total, int_prod)) {
2907 goto finalize_int_path;
2908 }
2909 int_total += int_prod;
2910 int_total_in_use = true;
2911 Py_CLEAR(p_i);
2912 Py_CLEAR(q_i);
2913 continue;
2914 }
2915
2916 finalize_int_path:
997073c2 2917 // We're finished, overflowed, or have a non-int
47b9f83a
RH
2918 int_path_enabled = false;
2919 if (int_total_in_use) {
2920 term_i = PyLong_FromLong(int_total);
2921 if (term_i == NULL) {
2922 goto err_exit;
2923 }
2924 new_total = PyNumber_Add(total, term_i);
2925 if (new_total == NULL) {
2926 goto err_exit;
2927 }
2928 Py_SETREF(total, new_total);
2929 new_total = NULL;
2930 Py_CLEAR(term_i);
2931 int_total = 0; // An ounce of prevention, ...
2932 int_total_in_use = false;
2933 }
2934 }
2935
2936 if (flt_path_enabled) {
2937
2938 if (!finished) {
2939 double flt_p, flt_q;
2940 bool p_type_float = PyFloat_CheckExact(p_i);
2941 bool q_type_float = PyFloat_CheckExact(q_i);
2942 if (p_type_float && q_type_float) {
2943 flt_p = PyFloat_AS_DOUBLE(p_i);
2944 flt_q = PyFloat_AS_DOUBLE(q_i);
2945 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2946 /* We care about float/int pairs and int/float pairs because
2947 they arise naturally in several use cases such as price
2948 times quantity, measurements with integer weights, or
2949 data selected by a vector of bools. */
2950 flt_p = PyFloat_AS_DOUBLE(p_i);
2951 flt_q = PyLong_AsDouble(q_i);
2952 if (flt_q == -1.0 && PyErr_Occurred()) {
2953 PyErr_Clear();
2954 goto finalize_flt_path;
2955 }
9dc4fb82 2956 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
47b9f83a
RH
2957 flt_q = PyFloat_AS_DOUBLE(q_i);
2958 flt_p = PyLong_AsDouble(p_i);
2959 if (flt_p == -1.0 && PyErr_Occurred()) {
2960 PyErr_Clear();
2961 goto finalize_flt_path;
2962 }
2963 } else {
2964 goto finalize_flt_path;
2965 }
84483aac 2966 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
47b9f83a
RH
2967 if (isfinite(new_flt_total.hi)) {
2968 flt_total = new_flt_total;
2969 flt_total_in_use = true;
2970 Py_CLEAR(p_i);
2971 Py_CLEAR(q_i);
2972 continue;
2973 }
2974 }
2975
2976 finalize_flt_path:
2977 // We're finished, overflowed, have a non-float, or got a non-finite value
2978 flt_path_enabled = false;
2979 if (flt_total_in_use) {
b139bcd8 2980 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
47b9f83a
RH
2981 if (term_i == NULL) {
2982 goto err_exit;
2983 }
2984 new_total = PyNumber_Add(total, term_i);
2985 if (new_total == NULL) {
2986 goto err_exit;
2987 }
2988 Py_SETREF(total, new_total);
2989 new_total = NULL;
2990 Py_CLEAR(term_i);
b139bcd8 2991 flt_total = tl_zero;
47b9f83a
RH
2992 flt_total_in_use = false;
2993 }
2994 }
2995
2996 assert(!int_total_in_use);
2997 assert(!flt_total_in_use);
2998 if (finished) {
2999 goto normal_exit;
3000 }
3001 term_i = PyNumber_Multiply(p_i, q_i);
3002 if (term_i == NULL) {
3003 goto err_exit;
3004 }
3005 new_total = PyNumber_Add(total, term_i);
3006 if (new_total == NULL) {
3007 goto err_exit;
3008 }
3009 Py_SETREF(total, new_total);
3010 new_total = NULL;
3011 Py_CLEAR(p_i);
3012 Py_CLEAR(q_i);
3013 Py_CLEAR(term_i);
3014 }
3015
3016 normal_exit:
3017 Py_DECREF(p_it);
3018 Py_DECREF(q_it);
3019 return total;
3020
3021 err_exit:
3022 Py_DECREF(p_it);
3023 Py_DECREF(q_it);
3024 Py_DECREF(total);
3025 Py_XDECREF(p_i);
3026 Py_XDECREF(q_i);
3027 Py_XDECREF(term_i);
3028 Py_XDECREF(new_total);
3029 return NULL;
3030}
3031
3032
53876d9c
CH
3033/* pow can't use math_2, but needs its own wrapper: the problem is
3034 that an infinite result can arise either as a result of overflow
3035 (in which case OverflowError should be raised) or as a result of
3036 e.g. 0.**-5. (for which ValueError needs to be raised.)
3037*/
3038
c9ea9335
SS
3039/*[clinic input]
3040math.pow
3041
3042 x: double
3043 y: double
3044 /
3045
3046Return x**y (x to the power of y).
3047[clinic start generated code]*/
3048
53876d9c 3049static PyObject *
c9ea9335
SS
3050math_pow_impl(PyObject *module, double x, double y)
3051/*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
53876d9c 3052{
c9ea9335 3053 double r;
f95a1b3c
AP
3054 int odd_y;
3055
f95a1b3c
AP
3056 /* deal directly with IEEE specials, to cope with problems on various
3057 platforms whose semantics don't exactly match C99 */
3058 r = 0.; /* silence compiler warning */
cd11ff12 3059 if (!isfinite(x) || !isfinite(y)) {
f95a1b3c 3060 errno = 0;
cd11ff12 3061 if (isnan(x))
f95a1b3c 3062 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
cd11ff12 3063 else if (isnan(y))
f95a1b3c 3064 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
cd11ff12
SK
3065 else if (isinf(x)) {
3066 odd_y = isfinite(y) && fmod(fabs(y), 2.0) == 1.0;
f95a1b3c
AP
3067 if (y > 0.)
3068 r = odd_y ? x : fabs(x);
3069 else if (y == 0.)
3070 r = 1.;
3071 else /* y < 0. */
3072 r = odd_y ? copysign(0., x) : 0.;
3073 }
9c995abd 3074 else {
cd11ff12 3075 assert(isinf(y));
f95a1b3c
AP
3076 if (fabs(x) == 1.0)
3077 r = 1.;
3078 else if (y > 0. && fabs(x) > 1.0)
3079 r = y;
3080 else if (y < 0. && fabs(x) < 1.0) {
3081 r = -y; /* result is +inf */
f95a1b3c
AP
3082 }
3083 else
3084 r = 0.;
3085 }
3086 }
3087 else {
3088 /* let libm handle finite**finite */
3089 errno = 0;
f95a1b3c 3090 r = pow(x, y);
f95a1b3c
AP
3091 /* a NaN result should arise only from (-ve)**(finite
3092 non-integer); in this case we want to raise ValueError. */
cd11ff12
SK
3093 if (!isfinite(r)) {
3094 if (isnan(r)) {
f95a1b3c
AP
3095 errno = EDOM;
3096 }
3097 /*
3098 an infinite result here arises either from:
3099 (A) (+/-0.)**negative (-> divide-by-zero)
3100 (B) overflow of x**y with x and y finite
3101 */
cd11ff12 3102 else if (isinf(r)) {
f95a1b3c
AP
3103 if (x == 0.)
3104 errno = EDOM;
3105 else
3106 errno = ERANGE;
3107 }
3108 }
3109 }
3110
75f59bb6 3111 if (errno && is_error(r, 1))
f95a1b3c
AP
3112 return NULL;
3113 else
3114 return PyFloat_FromDouble(r);
53876d9c
CH
3115}
3116
53876d9c 3117
072c0f1b
CH
3118static const double degToRad = Py_MATH_PI / 180.0;
3119static const double radToDeg = 180.0 / Py_MATH_PI;
d6f2267a 3120
c9ea9335
SS
3121/*[clinic input]
3122math.degrees
3123
3124 x: double
3125 /
3126
3127Convert angle x from radians to degrees.
3128[clinic start generated code]*/
3129
d6f2267a 3130static PyObject *
c9ea9335
SS
3131math_degrees_impl(PyObject *module, double x)
3132/*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
d6f2267a 3133{
f95a1b3c 3134 return PyFloat_FromDouble(x * radToDeg);
d6f2267a
RH
3135}
3136
c9ea9335
SS
3137
3138/*[clinic input]
3139math.radians
3140
3141 x: double
3142 /
3143
3144Convert angle x from degrees to radians.
3145[clinic start generated code]*/
d6f2267a
RH
3146
3147static PyObject *
c9ea9335
SS
3148math_radians_impl(PyObject *module, double x)
3149/*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
d6f2267a 3150{
f95a1b3c 3151 return PyFloat_FromDouble(x * degToRad);
d6f2267a
RH
3152}
3153
c9ea9335
SS
3154
3155/*[clinic input]
3156math.isfinite
3157
3158 x: double
3159 /
3160
3161Return True if x is neither an infinity nor a NaN, and False otherwise.
3162[clinic start generated code]*/
78526168 3163
8e0c9968 3164static PyObject *
c9ea9335
SS
3165math_isfinite_impl(PyObject *module, double x)
3166/*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
8e0c9968 3167{
cd11ff12 3168 return PyBool_FromLong((long)isfinite(x));
8e0c9968
MD
3169}
3170
c9ea9335 3171
5f61cde8
SK
3172/*[clinic input]
3173math.isnormal
3174
3175 x: double
3176 /
3177
3178Return True if x is normal, and False otherwise.
3179[clinic start generated code]*/
3180
3181static PyObject *
3182math_isnormal_impl(PyObject *module, double x)
3183/*[clinic end generated code: output=c7b302b5b89c3541 input=fdaa00c58aa7bc17]*/
3184{
3185 return PyBool_FromLong(isnormal(x));
3186}
3187
3188
3189/*[clinic input]
3190math.issubnormal
3191
3192 x: double
3193 /
3194
3195Return True if x is subnormal, and False otherwise.
3196[clinic start generated code]*/
3197
3198static PyObject *
3199math_issubnormal_impl(PyObject *module, double x)
3200/*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/
3201{
a88b49c3 3202#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
5f61cde8
SK
3203 return PyBool_FromLong(issubnormal(x));
3204#else
3205 return PyBool_FromLong(isfinite(x) && x && !isnormal(x));
3206#endif
3207}
3208
3209
c9ea9335
SS
3210/*[clinic input]
3211math.isnan
3212
3213 x: double
3214 /
3215
3216Return True if x is a NaN (not a number), and False otherwise.
3217[clinic start generated code]*/
8e0c9968 3218
072c0f1b 3219static PyObject *
c9ea9335
SS
3220math_isnan_impl(PyObject *module, double x)
3221/*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
072c0f1b 3222{
cd11ff12 3223 return PyBool_FromLong((long)isnan(x));
072c0f1b
CH
3224}
3225
c9ea9335
SS
3226
3227/*[clinic input]
3228math.isinf
3229
3230 x: double
3231 /
3232
3233Return True if x is a positive or negative infinity, and False otherwise.
3234[clinic start generated code]*/
072c0f1b
CH
3235
3236static PyObject *
c9ea9335
SS
3237math_isinf_impl(PyObject *module, double x)
3238/*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
072c0f1b 3239{
cd11ff12 3240 return PyBool_FromLong((long)isinf(x));
072c0f1b
CH
3241}
3242
072c0f1b 3243
c9ea9335
SS
3244/*[clinic input]
3245math.isclose -> bool
d5519ed7 3246
c9ea9335
SS
3247 a: double
3248 b: double
3249 *
3250 rel_tol: double = 1e-09
3251 maximum difference for being considered "close", relative to the
3252 magnitude of the input values
3253 abs_tol: double = 0.0
3254 maximum difference for being considered "close", regardless of the
3255 magnitude of the input values
d5519ed7 3256
1a0c7b9b 3257Determine whether two floating-point numbers are close in value.
d5519ed7 3258
c9ea9335
SS
3259Return True if a is close in value to b, and False otherwise.
3260
3261For the values to be considered close, the difference between them
3262must be smaller than at least one of the tolerances.
3263
3264-inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3265is, NaN is not close to anything, even itself. inf and -inf are
3266only close to themselves.
3267[clinic start generated code]*/
3268
3269static int
3270math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3271 double abs_tol)
1a0c7b9b 3272/*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
c9ea9335
SS
3273{
3274 double diff = 0.0;
d5519ed7
TE
3275
3276 /* sanity check on the inputs */
3277 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "tolerances must be non-negative");
c9ea9335 3280 return -1;
d5519ed7
TE
3281 }
3282
3283 if ( a == b ) {
3284 /* short circuit exact equality -- needed to catch two infinities of
3285 the same sign. And perhaps speeds things up a bit sometimes.
3286 */
c9ea9335 3287 return 1;
d5519ed7
TE
3288 }
3289
3290 /* This catches the case of two infinities of opposite sign, or
3291 one infinity and one finite number. Two infinities of opposite
3292 sign would otherwise have an infinite relative tolerance.
3293 Two infinities of the same sign are caught by the equality check
3294 above.
3295 */
3296
cd11ff12 3297 if (isinf(a) || isinf(b)) {
c9ea9335 3298 return 0;
d5519ed7
TE
3299 }
3300
3301 /* now do the regular computation
3302 this is essentially the "weak" test from the Boost library
3303 */
3304
3305 diff = fabs(b - a);
3306
c9ea9335
SS
3307 return (((diff <= fabs(rel_tol * b)) ||
3308 (diff <= fabs(rel_tol * a))) ||
3309 (diff <= abs_tol));
d5519ed7
TE
3310}
3311
0411411c
PG
3312static inline int
3313_check_long_mult_overflow(long a, long b) {
3314
3315 /* From Python2's int_mul code:
3316
3317 Integer overflow checking for * is painful: Python tried a couple ways, but
3318 they didn't work on all platforms, or failed in endcases (a product of
3319 -sys.maxint-1 has been a particular pain).
3320
3321 Here's another way:
3322
3323 The native long product x*y is either exactly right or *way* off, being
3324 just the last n bits of the true product, where n is the number of bits
3325 in a long (the delivered product is the true product plus i*2**n for
3326 some integer i).
3327
3328 The native double product (double)x * (double)y is subject to three
3329 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3330 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3331 But, unlike the native long product, it's not in *range* trouble: even
3332 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3333 dynamic range of a double. So the leading 50 (or so) bits of the double
3334 product are correct.
3335
3336 We check these two ways against each other, and declare victory if they're
3337 approximately the same. Else, because the native long product is the only
3338 one that can lose catastrophic amounts of information, it's the native long
3339 product that must have overflowed.
3340
3341 */
3342
3343 long longprod = (long)((unsigned long)a * b);
3344 double doubleprod = (double)a * (double)b;
3345 double doubled_longprod = (double)longprod;
3346
3347 if (doubled_longprod == doubleprod) {
3348 return 0;
3349 }
3350
3351 const double diff = doubled_longprod - doubleprod;
3352 const double absdiff = diff >= 0.0 ? diff : -diff;
3353 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3354
3355 if (32.0 * absdiff <= absprod) {
3356 return 0;
3357 }
3358
3359 return 1;
3360}
d5519ed7 3361
bc098515
PG
3362/*[clinic input]
3363math.prod
3364
3365 iterable: object
3366 /
3367 *
3368 start: object(c_default="NULL") = 1
3369
3370Calculate the product of all the elements in the input iterable.
3371
3372The default start value for the product is 1.
3373
3374When the iterable is empty, return the start value. This function is
3375intended specifically for use with numeric values and may reject
3376non-numeric types.
3377[clinic start generated code]*/
3378
3379static PyObject *
3380math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3381/*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3382{
3383 PyObject *result = start;
3384 PyObject *temp, *item, *iter;
3385
3386 iter = PyObject_GetIter(iterable);
3387 if (iter == NULL) {
3388 return NULL;
3389 }
3390
3391 if (result == NULL) {
84975146 3392 result = _PyLong_GetOne();
bc098515 3393 }
84975146 3394 Py_INCREF(result);
bc098515
PG
3395#ifndef SLOW_PROD
3396 /* Fast paths for integers keeping temporary products in C.
3397 * Assumes all inputs are the same type.
3398 * If the assumption fails, default to use PyObjects instead.
3399 */
3400 if (PyLong_CheckExact(result)) {
3401 int overflow;
3402 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3403 /* If this already overflowed, don't even enter the loop. */
3404 if (overflow == 0) {
81f7359f 3405 Py_SETREF(result, NULL);
bc098515
PG
3406 }
3407 /* Loop over all the items in the iterable until we finish, we overflow
3408 * or we found a non integer element */
84975146 3409 while (result == NULL) {
bc098515
PG
3410 item = PyIter_Next(iter);
3411 if (item == NULL) {
3412 Py_DECREF(iter);
3413 if (PyErr_Occurred()) {
3414 return NULL;
3415 }
3416 return PyLong_FromLong(i_result);
3417 }
3418 if (PyLong_CheckExact(item)) {
3419 long b = PyLong_AsLongAndOverflow(item, &overflow);
0411411c
PG
3420 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3421 long x = i_result * b;
bc098515
PG
3422 i_result = x;
3423 Py_DECREF(item);
3424 continue;
3425 }
3426 }
3427 /* Either overflowed or is not an int.
3428 * Restore real objects and process normally */
3429 result = PyLong_FromLong(i_result);
3430 if (result == NULL) {
3431 Py_DECREF(item);
3432 Py_DECREF(iter);
3433 return NULL;
3434 }
3435 temp = PyNumber_Multiply(result, item);
3436 Py_DECREF(result);
3437 Py_DECREF(item);
3438 result = temp;
3439 if (result == NULL) {
3440 Py_DECREF(iter);
3441 return NULL;
3442 }
3443 }
3444 }
3445
3446 /* Fast paths for floats keeping temporary products in C.
3447 * Assumes all inputs are the same type.
3448 * If the assumption fails, default to use PyObjects instead.
3449 */
3450 if (PyFloat_CheckExact(result)) {
3451 double f_result = PyFloat_AS_DOUBLE(result);
81f7359f 3452 Py_SETREF(result, NULL);
bc098515
PG
3453 while(result == NULL) {
3454 item = PyIter_Next(iter);
3455 if (item == NULL) {
3456 Py_DECREF(iter);
3457 if (PyErr_Occurred()) {
3458 return NULL;
3459 }
3460 return PyFloat_FromDouble(f_result);
3461 }
3462 if (PyFloat_CheckExact(item)) {
3463 f_result *= PyFloat_AS_DOUBLE(item);
3464 Py_DECREF(item);
3465 continue;
3466 }
3467 if (PyLong_CheckExact(item)) {
3468 long value;
3469 int overflow;
3470 value = PyLong_AsLongAndOverflow(item, &overflow);
3471 if (!overflow) {
3472 f_result *= (double)value;
3473 Py_DECREF(item);
3474 continue;
3475 }
3476 }
3477 result = PyFloat_FromDouble(f_result);
3478 if (result == NULL) {
3479 Py_DECREF(item);
3480 Py_DECREF(iter);
3481 return NULL;
3482 }
3483 temp = PyNumber_Multiply(result, item);
3484 Py_DECREF(result);
3485 Py_DECREF(item);
3486 result = temp;
3487 if (result == NULL) {
3488 Py_DECREF(iter);
3489 return NULL;
3490 }
3491 }
3492 }
3493#endif
3494 /* Consume rest of the iterable (if any) that could not be handled
3495 * by specialized functions above.*/
3496 for(;;) {
3497 item = PyIter_Next(iter);
3498 if (item == NULL) {
3499 /* error, or end-of-sequence */
3500 if (PyErr_Occurred()) {
81f7359f 3501 Py_SETREF(result, NULL);
bc098515
PG
3502 }
3503 break;
3504 }
3505 temp = PyNumber_Multiply(result, item);
3506 Py_DECREF(result);
3507 Py_DECREF(item);
3508 result = temp;
3509 if (result == NULL)
3510 break;
3511 }
3512 Py_DECREF(iter);
3513 return result;
3514}
3515
3516
2d787971
SS
3517/* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3518
3519Python code to generate the values:
3520
3521 import math
3522
3523 for n in range(128):
3524 fac = math.factorial(n)
3525 fac_odd_part = fac // (fac & -fac)
3526 reduced_fac_odd_part = fac_odd_part % (2**64)
3527 print(f"{reduced_fac_odd_part:#018x}u")
3528*/
3529static const uint64_t reduced_factorial_odd_part[] = {
3530 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3531 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3532 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3533 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3534 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3535 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3536 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3537 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3538 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3539 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3540 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3541 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3542 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3543 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3544 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3545 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3546 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3547 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3548 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3549 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3550 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3551 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3552 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3553 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3554 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3555 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3556 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3557 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3558 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3559 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3560 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3561 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3562};
3563
3564/* inverses of reduced_factorial_odd_part values modulo 2**64.
3565
3566Python code to generate the values:
3567
3568 import math
3569
3570 for n in range(128):
3571 fac = math.factorial(n)
3572 fac_odd_part = fac // (fac & -fac)
3573 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3574 print(f"{inverted_fac_odd_part:#018x}u")
3575*/
3576static const uint64_t inverted_factorial_odd_part[] = {
3577 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3578 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3579 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3580 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3581 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3582 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3583 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3584 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3585 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3586 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3587 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3588 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3589 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3590 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3591 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3592 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3593 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3594 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3595 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3596 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3597 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3598 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3599 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3600 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3601 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3602 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3603 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3604 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3605 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3606 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3607 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3608 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3609};
3610
3611/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3612
3613Python code to generate the values:
3614
3615import math
3616
3617for n in range(128):
3618 fac = math.factorial(n)
3619 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3620 print(fac_trailing_zeros)
3621*/
3622
3623static const uint8_t factorial_trailing_zeros[] = {
3624 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3625 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3626 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3627 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3628 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3629 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3630 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3631 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3632};
3633
60c320c3
SS
3634/* Number of permutations and combinations.
3635 * P(n, k) = n! / (n-k)!
3636 * C(n, k) = P(n, k) / k!
3637 */
3638
3639/* Calculate C(n, k) for n in the 63-bit range. */
3640static PyObject *
3641perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3642{
9c995abd 3643 assert(k != 0);
60c320c3
SS
3644
3645 /* For small enough n and k the result fits in the 64-bit range and can
3646 * be calculated without allocating intermediate PyLong objects. */
2d787971
SS
3647 if (iscomb) {
3648 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3649 * fits into a uint64_t. Exclude k = 1, because the second fast
3650 * path is faster for this case.*/
3651 static const unsigned char fast_comb_limits1[] = {
3652 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3653 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3654 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3655 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3656 67, 67, 67, // 32-34
3657 };
3658 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3659 /*
3660 comb(n, k) fits into a uint64_t. We compute it as
3661
3662 comb_odd_part << shift
3663
3664 where 2**shift is the largest power of two dividing comb(n, k)
3665 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3666 calculated efficiently via arithmetic modulo 2**64, using three
3667 lookups and two uint64_t multiplications.
3668 */
3669 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3670 * inverted_factorial_odd_part[k]
3671 * inverted_factorial_odd_part[n - k];
3672 int shift = factorial_trailing_zeros[n]
3673 - factorial_trailing_zeros[k]
3674 - factorial_trailing_zeros[n - k];
3675 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3676 }
3677
3678 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3679 * fits into a long long (which is at least 64 bit). Only contains
3680 * items larger than in fast_comb_limits1. */
3681 static const unsigned long long fast_comb_limits2[] = {
3682 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3683 746, 453, 308, 227, 178, 147, // 8-13
3684 };
3685 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3686 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3687 unsigned long long result = n;
60c320c3
SS
3688 for (unsigned long long i = 1; i < k;) {
3689 result *= --n;
3690 result /= ++i;
3691 }
2d787971 3692 return PyLong_FromUnsignedLongLong(result);
60c320c3 3693 }
2d787971
SS
3694 }
3695 else {
3696 /* Maps k to the maximal n so that k <= n and P(n, k)
3697 * fits into a long long (which is at least 64 bit). */
3698 static const unsigned long long fast_perm_limits[] = {
3699 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3700 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3701 24, 22, 21, 20, 20, // 16-20
3702 };
3703 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3704 if (n <= 127) {
3705 /* P(n, k) fits into a uint64_t. */
3706 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3707 * inverted_factorial_odd_part[n - k];
3708 int shift = factorial_trailing_zeros[n]
3709 - factorial_trailing_zeros[n - k];
3710 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3711 }
3712
3713 /* P(n, k) = P(n, k-1) * (n-k+1) */
3714 unsigned long long result = n;
60c320c3
SS
3715 for (unsigned long long i = 1; i < k;) {
3716 result *= --n;
3717 ++i;
3718 }
2d787971 3719 return PyLong_FromUnsignedLongLong(result);
60c320c3 3720 }
60c320c3
SS
3721 }
3722
2d787971
SS
3723 /* For larger n use recursive formulas:
3724 *
3725 * P(n, k) = P(n, j) * P(n-j, k-j)
3726 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3727 */
60c320c3
SS
3728 unsigned long long j = k / 2;
3729 PyObject *a, *b;
3730 a = perm_comb_small(n, j, iscomb);
3731 if (a == NULL) {
3732 return NULL;
3733 }
3734 b = perm_comb_small(n - j, k - j, iscomb);
3735 if (b == NULL) {
3736 goto error;
3737 }
3738 Py_SETREF(a, PyNumber_Multiply(a, b));
3739 Py_DECREF(b);
3740 if (iscomb && a != NULL) {
3741 b = perm_comb_small(k, j, 1);
3742 if (b == NULL) {
3743 goto error;
3744 }
3745 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3746 Py_DECREF(b);
3747 }
3748 return a;
3749
3750error:
3751 Py_DECREF(a);
3752 return NULL;
3753}
3754
3755/* Calculate P(n, k) or C(n, k) using recursive formulas.
3756 * It is more efficient than sequential multiplication thanks to
3757 * Karatsuba multiplication.
3758 */
3759static PyObject *
3760perm_comb(PyObject *n, unsigned long long k, int iscomb)
3761{
3762 if (k == 0) {
3763 return PyLong_FromLong(1);
3764 }
3765 if (k == 1) {
3e2f7135 3766 return Py_NewRef(n);
60c320c3
SS
3767 }
3768
3769 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3770 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3771 unsigned long long j = k / 2;
3772 PyObject *a, *b;
3773 a = perm_comb(n, j, iscomb);
3774 if (a == NULL) {
3775 return NULL;
3776 }
3777 PyObject *t = PyLong_FromUnsignedLongLong(j);
3778 if (t == NULL) {
3779 goto error;
3780 }
3781 n = PyNumber_Subtract(n, t);
3782 Py_DECREF(t);
3783 if (n == NULL) {
3784 goto error;
3785 }
3786 b = perm_comb(n, k - j, iscomb);
3787 Py_DECREF(n);
3788 if (b == NULL) {
3789 goto error;
3790 }
3791 Py_SETREF(a, PyNumber_Multiply(a, b));
3792 Py_DECREF(b);
3793 if (iscomb && a != NULL) {
3794 b = perm_comb_small(k, j, 1);
3795 if (b == NULL) {
3796 goto error;
3797 }
3798 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3799 Py_DECREF(b);
3800 }
3801 return a;
3802
3803error:
3804 Py_DECREF(a);
3805 return NULL;
3806}
3807
5ae299ac
SS
3808/*[clinic input]
3809math.perm
3810
3811 n: object
e119b3d1 3812 k: object = None
5ae299ac
SS
3813 /
3814
3815Number of ways to choose k items from n items without repetition and with order.
3816
963eb0f4
RH
3817Evaluates to n! / (n - k)! when k <= n and evaluates
3818to zero when k > n.
5ae299ac 3819
e119b3d1
RH
3820If k is not specified or is None, then k defaults to n
3821and the function returns n!.
3822
963eb0f4
RH
3823Raises TypeError if either of the arguments are not integers.
3824Raises ValueError if either of the arguments are negative.
5ae299ac
SS
3825[clinic start generated code]*/
3826
3827static PyObject *
3828math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
e119b3d1 3829/*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
5ae299ac 3830{
60c320c3 3831 PyObject *result = NULL;
5ae299ac 3832 int overflow, cmp;
60c320c3 3833 long long ki, ni;
5ae299ac 3834
e119b3d1
RH
3835 if (k == Py_None) {
3836 return math_factorial(module, n);
3837 }
5ae299ac
SS
3838 n = PyNumber_Index(n);
3839 if (n == NULL) {
3840 return NULL;
3841 }
5ae299ac
SS
3842 k = PyNumber_Index(k);
3843 if (k == NULL) {
3844 Py_DECREF(n);
3845 return NULL;
3846 }
60c320c3 3847 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
5ae299ac 3848
7559f5fd 3849 if (_PyLong_IsNegative((PyLongObject *)n)) {
5ae299ac
SS
3850 PyErr_SetString(PyExc_ValueError,
3851 "n must be a non-negative integer");
3852 goto error;
3853 }
7559f5fd 3854 if (_PyLong_IsNegative((PyLongObject *)k)) {
45e0411e
MD
3855 PyErr_SetString(PyExc_ValueError,
3856 "k must be a non-negative integer");
3857 goto error;
3858 }
3859
5ae299ac
SS
3860 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3861 if (cmp != 0) {
3862 if (cmp > 0) {
963eb0f4
RH
3863 result = PyLong_FromLong(0);
3864 goto done;
5ae299ac
SS
3865 }
3866 goto error;
3867 }
3868
60c320c3
SS
3869 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3870 assert(overflow >= 0 && !PyErr_Occurred());
5ae299ac
SS
3871 if (overflow > 0) {
3872 PyErr_Format(PyExc_OverflowError,
3873 "k must not exceed %lld",
3874 LLONG_MAX);
3875 goto error;
3876 }
60c320c3 3877 assert(ki >= 0);
5ae299ac 3878
60c320c3
SS
3879 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3880 assert(overflow >= 0 && !PyErr_Occurred());
3881 if (!overflow && ki > 1) {
3882 assert(ni >= 0);
3883 result = perm_comb_small((unsigned long long)ni,
3884 (unsigned long long)ki, 0);
5ae299ac 3885 }
60c320c3
SS
3886 else {
3887 result = perm_comb(n, (unsigned long long)ki, 0);
5ae299ac 3888 }
5ae299ac
SS
3889
3890done:
3891 Py_DECREF(n);
3892 Py_DECREF(k);
3893 return result;
3894
3895error:
5ae299ac
SS
3896 Py_DECREF(n);
3897 Py_DECREF(k);
3898 return NULL;
3899}
3900
4a686504
YA
3901/*[clinic input]
3902math.comb
3903
2b843ac0
SS
3904 n: object
3905 k: object
3906 /
4a686504 3907
2b843ac0 3908Number of ways to choose k items from n items without repetition and without order.
4a686504 3909
963eb0f4
RH
3910Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3911to zero when k > n.
3912
3913Also called the binomial coefficient because it is equivalent
3914to the coefficient of k-th term in polynomial expansion of the
3915expression (1 + x)**n.
4a686504 3916
963eb0f4
RH
3917Raises TypeError if either of the arguments are not integers.
3918Raises ValueError if either of the arguments are negative.
4a686504
YA
3919
3920[clinic start generated code]*/
3921
3922static PyObject *
3923math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
963eb0f4 3924/*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
4a686504 3925{
60c320c3 3926 PyObject *result = NULL, *temp;
4a686504 3927 int overflow, cmp;
60c320c3 3928 long long ki, ni;
4a686504 3929
2b843ac0
SS
3930 n = PyNumber_Index(n);
3931 if (n == NULL) {
3932 return NULL;
4a686504 3933 }
2b843ac0
SS
3934 k = PyNumber_Index(k);
3935 if (k == NULL) {
3936 Py_DECREF(n);
3937 return NULL;
4a686504 3938 }
60c320c3 3939 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
4a686504 3940
7559f5fd 3941 if (_PyLong_IsNegative((PyLongObject *)n)) {
2b843ac0
SS
3942 PyErr_SetString(PyExc_ValueError,
3943 "n must be a non-negative integer");
3944 goto error;
4a686504 3945 }
7559f5fd 3946 if (_PyLong_IsNegative((PyLongObject *)k)) {
45e0411e
MD
3947 PyErr_SetString(PyExc_ValueError,
3948 "k must be a non-negative integer");
3949 goto error;
3950 }
3951
60c320c3
SS
3952 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3953 assert(overflow >= 0 && !PyErr_Occurred());
3954 if (!overflow) {
3955 assert(ni >= 0);
3956 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3957 assert(overflow >= 0 && !PyErr_Occurred());
3958 if (overflow || ki > ni) {
3959 result = PyLong_FromLong(0);
3960 goto done;
3961 }
3962 assert(ki >= 0);
02b5417f 3963
60c320c3
SS
3964 ki = Py_MIN(ki, ni - ki);
3965 if (ki > 1) {
3966 result = perm_comb_small((unsigned long long)ni,
3967 (unsigned long long)ki, 1);
3968 goto done;
3969 }
3970 /* For k == 1 just return the original n in perm_comb(). */
4a686504
YA
3971 }
3972 else {
60c320c3
SS
3973 /* k = min(k, n - k) */
3974 temp = PyNumber_Subtract(n, k);
3975 if (temp == NULL) {
2b843ac0
SS
3976 goto error;
3977 }
7559f5fd
MS
3978 assert(PyLong_Check(temp));
3979 if (_PyLong_IsNegative((PyLongObject *)temp)) {
60c320c3
SS
3980 Py_DECREF(temp);
3981 result = PyLong_FromLong(0);
3982 goto done;
4a686504 3983 }
60c320c3
SS
3984 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3985 if (cmp > 0) {
3986 Py_SETREF(k, temp);
4a686504 3987 }
60c320c3
SS
3988 else {
3989 Py_DECREF(temp);
3990 if (cmp < 0) {
3991 goto error;
3992 }
4a686504 3993 }
60c320c3
SS
3994
3995 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3996 assert(overflow >= 0 && !PyErr_Occurred());
3997 if (overflow) {
3998 PyErr_Format(PyExc_OverflowError,
3999 "min(n - k, k) must not exceed %lld",
4000 LLONG_MAX);
2b843ac0 4001 goto error;
4a686504 4002 }
60c320c3 4003 assert(ki >= 0);
4a686504 4004 }
60c320c3
SS
4005
4006 result = perm_comb(n, (unsigned long long)ki, 1);
4a686504 4007
2b843ac0
SS
4008done:
4009 Py_DECREF(n);
4010 Py_DECREF(k);
4011 return result;
4a686504 4012
2b843ac0 4013error:
2b843ac0
SS
4014 Py_DECREF(n);
4015 Py_DECREF(k);
4a686504
YA
4016 return NULL;
4017}
4018
4019
100fafcf
VS
4020/*[clinic input]
4021math.nextafter
4022
4023 x: double
4024 y: double
4025 /
6e39fa19
MG
4026 *
4027 steps: object = None
4028
4029Return the floating-point value the given number of steps after x towards y.
4030
4031If steps is not specified or is None, it defaults to 1.
100fafcf 4032
6e39fa19
MG
4033Raises a TypeError, if x or y is not a double, or if steps is not an integer.
4034Raises ValueError if steps is negative.
100fafcf
VS
4035[clinic start generated code]*/
4036
4037static PyObject *
6e39fa19
MG
4038math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
4039/*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
100fafcf 4040{
85ead4fc
VS
4041#if defined(_AIX)
4042 if (x == y) {
4043 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
4044 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
4045 return PyFloat_FromDouble(y);
4046 }
cd11ff12 4047 if (isnan(x)) {
0837f99d 4048 return PyFloat_FromDouble(x);
c1c3493f 4049 }
cd11ff12 4050 if (isnan(y)) {
0837f99d 4051 return PyFloat_FromDouble(y);
c1c3493f 4052 }
85ead4fc 4053#endif
6e39fa19
MG
4054 if (steps == Py_None) {
4055 // fast path: we default to one step.
4056 return PyFloat_FromDouble(nextafter(x, y));
4057 }
4058 steps = PyNumber_Index(steps);
4059 if (steps == NULL) {
4060 return NULL;
4061 }
4062 assert(PyLong_CheckExact(steps));
4063 if (_PyLong_IsNegative((PyLongObject *)steps)) {
4064 PyErr_SetString(PyExc_ValueError,
4065 "steps must be a non-negative integer");
4066 Py_DECREF(steps);
4067 return NULL;
4068 }
4069
4070 unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
4071 // Conveniently, uint64_t and double have the same number of bits
4072 // on all the platforms we care about.
4073 // So if an overflow occurs, we can just use UINT64_MAX.
4074 Py_DECREF(steps);
4075 if (usteps_ull >= UINT64_MAX) {
4076 // This branch includes the case where an error occurred, since
4077 // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
4078 // usteps_ull can be strictly larger than UINT64_MAX on a machine
4079 // where unsigned long long has width > 64 bits.
4080 if (PyErr_Occurred()) {
4081 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
4082 PyErr_Clear();
4083 }
4084 else {
4085 return NULL;
4086 }
4087 }
4088 usteps_ull = UINT64_MAX;
4089 }
4090 assert(usteps_ull <= UINT64_MAX);
4091 uint64_t usteps = (uint64_t)usteps_ull;
4092
4093 if (usteps == 0) {
4094 return PyFloat_FromDouble(x);
4095 }
cd11ff12 4096 if (isnan(x)) {
6e39fa19
MG
4097 return PyFloat_FromDouble(x);
4098 }
cd11ff12 4099 if (isnan(y)) {
6e39fa19
MG
4100 return PyFloat_FromDouble(y);
4101 }
4102
4103 // We assume that double and uint64_t have the same endianness.
4104 // This is not guaranteed by the C-standard, but it is true for
4105 // all platforms we care about. (The most likely form of violation
4106 // would be a "mixed-endian" double.)
4107 union pun {double f; uint64_t i;};
4108 union pun ux = {x}, uy = {y};
4109 if (ux.i == uy.i) {
4110 return PyFloat_FromDouble(x);
4111 }
4112
4113 const uint64_t sign_bit = 1ULL<<63;
4114
4115 uint64_t ax = ux.i & ~sign_bit;
4116 uint64_t ay = uy.i & ~sign_bit;
4117
4118 // opposite signs
4119 if (((ux.i ^ uy.i) & sign_bit)) {
4120 // NOTE: ax + ay can never overflow, because their most significant bit
4121 // ain't set.
4122 if (ax + ay <= usteps) {
4123 return PyFloat_FromDouble(uy.f);
4124 // This comparison has to use <, because <= would get +0.0 vs -0.0
4125 // wrong.
4126 } else if (ax < usteps) {
4127 union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4128 return PyFloat_FromDouble(result.f);
4129 } else {
4130 ux.i -= usteps;
4131 return PyFloat_FromDouble(ux.f);
4132 }
4133 // same sign
4134 } else if (ax > ay) {
4135 if (ax - ay >= usteps) {
4136 ux.i -= usteps;
4137 return PyFloat_FromDouble(ux.f);
4138 } else {
4139 return PyFloat_FromDouble(uy.f);
4140 }
4141 } else {
4142 if (ay - ax >= usteps) {
4143 ux.i += usteps;
4144 return PyFloat_FromDouble(ux.f);
4145 } else {
4146 return PyFloat_FromDouble(uy.f);
4147 }
4148 }
100fafcf
VS
4149}
4150
4151
0b2ab219
VS
4152/*[clinic input]
4153math.ulp -> double
4154
4155 x: double
4156 /
4157
4158Return the value of the least significant bit of the float x.
4159[clinic start generated code]*/
4160
4161static double
4162math_ulp_impl(PyObject *module, double x)
4163/*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4164{
cd11ff12 4165 if (isnan(x)) {
0b2ab219
VS
4166 return x;
4167 }
4168 x = fabs(x);
cd11ff12 4169 if (isinf(x)) {
0b2ab219
VS
4170 return x;
4171 }
7a3b0350 4172 double inf = Py_INFINITY;
0b2ab219 4173 double x2 = nextafter(x, inf);
cd11ff12 4174 if (isinf(x2)) {
0b2ab219
VS
4175 /* special case: x is the largest positive representable float */
4176 x2 = nextafter(x, -inf);
4177 return x - x2;
4178 }
4179 return x2 - x;
4180}
4181
5be82413
DN
4182static int
4183math_exec(PyObject *module)
4184{
23c9febd 4185
3e65baee 4186 if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
5be82413
DN
4187 return -1;
4188 }
3e65baee 4189 if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
5be82413
DN
4190 return -1;
4191 }
4192 // 2pi
3e65baee 4193 if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
5be82413
DN
4194 return -1;
4195 }
3e65baee 4196 if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
5be82413
DN
4197 return -1;
4198 }
3e65baee 4199 if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
5be82413
DN
4200 return -1;
4201 }
5be82413
DN
4202 return 0;
4203}
0b2ab219 4204
8b43b19e 4205static PyMethodDef math_methods[] = {
f95a1b3c
AP
4206 {"acos", math_acos, METH_O, math_acos_doc},
4207 {"acosh", math_acosh, METH_O, math_acosh_doc},
4208 {"asin", math_asin, METH_O, math_asin_doc},
4209 {"asinh", math_asinh, METH_O, math_asinh_doc},
4210 {"atan", math_atan, METH_O, math_atan_doc},
804f2529 4211 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
f95a1b3c 4212 {"atanh", math_atanh, METH_O, math_atanh_doc},
ac867f10 4213 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
c9ea9335 4214 MATH_CEIL_METHODDEF
804f2529 4215 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
f95a1b3c
AP
4216 {"cos", math_cos, METH_O, math_cos_doc},
4217 {"cosh", math_cosh, METH_O, math_cosh_doc},
c9ea9335 4218 MATH_DEGREES_METHODDEF
9c18b1ae 4219 MATH_DIST_METHODDEF
f95a1b3c
AP
4220 {"erf", math_erf, METH_O, math_erf_doc},
4221 {"erfc", math_erfc, METH_O, math_erfc_doc},
4222 {"exp", math_exp, METH_O, math_exp_doc},
6266e4af 4223 {"exp2", math_exp2, METH_O, math_exp2_doc},
f95a1b3c
AP
4224 {"expm1", math_expm1, METH_O, math_expm1_doc},
4225 {"fabs", math_fabs, METH_O, math_fabs_doc},
c9ea9335
SS
4226 MATH_FACTORIAL_METHODDEF
4227 MATH_FLOOR_METHODDEF
8e3c953b 4228 MATH_FMA_METHODDEF
2301cdb5 4229 MATH_FMAX_METHODDEF
c9ea9335 4230 MATH_FMOD_METHODDEF
2301cdb5 4231 MATH_FMIN_METHODDEF
c9ea9335
SS
4232 MATH_FREXP_METHODDEF
4233 MATH_FSUM_METHODDEF
f95a1b3c 4234 {"gamma", math_gamma, METH_O, math_gamma_doc},
3275cb19
SK
4235 MATH_GCD_METHODDEF
4236 MATH_HYPOT_METHODDEF
c9ea9335
SS
4237 MATH_ISCLOSE_METHODDEF
4238 MATH_ISFINITE_METHODDEF
5f61cde8
SK
4239 MATH_ISNORMAL_METHODDEF
4240 MATH_ISSUBNORMAL_METHODDEF
c9ea9335
SS
4241 MATH_ISINF_METHODDEF
4242 MATH_ISNAN_METHODDEF
73934b9d 4243 MATH_ISQRT_METHODDEF
3275cb19 4244 MATH_LCM_METHODDEF
c9ea9335 4245 MATH_LDEXP_METHODDEF
f95a1b3c 4246 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
d1a89ce5 4247 {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
f95a1b3c 4248 {"log1p", math_log1p, METH_O, math_log1p_doc},
c9ea9335
SS
4249 MATH_LOG10_METHODDEF
4250 MATH_LOG2_METHODDEF
4251 MATH_MODF_METHODDEF
4252 MATH_POW_METHODDEF
4253 MATH_RADIANS_METHODDEF
804f2529 4254 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
42ccac2d 4255 MATH_SIGNBIT_METHODDEF
f95a1b3c
AP
4256 {"sin", math_sin, METH_O, math_sin_doc},
4257 {"sinh", math_sinh, METH_O, math_sinh_doc},
4258 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4259 {"tan", math_tan, METH_O, math_tan_doc},
4260 {"tanh", math_tanh, METH_O, math_tanh_doc},
47b9f83a 4261 MATH_SUMPROD_METHODDEF
c9ea9335 4262 MATH_TRUNC_METHODDEF
bc098515 4263 MATH_PROD_METHODDEF
5ae299ac 4264 MATH_PERM_METHODDEF
4a686504 4265 MATH_COMB_METHODDEF
100fafcf 4266 MATH_NEXTAFTER_METHODDEF
0b2ab219 4267 MATH_ULP_METHODDEF
f95a1b3c 4268 {NULL, NULL} /* sentinel */
85a5fbbd
GR
4269};
4270
5be82413
DN
4271static PyModuleDef_Slot math_slots[] = {
4272 {Py_mod_exec, math_exec},
a9c6e061 4273 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
c2627d6e 4274 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
5be82413
DN
4275 {0, NULL}
4276};
c6e22902 4277
14f8b4cf 4278PyDoc_STRVAR(module_doc,
6faad355
NB
4279"This module provides access to the mathematical functions\n"
4280"defined by the C standard.");
c6e22902 4281
1a21451b 4282static struct PyModuleDef mathmodule = {
f95a1b3c 4283 PyModuleDef_HEAD_INIT,
5be82413
DN
4284 .m_name = "math",
4285 .m_doc = module_doc,
67fbfb42 4286 .m_size = 0,
5be82413
DN
4287 .m_methods = math_methods,
4288 .m_slots = math_slots,
1a21451b
ML
4289};
4290
fe51c6d6 4291PyMODINIT_FUNC
1a21451b 4292PyInit_math(void)
85a5fbbd 4293{
5be82413 4294 return PyModuleDef_Init(&mathmodule);
85a5fbbd 4295}