]> git.ipfire.org Git - thirdparty/Python/cpython.git/blame_incremental - Modules/mathmodule.c
gh-135256: Simplify parsing parameters in Argument Clinic (GH-135257)
[thirdparty/Python/cpython.git] / Modules / mathmodule.c
... / ...
CommitLineData
1/* Math module -- standard C math library functions, pi and e */
2
3/* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9These are the "spirit of 754" rules:
10
111. If the mathematical result is a real number, but of magnitude too
12large to approximate by a machine float, overflow is signaled and the
13result is an infinity (with the appropriate sign).
14
152. If the mathematical result is a real number, but of magnitude too
16small to approximate by a machine float, underflow is signaled and the
17result is a zero (with the appropriate sign).
18
193. At a singularity (a value x such that the limit of f(y) as y
20approaches x exists and is an infinity), "divide by zero" is signaled
21and the result is an infinity (with the appropriate sign). This is
22complicated a little by that the left-side and right-side limits may
23not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24from the positive or negative directions. In that specific case, the
25sign of the zero determines the result of 1/0.
26
274. At a point where a function has no defined result in the extended
28reals (i.e., the reals plus an infinity or two), invalid operation is
29signaled and a NaN is returned.
30
31And these are what Python has historically /tried/ to do (but not
32always successfully, as platform libm behavior varies a lot):
33
34For #1, raise OverflowError.
35
36For #2, return a zero (with the appropriate sign if that happens by
37accident ;-)).
38
39For #3 and #4, raise ValueError. It may have made sense to raise
40Python's ZeroDivisionError in #3, but historically that's only been
41raised for division by zero and mod by zero.
42
43*/
44
45/*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
55#ifndef Py_BUILD_CORE_BUILTIN
56# define Py_BUILD_CORE_MODULE 1
57#endif
58
59#include "Python.h"
60#include "pycore_abstract.h" // _PyNumber_Index()
61#include "pycore_bitutils.h" // _Py_bit_length()
62#include "pycore_call.h" // _PyObject_CallNoArgs()
63#include "pycore_long.h" // _PyLong_GetZero()
64#include "pycore_moduleobject.h" // _PyModule_GetState()
65#include "pycore_object.h" // _PyObject_LookupSpecial()
66#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
67/* For DBL_EPSILON in _math.h */
68#include <float.h>
69/* For _Py_log1p with workarounds for buggy handling of zeros. */
70#include "_math.h"
71#include <stdbool.h>
72
73#include "clinic/mathmodule.c.h"
74
75/*[clinic input]
76module math
77[clinic start generated code]*/
78/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79
80
81
82/*
83Double and triple length extended precision algorithms from:
84
85 Accurate Sum and Dot Product
86 by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
87 https://doi.org/10.1137/030601818
88 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
89
90*/
91
92typedef struct{ double hi; double lo; } DoubleLength;
93
94static DoubleLength
95dl_fast_sum(double a, double b)
96{
97 /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
98 assert(fabs(a) >= fabs(b));
99 double x = a + b;
100 double y = (a - x) + b;
101 return (DoubleLength) {x, y};
102}
103
104static DoubleLength
105dl_sum(double a, double b)
106{
107 /* Algorithm 3.1 Error-free transformation of the sum */
108 double x = a + b;
109 double z = x - a;
110 double y = (a - (x - z)) + (b - z);
111 return (DoubleLength) {x, y};
112}
113
114#ifndef UNRELIABLE_FMA
115
116static DoubleLength
117dl_mul(double x, double y)
118{
119 /* Algorithm 3.5. Error-free transformation of a product */
120 double z = x * y;
121 double zz = fma(x, y, -z);
122 return (DoubleLength) {z, zz};
123}
124
125#else
126
127/*
128 The default implementation of dl_mul() depends on the C math library
129 having an accurate fma() function as required by § 7.12.13.1 of the
130 C99 standard.
131
132 The UNRELIABLE_FMA option is provided as a slower but accurate
133 alternative for builds where the fma() function is found wanting.
134 The speed penalty may be modest (17% slower on an Apple M1 Max),
135 so don't hesitate to enable this build option.
136
137 The algorithms are from the T. J. Dekker paper:
138 A Floating-Point Technique for Extending the Available Precision
139 https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
140*/
141
142static DoubleLength
143dl_split(double x) {
144 // Dekker (5.5) and (5.6).
145 double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
146 double hi = t - (t - x);
147 double lo = x - hi;
148 return (DoubleLength) {hi, lo};
149}
150
151static DoubleLength
152dl_mul(double x, double y)
153{
154 // Dekker (5.12) and mul12()
155 DoubleLength xx = dl_split(x);
156 DoubleLength yy = dl_split(y);
157 double p = xx.hi * yy.hi;
158 double q = xx.hi * yy.lo + xx.lo * yy.hi;
159 double z = p + q;
160 double zz = p - z + q + xx.lo * yy.lo;
161 return (DoubleLength) {z, zz};
162}
163
164#endif
165
166typedef struct { double hi; double lo; double tiny; } TripleLength;
167
168static const TripleLength tl_zero = {0.0, 0.0, 0.0};
169
170static TripleLength
171tl_fma(double x, double y, TripleLength total)
172{
173 /* Algorithm 5.10 with SumKVert for K=3 */
174 DoubleLength pr = dl_mul(x, y);
175 DoubleLength sm = dl_sum(total.hi, pr.hi);
176 DoubleLength r1 = dl_sum(total.lo, pr.lo);
177 DoubleLength r2 = dl_sum(r1.hi, sm.lo);
178 return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
179}
180
181static double
182tl_to_d(TripleLength total)
183{
184 DoubleLength last = dl_sum(total.lo, total.hi);
185 return total.tiny + last.lo + last.hi;
186}
187
188
189/*
190 sin(pi*x), giving accurate results for all finite x (especially x
191 integral or close to an integer). This is here for use in the
192 reflection formula for the gamma function. It conforms to IEEE
193 754-2008 for finite arguments, but not for infinities or nans.
194*/
195
196static const double pi = 3.141592653589793238462643383279502884197;
197static const double logpi = 1.144729885849400174143427351353058711647;
198
199/* Version of PyFloat_AsDouble() with in-line fast paths
200 for exact floats and integers. Gives a substantial
201 speed improvement for extracting float arguments.
202*/
203
204#define ASSIGN_DOUBLE(target_var, obj, error_label) \
205 if (PyFloat_CheckExact(obj)) { \
206 target_var = PyFloat_AS_DOUBLE(obj); \
207 } \
208 else if (PyLong_CheckExact(obj)) { \
209 target_var = PyLong_AsDouble(obj); \
210 if (target_var == -1.0 && PyErr_Occurred()) { \
211 goto error_label; \
212 } \
213 } \
214 else { \
215 target_var = PyFloat_AsDouble(obj); \
216 if (target_var == -1.0 && PyErr_Occurred()) { \
217 goto error_label; \
218 } \
219 }
220
221static double
222m_sinpi(double x)
223{
224 double y, r;
225 int n;
226 /* this function should only ever be called for finite arguments */
227 assert(isfinite(x));
228 y = fmod(fabs(x), 2.0);
229 n = (int)round(2.0*y);
230 assert(0 <= n && n <= 4);
231 switch (n) {
232 case 0:
233 r = sin(pi*y);
234 break;
235 case 1:
236 r = cos(pi*(y-0.5));
237 break;
238 case 2:
239 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
240 -0.0 instead of 0.0 when y == 1.0. */
241 r = sin(pi*(1.0-y));
242 break;
243 case 3:
244 r = -cos(pi*(y-1.5));
245 break;
246 case 4:
247 r = sin(pi*(y-2.0));
248 break;
249 default:
250 Py_UNREACHABLE();
251 }
252 return copysign(1.0, x)*r;
253}
254
255/* Implementation of the real gamma function. Kept here to work around
256 issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
257 on various platforms (Windows, MacOS). In extensive but non-exhaustive
258 random tests, this function proved accurate to within <= 10 ulps across the
259 entire float domain. Note that accuracy may depend on the quality of the
260 system math functions, the pow function in particular. Special cases
261 follow C99 annex F. The parameters and method are tailored to platforms
262 whose double format is the IEEE 754 binary64 format.
263
264 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
265 and g=6.024680040776729583740234375; these parameters are amongst those
266 used by the Boost library. Following Boost (again), we re-express the
267 Lanczos sum as a rational function, and compute it that way. The
268 coefficients below were computed independently using MPFR, and have been
269 double-checked against the coefficients in the Boost source code.
270
271 For x < 0.0 we use the reflection formula.
272
273 There's one minor tweak that deserves explanation: Lanczos' formula for
274 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
275 values, x+g-0.5 can be represented exactly. However, in cases where it
276 can't be represented exactly the small error in x+g-0.5 can be magnified
277 significantly by the pow and exp calls, especially for large x. A cheap
278 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
279 involved in the computation of x+g-0.5 (that is, e = computed value of
280 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
281
282 Correction factor
283 -----------------
284 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
285 double, and e is tiny. Then:
286
287 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
288 = pow(y, x-0.5)/exp(y) * C,
289
290 where the correction_factor C is given by
291
292 C = pow(1-e/y, x-0.5) * exp(e)
293
294 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
295
296 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
297
298 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
299
300 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
301
302 Note that for accuracy, when computing r*C it's better to do
303
304 r + e*g/y*r;
305
306 than
307
308 r * (1 + e*g/y);
309
310 since the addition in the latter throws away most of the bits of
311 information in e*g/y.
312*/
313
314#define LANCZOS_N 13
315static const double lanczos_g = 6.024680040776729583740234375;
316static const double lanczos_g_minus_half = 5.524680040776729583740234375;
317static const double lanczos_num_coeffs[LANCZOS_N] = {
318 23531376880.410759688572007674451636754734846804940,
319 42919803642.649098768957899047001988850926355848959,
320 35711959237.355668049440185451547166705960488635843,
321 17921034426.037209699919755754458931112671403265390,
322 6039542586.3520280050642916443072979210699388420708,
323 1439720407.3117216736632230727949123939715485786772,
324 248874557.86205415651146038641322942321632125127801,
325 31426415.585400194380614231628318205362874684987640,
326 2876370.6289353724412254090516208496135991145378768,
327 186056.26539522349504029498971604569928220784236328,
328 8071.6720023658162106380029022722506138218516325024,
329 210.82427775157934587250973392071336271166969580291,
330 2.5066282746310002701649081771338373386264310793408
331};
332
333/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
334static const double lanczos_den_coeffs[LANCZOS_N] = {
335 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
336 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
337
338/* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
339#define NGAMMA_INTEGRAL 23
340static const double gamma_integral[NGAMMA_INTEGRAL] = {
341 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
342 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
343 1307674368000.0, 20922789888000.0, 355687428096000.0,
344 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
345 51090942171709440000.0, 1124000727777607680000.0,
346};
347
348/* Lanczos' sum L_g(x), for positive x */
349
350static double
351lanczos_sum(double x)
352{
353 double num = 0.0, den = 0.0;
354 int i;
355 assert(x > 0.0);
356 /* evaluate the rational function lanczos_sum(x). For large
357 x, the obvious algorithm risks overflow, so we instead
358 rescale the denominator and numerator of the rational
359 function by x**(1-LANCZOS_N) and treat this as a
360 rational function in 1/x. This also reduces the error for
361 larger x values. The choice of cutoff point (5.0 below) is
362 somewhat arbitrary; in tests, smaller cutoff values than
363 this resulted in lower accuracy. */
364 if (x < 5.0) {
365 for (i = LANCZOS_N; --i >= 0; ) {
366 num = num * x + lanczos_num_coeffs[i];
367 den = den * x + lanczos_den_coeffs[i];
368 }
369 }
370 else {
371 for (i = 0; i < LANCZOS_N; i++) {
372 num = num / x + lanczos_num_coeffs[i];
373 den = den / x + lanczos_den_coeffs[i];
374 }
375 }
376 return num/den;
377}
378
379
380static double
381m_tgamma(double x)
382{
383 double absx, r, y, z, sqrtpow;
384
385 /* special cases */
386 if (!isfinite(x)) {
387 if (isnan(x) || x > 0.0)
388 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
389 else {
390 errno = EDOM;
391 return Py_NAN; /* tgamma(-inf) = nan, invalid */
392 }
393 }
394 if (x == 0.0) {
395 errno = EDOM;
396 /* tgamma(+-0.0) = +-inf, divide-by-zero */
397 return copysign(Py_INFINITY, x);
398 }
399
400 /* integer arguments */
401 if (x == floor(x)) {
402 if (x < 0.0) {
403 errno = EDOM; /* tgamma(n) = nan, invalid for */
404 return Py_NAN; /* negative integers n */
405 }
406 if (x <= NGAMMA_INTEGRAL)
407 return gamma_integral[(int)x - 1];
408 }
409 absx = fabs(x);
410
411 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
412 if (absx < 1e-20) {
413 r = 1.0/x;
414 if (isinf(r))
415 errno = ERANGE;
416 return r;
417 }
418
419 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
420 x > 200, and underflows to +-0.0 for x < -200, not a negative
421 integer. */
422 if (absx > 200.0) {
423 if (x < 0.0) {
424 return 0.0/m_sinpi(x);
425 }
426 else {
427 errno = ERANGE;
428 return Py_INFINITY;
429 }
430 }
431
432 y = absx + lanczos_g_minus_half;
433 /* compute error in sum */
434 if (absx > lanczos_g_minus_half) {
435 /* note: the correction can be foiled by an optimizing
436 compiler that (incorrectly) thinks that an expression like
437 a + b - a - b can be optimized to 0.0. This shouldn't
438 happen in a standards-conforming compiler. */
439 double q = y - absx;
440 z = q - lanczos_g_minus_half;
441 }
442 else {
443 double q = y - lanczos_g_minus_half;
444 z = q - absx;
445 }
446 z = z * lanczos_g / y;
447 if (x < 0.0) {
448 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
449 r -= z * r;
450 if (absx < 140.0) {
451 r /= pow(y, absx - 0.5);
452 }
453 else {
454 sqrtpow = pow(y, absx / 2.0 - 0.25);
455 r /= sqrtpow;
456 r /= sqrtpow;
457 }
458 }
459 else {
460 r = lanczos_sum(absx) / exp(y);
461 r += z * r;
462 if (absx < 140.0) {
463 r *= pow(y, absx - 0.5);
464 }
465 else {
466 sqrtpow = pow(y, absx / 2.0 - 0.25);
467 r *= sqrtpow;
468 r *= sqrtpow;
469 }
470 }
471 if (isinf(r))
472 errno = ERANGE;
473 return r;
474}
475
476/*
477 lgamma: natural log of the absolute value of the Gamma function.
478 For large arguments, Lanczos' formula works extremely well here.
479*/
480
481static double
482m_lgamma(double x)
483{
484 double r;
485 double absx;
486
487 /* special cases */
488 if (!isfinite(x)) {
489 if (isnan(x))
490 return x; /* lgamma(nan) = nan */
491 else
492 return Py_INFINITY; /* lgamma(+-inf) = +inf */
493 }
494
495 /* integer arguments */
496 if (x == floor(x) && x <= 2.0) {
497 if (x <= 0.0) {
498 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
499 return Py_INFINITY; /* integers n <= 0 */
500 }
501 else {
502 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
503 }
504 }
505
506 absx = fabs(x);
507 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
508 if (absx < 1e-20)
509 return -log(absx);
510
511 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
512 having a second set of numerator coefficients for lanczos_sum that
513 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
514 subtraction below; it's probably not worth it. */
515 r = log(lanczos_sum(absx)) - lanczos_g;
516 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
517 if (x < 0.0)
518 /* Use reflection formula to get value for negative x. */
519 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
520 if (isinf(r))
521 errno = ERANGE;
522 return r;
523}
524
525/* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
526 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
527 binary floating-point format, the result is always exact. */
528
529static double
530m_remainder(double x, double y)
531{
532 /* Deal with most common case first. */
533 if (isfinite(x) && isfinite(y)) {
534 double absx, absy, c, m, r;
535
536 if (y == 0.0) {
537 return Py_NAN;
538 }
539
540 absx = fabs(x);
541 absy = fabs(y);
542 m = fmod(absx, absy);
543
544 /*
545 Warning: some subtlety here. What we *want* to know at this point is
546 whether the remainder m is less than, equal to, or greater than half
547 of absy. However, we can't do that comparison directly because we
548 can't be sure that 0.5*absy is representable (the multiplication
549 might incur precision loss due to underflow). So instead we compare
550 m with the complement c = absy - m: m < 0.5*absy if and only if m <
551 c, and so on. The catch is that absy - m might also not be
552 representable, but it turns out that it doesn't matter:
553
554 - if m > 0.5*absy then absy - m is exactly representable, by
555 Sterbenz's lemma, so m > c
556 - if m == 0.5*absy then again absy - m is exactly representable
557 and m == c
558 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
559 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
560 c, or (ii) absy is tiny, either subnormal or in the lowest normal
561 binade. Then absy - m is exactly representable and again m < c.
562 */
563
564 c = absy - m;
565 if (m < c) {
566 r = m;
567 }
568 else if (m > c) {
569 r = -c;
570 }
571 else {
572 /*
573 Here absx is exactly halfway between two multiples of absy,
574 and we need to choose the even multiple. x now has the form
575
576 absx = n * absy + m
577
578 for some integer n (recalling that m = 0.5*absy at this point).
579 If n is even we want to return m; if n is odd, we need to
580 return -m.
581
582 So
583
584 0.5 * (absx - m) = (n/2) * absy
585
586 and now reducing modulo absy gives us:
587
588 | m, if n is odd
589 fmod(0.5 * (absx - m), absy) = |
590 | 0, if n is even
591
592 Now m - 2.0 * fmod(...) gives the desired result: m
593 if n is even, -m if m is odd.
594
595 Note that all steps in fmod(0.5 * (absx - m), absy)
596 will be computed exactly, with no rounding error
597 introduced.
598 */
599 assert(m == c);
600 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
601 }
602 return copysign(1.0, x) * r;
603 }
604
605 /* Special values. */
606 if (isnan(x)) {
607 return x;
608 }
609 if (isnan(y)) {
610 return y;
611 }
612 if (isinf(x)) {
613 return Py_NAN;
614 }
615 assert(isinf(y));
616 return x;
617}
618
619
620/*
621 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
622 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
623 special values directly, passing positive non-special values through to
624 the system log/log10.
625 */
626
627static double
628m_log(double x)
629{
630 if (isfinite(x)) {
631 if (x > 0.0)
632 return log(x);
633 errno = EDOM;
634 if (x == 0.0)
635 return -Py_INFINITY; /* log(0) = -inf */
636 else
637 return Py_NAN; /* log(-ve) = nan */
638 }
639 else if (isnan(x))
640 return x; /* log(nan) = nan */
641 else if (x > 0.0)
642 return x; /* log(inf) = inf */
643 else {
644 errno = EDOM;
645 return Py_NAN; /* log(-inf) = nan */
646 }
647}
648
649/*
650 log2: log to base 2.
651
652 Uses an algorithm that should:
653
654 (a) produce exact results for powers of 2, and
655 (b) give a monotonic log2 (for positive finite floats),
656 assuming that the system log is monotonic.
657*/
658
659static double
660m_log2(double x)
661{
662 if (!isfinite(x)) {
663 if (isnan(x))
664 return x; /* log2(nan) = nan */
665 else if (x > 0.0)
666 return x; /* log2(+inf) = +inf */
667 else {
668 errno = EDOM;
669 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
670 }
671 }
672
673 if (x > 0.0) {
674 return log2(x);
675 }
676 else if (x == 0.0) {
677 errno = EDOM;
678 return -Py_INFINITY; /* log2(0) = -inf, divide-by-zero */
679 }
680 else {
681 errno = EDOM;
682 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
683 }
684}
685
686static double
687m_log10(double x)
688{
689 if (isfinite(x)) {
690 if (x > 0.0)
691 return log10(x);
692 errno = EDOM;
693 if (x == 0.0)
694 return -Py_INFINITY; /* log10(0) = -inf */
695 else
696 return Py_NAN; /* log10(-ve) = nan */
697 }
698 else if (isnan(x))
699 return x; /* log10(nan) = nan */
700 else if (x > 0.0)
701 return x; /* log10(inf) = inf */
702 else {
703 errno = EDOM;
704 return Py_NAN; /* log10(-inf) = nan */
705 }
706}
707
708
709/*[clinic input]
710math.gcd
711
712 *integers as args: array
713
714Greatest Common Divisor.
715[clinic start generated code]*/
716
717static PyObject *
718math_gcd_impl(PyObject *module, PyObject * const *args,
719 Py_ssize_t args_length)
720/*[clinic end generated code: output=a26c95907374ffb4 input=ded7f0ea3850c05c]*/
721{
722 // Fast-path for the common case: gcd(int, int)
723 if (args_length == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
724 {
725 return _PyLong_GCD(args[0], args[1]);
726 }
727
728 if (args_length == 0) {
729 return PyLong_FromLong(0);
730 }
731
732 PyObject *res = PyNumber_Index(args[0]);
733 if (res == NULL) {
734 return NULL;
735 }
736 if (args_length == 1) {
737 Py_SETREF(res, PyNumber_Absolute(res));
738 return res;
739 }
740
741 PyObject *one = _PyLong_GetOne(); // borrowed ref
742 for (Py_ssize_t i = 1; i < args_length; i++) {
743 PyObject *x = _PyNumber_Index(args[i]);
744 if (x == NULL) {
745 Py_DECREF(res);
746 return NULL;
747 }
748 if (res == one) {
749 /* Fast path: just check arguments.
750 It is okay to use identity comparison here. */
751 Py_DECREF(x);
752 continue;
753 }
754 Py_SETREF(res, _PyLong_GCD(res, x));
755 Py_DECREF(x);
756 if (res == NULL) {
757 return NULL;
758 }
759 }
760 return res;
761}
762
763
764static PyObject *
765long_lcm(PyObject *a, PyObject *b)
766{
767 PyObject *g, *m, *f, *ab;
768
769 if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
770 return PyLong_FromLong(0);
771 }
772 g = _PyLong_GCD(a, b);
773 if (g == NULL) {
774 return NULL;
775 }
776 f = PyNumber_FloorDivide(a, g);
777 Py_DECREF(g);
778 if (f == NULL) {
779 return NULL;
780 }
781 m = PyNumber_Multiply(f, b);
782 Py_DECREF(f);
783 if (m == NULL) {
784 return NULL;
785 }
786 ab = PyNumber_Absolute(m);
787 Py_DECREF(m);
788 return ab;
789}
790
791
792/*[clinic input]
793math.lcm
794
795 *integers as args: array
796
797Least Common Multiple.
798[clinic start generated code]*/
799
800static PyObject *
801math_lcm_impl(PyObject *module, PyObject * const *args,
802 Py_ssize_t args_length)
803/*[clinic end generated code: output=c8a59a5c2e55c816 input=3e4f4b7cdf948a98]*/
804{
805 PyObject *res, *x;
806 Py_ssize_t i;
807
808 if (args_length == 0) {
809 return PyLong_FromLong(1);
810 }
811 res = PyNumber_Index(args[0]);
812 if (res == NULL) {
813 return NULL;
814 }
815 if (args_length == 1) {
816 Py_SETREF(res, PyNumber_Absolute(res));
817 return res;
818 }
819
820 PyObject *zero = _PyLong_GetZero(); // borrowed ref
821 for (i = 1; i < args_length; i++) {
822 x = PyNumber_Index(args[i]);
823 if (x == NULL) {
824 Py_DECREF(res);
825 return NULL;
826 }
827 if (res == zero) {
828 /* Fast path: just check arguments.
829 It is okay to use identity comparison here. */
830 Py_DECREF(x);
831 continue;
832 }
833 Py_SETREF(res, long_lcm(res, x));
834 Py_DECREF(x);
835 if (res == NULL) {
836 return NULL;
837 }
838 }
839 return res;
840}
841
842
843/* Call is_error when errno != 0, and where x is the result libm
844 * returned. is_error will usually set up an exception and return
845 * true (1), but may return false (0) without setting up an exception.
846 */
847static int
848is_error(double x, int raise_edom)
849{
850 int result = 1; /* presumption of guilt */
851 assert(errno); /* non-zero errno is a precondition for calling */
852 if (errno == EDOM) {
853 if (raise_edom) {
854 PyErr_SetString(PyExc_ValueError, "math domain error");
855 }
856 }
857
858 else if (errno == ERANGE) {
859 /* ANSI C generally requires libm functions to set ERANGE
860 * on overflow, but also generally *allows* them to set
861 * ERANGE on underflow too. There's no consistency about
862 * the latter across platforms.
863 * Alas, C99 never requires that errno be set.
864 * Here we suppress the underflow errors (libm functions
865 * should return a zero on underflow, and +- HUGE_VAL on
866 * overflow, so testing the result for zero suffices to
867 * distinguish the cases).
868 *
869 * On some platforms (Ubuntu/ia64) it seems that errno can be
870 * set to ERANGE for subnormal results that do *not* underflow
871 * to zero. So to be safe, we'll ignore ERANGE whenever the
872 * function result is less than 1.5 in absolute value.
873 *
874 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
875 * are correctly detected, since the function may underflow
876 * toward -1.0 rather than 0.0.
877 */
878 if (fabs(x) < 1.5)
879 result = 0;
880 else
881 PyErr_SetString(PyExc_OverflowError,
882 "math range error");
883 }
884 else
885 /* Unexpected math error */
886 PyErr_SetFromErrno(PyExc_ValueError);
887 return result;
888}
889
890/*
891 math_1 is used to wrap a libm function f that takes a double
892 argument and returns a double.
893
894 The error reporting follows these rules, which are designed to do
895 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
896 platforms.
897
898 - a NaN result from non-NaN inputs causes ValueError to be raised
899 - an infinite result from finite inputs causes OverflowError to be
900 raised if can_overflow is 1, or raises ValueError if can_overflow
901 is 0.
902 - if the result is finite and errno == EDOM then ValueError is
903 raised
904 - if the result is finite and nonzero and errno == ERANGE then
905 OverflowError is raised
906
907 The last rule is used to catch overflow on platforms which follow
908 C89 but for which HUGE_VAL is not an infinity.
909
910 For the majority of one-argument functions these rules are enough
911 to ensure that Python's functions behave as specified in 'Annex F'
912 of the C99 standard, with the 'invalid' and 'divide-by-zero'
913 floating-point exceptions mapping to Python's ValueError and the
914 'overflow' floating-point exception mapping to OverflowError.
915 math_1 only works for functions that don't have singularities *and*
916 the possibility of overflow; fortunately, that covers everything we
917 care about right now.
918*/
919
920static PyObject *
921math_1(PyObject *arg, double (*func) (double), int can_overflow,
922 const char *err_msg)
923{
924 double x, r;
925 x = PyFloat_AsDouble(arg);
926 if (x == -1.0 && PyErr_Occurred())
927 return NULL;
928 errno = 0;
929 r = (*func)(x);
930 if (isnan(r) && !isnan(x))
931 goto domain_err; /* domain error */
932 if (isinf(r) && isfinite(x)) {
933 if (can_overflow)
934 PyErr_SetString(PyExc_OverflowError,
935 "math range error"); /* overflow */
936 else
937 goto domain_err; /* singularity */
938 return NULL;
939 }
940 if (isfinite(r) && errno && is_error(r, 1))
941 /* this branch unnecessary on most platforms */
942 return NULL;
943
944 return PyFloat_FromDouble(r);
945
946domain_err:
947 if (err_msg) {
948 char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
949 if (buf) {
950 PyErr_Format(PyExc_ValueError, err_msg, buf);
951 PyMem_Free(buf);
952 }
953 }
954 else {
955 PyErr_SetString(PyExc_ValueError, "math domain error");
956 }
957 return NULL;
958}
959
960/* variant of math_1, to be used when the function being wrapped is known to
961 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
962 errno = ERANGE for overflow). */
963
964static PyObject *
965math_1a(PyObject *arg, double (*func) (double), const char *err_msg)
966{
967 double x, r;
968 x = PyFloat_AsDouble(arg);
969 if (x == -1.0 && PyErr_Occurred())
970 return NULL;
971 errno = 0;
972 r = (*func)(x);
973 if (errno && is_error(r, err_msg ? 0 : 1)) {
974 if (err_msg && errno == EDOM) {
975 assert(!PyErr_Occurred()); /* exception is not set by is_error() */
976 char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
977 if (buf) {
978 PyErr_Format(PyExc_ValueError, err_msg, buf);
979 PyMem_Free(buf);
980 }
981 }
982 return NULL;
983 }
984 return PyFloat_FromDouble(r);
985}
986
987/*
988 math_2 is used to wrap a libm function f that takes two double
989 arguments and returns a double.
990
991 The error reporting follows these rules, which are designed to do
992 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
993 platforms.
994
995 - a NaN result from non-NaN inputs causes ValueError to be raised
996 - an infinite result from finite inputs causes OverflowError to be
997 raised.
998 - if the result is finite and errno == EDOM then ValueError is
999 raised
1000 - if the result is finite and nonzero and errno == ERANGE then
1001 OverflowError is raised
1002
1003 The last rule is used to catch overflow on platforms which follow
1004 C89 but for which HUGE_VAL is not an infinity.
1005
1006 For most two-argument functions (copysign, fmod, hypot, atan2)
1007 these rules are enough to ensure that Python's functions behave as
1008 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1009 'divide-by-zero' floating-point exceptions mapping to Python's
1010 ValueError and the 'overflow' floating-point exception mapping to
1011 OverflowError.
1012*/
1013
1014static PyObject *
1015math_2(PyObject *const *args, Py_ssize_t nargs,
1016 double (*func) (double, double), const char *funcname)
1017{
1018 double x, y, r;
1019 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1020 return NULL;
1021 x = PyFloat_AsDouble(args[0]);
1022 if (x == -1.0 && PyErr_Occurred()) {
1023 return NULL;
1024 }
1025 y = PyFloat_AsDouble(args[1]);
1026 if (y == -1.0 && PyErr_Occurred()) {
1027 return NULL;
1028 }
1029 errno = 0;
1030 r = (*func)(x, y);
1031 if (isnan(r)) {
1032 if (!isnan(x) && !isnan(y))
1033 errno = EDOM;
1034 else
1035 errno = 0;
1036 }
1037 else if (isinf(r)) {
1038 if (isfinite(x) && isfinite(y))
1039 errno = ERANGE;
1040 else
1041 errno = 0;
1042 }
1043 if (errno && is_error(r, 1))
1044 return NULL;
1045 else
1046 return PyFloat_FromDouble(r);
1047}
1048
1049#define FUNC1(funcname, func, can_overflow, docstring) \
1050 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1051 return math_1(args, func, can_overflow, NULL); \
1052 }\
1053 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1054
1055#define FUNC1D(funcname, func, can_overflow, docstring, err_msg) \
1056 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1057 return math_1(args, func, can_overflow, err_msg); \
1058 }\
1059 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1060
1061#define FUNC1A(funcname, func, docstring) \
1062 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1063 return math_1a(args, func, NULL); \
1064 }\
1065 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1066
1067#define FUNC1AD(funcname, func, docstring, err_msg) \
1068 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1069 return math_1a(args, func, err_msg); \
1070 }\
1071 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1072
1073#define FUNC2(funcname, func, docstring) \
1074 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1075 return math_2(args, nargs, func, #funcname); \
1076 }\
1077 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1078
1079FUNC1D(acos, acos, 0,
1080 "acos($module, x, /)\n--\n\n"
1081 "Return the arc cosine (measured in radians) of x.\n\n"
1082 "The result is between 0 and pi.",
1083 "expected a number in range from -1 up to 1, got %s")
1084FUNC1D(acosh, acosh, 0,
1085 "acosh($module, x, /)\n--\n\n"
1086 "Return the inverse hyperbolic cosine of x.",
1087 "expected argument value not less than 1, got %s")
1088FUNC1D(asin, asin, 0,
1089 "asin($module, x, /)\n--\n\n"
1090 "Return the arc sine (measured in radians) of x.\n\n"
1091 "The result is between -pi/2 and pi/2.",
1092 "expected a number in range from -1 up to 1, got %s")
1093FUNC1(asinh, asinh, 0,
1094 "asinh($module, x, /)\n--\n\n"
1095 "Return the inverse hyperbolic sine of x.")
1096FUNC1(atan, atan, 0,
1097 "atan($module, x, /)\n--\n\n"
1098 "Return the arc tangent (measured in radians) of x.\n\n"
1099 "The result is between -pi/2 and pi/2.")
1100FUNC2(atan2, atan2,
1101 "atan2($module, y, x, /)\n--\n\n"
1102 "Return the arc tangent (measured in radians) of y/x.\n\n"
1103 "Unlike atan(y/x), the signs of both x and y are considered.")
1104FUNC1D(atanh, atanh, 0,
1105 "atanh($module, x, /)\n--\n\n"
1106 "Return the inverse hyperbolic tangent of x.",
1107 "expected a number between -1 and 1, got %s")
1108FUNC1(cbrt, cbrt, 0,
1109 "cbrt($module, x, /)\n--\n\n"
1110 "Return the cube root of x.")
1111
1112/*[clinic input]
1113math.ceil
1114
1115 x as number: object
1116 /
1117
1118Return the ceiling of x as an Integral.
1119
1120This is the smallest integer >= x.
1121[clinic start generated code]*/
1122
1123static PyObject *
1124math_ceil(PyObject *module, PyObject *number)
1125/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1126{
1127 double x;
1128
1129 if (PyFloat_CheckExact(number)) {
1130 x = PyFloat_AS_DOUBLE(number);
1131 }
1132 else {
1133 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__ceil__));
1134 if (result != NULL) {
1135 return result;
1136 }
1137 else if (PyErr_Occurred()) {
1138 return NULL;
1139 }
1140 x = PyFloat_AsDouble(number);
1141 if (x == -1.0 && PyErr_Occurred()) {
1142 return NULL;
1143 }
1144 }
1145 return PyLong_FromDouble(ceil(x));
1146}
1147
1148FUNC2(copysign, copysign,
1149 "copysign($module, x, y, /)\n--\n\n"
1150 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1151 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1152 "returns -1.0.\n")
1153FUNC1D(cos, cos, 0,
1154 "cos($module, x, /)\n--\n\n"
1155 "Return the cosine of x (measured in radians).",
1156 "expected a finite input, got %s")
1157FUNC1(cosh, cosh, 1,
1158 "cosh($module, x, /)\n--\n\n"
1159 "Return the hyperbolic cosine of x.")
1160FUNC1A(erf, erf,
1161 "erf($module, x, /)\n--\n\n"
1162 "Error function at x.")
1163FUNC1A(erfc, erfc,
1164 "erfc($module, x, /)\n--\n\n"
1165 "Complementary error function at x.")
1166FUNC1(exp, exp, 1,
1167 "exp($module, x, /)\n--\n\n"
1168 "Return e raised to the power of x.")
1169FUNC1(exp2, exp2, 1,
1170 "exp2($module, x, /)\n--\n\n"
1171 "Return 2 raised to the power of x.")
1172FUNC1(expm1, expm1, 1,
1173 "expm1($module, x, /)\n--\n\n"
1174 "Return exp(x)-1.\n\n"
1175 "This function avoids the loss of precision involved in the direct "
1176 "evaluation of exp(x)-1 for small x.")
1177FUNC1(fabs, fabs, 0,
1178 "fabs($module, x, /)\n--\n\n"
1179 "Return the absolute value of the float x.")
1180
1181/*[clinic input]
1182math.floor
1183
1184 x as number: object
1185 /
1186
1187Return the floor of x as an Integral.
1188
1189This is the largest integer <= x.
1190[clinic start generated code]*/
1191
1192static PyObject *
1193math_floor(PyObject *module, PyObject *number)
1194/*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1195{
1196 double x;
1197
1198 if (PyFloat_CheckExact(number)) {
1199 x = PyFloat_AS_DOUBLE(number);
1200 }
1201 else {
1202 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__floor__));
1203 if (result != NULL) {
1204 return result;
1205 }
1206 else if (PyErr_Occurred()) {
1207 return NULL;
1208 }
1209 x = PyFloat_AsDouble(number);
1210 if (x == -1.0 && PyErr_Occurred()) {
1211 return NULL;
1212 }
1213 }
1214 return PyLong_FromDouble(floor(x));
1215}
1216
1217/*[clinic input]
1218math.fmax -> double
1219
1220 x: double
1221 y: double
1222 /
1223
1224Return the larger of two floating-point arguments.
1225[clinic start generated code]*/
1226
1227static double
1228math_fmax_impl(PyObject *module, double x, double y)
1229/*[clinic end generated code: output=00692358d312fee2 input=021596c027336ffe]*/
1230{
1231 return fmax(x, y);
1232}
1233
1234/*[clinic input]
1235math.fmin -> double
1236
1237 x: double
1238 y: double
1239 /
1240
1241Return the smaller of two floating-point arguments.
1242[clinic start generated code]*/
1243
1244static double
1245math_fmin_impl(PyObject *module, double x, double y)
1246/*[clinic end generated code: output=3d5b7826bd292dd9 input=d12e64ccc33f878a]*/
1247{
1248 return fmin(x, y);
1249}
1250
1251FUNC1AD(gamma, m_tgamma,
1252 "gamma($module, x, /)\n--\n\n"
1253 "Gamma function at x.",
1254 "expected a noninteger or positive integer, got %s")
1255FUNC1AD(lgamma, m_lgamma,
1256 "lgamma($module, x, /)\n--\n\n"
1257 "Natural logarithm of absolute value of Gamma function at x.",
1258 "expected a noninteger or positive integer, got %s")
1259FUNC1D(log1p, m_log1p, 0,
1260 "log1p($module, x, /)\n--\n\n"
1261 "Return the natural logarithm of 1+x (base e).\n\n"
1262 "The result is computed in a way which is accurate for x near zero.",
1263 "expected argument value > -1, got %s")
1264FUNC2(remainder, m_remainder,
1265 "remainder($module, x, y, /)\n--\n\n"
1266 "Difference between x and the closest integer multiple of y.\n\n"
1267 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1268 "In the case where x is exactly halfway between two multiples of\n"
1269 "y, the nearest even value of n is used. The result is always exact.")
1270
1271/*[clinic input]
1272math.signbit
1273
1274 x: double
1275 /
1276
1277Return True if the sign of x is negative and False otherwise.
1278[clinic start generated code]*/
1279
1280static PyObject *
1281math_signbit_impl(PyObject *module, double x)
1282/*[clinic end generated code: output=20c5f20156a9b871 input=3d3493fbcb5bdb3e]*/
1283{
1284 return PyBool_FromLong(signbit(x));
1285}
1286
1287FUNC1D(sin, sin, 0,
1288 "sin($module, x, /)\n--\n\n"
1289 "Return the sine of x (measured in radians).",
1290 "expected a finite input, got %s")
1291FUNC1(sinh, sinh, 1,
1292 "sinh($module, x, /)\n--\n\n"
1293 "Return the hyperbolic sine of x.")
1294FUNC1D(sqrt, sqrt, 0,
1295 "sqrt($module, x, /)\n--\n\n"
1296 "Return the square root of x.",
1297 "expected a nonnegative input, got %s")
1298FUNC1D(tan, tan, 0,
1299 "tan($module, x, /)\n--\n\n"
1300 "Return the tangent of x (measured in radians).",
1301 "expected a finite input, got %s")
1302FUNC1(tanh, tanh, 0,
1303 "tanh($module, x, /)\n--\n\n"
1304 "Return the hyperbolic tangent of x.")
1305
1306/* Precision summation function as msum() by Raymond Hettinger in
1307 <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
1308 enhanced with the exact partials sum and roundoff from Mark
1309 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1310 See those links for more details, proofs and other references.
1311
1312 Note 1: IEEE 754 floating-point semantics with a rounding mode of
1313 roundTiesToEven are assumed.
1314
1315 Note 2: No provision is made for intermediate overflow handling;
1316 therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1317 fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
1318 overflow of the first partial sum.
1319
1320 Note 3: The algorithm has two potential sources of fragility. First, C
1321 permits arithmetic operations on `double`s to be performed in an
1322 intermediate format whose range and precision may be greater than those of
1323 `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1324 example on machines using the now largely historical x87 FPUs. In this case,
1325 `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1326 `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1327 should be safe from this source of errors. Second, an aggressively
1328 optimizing compiler can re-associate operations so that (for example) the
1329 statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1330 re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1331 re-association would be in violation of the C standard, and should not occur
1332 except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1333 -fassociative-math). Such optimizations should be avoided for this module.
1334
1335 Note 4: The signature of math.fsum() differs from builtins.sum()
1336 because the start argument doesn't make sense in the context of
1337 accurate summation. Since the partials table is collapsed before
1338 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1339 accurate result returned by sum(itertools.chain(seq1, seq2)).
1340*/
1341
1342#define NUM_PARTIALS 32 /* initial partials array size, on stack */
1343
1344/* Extend the partials array p[] by doubling its size. */
1345static int /* non-zero on error */
1346_fsum_realloc(double **p_ptr, Py_ssize_t n,
1347 double *ps, Py_ssize_t *m_ptr)
1348{
1349 void *v = NULL;
1350 Py_ssize_t m = *m_ptr;
1351
1352 m += m; /* double */
1353 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1354 double *p = *p_ptr;
1355 if (p == ps) {
1356 v = PyMem_Malloc(sizeof(double) * m);
1357 if (v != NULL)
1358 memcpy(v, ps, sizeof(double) * n);
1359 }
1360 else
1361 v = PyMem_Realloc(p, sizeof(double) * m);
1362 }
1363 if (v == NULL) { /* size overflow or no memory */
1364 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1365 return 1;
1366 }
1367 *p_ptr = (double*) v;
1368 *m_ptr = m;
1369 return 0;
1370}
1371
1372/* Full precision summation of a sequence of floats.
1373
1374 def msum(iterable):
1375 partials = [] # sorted, non-overlapping partial sums
1376 for x in iterable:
1377 i = 0
1378 for y in partials:
1379 if abs(x) < abs(y):
1380 x, y = y, x
1381 hi = x + y
1382 lo = y - (hi - x)
1383 if lo:
1384 partials[i] = lo
1385 i += 1
1386 x = hi
1387 partials[i:] = [x]
1388 return sum_exact(partials)
1389
1390 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1391 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1392 partial so that the list of partial sums remains exact.
1393
1394 Sum_exact() adds the partial sums exactly and correctly rounds the final
1395 result (using the round-half-to-even rule). The items in partials remain
1396 non-zero, non-special, non-overlapping and strictly increasing in
1397 magnitude, but possibly not all having the same sign.
1398
1399 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1400*/
1401
1402/*[clinic input]
1403math.fsum
1404
1405 seq: object
1406 /
1407
1408Return an accurate floating-point sum of values in the iterable seq.
1409
1410Assumes IEEE-754 floating-point arithmetic.
1411[clinic start generated code]*/
1412
1413static PyObject *
1414math_fsum(PyObject *module, PyObject *seq)
1415/*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
1416{
1417 PyObject *item, *iter, *sum = NULL;
1418 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1419 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1420 double xsave, special_sum = 0.0, inf_sum = 0.0;
1421 double hi, yr, lo = 0.0;
1422
1423 iter = PyObject_GetIter(seq);
1424 if (iter == NULL)
1425 return NULL;
1426
1427 for(;;) { /* for x in iterable */
1428 assert(0 <= n && n <= m);
1429 assert((m == NUM_PARTIALS && p == ps) ||
1430 (m > NUM_PARTIALS && p != NULL));
1431
1432 item = PyIter_Next(iter);
1433 if (item == NULL) {
1434 if (PyErr_Occurred())
1435 goto _fsum_error;
1436 break;
1437 }
1438 ASSIGN_DOUBLE(x, item, error_with_item);
1439 Py_DECREF(item);
1440
1441 xsave = x;
1442 for (i = j = 0; j < n; j++) { /* for y in partials */
1443 y = p[j];
1444 if (fabs(x) < fabs(y)) {
1445 t = x; x = y; y = t;
1446 }
1447 hi = x + y;
1448 yr = hi - x;
1449 lo = y - yr;
1450 if (lo != 0.0)
1451 p[i++] = lo;
1452 x = hi;
1453 }
1454
1455 n = i; /* ps[i:] = [x] */
1456 if (x != 0.0) {
1457 if (! isfinite(x)) {
1458 /* a nonfinite x could arise either as
1459 a result of intermediate overflow, or
1460 as a result of a nan or inf in the
1461 summands */
1462 if (isfinite(xsave)) {
1463 PyErr_SetString(PyExc_OverflowError,
1464 "intermediate overflow in fsum");
1465 goto _fsum_error;
1466 }
1467 if (isinf(xsave))
1468 inf_sum += xsave;
1469 special_sum += xsave;
1470 /* reset partials */
1471 n = 0;
1472 }
1473 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1474 goto _fsum_error;
1475 else
1476 p[n++] = x;
1477 }
1478 }
1479
1480 if (special_sum != 0.0) {
1481 if (isnan(inf_sum))
1482 PyErr_SetString(PyExc_ValueError,
1483 "-inf + inf in fsum");
1484 else
1485 sum = PyFloat_FromDouble(special_sum);
1486 goto _fsum_error;
1487 }
1488
1489 hi = 0.0;
1490 if (n > 0) {
1491 hi = p[--n];
1492 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1493 inexact. */
1494 while (n > 0) {
1495 x = hi;
1496 y = p[--n];
1497 assert(fabs(y) < fabs(x));
1498 hi = x + y;
1499 yr = hi - x;
1500 lo = y - yr;
1501 if (lo != 0.0)
1502 break;
1503 }
1504 /* Make half-even rounding work across multiple partials.
1505 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1506 digit to two instead of down to zero (the 1e-16 makes the 1
1507 slightly closer to two). With a potential 1 ULP rounding
1508 error fixed-up, math.fsum() can guarantee commutativity. */
1509 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1510 (lo > 0.0 && p[n-1] > 0.0))) {
1511 y = lo * 2.0;
1512 x = hi + y;
1513 yr = x - hi;
1514 if (y == yr)
1515 hi = x;
1516 }
1517 }
1518 sum = PyFloat_FromDouble(hi);
1519
1520 _fsum_error:
1521 Py_DECREF(iter);
1522 if (p != ps)
1523 PyMem_Free(p);
1524 return sum;
1525
1526 error_with_item:
1527 Py_DECREF(item);
1528 goto _fsum_error;
1529}
1530
1531#undef NUM_PARTIALS
1532
1533
1534static unsigned long
1535count_set_bits(unsigned long n)
1536{
1537 unsigned long count = 0;
1538 while (n != 0) {
1539 ++count;
1540 n &= n - 1; /* clear least significant bit */
1541 }
1542 return count;
1543}
1544
1545/* Integer square root
1546
1547Given a nonnegative integer `n`, we want to compute the largest integer
1548`a` for which `a * a <= n`, or equivalently the integer part of the exact
1549square root of `n`.
1550
1551We use an adaptive-precision pure-integer version of Newton's iteration. Given
1552a positive integer `n`, the algorithm produces at each iteration an integer
1553approximation `a` to the square root of `n >> s` for some even integer `s`,
1554with `s` decreasing as the iterations progress. On the final iteration, `s` is
1555zero and we have an approximation to the square root of `n` itself.
1556
1557At every step, the approximation `a` is strictly within 1.0 of the true square
1558root, so we have
1559
1560 (a - 1)**2 < (n >> s) < (a + 1)**2
1561
1562After the final iteration, a check-and-correct step is needed to determine
1563whether `a` or `a - 1` gives the desired integer square root of `n`.
1564
1565The algorithm is remarkable in its simplicity. There's no need for a
1566per-iteration check-and-correct step, and termination is straightforward: the
1567number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1568for `n > 1`). The only tricky part of the correctness proof is in establishing
1569that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1570iteration to the next. A sketch of the proof of this is given below.
1571
1572In addition to the proof sketch, a formal, computer-verified proof
1573of correctness (using Lean) of an equivalent recursive algorithm can be found
1574here:
1575
1576 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1577
1578
1579Here's Python code equivalent to the C implementation below:
1580
1581 def isqrt(n):
1582 """
1583 Return the integer part of the square root of the input.
1584 """
1585 n = operator.index(n)
1586
1587 if n < 0:
1588 raise ValueError("isqrt() argument must be nonnegative")
1589 if n == 0:
1590 return 0
1591
1592 c = (n.bit_length() - 1) // 2
1593 a = 1
1594 d = 0
1595 for s in reversed(range(c.bit_length())):
1596 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1597 e = d
1598 d = c >> s
1599 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1600
1601 return a - (a*a > n)
1602
1603
1604Sketch of proof of correctness
1605------------------------------
1606
1607The delicate part of the correctness proof is showing that the loop invariant
1608is preserved from one iteration to the next. That is, just before the line
1609
1610 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1611
1612is executed in the above code, we know that
1613
1614 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1615
1616(since `e` is always the value of `d` from the previous iteration). We must
1617prove that after that line is executed, we have
1618
1619 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1620
1621To facilitate the proof, we make some changes of notation. Write `m` for
1622`n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1623
1624 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1625
1626or equivalently:
1627
1628 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1629
1630Then we can rewrite (1) as:
1631
1632 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1633
1634and we must show that (b - 1)**2 < m < (b + 1)**2.
1635
1636From this point on, we switch to mathematical notation, so `/` means exact
1637division rather than integer division and `^` is used for exponentiation. We
1638use the `√` symbol for the exact square root. In (3), we can remove the
1639implicit floor operation to give:
1640
1641 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1642
1643Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1644
1645 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1646
1647Squaring and dividing through by `2^(d-e+1) a` gives
1648
1649 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1650
1651We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1652right-hand side of (6) with `1`, and now replacing the central
1653term `m / (2^(d-e+1) a)` with its floor in (6) gives
1654
1655 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1656
1657Or equivalently, from (2):
1658
1659 (7) -1 < b - √m < 1
1660
1661and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1662to prove.
1663
1664We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1665a` that was used to get line (7) above. From the definition of `c`, we have
1666`4^c <= n`, which implies
1667
1668 (8) 4^d <= m
1669
1670also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1671that `2d - 2e - 1 <= d` and hence that
1672
1673 (9) 4^(2d - 2e - 1) <= m
1674
1675Dividing both sides by `4^(d - e)` gives
1676
1677 (10) 4^(d - e - 1) <= m / 4^(d - e)
1678
1679But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1680
1681 (11) 4^(d - e - 1) < (a + 1)^2
1682
1683Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1684`a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1685completes the proof sketch.
1686
1687*/
1688
1689/*
1690 The _approximate_isqrt_tab table provides approximate square roots for
1691 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1692
1693 a = _approximate_isqrt_tab[(n >> 8) - 64]
1694
1695 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1696
1697 The table was computed in Python using the expression:
1698
1699 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1700*/
1701
1702static const uint8_t _approximate_isqrt_tab[192] = {
1703 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1704 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1705 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1706 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1707 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1708 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1709 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1710 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1711 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1712 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1713 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1714 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1715 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1716 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1717 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1718 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1719};
1720
1721/* Approximate square root of a large 64-bit integer.
1722
1723 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1724 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1725
1726static inline uint32_t
1727_approximate_isqrt(uint64_t n)
1728{
1729 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1730 u = (u << 7) + (uint32_t)(n >> 41) / u;
1731 return (u << 15) + (uint32_t)((n >> 17) / u);
1732}
1733
1734/*[clinic input]
1735math.isqrt
1736
1737 n: object
1738 /
1739
1740Return the integer part of the square root of the input.
1741[clinic start generated code]*/
1742
1743static PyObject *
1744math_isqrt(PyObject *module, PyObject *n)
1745/*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1746{
1747 int a_too_large, c_bit_length;
1748 int64_t c, d;
1749 uint64_t m;
1750 uint32_t u;
1751 PyObject *a = NULL, *b;
1752
1753 n = _PyNumber_Index(n);
1754 if (n == NULL) {
1755 return NULL;
1756 }
1757
1758 if (_PyLong_IsNegative((PyLongObject *)n)) {
1759 PyErr_SetString(
1760 PyExc_ValueError,
1761 "isqrt() argument must be nonnegative");
1762 goto error;
1763 }
1764 if (_PyLong_IsZero((PyLongObject *)n)) {
1765 Py_DECREF(n);
1766 return PyLong_FromLong(0);
1767 }
1768
1769 /* c = (n.bit_length() - 1) // 2 */
1770 c = _PyLong_NumBits(n);
1771 assert(c > 0);
1772 assert(!PyErr_Occurred());
1773 c = (c - 1) / 2;
1774
1775 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1776 fast, almost branch-free algorithm. */
1777 if (c <= 31) {
1778 int shift = 31 - (int)c;
1779 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1780 Py_DECREF(n);
1781 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1782 return NULL;
1783 }
1784 u = _approximate_isqrt(m << 2*shift) >> shift;
1785 u -= (uint64_t)u * u > m;
1786 return PyLong_FromUnsignedLong(u);
1787 }
1788
1789 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1790 arithmetic, then switch to using Python long integers. */
1791
1792 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1793 c_bit_length = 6;
1794 while ((c >> c_bit_length) > 0) {
1795 ++c_bit_length;
1796 }
1797
1798 /* Initialise d and a. */
1799 d = c >> (c_bit_length - 5);
1800 b = _PyLong_Rshift(n, 2*c - 62);
1801 if (b == NULL) {
1802 goto error;
1803 }
1804 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1805 Py_DECREF(b);
1806 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1807 goto error;
1808 }
1809 u = _approximate_isqrt(m) >> (31U - d);
1810 a = PyLong_FromUnsignedLong(u);
1811 if (a == NULL) {
1812 goto error;
1813 }
1814
1815 for (int s = c_bit_length - 6; s >= 0; --s) {
1816 PyObject *q;
1817 int64_t e = d;
1818
1819 d = c >> s;
1820
1821 /* q = (n >> 2*c - e - d + 1) // a */
1822 q = _PyLong_Rshift(n, 2*c - d - e + 1);
1823 if (q == NULL) {
1824 goto error;
1825 }
1826 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1827 if (q == NULL) {
1828 goto error;
1829 }
1830
1831 /* a = (a << d - 1 - e) + q */
1832 Py_SETREF(a, _PyLong_Lshift(a, d - 1 - e));
1833 if (a == NULL) {
1834 Py_DECREF(q);
1835 goto error;
1836 }
1837 Py_SETREF(a, PyNumber_Add(a, q));
1838 Py_DECREF(q);
1839 if (a == NULL) {
1840 goto error;
1841 }
1842 }
1843
1844 /* The correct result is either a or a - 1. Figure out which, and
1845 decrement a if necessary. */
1846
1847 /* a_too_large = n < a * a */
1848 b = PyNumber_Multiply(a, a);
1849 if (b == NULL) {
1850 goto error;
1851 }
1852 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1853 Py_DECREF(b);
1854 if (a_too_large == -1) {
1855 goto error;
1856 }
1857
1858 if (a_too_large) {
1859 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1860 }
1861 Py_DECREF(n);
1862 return a;
1863
1864 error:
1865 Py_XDECREF(a);
1866 Py_DECREF(n);
1867 return NULL;
1868}
1869
1870/* Divide-and-conquer factorial algorithm
1871 *
1872 * Based on the formula and pseudo-code provided at:
1873 * http://www.luschny.de/math/factorial/binarysplitfact.html
1874 *
1875 * Faster algorithms exist, but they're more complicated and depend on
1876 * a fast prime factorization algorithm.
1877 *
1878 * Notes on the algorithm
1879 * ----------------------
1880 *
1881 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1882 * computed separately, and then combined using a left shift.
1883 *
1884 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1885 * odd divisor) of factorial(n), using the formula:
1886 *
1887 * factorial_odd_part(n) =
1888 *
1889 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1890 *
1891 * Example: factorial_odd_part(20) =
1892 *
1893 * (1) *
1894 * (1) *
1895 * (1 * 3 * 5) *
1896 * (1 * 3 * 5 * 7 * 9) *
1897 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1898 *
1899 * Here i goes from large to small: the first term corresponds to i=4 (any
1900 * larger i gives an empty product), and the last term corresponds to i=0.
1901 * Each term can be computed from the last by multiplying by the extra odd
1902 * numbers required: e.g., to get from the penultimate term to the last one,
1903 * we multiply by (11 * 13 * 15 * 17 * 19).
1904 *
1905 * To see a hint of why this formula works, here are the same numbers as above
1906 * but with the even parts (i.e., the appropriate powers of 2) included. For
1907 * each subterm in the product for i, we multiply that subterm by 2**i:
1908 *
1909 * factorial(20) =
1910 *
1911 * (16) *
1912 * (8) *
1913 * (4 * 12 * 20) *
1914 * (2 * 6 * 10 * 14 * 18) *
1915 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1916 *
1917 * The factorial_partial_product function computes the product of all odd j in
1918 * range(start, stop) for given start and stop. It's used to compute the
1919 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1920 * operates recursively, repeatedly splitting the range into two roughly equal
1921 * pieces until the subranges are small enough to be computed using only C
1922 * integer arithmetic.
1923 *
1924 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1925 * the factorial) is computed independently in the main math_factorial
1926 * function. By standard results, its value is:
1927 *
1928 * two_valuation = n//2 + n//4 + n//8 + ....
1929 *
1930 * It can be shown (e.g., by complete induction on n) that two_valuation is
1931 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1932 * '1'-bits in the binary expansion of n.
1933 */
1934
1935/* factorial_partial_product: Compute product(range(start, stop, 2)) using
1936 * divide and conquer. Assumes start and stop are odd and stop > start.
1937 * max_bits must be >= bit_length(stop - 2). */
1938
1939static PyObject *
1940factorial_partial_product(unsigned long start, unsigned long stop,
1941 unsigned long max_bits)
1942{
1943 unsigned long midpoint, num_operands;
1944 PyObject *left = NULL, *right = NULL, *result = NULL;
1945
1946 /* If the return value will fit an unsigned long, then we can
1947 * multiply in a tight, fast loop where each multiply is O(1).
1948 * Compute an upper bound on the number of bits required to store
1949 * the answer.
1950 *
1951 * Storing some integer z requires floor(lg(z))+1 bits, which is
1952 * conveniently the value returned by bit_length(z). The
1953 * product x*y will require at most
1954 * bit_length(x) + bit_length(y) bits to store, based
1955 * on the idea that lg product = lg x + lg y.
1956 *
1957 * We know that stop - 2 is the largest number to be multiplied. From
1958 * there, we have: bit_length(answer) <= num_operands *
1959 * bit_length(stop - 2)
1960 */
1961
1962 num_operands = (stop - start) / 2;
1963 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1964 * unlikely case of an overflow in num_operands * max_bits. */
1965 if (num_operands <= 8 * SIZEOF_LONG &&
1966 num_operands * max_bits <= 8 * SIZEOF_LONG) {
1967 unsigned long j, total;
1968 for (total = start, j = start + 2; j < stop; j += 2)
1969 total *= j;
1970 return PyLong_FromUnsignedLong(total);
1971 }
1972
1973 /* find midpoint of range(start, stop), rounded up to next odd number. */
1974 midpoint = (start + num_operands) | 1;
1975 left = factorial_partial_product(start, midpoint,
1976 _Py_bit_length(midpoint - 2));
1977 if (left == NULL)
1978 goto error;
1979 right = factorial_partial_product(midpoint, stop, max_bits);
1980 if (right == NULL)
1981 goto error;
1982 result = PyNumber_Multiply(left, right);
1983
1984 error:
1985 Py_XDECREF(left);
1986 Py_XDECREF(right);
1987 return result;
1988}
1989
1990/* factorial_odd_part: compute the odd part of factorial(n). */
1991
1992static PyObject *
1993factorial_odd_part(unsigned long n)
1994{
1995 long i;
1996 unsigned long v, lower, upper;
1997 PyObject *partial, *tmp, *inner, *outer;
1998
1999 inner = PyLong_FromLong(1);
2000 if (inner == NULL)
2001 return NULL;
2002 outer = Py_NewRef(inner);
2003
2004 upper = 3;
2005 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
2006 v = n >> i;
2007 if (v <= 2)
2008 continue;
2009 lower = upper;
2010 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
2011 upper = (v + 1) | 1;
2012 /* Here inner is the product of all odd integers j in the range (0,
2013 n/2**(i+1)]. The factorial_partial_product call below gives the
2014 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
2015 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
2016 /* inner *= partial */
2017 if (partial == NULL)
2018 goto error;
2019 tmp = PyNumber_Multiply(inner, partial);
2020 Py_DECREF(partial);
2021 if (tmp == NULL)
2022 goto error;
2023 Py_SETREF(inner, tmp);
2024 /* Now inner is the product of all odd integers j in the range (0,
2025 n/2**i], giving the inner product in the formula above. */
2026
2027 /* outer *= inner; */
2028 tmp = PyNumber_Multiply(outer, inner);
2029 if (tmp == NULL)
2030 goto error;
2031 Py_SETREF(outer, tmp);
2032 }
2033 Py_DECREF(inner);
2034 return outer;
2035
2036 error:
2037 Py_DECREF(outer);
2038 Py_DECREF(inner);
2039 return NULL;
2040}
2041
2042
2043/* Lookup table for small factorial values */
2044
2045static const unsigned long SmallFactorials[] = {
2046 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2047 362880, 3628800, 39916800, 479001600,
2048#if SIZEOF_LONG >= 8
2049 6227020800, 87178291200, 1307674368000,
2050 20922789888000, 355687428096000, 6402373705728000,
2051 121645100408832000, 2432902008176640000
2052#endif
2053};
2054
2055/*[clinic input]
2056math.factorial
2057
2058 n as arg: object
2059 /
2060
2061Find n!.
2062[clinic start generated code]*/
2063
2064static PyObject *
2065math_factorial(PyObject *module, PyObject *arg)
2066/*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/
2067{
2068 long x, two_valuation;
2069 int overflow;
2070 PyObject *result, *odd_part;
2071
2072 x = PyLong_AsLongAndOverflow(arg, &overflow);
2073 if (x == -1 && PyErr_Occurred()) {
2074 return NULL;
2075 }
2076 else if (overflow == 1) {
2077 PyErr_Format(PyExc_OverflowError,
2078 "factorial() argument should not exceed %ld",
2079 LONG_MAX);
2080 return NULL;
2081 }
2082 else if (overflow == -1 || x < 0) {
2083 PyErr_SetString(PyExc_ValueError,
2084 "factorial() not defined for negative values");
2085 return NULL;
2086 }
2087
2088 /* use lookup table if x is small */
2089 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2090 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2091
2092 /* else express in the form odd_part * 2**two_valuation, and compute as
2093 odd_part << two_valuation. */
2094 odd_part = factorial_odd_part(x);
2095 if (odd_part == NULL)
2096 return NULL;
2097 two_valuation = x - count_set_bits(x);
2098 result = _PyLong_Lshift(odd_part, two_valuation);
2099 Py_DECREF(odd_part);
2100 return result;
2101}
2102
2103
2104/*[clinic input]
2105math.trunc
2106
2107 x: object
2108 /
2109
2110Truncates the Real x to the nearest Integral toward 0.
2111
2112Uses the __trunc__ magic method.
2113[clinic start generated code]*/
2114
2115static PyObject *
2116math_trunc(PyObject *module, PyObject *x)
2117/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2118{
2119 if (PyFloat_CheckExact(x)) {
2120 return PyFloat_Type.tp_as_number->nb_int(x);
2121 }
2122
2123 PyObject *result = _PyObject_MaybeCallSpecialNoArgs(x, &_Py_ID(__trunc__));
2124 if (result != NULL) {
2125 return result;
2126 }
2127 else if (!PyErr_Occurred()) {
2128 PyErr_Format(PyExc_TypeError,
2129 "type %.100s doesn't define __trunc__ method",
2130 Py_TYPE(x)->tp_name);
2131 }
2132 return NULL;
2133}
2134
2135
2136/*[clinic input]
2137math.frexp
2138
2139 x: double
2140 /
2141
2142Return the mantissa and exponent of x, as pair (m, e).
2143
2144m is a float and e is an int, such that x = m * 2.**e.
2145If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2146[clinic start generated code]*/
2147
2148static PyObject *
2149math_frexp_impl(PyObject *module, double x)
2150/*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2151{
2152 int i;
2153 /* deal with special cases directly, to sidestep platform
2154 differences */
2155 if (isnan(x) || isinf(x) || !x) {
2156 i = 0;
2157 }
2158 else {
2159 x = frexp(x, &i);
2160 }
2161 return Py_BuildValue("(di)", x, i);
2162}
2163
2164
2165/*[clinic input]
2166math.ldexp
2167
2168 x: double
2169 i: object
2170 /
2171
2172Return x * (2**i).
2173
2174This is essentially the inverse of frexp().
2175[clinic start generated code]*/
2176
2177static PyObject *
2178math_ldexp_impl(PyObject *module, double x, PyObject *i)
2179/*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2180{
2181 double r;
2182 long exp;
2183 int overflow;
2184
2185 if (PyLong_Check(i)) {
2186 /* on overflow, replace exponent with either LONG_MAX
2187 or LONG_MIN, depending on the sign. */
2188 exp = PyLong_AsLongAndOverflow(i, &overflow);
2189 if (exp == -1 && PyErr_Occurred())
2190 return NULL;
2191 if (overflow)
2192 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2193 }
2194 else {
2195 PyErr_SetString(PyExc_TypeError,
2196 "Expected an int as second argument to ldexp.");
2197 return NULL;
2198 }
2199
2200 if (x == 0. || !isfinite(x)) {
2201 /* NaNs, zeros and infinities are returned unchanged */
2202 r = x;
2203 errno = 0;
2204 } else if (exp > INT_MAX) {
2205 /* overflow */
2206 r = copysign(Py_INFINITY, x);
2207 errno = ERANGE;
2208 } else if (exp < INT_MIN) {
2209 /* underflow to +-0 */
2210 r = copysign(0., x);
2211 errno = 0;
2212 } else {
2213 errno = 0;
2214 r = ldexp(x, (int)exp);
2215#ifdef _MSC_VER
2216 if (DBL_MIN > r && r > -DBL_MIN) {
2217 /* Denormal (or zero) results can be incorrectly rounded here (rather,
2218 truncated). Fixed in newer versions of the C runtime, included
2219 with Windows 11. */
2220 int original_exp;
2221 frexp(x, &original_exp);
2222 if (original_exp > DBL_MIN_EXP) {
2223 /* Shift down to the smallest normal binade. No bits lost. */
2224 int shift = DBL_MIN_EXP - original_exp;
2225 x = ldexp(x, shift);
2226 exp -= shift;
2227 }
2228 /* Multiplying by 2**exp finishes the job, and the HW will round as
2229 appropriate. Note: if exp < -DBL_MANT_DIG, all of x is shifted
2230 to be < 0.5ULP of smallest denorm, so should be thrown away. If
2231 exp is so very negative that ldexp underflows to 0, that's fine;
2232 no need to check in advance. */
2233 r = x*ldexp(1.0, (int)exp);
2234 }
2235#endif
2236 if (isinf(r))
2237 errno = ERANGE;
2238 }
2239
2240 if (errno && is_error(r, 1))
2241 return NULL;
2242 return PyFloat_FromDouble(r);
2243}
2244
2245
2246/*[clinic input]
2247math.modf
2248
2249 x: double
2250 /
2251
2252Return the fractional and integer parts of x.
2253
2254Both results carry the sign of x and are floats.
2255[clinic start generated code]*/
2256
2257static PyObject *
2258math_modf_impl(PyObject *module, double x)
2259/*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2260{
2261 double y;
2262 /* some platforms don't do the right thing for NaNs and
2263 infinities, so we take care of special cases directly. */
2264 if (isinf(x))
2265 return Py_BuildValue("(dd)", copysign(0., x), x);
2266 else if (isnan(x))
2267 return Py_BuildValue("(dd)", x, x);
2268
2269 errno = 0;
2270 x = modf(x, &y);
2271 return Py_BuildValue("(dd)", x, y);
2272}
2273
2274
2275/* A decent logarithm is easy to compute even for huge ints, but libm can't
2276 do that by itself -- loghelper can. func is log or log10, and name is
2277 "log" or "log10". Note that overflow of the result isn't possible: an int
2278 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2279 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2280 small enough to fit in an IEEE single. log and log10 are even smaller.
2281 However, intermediate overflow is possible for an int if the number of bits
2282 in that int is larger than PY_SSIZE_T_MAX. */
2283
2284static PyObject*
2285loghelper(PyObject* arg, double (*func)(double))
2286{
2287 /* If it is int, do it ourselves. */
2288 if (PyLong_Check(arg)) {
2289 double x, result;
2290 int64_t e;
2291
2292 /* Negative or zero inputs give a ValueError. */
2293 if (!_PyLong_IsPositive((PyLongObject *)arg)) {
2294 /* The input can be an arbitrary large integer, so we
2295 don't include it's value in the error message. */
2296 PyErr_SetString(PyExc_ValueError,
2297 "expected a positive input");
2298 return NULL;
2299 }
2300
2301 x = PyLong_AsDouble(arg);
2302 if (x == -1.0 && PyErr_Occurred()) {
2303 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2304 return NULL;
2305 /* Here the conversion to double overflowed, but it's possible
2306 to compute the log anyway. Clear the exception and continue. */
2307 PyErr_Clear();
2308 x = _PyLong_Frexp((PyLongObject *)arg, &e);
2309 assert(e >= 0);
2310 assert(!PyErr_Occurred());
2311 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2312 result = func(x) + func(2.0) * e;
2313 }
2314 else
2315 /* Successfully converted x to a double. */
2316 result = func(x);
2317 return PyFloat_FromDouble(result);
2318 }
2319
2320 /* Else let libm handle it by itself. */
2321 return math_1(arg, func, 0, "expected a positive input, got %s");
2322}
2323
2324
2325/* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2326 for support of multiple signatures */
2327static PyObject *
2328math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
2329{
2330 PyObject *num, *den;
2331 PyObject *ans;
2332
2333 if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2334 return NULL;
2335
2336 num = loghelper(args[0], m_log);
2337 if (num == NULL || nargs == 1)
2338 return num;
2339
2340 den = loghelper(args[1], m_log);
2341 if (den == NULL) {
2342 Py_DECREF(num);
2343 return NULL;
2344 }
2345
2346 ans = PyNumber_TrueDivide(num, den);
2347 Py_DECREF(num);
2348 Py_DECREF(den);
2349 return ans;
2350}
2351
2352PyDoc_STRVAR(math_log_doc,
2353"log(x, [base=math.e])\n\
2354Return the logarithm of x to the given base.\n\n\
2355If the base is not specified, returns the natural logarithm (base e) of x.");
2356
2357/*[clinic input]
2358math.log2
2359
2360 x: object
2361 /
2362
2363Return the base 2 logarithm of x.
2364[clinic start generated code]*/
2365
2366static PyObject *
2367math_log2(PyObject *module, PyObject *x)
2368/*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2369{
2370 return loghelper(x, m_log2);
2371}
2372
2373
2374/*[clinic input]
2375math.log10
2376
2377 x: object
2378 /
2379
2380Return the base 10 logarithm of x.
2381[clinic start generated code]*/
2382
2383static PyObject *
2384math_log10(PyObject *module, PyObject *x)
2385/*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2386{
2387 return loghelper(x, m_log10);
2388}
2389
2390
2391/*[clinic input]
2392math.fma
2393
2394 x: double
2395 y: double
2396 z: double
2397 /
2398
2399Fused multiply-add operation.
2400
2401Compute (x * y) + z with a single round.
2402[clinic start generated code]*/
2403
2404static PyObject *
2405math_fma_impl(PyObject *module, double x, double y, double z)
2406/*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2407{
2408 double r = fma(x, y, z);
2409
2410 /* Fast path: if we got a finite result, we're done. */
2411 if (isfinite(r)) {
2412 return PyFloat_FromDouble(r);
2413 }
2414
2415 /* Non-finite result. Raise an exception if appropriate, else return r. */
2416 if (isnan(r)) {
2417 if (!isnan(x) && !isnan(y) && !isnan(z)) {
2418 /* NaN result from non-NaN inputs. */
2419 PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2420 return NULL;
2421 }
2422 }
2423 else if (isfinite(x) && isfinite(y) && isfinite(z)) {
2424 /* Infinite result from finite inputs. */
2425 PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2426 return NULL;
2427 }
2428
2429 return PyFloat_FromDouble(r);
2430}
2431
2432
2433/*[clinic input]
2434math.fmod
2435
2436 x: double
2437 y: double
2438 /
2439
2440Return fmod(x, y), according to platform C.
2441
2442x % y may differ.
2443[clinic start generated code]*/
2444
2445static PyObject *
2446math_fmod_impl(PyObject *module, double x, double y)
2447/*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2448{
2449 double r;
2450 /* fmod(x, +/-Inf) returns x for finite x. */
2451 if (isinf(y) && isfinite(x))
2452 return PyFloat_FromDouble(x);
2453 errno = 0;
2454 r = fmod(x, y);
2455#ifdef _MSC_VER
2456 /* Windows (e.g. Windows 10 with MSC v.1916) loose sign
2457 for zero result. But C99+ says: "if y is nonzero, the result
2458 has the same sign as x".
2459 */
2460 if (r == 0.0 && y != 0.0) {
2461 r = copysign(r, x);
2462 }
2463#endif
2464 if (isnan(r)) {
2465 if (!isnan(x) && !isnan(y))
2466 errno = EDOM;
2467 else
2468 errno = 0;
2469 }
2470 if (errno && is_error(r, 1))
2471 return NULL;
2472 else
2473 return PyFloat_FromDouble(r);
2474}
2475
2476/*
2477Given a *vec* of values, compute the vector norm:
2478
2479 sqrt(sum(x ** 2 for x in vec))
2480
2481The *max* variable should be equal to the largest fabs(x).
2482The *n* variable is the length of *vec*.
2483If n==0, then *max* should be 0.0.
2484If an infinity is present in the vec, *max* should be INF.
2485The *found_nan* variable indicates whether some member of
2486the *vec* is a NaN.
2487
2488To avoid overflow/underflow and to achieve high accuracy giving results
2489that are almost always correctly rounded, four techniques are used:
2490
2491* lossless scaling using a power-of-two scaling factor
2492* accurate squaring using Veltkamp-Dekker splitting [1]
2493 or an equivalent with an fma() call
2494* compensated summation using a variant of the Neumaier algorithm [2]
2495* differential correction of the square root [3]
2496
2497The usual presentation of the Neumaier summation algorithm has an
2498expensive branch depending on which operand has the larger
2499magnitude. We avoid this cost by arranging the calculation so that
2500fabs(csum) is always as large as fabs(x).
2501
2502To establish the invariant, *csum* is initialized to 1.0 which is
2503always larger than x**2 after scaling or after division by *max*.
2504After the loop is finished, the initial 1.0 is subtracted out for a
2505net zero effect on the final sum. Since *csum* will be greater than
25061.0, the subtraction of 1.0 will not cause fractional digits to be
2507dropped from *csum*.
2508
2509To get the full benefit from compensated summation, the largest
2510addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2511scaling or division by *max* should not be skipped even if not
2512otherwise needed to prevent overflow or loss of precision.
2513
2514The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2515gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2516algorithm gives a *hi* value that is correctly rounded to half
2517precision. When a value at or below 1.0 is correctly rounded, it
2518never goes above 1.0. And when values at or below 1.0 are squared,
2519they remain at or below 1.0, thus preserving the summation invariant.
2520
2521Another interesting assertion is that csum+lo*lo == csum. In the loop,
2522each scaled vector element has a magnitude less than 1.0. After the
2523Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2524value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2525Given that csum >= 1.0, we have:
2526 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2527Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2528
2529To minimize loss of information during the accumulation of fractional
2530values, each term has a separate accumulator. This also breaks up
2531sequential dependencies in the inner loop so the CPU can maximize
2532floating-point throughput. [4] On an Apple M1 Max, hypot(*vec)
2533takes only 3.33 µsec when len(vec) == 1000.
2534
2535The square root differential correction is needed because a
2536correctly rounded square root of a correctly rounded sum of
2537squares can still be off by as much as one ulp.
2538
2539The differential correction starts with a value *x* that is
2540the difference between the square of *h*, the possibly inaccurately
2541rounded square root, and the accurately computed sum of squares.
2542The correction is the first order term of the Maclaurin series
2543expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2544
2545Essentially, this differential correction is equivalent to one
2546refinement step in Newton's divide-and-average square root
2547algorithm, effectively doubling the number of accurate bits.
2548This technique is used in Dekker's SQRT2 algorithm and again in
2549Borges' ALGORITHM 4 and 5.
2550
2551The hypot() function is faithfully rounded (less than 1 ulp error)
2552and usually correctly rounded (within 1/2 ulp). The squaring
2553step is exact. The Neumaier summation computes as if in doubled
2554precision (106 bits) and has the advantage that its input squares
2555are non-negative so that the condition number of the sum is one.
2556The square root with a differential correction is likewise computed
2557as if in doubled precision.
2558
2559For n <= 1000, prior to the final addition that rounds the overall
2560result, the internal accuracy of "h" together with its correction of
2561"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2562against a Decimal implementation with prec=300. After 100 million
2563trials, no incorrectly rounded examples were found. In addition,
2564perfect commutativity (all permutations are exactly equal) was
2565verified for 1 billion random inputs with n=5. [7]
2566
2567References:
2568
25691. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
25702. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
25713. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
25724. Data dependency graph: https://bugs.python.org/file49439/hypot.png
25735. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
25746. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
25757. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
2576
2577*/
2578
2579static inline double
2580vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2581{
2582 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2583 DoubleLength pr, sm;
2584 int max_e;
2585 Py_ssize_t i;
2586
2587 if (isinf(max)) {
2588 return max;
2589 }
2590 if (found_nan) {
2591 return Py_NAN;
2592 }
2593 if (max == 0.0 || n <= 1) {
2594 return max;
2595 }
2596 frexp(max, &max_e);
2597 if (max_e < -1023) {
2598 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
2599 for (i=0 ; i < n ; i++) {
2600 vec[i] /= DBL_MIN; // convert subnormals to normals
2601 }
2602 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2603 }
2604 scale = ldexp(1.0, -max_e);
2605 assert(max * scale >= 0.5);
2606 assert(max * scale < 1.0);
2607 for (i=0 ; i < n ; i++) {
2608 x = vec[i];
2609 assert(isfinite(x) && fabs(x) <= max);
2610 x *= scale; // lossless scaling
2611 assert(fabs(x) < 1.0);
2612 pr = dl_mul(x, x); // lossless squaring
2613 assert(pr.hi <= 1.0);
2614 sm = dl_fast_sum(csum, pr.hi); // lossless addition
2615 csum = sm.hi;
2616 frac1 += pr.lo; // lossy addition
2617 frac2 += sm.lo; // lossy addition
2618 }
2619 h = sqrt(csum - 1.0 + (frac1 + frac2));
2620 pr = dl_mul(-h, h);
2621 sm = dl_fast_sum(csum, pr.hi);
2622 csum = sm.hi;
2623 frac1 += pr.lo;
2624 frac2 += sm.lo;
2625 x = csum - 1.0 + (frac1 + frac2);
2626 h += x / (2.0 * h); // differential correction
2627 return h / scale;
2628}
2629
2630#define NUM_STACK_ELEMS 16
2631
2632/*[clinic input]
2633math.dist
2634
2635 p: object
2636 q: object
2637 /
2638
2639Return the Euclidean distance between two points p and q.
2640
2641The points should be specified as sequences (or iterables) of
2642coordinates. Both inputs must have the same dimension.
2643
2644Roughly equivalent to:
2645 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2646[clinic start generated code]*/
2647
2648static PyObject *
2649math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2650/*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2651{
2652 PyObject *item;
2653 double max = 0.0;
2654 double x, px, qx, result;
2655 Py_ssize_t i, m, n;
2656 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2657 double diffs_on_stack[NUM_STACK_ELEMS];
2658 double *diffs = diffs_on_stack;
2659
2660 if (!PyTuple_Check(p)) {
2661 p = PySequence_Tuple(p);
2662 if (p == NULL) {
2663 return NULL;
2664 }
2665 p_allocated = 1;
2666 }
2667 if (!PyTuple_Check(q)) {
2668 q = PySequence_Tuple(q);
2669 if (q == NULL) {
2670 if (p_allocated) {
2671 Py_DECREF(p);
2672 }
2673 return NULL;
2674 }
2675 q_allocated = 1;
2676 }
2677
2678 m = PyTuple_GET_SIZE(p);
2679 n = PyTuple_GET_SIZE(q);
2680 if (m != n) {
2681 PyErr_SetString(PyExc_ValueError,
2682 "both points must have the same number of dimensions");
2683 goto error_exit;
2684 }
2685 if (n > NUM_STACK_ELEMS) {
2686 diffs = (double *) PyMem_Malloc(n * sizeof(double));
2687 if (diffs == NULL) {
2688 PyErr_NoMemory();
2689 goto error_exit;
2690 }
2691 }
2692 for (i=0 ; i<n ; i++) {
2693 item = PyTuple_GET_ITEM(p, i);
2694 ASSIGN_DOUBLE(px, item, error_exit);
2695 item = PyTuple_GET_ITEM(q, i);
2696 ASSIGN_DOUBLE(qx, item, error_exit);
2697 x = fabs(px - qx);
2698 diffs[i] = x;
2699 found_nan |= isnan(x);
2700 if (x > max) {
2701 max = x;
2702 }
2703 }
2704 result = vector_norm(n, diffs, max, found_nan);
2705 if (diffs != diffs_on_stack) {
2706 PyMem_Free(diffs);
2707 }
2708 if (p_allocated) {
2709 Py_DECREF(p);
2710 }
2711 if (q_allocated) {
2712 Py_DECREF(q);
2713 }
2714 return PyFloat_FromDouble(result);
2715
2716 error_exit:
2717 if (diffs != diffs_on_stack) {
2718 PyMem_Free(diffs);
2719 }
2720 if (p_allocated) {
2721 Py_DECREF(p);
2722 }
2723 if (q_allocated) {
2724 Py_DECREF(q);
2725 }
2726 return NULL;
2727}
2728
2729/*[clinic input]
2730math.hypot
2731
2732 *coordinates as args: array
2733
2734Multidimensional Euclidean distance from the origin to a point.
2735
2736Roughly equivalent to:
2737 sqrt(sum(x**2 for x in coordinates))
2738
2739For a two dimensional point (x, y), gives the hypotenuse
2740using the Pythagorean theorem: sqrt(x*x + y*y).
2741
2742For example, the hypotenuse of a 3/4/5 right triangle is:
2743
2744 >>> hypot(3.0, 4.0)
2745 5.0
2746[clinic start generated code]*/
2747
2748static PyObject *
2749math_hypot_impl(PyObject *module, PyObject * const *args,
2750 Py_ssize_t args_length)
2751/*[clinic end generated code: output=c9de404e24370068 input=1bceaf7d4fdcd9c2]*/
2752{
2753 Py_ssize_t i;
2754 PyObject *item;
2755 double max = 0.0;
2756 double x, result;
2757 int found_nan = 0;
2758 double coord_on_stack[NUM_STACK_ELEMS];
2759 double *coordinates = coord_on_stack;
2760
2761 if (args_length > NUM_STACK_ELEMS) {
2762 coordinates = (double *) PyMem_Malloc(args_length * sizeof(double));
2763 if (coordinates == NULL) {
2764 return PyErr_NoMemory();
2765 }
2766 }
2767 for (i = 0; i < args_length; i++) {
2768 item = args[i];
2769 ASSIGN_DOUBLE(x, item, error_exit);
2770 x = fabs(x);
2771 coordinates[i] = x;
2772 found_nan |= isnan(x);
2773 if (x > max) {
2774 max = x;
2775 }
2776 }
2777 result = vector_norm(args_length, coordinates, max, found_nan);
2778 if (coordinates != coord_on_stack) {
2779 PyMem_Free(coordinates);
2780 }
2781 return PyFloat_FromDouble(result);
2782
2783 error_exit:
2784 if (coordinates != coord_on_stack) {
2785 PyMem_Free(coordinates);
2786 }
2787 return NULL;
2788}
2789
2790#undef NUM_STACK_ELEMS
2791
2792/** sumprod() ***************************************************************/
2793
2794/* Forward declaration */
2795static inline int _check_long_mult_overflow(long a, long b);
2796
2797static inline bool
2798long_add_would_overflow(long a, long b)
2799{
2800 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2801}
2802
2803/*[clinic input]
2804math.sumprod
2805
2806 p: object
2807 q: object
2808 /
2809
2810Return the sum of products of values from two iterables p and q.
2811
2812Roughly equivalent to:
2813
2814 sum(map(operator.mul, p, q, strict=True))
2815
2816For float and mixed int/float inputs, the intermediate products
2817and sums are computed with extended precision.
2818[clinic start generated code]*/
2819
2820static PyObject *
2821math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2822/*[clinic end generated code: output=6722dbfe60664554 input=a2880317828c61d2]*/
2823{
2824 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2825 PyObject *p_it, *q_it, *total;
2826 iternextfunc p_next, q_next;
2827 bool p_stopped = false, q_stopped = false;
2828 bool int_path_enabled = true, int_total_in_use = false;
2829 bool flt_path_enabled = true, flt_total_in_use = false;
2830 long int_total = 0;
2831 TripleLength flt_total = tl_zero;
2832
2833 p_it = PyObject_GetIter(p);
2834 if (p_it == NULL) {
2835 return NULL;
2836 }
2837 q_it = PyObject_GetIter(q);
2838 if (q_it == NULL) {
2839 Py_DECREF(p_it);
2840 return NULL;
2841 }
2842 total = PyLong_FromLong(0);
2843 if (total == NULL) {
2844 Py_DECREF(p_it);
2845 Py_DECREF(q_it);
2846 return NULL;
2847 }
2848 p_next = *Py_TYPE(p_it)->tp_iternext;
2849 q_next = *Py_TYPE(q_it)->tp_iternext;
2850 while (1) {
2851 bool finished;
2852
2853 assert (p_i == NULL);
2854 assert (q_i == NULL);
2855 assert (term_i == NULL);
2856 assert (new_total == NULL);
2857
2858 assert (p_it != NULL);
2859 assert (q_it != NULL);
2860 assert (total != NULL);
2861
2862 p_i = p_next(p_it);
2863 if (p_i == NULL) {
2864 if (PyErr_Occurred()) {
2865 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2866 goto err_exit;
2867 }
2868 PyErr_Clear();
2869 }
2870 p_stopped = true;
2871 }
2872 q_i = q_next(q_it);
2873 if (q_i == NULL) {
2874 if (PyErr_Occurred()) {
2875 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2876 goto err_exit;
2877 }
2878 PyErr_Clear();
2879 }
2880 q_stopped = true;
2881 }
2882 if (p_stopped != q_stopped) {
2883 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2884 goto err_exit;
2885 }
2886 finished = p_stopped & q_stopped;
2887
2888 if (int_path_enabled) {
2889
2890 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2891 int overflow;
2892 long int_p, int_q, int_prod;
2893
2894 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2895 if (overflow) {
2896 goto finalize_int_path;
2897 }
2898 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2899 if (overflow) {
2900 goto finalize_int_path;
2901 }
2902 if (_check_long_mult_overflow(int_p, int_q)) {
2903 goto finalize_int_path;
2904 }
2905 int_prod = int_p * int_q;
2906 if (long_add_would_overflow(int_total, int_prod)) {
2907 goto finalize_int_path;
2908 }
2909 int_total += int_prod;
2910 int_total_in_use = true;
2911 Py_CLEAR(p_i);
2912 Py_CLEAR(q_i);
2913 continue;
2914 }
2915
2916 finalize_int_path:
2917 // We're finished, overflowed, or have a non-int
2918 int_path_enabled = false;
2919 if (int_total_in_use) {
2920 term_i = PyLong_FromLong(int_total);
2921 if (term_i == NULL) {
2922 goto err_exit;
2923 }
2924 new_total = PyNumber_Add(total, term_i);
2925 if (new_total == NULL) {
2926 goto err_exit;
2927 }
2928 Py_SETREF(total, new_total);
2929 new_total = NULL;
2930 Py_CLEAR(term_i);
2931 int_total = 0; // An ounce of prevention, ...
2932 int_total_in_use = false;
2933 }
2934 }
2935
2936 if (flt_path_enabled) {
2937
2938 if (!finished) {
2939 double flt_p, flt_q;
2940 bool p_type_float = PyFloat_CheckExact(p_i);
2941 bool q_type_float = PyFloat_CheckExact(q_i);
2942 if (p_type_float && q_type_float) {
2943 flt_p = PyFloat_AS_DOUBLE(p_i);
2944 flt_q = PyFloat_AS_DOUBLE(q_i);
2945 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2946 /* We care about float/int pairs and int/float pairs because
2947 they arise naturally in several use cases such as price
2948 times quantity, measurements with integer weights, or
2949 data selected by a vector of bools. */
2950 flt_p = PyFloat_AS_DOUBLE(p_i);
2951 flt_q = PyLong_AsDouble(q_i);
2952 if (flt_q == -1.0 && PyErr_Occurred()) {
2953 PyErr_Clear();
2954 goto finalize_flt_path;
2955 }
2956 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
2957 flt_q = PyFloat_AS_DOUBLE(q_i);
2958 flt_p = PyLong_AsDouble(p_i);
2959 if (flt_p == -1.0 && PyErr_Occurred()) {
2960 PyErr_Clear();
2961 goto finalize_flt_path;
2962 }
2963 } else {
2964 goto finalize_flt_path;
2965 }
2966 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2967 if (isfinite(new_flt_total.hi)) {
2968 flt_total = new_flt_total;
2969 flt_total_in_use = true;
2970 Py_CLEAR(p_i);
2971 Py_CLEAR(q_i);
2972 continue;
2973 }
2974 }
2975
2976 finalize_flt_path:
2977 // We're finished, overflowed, have a non-float, or got a non-finite value
2978 flt_path_enabled = false;
2979 if (flt_total_in_use) {
2980 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2981 if (term_i == NULL) {
2982 goto err_exit;
2983 }
2984 new_total = PyNumber_Add(total, term_i);
2985 if (new_total == NULL) {
2986 goto err_exit;
2987 }
2988 Py_SETREF(total, new_total);
2989 new_total = NULL;
2990 Py_CLEAR(term_i);
2991 flt_total = tl_zero;
2992 flt_total_in_use = false;
2993 }
2994 }
2995
2996 assert(!int_total_in_use);
2997 assert(!flt_total_in_use);
2998 if (finished) {
2999 goto normal_exit;
3000 }
3001 term_i = PyNumber_Multiply(p_i, q_i);
3002 if (term_i == NULL) {
3003 goto err_exit;
3004 }
3005 new_total = PyNumber_Add(total, term_i);
3006 if (new_total == NULL) {
3007 goto err_exit;
3008 }
3009 Py_SETREF(total, new_total);
3010 new_total = NULL;
3011 Py_CLEAR(p_i);
3012 Py_CLEAR(q_i);
3013 Py_CLEAR(term_i);
3014 }
3015
3016 normal_exit:
3017 Py_DECREF(p_it);
3018 Py_DECREF(q_it);
3019 return total;
3020
3021 err_exit:
3022 Py_DECREF(p_it);
3023 Py_DECREF(q_it);
3024 Py_DECREF(total);
3025 Py_XDECREF(p_i);
3026 Py_XDECREF(q_i);
3027 Py_XDECREF(term_i);
3028 Py_XDECREF(new_total);
3029 return NULL;
3030}
3031
3032
3033/* pow can't use math_2, but needs its own wrapper: the problem is
3034 that an infinite result can arise either as a result of overflow
3035 (in which case OverflowError should be raised) or as a result of
3036 e.g. 0.**-5. (for which ValueError needs to be raised.)
3037*/
3038
3039/*[clinic input]
3040math.pow
3041
3042 x: double
3043 y: double
3044 /
3045
3046Return x**y (x to the power of y).
3047[clinic start generated code]*/
3048
3049static PyObject *
3050math_pow_impl(PyObject *module, double x, double y)
3051/*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
3052{
3053 double r;
3054 int odd_y;
3055
3056 /* deal directly with IEEE specials, to cope with problems on various
3057 platforms whose semantics don't exactly match C99 */
3058 r = 0.; /* silence compiler warning */
3059 if (!isfinite(x) || !isfinite(y)) {
3060 errno = 0;
3061 if (isnan(x))
3062 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
3063 else if (isnan(y))
3064 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
3065 else if (isinf(x)) {
3066 odd_y = isfinite(y) && fmod(fabs(y), 2.0) == 1.0;
3067 if (y > 0.)
3068 r = odd_y ? x : fabs(x);
3069 else if (y == 0.)
3070 r = 1.;
3071 else /* y < 0. */
3072 r = odd_y ? copysign(0., x) : 0.;
3073 }
3074 else {
3075 assert(isinf(y));
3076 if (fabs(x) == 1.0)
3077 r = 1.;
3078 else if (y > 0. && fabs(x) > 1.0)
3079 r = y;
3080 else if (y < 0. && fabs(x) < 1.0) {
3081 r = -y; /* result is +inf */
3082 }
3083 else
3084 r = 0.;
3085 }
3086 }
3087 else {
3088 /* let libm handle finite**finite */
3089 errno = 0;
3090 r = pow(x, y);
3091 /* a NaN result should arise only from (-ve)**(finite
3092 non-integer); in this case we want to raise ValueError. */
3093 if (!isfinite(r)) {
3094 if (isnan(r)) {
3095 errno = EDOM;
3096 }
3097 /*
3098 an infinite result here arises either from:
3099 (A) (+/-0.)**negative (-> divide-by-zero)
3100 (B) overflow of x**y with x and y finite
3101 */
3102 else if (isinf(r)) {
3103 if (x == 0.)
3104 errno = EDOM;
3105 else
3106 errno = ERANGE;
3107 }
3108 }
3109 }
3110
3111 if (errno && is_error(r, 1))
3112 return NULL;
3113 else
3114 return PyFloat_FromDouble(r);
3115}
3116
3117
3118static const double degToRad = Py_MATH_PI / 180.0;
3119static const double radToDeg = 180.0 / Py_MATH_PI;
3120
3121/*[clinic input]
3122math.degrees
3123
3124 x: double
3125 /
3126
3127Convert angle x from radians to degrees.
3128[clinic start generated code]*/
3129
3130static PyObject *
3131math_degrees_impl(PyObject *module, double x)
3132/*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3133{
3134 return PyFloat_FromDouble(x * radToDeg);
3135}
3136
3137
3138/*[clinic input]
3139math.radians
3140
3141 x: double
3142 /
3143
3144Convert angle x from degrees to radians.
3145[clinic start generated code]*/
3146
3147static PyObject *
3148math_radians_impl(PyObject *module, double x)
3149/*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3150{
3151 return PyFloat_FromDouble(x * degToRad);
3152}
3153
3154
3155/*[clinic input]
3156math.isfinite
3157
3158 x: double
3159 /
3160
3161Return True if x is neither an infinity nor a NaN, and False otherwise.
3162[clinic start generated code]*/
3163
3164static PyObject *
3165math_isfinite_impl(PyObject *module, double x)
3166/*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3167{
3168 return PyBool_FromLong((long)isfinite(x));
3169}
3170
3171
3172/*[clinic input]
3173math.isnormal
3174
3175 x: double
3176 /
3177
3178Return True if x is normal, and False otherwise.
3179[clinic start generated code]*/
3180
3181static PyObject *
3182math_isnormal_impl(PyObject *module, double x)
3183/*[clinic end generated code: output=c7b302b5b89c3541 input=fdaa00c58aa7bc17]*/
3184{
3185 return PyBool_FromLong(isnormal(x));
3186}
3187
3188
3189/*[clinic input]
3190math.issubnormal
3191
3192 x: double
3193 /
3194
3195Return True if x is subnormal, and False otherwise.
3196[clinic start generated code]*/
3197
3198static PyObject *
3199math_issubnormal_impl(PyObject *module, double x)
3200/*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/
3201{
3202#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
3203 return PyBool_FromLong(issubnormal(x));
3204#else
3205 return PyBool_FromLong(isfinite(x) && x && !isnormal(x));
3206#endif
3207}
3208
3209
3210/*[clinic input]
3211math.isnan
3212
3213 x: double
3214 /
3215
3216Return True if x is a NaN (not a number), and False otherwise.
3217[clinic start generated code]*/
3218
3219static PyObject *
3220math_isnan_impl(PyObject *module, double x)
3221/*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3222{
3223 return PyBool_FromLong((long)isnan(x));
3224}
3225
3226
3227/*[clinic input]
3228math.isinf
3229
3230 x: double
3231 /
3232
3233Return True if x is a positive or negative infinity, and False otherwise.
3234[clinic start generated code]*/
3235
3236static PyObject *
3237math_isinf_impl(PyObject *module, double x)
3238/*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3239{
3240 return PyBool_FromLong((long)isinf(x));
3241}
3242
3243
3244/*[clinic input]
3245math.isclose -> bool
3246
3247 a: double
3248 b: double
3249 *
3250 rel_tol: double = 1e-09
3251 maximum difference for being considered "close", relative to the
3252 magnitude of the input values
3253 abs_tol: double = 0.0
3254 maximum difference for being considered "close", regardless of the
3255 magnitude of the input values
3256
3257Determine whether two floating-point numbers are close in value.
3258
3259Return True if a is close in value to b, and False otherwise.
3260
3261For the values to be considered close, the difference between them
3262must be smaller than at least one of the tolerances.
3263
3264-inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3265is, NaN is not close to anything, even itself. inf and -inf are
3266only close to themselves.
3267[clinic start generated code]*/
3268
3269static int
3270math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3271 double abs_tol)
3272/*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
3273{
3274 double diff = 0.0;
3275
3276 /* sanity check on the inputs */
3277 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "tolerances must be non-negative");
3280 return -1;
3281 }
3282
3283 if ( a == b ) {
3284 /* short circuit exact equality -- needed to catch two infinities of
3285 the same sign. And perhaps speeds things up a bit sometimes.
3286 */
3287 return 1;
3288 }
3289
3290 /* This catches the case of two infinities of opposite sign, or
3291 one infinity and one finite number. Two infinities of opposite
3292 sign would otherwise have an infinite relative tolerance.
3293 Two infinities of the same sign are caught by the equality check
3294 above.
3295 */
3296
3297 if (isinf(a) || isinf(b)) {
3298 return 0;
3299 }
3300
3301 /* now do the regular computation
3302 this is essentially the "weak" test from the Boost library
3303 */
3304
3305 diff = fabs(b - a);
3306
3307 return (((diff <= fabs(rel_tol * b)) ||
3308 (diff <= fabs(rel_tol * a))) ||
3309 (diff <= abs_tol));
3310}
3311
3312static inline int
3313_check_long_mult_overflow(long a, long b) {
3314
3315 /* From Python2's int_mul code:
3316
3317 Integer overflow checking for * is painful: Python tried a couple ways, but
3318 they didn't work on all platforms, or failed in endcases (a product of
3319 -sys.maxint-1 has been a particular pain).
3320
3321 Here's another way:
3322
3323 The native long product x*y is either exactly right or *way* off, being
3324 just the last n bits of the true product, where n is the number of bits
3325 in a long (the delivered product is the true product plus i*2**n for
3326 some integer i).
3327
3328 The native double product (double)x * (double)y is subject to three
3329 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3330 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3331 But, unlike the native long product, it's not in *range* trouble: even
3332 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3333 dynamic range of a double. So the leading 50 (or so) bits of the double
3334 product are correct.
3335
3336 We check these two ways against each other, and declare victory if they're
3337 approximately the same. Else, because the native long product is the only
3338 one that can lose catastrophic amounts of information, it's the native long
3339 product that must have overflowed.
3340
3341 */
3342
3343 long longprod = (long)((unsigned long)a * b);
3344 double doubleprod = (double)a * (double)b;
3345 double doubled_longprod = (double)longprod;
3346
3347 if (doubled_longprod == doubleprod) {
3348 return 0;
3349 }
3350
3351 const double diff = doubled_longprod - doubleprod;
3352 const double absdiff = diff >= 0.0 ? diff : -diff;
3353 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3354
3355 if (32.0 * absdiff <= absprod) {
3356 return 0;
3357 }
3358
3359 return 1;
3360}
3361
3362/*[clinic input]
3363math.prod
3364
3365 iterable: object
3366 /
3367 *
3368 start: object(c_default="NULL") = 1
3369
3370Calculate the product of all the elements in the input iterable.
3371
3372The default start value for the product is 1.
3373
3374When the iterable is empty, return the start value. This function is
3375intended specifically for use with numeric values and may reject
3376non-numeric types.
3377[clinic start generated code]*/
3378
3379static PyObject *
3380math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3381/*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3382{
3383 PyObject *result = start;
3384 PyObject *temp, *item, *iter;
3385
3386 iter = PyObject_GetIter(iterable);
3387 if (iter == NULL) {
3388 return NULL;
3389 }
3390
3391 if (result == NULL) {
3392 result = _PyLong_GetOne();
3393 }
3394 Py_INCREF(result);
3395#ifndef SLOW_PROD
3396 /* Fast paths for integers keeping temporary products in C.
3397 * Assumes all inputs are the same type.
3398 * If the assumption fails, default to use PyObjects instead.
3399 */
3400 if (PyLong_CheckExact(result)) {
3401 int overflow;
3402 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3403 /* If this already overflowed, don't even enter the loop. */
3404 if (overflow == 0) {
3405 Py_SETREF(result, NULL);
3406 }
3407 /* Loop over all the items in the iterable until we finish, we overflow
3408 * or we found a non integer element */
3409 while (result == NULL) {
3410 item = PyIter_Next(iter);
3411 if (item == NULL) {
3412 Py_DECREF(iter);
3413 if (PyErr_Occurred()) {
3414 return NULL;
3415 }
3416 return PyLong_FromLong(i_result);
3417 }
3418 if (PyLong_CheckExact(item)) {
3419 long b = PyLong_AsLongAndOverflow(item, &overflow);
3420 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3421 long x = i_result * b;
3422 i_result = x;
3423 Py_DECREF(item);
3424 continue;
3425 }
3426 }
3427 /* Either overflowed or is not an int.
3428 * Restore real objects and process normally */
3429 result = PyLong_FromLong(i_result);
3430 if (result == NULL) {
3431 Py_DECREF(item);
3432 Py_DECREF(iter);
3433 return NULL;
3434 }
3435 temp = PyNumber_Multiply(result, item);
3436 Py_DECREF(result);
3437 Py_DECREF(item);
3438 result = temp;
3439 if (result == NULL) {
3440 Py_DECREF(iter);
3441 return NULL;
3442 }
3443 }
3444 }
3445
3446 /* Fast paths for floats keeping temporary products in C.
3447 * Assumes all inputs are the same type.
3448 * If the assumption fails, default to use PyObjects instead.
3449 */
3450 if (PyFloat_CheckExact(result)) {
3451 double f_result = PyFloat_AS_DOUBLE(result);
3452 Py_SETREF(result, NULL);
3453 while(result == NULL) {
3454 item = PyIter_Next(iter);
3455 if (item == NULL) {
3456 Py_DECREF(iter);
3457 if (PyErr_Occurred()) {
3458 return NULL;
3459 }
3460 return PyFloat_FromDouble(f_result);
3461 }
3462 if (PyFloat_CheckExact(item)) {
3463 f_result *= PyFloat_AS_DOUBLE(item);
3464 Py_DECREF(item);
3465 continue;
3466 }
3467 if (PyLong_CheckExact(item)) {
3468 long value;
3469 int overflow;
3470 value = PyLong_AsLongAndOverflow(item, &overflow);
3471 if (!overflow) {
3472 f_result *= (double)value;
3473 Py_DECREF(item);
3474 continue;
3475 }
3476 }
3477 result = PyFloat_FromDouble(f_result);
3478 if (result == NULL) {
3479 Py_DECREF(item);
3480 Py_DECREF(iter);
3481 return NULL;
3482 }
3483 temp = PyNumber_Multiply(result, item);
3484 Py_DECREF(result);
3485 Py_DECREF(item);
3486 result = temp;
3487 if (result == NULL) {
3488 Py_DECREF(iter);
3489 return NULL;
3490 }
3491 }
3492 }
3493#endif
3494 /* Consume rest of the iterable (if any) that could not be handled
3495 * by specialized functions above.*/
3496 for(;;) {
3497 item = PyIter_Next(iter);
3498 if (item == NULL) {
3499 /* error, or end-of-sequence */
3500 if (PyErr_Occurred()) {
3501 Py_SETREF(result, NULL);
3502 }
3503 break;
3504 }
3505 temp = PyNumber_Multiply(result, item);
3506 Py_DECREF(result);
3507 Py_DECREF(item);
3508 result = temp;
3509 if (result == NULL)
3510 break;
3511 }
3512 Py_DECREF(iter);
3513 return result;
3514}
3515
3516
3517/* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3518
3519Python code to generate the values:
3520
3521 import math
3522
3523 for n in range(128):
3524 fac = math.factorial(n)
3525 fac_odd_part = fac // (fac & -fac)
3526 reduced_fac_odd_part = fac_odd_part % (2**64)
3527 print(f"{reduced_fac_odd_part:#018x}u")
3528*/
3529static const uint64_t reduced_factorial_odd_part[] = {
3530 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3531 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3532 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3533 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3534 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3535 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3536 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3537 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3538 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3539 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3540 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3541 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3542 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3543 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3544 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3545 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3546 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3547 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3548 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3549 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3550 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3551 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3552 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3553 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3554 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3555 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3556 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3557 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3558 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3559 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3560 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3561 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3562};
3563
3564/* inverses of reduced_factorial_odd_part values modulo 2**64.
3565
3566Python code to generate the values:
3567
3568 import math
3569
3570 for n in range(128):
3571 fac = math.factorial(n)
3572 fac_odd_part = fac // (fac & -fac)
3573 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3574 print(f"{inverted_fac_odd_part:#018x}u")
3575*/
3576static const uint64_t inverted_factorial_odd_part[] = {
3577 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3578 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3579 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3580 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3581 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3582 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3583 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3584 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3585 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3586 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3587 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3588 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3589 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3590 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3591 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3592 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3593 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3594 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3595 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3596 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3597 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3598 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3599 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3600 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3601 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3602 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3603 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3604 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3605 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3606 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3607 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3608 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3609};
3610
3611/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3612
3613Python code to generate the values:
3614
3615import math
3616
3617for n in range(128):
3618 fac = math.factorial(n)
3619 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3620 print(fac_trailing_zeros)
3621*/
3622
3623static const uint8_t factorial_trailing_zeros[] = {
3624 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3625 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3626 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3627 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3628 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3629 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3630 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3631 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3632};
3633
3634/* Number of permutations and combinations.
3635 * P(n, k) = n! / (n-k)!
3636 * C(n, k) = P(n, k) / k!
3637 */
3638
3639/* Calculate C(n, k) for n in the 63-bit range. */
3640static PyObject *
3641perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3642{
3643 assert(k != 0);
3644
3645 /* For small enough n and k the result fits in the 64-bit range and can
3646 * be calculated without allocating intermediate PyLong objects. */
3647 if (iscomb) {
3648 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3649 * fits into a uint64_t. Exclude k = 1, because the second fast
3650 * path is faster for this case.*/
3651 static const unsigned char fast_comb_limits1[] = {
3652 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3653 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3654 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3655 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3656 67, 67, 67, // 32-34
3657 };
3658 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3659 /*
3660 comb(n, k) fits into a uint64_t. We compute it as
3661
3662 comb_odd_part << shift
3663
3664 where 2**shift is the largest power of two dividing comb(n, k)
3665 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3666 calculated efficiently via arithmetic modulo 2**64, using three
3667 lookups and two uint64_t multiplications.
3668 */
3669 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3670 * inverted_factorial_odd_part[k]
3671 * inverted_factorial_odd_part[n - k];
3672 int shift = factorial_trailing_zeros[n]
3673 - factorial_trailing_zeros[k]
3674 - factorial_trailing_zeros[n - k];
3675 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3676 }
3677
3678 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3679 * fits into a long long (which is at least 64 bit). Only contains
3680 * items larger than in fast_comb_limits1. */
3681 static const unsigned long long fast_comb_limits2[] = {
3682 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3683 746, 453, 308, 227, 178, 147, // 8-13
3684 };
3685 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3686 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3687 unsigned long long result = n;
3688 for (unsigned long long i = 1; i < k;) {
3689 result *= --n;
3690 result /= ++i;
3691 }
3692 return PyLong_FromUnsignedLongLong(result);
3693 }
3694 }
3695 else {
3696 /* Maps k to the maximal n so that k <= n and P(n, k)
3697 * fits into a long long (which is at least 64 bit). */
3698 static const unsigned long long fast_perm_limits[] = {
3699 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3700 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3701 24, 22, 21, 20, 20, // 16-20
3702 };
3703 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3704 if (n <= 127) {
3705 /* P(n, k) fits into a uint64_t. */
3706 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3707 * inverted_factorial_odd_part[n - k];
3708 int shift = factorial_trailing_zeros[n]
3709 - factorial_trailing_zeros[n - k];
3710 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3711 }
3712
3713 /* P(n, k) = P(n, k-1) * (n-k+1) */
3714 unsigned long long result = n;
3715 for (unsigned long long i = 1; i < k;) {
3716 result *= --n;
3717 ++i;
3718 }
3719 return PyLong_FromUnsignedLongLong(result);
3720 }
3721 }
3722
3723 /* For larger n use recursive formulas:
3724 *
3725 * P(n, k) = P(n, j) * P(n-j, k-j)
3726 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3727 */
3728 unsigned long long j = k / 2;
3729 PyObject *a, *b;
3730 a = perm_comb_small(n, j, iscomb);
3731 if (a == NULL) {
3732 return NULL;
3733 }
3734 b = perm_comb_small(n - j, k - j, iscomb);
3735 if (b == NULL) {
3736 goto error;
3737 }
3738 Py_SETREF(a, PyNumber_Multiply(a, b));
3739 Py_DECREF(b);
3740 if (iscomb && a != NULL) {
3741 b = perm_comb_small(k, j, 1);
3742 if (b == NULL) {
3743 goto error;
3744 }
3745 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3746 Py_DECREF(b);
3747 }
3748 return a;
3749
3750error:
3751 Py_DECREF(a);
3752 return NULL;
3753}
3754
3755/* Calculate P(n, k) or C(n, k) using recursive formulas.
3756 * It is more efficient than sequential multiplication thanks to
3757 * Karatsuba multiplication.
3758 */
3759static PyObject *
3760perm_comb(PyObject *n, unsigned long long k, int iscomb)
3761{
3762 if (k == 0) {
3763 return PyLong_FromLong(1);
3764 }
3765 if (k == 1) {
3766 return Py_NewRef(n);
3767 }
3768
3769 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3770 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3771 unsigned long long j = k / 2;
3772 PyObject *a, *b;
3773 a = perm_comb(n, j, iscomb);
3774 if (a == NULL) {
3775 return NULL;
3776 }
3777 PyObject *t = PyLong_FromUnsignedLongLong(j);
3778 if (t == NULL) {
3779 goto error;
3780 }
3781 n = PyNumber_Subtract(n, t);
3782 Py_DECREF(t);
3783 if (n == NULL) {
3784 goto error;
3785 }
3786 b = perm_comb(n, k - j, iscomb);
3787 Py_DECREF(n);
3788 if (b == NULL) {
3789 goto error;
3790 }
3791 Py_SETREF(a, PyNumber_Multiply(a, b));
3792 Py_DECREF(b);
3793 if (iscomb && a != NULL) {
3794 b = perm_comb_small(k, j, 1);
3795 if (b == NULL) {
3796 goto error;
3797 }
3798 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3799 Py_DECREF(b);
3800 }
3801 return a;
3802
3803error:
3804 Py_DECREF(a);
3805 return NULL;
3806}
3807
3808/*[clinic input]
3809math.perm
3810
3811 n: object
3812 k: object = None
3813 /
3814
3815Number of ways to choose k items from n items without repetition and with order.
3816
3817Evaluates to n! / (n - k)! when k <= n and evaluates
3818to zero when k > n.
3819
3820If k is not specified or is None, then k defaults to n
3821and the function returns n!.
3822
3823Raises TypeError if either of the arguments are not integers.
3824Raises ValueError if either of the arguments are negative.
3825[clinic start generated code]*/
3826
3827static PyObject *
3828math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3829/*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3830{
3831 PyObject *result = NULL;
3832 int overflow, cmp;
3833 long long ki, ni;
3834
3835 if (k == Py_None) {
3836 return math_factorial(module, n);
3837 }
3838 n = PyNumber_Index(n);
3839 if (n == NULL) {
3840 return NULL;
3841 }
3842 k = PyNumber_Index(k);
3843 if (k == NULL) {
3844 Py_DECREF(n);
3845 return NULL;
3846 }
3847 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3848
3849 if (_PyLong_IsNegative((PyLongObject *)n)) {
3850 PyErr_SetString(PyExc_ValueError,
3851 "n must be a non-negative integer");
3852 goto error;
3853 }
3854 if (_PyLong_IsNegative((PyLongObject *)k)) {
3855 PyErr_SetString(PyExc_ValueError,
3856 "k must be a non-negative integer");
3857 goto error;
3858 }
3859
3860 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3861 if (cmp != 0) {
3862 if (cmp > 0) {
3863 result = PyLong_FromLong(0);
3864 goto done;
3865 }
3866 goto error;
3867 }
3868
3869 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3870 assert(overflow >= 0 && !PyErr_Occurred());
3871 if (overflow > 0) {
3872 PyErr_Format(PyExc_OverflowError,
3873 "k must not exceed %lld",
3874 LLONG_MAX);
3875 goto error;
3876 }
3877 assert(ki >= 0);
3878
3879 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3880 assert(overflow >= 0 && !PyErr_Occurred());
3881 if (!overflow && ki > 1) {
3882 assert(ni >= 0);
3883 result = perm_comb_small((unsigned long long)ni,
3884 (unsigned long long)ki, 0);
3885 }
3886 else {
3887 result = perm_comb(n, (unsigned long long)ki, 0);
3888 }
3889
3890done:
3891 Py_DECREF(n);
3892 Py_DECREF(k);
3893 return result;
3894
3895error:
3896 Py_DECREF(n);
3897 Py_DECREF(k);
3898 return NULL;
3899}
3900
3901/*[clinic input]
3902math.comb
3903
3904 n: object
3905 k: object
3906 /
3907
3908Number of ways to choose k items from n items without repetition and without order.
3909
3910Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3911to zero when k > n.
3912
3913Also called the binomial coefficient because it is equivalent
3914to the coefficient of k-th term in polynomial expansion of the
3915expression (1 + x)**n.
3916
3917Raises TypeError if either of the arguments are not integers.
3918Raises ValueError if either of the arguments are negative.
3919
3920[clinic start generated code]*/
3921
3922static PyObject *
3923math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3924/*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3925{
3926 PyObject *result = NULL, *temp;
3927 int overflow, cmp;
3928 long long ki, ni;
3929
3930 n = PyNumber_Index(n);
3931 if (n == NULL) {
3932 return NULL;
3933 }
3934 k = PyNumber_Index(k);
3935 if (k == NULL) {
3936 Py_DECREF(n);
3937 return NULL;
3938 }
3939 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3940
3941 if (_PyLong_IsNegative((PyLongObject *)n)) {
3942 PyErr_SetString(PyExc_ValueError,
3943 "n must be a non-negative integer");
3944 goto error;
3945 }
3946 if (_PyLong_IsNegative((PyLongObject *)k)) {
3947 PyErr_SetString(PyExc_ValueError,
3948 "k must be a non-negative integer");
3949 goto error;
3950 }
3951
3952 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3953 assert(overflow >= 0 && !PyErr_Occurred());
3954 if (!overflow) {
3955 assert(ni >= 0);
3956 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3957 assert(overflow >= 0 && !PyErr_Occurred());
3958 if (overflow || ki > ni) {
3959 result = PyLong_FromLong(0);
3960 goto done;
3961 }
3962 assert(ki >= 0);
3963
3964 ki = Py_MIN(ki, ni - ki);
3965 if (ki > 1) {
3966 result = perm_comb_small((unsigned long long)ni,
3967 (unsigned long long)ki, 1);
3968 goto done;
3969 }
3970 /* For k == 1 just return the original n in perm_comb(). */
3971 }
3972 else {
3973 /* k = min(k, n - k) */
3974 temp = PyNumber_Subtract(n, k);
3975 if (temp == NULL) {
3976 goto error;
3977 }
3978 assert(PyLong_Check(temp));
3979 if (_PyLong_IsNegative((PyLongObject *)temp)) {
3980 Py_DECREF(temp);
3981 result = PyLong_FromLong(0);
3982 goto done;
3983 }
3984 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3985 if (cmp > 0) {
3986 Py_SETREF(k, temp);
3987 }
3988 else {
3989 Py_DECREF(temp);
3990 if (cmp < 0) {
3991 goto error;
3992 }
3993 }
3994
3995 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3996 assert(overflow >= 0 && !PyErr_Occurred());
3997 if (overflow) {
3998 PyErr_Format(PyExc_OverflowError,
3999 "min(n - k, k) must not exceed %lld",
4000 LLONG_MAX);
4001 goto error;
4002 }
4003 assert(ki >= 0);
4004 }
4005
4006 result = perm_comb(n, (unsigned long long)ki, 1);
4007
4008done:
4009 Py_DECREF(n);
4010 Py_DECREF(k);
4011 return result;
4012
4013error:
4014 Py_DECREF(n);
4015 Py_DECREF(k);
4016 return NULL;
4017}
4018
4019
4020/*[clinic input]
4021math.nextafter
4022
4023 x: double
4024 y: double
4025 /
4026 *
4027 steps: object = None
4028
4029Return the floating-point value the given number of steps after x towards y.
4030
4031If steps is not specified or is None, it defaults to 1.
4032
4033Raises a TypeError, if x or y is not a double, or if steps is not an integer.
4034Raises ValueError if steps is negative.
4035[clinic start generated code]*/
4036
4037static PyObject *
4038math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
4039/*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
4040{
4041#if defined(_AIX)
4042 if (x == y) {
4043 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
4044 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
4045 return PyFloat_FromDouble(y);
4046 }
4047 if (isnan(x)) {
4048 return PyFloat_FromDouble(x);
4049 }
4050 if (isnan(y)) {
4051 return PyFloat_FromDouble(y);
4052 }
4053#endif
4054 if (steps == Py_None) {
4055 // fast path: we default to one step.
4056 return PyFloat_FromDouble(nextafter(x, y));
4057 }
4058 steps = PyNumber_Index(steps);
4059 if (steps == NULL) {
4060 return NULL;
4061 }
4062 assert(PyLong_CheckExact(steps));
4063 if (_PyLong_IsNegative((PyLongObject *)steps)) {
4064 PyErr_SetString(PyExc_ValueError,
4065 "steps must be a non-negative integer");
4066 Py_DECREF(steps);
4067 return NULL;
4068 }
4069
4070 unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
4071 // Conveniently, uint64_t and double have the same number of bits
4072 // on all the platforms we care about.
4073 // So if an overflow occurs, we can just use UINT64_MAX.
4074 Py_DECREF(steps);
4075 if (usteps_ull >= UINT64_MAX) {
4076 // This branch includes the case where an error occurred, since
4077 // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
4078 // usteps_ull can be strictly larger than UINT64_MAX on a machine
4079 // where unsigned long long has width > 64 bits.
4080 if (PyErr_Occurred()) {
4081 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
4082 PyErr_Clear();
4083 }
4084 else {
4085 return NULL;
4086 }
4087 }
4088 usteps_ull = UINT64_MAX;
4089 }
4090 assert(usteps_ull <= UINT64_MAX);
4091 uint64_t usteps = (uint64_t)usteps_ull;
4092
4093 if (usteps == 0) {
4094 return PyFloat_FromDouble(x);
4095 }
4096 if (isnan(x)) {
4097 return PyFloat_FromDouble(x);
4098 }
4099 if (isnan(y)) {
4100 return PyFloat_FromDouble(y);
4101 }
4102
4103 // We assume that double and uint64_t have the same endianness.
4104 // This is not guaranteed by the C-standard, but it is true for
4105 // all platforms we care about. (The most likely form of violation
4106 // would be a "mixed-endian" double.)
4107 union pun {double f; uint64_t i;};
4108 union pun ux = {x}, uy = {y};
4109 if (ux.i == uy.i) {
4110 return PyFloat_FromDouble(x);
4111 }
4112
4113 const uint64_t sign_bit = 1ULL<<63;
4114
4115 uint64_t ax = ux.i & ~sign_bit;
4116 uint64_t ay = uy.i & ~sign_bit;
4117
4118 // opposite signs
4119 if (((ux.i ^ uy.i) & sign_bit)) {
4120 // NOTE: ax + ay can never overflow, because their most significant bit
4121 // ain't set.
4122 if (ax + ay <= usteps) {
4123 return PyFloat_FromDouble(uy.f);
4124 // This comparison has to use <, because <= would get +0.0 vs -0.0
4125 // wrong.
4126 } else if (ax < usteps) {
4127 union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4128 return PyFloat_FromDouble(result.f);
4129 } else {
4130 ux.i -= usteps;
4131 return PyFloat_FromDouble(ux.f);
4132 }
4133 // same sign
4134 } else if (ax > ay) {
4135 if (ax - ay >= usteps) {
4136 ux.i -= usteps;
4137 return PyFloat_FromDouble(ux.f);
4138 } else {
4139 return PyFloat_FromDouble(uy.f);
4140 }
4141 } else {
4142 if (ay - ax >= usteps) {
4143 ux.i += usteps;
4144 return PyFloat_FromDouble(ux.f);
4145 } else {
4146 return PyFloat_FromDouble(uy.f);
4147 }
4148 }
4149}
4150
4151
4152/*[clinic input]
4153math.ulp -> double
4154
4155 x: double
4156 /
4157
4158Return the value of the least significant bit of the float x.
4159[clinic start generated code]*/
4160
4161static double
4162math_ulp_impl(PyObject *module, double x)
4163/*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4164{
4165 if (isnan(x)) {
4166 return x;
4167 }
4168 x = fabs(x);
4169 if (isinf(x)) {
4170 return x;
4171 }
4172 double inf = Py_INFINITY;
4173 double x2 = nextafter(x, inf);
4174 if (isinf(x2)) {
4175 /* special case: x is the largest positive representable float */
4176 x2 = nextafter(x, -inf);
4177 return x - x2;
4178 }
4179 return x2 - x;
4180}
4181
4182static int
4183math_exec(PyObject *module)
4184{
4185
4186 if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
4187 return -1;
4188 }
4189 if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
4190 return -1;
4191 }
4192 // 2pi
4193 if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
4194 return -1;
4195 }
4196 if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
4197 return -1;
4198 }
4199 if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
4200 return -1;
4201 }
4202 return 0;
4203}
4204
4205static PyMethodDef math_methods[] = {
4206 {"acos", math_acos, METH_O, math_acos_doc},
4207 {"acosh", math_acosh, METH_O, math_acosh_doc},
4208 {"asin", math_asin, METH_O, math_asin_doc},
4209 {"asinh", math_asinh, METH_O, math_asinh_doc},
4210 {"atan", math_atan, METH_O, math_atan_doc},
4211 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
4212 {"atanh", math_atanh, METH_O, math_atanh_doc},
4213 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
4214 MATH_CEIL_METHODDEF
4215 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
4216 {"cos", math_cos, METH_O, math_cos_doc},
4217 {"cosh", math_cosh, METH_O, math_cosh_doc},
4218 MATH_DEGREES_METHODDEF
4219 MATH_DIST_METHODDEF
4220 {"erf", math_erf, METH_O, math_erf_doc},
4221 {"erfc", math_erfc, METH_O, math_erfc_doc},
4222 {"exp", math_exp, METH_O, math_exp_doc},
4223 {"exp2", math_exp2, METH_O, math_exp2_doc},
4224 {"expm1", math_expm1, METH_O, math_expm1_doc},
4225 {"fabs", math_fabs, METH_O, math_fabs_doc},
4226 MATH_FACTORIAL_METHODDEF
4227 MATH_FLOOR_METHODDEF
4228 MATH_FMA_METHODDEF
4229 MATH_FMAX_METHODDEF
4230 MATH_FMOD_METHODDEF
4231 MATH_FMIN_METHODDEF
4232 MATH_FREXP_METHODDEF
4233 MATH_FSUM_METHODDEF
4234 {"gamma", math_gamma, METH_O, math_gamma_doc},
4235 MATH_GCD_METHODDEF
4236 MATH_HYPOT_METHODDEF
4237 MATH_ISCLOSE_METHODDEF
4238 MATH_ISFINITE_METHODDEF
4239 MATH_ISNORMAL_METHODDEF
4240 MATH_ISSUBNORMAL_METHODDEF
4241 MATH_ISINF_METHODDEF
4242 MATH_ISNAN_METHODDEF
4243 MATH_ISQRT_METHODDEF
4244 MATH_LCM_METHODDEF
4245 MATH_LDEXP_METHODDEF
4246 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
4247 {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
4248 {"log1p", math_log1p, METH_O, math_log1p_doc},
4249 MATH_LOG10_METHODDEF
4250 MATH_LOG2_METHODDEF
4251 MATH_MODF_METHODDEF
4252 MATH_POW_METHODDEF
4253 MATH_RADIANS_METHODDEF
4254 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
4255 MATH_SIGNBIT_METHODDEF
4256 {"sin", math_sin, METH_O, math_sin_doc},
4257 {"sinh", math_sinh, METH_O, math_sinh_doc},
4258 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4259 {"tan", math_tan, METH_O, math_tan_doc},
4260 {"tanh", math_tanh, METH_O, math_tanh_doc},
4261 MATH_SUMPROD_METHODDEF
4262 MATH_TRUNC_METHODDEF
4263 MATH_PROD_METHODDEF
4264 MATH_PERM_METHODDEF
4265 MATH_COMB_METHODDEF
4266 MATH_NEXTAFTER_METHODDEF
4267 MATH_ULP_METHODDEF
4268 {NULL, NULL} /* sentinel */
4269};
4270
4271static PyModuleDef_Slot math_slots[] = {
4272 {Py_mod_exec, math_exec},
4273 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
4274 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
4275 {0, NULL}
4276};
4277
4278PyDoc_STRVAR(module_doc,
4279"This module provides access to the mathematical functions\n"
4280"defined by the C standard.");
4281
4282static struct PyModuleDef mathmodule = {
4283 PyModuleDef_HEAD_INIT,
4284 .m_name = "math",
4285 .m_doc = module_doc,
4286 .m_size = 0,
4287 .m_methods = math_methods,
4288 .m_slots = math_slots,
4289};
4290
4291PyMODINIT_FUNC
4292PyInit_math(void)
4293{
4294 return PyModuleDef_Init(&mathmodule);
4295}