]> git.ipfire.org Git - thirdparty/Python/cpython.git/blob - Modules/mathmodule.c
48cd9a642de1503474f09ceda23497f82890f08d
[thirdparty/Python/cpython.git] / Modules / mathmodule.c
1 /* Math module -- standard C math library functions, pi and e */
2
3 /* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9 These are the "spirit of 754" rules:
10
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign). This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions. In that specific case, the
25 sign of the zero determines the result of 1/0.
26
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33
34 For #1, raise OverflowError.
35
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38
39 For #3 and #4, raise ValueError. It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42
43 */
44
45 /*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
55 #ifndef Py_BUILD_CORE_BUILTIN
56 # define Py_BUILD_CORE_MODULE 1
57 #endif
58
59 #include "Python.h"
60 #include "pycore_bitutils.h" // _Py_bit_length()
61 #include "pycore_call.h" // _PyObject_CallNoArgs()
62 #include "pycore_dtoa.h" // _Py_dg_infinity()
63 #include "pycore_long.h" // _PyLong_GetZero()
64 #include "pycore_moduleobject.h" // _PyModule_GetState()
65 #include "pycore_object.h" // _PyObject_LookupSpecial()
66 #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
67 /* For DBL_EPSILON in _math.h */
68 #include <float.h>
69 /* For _Py_log1p with workarounds for buggy handling of zeros. */
70 #include "_math.h"
71 #include <stdbool.h>
72
73 #include "clinic/mathmodule.c.h"
74
75 /*[clinic input]
76 module math
77 [clinic start generated code]*/
78 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79
80
81 typedef struct {
82 PyObject *str___ceil__;
83 PyObject *str___floor__;
84 PyObject *str___trunc__;
85 } math_module_state;
86
87 static inline math_module_state*
88 get_math_module_state(PyObject *module)
89 {
90 void *state = _PyModule_GetState(module);
91 assert(state != NULL);
92 return (math_module_state *)state;
93 }
94
95 /*
96 Double and triple length extended precision algorithms from:
97
98 Accurate Sum and Dot Product
99 by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
100 https://doi.org/10.1137/030601818
101 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
102
103 */
104
105 typedef struct{ double hi; double lo; } DoubleLength;
106
107 static DoubleLength
108 dl_fast_sum(double a, double b)
109 {
110 /* Algorithm 1.1. Compensated summation of two floating point numbers. */
111 assert(fabs(a) >= fabs(b));
112 double x = a + b;
113 double y = (a - x) + b;
114 return (DoubleLength) {x, y};
115 }
116
117 static DoubleLength
118 dl_sum(double a, double b)
119 {
120 /* Algorithm 3.1 Error-free transformation of the sum */
121 double x = a + b;
122 double z = x - a;
123 double y = (a - (x - z)) + (b - z);
124 return (DoubleLength) {x, y};
125 }
126
127 #ifndef UNRELIABLE_FMA
128
129 static DoubleLength
130 dl_mul(double x, double y)
131 {
132 /* Algorithm 3.5. Error-free transformation of a product */
133 double z = x * y;
134 double zz = fma(x, y, -z);
135 return (DoubleLength) {z, zz};
136 }
137
138 #else
139
140 /*
141 The default implementation of dl_mul() depends on the C math library
142 having an accurate fma() function as required by § 7.12.13.1 of the
143 C99 standard.
144
145 The UNRELIABLE_FMA option is provided as a slower but accurate
146 alternative for builds where the fma() function is found wanting.
147 The speed penalty may be modest (17% slower on an Apple M1 Max),
148 so don't hesitate to enable this build option.
149
150 The algorithms are from the T. J. Dekker paper:
151 A Floating-Point Technique for Extending the Available Precision
152 https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
153 */
154
155 static DoubleLength
156 dl_split(double x) {
157 // Dekker (5.5) and (5.6).
158 double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
159 double hi = t - (t - x);
160 double lo = x - hi;
161 return (DoubleLength) {hi, lo};
162 }
163
164 static DoubleLength
165 dl_mul(double x, double y)
166 {
167 // Dekker (5.12) and mul12()
168 DoubleLength xx = dl_split(x);
169 DoubleLength yy = dl_split(y);
170 double p = xx.hi * yy.hi;
171 double q = xx.hi * yy.lo + xx.lo * yy.hi;
172 double z = p + q;
173 double zz = p - z + q + xx.lo * yy.lo;
174 return (DoubleLength) {z, zz};
175 }
176
177 #endif
178
179 typedef struct { double hi; double lo; double tiny; } TripleLength;
180
181 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
182
183 static TripleLength
184 tl_fma(double x, double y, TripleLength total)
185 {
186 /* Algorithm 5.10 with SumKVert for K=3 */
187 DoubleLength pr = dl_mul(x, y);
188 DoubleLength sm = dl_sum(total.hi, pr.hi);
189 DoubleLength r1 = dl_sum(total.lo, pr.lo);
190 DoubleLength r2 = dl_sum(r1.hi, sm.lo);
191 return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
192 }
193
194 static double
195 tl_to_d(TripleLength total)
196 {
197 DoubleLength last = dl_sum(total.lo, total.hi);
198 return total.tiny + last.lo + last.hi;
199 }
200
201
202 /*
203 sin(pi*x), giving accurate results for all finite x (especially x
204 integral or close to an integer). This is here for use in the
205 reflection formula for the gamma function. It conforms to IEEE
206 754-2008 for finite arguments, but not for infinities or nans.
207 */
208
209 static const double pi = 3.141592653589793238462643383279502884197;
210 static const double logpi = 1.144729885849400174143427351353058711647;
211
212 /* Version of PyFloat_AsDouble() with in-line fast paths
213 for exact floats and integers. Gives a substantial
214 speed improvement for extracting float arguments.
215 */
216
217 #define ASSIGN_DOUBLE(target_var, obj, error_label) \
218 if (PyFloat_CheckExact(obj)) { \
219 target_var = PyFloat_AS_DOUBLE(obj); \
220 } \
221 else if (PyLong_CheckExact(obj)) { \
222 target_var = PyLong_AsDouble(obj); \
223 if (target_var == -1.0 && PyErr_Occurred()) { \
224 goto error_label; \
225 } \
226 } \
227 else { \
228 target_var = PyFloat_AsDouble(obj); \
229 if (target_var == -1.0 && PyErr_Occurred()) { \
230 goto error_label; \
231 } \
232 }
233
234 static double
235 m_sinpi(double x)
236 {
237 double y, r;
238 int n;
239 /* this function should only ever be called for finite arguments */
240 assert(Py_IS_FINITE(x));
241 y = fmod(fabs(x), 2.0);
242 n = (int)round(2.0*y);
243 assert(0 <= n && n <= 4);
244 switch (n) {
245 case 0:
246 r = sin(pi*y);
247 break;
248 case 1:
249 r = cos(pi*(y-0.5));
250 break;
251 case 2:
252 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
253 -0.0 instead of 0.0 when y == 1.0. */
254 r = sin(pi*(1.0-y));
255 break;
256 case 3:
257 r = -cos(pi*(y-1.5));
258 break;
259 case 4:
260 r = sin(pi*(y-2.0));
261 break;
262 default:
263 Py_UNREACHABLE();
264 }
265 return copysign(1.0, x)*r;
266 }
267
268 /* Implementation of the real gamma function. Kept here to work around
269 issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
270 on various platforms (Windows, MacOS). In extensive but non-exhaustive
271 random tests, this function proved accurate to within <= 10 ulps across the
272 entire float domain. Note that accuracy may depend on the quality of the
273 system math functions, the pow function in particular. Special cases
274 follow C99 annex F. The parameters and method are tailored to platforms
275 whose double format is the IEEE 754 binary64 format.
276
277 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
278 and g=6.024680040776729583740234375; these parameters are amongst those
279 used by the Boost library. Following Boost (again), we re-express the
280 Lanczos sum as a rational function, and compute it that way. The
281 coefficients below were computed independently using MPFR, and have been
282 double-checked against the coefficients in the Boost source code.
283
284 For x < 0.0 we use the reflection formula.
285
286 There's one minor tweak that deserves explanation: Lanczos' formula for
287 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
288 values, x+g-0.5 can be represented exactly. However, in cases where it
289 can't be represented exactly the small error in x+g-0.5 can be magnified
290 significantly by the pow and exp calls, especially for large x. A cheap
291 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
292 involved in the computation of x+g-0.5 (that is, e = computed value of
293 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
294
295 Correction factor
296 -----------------
297 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
298 double, and e is tiny. Then:
299
300 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
301 = pow(y, x-0.5)/exp(y) * C,
302
303 where the correction_factor C is given by
304
305 C = pow(1-e/y, x-0.5) * exp(e)
306
307 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
308
309 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
310
311 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
312
313 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
314
315 Note that for accuracy, when computing r*C it's better to do
316
317 r + e*g/y*r;
318
319 than
320
321 r * (1 + e*g/y);
322
323 since the addition in the latter throws away most of the bits of
324 information in e*g/y.
325 */
326
327 #define LANCZOS_N 13
328 static const double lanczos_g = 6.024680040776729583740234375;
329 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
330 static const double lanczos_num_coeffs[LANCZOS_N] = {
331 23531376880.410759688572007674451636754734846804940,
332 42919803642.649098768957899047001988850926355848959,
333 35711959237.355668049440185451547166705960488635843,
334 17921034426.037209699919755754458931112671403265390,
335 6039542586.3520280050642916443072979210699388420708,
336 1439720407.3117216736632230727949123939715485786772,
337 248874557.86205415651146038641322942321632125127801,
338 31426415.585400194380614231628318205362874684987640,
339 2876370.6289353724412254090516208496135991145378768,
340 186056.26539522349504029498971604569928220784236328,
341 8071.6720023658162106380029022722506138218516325024,
342 210.82427775157934587250973392071336271166969580291,
343 2.5066282746310002701649081771338373386264310793408
344 };
345
346 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
347 static const double lanczos_den_coeffs[LANCZOS_N] = {
348 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
349 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
350
351 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
352 #define NGAMMA_INTEGRAL 23
353 static const double gamma_integral[NGAMMA_INTEGRAL] = {
354 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
355 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
356 1307674368000.0, 20922789888000.0, 355687428096000.0,
357 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
358 51090942171709440000.0, 1124000727777607680000.0,
359 };
360
361 /* Lanczos' sum L_g(x), for positive x */
362
363 static double
364 lanczos_sum(double x)
365 {
366 double num = 0.0, den = 0.0;
367 int i;
368 assert(x > 0.0);
369 /* evaluate the rational function lanczos_sum(x). For large
370 x, the obvious algorithm risks overflow, so we instead
371 rescale the denominator and numerator of the rational
372 function by x**(1-LANCZOS_N) and treat this as a
373 rational function in 1/x. This also reduces the error for
374 larger x values. The choice of cutoff point (5.0 below) is
375 somewhat arbitrary; in tests, smaller cutoff values than
376 this resulted in lower accuracy. */
377 if (x < 5.0) {
378 for (i = LANCZOS_N; --i >= 0; ) {
379 num = num * x + lanczos_num_coeffs[i];
380 den = den * x + lanczos_den_coeffs[i];
381 }
382 }
383 else {
384 for (i = 0; i < LANCZOS_N; i++) {
385 num = num / x + lanczos_num_coeffs[i];
386 den = den / x + lanczos_den_coeffs[i];
387 }
388 }
389 return num/den;
390 }
391
392 /* Constant for +infinity, generated in the same way as float('inf'). */
393
394 static double
395 m_inf(void)
396 {
397 #if _PY_SHORT_FLOAT_REPR == 1
398 return _Py_dg_infinity(0);
399 #else
400 return Py_HUGE_VAL;
401 #endif
402 }
403
404 /* Constant nan value, generated in the same way as float('nan'). */
405 /* We don't currently assume that Py_NAN is defined everywhere. */
406
407 #if _PY_SHORT_FLOAT_REPR == 1
408
409 static double
410 m_nan(void)
411 {
412 #if _PY_SHORT_FLOAT_REPR == 1
413 return _Py_dg_stdnan(0);
414 #else
415 return Py_NAN;
416 #endif
417 }
418
419 #endif
420
421 static double
422 m_tgamma(double x)
423 {
424 double absx, r, y, z, sqrtpow;
425
426 /* special cases */
427 if (!Py_IS_FINITE(x)) {
428 if (Py_IS_NAN(x) || x > 0.0)
429 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
430 else {
431 errno = EDOM;
432 return Py_NAN; /* tgamma(-inf) = nan, invalid */
433 }
434 }
435 if (x == 0.0) {
436 errno = EDOM;
437 /* tgamma(+-0.0) = +-inf, divide-by-zero */
438 return copysign(Py_HUGE_VAL, x);
439 }
440
441 /* integer arguments */
442 if (x == floor(x)) {
443 if (x < 0.0) {
444 errno = EDOM; /* tgamma(n) = nan, invalid for */
445 return Py_NAN; /* negative integers n */
446 }
447 if (x <= NGAMMA_INTEGRAL)
448 return gamma_integral[(int)x - 1];
449 }
450 absx = fabs(x);
451
452 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
453 if (absx < 1e-20) {
454 r = 1.0/x;
455 if (Py_IS_INFINITY(r))
456 errno = ERANGE;
457 return r;
458 }
459
460 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
461 x > 200, and underflows to +-0.0 for x < -200, not a negative
462 integer. */
463 if (absx > 200.0) {
464 if (x < 0.0) {
465 return 0.0/m_sinpi(x);
466 }
467 else {
468 errno = ERANGE;
469 return Py_HUGE_VAL;
470 }
471 }
472
473 y = absx + lanczos_g_minus_half;
474 /* compute error in sum */
475 if (absx > lanczos_g_minus_half) {
476 /* note: the correction can be foiled by an optimizing
477 compiler that (incorrectly) thinks that an expression like
478 a + b - a - b can be optimized to 0.0. This shouldn't
479 happen in a standards-conforming compiler. */
480 double q = y - absx;
481 z = q - lanczos_g_minus_half;
482 }
483 else {
484 double q = y - lanczos_g_minus_half;
485 z = q - absx;
486 }
487 z = z * lanczos_g / y;
488 if (x < 0.0) {
489 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
490 r -= z * r;
491 if (absx < 140.0) {
492 r /= pow(y, absx - 0.5);
493 }
494 else {
495 sqrtpow = pow(y, absx / 2.0 - 0.25);
496 r /= sqrtpow;
497 r /= sqrtpow;
498 }
499 }
500 else {
501 r = lanczos_sum(absx) / exp(y);
502 r += z * r;
503 if (absx < 140.0) {
504 r *= pow(y, absx - 0.5);
505 }
506 else {
507 sqrtpow = pow(y, absx / 2.0 - 0.25);
508 r *= sqrtpow;
509 r *= sqrtpow;
510 }
511 }
512 if (Py_IS_INFINITY(r))
513 errno = ERANGE;
514 return r;
515 }
516
517 /*
518 lgamma: natural log of the absolute value of the Gamma function.
519 For large arguments, Lanczos' formula works extremely well here.
520 */
521
522 static double
523 m_lgamma(double x)
524 {
525 double r;
526 double absx;
527
528 /* special cases */
529 if (!Py_IS_FINITE(x)) {
530 if (Py_IS_NAN(x))
531 return x; /* lgamma(nan) = nan */
532 else
533 return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
534 }
535
536 /* integer arguments */
537 if (x == floor(x) && x <= 2.0) {
538 if (x <= 0.0) {
539 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
540 return Py_HUGE_VAL; /* integers n <= 0 */
541 }
542 else {
543 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
544 }
545 }
546
547 absx = fabs(x);
548 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
549 if (absx < 1e-20)
550 return -log(absx);
551
552 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
553 having a second set of numerator coefficients for lanczos_sum that
554 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
555 subtraction below; it's probably not worth it. */
556 r = log(lanczos_sum(absx)) - lanczos_g;
557 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
558 if (x < 0.0)
559 /* Use reflection formula to get value for negative x. */
560 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
561 if (Py_IS_INFINITY(r))
562 errno = ERANGE;
563 return r;
564 }
565
566 /*
567 wrapper for atan2 that deals directly with special cases before
568 delegating to the platform libm for the remaining cases. This
569 is necessary to get consistent behaviour across platforms.
570 Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
571 always follow C99.
572 */
573
574 static double
575 m_atan2(double y, double x)
576 {
577 if (Py_IS_NAN(x) || Py_IS_NAN(y))
578 return Py_NAN;
579 if (Py_IS_INFINITY(y)) {
580 if (Py_IS_INFINITY(x)) {
581 if (copysign(1., x) == 1.)
582 /* atan2(+-inf, +inf) == +-pi/4 */
583 return copysign(0.25*Py_MATH_PI, y);
584 else
585 /* atan2(+-inf, -inf) == +-pi*3/4 */
586 return copysign(0.75*Py_MATH_PI, y);
587 }
588 /* atan2(+-inf, x) == +-pi/2 for finite x */
589 return copysign(0.5*Py_MATH_PI, y);
590 }
591 if (Py_IS_INFINITY(x) || y == 0.) {
592 if (copysign(1., x) == 1.)
593 /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
594 return copysign(0., y);
595 else
596 /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
597 return copysign(Py_MATH_PI, y);
598 }
599 return atan2(y, x);
600 }
601
602
603 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
604 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
605 binary floating-point format, the result is always exact. */
606
607 static double
608 m_remainder(double x, double y)
609 {
610 /* Deal with most common case first. */
611 if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
612 double absx, absy, c, m, r;
613
614 if (y == 0.0) {
615 return Py_NAN;
616 }
617
618 absx = fabs(x);
619 absy = fabs(y);
620 m = fmod(absx, absy);
621
622 /*
623 Warning: some subtlety here. What we *want* to know at this point is
624 whether the remainder m is less than, equal to, or greater than half
625 of absy. However, we can't do that comparison directly because we
626 can't be sure that 0.5*absy is representable (the multiplication
627 might incur precision loss due to underflow). So instead we compare
628 m with the complement c = absy - m: m < 0.5*absy if and only if m <
629 c, and so on. The catch is that absy - m might also not be
630 representable, but it turns out that it doesn't matter:
631
632 - if m > 0.5*absy then absy - m is exactly representable, by
633 Sterbenz's lemma, so m > c
634 - if m == 0.5*absy then again absy - m is exactly representable
635 and m == c
636 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
637 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
638 c, or (ii) absy is tiny, either subnormal or in the lowest normal
639 binade. Then absy - m is exactly representable and again m < c.
640 */
641
642 c = absy - m;
643 if (m < c) {
644 r = m;
645 }
646 else if (m > c) {
647 r = -c;
648 }
649 else {
650 /*
651 Here absx is exactly halfway between two multiples of absy,
652 and we need to choose the even multiple. x now has the form
653
654 absx = n * absy + m
655
656 for some integer n (recalling that m = 0.5*absy at this point).
657 If n is even we want to return m; if n is odd, we need to
658 return -m.
659
660 So
661
662 0.5 * (absx - m) = (n/2) * absy
663
664 and now reducing modulo absy gives us:
665
666 | m, if n is odd
667 fmod(0.5 * (absx - m), absy) = |
668 | 0, if n is even
669
670 Now m - 2.0 * fmod(...) gives the desired result: m
671 if n is even, -m if m is odd.
672
673 Note that all steps in fmod(0.5 * (absx - m), absy)
674 will be computed exactly, with no rounding error
675 introduced.
676 */
677 assert(m == c);
678 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
679 }
680 return copysign(1.0, x) * r;
681 }
682
683 /* Special values. */
684 if (Py_IS_NAN(x)) {
685 return x;
686 }
687 if (Py_IS_NAN(y)) {
688 return y;
689 }
690 if (Py_IS_INFINITY(x)) {
691 return Py_NAN;
692 }
693 assert(Py_IS_INFINITY(y));
694 return x;
695 }
696
697
698 /*
699 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
700 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
701 special values directly, passing positive non-special values through to
702 the system log/log10.
703 */
704
705 static double
706 m_log(double x)
707 {
708 if (Py_IS_FINITE(x)) {
709 if (x > 0.0)
710 return log(x);
711 errno = EDOM;
712 if (x == 0.0)
713 return -Py_HUGE_VAL; /* log(0) = -inf */
714 else
715 return Py_NAN; /* log(-ve) = nan */
716 }
717 else if (Py_IS_NAN(x))
718 return x; /* log(nan) = nan */
719 else if (x > 0.0)
720 return x; /* log(inf) = inf */
721 else {
722 errno = EDOM;
723 return Py_NAN; /* log(-inf) = nan */
724 }
725 }
726
727 /*
728 log2: log to base 2.
729
730 Uses an algorithm that should:
731
732 (a) produce exact results for powers of 2, and
733 (b) give a monotonic log2 (for positive finite floats),
734 assuming that the system log is monotonic.
735 */
736
737 static double
738 m_log2(double x)
739 {
740 if (!Py_IS_FINITE(x)) {
741 if (Py_IS_NAN(x))
742 return x; /* log2(nan) = nan */
743 else if (x > 0.0)
744 return x; /* log2(+inf) = +inf */
745 else {
746 errno = EDOM;
747 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
748 }
749 }
750
751 if (x > 0.0) {
752 return log2(x);
753 }
754 else if (x == 0.0) {
755 errno = EDOM;
756 return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
757 }
758 else {
759 errno = EDOM;
760 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
761 }
762 }
763
764 static double
765 m_log10(double x)
766 {
767 if (Py_IS_FINITE(x)) {
768 if (x > 0.0)
769 return log10(x);
770 errno = EDOM;
771 if (x == 0.0)
772 return -Py_HUGE_VAL; /* log10(0) = -inf */
773 else
774 return Py_NAN; /* log10(-ve) = nan */
775 }
776 else if (Py_IS_NAN(x))
777 return x; /* log10(nan) = nan */
778 else if (x > 0.0)
779 return x; /* log10(inf) = inf */
780 else {
781 errno = EDOM;
782 return Py_NAN; /* log10(-inf) = nan */
783 }
784 }
785
786
787 static PyObject *
788 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
789 {
790 PyObject *res, *x;
791 Py_ssize_t i;
792
793 if (nargs == 0) {
794 return PyLong_FromLong(0);
795 }
796 res = PyNumber_Index(args[0]);
797 if (res == NULL) {
798 return NULL;
799 }
800 if (nargs == 1) {
801 Py_SETREF(res, PyNumber_Absolute(res));
802 return res;
803 }
804
805 PyObject *one = _PyLong_GetOne(); // borrowed ref
806 for (i = 1; i < nargs; i++) {
807 x = _PyNumber_Index(args[i]);
808 if (x == NULL) {
809 Py_DECREF(res);
810 return NULL;
811 }
812 if (res == one) {
813 /* Fast path: just check arguments.
814 It is okay to use identity comparison here. */
815 Py_DECREF(x);
816 continue;
817 }
818 Py_SETREF(res, _PyLong_GCD(res, x));
819 Py_DECREF(x);
820 if (res == NULL) {
821 return NULL;
822 }
823 }
824 return res;
825 }
826
827 PyDoc_STRVAR(math_gcd_doc,
828 "gcd($module, *integers)\n"
829 "--\n"
830 "\n"
831 "Greatest Common Divisor.");
832
833
834 static PyObject *
835 long_lcm(PyObject *a, PyObject *b)
836 {
837 PyObject *g, *m, *f, *ab;
838
839 if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
840 return PyLong_FromLong(0);
841 }
842 g = _PyLong_GCD(a, b);
843 if (g == NULL) {
844 return NULL;
845 }
846 f = PyNumber_FloorDivide(a, g);
847 Py_DECREF(g);
848 if (f == NULL) {
849 return NULL;
850 }
851 m = PyNumber_Multiply(f, b);
852 Py_DECREF(f);
853 if (m == NULL) {
854 return NULL;
855 }
856 ab = PyNumber_Absolute(m);
857 Py_DECREF(m);
858 return ab;
859 }
860
861
862 static PyObject *
863 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
864 {
865 PyObject *res, *x;
866 Py_ssize_t i;
867
868 if (nargs == 0) {
869 return PyLong_FromLong(1);
870 }
871 res = PyNumber_Index(args[0]);
872 if (res == NULL) {
873 return NULL;
874 }
875 if (nargs == 1) {
876 Py_SETREF(res, PyNumber_Absolute(res));
877 return res;
878 }
879
880 PyObject *zero = _PyLong_GetZero(); // borrowed ref
881 for (i = 1; i < nargs; i++) {
882 x = PyNumber_Index(args[i]);
883 if (x == NULL) {
884 Py_DECREF(res);
885 return NULL;
886 }
887 if (res == zero) {
888 /* Fast path: just check arguments.
889 It is okay to use identity comparison here. */
890 Py_DECREF(x);
891 continue;
892 }
893 Py_SETREF(res, long_lcm(res, x));
894 Py_DECREF(x);
895 if (res == NULL) {
896 return NULL;
897 }
898 }
899 return res;
900 }
901
902
903 PyDoc_STRVAR(math_lcm_doc,
904 "lcm($module, *integers)\n"
905 "--\n"
906 "\n"
907 "Least Common Multiple.");
908
909
910 /* Call is_error when errno != 0, and where x is the result libm
911 * returned. is_error will usually set up an exception and return
912 * true (1), but may return false (0) without setting up an exception.
913 */
914 static int
915 is_error(double x)
916 {
917 int result = 1; /* presumption of guilt */
918 assert(errno); /* non-zero errno is a precondition for calling */
919 if (errno == EDOM)
920 PyErr_SetString(PyExc_ValueError, "math domain error");
921
922 else if (errno == ERANGE) {
923 /* ANSI C generally requires libm functions to set ERANGE
924 * on overflow, but also generally *allows* them to set
925 * ERANGE on underflow too. There's no consistency about
926 * the latter across platforms.
927 * Alas, C99 never requires that errno be set.
928 * Here we suppress the underflow errors (libm functions
929 * should return a zero on underflow, and +- HUGE_VAL on
930 * overflow, so testing the result for zero suffices to
931 * distinguish the cases).
932 *
933 * On some platforms (Ubuntu/ia64) it seems that errno can be
934 * set to ERANGE for subnormal results that do *not* underflow
935 * to zero. So to be safe, we'll ignore ERANGE whenever the
936 * function result is less than 1.5 in absolute value.
937 *
938 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
939 * are correctly detected, since the function may underflow
940 * toward -1.0 rather than 0.0.
941 */
942 if (fabs(x) < 1.5)
943 result = 0;
944 else
945 PyErr_SetString(PyExc_OverflowError,
946 "math range error");
947 }
948 else
949 /* Unexpected math error */
950 PyErr_SetFromErrno(PyExc_ValueError);
951 return result;
952 }
953
954 /*
955 math_1 is used to wrap a libm function f that takes a double
956 argument and returns a double.
957
958 The error reporting follows these rules, which are designed to do
959 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
960 platforms.
961
962 - a NaN result from non-NaN inputs causes ValueError to be raised
963 - an infinite result from finite inputs causes OverflowError to be
964 raised if can_overflow is 1, or raises ValueError if can_overflow
965 is 0.
966 - if the result is finite and errno == EDOM then ValueError is
967 raised
968 - if the result is finite and nonzero and errno == ERANGE then
969 OverflowError is raised
970
971 The last rule is used to catch overflow on platforms which follow
972 C89 but for which HUGE_VAL is not an infinity.
973
974 For the majority of one-argument functions these rules are enough
975 to ensure that Python's functions behave as specified in 'Annex F'
976 of the C99 standard, with the 'invalid' and 'divide-by-zero'
977 floating-point exceptions mapping to Python's ValueError and the
978 'overflow' floating-point exception mapping to OverflowError.
979 math_1 only works for functions that don't have singularities *and*
980 the possibility of overflow; fortunately, that covers everything we
981 care about right now.
982 */
983
984 static PyObject *
985 math_1(PyObject *arg, double (*func) (double), int can_overflow)
986 {
987 double x, r;
988 x = PyFloat_AsDouble(arg);
989 if (x == -1.0 && PyErr_Occurred())
990 return NULL;
991 errno = 0;
992 r = (*func)(x);
993 if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
994 PyErr_SetString(PyExc_ValueError,
995 "math domain error"); /* invalid arg */
996 return NULL;
997 }
998 if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
999 if (can_overflow)
1000 PyErr_SetString(PyExc_OverflowError,
1001 "math range error"); /* overflow */
1002 else
1003 PyErr_SetString(PyExc_ValueError,
1004 "math domain error"); /* singularity */
1005 return NULL;
1006 }
1007 if (Py_IS_FINITE(r) && errno && is_error(r))
1008 /* this branch unnecessary on most platforms */
1009 return NULL;
1010
1011 return PyFloat_FromDouble(r);
1012 }
1013
1014 /* variant of math_1, to be used when the function being wrapped is known to
1015 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1016 errno = ERANGE for overflow). */
1017
1018 static PyObject *
1019 math_1a(PyObject *arg, double (*func) (double))
1020 {
1021 double x, r;
1022 x = PyFloat_AsDouble(arg);
1023 if (x == -1.0 && PyErr_Occurred())
1024 return NULL;
1025 errno = 0;
1026 r = (*func)(x);
1027 if (errno && is_error(r))
1028 return NULL;
1029 return PyFloat_FromDouble(r);
1030 }
1031
1032 /*
1033 math_2 is used to wrap a libm function f that takes two double
1034 arguments and returns a double.
1035
1036 The error reporting follows these rules, which are designed to do
1037 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1038 platforms.
1039
1040 - a NaN result from non-NaN inputs causes ValueError to be raised
1041 - an infinite result from finite inputs causes OverflowError to be
1042 raised.
1043 - if the result is finite and errno == EDOM then ValueError is
1044 raised
1045 - if the result is finite and nonzero and errno == ERANGE then
1046 OverflowError is raised
1047
1048 The last rule is used to catch overflow on platforms which follow
1049 C89 but for which HUGE_VAL is not an infinity.
1050
1051 For most two-argument functions (copysign, fmod, hypot, atan2)
1052 these rules are enough to ensure that Python's functions behave as
1053 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1054 'divide-by-zero' floating-point exceptions mapping to Python's
1055 ValueError and the 'overflow' floating-point exception mapping to
1056 OverflowError.
1057 */
1058
1059 static PyObject *
1060 math_2(PyObject *const *args, Py_ssize_t nargs,
1061 double (*func) (double, double), const char *funcname)
1062 {
1063 double x, y, r;
1064 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1065 return NULL;
1066 x = PyFloat_AsDouble(args[0]);
1067 if (x == -1.0 && PyErr_Occurred()) {
1068 return NULL;
1069 }
1070 y = PyFloat_AsDouble(args[1]);
1071 if (y == -1.0 && PyErr_Occurred()) {
1072 return NULL;
1073 }
1074 errno = 0;
1075 r = (*func)(x, y);
1076 if (Py_IS_NAN(r)) {
1077 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1078 errno = EDOM;
1079 else
1080 errno = 0;
1081 }
1082 else if (Py_IS_INFINITY(r)) {
1083 if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1084 errno = ERANGE;
1085 else
1086 errno = 0;
1087 }
1088 if (errno && is_error(r))
1089 return NULL;
1090 else
1091 return PyFloat_FromDouble(r);
1092 }
1093
1094 #define FUNC1(funcname, func, can_overflow, docstring) \
1095 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1096 return math_1(args, func, can_overflow); \
1097 }\
1098 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1099
1100 #define FUNC1A(funcname, func, docstring) \
1101 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1102 return math_1a(args, func); \
1103 }\
1104 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1105
1106 #define FUNC2(funcname, func, docstring) \
1107 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1108 return math_2(args, nargs, func, #funcname); \
1109 }\
1110 PyDoc_STRVAR(math_##funcname##_doc, docstring);
1111
1112 FUNC1(acos, acos, 0,
1113 "acos($module, x, /)\n--\n\n"
1114 "Return the arc cosine (measured in radians) of x.\n\n"
1115 "The result is between 0 and pi.")
1116 FUNC1(acosh, acosh, 0,
1117 "acosh($module, x, /)\n--\n\n"
1118 "Return the inverse hyperbolic cosine of x.")
1119 FUNC1(asin, asin, 0,
1120 "asin($module, x, /)\n--\n\n"
1121 "Return the arc sine (measured in radians) of x.\n\n"
1122 "The result is between -pi/2 and pi/2.")
1123 FUNC1(asinh, asinh, 0,
1124 "asinh($module, x, /)\n--\n\n"
1125 "Return the inverse hyperbolic sine of x.")
1126 FUNC1(atan, atan, 0,
1127 "atan($module, x, /)\n--\n\n"
1128 "Return the arc tangent (measured in radians) of x.\n\n"
1129 "The result is between -pi/2 and pi/2.")
1130 FUNC2(atan2, m_atan2,
1131 "atan2($module, y, x, /)\n--\n\n"
1132 "Return the arc tangent (measured in radians) of y/x.\n\n"
1133 "Unlike atan(y/x), the signs of both x and y are considered.")
1134 FUNC1(atanh, atanh, 0,
1135 "atanh($module, x, /)\n--\n\n"
1136 "Return the inverse hyperbolic tangent of x.")
1137 FUNC1(cbrt, cbrt, 0,
1138 "cbrt($module, x, /)\n--\n\n"
1139 "Return the cube root of x.")
1140
1141 /*[clinic input]
1142 math.ceil
1143
1144 x as number: object
1145 /
1146
1147 Return the ceiling of x as an Integral.
1148
1149 This is the smallest integer >= x.
1150 [clinic start generated code]*/
1151
1152 static PyObject *
1153 math_ceil(PyObject *module, PyObject *number)
1154 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1155 {
1156
1157 if (!PyFloat_CheckExact(number)) {
1158 math_module_state *state = get_math_module_state(module);
1159 PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
1160 if (method != NULL) {
1161 PyObject *result = _PyObject_CallNoArgs(method);
1162 Py_DECREF(method);
1163 return result;
1164 }
1165 if (PyErr_Occurred())
1166 return NULL;
1167 }
1168 double x = PyFloat_AsDouble(number);
1169 if (x == -1.0 && PyErr_Occurred())
1170 return NULL;
1171
1172 return PyLong_FromDouble(ceil(x));
1173 }
1174
1175 FUNC2(copysign, copysign,
1176 "copysign($module, x, y, /)\n--\n\n"
1177 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1178 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1179 "returns -1.0.\n")
1180 FUNC1(cos, cos, 0,
1181 "cos($module, x, /)\n--\n\n"
1182 "Return the cosine of x (measured in radians).")
1183 FUNC1(cosh, cosh, 1,
1184 "cosh($module, x, /)\n--\n\n"
1185 "Return the hyperbolic cosine of x.")
1186 FUNC1A(erf, erf,
1187 "erf($module, x, /)\n--\n\n"
1188 "Error function at x.")
1189 FUNC1A(erfc, erfc,
1190 "erfc($module, x, /)\n--\n\n"
1191 "Complementary error function at x.")
1192 FUNC1(exp, exp, 1,
1193 "exp($module, x, /)\n--\n\n"
1194 "Return e raised to the power of x.")
1195 FUNC1(exp2, exp2, 1,
1196 "exp2($module, x, /)\n--\n\n"
1197 "Return 2 raised to the power of x.")
1198 FUNC1(expm1, expm1, 1,
1199 "expm1($module, x, /)\n--\n\n"
1200 "Return exp(x)-1.\n\n"
1201 "This function avoids the loss of precision involved in the direct "
1202 "evaluation of exp(x)-1 for small x.")
1203 FUNC1(fabs, fabs, 0,
1204 "fabs($module, x, /)\n--\n\n"
1205 "Return the absolute value of the float x.")
1206
1207 /*[clinic input]
1208 math.floor
1209
1210 x as number: object
1211 /
1212
1213 Return the floor of x as an Integral.
1214
1215 This is the largest integer <= x.
1216 [clinic start generated code]*/
1217
1218 static PyObject *
1219 math_floor(PyObject *module, PyObject *number)
1220 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1221 {
1222 double x;
1223
1224 if (PyFloat_CheckExact(number)) {
1225 x = PyFloat_AS_DOUBLE(number);
1226 }
1227 else
1228 {
1229 math_module_state *state = get_math_module_state(module);
1230 PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
1231 if (method != NULL) {
1232 PyObject *result = _PyObject_CallNoArgs(method);
1233 Py_DECREF(method);
1234 return result;
1235 }
1236 if (PyErr_Occurred())
1237 return NULL;
1238 x = PyFloat_AsDouble(number);
1239 if (x == -1.0 && PyErr_Occurred())
1240 return NULL;
1241 }
1242 return PyLong_FromDouble(floor(x));
1243 }
1244
1245 FUNC1A(gamma, m_tgamma,
1246 "gamma($module, x, /)\n--\n\n"
1247 "Gamma function at x.")
1248 FUNC1A(lgamma, m_lgamma,
1249 "lgamma($module, x, /)\n--\n\n"
1250 "Natural logarithm of absolute value of Gamma function at x.")
1251 FUNC1(log1p, m_log1p, 0,
1252 "log1p($module, x, /)\n--\n\n"
1253 "Return the natural logarithm of 1+x (base e).\n\n"
1254 "The result is computed in a way which is accurate for x near zero.")
1255 FUNC2(remainder, m_remainder,
1256 "remainder($module, x, y, /)\n--\n\n"
1257 "Difference between x and the closest integer multiple of y.\n\n"
1258 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1259 "In the case where x is exactly halfway between two multiples of\n"
1260 "y, the nearest even value of n is used. The result is always exact.")
1261 FUNC1(sin, sin, 0,
1262 "sin($module, x, /)\n--\n\n"
1263 "Return the sine of x (measured in radians).")
1264 FUNC1(sinh, sinh, 1,
1265 "sinh($module, x, /)\n--\n\n"
1266 "Return the hyperbolic sine of x.")
1267 FUNC1(sqrt, sqrt, 0,
1268 "sqrt($module, x, /)\n--\n\n"
1269 "Return the square root of x.")
1270 FUNC1(tan, tan, 0,
1271 "tan($module, x, /)\n--\n\n"
1272 "Return the tangent of x (measured in radians).")
1273 FUNC1(tanh, tanh, 0,
1274 "tanh($module, x, /)\n--\n\n"
1275 "Return the hyperbolic tangent of x.")
1276
1277 /* Precision summation function as msum() by Raymond Hettinger in
1278 <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1279 enhanced with the exact partials sum and roundoff from Mark
1280 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1281 See those links for more details, proofs and other references.
1282
1283 Note 1: IEEE 754 floating-point semantics with a rounding mode of
1284 roundTiesToEven are assumed.
1285
1286 Note 2: No provision is made for intermediate overflow handling;
1287 therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1288 fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
1289 overflow of the first partial sum.
1290
1291 Note 3: The algorithm has two potential sources of fragility. First, C
1292 permits arithmetic operations on `double`s to be performed in an
1293 intermediate format whose range and precision may be greater than those of
1294 `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1295 example on machines using the now largely historical x87 FPUs. In this case,
1296 `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1297 `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1298 should be safe from this source of errors. Second, an aggressively
1299 optimizing compiler can re-associate operations so that (for example) the
1300 statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1301 re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1302 re-association would be in violation of the C standard, and should not occur
1303 except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1304 -fassociative-math). Such optimizations should be avoided for this module.
1305
1306 Note 4: The signature of math.fsum() differs from builtins.sum()
1307 because the start argument doesn't make sense in the context of
1308 accurate summation. Since the partials table is collapsed before
1309 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1310 accurate result returned by sum(itertools.chain(seq1, seq2)).
1311 */
1312
1313 #define NUM_PARTIALS 32 /* initial partials array size, on stack */
1314
1315 /* Extend the partials array p[] by doubling its size. */
1316 static int /* non-zero on error */
1317 _fsum_realloc(double **p_ptr, Py_ssize_t n,
1318 double *ps, Py_ssize_t *m_ptr)
1319 {
1320 void *v = NULL;
1321 Py_ssize_t m = *m_ptr;
1322
1323 m += m; /* double */
1324 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1325 double *p = *p_ptr;
1326 if (p == ps) {
1327 v = PyMem_Malloc(sizeof(double) * m);
1328 if (v != NULL)
1329 memcpy(v, ps, sizeof(double) * n);
1330 }
1331 else
1332 v = PyMem_Realloc(p, sizeof(double) * m);
1333 }
1334 if (v == NULL) { /* size overflow or no memory */
1335 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1336 return 1;
1337 }
1338 *p_ptr = (double*) v;
1339 *m_ptr = m;
1340 return 0;
1341 }
1342
1343 /* Full precision summation of a sequence of floats.
1344
1345 def msum(iterable):
1346 partials = [] # sorted, non-overlapping partial sums
1347 for x in iterable:
1348 i = 0
1349 for y in partials:
1350 if abs(x) < abs(y):
1351 x, y = y, x
1352 hi = x + y
1353 lo = y - (hi - x)
1354 if lo:
1355 partials[i] = lo
1356 i += 1
1357 x = hi
1358 partials[i:] = [x]
1359 return sum_exact(partials)
1360
1361 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1362 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1363 partial so that the list of partial sums remains exact.
1364
1365 Sum_exact() adds the partial sums exactly and correctly rounds the final
1366 result (using the round-half-to-even rule). The items in partials remain
1367 non-zero, non-special, non-overlapping and strictly increasing in
1368 magnitude, but possibly not all having the same sign.
1369
1370 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1371 */
1372
1373 /*[clinic input]
1374 math.fsum
1375
1376 seq: object
1377 /
1378
1379 Return an accurate floating point sum of values in the iterable seq.
1380
1381 Assumes IEEE-754 floating point arithmetic.
1382 [clinic start generated code]*/
1383
1384 static PyObject *
1385 math_fsum(PyObject *module, PyObject *seq)
1386 /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
1387 {
1388 PyObject *item, *iter, *sum = NULL;
1389 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1390 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1391 double xsave, special_sum = 0.0, inf_sum = 0.0;
1392 double hi, yr, lo = 0.0;
1393
1394 iter = PyObject_GetIter(seq);
1395 if (iter == NULL)
1396 return NULL;
1397
1398 for(;;) { /* for x in iterable */
1399 assert(0 <= n && n <= m);
1400 assert((m == NUM_PARTIALS && p == ps) ||
1401 (m > NUM_PARTIALS && p != NULL));
1402
1403 item = PyIter_Next(iter);
1404 if (item == NULL) {
1405 if (PyErr_Occurred())
1406 goto _fsum_error;
1407 break;
1408 }
1409 ASSIGN_DOUBLE(x, item, error_with_item);
1410 Py_DECREF(item);
1411
1412 xsave = x;
1413 for (i = j = 0; j < n; j++) { /* for y in partials */
1414 y = p[j];
1415 if (fabs(x) < fabs(y)) {
1416 t = x; x = y; y = t;
1417 }
1418 hi = x + y;
1419 yr = hi - x;
1420 lo = y - yr;
1421 if (lo != 0.0)
1422 p[i++] = lo;
1423 x = hi;
1424 }
1425
1426 n = i; /* ps[i:] = [x] */
1427 if (x != 0.0) {
1428 if (! Py_IS_FINITE(x)) {
1429 /* a nonfinite x could arise either as
1430 a result of intermediate overflow, or
1431 as a result of a nan or inf in the
1432 summands */
1433 if (Py_IS_FINITE(xsave)) {
1434 PyErr_SetString(PyExc_OverflowError,
1435 "intermediate overflow in fsum");
1436 goto _fsum_error;
1437 }
1438 if (Py_IS_INFINITY(xsave))
1439 inf_sum += xsave;
1440 special_sum += xsave;
1441 /* reset partials */
1442 n = 0;
1443 }
1444 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1445 goto _fsum_error;
1446 else
1447 p[n++] = x;
1448 }
1449 }
1450
1451 if (special_sum != 0.0) {
1452 if (Py_IS_NAN(inf_sum))
1453 PyErr_SetString(PyExc_ValueError,
1454 "-inf + inf in fsum");
1455 else
1456 sum = PyFloat_FromDouble(special_sum);
1457 goto _fsum_error;
1458 }
1459
1460 hi = 0.0;
1461 if (n > 0) {
1462 hi = p[--n];
1463 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1464 inexact. */
1465 while (n > 0) {
1466 x = hi;
1467 y = p[--n];
1468 assert(fabs(y) < fabs(x));
1469 hi = x + y;
1470 yr = hi - x;
1471 lo = y - yr;
1472 if (lo != 0.0)
1473 break;
1474 }
1475 /* Make half-even rounding work across multiple partials.
1476 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1477 digit to two instead of down to zero (the 1e-16 makes the 1
1478 slightly closer to two). With a potential 1 ULP rounding
1479 error fixed-up, math.fsum() can guarantee commutativity. */
1480 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1481 (lo > 0.0 && p[n-1] > 0.0))) {
1482 y = lo * 2.0;
1483 x = hi + y;
1484 yr = x - hi;
1485 if (y == yr)
1486 hi = x;
1487 }
1488 }
1489 sum = PyFloat_FromDouble(hi);
1490
1491 _fsum_error:
1492 Py_DECREF(iter);
1493 if (p != ps)
1494 PyMem_Free(p);
1495 return sum;
1496
1497 error_with_item:
1498 Py_DECREF(item);
1499 goto _fsum_error;
1500 }
1501
1502 #undef NUM_PARTIALS
1503
1504
1505 static unsigned long
1506 count_set_bits(unsigned long n)
1507 {
1508 unsigned long count = 0;
1509 while (n != 0) {
1510 ++count;
1511 n &= n - 1; /* clear least significant bit */
1512 }
1513 return count;
1514 }
1515
1516 /* Integer square root
1517
1518 Given a nonnegative integer `n`, we want to compute the largest integer
1519 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1520 square root of `n`.
1521
1522 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1523 a positive integer `n`, the algorithm produces at each iteration an integer
1524 approximation `a` to the square root of `n >> s` for some even integer `s`,
1525 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1526 zero and we have an approximation to the square root of `n` itself.
1527
1528 At every step, the approximation `a` is strictly within 1.0 of the true square
1529 root, so we have
1530
1531 (a - 1)**2 < (n >> s) < (a + 1)**2
1532
1533 After the final iteration, a check-and-correct step is needed to determine
1534 whether `a` or `a - 1` gives the desired integer square root of `n`.
1535
1536 The algorithm is remarkable in its simplicity. There's no need for a
1537 per-iteration check-and-correct step, and termination is straightforward: the
1538 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1539 for `n > 1`). The only tricky part of the correctness proof is in establishing
1540 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1541 iteration to the next. A sketch of the proof of this is given below.
1542
1543 In addition to the proof sketch, a formal, computer-verified proof
1544 of correctness (using Lean) of an equivalent recursive algorithm can be found
1545 here:
1546
1547 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1548
1549
1550 Here's Python code equivalent to the C implementation below:
1551
1552 def isqrt(n):
1553 """
1554 Return the integer part of the square root of the input.
1555 """
1556 n = operator.index(n)
1557
1558 if n < 0:
1559 raise ValueError("isqrt() argument must be nonnegative")
1560 if n == 0:
1561 return 0
1562
1563 c = (n.bit_length() - 1) // 2
1564 a = 1
1565 d = 0
1566 for s in reversed(range(c.bit_length())):
1567 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1568 e = d
1569 d = c >> s
1570 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1571
1572 return a - (a*a > n)
1573
1574
1575 Sketch of proof of correctness
1576 ------------------------------
1577
1578 The delicate part of the correctness proof is showing that the loop invariant
1579 is preserved from one iteration to the next. That is, just before the line
1580
1581 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1582
1583 is executed in the above code, we know that
1584
1585 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1586
1587 (since `e` is always the value of `d` from the previous iteration). We must
1588 prove that after that line is executed, we have
1589
1590 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1591
1592 To facilitate the proof, we make some changes of notation. Write `m` for
1593 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1594
1595 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1596
1597 or equivalently:
1598
1599 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1600
1601 Then we can rewrite (1) as:
1602
1603 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1604
1605 and we must show that (b - 1)**2 < m < (b + 1)**2.
1606
1607 From this point on, we switch to mathematical notation, so `/` means exact
1608 division rather than integer division and `^` is used for exponentiation. We
1609 use the `√` symbol for the exact square root. In (3), we can remove the
1610 implicit floor operation to give:
1611
1612 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1613
1614 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1615
1616 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1617
1618 Squaring and dividing through by `2^(d-e+1) a` gives
1619
1620 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1621
1622 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1623 right-hand side of (6) with `1`, and now replacing the central
1624 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1625
1626 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1627
1628 Or equivalently, from (2):
1629
1630 (7) -1 < b - √m < 1
1631
1632 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1633 to prove.
1634
1635 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1636 a` that was used to get line (7) above. From the definition of `c`, we have
1637 `4^c <= n`, which implies
1638
1639 (8) 4^d <= m
1640
1641 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1642 that `2d - 2e - 1 <= d` and hence that
1643
1644 (9) 4^(2d - 2e - 1) <= m
1645
1646 Dividing both sides by `4^(d - e)` gives
1647
1648 (10) 4^(d - e - 1) <= m / 4^(d - e)
1649
1650 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1651
1652 (11) 4^(d - e - 1) < (a + 1)^2
1653
1654 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1655 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1656 completes the proof sketch.
1657
1658 */
1659
1660 /*
1661 The _approximate_isqrt_tab table provides approximate square roots for
1662 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1663
1664 a = _approximate_isqrt_tab[(n >> 8) - 64]
1665
1666 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1667
1668 The table was computed in Python using the expression:
1669
1670 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1671 */
1672
1673 static const uint8_t _approximate_isqrt_tab[192] = {
1674 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1675 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1676 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1677 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1678 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1679 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1680 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1681 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1682 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1683 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1684 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1685 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1686 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1687 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1688 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1689 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1690 };
1691
1692 /* Approximate square root of a large 64-bit integer.
1693
1694 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1695 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1696
1697 static inline uint32_t
1698 _approximate_isqrt(uint64_t n)
1699 {
1700 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1701 u = (u << 7) + (uint32_t)(n >> 41) / u;
1702 return (u << 15) + (uint32_t)((n >> 17) / u);
1703 }
1704
1705 /*[clinic input]
1706 math.isqrt
1707
1708 n: object
1709 /
1710
1711 Return the integer part of the square root of the input.
1712 [clinic start generated code]*/
1713
1714 static PyObject *
1715 math_isqrt(PyObject *module, PyObject *n)
1716 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1717 {
1718 int a_too_large, c_bit_length;
1719 size_t c, d;
1720 uint64_t m;
1721 uint32_t u;
1722 PyObject *a = NULL, *b;
1723
1724 n = _PyNumber_Index(n);
1725 if (n == NULL) {
1726 return NULL;
1727 }
1728
1729 if (_PyLong_Sign(n) < 0) {
1730 PyErr_SetString(
1731 PyExc_ValueError,
1732 "isqrt() argument must be nonnegative");
1733 goto error;
1734 }
1735 if (_PyLong_Sign(n) == 0) {
1736 Py_DECREF(n);
1737 return PyLong_FromLong(0);
1738 }
1739
1740 /* c = (n.bit_length() - 1) // 2 */
1741 c = _PyLong_NumBits(n);
1742 if (c == (size_t)(-1)) {
1743 goto error;
1744 }
1745 c = (c - 1U) / 2U;
1746
1747 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1748 fast, almost branch-free algorithm. */
1749 if (c <= 31U) {
1750 int shift = 31 - (int)c;
1751 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1752 Py_DECREF(n);
1753 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1754 return NULL;
1755 }
1756 u = _approximate_isqrt(m << 2*shift) >> shift;
1757 u -= (uint64_t)u * u > m;
1758 return PyLong_FromUnsignedLong(u);
1759 }
1760
1761 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1762 arithmetic, then switch to using Python long integers. */
1763
1764 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1765 c_bit_length = 6;
1766 while ((c >> c_bit_length) > 0U) {
1767 ++c_bit_length;
1768 }
1769
1770 /* Initialise d and a. */
1771 d = c >> (c_bit_length - 5);
1772 b = _PyLong_Rshift(n, 2U*c - 62U);
1773 if (b == NULL) {
1774 goto error;
1775 }
1776 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1777 Py_DECREF(b);
1778 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1779 goto error;
1780 }
1781 u = _approximate_isqrt(m) >> (31U - d);
1782 a = PyLong_FromUnsignedLong(u);
1783 if (a == NULL) {
1784 goto error;
1785 }
1786
1787 for (int s = c_bit_length - 6; s >= 0; --s) {
1788 PyObject *q;
1789 size_t e = d;
1790
1791 d = c >> s;
1792
1793 /* q = (n >> 2*c - e - d + 1) // a */
1794 q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1795 if (q == NULL) {
1796 goto error;
1797 }
1798 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1799 if (q == NULL) {
1800 goto error;
1801 }
1802
1803 /* a = (a << d - 1 - e) + q */
1804 Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1805 if (a == NULL) {
1806 Py_DECREF(q);
1807 goto error;
1808 }
1809 Py_SETREF(a, PyNumber_Add(a, q));
1810 Py_DECREF(q);
1811 if (a == NULL) {
1812 goto error;
1813 }
1814 }
1815
1816 /* The correct result is either a or a - 1. Figure out which, and
1817 decrement a if necessary. */
1818
1819 /* a_too_large = n < a * a */
1820 b = PyNumber_Multiply(a, a);
1821 if (b == NULL) {
1822 goto error;
1823 }
1824 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1825 Py_DECREF(b);
1826 if (a_too_large == -1) {
1827 goto error;
1828 }
1829
1830 if (a_too_large) {
1831 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1832 }
1833 Py_DECREF(n);
1834 return a;
1835
1836 error:
1837 Py_XDECREF(a);
1838 Py_DECREF(n);
1839 return NULL;
1840 }
1841
1842 /* Divide-and-conquer factorial algorithm
1843 *
1844 * Based on the formula and pseudo-code provided at:
1845 * http://www.luschny.de/math/factorial/binarysplitfact.html
1846 *
1847 * Faster algorithms exist, but they're more complicated and depend on
1848 * a fast prime factorization algorithm.
1849 *
1850 * Notes on the algorithm
1851 * ----------------------
1852 *
1853 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1854 * computed separately, and then combined using a left shift.
1855 *
1856 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1857 * odd divisor) of factorial(n), using the formula:
1858 *
1859 * factorial_odd_part(n) =
1860 *
1861 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1862 *
1863 * Example: factorial_odd_part(20) =
1864 *
1865 * (1) *
1866 * (1) *
1867 * (1 * 3 * 5) *
1868 * (1 * 3 * 5 * 7 * 9) *
1869 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1870 *
1871 * Here i goes from large to small: the first term corresponds to i=4 (any
1872 * larger i gives an empty product), and the last term corresponds to i=0.
1873 * Each term can be computed from the last by multiplying by the extra odd
1874 * numbers required: e.g., to get from the penultimate term to the last one,
1875 * we multiply by (11 * 13 * 15 * 17 * 19).
1876 *
1877 * To see a hint of why this formula works, here are the same numbers as above
1878 * but with the even parts (i.e., the appropriate powers of 2) included. For
1879 * each subterm in the product for i, we multiply that subterm by 2**i:
1880 *
1881 * factorial(20) =
1882 *
1883 * (16) *
1884 * (8) *
1885 * (4 * 12 * 20) *
1886 * (2 * 6 * 10 * 14 * 18) *
1887 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1888 *
1889 * The factorial_partial_product function computes the product of all odd j in
1890 * range(start, stop) for given start and stop. It's used to compute the
1891 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1892 * operates recursively, repeatedly splitting the range into two roughly equal
1893 * pieces until the subranges are small enough to be computed using only C
1894 * integer arithmetic.
1895 *
1896 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1897 * the factorial) is computed independently in the main math_factorial
1898 * function. By standard results, its value is:
1899 *
1900 * two_valuation = n//2 + n//4 + n//8 + ....
1901 *
1902 * It can be shown (e.g., by complete induction on n) that two_valuation is
1903 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1904 * '1'-bits in the binary expansion of n.
1905 */
1906
1907 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1908 * divide and conquer. Assumes start and stop are odd and stop > start.
1909 * max_bits must be >= bit_length(stop - 2). */
1910
1911 static PyObject *
1912 factorial_partial_product(unsigned long start, unsigned long stop,
1913 unsigned long max_bits)
1914 {
1915 unsigned long midpoint, num_operands;
1916 PyObject *left = NULL, *right = NULL, *result = NULL;
1917
1918 /* If the return value will fit an unsigned long, then we can
1919 * multiply in a tight, fast loop where each multiply is O(1).
1920 * Compute an upper bound on the number of bits required to store
1921 * the answer.
1922 *
1923 * Storing some integer z requires floor(lg(z))+1 bits, which is
1924 * conveniently the value returned by bit_length(z). The
1925 * product x*y will require at most
1926 * bit_length(x) + bit_length(y) bits to store, based
1927 * on the idea that lg product = lg x + lg y.
1928 *
1929 * We know that stop - 2 is the largest number to be multiplied. From
1930 * there, we have: bit_length(answer) <= num_operands *
1931 * bit_length(stop - 2)
1932 */
1933
1934 num_operands = (stop - start) / 2;
1935 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1936 * unlikely case of an overflow in num_operands * max_bits. */
1937 if (num_operands <= 8 * SIZEOF_LONG &&
1938 num_operands * max_bits <= 8 * SIZEOF_LONG) {
1939 unsigned long j, total;
1940 for (total = start, j = start + 2; j < stop; j += 2)
1941 total *= j;
1942 return PyLong_FromUnsignedLong(total);
1943 }
1944
1945 /* find midpoint of range(start, stop), rounded up to next odd number. */
1946 midpoint = (start + num_operands) | 1;
1947 left = factorial_partial_product(start, midpoint,
1948 _Py_bit_length(midpoint - 2));
1949 if (left == NULL)
1950 goto error;
1951 right = factorial_partial_product(midpoint, stop, max_bits);
1952 if (right == NULL)
1953 goto error;
1954 result = PyNumber_Multiply(left, right);
1955
1956 error:
1957 Py_XDECREF(left);
1958 Py_XDECREF(right);
1959 return result;
1960 }
1961
1962 /* factorial_odd_part: compute the odd part of factorial(n). */
1963
1964 static PyObject *
1965 factorial_odd_part(unsigned long n)
1966 {
1967 long i;
1968 unsigned long v, lower, upper;
1969 PyObject *partial, *tmp, *inner, *outer;
1970
1971 inner = PyLong_FromLong(1);
1972 if (inner == NULL)
1973 return NULL;
1974 outer = Py_NewRef(inner);
1975
1976 upper = 3;
1977 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
1978 v = n >> i;
1979 if (v <= 2)
1980 continue;
1981 lower = upper;
1982 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
1983 upper = (v + 1) | 1;
1984 /* Here inner is the product of all odd integers j in the range (0,
1985 n/2**(i+1)]. The factorial_partial_product call below gives the
1986 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
1987 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
1988 /* inner *= partial */
1989 if (partial == NULL)
1990 goto error;
1991 tmp = PyNumber_Multiply(inner, partial);
1992 Py_DECREF(partial);
1993 if (tmp == NULL)
1994 goto error;
1995 Py_SETREF(inner, tmp);
1996 /* Now inner is the product of all odd integers j in the range (0,
1997 n/2**i], giving the inner product in the formula above. */
1998
1999 /* outer *= inner; */
2000 tmp = PyNumber_Multiply(outer, inner);
2001 if (tmp == NULL)
2002 goto error;
2003 Py_SETREF(outer, tmp);
2004 }
2005 Py_DECREF(inner);
2006 return outer;
2007
2008 error:
2009 Py_DECREF(outer);
2010 Py_DECREF(inner);
2011 return NULL;
2012 }
2013
2014
2015 /* Lookup table for small factorial values */
2016
2017 static const unsigned long SmallFactorials[] = {
2018 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2019 362880, 3628800, 39916800, 479001600,
2020 #if SIZEOF_LONG >= 8
2021 6227020800, 87178291200, 1307674368000,
2022 20922789888000, 355687428096000, 6402373705728000,
2023 121645100408832000, 2432902008176640000
2024 #endif
2025 };
2026
2027 /*[clinic input]
2028 math.factorial
2029
2030 n as arg: object
2031 /
2032
2033 Find n!.
2034
2035 Raise a ValueError if x is negative or non-integral.
2036 [clinic start generated code]*/
2037
2038 static PyObject *
2039 math_factorial(PyObject *module, PyObject *arg)
2040 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2041 {
2042 long x, two_valuation;
2043 int overflow;
2044 PyObject *result, *odd_part;
2045
2046 x = PyLong_AsLongAndOverflow(arg, &overflow);
2047 if (x == -1 && PyErr_Occurred()) {
2048 return NULL;
2049 }
2050 else if (overflow == 1) {
2051 PyErr_Format(PyExc_OverflowError,
2052 "factorial() argument should not exceed %ld",
2053 LONG_MAX);
2054 return NULL;
2055 }
2056 else if (overflow == -1 || x < 0) {
2057 PyErr_SetString(PyExc_ValueError,
2058 "factorial() not defined for negative values");
2059 return NULL;
2060 }
2061
2062 /* use lookup table if x is small */
2063 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2064 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2065
2066 /* else express in the form odd_part * 2**two_valuation, and compute as
2067 odd_part << two_valuation. */
2068 odd_part = factorial_odd_part(x);
2069 if (odd_part == NULL)
2070 return NULL;
2071 two_valuation = x - count_set_bits(x);
2072 result = _PyLong_Lshift(odd_part, two_valuation);
2073 Py_DECREF(odd_part);
2074 return result;
2075 }
2076
2077
2078 /*[clinic input]
2079 math.trunc
2080
2081 x: object
2082 /
2083
2084 Truncates the Real x to the nearest Integral toward 0.
2085
2086 Uses the __trunc__ magic method.
2087 [clinic start generated code]*/
2088
2089 static PyObject *
2090 math_trunc(PyObject *module, PyObject *x)
2091 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2092 {
2093 PyObject *trunc, *result;
2094
2095 if (PyFloat_CheckExact(x)) {
2096 return PyFloat_Type.tp_as_number->nb_int(x);
2097 }
2098
2099 if (Py_TYPE(x)->tp_dict == NULL) {
2100 if (PyType_Ready(Py_TYPE(x)) < 0)
2101 return NULL;
2102 }
2103
2104 math_module_state *state = get_math_module_state(module);
2105 trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
2106 if (trunc == NULL) {
2107 if (!PyErr_Occurred())
2108 PyErr_Format(PyExc_TypeError,
2109 "type %.100s doesn't define __trunc__ method",
2110 Py_TYPE(x)->tp_name);
2111 return NULL;
2112 }
2113 result = _PyObject_CallNoArgs(trunc);
2114 Py_DECREF(trunc);
2115 return result;
2116 }
2117
2118
2119 /*[clinic input]
2120 math.frexp
2121
2122 x: double
2123 /
2124
2125 Return the mantissa and exponent of x, as pair (m, e).
2126
2127 m is a float and e is an int, such that x = m * 2.**e.
2128 If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2129 [clinic start generated code]*/
2130
2131 static PyObject *
2132 math_frexp_impl(PyObject *module, double x)
2133 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2134 {
2135 int i;
2136 /* deal with special cases directly, to sidestep platform
2137 differences */
2138 if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2139 i = 0;
2140 }
2141 else {
2142 x = frexp(x, &i);
2143 }
2144 return Py_BuildValue("(di)", x, i);
2145 }
2146
2147
2148 /*[clinic input]
2149 math.ldexp
2150
2151 x: double
2152 i: object
2153 /
2154
2155 Return x * (2**i).
2156
2157 This is essentially the inverse of frexp().
2158 [clinic start generated code]*/
2159
2160 static PyObject *
2161 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2162 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2163 {
2164 double r;
2165 long exp;
2166 int overflow;
2167
2168 if (PyLong_Check(i)) {
2169 /* on overflow, replace exponent with either LONG_MAX
2170 or LONG_MIN, depending on the sign. */
2171 exp = PyLong_AsLongAndOverflow(i, &overflow);
2172 if (exp == -1 && PyErr_Occurred())
2173 return NULL;
2174 if (overflow)
2175 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2176 }
2177 else {
2178 PyErr_SetString(PyExc_TypeError,
2179 "Expected an int as second argument to ldexp.");
2180 return NULL;
2181 }
2182
2183 if (x == 0. || !Py_IS_FINITE(x)) {
2184 /* NaNs, zeros and infinities are returned unchanged */
2185 r = x;
2186 errno = 0;
2187 } else if (exp > INT_MAX) {
2188 /* overflow */
2189 r = copysign(Py_HUGE_VAL, x);
2190 errno = ERANGE;
2191 } else if (exp < INT_MIN) {
2192 /* underflow to +-0 */
2193 r = copysign(0., x);
2194 errno = 0;
2195 } else {
2196 errno = 0;
2197 r = ldexp(x, (int)exp);
2198 if (Py_IS_INFINITY(r))
2199 errno = ERANGE;
2200 }
2201
2202 if (errno && is_error(r))
2203 return NULL;
2204 return PyFloat_FromDouble(r);
2205 }
2206
2207
2208 /*[clinic input]
2209 math.modf
2210
2211 x: double
2212 /
2213
2214 Return the fractional and integer parts of x.
2215
2216 Both results carry the sign of x and are floats.
2217 [clinic start generated code]*/
2218
2219 static PyObject *
2220 math_modf_impl(PyObject *module, double x)
2221 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2222 {
2223 double y;
2224 /* some platforms don't do the right thing for NaNs and
2225 infinities, so we take care of special cases directly. */
2226 if (!Py_IS_FINITE(x)) {
2227 if (Py_IS_INFINITY(x))
2228 return Py_BuildValue("(dd)", copysign(0., x), x);
2229 else if (Py_IS_NAN(x))
2230 return Py_BuildValue("(dd)", x, x);
2231 }
2232
2233 errno = 0;
2234 x = modf(x, &y);
2235 return Py_BuildValue("(dd)", x, y);
2236 }
2237
2238
2239 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2240 do that by itself -- loghelper can. func is log or log10, and name is
2241 "log" or "log10". Note that overflow of the result isn't possible: an int
2242 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2243 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2244 small enough to fit in an IEEE single. log and log10 are even smaller.
2245 However, intermediate overflow is possible for an int if the number of bits
2246 in that int is larger than PY_SSIZE_T_MAX. */
2247
2248 static PyObject*
2249 loghelper(PyObject* arg, double (*func)(double))
2250 {
2251 /* If it is int, do it ourselves. */
2252 if (PyLong_Check(arg)) {
2253 double x, result;
2254 Py_ssize_t e;
2255
2256 /* Negative or zero inputs give a ValueError. */
2257 if (Py_SIZE(arg) <= 0) {
2258 PyErr_SetString(PyExc_ValueError,
2259 "math domain error");
2260 return NULL;
2261 }
2262
2263 x = PyLong_AsDouble(arg);
2264 if (x == -1.0 && PyErr_Occurred()) {
2265 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2266 return NULL;
2267 /* Here the conversion to double overflowed, but it's possible
2268 to compute the log anyway. Clear the exception and continue. */
2269 PyErr_Clear();
2270 x = _PyLong_Frexp((PyLongObject *)arg, &e);
2271 if (x == -1.0 && PyErr_Occurred())
2272 return NULL;
2273 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2274 result = func(x) + func(2.0) * e;
2275 }
2276 else
2277 /* Successfully converted x to a double. */
2278 result = func(x);
2279 return PyFloat_FromDouble(result);
2280 }
2281
2282 /* Else let libm handle it by itself. */
2283 return math_1(arg, func, 0);
2284 }
2285
2286
2287 /*[clinic input]
2288 math.log
2289
2290 x: object
2291 [
2292 base: object(c_default="NULL") = math.e
2293 ]
2294 /
2295
2296 Return the logarithm of x to the given base.
2297
2298 If the base not specified, returns the natural logarithm (base e) of x.
2299 [clinic start generated code]*/
2300
2301 static PyObject *
2302 math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2303 PyObject *base)
2304 /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
2305 {
2306 PyObject *num, *den;
2307 PyObject *ans;
2308
2309 num = loghelper(x, m_log);
2310 if (num == NULL || base == NULL)
2311 return num;
2312
2313 den = loghelper(base, m_log);
2314 if (den == NULL) {
2315 Py_DECREF(num);
2316 return NULL;
2317 }
2318
2319 ans = PyNumber_TrueDivide(num, den);
2320 Py_DECREF(num);
2321 Py_DECREF(den);
2322 return ans;
2323 }
2324
2325
2326 /*[clinic input]
2327 math.log2
2328
2329 x: object
2330 /
2331
2332 Return the base 2 logarithm of x.
2333 [clinic start generated code]*/
2334
2335 static PyObject *
2336 math_log2(PyObject *module, PyObject *x)
2337 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2338 {
2339 return loghelper(x, m_log2);
2340 }
2341
2342
2343 /*[clinic input]
2344 math.log10
2345
2346 x: object
2347 /
2348
2349 Return the base 10 logarithm of x.
2350 [clinic start generated code]*/
2351
2352 static PyObject *
2353 math_log10(PyObject *module, PyObject *x)
2354 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2355 {
2356 return loghelper(x, m_log10);
2357 }
2358
2359
2360 /*[clinic input]
2361 math.fmod
2362
2363 x: double
2364 y: double
2365 /
2366
2367 Return fmod(x, y), according to platform C.
2368
2369 x % y may differ.
2370 [clinic start generated code]*/
2371
2372 static PyObject *
2373 math_fmod_impl(PyObject *module, double x, double y)
2374 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2375 {
2376 double r;
2377 /* fmod(x, +/-Inf) returns x for finite x. */
2378 if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2379 return PyFloat_FromDouble(x);
2380 errno = 0;
2381 r = fmod(x, y);
2382 if (Py_IS_NAN(r)) {
2383 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2384 errno = EDOM;
2385 else
2386 errno = 0;
2387 }
2388 if (errno && is_error(r))
2389 return NULL;
2390 else
2391 return PyFloat_FromDouble(r);
2392 }
2393
2394 /*
2395 Given a *vec* of values, compute the vector norm:
2396
2397 sqrt(sum(x ** 2 for x in vec))
2398
2399 The *max* variable should be equal to the largest fabs(x).
2400 The *n* variable is the length of *vec*.
2401 If n==0, then *max* should be 0.0.
2402 If an infinity is present in the vec, *max* should be INF.
2403 The *found_nan* variable indicates whether some member of
2404 the *vec* is a NaN.
2405
2406 To avoid overflow/underflow and to achieve high accuracy giving results
2407 that are almost always correctly rounded, four techniques are used:
2408
2409 * lossless scaling using a power-of-two scaling factor
2410 * accurate squaring using Veltkamp-Dekker splitting [1]
2411 or an equivalent with an fma() call
2412 * compensated summation using a variant of the Neumaier algorithm [2]
2413 * differential correction of the square root [3]
2414
2415 The usual presentation of the Neumaier summation algorithm has an
2416 expensive branch depending on which operand has the larger
2417 magnitude. We avoid this cost by arranging the calculation so that
2418 fabs(csum) is always as large as fabs(x).
2419
2420 To establish the invariant, *csum* is initialized to 1.0 which is
2421 always larger than x**2 after scaling or after division by *max*.
2422 After the loop is finished, the initial 1.0 is subtracted out for a
2423 net zero effect on the final sum. Since *csum* will be greater than
2424 1.0, the subtraction of 1.0 will not cause fractional digits to be
2425 dropped from *csum*.
2426
2427 To get the full benefit from compensated summation, the largest
2428 addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2429 scaling or division by *max* should not be skipped even if not
2430 otherwise needed to prevent overflow or loss of precision.
2431
2432 The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2433 gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2434 algorithm gives a *hi* value that is correctly rounded to half
2435 precision. When a value at or below 1.0 is correctly rounded, it
2436 never goes above 1.0. And when values at or below 1.0 are squared,
2437 they remain at or below 1.0, thus preserving the summation invariant.
2438
2439 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2440 each scaled vector element has a magnitude less than 1.0. After the
2441 Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2442 value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2443 Given that csum >= 1.0, we have:
2444 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2445 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2446
2447 To minimize loss of information during the accumulation of fractional
2448 values, each term has a separate accumulator. This also breaks up
2449 sequential dependencies in the inner loop so the CPU can maximize
2450 floating point throughput. [4] On a 2.6 GHz Haswell, adding one
2451 dimension has an incremental cost of only 5ns -- for example when
2452 moving from hypot(x,y) to hypot(x,y,z).
2453
2454 The square root differential correction is needed because a
2455 correctly rounded square root of a correctly rounded sum of
2456 squares can still be off by as much as one ulp.
2457
2458 The differential correction starts with a value *x* that is
2459 the difference between the square of *h*, the possibly inaccurately
2460 rounded square root, and the accurately computed sum of squares.
2461 The correction is the first order term of the Maclaurin series
2462 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2463
2464 Essentially, this differential correction is equivalent to one
2465 refinement step in Newton's divide-and-average square root
2466 algorithm, effectively doubling the number of accurate bits.
2467 This technique is used in Dekker's SQRT2 algorithm and again in
2468 Borges' ALGORITHM 4 and 5.
2469
2470 The hypot() function is faithfully rounded (less than 1 ulp error)
2471 and usually correctly rounded (within 1/2 ulp). The squaring
2472 step is exact. The Neumaier summation computes as if in doubled
2473 precision (106 bits) and has the advantage that its input squares
2474 are non-negative so that the condition number of the sum is one.
2475 The square root with a differential correction is likewise computed
2476 as if in double precision.
2477
2478 For n <= 1000, prior to the final addition that rounds the overall
2479 result, the internal accuracy of "h" together with its correction of
2480 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2481 against a Decimal implementation with prec=300. After 100 million
2482 trials, no incorrectly rounded examples were found. In addition,
2483 perfect commutativity (all permutations are exactly equal) was
2484 verified for 1 billion random inputs with n=5. [7]
2485
2486 References:
2487
2488 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2489 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2490 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
2491 4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
2492 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2493 6. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
2494 7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
2495
2496 */
2497
2498 static inline double
2499 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2500 {
2501 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2502 DoubleLength pr, sm;
2503 int max_e;
2504 Py_ssize_t i;
2505
2506 if (Py_IS_INFINITY(max)) {
2507 return max;
2508 }
2509 if (found_nan) {
2510 return Py_NAN;
2511 }
2512 if (max == 0.0 || n <= 1) {
2513 return max;
2514 }
2515 frexp(max, &max_e);
2516 if (max_e < -1023) {
2517 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
2518 So we first perform lossless scaling from subnormals back to normals,
2519 then recurse back to vector_norm(), and then finally undo the scaling.
2520 */
2521 for (i=0 ; i < n ; i++) {
2522 vec[i] /= DBL_MIN;
2523 }
2524 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2525 }
2526 scale = ldexp(1.0, -max_e);
2527 assert(max * scale >= 0.5);
2528 assert(max * scale < 1.0);
2529 for (i=0 ; i < n ; i++) {
2530 x = vec[i];
2531 assert(Py_IS_FINITE(x) && fabs(x) <= max);
2532
2533 x *= scale;
2534 assert(fabs(x) < 1.0);
2535
2536 pr = dl_mul(x, x);
2537 assert(pr.hi <= 1.0);
2538
2539 sm = dl_fast_sum(csum, pr.hi);
2540 csum = sm.hi;
2541 frac1 += pr.lo;
2542 frac2 += sm.lo;
2543 }
2544 h = sqrt(csum - 1.0 + (frac1 + frac2));
2545 pr = dl_mul(-h, h);
2546 sm = dl_fast_sum(csum, pr.hi);
2547 csum = sm.hi;
2548 frac1 += pr.lo;
2549 frac2 += sm.lo;
2550 x = csum - 1.0 + (frac1 + frac2);
2551 return (h + x / (2.0 * h)) / scale;
2552 }
2553
2554 #define NUM_STACK_ELEMS 16
2555
2556 /*[clinic input]
2557 math.dist
2558
2559 p: object
2560 q: object
2561 /
2562
2563 Return the Euclidean distance between two points p and q.
2564
2565 The points should be specified as sequences (or iterables) of
2566 coordinates. Both inputs must have the same dimension.
2567
2568 Roughly equivalent to:
2569 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2570 [clinic start generated code]*/
2571
2572 static PyObject *
2573 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2574 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2575 {
2576 PyObject *item;
2577 double max = 0.0;
2578 double x, px, qx, result;
2579 Py_ssize_t i, m, n;
2580 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2581 double diffs_on_stack[NUM_STACK_ELEMS];
2582 double *diffs = diffs_on_stack;
2583
2584 if (!PyTuple_Check(p)) {
2585 p = PySequence_Tuple(p);
2586 if (p == NULL) {
2587 return NULL;
2588 }
2589 p_allocated = 1;
2590 }
2591 if (!PyTuple_Check(q)) {
2592 q = PySequence_Tuple(q);
2593 if (q == NULL) {
2594 if (p_allocated) {
2595 Py_DECREF(p);
2596 }
2597 return NULL;
2598 }
2599 q_allocated = 1;
2600 }
2601
2602 m = PyTuple_GET_SIZE(p);
2603 n = PyTuple_GET_SIZE(q);
2604 if (m != n) {
2605 PyErr_SetString(PyExc_ValueError,
2606 "both points must have the same number of dimensions");
2607 goto error_exit;
2608 }
2609 if (n > NUM_STACK_ELEMS) {
2610 diffs = (double *) PyObject_Malloc(n * sizeof(double));
2611 if (diffs == NULL) {
2612 PyErr_NoMemory();
2613 goto error_exit;
2614 }
2615 }
2616 for (i=0 ; i<n ; i++) {
2617 item = PyTuple_GET_ITEM(p, i);
2618 ASSIGN_DOUBLE(px, item, error_exit);
2619 item = PyTuple_GET_ITEM(q, i);
2620 ASSIGN_DOUBLE(qx, item, error_exit);
2621 x = fabs(px - qx);
2622 diffs[i] = x;
2623 found_nan |= Py_IS_NAN(x);
2624 if (x > max) {
2625 max = x;
2626 }
2627 }
2628 result = vector_norm(n, diffs, max, found_nan);
2629 if (diffs != diffs_on_stack) {
2630 PyObject_Free(diffs);
2631 }
2632 if (p_allocated) {
2633 Py_DECREF(p);
2634 }
2635 if (q_allocated) {
2636 Py_DECREF(q);
2637 }
2638 return PyFloat_FromDouble(result);
2639
2640 error_exit:
2641 if (diffs != diffs_on_stack) {
2642 PyObject_Free(diffs);
2643 }
2644 if (p_allocated) {
2645 Py_DECREF(p);
2646 }
2647 if (q_allocated) {
2648 Py_DECREF(q);
2649 }
2650 return NULL;
2651 }
2652
2653 /* AC: cannot convert yet, waiting for *args support */
2654 static PyObject *
2655 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2656 {
2657 Py_ssize_t i;
2658 PyObject *item;
2659 double max = 0.0;
2660 double x, result;
2661 int found_nan = 0;
2662 double coord_on_stack[NUM_STACK_ELEMS];
2663 double *coordinates = coord_on_stack;
2664
2665 if (nargs > NUM_STACK_ELEMS) {
2666 coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
2667 if (coordinates == NULL) {
2668 return PyErr_NoMemory();
2669 }
2670 }
2671 for (i = 0; i < nargs; i++) {
2672 item = args[i];
2673 ASSIGN_DOUBLE(x, item, error_exit);
2674 x = fabs(x);
2675 coordinates[i] = x;
2676 found_nan |= Py_IS_NAN(x);
2677 if (x > max) {
2678 max = x;
2679 }
2680 }
2681 result = vector_norm(nargs, coordinates, max, found_nan);
2682 if (coordinates != coord_on_stack) {
2683 PyObject_Free(coordinates);
2684 }
2685 return PyFloat_FromDouble(result);
2686
2687 error_exit:
2688 if (coordinates != coord_on_stack) {
2689 PyObject_Free(coordinates);
2690 }
2691 return NULL;
2692 }
2693
2694 #undef NUM_STACK_ELEMS
2695
2696 PyDoc_STRVAR(math_hypot_doc,
2697 "hypot(*coordinates) -> value\n\n\
2698 Multidimensional Euclidean distance from the origin to a point.\n\
2699 \n\
2700 Roughly equivalent to:\n\
2701 sqrt(sum(x**2 for x in coordinates))\n\
2702 \n\
2703 For a two dimensional point (x, y), gives the hypotenuse\n\
2704 using the Pythagorean theorem: sqrt(x*x + y*y).\n\
2705 \n\
2706 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2707 \n\
2708 >>> hypot(3.0, 4.0)\n\
2709 5.0\n\
2710 ");
2711
2712 /** sumprod() ***************************************************************/
2713
2714 /* Forward declaration */
2715 static inline int _check_long_mult_overflow(long a, long b);
2716
2717 static inline bool
2718 long_add_would_overflow(long a, long b)
2719 {
2720 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2721 }
2722
2723 /*[clinic input]
2724 math.sumprod
2725
2726 p: object
2727 q: object
2728 /
2729
2730 Return the sum of products of values from two iterables p and q.
2731
2732 Roughly equivalent to:
2733
2734 sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
2735
2736 For float and mixed int/float inputs, the intermediate products
2737 and sums are computed with extended precision.
2738 [clinic start generated code]*/
2739
2740 static PyObject *
2741 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2742 /*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
2743 {
2744 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2745 PyObject *p_it, *q_it, *total;
2746 iternextfunc p_next, q_next;
2747 bool p_stopped = false, q_stopped = false;
2748 bool int_path_enabled = true, int_total_in_use = false;
2749 bool flt_path_enabled = true, flt_total_in_use = false;
2750 long int_total = 0;
2751 TripleLength flt_total = tl_zero;
2752
2753 p_it = PyObject_GetIter(p);
2754 if (p_it == NULL) {
2755 return NULL;
2756 }
2757 q_it = PyObject_GetIter(q);
2758 if (q_it == NULL) {
2759 Py_DECREF(p_it);
2760 return NULL;
2761 }
2762 total = PyLong_FromLong(0);
2763 if (total == NULL) {
2764 Py_DECREF(p_it);
2765 Py_DECREF(q_it);
2766 return NULL;
2767 }
2768 p_next = *Py_TYPE(p_it)->tp_iternext;
2769 q_next = *Py_TYPE(q_it)->tp_iternext;
2770 while (1) {
2771 bool finished;
2772
2773 assert (p_i == NULL);
2774 assert (q_i == NULL);
2775 assert (term_i == NULL);
2776 assert (new_total == NULL);
2777
2778 assert (p_it != NULL);
2779 assert (q_it != NULL);
2780 assert (total != NULL);
2781
2782 p_i = p_next(p_it);
2783 if (p_i == NULL) {
2784 if (PyErr_Occurred()) {
2785 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2786 goto err_exit;
2787 }
2788 PyErr_Clear();
2789 }
2790 p_stopped = true;
2791 }
2792 q_i = q_next(q_it);
2793 if (q_i == NULL) {
2794 if (PyErr_Occurred()) {
2795 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2796 goto err_exit;
2797 }
2798 PyErr_Clear();
2799 }
2800 q_stopped = true;
2801 }
2802 if (p_stopped != q_stopped) {
2803 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2804 goto err_exit;
2805 }
2806 finished = p_stopped & q_stopped;
2807
2808 if (int_path_enabled) {
2809
2810 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2811 int overflow;
2812 long int_p, int_q, int_prod;
2813
2814 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2815 if (overflow) {
2816 goto finalize_int_path;
2817 }
2818 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2819 if (overflow) {
2820 goto finalize_int_path;
2821 }
2822 if (_check_long_mult_overflow(int_p, int_q)) {
2823 goto finalize_int_path;
2824 }
2825 int_prod = int_p * int_q;
2826 if (long_add_would_overflow(int_total, int_prod)) {
2827 goto finalize_int_path;
2828 }
2829 int_total += int_prod;
2830 int_total_in_use = true;
2831 Py_CLEAR(p_i);
2832 Py_CLEAR(q_i);
2833 continue;
2834 }
2835
2836 finalize_int_path:
2837 // We're finished, overflowed, or have a non-int
2838 int_path_enabled = false;
2839 if (int_total_in_use) {
2840 term_i = PyLong_FromLong(int_total);
2841 if (term_i == NULL) {
2842 goto err_exit;
2843 }
2844 new_total = PyNumber_Add(total, term_i);
2845 if (new_total == NULL) {
2846 goto err_exit;
2847 }
2848 Py_SETREF(total, new_total);
2849 new_total = NULL;
2850 Py_CLEAR(term_i);
2851 int_total = 0; // An ounce of prevention, ...
2852 int_total_in_use = false;
2853 }
2854 }
2855
2856 if (flt_path_enabled) {
2857
2858 if (!finished) {
2859 double flt_p, flt_q;
2860 bool p_type_float = PyFloat_CheckExact(p_i);
2861 bool q_type_float = PyFloat_CheckExact(q_i);
2862 if (p_type_float && q_type_float) {
2863 flt_p = PyFloat_AS_DOUBLE(p_i);
2864 flt_q = PyFloat_AS_DOUBLE(q_i);
2865 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2866 /* We care about float/int pairs and int/float pairs because
2867 they arise naturally in several use cases such as price
2868 times quantity, measurements with integer weights, or
2869 data selected by a vector of bools. */
2870 flt_p = PyFloat_AS_DOUBLE(p_i);
2871 flt_q = PyLong_AsDouble(q_i);
2872 if (flt_q == -1.0 && PyErr_Occurred()) {
2873 PyErr_Clear();
2874 goto finalize_flt_path;
2875 }
2876 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(q_i))) {
2877 flt_q = PyFloat_AS_DOUBLE(q_i);
2878 flt_p = PyLong_AsDouble(p_i);
2879 if (flt_p == -1.0 && PyErr_Occurred()) {
2880 PyErr_Clear();
2881 goto finalize_flt_path;
2882 }
2883 } else {
2884 goto finalize_flt_path;
2885 }
2886 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2887 if (isfinite(new_flt_total.hi)) {
2888 flt_total = new_flt_total;
2889 flt_total_in_use = true;
2890 Py_CLEAR(p_i);
2891 Py_CLEAR(q_i);
2892 continue;
2893 }
2894 }
2895
2896 finalize_flt_path:
2897 // We're finished, overflowed, have a non-float, or got a non-finite value
2898 flt_path_enabled = false;
2899 if (flt_total_in_use) {
2900 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2901 if (term_i == NULL) {
2902 goto err_exit;
2903 }
2904 new_total = PyNumber_Add(total, term_i);
2905 if (new_total == NULL) {
2906 goto err_exit;
2907 }
2908 Py_SETREF(total, new_total);
2909 new_total = NULL;
2910 Py_CLEAR(term_i);
2911 flt_total = tl_zero;
2912 flt_total_in_use = false;
2913 }
2914 }
2915
2916 assert(!int_total_in_use);
2917 assert(!flt_total_in_use);
2918 if (finished) {
2919 goto normal_exit;
2920 }
2921 term_i = PyNumber_Multiply(p_i, q_i);
2922 if (term_i == NULL) {
2923 goto err_exit;
2924 }
2925 new_total = PyNumber_Add(total, term_i);
2926 if (new_total == NULL) {
2927 goto err_exit;
2928 }
2929 Py_SETREF(total, new_total);
2930 new_total = NULL;
2931 Py_CLEAR(p_i);
2932 Py_CLEAR(q_i);
2933 Py_CLEAR(term_i);
2934 }
2935
2936 normal_exit:
2937 Py_DECREF(p_it);
2938 Py_DECREF(q_it);
2939 return total;
2940
2941 err_exit:
2942 Py_DECREF(p_it);
2943 Py_DECREF(q_it);
2944 Py_DECREF(total);
2945 Py_XDECREF(p_i);
2946 Py_XDECREF(q_i);
2947 Py_XDECREF(term_i);
2948 Py_XDECREF(new_total);
2949 return NULL;
2950 }
2951
2952
2953 /* pow can't use math_2, but needs its own wrapper: the problem is
2954 that an infinite result can arise either as a result of overflow
2955 (in which case OverflowError should be raised) or as a result of
2956 e.g. 0.**-5. (for which ValueError needs to be raised.)
2957 */
2958
2959 /*[clinic input]
2960 math.pow
2961
2962 x: double
2963 y: double
2964 /
2965
2966 Return x**y (x to the power of y).
2967 [clinic start generated code]*/
2968
2969 static PyObject *
2970 math_pow_impl(PyObject *module, double x, double y)
2971 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2972 {
2973 double r;
2974 int odd_y;
2975
2976 /* deal directly with IEEE specials, to cope with problems on various
2977 platforms whose semantics don't exactly match C99 */
2978 r = 0.; /* silence compiler warning */
2979 if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2980 errno = 0;
2981 if (Py_IS_NAN(x))
2982 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2983 else if (Py_IS_NAN(y))
2984 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2985 else if (Py_IS_INFINITY(x)) {
2986 odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2987 if (y > 0.)
2988 r = odd_y ? x : fabs(x);
2989 else if (y == 0.)
2990 r = 1.;
2991 else /* y < 0. */
2992 r = odd_y ? copysign(0., x) : 0.;
2993 }
2994 else if (Py_IS_INFINITY(y)) {
2995 if (fabs(x) == 1.0)
2996 r = 1.;
2997 else if (y > 0. && fabs(x) > 1.0)
2998 r = y;
2999 else if (y < 0. && fabs(x) < 1.0) {
3000 r = -y; /* result is +inf */
3001 }
3002 else
3003 r = 0.;
3004 }
3005 }
3006 else {
3007 /* let libm handle finite**finite */
3008 errno = 0;
3009 r = pow(x, y);
3010 /* a NaN result should arise only from (-ve)**(finite
3011 non-integer); in this case we want to raise ValueError. */
3012 if (!Py_IS_FINITE(r)) {
3013 if (Py_IS_NAN(r)) {
3014 errno = EDOM;
3015 }
3016 /*
3017 an infinite result here arises either from:
3018 (A) (+/-0.)**negative (-> divide-by-zero)
3019 (B) overflow of x**y with x and y finite
3020 */
3021 else if (Py_IS_INFINITY(r)) {
3022 if (x == 0.)
3023 errno = EDOM;
3024 else
3025 errno = ERANGE;
3026 }
3027 }
3028 }
3029
3030 if (errno && is_error(r))
3031 return NULL;
3032 else
3033 return PyFloat_FromDouble(r);
3034 }
3035
3036
3037 static const double degToRad = Py_MATH_PI / 180.0;
3038 static const double radToDeg = 180.0 / Py_MATH_PI;
3039
3040 /*[clinic input]
3041 math.degrees
3042
3043 x: double
3044 /
3045
3046 Convert angle x from radians to degrees.
3047 [clinic start generated code]*/
3048
3049 static PyObject *
3050 math_degrees_impl(PyObject *module, double x)
3051 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3052 {
3053 return PyFloat_FromDouble(x * radToDeg);
3054 }
3055
3056
3057 /*[clinic input]
3058 math.radians
3059
3060 x: double
3061 /
3062
3063 Convert angle x from degrees to radians.
3064 [clinic start generated code]*/
3065
3066 static PyObject *
3067 math_radians_impl(PyObject *module, double x)
3068 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3069 {
3070 return PyFloat_FromDouble(x * degToRad);
3071 }
3072
3073
3074 /*[clinic input]
3075 math.isfinite
3076
3077 x: double
3078 /
3079
3080 Return True if x is neither an infinity nor a NaN, and False otherwise.
3081 [clinic start generated code]*/
3082
3083 static PyObject *
3084 math_isfinite_impl(PyObject *module, double x)
3085 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3086 {
3087 return PyBool_FromLong((long)Py_IS_FINITE(x));
3088 }
3089
3090
3091 /*[clinic input]
3092 math.isnan
3093
3094 x: double
3095 /
3096
3097 Return True if x is a NaN (not a number), and False otherwise.
3098 [clinic start generated code]*/
3099
3100 static PyObject *
3101 math_isnan_impl(PyObject *module, double x)
3102 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3103 {
3104 return PyBool_FromLong((long)Py_IS_NAN(x));
3105 }
3106
3107
3108 /*[clinic input]
3109 math.isinf
3110
3111 x: double
3112 /
3113
3114 Return True if x is a positive or negative infinity, and False otherwise.
3115 [clinic start generated code]*/
3116
3117 static PyObject *
3118 math_isinf_impl(PyObject *module, double x)
3119 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3120 {
3121 return PyBool_FromLong((long)Py_IS_INFINITY(x));
3122 }
3123
3124
3125 /*[clinic input]
3126 math.isclose -> bool
3127
3128 a: double
3129 b: double
3130 *
3131 rel_tol: double = 1e-09
3132 maximum difference for being considered "close", relative to the
3133 magnitude of the input values
3134 abs_tol: double = 0.0
3135 maximum difference for being considered "close", regardless of the
3136 magnitude of the input values
3137
3138 Determine whether two floating point numbers are close in value.
3139
3140 Return True if a is close in value to b, and False otherwise.
3141
3142 For the values to be considered close, the difference between them
3143 must be smaller than at least one of the tolerances.
3144
3145 -inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3146 is, NaN is not close to anything, even itself. inf and -inf are
3147 only close to themselves.
3148 [clinic start generated code]*/
3149
3150 static int
3151 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3152 double abs_tol)
3153 /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
3154 {
3155 double diff = 0.0;
3156
3157 /* sanity check on the inputs */
3158 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3159 PyErr_SetString(PyExc_ValueError,
3160 "tolerances must be non-negative");
3161 return -1;
3162 }
3163
3164 if ( a == b ) {
3165 /* short circuit exact equality -- needed to catch two infinities of
3166 the same sign. And perhaps speeds things up a bit sometimes.
3167 */
3168 return 1;
3169 }
3170
3171 /* This catches the case of two infinities of opposite sign, or
3172 one infinity and one finite number. Two infinities of opposite
3173 sign would otherwise have an infinite relative tolerance.
3174 Two infinities of the same sign are caught by the equality check
3175 above.
3176 */
3177
3178 if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3179 return 0;
3180 }
3181
3182 /* now do the regular computation
3183 this is essentially the "weak" test from the Boost library
3184 */
3185
3186 diff = fabs(b - a);
3187
3188 return (((diff <= fabs(rel_tol * b)) ||
3189 (diff <= fabs(rel_tol * a))) ||
3190 (diff <= abs_tol));
3191 }
3192
3193 static inline int
3194 _check_long_mult_overflow(long a, long b) {
3195
3196 /* From Python2's int_mul code:
3197
3198 Integer overflow checking for * is painful: Python tried a couple ways, but
3199 they didn't work on all platforms, or failed in endcases (a product of
3200 -sys.maxint-1 has been a particular pain).
3201
3202 Here's another way:
3203
3204 The native long product x*y is either exactly right or *way* off, being
3205 just the last n bits of the true product, where n is the number of bits
3206 in a long (the delivered product is the true product plus i*2**n for
3207 some integer i).
3208
3209 The native double product (double)x * (double)y is subject to three
3210 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3211 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3212 But, unlike the native long product, it's not in *range* trouble: even
3213 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3214 dynamic range of a double. So the leading 50 (or so) bits of the double
3215 product are correct.
3216
3217 We check these two ways against each other, and declare victory if they're
3218 approximately the same. Else, because the native long product is the only
3219 one that can lose catastrophic amounts of information, it's the native long
3220 product that must have overflowed.
3221
3222 */
3223
3224 long longprod = (long)((unsigned long)a * b);
3225 double doubleprod = (double)a * (double)b;
3226 double doubled_longprod = (double)longprod;
3227
3228 if (doubled_longprod == doubleprod) {
3229 return 0;
3230 }
3231
3232 const double diff = doubled_longprod - doubleprod;
3233 const double absdiff = diff >= 0.0 ? diff : -diff;
3234 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3235
3236 if (32.0 * absdiff <= absprod) {
3237 return 0;
3238 }
3239
3240 return 1;
3241 }
3242
3243 /*[clinic input]
3244 math.prod
3245
3246 iterable: object
3247 /
3248 *
3249 start: object(c_default="NULL") = 1
3250
3251 Calculate the product of all the elements in the input iterable.
3252
3253 The default start value for the product is 1.
3254
3255 When the iterable is empty, return the start value. This function is
3256 intended specifically for use with numeric values and may reject
3257 non-numeric types.
3258 [clinic start generated code]*/
3259
3260 static PyObject *
3261 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3262 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3263 {
3264 PyObject *result = start;
3265 PyObject *temp, *item, *iter;
3266
3267 iter = PyObject_GetIter(iterable);
3268 if (iter == NULL) {
3269 return NULL;
3270 }
3271
3272 if (result == NULL) {
3273 result = _PyLong_GetOne();
3274 }
3275 Py_INCREF(result);
3276 #ifndef SLOW_PROD
3277 /* Fast paths for integers keeping temporary products in C.
3278 * Assumes all inputs are the same type.
3279 * If the assumption fails, default to use PyObjects instead.
3280 */
3281 if (PyLong_CheckExact(result)) {
3282 int overflow;
3283 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3284 /* If this already overflowed, don't even enter the loop. */
3285 if (overflow == 0) {
3286 Py_SETREF(result, NULL);
3287 }
3288 /* Loop over all the items in the iterable until we finish, we overflow
3289 * or we found a non integer element */
3290 while (result == NULL) {
3291 item = PyIter_Next(iter);
3292 if (item == NULL) {
3293 Py_DECREF(iter);
3294 if (PyErr_Occurred()) {
3295 return NULL;
3296 }
3297 return PyLong_FromLong(i_result);
3298 }
3299 if (PyLong_CheckExact(item)) {
3300 long b = PyLong_AsLongAndOverflow(item, &overflow);
3301 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3302 long x = i_result * b;
3303 i_result = x;
3304 Py_DECREF(item);
3305 continue;
3306 }
3307 }
3308 /* Either overflowed or is not an int.
3309 * Restore real objects and process normally */
3310 result = PyLong_FromLong(i_result);
3311 if (result == NULL) {
3312 Py_DECREF(item);
3313 Py_DECREF(iter);
3314 return NULL;
3315 }
3316 temp = PyNumber_Multiply(result, item);
3317 Py_DECREF(result);
3318 Py_DECREF(item);
3319 result = temp;
3320 if (result == NULL) {
3321 Py_DECREF(iter);
3322 return NULL;
3323 }
3324 }
3325 }
3326
3327 /* Fast paths for floats keeping temporary products in C.
3328 * Assumes all inputs are the same type.
3329 * If the assumption fails, default to use PyObjects instead.
3330 */
3331 if (PyFloat_CheckExact(result)) {
3332 double f_result = PyFloat_AS_DOUBLE(result);
3333 Py_SETREF(result, NULL);
3334 while(result == NULL) {
3335 item = PyIter_Next(iter);
3336 if (item == NULL) {
3337 Py_DECREF(iter);
3338 if (PyErr_Occurred()) {
3339 return NULL;
3340 }
3341 return PyFloat_FromDouble(f_result);
3342 }
3343 if (PyFloat_CheckExact(item)) {
3344 f_result *= PyFloat_AS_DOUBLE(item);
3345 Py_DECREF(item);
3346 continue;
3347 }
3348 if (PyLong_CheckExact(item)) {
3349 long value;
3350 int overflow;
3351 value = PyLong_AsLongAndOverflow(item, &overflow);
3352 if (!overflow) {
3353 f_result *= (double)value;
3354 Py_DECREF(item);
3355 continue;
3356 }
3357 }
3358 result = PyFloat_FromDouble(f_result);
3359 if (result == NULL) {
3360 Py_DECREF(item);
3361 Py_DECREF(iter);
3362 return NULL;
3363 }
3364 temp = PyNumber_Multiply(result, item);
3365 Py_DECREF(result);
3366 Py_DECREF(item);
3367 result = temp;
3368 if (result == NULL) {
3369 Py_DECREF(iter);
3370 return NULL;
3371 }
3372 }
3373 }
3374 #endif
3375 /* Consume rest of the iterable (if any) that could not be handled
3376 * by specialized functions above.*/
3377 for(;;) {
3378 item = PyIter_Next(iter);
3379 if (item == NULL) {
3380 /* error, or end-of-sequence */
3381 if (PyErr_Occurred()) {
3382 Py_SETREF(result, NULL);
3383 }
3384 break;
3385 }
3386 temp = PyNumber_Multiply(result, item);
3387 Py_DECREF(result);
3388 Py_DECREF(item);
3389 result = temp;
3390 if (result == NULL)
3391 break;
3392 }
3393 Py_DECREF(iter);
3394 return result;
3395 }
3396
3397
3398 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3399
3400 Python code to generate the values:
3401
3402 import math
3403
3404 for n in range(128):
3405 fac = math.factorial(n)
3406 fac_odd_part = fac // (fac & -fac)
3407 reduced_fac_odd_part = fac_odd_part % (2**64)
3408 print(f"{reduced_fac_odd_part:#018x}u")
3409 */
3410 static const uint64_t reduced_factorial_odd_part[] = {
3411 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3412 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3413 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3414 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3415 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3416 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3417 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3418 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3419 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3420 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3421 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3422 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3423 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3424 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3425 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3426 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3427 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3428 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3429 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3430 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3431 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3432 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3433 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3434 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3435 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3436 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3437 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3438 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3439 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3440 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3441 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3442 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3443 };
3444
3445 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3446
3447 Python code to generate the values:
3448
3449 import math
3450
3451 for n in range(128):
3452 fac = math.factorial(n)
3453 fac_odd_part = fac // (fac & -fac)
3454 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3455 print(f"{inverted_fac_odd_part:#018x}u")
3456 */
3457 static const uint64_t inverted_factorial_odd_part[] = {
3458 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3459 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3460 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3461 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3462 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3463 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3464 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3465 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3466 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3467 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3468 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3469 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3470 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3471 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3472 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3473 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3474 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3475 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3476 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3477 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3478 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3479 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3480 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3481 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3482 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3483 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3484 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3485 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3486 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3487 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3488 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3489 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3490 };
3491
3492 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3493
3494 Python code to generate the values:
3495
3496 import math
3497
3498 for n in range(128):
3499 fac = math.factorial(n)
3500 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3501 print(fac_trailing_zeros)
3502 */
3503
3504 static const uint8_t factorial_trailing_zeros[] = {
3505 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3506 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3507 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3508 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3509 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3510 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3511 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3512 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3513 };
3514
3515 /* Number of permutations and combinations.
3516 * P(n, k) = n! / (n-k)!
3517 * C(n, k) = P(n, k) / k!
3518 */
3519
3520 /* Calculate C(n, k) for n in the 63-bit range. */
3521 static PyObject *
3522 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3523 {
3524 if (k == 0) {
3525 return PyLong_FromLong(1);
3526 }
3527
3528 /* For small enough n and k the result fits in the 64-bit range and can
3529 * be calculated without allocating intermediate PyLong objects. */
3530 if (iscomb) {
3531 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3532 * fits into a uint64_t. Exclude k = 1, because the second fast
3533 * path is faster for this case.*/
3534 static const unsigned char fast_comb_limits1[] = {
3535 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3536 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3537 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3538 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3539 67, 67, 67, // 32-34
3540 };
3541 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3542 /*
3543 comb(n, k) fits into a uint64_t. We compute it as
3544
3545 comb_odd_part << shift
3546
3547 where 2**shift is the largest power of two dividing comb(n, k)
3548 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3549 calculated efficiently via arithmetic modulo 2**64, using three
3550 lookups and two uint64_t multiplications.
3551 */
3552 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3553 * inverted_factorial_odd_part[k]
3554 * inverted_factorial_odd_part[n - k];
3555 int shift = factorial_trailing_zeros[n]
3556 - factorial_trailing_zeros[k]
3557 - factorial_trailing_zeros[n - k];
3558 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3559 }
3560
3561 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3562 * fits into a long long (which is at least 64 bit). Only contains
3563 * items larger than in fast_comb_limits1. */
3564 static const unsigned long long fast_comb_limits2[] = {
3565 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3566 746, 453, 308, 227, 178, 147, // 8-13
3567 };
3568 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3569 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3570 unsigned long long result = n;
3571 for (unsigned long long i = 1; i < k;) {
3572 result *= --n;
3573 result /= ++i;
3574 }
3575 return PyLong_FromUnsignedLongLong(result);
3576 }
3577 }
3578 else {
3579 /* Maps k to the maximal n so that k <= n and P(n, k)
3580 * fits into a long long (which is at least 64 bit). */
3581 static const unsigned long long fast_perm_limits[] = {
3582 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3583 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3584 24, 22, 21, 20, 20, // 16-20
3585 };
3586 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3587 if (n <= 127) {
3588 /* P(n, k) fits into a uint64_t. */
3589 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3590 * inverted_factorial_odd_part[n - k];
3591 int shift = factorial_trailing_zeros[n]
3592 - factorial_trailing_zeros[n - k];
3593 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3594 }
3595
3596 /* P(n, k) = P(n, k-1) * (n-k+1) */
3597 unsigned long long result = n;
3598 for (unsigned long long i = 1; i < k;) {
3599 result *= --n;
3600 ++i;
3601 }
3602 return PyLong_FromUnsignedLongLong(result);
3603 }
3604 }
3605
3606 /* For larger n use recursive formulas:
3607 *
3608 * P(n, k) = P(n, j) * P(n-j, k-j)
3609 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3610 */
3611 unsigned long long j = k / 2;
3612 PyObject *a, *b;
3613 a = perm_comb_small(n, j, iscomb);
3614 if (a == NULL) {
3615 return NULL;
3616 }
3617 b = perm_comb_small(n - j, k - j, iscomb);
3618 if (b == NULL) {
3619 goto error;
3620 }
3621 Py_SETREF(a, PyNumber_Multiply(a, b));
3622 Py_DECREF(b);
3623 if (iscomb && a != NULL) {
3624 b = perm_comb_small(k, j, 1);
3625 if (b == NULL) {
3626 goto error;
3627 }
3628 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3629 Py_DECREF(b);
3630 }
3631 return a;
3632
3633 error:
3634 Py_DECREF(a);
3635 return NULL;
3636 }
3637
3638 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3639 * It is more efficient than sequential multiplication thanks to
3640 * Karatsuba multiplication.
3641 */
3642 static PyObject *
3643 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3644 {
3645 if (k == 0) {
3646 return PyLong_FromLong(1);
3647 }
3648 if (k == 1) {
3649 return Py_NewRef(n);
3650 }
3651
3652 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3653 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3654 unsigned long long j = k / 2;
3655 PyObject *a, *b;
3656 a = perm_comb(n, j, iscomb);
3657 if (a == NULL) {
3658 return NULL;
3659 }
3660 PyObject *t = PyLong_FromUnsignedLongLong(j);
3661 if (t == NULL) {
3662 goto error;
3663 }
3664 n = PyNumber_Subtract(n, t);
3665 Py_DECREF(t);
3666 if (n == NULL) {
3667 goto error;
3668 }
3669 b = perm_comb(n, k - j, iscomb);
3670 Py_DECREF(n);
3671 if (b == NULL) {
3672 goto error;
3673 }
3674 Py_SETREF(a, PyNumber_Multiply(a, b));
3675 Py_DECREF(b);
3676 if (iscomb && a != NULL) {
3677 b = perm_comb_small(k, j, 1);
3678 if (b == NULL) {
3679 goto error;
3680 }
3681 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3682 Py_DECREF(b);
3683 }
3684 return a;
3685
3686 error:
3687 Py_DECREF(a);
3688 return NULL;
3689 }
3690
3691 /*[clinic input]
3692 math.perm
3693
3694 n: object
3695 k: object = None
3696 /
3697
3698 Number of ways to choose k items from n items without repetition and with order.
3699
3700 Evaluates to n! / (n - k)! when k <= n and evaluates
3701 to zero when k > n.
3702
3703 If k is not specified or is None, then k defaults to n
3704 and the function returns n!.
3705
3706 Raises TypeError if either of the arguments are not integers.
3707 Raises ValueError if either of the arguments are negative.
3708 [clinic start generated code]*/
3709
3710 static PyObject *
3711 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3712 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3713 {
3714 PyObject *result = NULL;
3715 int overflow, cmp;
3716 long long ki, ni;
3717
3718 if (k == Py_None) {
3719 return math_factorial(module, n);
3720 }
3721 n = PyNumber_Index(n);
3722 if (n == NULL) {
3723 return NULL;
3724 }
3725 k = PyNumber_Index(k);
3726 if (k == NULL) {
3727 Py_DECREF(n);
3728 return NULL;
3729 }
3730 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3731
3732 if (Py_SIZE(n) < 0) {
3733 PyErr_SetString(PyExc_ValueError,
3734 "n must be a non-negative integer");
3735 goto error;
3736 }
3737 if (Py_SIZE(k) < 0) {
3738 PyErr_SetString(PyExc_ValueError,
3739 "k must be a non-negative integer");
3740 goto error;
3741 }
3742
3743 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3744 if (cmp != 0) {
3745 if (cmp > 0) {
3746 result = PyLong_FromLong(0);
3747 goto done;
3748 }
3749 goto error;
3750 }
3751
3752 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3753 assert(overflow >= 0 && !PyErr_Occurred());
3754 if (overflow > 0) {
3755 PyErr_Format(PyExc_OverflowError,
3756 "k must not exceed %lld",
3757 LLONG_MAX);
3758 goto error;
3759 }
3760 assert(ki >= 0);
3761
3762 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3763 assert(overflow >= 0 && !PyErr_Occurred());
3764 if (!overflow && ki > 1) {
3765 assert(ni >= 0);
3766 result = perm_comb_small((unsigned long long)ni,
3767 (unsigned long long)ki, 0);
3768 }
3769 else {
3770 result = perm_comb(n, (unsigned long long)ki, 0);
3771 }
3772
3773 done:
3774 Py_DECREF(n);
3775 Py_DECREF(k);
3776 return result;
3777
3778 error:
3779 Py_DECREF(n);
3780 Py_DECREF(k);
3781 return NULL;
3782 }
3783
3784 /*[clinic input]
3785 math.comb
3786
3787 n: object
3788 k: object
3789 /
3790
3791 Number of ways to choose k items from n items without repetition and without order.
3792
3793 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3794 to zero when k > n.
3795
3796 Also called the binomial coefficient because it is equivalent
3797 to the coefficient of k-th term in polynomial expansion of the
3798 expression (1 + x)**n.
3799
3800 Raises TypeError if either of the arguments are not integers.
3801 Raises ValueError if either of the arguments are negative.
3802
3803 [clinic start generated code]*/
3804
3805 static PyObject *
3806 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3807 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3808 {
3809 PyObject *result = NULL, *temp;
3810 int overflow, cmp;
3811 long long ki, ni;
3812
3813 n = PyNumber_Index(n);
3814 if (n == NULL) {
3815 return NULL;
3816 }
3817 k = PyNumber_Index(k);
3818 if (k == NULL) {
3819 Py_DECREF(n);
3820 return NULL;
3821 }
3822 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3823
3824 if (Py_SIZE(n) < 0) {
3825 PyErr_SetString(PyExc_ValueError,
3826 "n must be a non-negative integer");
3827 goto error;
3828 }
3829 if (Py_SIZE(k) < 0) {
3830 PyErr_SetString(PyExc_ValueError,
3831 "k must be a non-negative integer");
3832 goto error;
3833 }
3834
3835 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3836 assert(overflow >= 0 && !PyErr_Occurred());
3837 if (!overflow) {
3838 assert(ni >= 0);
3839 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3840 assert(overflow >= 0 && !PyErr_Occurred());
3841 if (overflow || ki > ni) {
3842 result = PyLong_FromLong(0);
3843 goto done;
3844 }
3845 assert(ki >= 0);
3846
3847 ki = Py_MIN(ki, ni - ki);
3848 if (ki > 1) {
3849 result = perm_comb_small((unsigned long long)ni,
3850 (unsigned long long)ki, 1);
3851 goto done;
3852 }
3853 /* For k == 1 just return the original n in perm_comb(). */
3854 }
3855 else {
3856 /* k = min(k, n - k) */
3857 temp = PyNumber_Subtract(n, k);
3858 if (temp == NULL) {
3859 goto error;
3860 }
3861 if (Py_SIZE(temp) < 0) {
3862 Py_DECREF(temp);
3863 result = PyLong_FromLong(0);
3864 goto done;
3865 }
3866 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3867 if (cmp > 0) {
3868 Py_SETREF(k, temp);
3869 }
3870 else {
3871 Py_DECREF(temp);
3872 if (cmp < 0) {
3873 goto error;
3874 }
3875 }
3876
3877 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3878 assert(overflow >= 0 && !PyErr_Occurred());
3879 if (overflow) {
3880 PyErr_Format(PyExc_OverflowError,
3881 "min(n - k, k) must not exceed %lld",
3882 LLONG_MAX);
3883 goto error;
3884 }
3885 assert(ki >= 0);
3886 }
3887
3888 result = perm_comb(n, (unsigned long long)ki, 1);
3889
3890 done:
3891 Py_DECREF(n);
3892 Py_DECREF(k);
3893 return result;
3894
3895 error:
3896 Py_DECREF(n);
3897 Py_DECREF(k);
3898 return NULL;
3899 }
3900
3901
3902 /*[clinic input]
3903 math.nextafter
3904
3905 x: double
3906 y: double
3907 /
3908
3909 Return the next floating-point value after x towards y.
3910 [clinic start generated code]*/
3911
3912 static PyObject *
3913 math_nextafter_impl(PyObject *module, double x, double y)
3914 /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3915 {
3916 #if defined(_AIX)
3917 if (x == y) {
3918 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3919 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3920 return PyFloat_FromDouble(y);
3921 }
3922 if (Py_IS_NAN(x)) {
3923 return PyFloat_FromDouble(x);
3924 }
3925 if (Py_IS_NAN(y)) {
3926 return PyFloat_FromDouble(y);
3927 }
3928 #endif
3929 return PyFloat_FromDouble(nextafter(x, y));
3930 }
3931
3932
3933 /*[clinic input]
3934 math.ulp -> double
3935
3936 x: double
3937 /
3938
3939 Return the value of the least significant bit of the float x.
3940 [clinic start generated code]*/
3941
3942 static double
3943 math_ulp_impl(PyObject *module, double x)
3944 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3945 {
3946 if (Py_IS_NAN(x)) {
3947 return x;
3948 }
3949 x = fabs(x);
3950 if (Py_IS_INFINITY(x)) {
3951 return x;
3952 }
3953 double inf = m_inf();
3954 double x2 = nextafter(x, inf);
3955 if (Py_IS_INFINITY(x2)) {
3956 /* special case: x is the largest positive representable float */
3957 x2 = nextafter(x, -inf);
3958 return x - x2;
3959 }
3960 return x2 - x;
3961 }
3962
3963 static int
3964 math_exec(PyObject *module)
3965 {
3966
3967 math_module_state *state = get_math_module_state(module);
3968 state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
3969 if (state->str___ceil__ == NULL) {
3970 return -1;
3971 }
3972 state->str___floor__ = PyUnicode_InternFromString("__floor__");
3973 if (state->str___floor__ == NULL) {
3974 return -1;
3975 }
3976 state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
3977 if (state->str___trunc__ == NULL) {
3978 return -1;
3979 }
3980 if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3981 return -1;
3982 }
3983 if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3984 return -1;
3985 }
3986 // 2pi
3987 if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3988 return -1;
3989 }
3990 if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3991 return -1;
3992 }
3993 #if _PY_SHORT_FLOAT_REPR == 1
3994 if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3995 return -1;
3996 }
3997 #endif
3998 return 0;
3999 }
4000
4001 static int
4002 math_clear(PyObject *module)
4003 {
4004 math_module_state *state = get_math_module_state(module);
4005 Py_CLEAR(state->str___ceil__);
4006 Py_CLEAR(state->str___floor__);
4007 Py_CLEAR(state->str___trunc__);
4008 return 0;
4009 }
4010
4011 static void
4012 math_free(void *module)
4013 {
4014 math_clear((PyObject *)module);
4015 }
4016
4017 static PyMethodDef math_methods[] = {
4018 {"acos", math_acos, METH_O, math_acos_doc},
4019 {"acosh", math_acosh, METH_O, math_acosh_doc},
4020 {"asin", math_asin, METH_O, math_asin_doc},
4021 {"asinh", math_asinh, METH_O, math_asinh_doc},
4022 {"atan", math_atan, METH_O, math_atan_doc},
4023 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
4024 {"atanh", math_atanh, METH_O, math_atanh_doc},
4025 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
4026 MATH_CEIL_METHODDEF
4027 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
4028 {"cos", math_cos, METH_O, math_cos_doc},
4029 {"cosh", math_cosh, METH_O, math_cosh_doc},
4030 MATH_DEGREES_METHODDEF
4031 MATH_DIST_METHODDEF
4032 {"erf", math_erf, METH_O, math_erf_doc},
4033 {"erfc", math_erfc, METH_O, math_erfc_doc},
4034 {"exp", math_exp, METH_O, math_exp_doc},
4035 {"exp2", math_exp2, METH_O, math_exp2_doc},
4036 {"expm1", math_expm1, METH_O, math_expm1_doc},
4037 {"fabs", math_fabs, METH_O, math_fabs_doc},
4038 MATH_FACTORIAL_METHODDEF
4039 MATH_FLOOR_METHODDEF
4040 MATH_FMOD_METHODDEF
4041 MATH_FREXP_METHODDEF
4042 MATH_FSUM_METHODDEF
4043 {"gamma", math_gamma, METH_O, math_gamma_doc},
4044 {"gcd", _PyCFunction_CAST(math_gcd), METH_FASTCALL, math_gcd_doc},
4045 {"hypot", _PyCFunction_CAST(math_hypot), METH_FASTCALL, math_hypot_doc},
4046 MATH_ISCLOSE_METHODDEF
4047 MATH_ISFINITE_METHODDEF
4048 MATH_ISINF_METHODDEF
4049 MATH_ISNAN_METHODDEF
4050 MATH_ISQRT_METHODDEF
4051 {"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc},
4052 MATH_LDEXP_METHODDEF
4053 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
4054 MATH_LOG_METHODDEF
4055 {"log1p", math_log1p, METH_O, math_log1p_doc},
4056 MATH_LOG10_METHODDEF
4057 MATH_LOG2_METHODDEF
4058 MATH_MODF_METHODDEF
4059 MATH_POW_METHODDEF
4060 MATH_RADIANS_METHODDEF
4061 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
4062 {"sin", math_sin, METH_O, math_sin_doc},
4063 {"sinh", math_sinh, METH_O, math_sinh_doc},
4064 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4065 {"tan", math_tan, METH_O, math_tan_doc},
4066 {"tanh", math_tanh, METH_O, math_tanh_doc},
4067 MATH_SUMPROD_METHODDEF
4068 MATH_TRUNC_METHODDEF
4069 MATH_PROD_METHODDEF
4070 MATH_PERM_METHODDEF
4071 MATH_COMB_METHODDEF
4072 MATH_NEXTAFTER_METHODDEF
4073 MATH_ULP_METHODDEF
4074 {NULL, NULL} /* sentinel */
4075 };
4076
4077 static PyModuleDef_Slot math_slots[] = {
4078 {Py_mod_exec, math_exec},
4079 {0, NULL}
4080 };
4081
4082 PyDoc_STRVAR(module_doc,
4083 "This module provides access to the mathematical functions\n"
4084 "defined by the C standard.");
4085
4086 static struct PyModuleDef mathmodule = {
4087 PyModuleDef_HEAD_INIT,
4088 .m_name = "math",
4089 .m_doc = module_doc,
4090 .m_size = sizeof(math_module_state),
4091 .m_methods = math_methods,
4092 .m_slots = math_slots,
4093 .m_clear = math_clear,
4094 .m_free = math_free,
4095 };
4096
4097 PyMODINIT_FUNC
4098 PyInit_math(void)
4099 {
4100 return PyModuleDef_Init(&mathmodule);
4101 }