]> git.ipfire.org Git - thirdparty/Python/cpython.git/blame - Modules/mathmodule.c
Add more comments to hypot() (GH-102817)
[thirdparty/Python/cpython.git] / Modules / mathmodule.c
CommitLineData
85a5fbbd
GR
1/* Math module -- standard C math library functions, pi and e */
2
53876d9c
CH
3/* Here are some comments from Tim Peters, extracted from the
4 discussion attached to http://bugs.python.org/issue1640. They
5 describe the general aims of the math module with respect to
6 special values, IEEE-754 floating-point exceptions, and Python
7 exceptions.
8
9These are the "spirit of 754" rules:
10
111. If the mathematical result is a real number, but of magnitude too
12large to approximate by a machine float, overflow is signaled and the
13result is an infinity (with the appropriate sign).
14
152. If the mathematical result is a real number, but of magnitude too
16small to approximate by a machine float, underflow is signaled and the
17result is a zero (with the appropriate sign).
18
193. At a singularity (a value x such that the limit of f(y) as y
20approaches x exists and is an infinity), "divide by zero" is signaled
21and the result is an infinity (with the appropriate sign). This is
22complicated a little by that the left-side and right-side limits may
23not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24from the positive or negative directions. In that specific case, the
25sign of the zero determines the result of 1/0.
26
274. At a point where a function has no defined result in the extended
28reals (i.e., the reals plus an infinity or two), invalid operation is
29signaled and a NaN is returned.
30
31And these are what Python has historically /tried/ to do (but not
32always successfully, as platform libm behavior varies a lot):
33
34For #1, raise OverflowError.
35
36For #2, return a zero (with the appropriate sign if that happens by
37accident ;-)).
38
39For #3 and #4, raise ValueError. It may have made sense to raise
40Python's ZeroDivisionError in #3, but historically that's only been
41raised for division by zero and mod by zero.
42
43*/
44
45/*
46 In general, on an IEEE-754 platform the aim is to follow the C99
47 standard, including Annex 'F', whenever possible. Where the
48 standard recommends raising the 'divide-by-zero' or 'invalid'
49 floating-point exceptions, Python should raise a ValueError. Where
50 the standard recommends raising 'overflow', Python should raise an
51 OverflowError. In all other circumstances a value should be
52 returned.
53 */
54
03e9f5dc
CH
55#ifndef Py_BUILD_CORE_BUILTIN
56# define Py_BUILD_CORE_MODULE 1
57#endif
58
8b43b19e 59#include "Python.h"
794e7d1a 60#include "pycore_bitutils.h" // _Py_bit_length()
d943d191
VS
61#include "pycore_call.h" // _PyObject_CallNoArgs()
62#include "pycore_dtoa.h" // _Py_dg_infinity()
37834136 63#include "pycore_long.h" // _PyLong_GetZero()
23c9febd
DN
64#include "pycore_moduleobject.h" // _PyModule_GetState()
65#include "pycore_object.h" // _PyObject_LookupSpecial()
9bbdde21 66#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
fa26245a
CH
67/* For DBL_EPSILON in _math.h */
68#include <float.h>
69/* For _Py_log1p with workarounds for buggy handling of zeros. */
664b511c 70#include "_math.h"
47b9f83a 71#include <stdbool.h>
85a5fbbd 72
c9ea9335
SS
73#include "clinic/mathmodule.c.h"
74
75/*[clinic input]
76module math
77[clinic start generated code]*/
78/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79
80
23c9febd
DN
81typedef struct {
82 PyObject *str___ceil__;
83 PyObject *str___floor__;
84 PyObject *str___trunc__;
85} math_module_state;
86
87static inline math_module_state*
88get_math_module_state(PyObject *module)
89{
90 void *state = _PyModule_GetState(module);
91 assert(state != NULL);
92 return (math_module_state *)state;
93}
94
0a22aa05
RH
95/*
96Double and triple length extended precision algorithms from:
97
98 Accurate Sum and Dot Product
99 by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
100 https://doi.org/10.1137/030601818
101 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
102
103*/
104
105typedef struct{ double hi; double lo; } DoubleLength;
106
107static DoubleLength
108dl_fast_sum(double a, double b)
109{
110 /* Algorithm 1.1. Compensated summation of two floating point numbers. */
111 assert(fabs(a) >= fabs(b));
112 double x = a + b;
113 double y = (a - x) + b;
114 return (DoubleLength) {x, y};
115}
116
117static DoubleLength
118dl_sum(double a, double b)
119{
120 /* Algorithm 3.1 Error-free transformation of the sum */
121 double x = a + b;
122 double z = x - a;
123 double y = (a - (x - z)) + (b - z);
124 return (DoubleLength) {x, y};
125}
126
127#ifndef UNRELIABLE_FMA
128
129static DoubleLength
130dl_mul(double x, double y)
131{
132 /* Algorithm 3.5. Error-free transformation of a product */
133 double z = x * y;
134 double zz = fma(x, y, -z);
135 return (DoubleLength) {z, zz};
136}
137
138#else
139
140/*
141 The default implementation of dl_mul() depends on the C math library
142 having an accurate fma() function as required by § 7.12.13.1 of the
143 C99 standard.
144
145 The UNRELIABLE_FMA option is provided as a slower but accurate
146 alternative for builds where the fma() function is found wanting.
147 The speed penalty may be modest (17% slower on an Apple M1 Max),
148 so don't hesitate to enable this build option.
149
150 The algorithms are from the T. J. Dekker paper:
151 A Floating-Point Technique for Extending the Available Precision
152 https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
153*/
154
155static DoubleLength
156dl_split(double x) {
157 // Dekker (5.5) and (5.6).
158 double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
159 double hi = t - (t - x);
160 double lo = x - hi;
161 return (DoubleLength) {hi, lo};
162}
163
164static DoubleLength
165dl_mul(double x, double y)
166{
167 // Dekker (5.12) and mul12()
168 DoubleLength xx = dl_split(x);
169 DoubleLength yy = dl_split(y);
170 double p = xx.hi * yy.hi;
171 double q = xx.hi * yy.lo + xx.lo * yy.hi;
172 double z = p + q;
173 double zz = p - z + q + xx.lo * yy.lo;
174 return (DoubleLength) {z, zz};
175}
176
177#endif
178
179typedef struct { double hi; double lo; double tiny; } TripleLength;
180
181static const TripleLength tl_zero = {0.0, 0.0, 0.0};
182
183static TripleLength
184tl_fma(double x, double y, TripleLength total)
185{
186 /* Algorithm 5.10 with SumKVert for K=3 */
187 DoubleLength pr = dl_mul(x, y);
188 DoubleLength sm = dl_sum(total.hi, pr.hi);
189 DoubleLength r1 = dl_sum(total.lo, pr.lo);
190 DoubleLength r2 = dl_sum(r1.hi, sm.lo);
191 return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
192}
193
194static double
195tl_to_d(TripleLength total)
196{
197 DoubleLength last = dl_sum(total.lo, total.hi);
198 return total.tiny + last.lo + last.hi;
199}
200
201
12c4bdb0
MD
202/*
203 sin(pi*x), giving accurate results for all finite x (especially x
204 integral or close to an integer). This is here for use in the
205 reflection formula for the gamma function. It conforms to IEEE
206 754-2008 for finite arguments, but not for infinities or nans.
207*/
208
209static const double pi = 3.141592653589793238462643383279502884197;
9c91eb84 210static const double logpi = 1.144729885849400174143427351353058711647;
cfd735ea
RH
211
212/* Version of PyFloat_AsDouble() with in-line fast paths
213 for exact floats and integers. Gives a substantial
214 speed improvement for extracting float arguments.
215*/
216
217#define ASSIGN_DOUBLE(target_var, obj, error_label) \
218 if (PyFloat_CheckExact(obj)) { \
219 target_var = PyFloat_AS_DOUBLE(obj); \
220 } \
221 else if (PyLong_CheckExact(obj)) { \
222 target_var = PyLong_AsDouble(obj); \
223 if (target_var == -1.0 && PyErr_Occurred()) { \
224 goto error_label; \
225 } \
226 } \
227 else { \
228 target_var = PyFloat_AsDouble(obj); \
229 if (target_var == -1.0 && PyErr_Occurred()) { \
230 goto error_label; \
231 } \
232 }
233
12c4bdb0 234static double
f57cd828 235m_sinpi(double x)
8832b621 236{
f95a1b3c
AP
237 double y, r;
238 int n;
239 /* this function should only ever be called for finite arguments */
240 assert(Py_IS_FINITE(x));
241 y = fmod(fabs(x), 2.0);
242 n = (int)round(2.0*y);
243 assert(0 <= n && n <= 4);
244 switch (n) {
245 case 0:
246 r = sin(pi*y);
247 break;
248 case 1:
249 r = cos(pi*(y-0.5));
250 break;
251 case 2:
252 /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
253 -0.0 instead of 0.0 when y == 1.0. */
254 r = sin(pi*(1.0-y));
255 break;
256 case 3:
257 r = -cos(pi*(y-1.5));
258 break;
259 case 4:
260 r = sin(pi*(y-2.0));
261 break;
262 default:
b2e57948 263 Py_UNREACHABLE();
f95a1b3c
AP
264 }
265 return copysign(1.0, x)*r;
12c4bdb0 266}
a40c793d 267
58395759
SK
268/* Implementation of the real gamma function. Kept here to work around
269 issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
270 on various platforms (Windows, MacOS). In extensive but non-exhaustive
12c4bdb0
MD
271 random tests, this function proved accurate to within <= 10 ulps across the
272 entire float domain. Note that accuracy may depend on the quality of the
273 system math functions, the pow function in particular. Special cases
274 follow C99 annex F. The parameters and method are tailored to platforms
275 whose double format is the IEEE 754 binary64 format.
276
277 Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
278 and g=6.024680040776729583740234375; these parameters are amongst those
279 used by the Boost library. Following Boost (again), we re-express the
280 Lanczos sum as a rational function, and compute it that way. The
281 coefficients below were computed independently using MPFR, and have been
282 double-checked against the coefficients in the Boost source code.
283
284 For x < 0.0 we use the reflection formula.
285
286 There's one minor tweak that deserves explanation: Lanczos' formula for
287 Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
288 values, x+g-0.5 can be represented exactly. However, in cases where it
289 can't be represented exactly the small error in x+g-0.5 can be magnified
290 significantly by the pow and exp calls, especially for large x. A cheap
291 correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
292 involved in the computation of x+g-0.5 (that is, e = computed value of
293 x+g-0.5 - exact value of x+g-0.5). Here's the proof:
294
295 Correction factor
296 -----------------
297 Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
298 double, and e is tiny. Then:
299
300 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
301 = pow(y, x-0.5)/exp(y) * C,
302
303 where the correction_factor C is given by
304
305 C = pow(1-e/y, x-0.5) * exp(e)
306
307 Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
308
309 C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
310
311 But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
312
313 pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
314
315 Note that for accuracy, when computing r*C it's better to do
316
317 r + e*g/y*r;
318
319 than
320
321 r * (1 + e*g/y);
322
323 since the addition in the latter throws away most of the bits of
324 information in e*g/y.
325*/
326
327#define LANCZOS_N 13
328static const double lanczos_g = 6.024680040776729583740234375;
329static const double lanczos_g_minus_half = 5.524680040776729583740234375;
330static const double lanczos_num_coeffs[LANCZOS_N] = {
f95a1b3c
AP
331 23531376880.410759688572007674451636754734846804940,
332 42919803642.649098768957899047001988850926355848959,
333 35711959237.355668049440185451547166705960488635843,
334 17921034426.037209699919755754458931112671403265390,
335 6039542586.3520280050642916443072979210699388420708,
336 1439720407.3117216736632230727949123939715485786772,
337 248874557.86205415651146038641322942321632125127801,
338 31426415.585400194380614231628318205362874684987640,
339 2876370.6289353724412254090516208496135991145378768,
340 186056.26539522349504029498971604569928220784236328,
341 8071.6720023658162106380029022722506138218516325024,
342 210.82427775157934587250973392071336271166969580291,
343 2.5066282746310002701649081771338373386264310793408
12c4bdb0
MD
344};
345
346/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
347static const double lanczos_den_coeffs[LANCZOS_N] = {
f95a1b3c
AP
348 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
349 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
12c4bdb0
MD
350
351/* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
352#define NGAMMA_INTEGRAL 23
353static const double gamma_integral[NGAMMA_INTEGRAL] = {
f95a1b3c
AP
354 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
355 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
356 1307674368000.0, 20922789888000.0, 355687428096000.0,
357 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
358 51090942171709440000.0, 1124000727777607680000.0,
12c4bdb0
MD
359};
360
361/* Lanczos' sum L_g(x), for positive x */
362
363static double
364lanczos_sum(double x)
365{
f95a1b3c
AP
366 double num = 0.0, den = 0.0;
367 int i;
368 assert(x > 0.0);
369 /* evaluate the rational function lanczos_sum(x). For large
370 x, the obvious algorithm risks overflow, so we instead
371 rescale the denominator and numerator of the rational
372 function by x**(1-LANCZOS_N) and treat this as a
373 rational function in 1/x. This also reduces the error for
374 larger x values. The choice of cutoff point (5.0 below) is
375 somewhat arbitrary; in tests, smaller cutoff values than
376 this resulted in lower accuracy. */
377 if (x < 5.0) {
378 for (i = LANCZOS_N; --i >= 0; ) {
379 num = num * x + lanczos_num_coeffs[i];
380 den = den * x + lanczos_den_coeffs[i];
381 }
382 }
383 else {
384 for (i = 0; i < LANCZOS_N; i++) {
385 num = num / x + lanczos_num_coeffs[i];
386 den = den / x + lanczos_den_coeffs[i];
387 }
388 }
389 return num/den;
12c4bdb0
MD
390}
391
a5d0c7c2
MD
392/* Constant for +infinity, generated in the same way as float('inf'). */
393
394static double
395m_inf(void)
396{
9bbdde21 397#if _PY_SHORT_FLOAT_REPR == 1
a5d0c7c2
MD
398 return _Py_dg_infinity(0);
399#else
400 return Py_HUGE_VAL;
401#endif
402}
403
404/* Constant nan value, generated in the same way as float('nan'). */
405/* We don't currently assume that Py_NAN is defined everywhere. */
406
1b2611eb 407#if _PY_SHORT_FLOAT_REPR == 1
a5d0c7c2
MD
408
409static double
410m_nan(void)
411{
9bbdde21 412#if _PY_SHORT_FLOAT_REPR == 1
a5d0c7c2
MD
413 return _Py_dg_stdnan(0);
414#else
415 return Py_NAN;
416#endif
417}
418
419#endif
420
12c4bdb0
MD
421static double
422m_tgamma(double x)
423{
f95a1b3c
AP
424 double absx, r, y, z, sqrtpow;
425
426 /* special cases */
427 if (!Py_IS_FINITE(x)) {
428 if (Py_IS_NAN(x) || x > 0.0)
429 return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
430 else {
431 errno = EDOM;
432 return Py_NAN; /* tgamma(-inf) = nan, invalid */
433 }
434 }
435 if (x == 0.0) {
436 errno = EDOM;
50203a69
MD
437 /* tgamma(+-0.0) = +-inf, divide-by-zero */
438 return copysign(Py_HUGE_VAL, x);
f95a1b3c
AP
439 }
440
441 /* integer arguments */
442 if (x == floor(x)) {
443 if (x < 0.0) {
444 errno = EDOM; /* tgamma(n) = nan, invalid for */
445 return Py_NAN; /* negative integers n */
446 }
447 if (x <= NGAMMA_INTEGRAL)
448 return gamma_integral[(int)x - 1];
449 }
450 absx = fabs(x);
451
452 /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
453 if (absx < 1e-20) {
454 r = 1.0/x;
455 if (Py_IS_INFINITY(r))
456 errno = ERANGE;
457 return r;
458 }
459
460 /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
461 x > 200, and underflows to +-0.0 for x < -200, not a negative
462 integer. */
463 if (absx > 200.0) {
464 if (x < 0.0) {
f57cd828 465 return 0.0/m_sinpi(x);
f95a1b3c
AP
466 }
467 else {
468 errno = ERANGE;
469 return Py_HUGE_VAL;
470 }
471 }
472
473 y = absx + lanczos_g_minus_half;
474 /* compute error in sum */
475 if (absx > lanczos_g_minus_half) {
476 /* note: the correction can be foiled by an optimizing
477 compiler that (incorrectly) thinks that an expression like
478 a + b - a - b can be optimized to 0.0. This shouldn't
479 happen in a standards-conforming compiler. */
480 double q = y - absx;
481 z = q - lanczos_g_minus_half;
482 }
483 else {
484 double q = y - lanczos_g_minus_half;
485 z = q - absx;
486 }
487 z = z * lanczos_g / y;
488 if (x < 0.0) {
f57cd828 489 r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
f95a1b3c
AP
490 r -= z * r;
491 if (absx < 140.0) {
492 r /= pow(y, absx - 0.5);
493 }
494 else {
495 sqrtpow = pow(y, absx / 2.0 - 0.25);
496 r /= sqrtpow;
497 r /= sqrtpow;
498 }
499 }
500 else {
501 r = lanczos_sum(absx) / exp(y);
502 r += z * r;
503 if (absx < 140.0) {
504 r *= pow(y, absx - 0.5);
505 }
506 else {
507 sqrtpow = pow(y, absx / 2.0 - 0.25);
508 r *= sqrtpow;
509 r *= sqrtpow;
510 }
511 }
512 if (Py_IS_INFINITY(r))
513 errno = ERANGE;
514 return r;
8832b621
GR
515}
516
05d2e084
MD
517/*
518 lgamma: natural log of the absolute value of the Gamma function.
519 For large arguments, Lanczos' formula works extremely well here.
520*/
521
522static double
523m_lgamma(double x)
524{
97553fdf 525 double r;
97553fdf 526 double absx;
f95a1b3c
AP
527
528 /* special cases */
529 if (!Py_IS_FINITE(x)) {
530 if (Py_IS_NAN(x))
531 return x; /* lgamma(nan) = nan */
532 else
533 return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
534 }
535
536 /* integer arguments */
537 if (x == floor(x) && x <= 2.0) {
538 if (x <= 0.0) {
539 errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
540 return Py_HUGE_VAL; /* integers n <= 0 */
541 }
542 else {
543 return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
544 }
545 }
546
547 absx = fabs(x);
548 /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
549 if (absx < 1e-20)
550 return -log(absx);
551
9c91eb84
MD
552 /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
553 having a second set of numerator coefficients for lanczos_sum that
554 absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
555 subtraction below; it's probably not worth it. */
556 r = log(lanczos_sum(absx)) - lanczos_g;
557 r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
558 if (x < 0.0)
559 /* Use reflection formula to get value for negative x. */
f57cd828 560 r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
f95a1b3c
AP
561 if (Py_IS_INFINITY(r))
562 errno = ERANGE;
563 return r;
05d2e084
MD
564}
565
e57950fb
CH
566/*
567 wrapper for atan2 that deals directly with special cases before
568 delegating to the platform libm for the remaining cases. This
569 is necessary to get consistent behaviour across platforms.
570 Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
571 always follow C99.
572*/
573
574static double
575m_atan2(double y, double x)
576{
f95a1b3c
AP
577 if (Py_IS_NAN(x) || Py_IS_NAN(y))
578 return Py_NAN;
579 if (Py_IS_INFINITY(y)) {
580 if (Py_IS_INFINITY(x)) {
581 if (copysign(1., x) == 1.)
582 /* atan2(+-inf, +inf) == +-pi/4 */
583 return copysign(0.25*Py_MATH_PI, y);
584 else
585 /* atan2(+-inf, -inf) == +-pi*3/4 */
586 return copysign(0.75*Py_MATH_PI, y);
587 }
588 /* atan2(+-inf, x) == +-pi/2 for finite x */
589 return copysign(0.5*Py_MATH_PI, y);
590 }
591 if (Py_IS_INFINITY(x) || y == 0.) {
592 if (copysign(1., x) == 1.)
593 /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
594 return copysign(0., y);
595 else
596 /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
597 return copysign(Py_MATH_PI, y);
598 }
599 return atan2(y, x);
e57950fb
CH
600}
601
a0ce375e
MD
602
603/* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
604 multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
605 binary floating-point format, the result is always exact. */
606
607static double
608m_remainder(double x, double y)
609{
610 /* Deal with most common case first. */
611 if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
612 double absx, absy, c, m, r;
613
614 if (y == 0.0) {
615 return Py_NAN;
616 }
617
618 absx = fabs(x);
619 absy = fabs(y);
620 m = fmod(absx, absy);
621
622 /*
623 Warning: some subtlety here. What we *want* to know at this point is
624 whether the remainder m is less than, equal to, or greater than half
625 of absy. However, we can't do that comparison directly because we
01484703 626 can't be sure that 0.5*absy is representable (the multiplication
a0ce375e
MD
627 might incur precision loss due to underflow). So instead we compare
628 m with the complement c = absy - m: m < 0.5*absy if and only if m <
629 c, and so on. The catch is that absy - m might also not be
630 representable, but it turns out that it doesn't matter:
631
632 - if m > 0.5*absy then absy - m is exactly representable, by
633 Sterbenz's lemma, so m > c
634 - if m == 0.5*absy then again absy - m is exactly representable
635 and m == c
636 - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
637 in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
638 c, or (ii) absy is tiny, either subnormal or in the lowest normal
639 binade. Then absy - m is exactly representable and again m < c.
640 */
641
642 c = absy - m;
643 if (m < c) {
644 r = m;
645 }
646 else if (m > c) {
647 r = -c;
648 }
649 else {
650 /*
651 Here absx is exactly halfway between two multiples of absy,
652 and we need to choose the even multiple. x now has the form
653
654 absx = n * absy + m
655
656 for some integer n (recalling that m = 0.5*absy at this point).
657 If n is even we want to return m; if n is odd, we need to
658 return -m.
659
660 So
661
662 0.5 * (absx - m) = (n/2) * absy
663
664 and now reducing modulo absy gives us:
665
666 | m, if n is odd
667 fmod(0.5 * (absx - m), absy) = |
668 | 0, if n is even
669
670 Now m - 2.0 * fmod(...) gives the desired result: m
671 if n is even, -m if m is odd.
672
673 Note that all steps in fmod(0.5 * (absx - m), absy)
674 will be computed exactly, with no rounding error
675 introduced.
676 */
677 assert(m == c);
678 r = m - 2.0 * fmod(0.5 * (absx - m), absy);
679 }
680 return copysign(1.0, x) * r;
681 }
682
683 /* Special values. */
684 if (Py_IS_NAN(x)) {
685 return x;
686 }
687 if (Py_IS_NAN(y)) {
688 return y;
689 }
690 if (Py_IS_INFINITY(x)) {
691 return Py_NAN;
692 }
693 assert(Py_IS_INFINITY(y));
694 return x;
695}
696
697
e675f08e
MD
698/*
699 Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
700 log(-ve), log(NaN). Here are wrappers for log and log10 that deal with
701 special values directly, passing positive non-special values through to
702 the system log/log10.
703 */
704
705static double
706m_log(double x)
707{
f95a1b3c
AP
708 if (Py_IS_FINITE(x)) {
709 if (x > 0.0)
710 return log(x);
711 errno = EDOM;
712 if (x == 0.0)
713 return -Py_HUGE_VAL; /* log(0) = -inf */
714 else
715 return Py_NAN; /* log(-ve) = nan */
716 }
717 else if (Py_IS_NAN(x))
718 return x; /* log(nan) = nan */
719 else if (x > 0.0)
720 return x; /* log(inf) = inf */
721 else {
722 errno = EDOM;
723 return Py_NAN; /* log(-inf) = nan */
724 }
e675f08e
MD
725}
726
fa0e3d52
VS
727/*
728 log2: log to base 2.
729
730 Uses an algorithm that should:
83b8c0be 731
fa0e3d52 732 (a) produce exact results for powers of 2, and
83b8c0be
MD
733 (b) give a monotonic log2 (for positive finite floats),
734 assuming that the system log is monotonic.
fa0e3d52
VS
735*/
736
737static double
738m_log2(double x)
739{
740 if (!Py_IS_FINITE(x)) {
741 if (Py_IS_NAN(x))
742 return x; /* log2(nan) = nan */
743 else if (x > 0.0)
744 return x; /* log2(+inf) = +inf */
745 else {
746 errno = EDOM;
747 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
748 }
749 }
750
751 if (x > 0.0) {
8f9f8d61 752 return log2(x);
fa0e3d52
VS
753 }
754 else if (x == 0.0) {
755 errno = EDOM;
756 return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
757 }
758 else {
759 errno = EDOM;
23442584 760 return Py_NAN; /* log2(-inf) = nan, invalid-operation */
fa0e3d52
VS
761 }
762}
763
e675f08e
MD
764static double
765m_log10(double x)
766{
f95a1b3c
AP
767 if (Py_IS_FINITE(x)) {
768 if (x > 0.0)
769 return log10(x);
770 errno = EDOM;
771 if (x == 0.0)
772 return -Py_HUGE_VAL; /* log10(0) = -inf */
773 else
774 return Py_NAN; /* log10(-ve) = nan */
775 }
776 else if (Py_IS_NAN(x))
777 return x; /* log10(nan) = nan */
778 else if (x > 0.0)
779 return x; /* log10(inf) = inf */
780 else {
781 errno = EDOM;
782 return Py_NAN; /* log10(-inf) = nan */
783 }
e675f08e
MD
784}
785
786
559e7f16
SS
787static PyObject *
788math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
789{
790 PyObject *res, *x;
791 Py_ssize_t i;
c9ea9335 792
559e7f16
SS
793 if (nargs == 0) {
794 return PyLong_FromLong(0);
795 }
796 res = PyNumber_Index(args[0]);
797 if (res == NULL) {
798 return NULL;
799 }
800 if (nargs == 1) {
801 Py_SETREF(res, PyNumber_Absolute(res));
802 return res;
803 }
3e7ee023
VS
804
805 PyObject *one = _PyLong_GetOne(); // borrowed ref
559e7f16 806 for (i = 1; i < nargs; i++) {
5f4b229d 807 x = _PyNumber_Index(args[i]);
559e7f16
SS
808 if (x == NULL) {
809 Py_DECREF(res);
810 return NULL;
811 }
3e7ee023 812 if (res == one) {
559e7f16
SS
813 /* Fast path: just check arguments.
814 It is okay to use identity comparison here. */
815 Py_DECREF(x);
816 continue;
817 }
818 Py_SETREF(res, _PyLong_GCD(res, x));
819 Py_DECREF(x);
820 if (res == NULL) {
821 return NULL;
822 }
823 }
824 return res;
825}
826
827PyDoc_STRVAR(math_gcd_doc,
828"gcd($module, *integers)\n"
829"--\n"
830"\n"
831"Greatest Common Divisor.");
c9ea9335 832
c9ea9335 833
48e47aaa 834static PyObject *
559e7f16 835long_lcm(PyObject *a, PyObject *b)
48e47aaa 836{
559e7f16 837 PyObject *g, *m, *f, *ab;
48e47aaa 838
559e7f16
SS
839 if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
840 return PyLong_FromLong(0);
841 }
842 g = _PyLong_GCD(a, b);
843 if (g == NULL) {
48e47aaa 844 return NULL;
559e7f16
SS
845 }
846 f = PyNumber_FloorDivide(a, g);
847 Py_DECREF(g);
848 if (f == NULL) {
48e47aaa
SS
849 return NULL;
850 }
559e7f16
SS
851 m = PyNumber_Multiply(f, b);
852 Py_DECREF(f);
853 if (m == NULL) {
854 return NULL;
855 }
856 ab = PyNumber_Absolute(m);
857 Py_DECREF(m);
858 return ab;
48e47aaa
SS
859}
860
48e47aaa 861
559e7f16
SS
862static PyObject *
863math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
864{
865 PyObject *res, *x;
866 Py_ssize_t i;
867
868 if (nargs == 0) {
869 return PyLong_FromLong(1);
870 }
871 res = PyNumber_Index(args[0]);
872 if (res == NULL) {
873 return NULL;
874 }
875 if (nargs == 1) {
876 Py_SETREF(res, PyNumber_Absolute(res));
877 return res;
878 }
3e7ee023
VS
879
880 PyObject *zero = _PyLong_GetZero(); // borrowed ref
559e7f16
SS
881 for (i = 1; i < nargs; i++) {
882 x = PyNumber_Index(args[i]);
883 if (x == NULL) {
884 Py_DECREF(res);
885 return NULL;
886 }
3e7ee023 887 if (res == zero) {
559e7f16
SS
888 /* Fast path: just check arguments.
889 It is okay to use identity comparison here. */
890 Py_DECREF(x);
891 continue;
892 }
893 Py_SETREF(res, long_lcm(res, x));
894 Py_DECREF(x);
895 if (res == NULL) {
896 return NULL;
897 }
898 }
899 return res;
900}
901
902
903PyDoc_STRVAR(math_lcm_doc,
904"lcm($module, *integers)\n"
905"--\n"
906"\n"
907"Least Common Multiple.");
908
909
12c4bdb0
MD
910/* Call is_error when errno != 0, and where x is the result libm
911 * returned. is_error will usually set up an exception and return
912 * true (1), but may return false (0) without setting up an exception.
913 */
914static int
915is_error(double x)
916{
f95a1b3c
AP
917 int result = 1; /* presumption of guilt */
918 assert(errno); /* non-zero errno is a precondition for calling */
919 if (errno == EDOM)
920 PyErr_SetString(PyExc_ValueError, "math domain error");
921
922 else if (errno == ERANGE) {
923 /* ANSI C generally requires libm functions to set ERANGE
924 * on overflow, but also generally *allows* them to set
925 * ERANGE on underflow too. There's no consistency about
926 * the latter across platforms.
927 * Alas, C99 never requires that errno be set.
928 * Here we suppress the underflow errors (libm functions
929 * should return a zero on underflow, and +- HUGE_VAL on
930 * overflow, so testing the result for zero suffices to
931 * distinguish the cases).
932 *
933 * On some platforms (Ubuntu/ia64) it seems that errno can be
934 * set to ERANGE for subnormal results that do *not* underflow
935 * to zero. So to be safe, we'll ignore ERANGE whenever the
3363e1cb
SD
936 * function result is less than 1.5 in absolute value.
937 *
938 * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
939 * are correctly detected, since the function may underflow
940 * toward -1.0 rather than 0.0.
f95a1b3c 941 */
3363e1cb 942 if (fabs(x) < 1.5)
f95a1b3c
AP
943 result = 0;
944 else
945 PyErr_SetString(PyExc_OverflowError,
946 "math range error");
947 }
948 else
949 /* Unexpected math error */
950 PyErr_SetFromErrno(PyExc_ValueError);
951 return result;
12c4bdb0
MD
952}
953
53876d9c
CH
954/*
955 math_1 is used to wrap a libm function f that takes a double
c9ea9335 956 argument and returns a double.
53876d9c
CH
957
958 The error reporting follows these rules, which are designed to do
959 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
960 platforms.
961
962 - a NaN result from non-NaN inputs causes ValueError to be raised
963 - an infinite result from finite inputs causes OverflowError to be
964 raised if can_overflow is 1, or raises ValueError if can_overflow
965 is 0.
966 - if the result is finite and errno == EDOM then ValueError is
967 raised
968 - if the result is finite and nonzero and errno == ERANGE then
969 OverflowError is raised
970
971 The last rule is used to catch overflow on platforms which follow
972 C89 but for which HUGE_VAL is not an infinity.
973
974 For the majority of one-argument functions these rules are enough
975 to ensure that Python's functions behave as specified in 'Annex F'
976 of the C99 standard, with the 'invalid' and 'divide-by-zero'
977 floating-point exceptions mapping to Python's ValueError and the
978 'overflow' floating-point exception mapping to OverflowError.
979 math_1 only works for functions that don't have singularities *and*
980 the possibility of overflow; fortunately, that covers everything we
981 care about right now.
982*/
983
8b43b19e 984static PyObject *
45fa12ae 985math_1(PyObject *arg, double (*func) (double), int can_overflow)
85a5fbbd 986{
f95a1b3c
AP
987 double x, r;
988 x = PyFloat_AsDouble(arg);
989 if (x == -1.0 && PyErr_Occurred())
990 return NULL;
991 errno = 0;
f95a1b3c 992 r = (*func)(x);
f95a1b3c
AP
993 if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
994 PyErr_SetString(PyExc_ValueError,
995 "math domain error"); /* invalid arg */
996 return NULL;
997 }
998 if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
2354a759
BP
999 if (can_overflow)
1000 PyErr_SetString(PyExc_OverflowError,
1001 "math range error"); /* overflow */
1002 else
1003 PyErr_SetString(PyExc_ValueError,
1004 "math domain error"); /* singularity */
1005 return NULL;
f95a1b3c
AP
1006 }
1007 if (Py_IS_FINITE(r) && errno && is_error(r))
1008 /* this branch unnecessary on most platforms */
1009 return NULL;
1010
45fa12ae 1011 return PyFloat_FromDouble(r);
c2155835
JY
1012}
1013
12c4bdb0
MD
1014/* variant of math_1, to be used when the function being wrapped is known to
1015 set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1016 errno = ERANGE for overflow). */
1017
1018static PyObject *
1019math_1a(PyObject *arg, double (*func) (double))
1020{
f95a1b3c
AP
1021 double x, r;
1022 x = PyFloat_AsDouble(arg);
1023 if (x == -1.0 && PyErr_Occurred())
1024 return NULL;
1025 errno = 0;
f95a1b3c 1026 r = (*func)(x);
f95a1b3c
AP
1027 if (errno && is_error(r))
1028 return NULL;
1029 return PyFloat_FromDouble(r);
12c4bdb0
MD
1030}
1031
53876d9c
CH
1032/*
1033 math_2 is used to wrap a libm function f that takes two double
1034 arguments and returns a double.
1035
1036 The error reporting follows these rules, which are designed to do
1037 the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1038 platforms.
1039
1040 - a NaN result from non-NaN inputs causes ValueError to be raised
1041 - an infinite result from finite inputs causes OverflowError to be
1042 raised.
1043 - if the result is finite and errno == EDOM then ValueError is
1044 raised
1045 - if the result is finite and nonzero and errno == ERANGE then
1046 OverflowError is raised
1047
1048 The last rule is used to catch overflow on platforms which follow
1049 C89 but for which HUGE_VAL is not an infinity.
1050
1051 For most two-argument functions (copysign, fmod, hypot, atan2)
1052 these rules are enough to ensure that Python's functions behave as
1053 specified in 'Annex F' of the C99 standard, with the 'invalid' and
1054 'divide-by-zero' floating-point exceptions mapping to Python's
1055 ValueError and the 'overflow' floating-point exception mapping to
1056 OverflowError.
1057*/
1058
8b43b19e 1059static PyObject *
d0d3e991
SS
1060math_2(PyObject *const *args, Py_ssize_t nargs,
1061 double (*func) (double, double), const char *funcname)
85a5fbbd 1062{
f95a1b3c 1063 double x, y, r;
d0d3e991 1064 if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
f95a1b3c 1065 return NULL;
d0d3e991 1066 x = PyFloat_AsDouble(args[0]);
5208b4b3
ZS
1067 if (x == -1.0 && PyErr_Occurred()) {
1068 return NULL;
1069 }
d0d3e991 1070 y = PyFloat_AsDouble(args[1]);
5208b4b3 1071 if (y == -1.0 && PyErr_Occurred()) {
f95a1b3c 1072 return NULL;
5208b4b3 1073 }
f95a1b3c 1074 errno = 0;
f95a1b3c 1075 r = (*func)(x, y);
f95a1b3c
AP
1076 if (Py_IS_NAN(r)) {
1077 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1078 errno = EDOM;
1079 else
1080 errno = 0;
1081 }
1082 else if (Py_IS_INFINITY(r)) {
1083 if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1084 errno = ERANGE;
1085 else
1086 errno = 0;
1087 }
1088 if (errno && is_error(r))
1089 return NULL;
1090 else
1091 return PyFloat_FromDouble(r);
85a5fbbd
GR
1092}
1093
f95a1b3c
AP
1094#define FUNC1(funcname, func, can_overflow, docstring) \
1095 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1096 return math_1(args, func, can_overflow); \
1097 }\
1098 PyDoc_STRVAR(math_##funcname##_doc, docstring);
85a5fbbd 1099
f95a1b3c
AP
1100#define FUNC1A(funcname, func, docstring) \
1101 static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1102 return math_1a(args, func); \
1103 }\
1104 PyDoc_STRVAR(math_##funcname##_doc, docstring);
12c4bdb0 1105
40c48685 1106#define FUNC2(funcname, func, docstring) \
d0d3e991
SS
1107 static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1108 return math_2(args, nargs, func, #funcname); \
f95a1b3c
AP
1109 }\
1110 PyDoc_STRVAR(math_##funcname##_doc, docstring);
c6e22902 1111
53876d9c 1112FUNC1(acos, acos, 0,
c9ea9335 1113 "acos($module, x, /)\n--\n\n"
dc3f99fa
GC
1114 "Return the arc cosine (measured in radians) of x.\n\n"
1115 "The result is between 0 and pi.")
fa26245a 1116FUNC1(acosh, acosh, 0,
c9ea9335
SS
1117 "acosh($module, x, /)\n--\n\n"
1118 "Return the inverse hyperbolic cosine of x.")
53876d9c 1119FUNC1(asin, asin, 0,
c9ea9335 1120 "asin($module, x, /)\n--\n\n"
dc3f99fa
GC
1121 "Return the arc sine (measured in radians) of x.\n\n"
1122 "The result is between -pi/2 and pi/2.")
fa26245a 1123FUNC1(asinh, asinh, 0,
c9ea9335
SS
1124 "asinh($module, x, /)\n--\n\n"
1125 "Return the inverse hyperbolic sine of x.")
53876d9c 1126FUNC1(atan, atan, 0,
c9ea9335 1127 "atan($module, x, /)\n--\n\n"
dc3f99fa
GC
1128 "Return the arc tangent (measured in radians) of x.\n\n"
1129 "The result is between -pi/2 and pi/2.")
e57950fb 1130FUNC2(atan2, m_atan2,
c9ea9335
SS
1131 "atan2($module, y, x, /)\n--\n\n"
1132 "Return the arc tangent (measured in radians) of y/x.\n\n"
fe71f813 1133 "Unlike atan(y/x), the signs of both x and y are considered.")
fa26245a 1134FUNC1(atanh, atanh, 0,
c9ea9335
SS
1135 "atanh($module, x, /)\n--\n\n"
1136 "Return the inverse hyperbolic tangent of x.")
ac867f10
AR
1137FUNC1(cbrt, cbrt, 0,
1138 "cbrt($module, x, /)\n--\n\n"
1139 "Return the cube root of x.")
c9ea9335
SS
1140
1141/*[clinic input]
1142math.ceil
1143
1144 x as number: object
1145 /
1146
1147Return the ceiling of x as an Integral.
1148
1149This is the smallest integer >= x.
1150[clinic start generated code]*/
13e05de9 1151
c9ea9335
SS
1152static PyObject *
1153math_ceil(PyObject *module, PyObject *number)
1154/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1155{
f95a1b3c 1156
5fd5cb8d 1157 if (!PyFloat_CheckExact(number)) {
23c9febd
DN
1158 math_module_state *state = get_math_module_state(module);
1159 PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
5fd5cb8d 1160 if (method != NULL) {
ce3489cf 1161 PyObject *result = _PyObject_CallNoArgs(method);
5fd5cb8d
SS
1162 Py_DECREF(method);
1163 return result;
1164 }
f751bc9c 1165 if (PyErr_Occurred())
f95a1b3c 1166 return NULL;
f751bc9c 1167 }
5fd5cb8d
SS
1168 double x = PyFloat_AsDouble(number);
1169 if (x == -1.0 && PyErr_Occurred())
1170 return NULL;
1171
1172 return PyLong_FromDouble(ceil(x));
13e05de9
GR
1173}
1174
53876d9c 1175FUNC2(copysign, copysign,
c9ea9335
SS
1176 "copysign($module, x, y, /)\n--\n\n"
1177 "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1178 "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1179 "returns -1.0.\n")
53876d9c 1180FUNC1(cos, cos, 0,
c9ea9335
SS
1181 "cos($module, x, /)\n--\n\n"
1182 "Return the cosine of x (measured in radians).")
53876d9c 1183FUNC1(cosh, cosh, 1,
c9ea9335
SS
1184 "cosh($module, x, /)\n--\n\n"
1185 "Return the hyperbolic cosine of x.")
58395759 1186FUNC1A(erf, erf,
c9ea9335
SS
1187 "erf($module, x, /)\n--\n\n"
1188 "Error function at x.")
58395759 1189FUNC1A(erfc, erfc,
c9ea9335
SS
1190 "erfc($module, x, /)\n--\n\n"
1191 "Complementary error function at x.")
53876d9c 1192FUNC1(exp, exp, 1,
c9ea9335
SS
1193 "exp($module, x, /)\n--\n\n"
1194 "Return e raised to the power of x.")
6266e4af
G
1195FUNC1(exp2, exp2, 1,
1196 "exp2($module, x, /)\n--\n\n"
1197 "Return 2 raised to the power of x.")
fa26245a 1198FUNC1(expm1, expm1, 1,
c9ea9335
SS
1199 "expm1($module, x, /)\n--\n\n"
1200 "Return exp(x)-1.\n\n"
664b511c
MD
1201 "This function avoids the loss of precision involved in the direct "
1202 "evaluation of exp(x)-1 for small x.")
53876d9c 1203FUNC1(fabs, fabs, 0,
c9ea9335
SS
1204 "fabs($module, x, /)\n--\n\n"
1205 "Return the absolute value of the float x.")
1206
1207/*[clinic input]
1208math.floor
13e05de9 1209
c9ea9335
SS
1210 x as number: object
1211 /
1212
1213Return the floor of x as an Integral.
1214
1215This is the largest integer <= x.
1216[clinic start generated code]*/
1217
1218static PyObject *
1219math_floor(PyObject *module, PyObject *number)
1220/*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1221{
930f4518
RH
1222 double x;
1223
930f4518
RH
1224 if (PyFloat_CheckExact(number)) {
1225 x = PyFloat_AS_DOUBLE(number);
1226 }
1227 else
1228 {
23c9febd
DN
1229 math_module_state *state = get_math_module_state(module);
1230 PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
5fd5cb8d 1231 if (method != NULL) {
ce3489cf 1232 PyObject *result = _PyObject_CallNoArgs(method);
5fd5cb8d
SS
1233 Py_DECREF(method);
1234 return result;
1235 }
8bb9cde6 1236 if (PyErr_Occurred())
f95a1b3c 1237 return NULL;
930f4518
RH
1238 x = PyFloat_AsDouble(number);
1239 if (x == -1.0 && PyErr_Occurred())
1240 return NULL;
8bb9cde6 1241 }
5fd5cb8d 1242 return PyLong_FromDouble(floor(x));
13e05de9
GR
1243}
1244
12c4bdb0 1245FUNC1A(gamma, m_tgamma,
c9ea9335
SS
1246 "gamma($module, x, /)\n--\n\n"
1247 "Gamma function at x.")
05d2e084 1248FUNC1A(lgamma, m_lgamma,
c9ea9335
SS
1249 "lgamma($module, x, /)\n--\n\n"
1250 "Natural logarithm of absolute value of Gamma function at x.")
be64d951 1251FUNC1(log1p, m_log1p, 0,
c9ea9335
SS
1252 "log1p($module, x, /)\n--\n\n"
1253 "Return the natural logarithm of 1+x (base e).\n\n"
a0dfa82e 1254 "The result is computed in a way which is accurate for x near zero.")
a0ce375e
MD
1255FUNC2(remainder, m_remainder,
1256 "remainder($module, x, y, /)\n--\n\n"
1257 "Difference between x and the closest integer multiple of y.\n\n"
1258 "Return x - n*y where n*y is the closest integer multiple of y.\n"
1259 "In the case where x is exactly halfway between two multiples of\n"
1260 "y, the nearest even value of n is used. The result is always exact.")
53876d9c 1261FUNC1(sin, sin, 0,
c9ea9335
SS
1262 "sin($module, x, /)\n--\n\n"
1263 "Return the sine of x (measured in radians).")
53876d9c 1264FUNC1(sinh, sinh, 1,
c9ea9335
SS
1265 "sinh($module, x, /)\n--\n\n"
1266 "Return the hyperbolic sine of x.")
53876d9c 1267FUNC1(sqrt, sqrt, 0,
c9ea9335
SS
1268 "sqrt($module, x, /)\n--\n\n"
1269 "Return the square root of x.")
53876d9c 1270FUNC1(tan, tan, 0,
c9ea9335
SS
1271 "tan($module, x, /)\n--\n\n"
1272 "Return the tangent of x (measured in radians).")
53876d9c 1273FUNC1(tanh, tanh, 0,
c9ea9335
SS
1274 "tanh($module, x, /)\n--\n\n"
1275 "Return the hyperbolic tangent of x.")
85a5fbbd 1276
2b7411df
BP
1277/* Precision summation function as msum() by Raymond Hettinger in
1278 <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1279 enhanced with the exact partials sum and roundoff from Mark
1280 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1281 See those links for more details, proofs and other references.
1282
87d3bd0e
MD
1283 Note 1: IEEE 754 floating-point semantics with a rounding mode of
1284 roundTiesToEven are assumed.
2b7411df 1285
87d3bd0e
MD
1286 Note 2: No provision is made for intermediate overflow handling;
1287 therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1288 fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
2b7411df
BP
1289 overflow of the first partial sum.
1290
87d3bd0e
MD
1291 Note 3: The algorithm has two potential sources of fragility. First, C
1292 permits arithmetic operations on `double`s to be performed in an
1293 intermediate format whose range and precision may be greater than those of
1294 `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1295 example on machines using the now largely historical x87 FPUs. In this case,
1296 `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1297 `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1298 should be safe from this source of errors. Second, an aggressively
1299 optimizing compiler can re-associate operations so that (for example) the
1300 statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1301 re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1302 re-association would be in violation of the C standard, and should not occur
1303 except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1304 -fassociative-math). Such optimizations should be avoided for this module.
1305
1306 Note 4: The signature of math.fsum() differs from builtins.sum()
2b7411df
BP
1307 because the start argument doesn't make sense in the context of
1308 accurate summation. Since the partials table is collapsed before
1309 returning a result, sum(seq2, start=sum(seq1)) may not equal the
1310 accurate result returned by sum(itertools.chain(seq1, seq2)).
1311*/
1312
1313#define NUM_PARTIALS 32 /* initial partials array size, on stack */
1314
1315/* Extend the partials array p[] by doubling its size. */
1316static int /* non-zero on error */
aa7633ab 1317_fsum_realloc(double **p_ptr, Py_ssize_t n,
2b7411df
BP
1318 double *ps, Py_ssize_t *m_ptr)
1319{
f95a1b3c
AP
1320 void *v = NULL;
1321 Py_ssize_t m = *m_ptr;
1322
1323 m += m; /* double */
049e509a 1324 if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
f95a1b3c
AP
1325 double *p = *p_ptr;
1326 if (p == ps) {
1327 v = PyMem_Malloc(sizeof(double) * m);
1328 if (v != NULL)
1329 memcpy(v, ps, sizeof(double) * n);
1330 }
1331 else
1332 v = PyMem_Realloc(p, sizeof(double) * m);
1333 }
1334 if (v == NULL) { /* size overflow or no memory */
1335 PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1336 return 1;
1337 }
1338 *p_ptr = (double*) v;
1339 *m_ptr = m;
1340 return 0;
2b7411df
BP
1341}
1342
1343/* Full precision summation of a sequence of floats.
1344
1345 def msum(iterable):
1346 partials = [] # sorted, non-overlapping partial sums
1347 for x in iterable:
fdb0accc
MD
1348 i = 0
1349 for y in partials:
1350 if abs(x) < abs(y):
1351 x, y = y, x
1352 hi = x + y
1353 lo = y - (hi - x)
1354 if lo:
1355 partials[i] = lo
1356 i += 1
1357 x = hi
1358 partials[i:] = [x]
2b7411df
BP
1359 return sum_exact(partials)
1360
1361 Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo
1362 are exactly equal to x+y. The inner loop applies hi/lo summation to each
1363 partial so that the list of partial sums remains exact.
1364
1365 Sum_exact() adds the partial sums exactly and correctly rounds the final
1366 result (using the round-half-to-even rule). The items in partials remain
1367 non-zero, non-special, non-overlapping and strictly increasing in
1368 magnitude, but possibly not all having the same sign.
1369
1370 Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1371*/
1372
c9ea9335
SS
1373/*[clinic input]
1374math.fsum
1375
1376 seq: object
1377 /
1378
1379Return an accurate floating point sum of values in the iterable seq.
1380
1381Assumes IEEE-754 floating point arithmetic.
1382[clinic start generated code]*/
1383
1384static PyObject *
1385math_fsum(PyObject *module, PyObject *seq)
1386/*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
2b7411df 1387{
f95a1b3c
AP
1388 PyObject *item, *iter, *sum = NULL;
1389 Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1390 double x, y, t, ps[NUM_PARTIALS], *p = ps;
1391 double xsave, special_sum = 0.0, inf_sum = 0.0;
36f23293 1392 double hi, yr, lo = 0.0;
f95a1b3c
AP
1393
1394 iter = PyObject_GetIter(seq);
1395 if (iter == NULL)
1396 return NULL;
1397
f95a1b3c
AP
1398 for(;;) { /* for x in iterable */
1399 assert(0 <= n && n <= m);
1400 assert((m == NUM_PARTIALS && p == ps) ||
1401 (m > NUM_PARTIALS && p != NULL));
1402
1403 item = PyIter_Next(iter);
1404 if (item == NULL) {
1405 if (PyErr_Occurred())
1406 goto _fsum_error;
1407 break;
1408 }
cfd735ea 1409 ASSIGN_DOUBLE(x, item, error_with_item);
f95a1b3c 1410 Py_DECREF(item);
f95a1b3c
AP
1411
1412 xsave = x;
1413 for (i = j = 0; j < n; j++) { /* for y in partials */
1414 y = p[j];
1415 if (fabs(x) < fabs(y)) {
1416 t = x; x = y; y = t;
1417 }
1418 hi = x + y;
1419 yr = hi - x;
1420 lo = y - yr;
1421 if (lo != 0.0)
1422 p[i++] = lo;
1423 x = hi;
1424 }
1425
1426 n = i; /* ps[i:] = [x] */
1427 if (x != 0.0) {
1428 if (! Py_IS_FINITE(x)) {
1429 /* a nonfinite x could arise either as
1430 a result of intermediate overflow, or
1431 as a result of a nan or inf in the
1432 summands */
1433 if (Py_IS_FINITE(xsave)) {
1434 PyErr_SetString(PyExc_OverflowError,
1435 "intermediate overflow in fsum");
1436 goto _fsum_error;
1437 }
1438 if (Py_IS_INFINITY(xsave))
1439 inf_sum += xsave;
1440 special_sum += xsave;
1441 /* reset partials */
1442 n = 0;
1443 }
1444 else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1445 goto _fsum_error;
1446 else
1447 p[n++] = x;
1448 }
1449 }
1450
1451 if (special_sum != 0.0) {
1452 if (Py_IS_NAN(inf_sum))
1453 PyErr_SetString(PyExc_ValueError,
1454 "-inf + inf in fsum");
1455 else
1456 sum = PyFloat_FromDouble(special_sum);
1457 goto _fsum_error;
1458 }
1459
1460 hi = 0.0;
1461 if (n > 0) {
1462 hi = p[--n];
1463 /* sum_exact(ps, hi) from the top, stop when the sum becomes
1464 inexact. */
1465 while (n > 0) {
1466 x = hi;
1467 y = p[--n];
1468 assert(fabs(y) < fabs(x));
1469 hi = x + y;
1470 yr = hi - x;
1471 lo = y - yr;
1472 if (lo != 0.0)
1473 break;
1474 }
1475 /* Make half-even rounding work across multiple partials.
1476 Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1477 digit to two instead of down to zero (the 1e-16 makes the 1
1478 slightly closer to two). With a potential 1 ULP rounding
1479 error fixed-up, math.fsum() can guarantee commutativity. */
1480 if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1481 (lo > 0.0 && p[n-1] > 0.0))) {
1482 y = lo * 2.0;
1483 x = hi + y;
1484 yr = x - hi;
1485 if (y == yr)
1486 hi = x;
1487 }
1488 }
1489 sum = PyFloat_FromDouble(hi);
2b7411df 1490
cfd735ea 1491 _fsum_error:
f95a1b3c
AP
1492 Py_DECREF(iter);
1493 if (p != ps)
1494 PyMem_Free(p);
1495 return sum;
cfd735ea
RH
1496
1497 error_with_item:
1498 Py_DECREF(item);
1499 goto _fsum_error;
2b7411df
BP
1500}
1501
1502#undef NUM_PARTIALS
1503
2b7411df 1504
4c8a9a2d
MD
1505static unsigned long
1506count_set_bits(unsigned long n)
1507{
1508 unsigned long count = 0;
1509 while (n != 0) {
1510 ++count;
1511 n &= n - 1; /* clear least significant bit */
1512 }
1513 return count;
1514}
1515
73934b9d
MD
1516/* Integer square root
1517
1518Given a nonnegative integer `n`, we want to compute the largest integer
1519`a` for which `a * a <= n`, or equivalently the integer part of the exact
1520square root of `n`.
1521
1522We use an adaptive-precision pure-integer version of Newton's iteration. Given
1523a positive integer `n`, the algorithm produces at each iteration an integer
1524approximation `a` to the square root of `n >> s` for some even integer `s`,
1525with `s` decreasing as the iterations progress. On the final iteration, `s` is
1526zero and we have an approximation to the square root of `n` itself.
1527
1528At every step, the approximation `a` is strictly within 1.0 of the true square
1529root, so we have
1530
1531 (a - 1)**2 < (n >> s) < (a + 1)**2
1532
1533After the final iteration, a check-and-correct step is needed to determine
1534whether `a` or `a - 1` gives the desired integer square root of `n`.
1535
1536The algorithm is remarkable in its simplicity. There's no need for a
1537per-iteration check-and-correct step, and termination is straightforward: the
1538number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1539for `n > 1`). The only tricky part of the correctness proof is in establishing
1540that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1541iteration to the next. A sketch of the proof of this is given below.
1542
1543In addition to the proof sketch, a formal, computer-verified proof
1544of correctness (using Lean) of an equivalent recursive algorithm can be found
1545here:
1546
1547 https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1548
1549
1550Here's Python code equivalent to the C implementation below:
1551
1552 def isqrt(n):
1553 """
1554 Return the integer part of the square root of the input.
1555 """
1556 n = operator.index(n)
1557
1558 if n < 0:
1559 raise ValueError("isqrt() argument must be nonnegative")
1560 if n == 0:
1561 return 0
1562
1563 c = (n.bit_length() - 1) // 2
1564 a = 1
1565 d = 0
1566 for s in reversed(range(c.bit_length())):
2dfeaa92 1567 # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
73934b9d
MD
1568 e = d
1569 d = c >> s
1570 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
73934b9d
MD
1571
1572 return a - (a*a > n)
1573
1574
1575Sketch of proof of correctness
1576------------------------------
1577
1578The delicate part of the correctness proof is showing that the loop invariant
1579is preserved from one iteration to the next. That is, just before the line
1580
1581 a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1582
1583is executed in the above code, we know that
1584
1585 (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1586
1587(since `e` is always the value of `d` from the previous iteration). We must
1588prove that after that line is executed, we have
1589
1590 (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1591
f7d72e48 1592To facilitate the proof, we make some changes of notation. Write `m` for
73934b9d
MD
1593`n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1594
1595 b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1596
1597or equivalently:
1598
1599 (2) b = (a << d - e - 1) + (m >> d - e + 1) // a
1600
1601Then we can rewrite (1) as:
1602
1603 (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1604
1605and we must show that (b - 1)**2 < m < (b + 1)**2.
1606
1607From this point on, we switch to mathematical notation, so `/` means exact
1608division rather than integer division and `^` is used for exponentiation. We
1609use the `√` symbol for the exact square root. In (3), we can remove the
1610implicit floor operation to give:
1611
1612 (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1613
1614Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1615
1616 (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)
1617
1618Squaring and dividing through by `2^(d-e+1) a` gives
1619
1620 (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1621
1622We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1623right-hand side of (6) with `1`, and now replacing the central
1624term `m / (2^(d-e+1) a)` with its floor in (6) gives
1625
1626 (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1627
1628Or equivalently, from (2):
1629
1630 (7) -1 < b - √m < 1
1631
1632and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1633to prove.
1634
1635We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1636a` that was used to get line (7) above. From the definition of `c`, we have
1637`4^c <= n`, which implies
1638
1639 (8) 4^d <= m
1640
1641also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1642that `2d - 2e - 1 <= d` and hence that
1643
1644 (9) 4^(2d - 2e - 1) <= m
1645
1646Dividing both sides by `4^(d - e)` gives
1647
1648 (10) 4^(d - e - 1) <= m / 4^(d - e)
1649
1650But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1651
1652 (11) 4^(d - e - 1) < (a + 1)^2
1653
1654Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1655`a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1656completes the proof sketch.
1657
1658*/
1659
d02c5e9b
MD
1660/*
1661 The _approximate_isqrt_tab table provides approximate square roots for
1662 16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1663
1664 a = _approximate_isqrt_tab[(n >> 8) - 64]
1665
1666 is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1667
1668 The table was computed in Python using the expression:
1669
1670 [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1671*/
1672
1673static const uint8_t _approximate_isqrt_tab[192] = {
1674 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1675 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1676 151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1677 160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1678 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1679 179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1680 187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1681 195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1682 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1683 210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1684 217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1685 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1686 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1687 238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1688 244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1689 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1690};
5c08ce9b
MD
1691
1692/* Approximate square root of a large 64-bit integer.
1693
1694 Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1695 satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1696
d02c5e9b 1697static inline uint32_t
5c08ce9b
MD
1698_approximate_isqrt(uint64_t n)
1699{
d02c5e9b
MD
1700 uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1701 u = (u << 7) + (uint32_t)(n >> 41) / u;
1702 return (u << 15) + (uint32_t)((n >> 17) / u);
5c08ce9b
MD
1703}
1704
73934b9d
MD
1705/*[clinic input]
1706math.isqrt
1707
1708 n: object
1709 /
1710
1711Return the integer part of the square root of the input.
1712[clinic start generated code]*/
1713
1714static PyObject *
1715math_isqrt(PyObject *module, PyObject *n)
1716/*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1717{
5c08ce9b 1718 int a_too_large, c_bit_length;
73934b9d 1719 size_t c, d;
d02c5e9b
MD
1720 uint64_t m;
1721 uint32_t u;
73934b9d
MD
1722 PyObject *a = NULL, *b;
1723
5f4b229d 1724 n = _PyNumber_Index(n);
73934b9d
MD
1725 if (n == NULL) {
1726 return NULL;
1727 }
1728
1729 if (_PyLong_Sign(n) < 0) {
1730 PyErr_SetString(
1731 PyExc_ValueError,
1732 "isqrt() argument must be nonnegative");
1733 goto error;
1734 }
1735 if (_PyLong_Sign(n) == 0) {
1736 Py_DECREF(n);
1737 return PyLong_FromLong(0);
1738 }
1739
5c08ce9b 1740 /* c = (n.bit_length() - 1) // 2 */
73934b9d
MD
1741 c = _PyLong_NumBits(n);
1742 if (c == (size_t)(-1)) {
1743 goto error;
1744 }
1745 c = (c - 1U) / 2U;
1746
5c08ce9b 1747 /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
d02c5e9b 1748 fast, almost branch-free algorithm. */
5c08ce9b 1749 if (c <= 31U) {
d02c5e9b 1750 int shift = 31 - (int)c;
5c08ce9b
MD
1751 m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1752 Py_DECREF(n);
1753 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1754 return NULL;
1755 }
d02c5e9b
MD
1756 u = _approximate_isqrt(m << 2*shift) >> shift;
1757 u -= (uint64_t)u * u > m;
1758 return PyLong_FromUnsignedLong(u);
73934b9d
MD
1759 }
1760
5c08ce9b
MD
1761 /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1762 arithmetic, then switch to using Python long integers. */
1763
1764 /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1765 c_bit_length = 6;
1766 while ((c >> c_bit_length) > 0U) {
1767 ++c_bit_length;
1768 }
1769
1770 /* Initialise d and a. */
1771 d = c >> (c_bit_length - 5);
1772 b = _PyLong_Rshift(n, 2U*c - 62U);
1773 if (b == NULL) {
1774 goto error;
1775 }
1776 m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1777 Py_DECREF(b);
1778 if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1779 goto error;
1780 }
1781 u = _approximate_isqrt(m) >> (31U - d);
d02c5e9b 1782 a = PyLong_FromUnsignedLong(u);
73934b9d
MD
1783 if (a == NULL) {
1784 goto error;
1785 }
5c08ce9b
MD
1786
1787 for (int s = c_bit_length - 6; s >= 0; --s) {
a5119e7d 1788 PyObject *q;
73934b9d
MD
1789 size_t e = d;
1790
1791 d = c >> s;
1792
1793 /* q = (n >> 2*c - e - d + 1) // a */
a5119e7d 1794 q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
73934b9d
MD
1795 if (q == NULL) {
1796 goto error;
1797 }
1798 Py_SETREF(q, PyNumber_FloorDivide(q, a));
1799 if (q == NULL) {
1800 goto error;
1801 }
1802
1803 /* a = (a << d - 1 - e) + q */
a5119e7d 1804 Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
73934b9d
MD
1805 if (a == NULL) {
1806 Py_DECREF(q);
1807 goto error;
1808 }
1809 Py_SETREF(a, PyNumber_Add(a, q));
1810 Py_DECREF(q);
1811 if (a == NULL) {
1812 goto error;
1813 }
1814 }
1815
1816 /* The correct result is either a or a - 1. Figure out which, and
1817 decrement a if necessary. */
1818
1819 /* a_too_large = n < a * a */
1820 b = PyNumber_Multiply(a, a);
1821 if (b == NULL) {
1822 goto error;
1823 }
1824 a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1825 Py_DECREF(b);
1826 if (a_too_large == -1) {
1827 goto error;
1828 }
1829
1830 if (a_too_large) {
37834136 1831 Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
73934b9d
MD
1832 }
1833 Py_DECREF(n);
1834 return a;
1835
1836 error:
1837 Py_XDECREF(a);
1838 Py_DECREF(n);
1839 return NULL;
1840}
1841
4c8a9a2d
MD
1842/* Divide-and-conquer factorial algorithm
1843 *
15f44ab0 1844 * Based on the formula and pseudo-code provided at:
4c8a9a2d
MD
1845 * http://www.luschny.de/math/factorial/binarysplitfact.html
1846 *
1847 * Faster algorithms exist, but they're more complicated and depend on
9527afd0 1848 * a fast prime factorization algorithm.
4c8a9a2d
MD
1849 *
1850 * Notes on the algorithm
1851 * ----------------------
1852 *
1853 * factorial(n) is written in the form 2**k * m, with m odd. k and m are
1854 * computed separately, and then combined using a left shift.
1855 *
1856 * The function factorial_odd_part computes the odd part m (i.e., the greatest
1857 * odd divisor) of factorial(n), using the formula:
1858 *
1859 * factorial_odd_part(n) =
1860 *
1861 * product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1862 *
1863 * Example: factorial_odd_part(20) =
1864 *
1865 * (1) *
1866 * (1) *
1867 * (1 * 3 * 5) *
09605ad7 1868 * (1 * 3 * 5 * 7 * 9) *
4c8a9a2d
MD
1869 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1870 *
1871 * Here i goes from large to small: the first term corresponds to i=4 (any
1872 * larger i gives an empty product), and the last term corresponds to i=0.
1873 * Each term can be computed from the last by multiplying by the extra odd
1874 * numbers required: e.g., to get from the penultimate term to the last one,
1875 * we multiply by (11 * 13 * 15 * 17 * 19).
1876 *
1877 * To see a hint of why this formula works, here are the same numbers as above
1878 * but with the even parts (i.e., the appropriate powers of 2) included. For
1879 * each subterm in the product for i, we multiply that subterm by 2**i:
1880 *
1881 * factorial(20) =
1882 *
1883 * (16) *
1884 * (8) *
1885 * (4 * 12 * 20) *
1886 * (2 * 6 * 10 * 14 * 18) *
1887 * (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1888 *
1889 * The factorial_partial_product function computes the product of all odd j in
1890 * range(start, stop) for given start and stop. It's used to compute the
1891 * partial products like (11 * 13 * 15 * 17 * 19) in the example above. It
1892 * operates recursively, repeatedly splitting the range into two roughly equal
1893 * pieces until the subranges are small enough to be computed using only C
1894 * integer arithmetic.
1895 *
1896 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1897 * the factorial) is computed independently in the main math_factorial
1898 * function. By standard results, its value is:
1899 *
1900 * two_valuation = n//2 + n//4 + n//8 + ....
1901 *
1902 * It can be shown (e.g., by complete induction on n) that two_valuation is
1903 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1904 * '1'-bits in the binary expansion of n.
1905 */
1906
1907/* factorial_partial_product: Compute product(range(start, stop, 2)) using
1908 * divide and conquer. Assumes start and stop are odd and stop > start.
1909 * max_bits must be >= bit_length(stop - 2). */
1910
1911static PyObject *
1912factorial_partial_product(unsigned long start, unsigned long stop,
1913 unsigned long max_bits)
1914{
1915 unsigned long midpoint, num_operands;
1916 PyObject *left = NULL, *right = NULL, *result = NULL;
1917
1918 /* If the return value will fit an unsigned long, then we can
1919 * multiply in a tight, fast loop where each multiply is O(1).
1920 * Compute an upper bound on the number of bits required to store
1921 * the answer.
1922 *
1923 * Storing some integer z requires floor(lg(z))+1 bits, which is
1924 * conveniently the value returned by bit_length(z). The
1925 * product x*y will require at most
1926 * bit_length(x) + bit_length(y) bits to store, based
1927 * on the idea that lg product = lg x + lg y.
1928 *
1929 * We know that stop - 2 is the largest number to be multiplied. From
1930 * there, we have: bit_length(answer) <= num_operands *
1931 * bit_length(stop - 2)
1932 */
1933
1934 num_operands = (stop - start) / 2;
1935 /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1936 * unlikely case of an overflow in num_operands * max_bits. */
1937 if (num_operands <= 8 * SIZEOF_LONG &&
1938 num_operands * max_bits <= 8 * SIZEOF_LONG) {
1939 unsigned long j, total;
1940 for (total = start, j = start + 2; j < stop; j += 2)
1941 total *= j;
1942 return PyLong_FromUnsignedLong(total);
1943 }
1944
1945 /* find midpoint of range(start, stop), rounded up to next odd number. */
1946 midpoint = (start + num_operands) | 1;
1947 left = factorial_partial_product(start, midpoint,
c5b79003 1948 _Py_bit_length(midpoint - 2));
4c8a9a2d
MD
1949 if (left == NULL)
1950 goto error;
1951 right = factorial_partial_product(midpoint, stop, max_bits);
1952 if (right == NULL)
1953 goto error;
1954 result = PyNumber_Multiply(left, right);
1955
1956 error:
1957 Py_XDECREF(left);
1958 Py_XDECREF(right);
1959 return result;
1960}
1961
1962/* factorial_odd_part: compute the odd part of factorial(n). */
1963
1964static PyObject *
1965factorial_odd_part(unsigned long n)
1966{
1967 long i;
1968 unsigned long v, lower, upper;
1969 PyObject *partial, *tmp, *inner, *outer;
1970
1971 inner = PyLong_FromLong(1);
1972 if (inner == NULL)
1973 return NULL;
3e2f7135 1974 outer = Py_NewRef(inner);
4c8a9a2d
MD
1975
1976 upper = 3;
c5b79003 1977 for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
4c8a9a2d
MD
1978 v = n >> i;
1979 if (v <= 2)
1980 continue;
1981 lower = upper;
1982 /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
1983 upper = (v + 1) | 1;
1984 /* Here inner is the product of all odd integers j in the range (0,
1985 n/2**(i+1)]. The factorial_partial_product call below gives the
1986 product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
c5b79003 1987 partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
4c8a9a2d
MD
1988 /* inner *= partial */
1989 if (partial == NULL)
1990 goto error;
1991 tmp = PyNumber_Multiply(inner, partial);
1992 Py_DECREF(partial);
1993 if (tmp == NULL)
1994 goto error;
7e3f09ca 1995 Py_SETREF(inner, tmp);
4c8a9a2d
MD
1996 /* Now inner is the product of all odd integers j in the range (0,
1997 n/2**i], giving the inner product in the formula above. */
1998
1999 /* outer *= inner; */
2000 tmp = PyNumber_Multiply(outer, inner);
2001 if (tmp == NULL)
2002 goto error;
7e3f09ca 2003 Py_SETREF(outer, tmp);
4c8a9a2d 2004 }
76464494
MD
2005 Py_DECREF(inner);
2006 return outer;
4c8a9a2d
MD
2007
2008 error:
2009 Py_DECREF(outer);
4c8a9a2d 2010 Py_DECREF(inner);
76464494 2011 return NULL;
4c8a9a2d
MD
2012}
2013
c9ea9335 2014
4c8a9a2d
MD
2015/* Lookup table for small factorial values */
2016
2017static const unsigned long SmallFactorials[] = {
2018 1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2019 362880, 3628800, 39916800, 479001600,
2020#if SIZEOF_LONG >= 8
2021 6227020800, 87178291200, 1307674368000,
2022 20922789888000, 355687428096000, 6402373705728000,
2023 121645100408832000, 2432902008176640000
2024#endif
2025};
2026
c9ea9335
SS
2027/*[clinic input]
2028math.factorial
2029
1ba82d44 2030 n as arg: object
c9ea9335
SS
2031 /
2032
1ba82d44 2033Find n!.
c9ea9335
SS
2034
2035Raise a ValueError if x is negative or non-integral.
2036[clinic start generated code]*/
2037
c28e1fa7 2038static PyObject *
c9ea9335 2039math_factorial(PyObject *module, PyObject *arg)
1ba82d44 2040/*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
c28e1fa7 2041{
a5119e7d 2042 long x, two_valuation;
5990d286 2043 int overflow;
578c3955 2044 PyObject *result, *odd_part;
f95a1b3c 2045
578c3955 2046 x = PyLong_AsLongAndOverflow(arg, &overflow);
5990d286 2047 if (x == -1 && PyErr_Occurred()) {
f95a1b3c 2048 return NULL;
5990d286
MD
2049 }
2050 else if (overflow == 1) {
2051 PyErr_Format(PyExc_OverflowError,
2052 "factorial() argument should not exceed %ld",
2053 LONG_MAX);
2054 return NULL;
2055 }
2056 else if (overflow == -1 || x < 0) {
f95a1b3c 2057 PyErr_SetString(PyExc_ValueError,
4c8a9a2d 2058 "factorial() not defined for negative values");
f95a1b3c
AP
2059 return NULL;
2060 }
2061
4c8a9a2d 2062 /* use lookup table if x is small */
63941881 2063 if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
4c8a9a2d
MD
2064 return PyLong_FromUnsignedLong(SmallFactorials[x]);
2065
2066 /* else express in the form odd_part * 2**two_valuation, and compute as
2067 odd_part << two_valuation. */
2068 odd_part = factorial_odd_part(x);
2069 if (odd_part == NULL)
2070 return NULL;
a5119e7d
SS
2071 two_valuation = x - count_set_bits(x);
2072 result = _PyLong_Lshift(odd_part, two_valuation);
4c8a9a2d 2073 Py_DECREF(odd_part);
f95a1b3c 2074 return result;
c28e1fa7
GB
2075}
2076
c9ea9335
SS
2077
2078/*[clinic input]
2079math.trunc
2080
2081 x: object
2082 /
2083
2084Truncates the Real x to the nearest Integral toward 0.
2085
2086Uses the __trunc__ magic method.
2087[clinic start generated code]*/
c28e1fa7 2088
400adb03 2089static PyObject *
c9ea9335
SS
2090math_trunc(PyObject *module, PyObject *x)
2091/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
400adb03 2092{
b0125892 2093 PyObject *trunc, *result;
f95a1b3c 2094
5fd5cb8d
SS
2095 if (PyFloat_CheckExact(x)) {
2096 return PyFloat_Type.tp_as_number->nb_int(x);
2097 }
2098
c9ea9335
SS
2099 if (Py_TYPE(x)->tp_dict == NULL) {
2100 if (PyType_Ready(Py_TYPE(x)) < 0)
f95a1b3c
AP
2101 return NULL;
2102 }
2103
23c9febd
DN
2104 math_module_state *state = get_math_module_state(module);
2105 trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
f95a1b3c 2106 if (trunc == NULL) {
8bb9cde6
BP
2107 if (!PyErr_Occurred())
2108 PyErr_Format(PyExc_TypeError,
2109 "type %.100s doesn't define __trunc__ method",
c9ea9335 2110 Py_TYPE(x)->tp_name);
f95a1b3c
AP
2111 return NULL;
2112 }
ce3489cf 2113 result = _PyObject_CallNoArgs(trunc);
b0125892
BP
2114 Py_DECREF(trunc);
2115 return result;
400adb03
CH
2116}
2117
c9ea9335
SS
2118
2119/*[clinic input]
2120math.frexp
2121
2122 x: double
2123 /
2124
2125Return the mantissa and exponent of x, as pair (m, e).
2126
2127m is a float and e is an int, such that x = m * 2.**e.
2128If x is 0, m and e are both 0. Else 0.5 <= abs(m) < 1.0.
2129[clinic start generated code]*/
400adb03 2130
8b43b19e 2131static PyObject *
c9ea9335
SS
2132math_frexp_impl(PyObject *module, double x)
2133/*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
d18ad583 2134{
f95a1b3c 2135 int i;
f95a1b3c
AP
2136 /* deal with special cases directly, to sidestep platform
2137 differences */
2138 if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2139 i = 0;
2140 }
2141 else {
f95a1b3c 2142 x = frexp(x, &i);
f95a1b3c
AP
2143 }
2144 return Py_BuildValue("(di)", x, i);
d18ad583
GR
2145}
2146
c9ea9335
SS
2147
2148/*[clinic input]
2149math.ldexp
2150
2151 x: double
2152 i: object
2153 /
2154
2155Return x * (2**i).
2156
2157This is essentially the inverse of frexp().
2158[clinic start generated code]*/
c6e22902 2159
8b43b19e 2160static PyObject *
c9ea9335
SS
2161math_ldexp_impl(PyObject *module, double x, PyObject *i)
2162/*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
d18ad583 2163{
c9ea9335 2164 double r;
f95a1b3c
AP
2165 long exp;
2166 int overflow;
f95a1b3c 2167
c9ea9335 2168 if (PyLong_Check(i)) {
f95a1b3c
AP
2169 /* on overflow, replace exponent with either LONG_MAX
2170 or LONG_MIN, depending on the sign. */
c9ea9335 2171 exp = PyLong_AsLongAndOverflow(i, &overflow);
f95a1b3c
AP
2172 if (exp == -1 && PyErr_Occurred())
2173 return NULL;
2174 if (overflow)
2175 exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2176 }
2177 else {
2178 PyErr_SetString(PyExc_TypeError,
95949427 2179 "Expected an int as second argument to ldexp.");
f95a1b3c
AP
2180 return NULL;
2181 }
2182
2183 if (x == 0. || !Py_IS_FINITE(x)) {
2184 /* NaNs, zeros and infinities are returned unchanged */
2185 r = x;
2186 errno = 0;
2187 } else if (exp > INT_MAX) {
2188 /* overflow */
2189 r = copysign(Py_HUGE_VAL, x);
2190 errno = ERANGE;
2191 } else if (exp < INT_MIN) {
2192 /* underflow to +-0 */
2193 r = copysign(0., x);
2194 errno = 0;
2195 } else {
2196 errno = 0;
f95a1b3c 2197 r = ldexp(x, (int)exp);
f95a1b3c
AP
2198 if (Py_IS_INFINITY(r))
2199 errno = ERANGE;
2200 }
2201
2202 if (errno && is_error(r))
2203 return NULL;
2204 return PyFloat_FromDouble(r);
d18ad583
GR
2205}
2206
c9ea9335
SS
2207
2208/*[clinic input]
2209math.modf
2210
2211 x: double
2212 /
2213
2214Return the fractional and integer parts of x.
2215
2216Both results carry the sign of x and are floats.
2217[clinic start generated code]*/
c6e22902 2218
8b43b19e 2219static PyObject *
c9ea9335
SS
2220math_modf_impl(PyObject *module, double x)
2221/*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
d18ad583 2222{
c9ea9335 2223 double y;
f95a1b3c
AP
2224 /* some platforms don't do the right thing for NaNs and
2225 infinities, so we take care of special cases directly. */
2226 if (!Py_IS_FINITE(x)) {
2227 if (Py_IS_INFINITY(x))
2228 return Py_BuildValue("(dd)", copysign(0., x), x);
2229 else if (Py_IS_NAN(x))
2230 return Py_BuildValue("(dd)", x, x);
2231 }
2232
2233 errno = 0;
f95a1b3c 2234 x = modf(x, &y);
f95a1b3c 2235 return Py_BuildValue("(dd)", x, y);
d18ad583 2236}
85a5fbbd 2237
c6e22902 2238
95949427 2239/* A decent logarithm is easy to compute even for huge ints, but libm can't
78526168 2240 do that by itself -- loghelper can. func is log or log10, and name is
95949427 2241 "log" or "log10". Note that overflow of the result isn't possible: an int
6ecd9e53
MD
2242 can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2243 than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
78526168 2244 small enough to fit in an IEEE single. log and log10 are even smaller.
95949427
SS
2245 However, intermediate overflow is possible for an int if the number of bits
2246 in that int is larger than PY_SSIZE_T_MAX. */
78526168
TP
2247
2248static PyObject*
5a80e858 2249loghelper(PyObject* arg, double (*func)(double))
78526168 2250{
95949427 2251 /* If it is int, do it ourselves. */
f95a1b3c 2252 if (PyLong_Check(arg)) {
c6037174 2253 double x, result;
f95a1b3c 2254 Py_ssize_t e;
c6037174
MD
2255
2256 /* Negative or zero inputs give a ValueError. */
2257 if (Py_SIZE(arg) <= 0) {
f95a1b3c
AP
2258 PyErr_SetString(PyExc_ValueError,
2259 "math domain error");
2260 return NULL;
2261 }
c6037174
MD
2262
2263 x = PyLong_AsDouble(arg);
2264 if (x == -1.0 && PyErr_Occurred()) {
2265 if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2266 return NULL;
2267 /* Here the conversion to double overflowed, but it's possible
2268 to compute the log anyway. Clear the exception and continue. */
2269 PyErr_Clear();
2270 x = _PyLong_Frexp((PyLongObject *)arg, &e);
2271 if (x == -1.0 && PyErr_Occurred())
2272 return NULL;
2273 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2274 result = func(x) + func(2.0) * e;
2275 }
2276 else
2277 /* Successfully converted x to a double. */
2278 result = func(x);
2279 return PyFloat_FromDouble(result);
f95a1b3c
AP
2280 }
2281
2282 /* Else let libm handle it by itself. */
2283 return math_1(arg, func, 0);
78526168
TP
2284}
2285
c9ea9335
SS
2286
2287/*[clinic input]
2288math.log
2289
2290 x: object
0672a6c2
MD
2291 [
2292 base: object(c_default="NULL") = math.e
2293 ]
c9ea9335
SS
2294 /
2295
2296Return the logarithm of x to the given base.
2297
0672a6c2 2298If the base not specified, returns the natural logarithm (base e) of x.
c9ea9335
SS
2299[clinic start generated code]*/
2300
78526168 2301static PyObject *
0672a6c2
MD
2302math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2303 PyObject *base)
2304/*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
78526168 2305{
f95a1b3c
AP
2306 PyObject *num, *den;
2307 PyObject *ans;
2308
5a80e858 2309 num = loghelper(x, m_log);
0672a6c2 2310 if (num == NULL || base == NULL)
f95a1b3c
AP
2311 return num;
2312
5a80e858 2313 den = loghelper(base, m_log);
f95a1b3c
AP
2314 if (den == NULL) {
2315 Py_DECREF(num);
2316 return NULL;
2317 }
2318
2319 ans = PyNumber_TrueDivide(num, den);
2320 Py_DECREF(num);
2321 Py_DECREF(den);
2322 return ans;
78526168
TP
2323}
2324
c9ea9335
SS
2325
2326/*[clinic input]
2327math.log2
2328
2329 x: object
2330 /
2331
2332Return the base 2 logarithm of x.
2333[clinic start generated code]*/
78526168 2334
fa0e3d52 2335static PyObject *
c9ea9335
SS
2336math_log2(PyObject *module, PyObject *x)
2337/*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
fa0e3d52 2338{
5a80e858 2339 return loghelper(x, m_log2);
fa0e3d52
VS
2340}
2341
c9ea9335
SS
2342
2343/*[clinic input]
2344math.log10
2345
2346 x: object
2347 /
2348
2349Return the base 10 logarithm of x.
2350[clinic start generated code]*/
fa0e3d52 2351
78526168 2352static PyObject *
c9ea9335
SS
2353math_log10(PyObject *module, PyObject *x)
2354/*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
78526168 2355{
5a80e858 2356 return loghelper(x, m_log10);
78526168
TP
2357}
2358
c9ea9335
SS
2359
2360/*[clinic input]
2361math.fmod
2362
2363 x: double
2364 y: double
2365 /
2366
2367Return fmod(x, y), according to platform C.
2368
2369x % y may differ.
2370[clinic start generated code]*/
78526168 2371
53876d9c 2372static PyObject *
c9ea9335
SS
2373math_fmod_impl(PyObject *module, double x, double y)
2374/*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
53876d9c 2375{
c9ea9335 2376 double r;
f95a1b3c
AP
2377 /* fmod(x, +/-Inf) returns x for finite x. */
2378 if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2379 return PyFloat_FromDouble(x);
2380 errno = 0;
f95a1b3c 2381 r = fmod(x, y);
f95a1b3c
AP
2382 if (Py_IS_NAN(r)) {
2383 if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2384 errno = EDOM;
2385 else
2386 errno = 0;
2387 }
2388 if (errno && is_error(r))
2389 return NULL;
2390 else
2391 return PyFloat_FromDouble(r);
53876d9c
CH
2392}
2393
13990745 2394/*
8e19c8be 2395Given a *vec* of values, compute the vector norm:
13990745 2396
8e19c8be 2397 sqrt(sum(x ** 2 for x in vec))
fff3c280 2398
8e19c8be
RH
2399The *max* variable should be equal to the largest fabs(x).
2400The *n* variable is the length of *vec*.
2401If n==0, then *max* should be 0.0.
745c0f39 2402If an infinity is present in the vec, *max* should be INF.
c630e104
RH
2403The *found_nan* variable indicates whether some member of
2404the *vec* is a NaN.
21786f51 2405
8e19c8be
RH
2406To avoid overflow/underflow and to achieve high accuracy giving results
2407that are almost always correctly rounded, four techniques are used:
2408
2409* lossless scaling using a power-of-two scaling factor
67c998de 2410* accurate squaring using Veltkamp-Dekker splitting [1]
0a22aa05 2411 or an equivalent with an fma() call
67c998de
RH
2412* compensated summation using a variant of the Neumaier algorithm [2]
2413* differential correction of the square root [3]
8e19c8be
RH
2414
2415The usual presentation of the Neumaier summation algorithm has an
2416expensive branch depending on which operand has the larger
2417magnitude. We avoid this cost by arranging the calculation so that
2418fabs(csum) is always as large as fabs(x).
2419
2420To establish the invariant, *csum* is initialized to 1.0 which is
457d4e97 2421always larger than x**2 after scaling or after division by *max*.
8e19c8be
RH
2422After the loop is finished, the initial 1.0 is subtracted out for a
2423net zero effect on the final sum. Since *csum* will be greater than
24241.0, the subtraction of 1.0 will not cause fractional digits to be
2425dropped from *csum*.
2426
2427To get the full benefit from compensated summation, the largest
2428addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2429scaling or division by *max* should not be skipped even if not
2430otherwise needed to prevent overflow or loss of precision.
2431
82e79480 2432The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
8e19c8be
RH
2433gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2434algorithm gives a *hi* value that is correctly rounded to half
2435precision. When a value at or below 1.0 is correctly rounded, it
2436never goes above 1.0. And when values at or below 1.0 are squared,
2437they remain at or below 1.0, thus preserving the summation invariant.
2438
27de2860
RH
2439Another interesting assertion is that csum+lo*lo == csum. In the loop,
2440each scaled vector element has a magnitude less than 1.0. After the
2441Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2442value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2443Given that csum >= 1.0, we have:
2444 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2445Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2446
92c38164 2447To minimize loss of information during the accumulation of fractional
67c998de
RH
2448values, each term has a separate accumulator. This also breaks up
2449sequential dependencies in the inner loop so the CPU can maximize
3adb23a1
RH
2450floating point throughput. [4] On an Apple M1 Max, hypot(*vec)
2451takes only 3.33 µsec when len(vec) == 1000.
92c38164 2452
8e19c8be
RH
2453The square root differential correction is needed because a
2454correctly rounded square root of a correctly rounded sum of
2455squares can still be off by as much as one ulp.
2456
2457The differential correction starts with a value *x* that is
2458the difference between the square of *h*, the possibly inaccurately
2459rounded square root, and the accurately computed sum of squares.
2460The correction is the first order term of the Maclaurin series
457d4e97 2461expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
8e19c8be
RH
2462
2463Essentially, this differential correction is equivalent to one
82e79480 2464refinement step in Newton's divide-and-average square root
8e19c8be
RH
2465algorithm, effectively doubling the number of accurate bits.
2466This technique is used in Dekker's SQRT2 algorithm and again in
2467Borges' ALGORITHM 4 and 5.
2468
0a22aa05
RH
2469The hypot() function is faithfully rounded (less than 1 ulp error)
2470and usually correctly rounded (within 1/2 ulp). The squaring
2471step is exact. The Neumaier summation computes as if in doubled
2472precision (106 bits) and has the advantage that its input squares
2473are non-negative so that the condition number of the sum is one.
2474The square root with a differential correction is likewise computed
3adb23a1 2475as if in doubled precision.
0a22aa05
RH
2476
2477For n <= 1000, prior to the final addition that rounds the overall
2478result, the internal accuracy of "h" together with its correction of
2479"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2480against a Decimal implementation with prec=300. After 100 million
2481trials, no incorrectly rounded examples were found. In addition,
2482perfect commutativity (all permutations are exactly equal) was
2483verified for 1 billion random inputs with n=5. [7]
67c998de 2484
8e19c8be
RH
2485References:
2486
24871. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
24882. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
92c38164 24893. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
457d4e97
RH
24904. Data dependency graph: https://bugs.python.org/file49439/hypot.png
24915. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
497126f7 24926. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
457d4e97 24937. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
fff3c280 2494
13990745
RH
2495*/
2496
2497static inline double
c630e104 2498vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
13990745 2499{
72186aa6 2500 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
0a22aa05 2501 DoubleLength pr, sm;
fff3c280 2502 int max_e;
13990745
RH
2503 Py_ssize_t i;
2504
c630e104
RH
2505 if (Py_IS_INFINITY(max)) {
2506 return max;
2507 }
2508 if (found_nan) {
2509 return Py_NAN;
2510 }
f3267144 2511 if (max == 0.0 || n <= 1) {
745c0f39 2512 return max;
13990745 2513 }
fff3c280 2514 frexp(max, &max_e);
72186aa6 2515 if (max_e < -1023) {
3adb23a1 2516 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
fff3c280 2517 for (i=0 ; i < n ; i++) {
3adb23a1 2518 vec[i] /= DBL_MIN; // convert subnormals to normals
72186aa6
RH
2519 }
2520 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2521 }
2522 scale = ldexp(1.0, -max_e);
2523 assert(max * scale >= 0.5);
2524 assert(max * scale < 1.0);
2525 for (i=0 ; i < n ; i++) {
2526 x = vec[i];
2527 assert(Py_IS_FINITE(x) && fabs(x) <= max);
3adb23a1 2528 x *= scale; // lossless scaling
72186aa6 2529 assert(fabs(x) < 1.0);
3adb23a1 2530 pr = dl_mul(x, x); // lossless squaring
72186aa6 2531 assert(pr.hi <= 1.0);
3adb23a1 2532 sm = dl_fast_sum(csum, pr.hi); // lossless addition
0a22aa05 2533 csum = sm.hi;
3adb23a1
RH
2534 frac1 += pr.lo; // lossy addition
2535 frac2 += sm.lo; // lossy addition
fff3c280 2536 }
72186aa6
RH
2537 h = sqrt(csum - 1.0 + (frac1 + frac2));
2538 pr = dl_mul(-h, h);
2539 sm = dl_fast_sum(csum, pr.hi);
2540 csum = sm.hi;
2541 frac1 += pr.lo;
2542 frac2 += sm.lo;
2543 x = csum - 1.0 + (frac1 + frac2);
3adb23a1
RH
2544 h += x / (2.0 * h); // differential correction
2545 return h / scale;
13990745
RH
2546}
2547
c630e104
RH
2548#define NUM_STACK_ELEMS 16
2549
9c18b1ae
RH
2550/*[clinic input]
2551math.dist
2552
6b5f1b49
RH
2553 p: object
2554 q: object
9c18b1ae
RH
2555 /
2556
2557Return the Euclidean distance between two points p and q.
2558
6b5f1b49
RH
2559The points should be specified as sequences (or iterables) of
2560coordinates. Both inputs must have the same dimension.
9c18b1ae
RH
2561
2562Roughly equivalent to:
2563 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2564[clinic start generated code]*/
2565
2566static PyObject *
2567math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
6b5f1b49 2568/*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
9c18b1ae
RH
2569{
2570 PyObject *item;
9c18b1ae 2571 double max = 0.0;
9c18b1ae
RH
2572 double x, px, qx, result;
2573 Py_ssize_t i, m, n;
6b5f1b49 2574 int found_nan = 0, p_allocated = 0, q_allocated = 0;
c630e104
RH
2575 double diffs_on_stack[NUM_STACK_ELEMS];
2576 double *diffs = diffs_on_stack;
9c18b1ae 2577
6b5f1b49
RH
2578 if (!PyTuple_Check(p)) {
2579 p = PySequence_Tuple(p);
2580 if (p == NULL) {
2581 return NULL;
2582 }
2583 p_allocated = 1;
2584 }
2585 if (!PyTuple_Check(q)) {
2586 q = PySequence_Tuple(q);
2587 if (q == NULL) {
2588 if (p_allocated) {
2589 Py_DECREF(p);
2590 }
2591 return NULL;
2592 }
2593 q_allocated = 1;
2594 }
2595
9c18b1ae
RH
2596 m = PyTuple_GET_SIZE(p);
2597 n = PyTuple_GET_SIZE(q);
2598 if (m != n) {
2599 PyErr_SetString(PyExc_ValueError,
2600 "both points must have the same number of dimensions");
ab575050 2601 goto error_exit;
9c18b1ae 2602 }
c630e104
RH
2603 if (n > NUM_STACK_ELEMS) {
2604 diffs = (double *) PyObject_Malloc(n * sizeof(double));
2605 if (diffs == NULL) {
ab575050
KA
2606 PyErr_NoMemory();
2607 goto error_exit;
c630e104 2608 }
9c18b1ae
RH
2609 }
2610 for (i=0 ; i<n ; i++) {
2611 item = PyTuple_GET_ITEM(p, i);
cfd735ea 2612 ASSIGN_DOUBLE(px, item, error_exit);
9c18b1ae 2613 item = PyTuple_GET_ITEM(q, i);
cfd735ea 2614 ASSIGN_DOUBLE(qx, item, error_exit);
9c18b1ae
RH
2615 x = fabs(px - qx);
2616 diffs[i] = x;
2617 found_nan |= Py_IS_NAN(x);
2618 if (x > max) {
2619 max = x;
2620 }
2621 }
c630e104
RH
2622 result = vector_norm(n, diffs, max, found_nan);
2623 if (diffs != diffs_on_stack) {
2624 PyObject_Free(diffs);
9c18b1ae 2625 }
6b5f1b49
RH
2626 if (p_allocated) {
2627 Py_DECREF(p);
2628 }
2629 if (q_allocated) {
2630 Py_DECREF(q);
2631 }
9c18b1ae 2632 return PyFloat_FromDouble(result);
c630e104
RH
2633
2634 error_exit:
2635 if (diffs != diffs_on_stack) {
2636 PyObject_Free(diffs);
2637 }
6b5f1b49
RH
2638 if (p_allocated) {
2639 Py_DECREF(p);
2640 }
2641 if (q_allocated) {
2642 Py_DECREF(q);
2643 }
c630e104 2644 return NULL;
9c18b1ae
RH
2645}
2646
c6dabe37 2647/* AC: cannot convert yet, waiting for *args support */
53876d9c 2648static PyObject *
d0d3e991 2649math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
53876d9c 2650{
d0d3e991 2651 Py_ssize_t i;
c6dabe37 2652 PyObject *item;
c6dabe37 2653 double max = 0.0;
c6dabe37
RH
2654 double x, result;
2655 int found_nan = 0;
c630e104
RH
2656 double coord_on_stack[NUM_STACK_ELEMS];
2657 double *coordinates = coord_on_stack;
c6dabe37 2658
d0d3e991
SS
2659 if (nargs > NUM_STACK_ELEMS) {
2660 coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
4c49da0c
ZS
2661 if (coordinates == NULL) {
2662 return PyErr_NoMemory();
2663 }
c630e104 2664 }
d0d3e991
SS
2665 for (i = 0; i < nargs; i++) {
2666 item = args[i];
cfd735ea 2667 ASSIGN_DOUBLE(x, item, error_exit);
c6dabe37
RH
2668 x = fabs(x);
2669 coordinates[i] = x;
2670 found_nan |= Py_IS_NAN(x);
2671 if (x > max) {
2672 max = x;
2673 }
f95a1b3c 2674 }
d0d3e991 2675 result = vector_norm(nargs, coordinates, max, found_nan);
c630e104
RH
2676 if (coordinates != coord_on_stack) {
2677 PyObject_Free(coordinates);
f95a1b3c 2678 }
c6dabe37 2679 return PyFloat_FromDouble(result);
c630e104
RH
2680
2681 error_exit:
2682 if (coordinates != coord_on_stack) {
2683 PyObject_Free(coordinates);
2684 }
2685 return NULL;
53876d9c
CH
2686}
2687
c630e104
RH
2688#undef NUM_STACK_ELEMS
2689
c6dabe37
RH
2690PyDoc_STRVAR(math_hypot_doc,
2691 "hypot(*coordinates) -> value\n\n\
2692Multidimensional Euclidean distance from the origin to a point.\n\
2693\n\
2694Roughly equivalent to:\n\
2695 sqrt(sum(x**2 for x in coordinates))\n\
2696\n\
2697For a two dimensional point (x, y), gives the hypotenuse\n\
2698using the Pythagorean theorem: sqrt(x*x + y*y).\n\
2699\n\
2700For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2701\n\
2702 >>> hypot(3.0, 4.0)\n\
2703 5.0\n\
2704");
53876d9c 2705
47b9f83a
RH
2706/** sumprod() ***************************************************************/
2707
2708/* Forward declaration */
2709static inline int _check_long_mult_overflow(long a, long b);
2710
2711static inline bool
2712long_add_would_overflow(long a, long b)
2713{
2714 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2715}
2716
47b9f83a
RH
2717/*[clinic input]
2718math.sumprod
2719
2720 p: object
2721 q: object
2722 /
2723
2724Return the sum of products of values from two iterables p and q.
2725
2726Roughly equivalent to:
2727
2728 sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
2729
2730For float and mixed int/float inputs, the intermediate products
2731and sums are computed with extended precision.
2732[clinic start generated code]*/
2733
2734static PyObject *
2735math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2736/*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
2737{
2738 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2739 PyObject *p_it, *q_it, *total;
2740 iternextfunc p_next, q_next;
2741 bool p_stopped = false, q_stopped = false;
2742 bool int_path_enabled = true, int_total_in_use = false;
2743 bool flt_path_enabled = true, flt_total_in_use = false;
2744 long int_total = 0;
b139bcd8 2745 TripleLength flt_total = tl_zero;
47b9f83a
RH
2746
2747 p_it = PyObject_GetIter(p);
2748 if (p_it == NULL) {
2749 return NULL;
2750 }
2751 q_it = PyObject_GetIter(q);
2752 if (q_it == NULL) {
2753 Py_DECREF(p_it);
2754 return NULL;
2755 }
2756 total = PyLong_FromLong(0);
2757 if (total == NULL) {
2758 Py_DECREF(p_it);
2759 Py_DECREF(q_it);
2760 return NULL;
2761 }
2762 p_next = *Py_TYPE(p_it)->tp_iternext;
2763 q_next = *Py_TYPE(q_it)->tp_iternext;
2764 while (1) {
2765 bool finished;
2766
2767 assert (p_i == NULL);
2768 assert (q_i == NULL);
2769 assert (term_i == NULL);
2770 assert (new_total == NULL);
2771
2772 assert (p_it != NULL);
2773 assert (q_it != NULL);
2774 assert (total != NULL);
2775
2776 p_i = p_next(p_it);
2777 if (p_i == NULL) {
2778 if (PyErr_Occurred()) {
2779 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2780 goto err_exit;
2781 }
2782 PyErr_Clear();
2783 }
2784 p_stopped = true;
2785 }
2786 q_i = q_next(q_it);
2787 if (q_i == NULL) {
2788 if (PyErr_Occurred()) {
2789 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2790 goto err_exit;
2791 }
2792 PyErr_Clear();
2793 }
2794 q_stopped = true;
2795 }
2796 if (p_stopped != q_stopped) {
2797 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2798 goto err_exit;
2799 }
2800 finished = p_stopped & q_stopped;
2801
2802 if (int_path_enabled) {
2803
2804 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2805 int overflow;
2806 long int_p, int_q, int_prod;
2807
2808 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2809 if (overflow) {
2810 goto finalize_int_path;
2811 }
2812 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2813 if (overflow) {
2814 goto finalize_int_path;
2815 }
2816 if (_check_long_mult_overflow(int_p, int_q)) {
2817 goto finalize_int_path;
2818 }
2819 int_prod = int_p * int_q;
2820 if (long_add_would_overflow(int_total, int_prod)) {
2821 goto finalize_int_path;
2822 }
2823 int_total += int_prod;
2824 int_total_in_use = true;
2825 Py_CLEAR(p_i);
2826 Py_CLEAR(q_i);
2827 continue;
2828 }
2829
2830 finalize_int_path:
997073c2 2831 // We're finished, overflowed, or have a non-int
47b9f83a
RH
2832 int_path_enabled = false;
2833 if (int_total_in_use) {
2834 term_i = PyLong_FromLong(int_total);
2835 if (term_i == NULL) {
2836 goto err_exit;
2837 }
2838 new_total = PyNumber_Add(total, term_i);
2839 if (new_total == NULL) {
2840 goto err_exit;
2841 }
2842 Py_SETREF(total, new_total);
2843 new_total = NULL;
2844 Py_CLEAR(term_i);
2845 int_total = 0; // An ounce of prevention, ...
2846 int_total_in_use = false;
2847 }
2848 }
2849
2850 if (flt_path_enabled) {
2851
2852 if (!finished) {
2853 double flt_p, flt_q;
2854 bool p_type_float = PyFloat_CheckExact(p_i);
2855 bool q_type_float = PyFloat_CheckExact(q_i);
2856 if (p_type_float && q_type_float) {
2857 flt_p = PyFloat_AS_DOUBLE(p_i);
2858 flt_q = PyFloat_AS_DOUBLE(q_i);
2859 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2860 /* We care about float/int pairs and int/float pairs because
2861 they arise naturally in several use cases such as price
2862 times quantity, measurements with integer weights, or
2863 data selected by a vector of bools. */
2864 flt_p = PyFloat_AS_DOUBLE(p_i);
2865 flt_q = PyLong_AsDouble(q_i);
2866 if (flt_q == -1.0 && PyErr_Occurred()) {
2867 PyErr_Clear();
2868 goto finalize_flt_path;
2869 }
2870 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(q_i))) {
2871 flt_q = PyFloat_AS_DOUBLE(q_i);
2872 flt_p = PyLong_AsDouble(p_i);
2873 if (flt_p == -1.0 && PyErr_Occurred()) {
2874 PyErr_Clear();
2875 goto finalize_flt_path;
2876 }
2877 } else {
2878 goto finalize_flt_path;
2879 }
84483aac 2880 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
47b9f83a
RH
2881 if (isfinite(new_flt_total.hi)) {
2882 flt_total = new_flt_total;
2883 flt_total_in_use = true;
2884 Py_CLEAR(p_i);
2885 Py_CLEAR(q_i);
2886 continue;
2887 }
2888 }
2889
2890 finalize_flt_path:
2891 // We're finished, overflowed, have a non-float, or got a non-finite value
2892 flt_path_enabled = false;
2893 if (flt_total_in_use) {
b139bcd8 2894 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
47b9f83a
RH
2895 if (term_i == NULL) {
2896 goto err_exit;
2897 }
2898 new_total = PyNumber_Add(total, term_i);
2899 if (new_total == NULL) {
2900 goto err_exit;
2901 }
2902 Py_SETREF(total, new_total);
2903 new_total = NULL;
2904 Py_CLEAR(term_i);
b139bcd8 2905 flt_total = tl_zero;
47b9f83a
RH
2906 flt_total_in_use = false;
2907 }
2908 }
2909
2910 assert(!int_total_in_use);
2911 assert(!flt_total_in_use);
2912 if (finished) {
2913 goto normal_exit;
2914 }
2915 term_i = PyNumber_Multiply(p_i, q_i);
2916 if (term_i == NULL) {
2917 goto err_exit;
2918 }
2919 new_total = PyNumber_Add(total, term_i);
2920 if (new_total == NULL) {
2921 goto err_exit;
2922 }
2923 Py_SETREF(total, new_total);
2924 new_total = NULL;
2925 Py_CLEAR(p_i);
2926 Py_CLEAR(q_i);
2927 Py_CLEAR(term_i);
2928 }
2929
2930 normal_exit:
2931 Py_DECREF(p_it);
2932 Py_DECREF(q_it);
2933 return total;
2934
2935 err_exit:
2936 Py_DECREF(p_it);
2937 Py_DECREF(q_it);
2938 Py_DECREF(total);
2939 Py_XDECREF(p_i);
2940 Py_XDECREF(q_i);
2941 Py_XDECREF(term_i);
2942 Py_XDECREF(new_total);
2943 return NULL;
2944}
2945
2946
53876d9c
CH
2947/* pow can't use math_2, but needs its own wrapper: the problem is
2948 that an infinite result can arise either as a result of overflow
2949 (in which case OverflowError should be raised) or as a result of
2950 e.g. 0.**-5. (for which ValueError needs to be raised.)
2951*/
2952
c9ea9335
SS
2953/*[clinic input]
2954math.pow
2955
2956 x: double
2957 y: double
2958 /
2959
2960Return x**y (x to the power of y).
2961[clinic start generated code]*/
2962
53876d9c 2963static PyObject *
c9ea9335
SS
2964math_pow_impl(PyObject *module, double x, double y)
2965/*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
53876d9c 2966{
c9ea9335 2967 double r;
f95a1b3c
AP
2968 int odd_y;
2969
f95a1b3c
AP
2970 /* deal directly with IEEE specials, to cope with problems on various
2971 platforms whose semantics don't exactly match C99 */
2972 r = 0.; /* silence compiler warning */
2973 if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2974 errno = 0;
2975 if (Py_IS_NAN(x))
2976 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2977 else if (Py_IS_NAN(y))
2978 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2979 else if (Py_IS_INFINITY(x)) {
2980 odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2981 if (y > 0.)
2982 r = odd_y ? x : fabs(x);
2983 else if (y == 0.)
2984 r = 1.;
2985 else /* y < 0. */
2986 r = odd_y ? copysign(0., x) : 0.;
2987 }
2988 else if (Py_IS_INFINITY(y)) {
2989 if (fabs(x) == 1.0)
2990 r = 1.;
2991 else if (y > 0. && fabs(x) > 1.0)
2992 r = y;
2993 else if (y < 0. && fabs(x) < 1.0) {
2994 r = -y; /* result is +inf */
f95a1b3c
AP
2995 }
2996 else
2997 r = 0.;
2998 }
2999 }
3000 else {
3001 /* let libm handle finite**finite */
3002 errno = 0;
f95a1b3c 3003 r = pow(x, y);
f95a1b3c
AP
3004 /* a NaN result should arise only from (-ve)**(finite
3005 non-integer); in this case we want to raise ValueError. */
3006 if (!Py_IS_FINITE(r)) {
3007 if (Py_IS_NAN(r)) {
3008 errno = EDOM;
3009 }
3010 /*
3011 an infinite result here arises either from:
3012 (A) (+/-0.)**negative (-> divide-by-zero)
3013 (B) overflow of x**y with x and y finite
3014 */
3015 else if (Py_IS_INFINITY(r)) {
3016 if (x == 0.)
3017 errno = EDOM;
3018 else
3019 errno = ERANGE;
3020 }
3021 }
3022 }
3023
3024 if (errno && is_error(r))
3025 return NULL;
3026 else
3027 return PyFloat_FromDouble(r);
53876d9c
CH
3028}
3029
53876d9c 3030
072c0f1b
CH
3031static const double degToRad = Py_MATH_PI / 180.0;
3032static const double radToDeg = 180.0 / Py_MATH_PI;
d6f2267a 3033
c9ea9335
SS
3034/*[clinic input]
3035math.degrees
3036
3037 x: double
3038 /
3039
3040Convert angle x from radians to degrees.
3041[clinic start generated code]*/
3042
d6f2267a 3043static PyObject *
c9ea9335
SS
3044math_degrees_impl(PyObject *module, double x)
3045/*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
d6f2267a 3046{
f95a1b3c 3047 return PyFloat_FromDouble(x * radToDeg);
d6f2267a
RH
3048}
3049
c9ea9335
SS
3050
3051/*[clinic input]
3052math.radians
3053
3054 x: double
3055 /
3056
3057Convert angle x from degrees to radians.
3058[clinic start generated code]*/
d6f2267a
RH
3059
3060static PyObject *
c9ea9335
SS
3061math_radians_impl(PyObject *module, double x)
3062/*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
d6f2267a 3063{
f95a1b3c 3064 return PyFloat_FromDouble(x * degToRad);
d6f2267a
RH
3065}
3066
c9ea9335
SS
3067
3068/*[clinic input]
3069math.isfinite
3070
3071 x: double
3072 /
3073
3074Return True if x is neither an infinity nor a NaN, and False otherwise.
3075[clinic start generated code]*/
78526168 3076
8e0c9968 3077static PyObject *
c9ea9335
SS
3078math_isfinite_impl(PyObject *module, double x)
3079/*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
8e0c9968 3080{
8e0c9968
MD
3081 return PyBool_FromLong((long)Py_IS_FINITE(x));
3082}
3083
c9ea9335
SS
3084
3085/*[clinic input]
3086math.isnan
3087
3088 x: double
3089 /
3090
3091Return True if x is a NaN (not a number), and False otherwise.
3092[clinic start generated code]*/
8e0c9968 3093
072c0f1b 3094static PyObject *
c9ea9335
SS
3095math_isnan_impl(PyObject *module, double x)
3096/*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
072c0f1b 3097{
f95a1b3c 3098 return PyBool_FromLong((long)Py_IS_NAN(x));
072c0f1b
CH
3099}
3100
c9ea9335
SS
3101
3102/*[clinic input]
3103math.isinf
3104
3105 x: double
3106 /
3107
3108Return True if x is a positive or negative infinity, and False otherwise.
3109[clinic start generated code]*/
072c0f1b
CH
3110
3111static PyObject *
c9ea9335
SS
3112math_isinf_impl(PyObject *module, double x)
3113/*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
072c0f1b 3114{
f95a1b3c 3115 return PyBool_FromLong((long)Py_IS_INFINITY(x));
072c0f1b
CH
3116}
3117
072c0f1b 3118
c9ea9335
SS
3119/*[clinic input]
3120math.isclose -> bool
d5519ed7 3121
c9ea9335
SS
3122 a: double
3123 b: double
3124 *
3125 rel_tol: double = 1e-09
3126 maximum difference for being considered "close", relative to the
3127 magnitude of the input values
3128 abs_tol: double = 0.0
3129 maximum difference for being considered "close", regardless of the
3130 magnitude of the input values
d5519ed7 3131
c9ea9335 3132Determine whether two floating point numbers are close in value.
d5519ed7 3133
c9ea9335
SS
3134Return True if a is close in value to b, and False otherwise.
3135
3136For the values to be considered close, the difference between them
3137must be smaller than at least one of the tolerances.
3138
3139-inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3140is, NaN is not close to anything, even itself. inf and -inf are
3141only close to themselves.
3142[clinic start generated code]*/
3143
3144static int
3145math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3146 double abs_tol)
3147/*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
3148{
3149 double diff = 0.0;
d5519ed7
TE
3150
3151 /* sanity check on the inputs */
3152 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3153 PyErr_SetString(PyExc_ValueError,
3154 "tolerances must be non-negative");
c9ea9335 3155 return -1;
d5519ed7
TE
3156 }
3157
3158 if ( a == b ) {
3159 /* short circuit exact equality -- needed to catch two infinities of
3160 the same sign. And perhaps speeds things up a bit sometimes.
3161 */
c9ea9335 3162 return 1;
d5519ed7
TE
3163 }
3164
3165 /* This catches the case of two infinities of opposite sign, or
3166 one infinity and one finite number. Two infinities of opposite
3167 sign would otherwise have an infinite relative tolerance.
3168 Two infinities of the same sign are caught by the equality check
3169 above.
3170 */
3171
3172 if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
c9ea9335 3173 return 0;
d5519ed7
TE
3174 }
3175
3176 /* now do the regular computation
3177 this is essentially the "weak" test from the Boost library
3178 */
3179
3180 diff = fabs(b - a);
3181
c9ea9335
SS
3182 return (((diff <= fabs(rel_tol * b)) ||
3183 (diff <= fabs(rel_tol * a))) ||
3184 (diff <= abs_tol));
d5519ed7
TE
3185}
3186
0411411c
PG
3187static inline int
3188_check_long_mult_overflow(long a, long b) {
3189
3190 /* From Python2's int_mul code:
3191
3192 Integer overflow checking for * is painful: Python tried a couple ways, but
3193 they didn't work on all platforms, or failed in endcases (a product of
3194 -sys.maxint-1 has been a particular pain).
3195
3196 Here's another way:
3197
3198 The native long product x*y is either exactly right or *way* off, being
3199 just the last n bits of the true product, where n is the number of bits
3200 in a long (the delivered product is the true product plus i*2**n for
3201 some integer i).
3202
3203 The native double product (double)x * (double)y is subject to three
3204 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3205 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3206 But, unlike the native long product, it's not in *range* trouble: even
3207 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3208 dynamic range of a double. So the leading 50 (or so) bits of the double
3209 product are correct.
3210
3211 We check these two ways against each other, and declare victory if they're
3212 approximately the same. Else, because the native long product is the only
3213 one that can lose catastrophic amounts of information, it's the native long
3214 product that must have overflowed.
3215
3216 */
3217
3218 long longprod = (long)((unsigned long)a * b);
3219 double doubleprod = (double)a * (double)b;
3220 double doubled_longprod = (double)longprod;
3221
3222 if (doubled_longprod == doubleprod) {
3223 return 0;
3224 }
3225
3226 const double diff = doubled_longprod - doubleprod;
3227 const double absdiff = diff >= 0.0 ? diff : -diff;
3228 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3229
3230 if (32.0 * absdiff <= absprod) {
3231 return 0;
3232 }
3233
3234 return 1;
3235}
d5519ed7 3236
bc098515
PG
3237/*[clinic input]
3238math.prod
3239
3240 iterable: object
3241 /
3242 *
3243 start: object(c_default="NULL") = 1
3244
3245Calculate the product of all the elements in the input iterable.
3246
3247The default start value for the product is 1.
3248
3249When the iterable is empty, return the start value. This function is
3250intended specifically for use with numeric values and may reject
3251non-numeric types.
3252[clinic start generated code]*/
3253
3254static PyObject *
3255math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3256/*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3257{
3258 PyObject *result = start;
3259 PyObject *temp, *item, *iter;
3260
3261 iter = PyObject_GetIter(iterable);
3262 if (iter == NULL) {
3263 return NULL;
3264 }
3265
3266 if (result == NULL) {
84975146 3267 result = _PyLong_GetOne();
bc098515 3268 }
84975146 3269 Py_INCREF(result);
bc098515
PG
3270#ifndef SLOW_PROD
3271 /* Fast paths for integers keeping temporary products in C.
3272 * Assumes all inputs are the same type.
3273 * If the assumption fails, default to use PyObjects instead.
3274 */
3275 if (PyLong_CheckExact(result)) {
3276 int overflow;
3277 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3278 /* If this already overflowed, don't even enter the loop. */
3279 if (overflow == 0) {
81f7359f 3280 Py_SETREF(result, NULL);
bc098515
PG
3281 }
3282 /* Loop over all the items in the iterable until we finish, we overflow
3283 * or we found a non integer element */
84975146 3284 while (result == NULL) {
bc098515
PG
3285 item = PyIter_Next(iter);
3286 if (item == NULL) {
3287 Py_DECREF(iter);
3288 if (PyErr_Occurred()) {
3289 return NULL;
3290 }
3291 return PyLong_FromLong(i_result);
3292 }
3293 if (PyLong_CheckExact(item)) {
3294 long b = PyLong_AsLongAndOverflow(item, &overflow);
0411411c
PG
3295 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3296 long x = i_result * b;
bc098515
PG
3297 i_result = x;
3298 Py_DECREF(item);
3299 continue;
3300 }
3301 }
3302 /* Either overflowed or is not an int.
3303 * Restore real objects and process normally */
3304 result = PyLong_FromLong(i_result);
3305 if (result == NULL) {
3306 Py_DECREF(item);
3307 Py_DECREF(iter);
3308 return NULL;
3309 }
3310 temp = PyNumber_Multiply(result, item);
3311 Py_DECREF(result);
3312 Py_DECREF(item);
3313 result = temp;
3314 if (result == NULL) {
3315 Py_DECREF(iter);
3316 return NULL;
3317 }
3318 }
3319 }
3320
3321 /* Fast paths for floats keeping temporary products in C.
3322 * Assumes all inputs are the same type.
3323 * If the assumption fails, default to use PyObjects instead.
3324 */
3325 if (PyFloat_CheckExact(result)) {
3326 double f_result = PyFloat_AS_DOUBLE(result);
81f7359f 3327 Py_SETREF(result, NULL);
bc098515
PG
3328 while(result == NULL) {
3329 item = PyIter_Next(iter);
3330 if (item == NULL) {
3331 Py_DECREF(iter);
3332 if (PyErr_Occurred()) {
3333 return NULL;
3334 }
3335 return PyFloat_FromDouble(f_result);
3336 }
3337 if (PyFloat_CheckExact(item)) {
3338 f_result *= PyFloat_AS_DOUBLE(item);
3339 Py_DECREF(item);
3340 continue;
3341 }
3342 if (PyLong_CheckExact(item)) {
3343 long value;
3344 int overflow;
3345 value = PyLong_AsLongAndOverflow(item, &overflow);
3346 if (!overflow) {
3347 f_result *= (double)value;
3348 Py_DECREF(item);
3349 continue;
3350 }
3351 }
3352 result = PyFloat_FromDouble(f_result);
3353 if (result == NULL) {
3354 Py_DECREF(item);
3355 Py_DECREF(iter);
3356 return NULL;
3357 }
3358 temp = PyNumber_Multiply(result, item);
3359 Py_DECREF(result);
3360 Py_DECREF(item);
3361 result = temp;
3362 if (result == NULL) {
3363 Py_DECREF(iter);
3364 return NULL;
3365 }
3366 }
3367 }
3368#endif
3369 /* Consume rest of the iterable (if any) that could not be handled
3370 * by specialized functions above.*/
3371 for(;;) {
3372 item = PyIter_Next(iter);
3373 if (item == NULL) {
3374 /* error, or end-of-sequence */
3375 if (PyErr_Occurred()) {
81f7359f 3376 Py_SETREF(result, NULL);
bc098515
PG
3377 }
3378 break;
3379 }
3380 temp = PyNumber_Multiply(result, item);
3381 Py_DECREF(result);
3382 Py_DECREF(item);
3383 result = temp;
3384 if (result == NULL)
3385 break;
3386 }
3387 Py_DECREF(iter);
3388 return result;
3389}
3390
3391
2d787971
SS
3392/* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3393
3394Python code to generate the values:
3395
3396 import math
3397
3398 for n in range(128):
3399 fac = math.factorial(n)
3400 fac_odd_part = fac // (fac & -fac)
3401 reduced_fac_odd_part = fac_odd_part % (2**64)
3402 print(f"{reduced_fac_odd_part:#018x}u")
3403*/
3404static const uint64_t reduced_factorial_odd_part[] = {
3405 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3406 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3407 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3408 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3409 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3410 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3411 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3412 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3413 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3414 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3415 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3416 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3417 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3418 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3419 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3420 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3421 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3422 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3423 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3424 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3425 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3426 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3427 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3428 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3429 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3430 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3431 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3432 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3433 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3434 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3435 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3436 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3437};
3438
3439/* inverses of reduced_factorial_odd_part values modulo 2**64.
3440
3441Python code to generate the values:
3442
3443 import math
3444
3445 for n in range(128):
3446 fac = math.factorial(n)
3447 fac_odd_part = fac // (fac & -fac)
3448 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3449 print(f"{inverted_fac_odd_part:#018x}u")
3450*/
3451static const uint64_t inverted_factorial_odd_part[] = {
3452 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3453 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3454 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3455 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3456 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3457 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3458 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3459 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3460 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3461 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3462 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3463 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3464 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3465 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3466 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3467 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3468 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3469 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3470 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3471 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3472 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3473 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3474 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3475 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3476 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3477 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3478 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3479 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3480 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3481 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3482 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3483 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3484};
3485
3486/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3487
3488Python code to generate the values:
3489
3490import math
3491
3492for n in range(128):
3493 fac = math.factorial(n)
3494 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3495 print(fac_trailing_zeros)
3496*/
3497
3498static const uint8_t factorial_trailing_zeros[] = {
3499 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3500 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3501 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3502 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3503 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3504 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3505 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3506 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3507};
3508
60c320c3
SS
3509/* Number of permutations and combinations.
3510 * P(n, k) = n! / (n-k)!
3511 * C(n, k) = P(n, k) / k!
3512 */
3513
3514/* Calculate C(n, k) for n in the 63-bit range. */
3515static PyObject *
3516perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3517{
60c320c3
SS
3518 if (k == 0) {
3519 return PyLong_FromLong(1);
3520 }
3521
3522 /* For small enough n and k the result fits in the 64-bit range and can
3523 * be calculated without allocating intermediate PyLong objects. */
2d787971
SS
3524 if (iscomb) {
3525 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3526 * fits into a uint64_t. Exclude k = 1, because the second fast
3527 * path is faster for this case.*/
3528 static const unsigned char fast_comb_limits1[] = {
3529 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3530 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3531 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3532 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3533 67, 67, 67, // 32-34
3534 };
3535 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3536 /*
3537 comb(n, k) fits into a uint64_t. We compute it as
3538
3539 comb_odd_part << shift
3540
3541 where 2**shift is the largest power of two dividing comb(n, k)
3542 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3543 calculated efficiently via arithmetic modulo 2**64, using three
3544 lookups and two uint64_t multiplications.
3545 */
3546 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3547 * inverted_factorial_odd_part[k]
3548 * inverted_factorial_odd_part[n - k];
3549 int shift = factorial_trailing_zeros[n]
3550 - factorial_trailing_zeros[k]
3551 - factorial_trailing_zeros[n - k];
3552 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3553 }
3554
3555 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3556 * fits into a long long (which is at least 64 bit). Only contains
3557 * items larger than in fast_comb_limits1. */
3558 static const unsigned long long fast_comb_limits2[] = {
3559 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3560 746, 453, 308, 227, 178, 147, // 8-13
3561 };
3562 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3563 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3564 unsigned long long result = n;
60c320c3
SS
3565 for (unsigned long long i = 1; i < k;) {
3566 result *= --n;
3567 result /= ++i;
3568 }
2d787971 3569 return PyLong_FromUnsignedLongLong(result);
60c320c3 3570 }
2d787971
SS
3571 }
3572 else {
3573 /* Maps k to the maximal n so that k <= n and P(n, k)
3574 * fits into a long long (which is at least 64 bit). */
3575 static const unsigned long long fast_perm_limits[] = {
3576 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3577 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3578 24, 22, 21, 20, 20, // 16-20
3579 };
3580 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3581 if (n <= 127) {
3582 /* P(n, k) fits into a uint64_t. */
3583 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3584 * inverted_factorial_odd_part[n - k];
3585 int shift = factorial_trailing_zeros[n]
3586 - factorial_trailing_zeros[n - k];
3587 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3588 }
3589
3590 /* P(n, k) = P(n, k-1) * (n-k+1) */
3591 unsigned long long result = n;
60c320c3
SS
3592 for (unsigned long long i = 1; i < k;) {
3593 result *= --n;
3594 ++i;
3595 }
2d787971 3596 return PyLong_FromUnsignedLongLong(result);
60c320c3 3597 }
60c320c3
SS
3598 }
3599
2d787971
SS
3600 /* For larger n use recursive formulas:
3601 *
3602 * P(n, k) = P(n, j) * P(n-j, k-j)
3603 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3604 */
60c320c3
SS
3605 unsigned long long j = k / 2;
3606 PyObject *a, *b;
3607 a = perm_comb_small(n, j, iscomb);
3608 if (a == NULL) {
3609 return NULL;
3610 }
3611 b = perm_comb_small(n - j, k - j, iscomb);
3612 if (b == NULL) {
3613 goto error;
3614 }
3615 Py_SETREF(a, PyNumber_Multiply(a, b));
3616 Py_DECREF(b);
3617 if (iscomb && a != NULL) {
3618 b = perm_comb_small(k, j, 1);
3619 if (b == NULL) {
3620 goto error;
3621 }
3622 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3623 Py_DECREF(b);
3624 }
3625 return a;
3626
3627error:
3628 Py_DECREF(a);
3629 return NULL;
3630}
3631
3632/* Calculate P(n, k) or C(n, k) using recursive formulas.
3633 * It is more efficient than sequential multiplication thanks to
3634 * Karatsuba multiplication.
3635 */
3636static PyObject *
3637perm_comb(PyObject *n, unsigned long long k, int iscomb)
3638{
3639 if (k == 0) {
3640 return PyLong_FromLong(1);
3641 }
3642 if (k == 1) {
3e2f7135 3643 return Py_NewRef(n);
60c320c3
SS
3644 }
3645
3646 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3647 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3648 unsigned long long j = k / 2;
3649 PyObject *a, *b;
3650 a = perm_comb(n, j, iscomb);
3651 if (a == NULL) {
3652 return NULL;
3653 }
3654 PyObject *t = PyLong_FromUnsignedLongLong(j);
3655 if (t == NULL) {
3656 goto error;
3657 }
3658 n = PyNumber_Subtract(n, t);
3659 Py_DECREF(t);
3660 if (n == NULL) {
3661 goto error;
3662 }
3663 b = perm_comb(n, k - j, iscomb);
3664 Py_DECREF(n);
3665 if (b == NULL) {
3666 goto error;
3667 }
3668 Py_SETREF(a, PyNumber_Multiply(a, b));
3669 Py_DECREF(b);
3670 if (iscomb && a != NULL) {
3671 b = perm_comb_small(k, j, 1);
3672 if (b == NULL) {
3673 goto error;
3674 }
3675 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3676 Py_DECREF(b);
3677 }
3678 return a;
3679
3680error:
3681 Py_DECREF(a);
3682 return NULL;
3683}
3684
5ae299ac
SS
3685/*[clinic input]
3686math.perm
3687
3688 n: object
e119b3d1 3689 k: object = None
5ae299ac
SS
3690 /
3691
3692Number of ways to choose k items from n items without repetition and with order.
3693
963eb0f4
RH
3694Evaluates to n! / (n - k)! when k <= n and evaluates
3695to zero when k > n.
5ae299ac 3696
e119b3d1
RH
3697If k is not specified or is None, then k defaults to n
3698and the function returns n!.
3699
963eb0f4
RH
3700Raises TypeError if either of the arguments are not integers.
3701Raises ValueError if either of the arguments are negative.
5ae299ac
SS
3702[clinic start generated code]*/
3703
3704static PyObject *
3705math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
e119b3d1 3706/*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
5ae299ac 3707{
60c320c3 3708 PyObject *result = NULL;
5ae299ac 3709 int overflow, cmp;
60c320c3 3710 long long ki, ni;
5ae299ac 3711
e119b3d1
RH
3712 if (k == Py_None) {
3713 return math_factorial(module, n);
3714 }
5ae299ac
SS
3715 n = PyNumber_Index(n);
3716 if (n == NULL) {
3717 return NULL;
3718 }
5ae299ac
SS
3719 k = PyNumber_Index(k);
3720 if (k == NULL) {
3721 Py_DECREF(n);
3722 return NULL;
3723 }
60c320c3 3724 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
5ae299ac
SS
3725
3726 if (Py_SIZE(n) < 0) {
3727 PyErr_SetString(PyExc_ValueError,
3728 "n must be a non-negative integer");
3729 goto error;
3730 }
45e0411e
MD
3731 if (Py_SIZE(k) < 0) {
3732 PyErr_SetString(PyExc_ValueError,
3733 "k must be a non-negative integer");
3734 goto error;
3735 }
3736
5ae299ac
SS
3737 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3738 if (cmp != 0) {
3739 if (cmp > 0) {
963eb0f4
RH
3740 result = PyLong_FromLong(0);
3741 goto done;
5ae299ac
SS
3742 }
3743 goto error;
3744 }
3745
60c320c3
SS
3746 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3747 assert(overflow >= 0 && !PyErr_Occurred());
5ae299ac
SS
3748 if (overflow > 0) {
3749 PyErr_Format(PyExc_OverflowError,
3750 "k must not exceed %lld",
3751 LLONG_MAX);
3752 goto error;
3753 }
60c320c3 3754 assert(ki >= 0);
5ae299ac 3755
60c320c3
SS
3756 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3757 assert(overflow >= 0 && !PyErr_Occurred());
3758 if (!overflow && ki > 1) {
3759 assert(ni >= 0);
3760 result = perm_comb_small((unsigned long long)ni,
3761 (unsigned long long)ki, 0);
5ae299ac 3762 }
60c320c3
SS
3763 else {
3764 result = perm_comb(n, (unsigned long long)ki, 0);
5ae299ac 3765 }
5ae299ac
SS
3766
3767done:
3768 Py_DECREF(n);
3769 Py_DECREF(k);
3770 return result;
3771
3772error:
5ae299ac
SS
3773 Py_DECREF(n);
3774 Py_DECREF(k);
3775 return NULL;
3776}
3777
4a686504
YA
3778/*[clinic input]
3779math.comb
3780
2b843ac0
SS
3781 n: object
3782 k: object
3783 /
4a686504 3784
2b843ac0 3785Number of ways to choose k items from n items without repetition and without order.
4a686504 3786
963eb0f4
RH
3787Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3788to zero when k > n.
3789
3790Also called the binomial coefficient because it is equivalent
3791to the coefficient of k-th term in polynomial expansion of the
3792expression (1 + x)**n.
4a686504 3793
963eb0f4
RH
3794Raises TypeError if either of the arguments are not integers.
3795Raises ValueError if either of the arguments are negative.
4a686504
YA
3796
3797[clinic start generated code]*/
3798
3799static PyObject *
3800math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
963eb0f4 3801/*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
4a686504 3802{
60c320c3 3803 PyObject *result = NULL, *temp;
4a686504 3804 int overflow, cmp;
60c320c3 3805 long long ki, ni;
4a686504 3806
2b843ac0
SS
3807 n = PyNumber_Index(n);
3808 if (n == NULL) {
3809 return NULL;
4a686504 3810 }
2b843ac0
SS
3811 k = PyNumber_Index(k);
3812 if (k == NULL) {
3813 Py_DECREF(n);
3814 return NULL;
4a686504 3815 }
60c320c3 3816 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
4a686504 3817
2b843ac0
SS
3818 if (Py_SIZE(n) < 0) {
3819 PyErr_SetString(PyExc_ValueError,
3820 "n must be a non-negative integer");
3821 goto error;
4a686504 3822 }
45e0411e
MD
3823 if (Py_SIZE(k) < 0) {
3824 PyErr_SetString(PyExc_ValueError,
3825 "k must be a non-negative integer");
3826 goto error;
3827 }
3828
60c320c3
SS
3829 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3830 assert(overflow >= 0 && !PyErr_Occurred());
3831 if (!overflow) {
3832 assert(ni >= 0);
3833 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3834 assert(overflow >= 0 && !PyErr_Occurred());
3835 if (overflow || ki > ni) {
3836 result = PyLong_FromLong(0);
3837 goto done;
3838 }
3839 assert(ki >= 0);
02b5417f 3840
60c320c3
SS
3841 ki = Py_MIN(ki, ni - ki);
3842 if (ki > 1) {
3843 result = perm_comb_small((unsigned long long)ni,
3844 (unsigned long long)ki, 1);
3845 goto done;
3846 }
3847 /* For k == 1 just return the original n in perm_comb(). */
4a686504
YA
3848 }
3849 else {
60c320c3
SS
3850 /* k = min(k, n - k) */
3851 temp = PyNumber_Subtract(n, k);
3852 if (temp == NULL) {
2b843ac0
SS
3853 goto error;
3854 }
60c320c3
SS
3855 if (Py_SIZE(temp) < 0) {
3856 Py_DECREF(temp);
3857 result = PyLong_FromLong(0);
3858 goto done;
4a686504 3859 }
60c320c3
SS
3860 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3861 if (cmp > 0) {
3862 Py_SETREF(k, temp);
4a686504 3863 }
60c320c3
SS
3864 else {
3865 Py_DECREF(temp);
3866 if (cmp < 0) {
3867 goto error;
3868 }
4a686504 3869 }
60c320c3
SS
3870
3871 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3872 assert(overflow >= 0 && !PyErr_Occurred());
3873 if (overflow) {
3874 PyErr_Format(PyExc_OverflowError,
3875 "min(n - k, k) must not exceed %lld",
3876 LLONG_MAX);
2b843ac0 3877 goto error;
4a686504 3878 }
60c320c3 3879 assert(ki >= 0);
4a686504 3880 }
60c320c3
SS
3881
3882 result = perm_comb(n, (unsigned long long)ki, 1);
4a686504 3883
2b843ac0
SS
3884done:
3885 Py_DECREF(n);
3886 Py_DECREF(k);
3887 return result;
4a686504 3888
2b843ac0 3889error:
2b843ac0
SS
3890 Py_DECREF(n);
3891 Py_DECREF(k);
4a686504
YA
3892 return NULL;
3893}
3894
3895
100fafcf
VS
3896/*[clinic input]
3897math.nextafter
3898
3899 x: double
3900 y: double
3901 /
3902
3903Return the next floating-point value after x towards y.
3904[clinic start generated code]*/
3905
3906static PyObject *
3907math_nextafter_impl(PyObject *module, double x, double y)
3908/*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3909{
85ead4fc
VS
3910#if defined(_AIX)
3911 if (x == y) {
3912 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3913 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3914 return PyFloat_FromDouble(y);
3915 }
c1c3493f 3916 if (Py_IS_NAN(x)) {
0837f99d 3917 return PyFloat_FromDouble(x);
c1c3493f
VS
3918 }
3919 if (Py_IS_NAN(y)) {
0837f99d 3920 return PyFloat_FromDouble(y);
c1c3493f 3921 }
85ead4fc
VS
3922#endif
3923 return PyFloat_FromDouble(nextafter(x, y));
100fafcf
VS
3924}
3925
3926
0b2ab219
VS
3927/*[clinic input]
3928math.ulp -> double
3929
3930 x: double
3931 /
3932
3933Return the value of the least significant bit of the float x.
3934[clinic start generated code]*/
3935
3936static double
3937math_ulp_impl(PyObject *module, double x)
3938/*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3939{
3940 if (Py_IS_NAN(x)) {
3941 return x;
3942 }
3943 x = fabs(x);
3944 if (Py_IS_INFINITY(x)) {
3945 return x;
3946 }
3947 double inf = m_inf();
3948 double x2 = nextafter(x, inf);
3949 if (Py_IS_INFINITY(x2)) {
3950 /* special case: x is the largest positive representable float */
3951 x2 = nextafter(x, -inf);
3952 return x - x2;
3953 }
3954 return x2 - x;
3955}
3956
5be82413
DN
3957static int
3958math_exec(PyObject *module)
3959{
23c9febd
DN
3960
3961 math_module_state *state = get_math_module_state(module);
3962 state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
3963 if (state->str___ceil__ == NULL) {
3964 return -1;
3965 }
3966 state->str___floor__ = PyUnicode_InternFromString("__floor__");
3967 if (state->str___floor__ == NULL) {
3968 return -1;
3969 }
3970 state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
3971 if (state->str___trunc__ == NULL) {
3972 return -1;
3973 }
5be82413
DN
3974 if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3975 return -1;
3976 }
3977 if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3978 return -1;
3979 }
3980 // 2pi
3981 if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3982 return -1;
3983 }
3984 if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3985 return -1;
3986 }
1b2611eb 3987#if _PY_SHORT_FLOAT_REPR == 1
5be82413
DN
3988 if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3989 return -1;
3990 }
3991#endif
3992 return 0;
3993}
0b2ab219 3994
23c9febd
DN
3995static int
3996math_clear(PyObject *module)
3997{
3998 math_module_state *state = get_math_module_state(module);
3999 Py_CLEAR(state->str___ceil__);
4000 Py_CLEAR(state->str___floor__);
4001 Py_CLEAR(state->str___trunc__);
4002 return 0;
4003}
4004
4005static void
4006math_free(void *module)
4007{
4008 math_clear((PyObject *)module);
4009}
4010
8b43b19e 4011static PyMethodDef math_methods[] = {
f95a1b3c
AP
4012 {"acos", math_acos, METH_O, math_acos_doc},
4013 {"acosh", math_acosh, METH_O, math_acosh_doc},
4014 {"asin", math_asin, METH_O, math_asin_doc},
4015 {"asinh", math_asinh, METH_O, math_asinh_doc},
4016 {"atan", math_atan, METH_O, math_atan_doc},
804f2529 4017 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
f95a1b3c 4018 {"atanh", math_atanh, METH_O, math_atanh_doc},
ac867f10 4019 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
c9ea9335 4020 MATH_CEIL_METHODDEF
804f2529 4021 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
f95a1b3c
AP
4022 {"cos", math_cos, METH_O, math_cos_doc},
4023 {"cosh", math_cosh, METH_O, math_cosh_doc},
c9ea9335 4024 MATH_DEGREES_METHODDEF
9c18b1ae 4025 MATH_DIST_METHODDEF
f95a1b3c
AP
4026 {"erf", math_erf, METH_O, math_erf_doc},
4027 {"erfc", math_erfc, METH_O, math_erfc_doc},
4028 {"exp", math_exp, METH_O, math_exp_doc},
6266e4af 4029 {"exp2", math_exp2, METH_O, math_exp2_doc},
f95a1b3c
AP
4030 {"expm1", math_expm1, METH_O, math_expm1_doc},
4031 {"fabs", math_fabs, METH_O, math_fabs_doc},
c9ea9335
SS
4032 MATH_FACTORIAL_METHODDEF
4033 MATH_FLOOR_METHODDEF
4034 MATH_FMOD_METHODDEF
4035 MATH_FREXP_METHODDEF
4036 MATH_FSUM_METHODDEF
f95a1b3c 4037 {"gamma", math_gamma, METH_O, math_gamma_doc},
804f2529
VS
4038 {"gcd", _PyCFunction_CAST(math_gcd), METH_FASTCALL, math_gcd_doc},
4039 {"hypot", _PyCFunction_CAST(math_hypot), METH_FASTCALL, math_hypot_doc},
c9ea9335
SS
4040 MATH_ISCLOSE_METHODDEF
4041 MATH_ISFINITE_METHODDEF
4042 MATH_ISINF_METHODDEF
4043 MATH_ISNAN_METHODDEF
73934b9d 4044 MATH_ISQRT_METHODDEF
804f2529 4045 {"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc},
c9ea9335 4046 MATH_LDEXP_METHODDEF
f95a1b3c 4047 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
c9ea9335 4048 MATH_LOG_METHODDEF
f95a1b3c 4049 {"log1p", math_log1p, METH_O, math_log1p_doc},
c9ea9335
SS
4050 MATH_LOG10_METHODDEF
4051 MATH_LOG2_METHODDEF
4052 MATH_MODF_METHODDEF
4053 MATH_POW_METHODDEF
4054 MATH_RADIANS_METHODDEF
804f2529 4055 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
f95a1b3c
AP
4056 {"sin", math_sin, METH_O, math_sin_doc},
4057 {"sinh", math_sinh, METH_O, math_sinh_doc},
4058 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4059 {"tan", math_tan, METH_O, math_tan_doc},
4060 {"tanh", math_tanh, METH_O, math_tanh_doc},
47b9f83a 4061 MATH_SUMPROD_METHODDEF
c9ea9335 4062 MATH_TRUNC_METHODDEF
bc098515 4063 MATH_PROD_METHODDEF
5ae299ac 4064 MATH_PERM_METHODDEF
4a686504 4065 MATH_COMB_METHODDEF
100fafcf 4066 MATH_NEXTAFTER_METHODDEF
0b2ab219 4067 MATH_ULP_METHODDEF
f95a1b3c 4068 {NULL, NULL} /* sentinel */
85a5fbbd
GR
4069};
4070
5be82413
DN
4071static PyModuleDef_Slot math_slots[] = {
4072 {Py_mod_exec, math_exec},
4073 {0, NULL}
4074};
c6e22902 4075
14f8b4cf 4076PyDoc_STRVAR(module_doc,
6faad355
NB
4077"This module provides access to the mathematical functions\n"
4078"defined by the C standard.");
c6e22902 4079
1a21451b 4080static struct PyModuleDef mathmodule = {
f95a1b3c 4081 PyModuleDef_HEAD_INIT,
5be82413
DN
4082 .m_name = "math",
4083 .m_doc = module_doc,
23c9febd 4084 .m_size = sizeof(math_module_state),
5be82413
DN
4085 .m_methods = math_methods,
4086 .m_slots = math_slots,
23c9febd
DN
4087 .m_clear = math_clear,
4088 .m_free = math_free,
1a21451b
ML
4089};
4090
fe51c6d6 4091PyMODINIT_FUNC
1a21451b 4092PyInit_math(void)
85a5fbbd 4093{
5be82413 4094 return PyModuleDef_Init(&mathmodule);
85a5fbbd 4095}