/* Math module -- standard C math library functions, pi and e */

/* Here are some comments from Tim Peters, extracted from the
   discussion attached to http://bugs.python.org/issue1640.  They
   describe the general aims of the math module with respect to
   special values, IEEE-754 floating-point exceptions, and Python
   exceptions.

These are the "spirit of 754" rules:

1. If the mathematical result is a real number, but of magnitude too
large to approximate by a machine float, overflow is signaled and the
result is an infinity (with the appropriate sign).

2. If the mathematical result is a real number, but of magnitude too
small to approximate by a machine float, underflow is signaled and the
result is a zero (with the appropriate sign).

3. At a singularity (a value x such that the limit of f(y) as y
approaches x exists and is an infinity), "divide by zero" is signaled
and the result is an infinity (with the appropriate sign).  This is
complicated a little by that the left-side and right-side limits may
not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
from the positive or negative directions.  In that specific case, the
sign of the zero determines the result of 1/0.

4. At a point where a function has no defined result in the extended
reals (i.e., the reals plus an infinity or two), invalid operation is
signaled and a NaN is returned.

And these are what Python has historically /tried/ to do (but not
always successfully, as platform libm behavior varies a lot):

For #1, raise OverflowError.

For #2, return a zero (with the appropriate sign if that happens by
accident ;-)).

For #3 and #4, raise ValueError.  It may have made sense to raise
Python's ZeroDivisionError in #3, but historically that's only been
raised for division by zero and mod by zero.
*/

/*
   In general, on an IEEE-754 platform the aim is to follow the C99
   standard, including Annex 'F', whenever possible.  Where the
   standard recommends raising the 'divide-by-zero' or 'invalid'
   floating-point exceptions, Python should raise a ValueError.  Where
   the standard recommends raising 'overflow', Python should raise an
   OverflowError.  In all other circumstances a value should be
   returned.
 */
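/*
   Illustrative sketch (hypothetical, not part of the build; it assumes the
   Python.h declarations included below): one way the policy above can be
   applied when wrapping a single libm call.  The real wrappers in this file
   (math_1(), math_2(), is_error()) implement the policy in full.
*/
#if 0
static PyObject *
demo_wrap_libm(double (*func)(double), double x, int can_overflow)
{
    errno = 0;
    double r = func(x);
    if (isnan(r) && !isnan(x)) {
        /* 'invalid' -> ValueError */
        PyErr_SetString(PyExc_ValueError, "math domain error");
        return NULL;
    }
    if (isinf(r) && isfinite(x)) {
        /* 'overflow' -> OverflowError; 'divide-by-zero' -> ValueError */
        PyErr_SetString(can_overflow ? PyExc_OverflowError : PyExc_ValueError,
                        can_overflow ? "math range error" : "math domain error");
        return NULL;
    }
    return PyFloat_FromDouble(r);
}
#endif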
#ifndef Py_BUILD_CORE_BUILTIN
#  define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "pycore_abstract.h"      // _PyNumber_Index()
#include "pycore_bitutils.h"      // _Py_bit_length()
#include "pycore_call.h"          // _PyObject_CallNoArgs()
#include "pycore_long.h"          // _PyLong_GetZero()
#include "pycore_moduleobject.h"  // _PyModule_GetState()
#include "pycore_object.h"        // _PyObject_LookupSpecial()
#include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
/* For DBL_EPSILON in _math.h */
#include <float.h>
/* For _Py_log1p with workarounds for buggy handling of zeros. */
#include "_math.h"

#include "clinic/mathmodule.c.h"

/*[clinic input]
module math
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
/*
Double and triple length extended precision algorithms from:

  Accurate Sum and Dot Product
  by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
  https://doi.org/10.1137/030601818
  https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf

*/

typedef struct{ double hi; double lo; } DoubleLength;

static DoubleLength
dl_fast_sum(double a, double b)
{
    /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
    assert(fabs(a) >= fabs(b));
    double x = a + b;
    double y = (a - x) + b;
    return (DoubleLength) {x, y};
}

static DoubleLength
dl_sum(double a, double b)
{
    /* Algorithm 3.1 Error-free transformation of the sum */
    double x = a + b;
    double z = x - a;
    double y = (a - (x - z)) + (b - z);
    return (DoubleLength) {x, y};
}

#ifndef UNRELIABLE_FMA

static DoubleLength
dl_mul(double x, double y)
{
    /* Algorithm 3.5. Error-free transformation of a product */
    double z = x * y;
    double zz = fma(x, y, -z);
    return (DoubleLength) {z, zz};
}
#else

/*
   The default implementation of dl_mul() depends on the C math library
   having an accurate fma() function as required by § 7.12.13.1 of the
   C99 standard.

   The UNRELIABLE_FMA option is provided as a slower but accurate
   alternative for builds where the fma() function is found wanting.
   The speed penalty may be modest (17% slower on an Apple M1 Max),
   so don't hesitate to enable this build option.

   The algorithms are from the T. J. Dekker paper:
   A Floating-Point Technique for Extending the Available Precision
   https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
*/
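/*
   Illustrative sketch (hypothetical, not part of the build): whichever
   dl_mul() variant is compiled, the pair it returns satisfies
   hi + lo == x*y exactly.  A quick check, assuming the names above:
*/
#if 0
static void
demo_dl_mul_roundoff(void)
{
    double x = 1.0 + 0x1p-30, y = 1.0 + 0x1p-29;
    DoubleLength d = dl_mul(x, y);
    /* d.hi is the rounded product and d.lo the rounding error, so
       d.hi + d.lo reproduces x*y exactly (here d.lo == 0x1p-59). */
    assert(d.hi == x * y);
    assert(d.lo == fma(x, y, -d.hi));
}
#endif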
static DoubleLength
dl_split(double x) {
    // Dekker (5.5) and (5.6).
    double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
    double hi = t - (t - x);
    double lo = x - hi;
    return (DoubleLength) {hi, lo};
}

static DoubleLength
dl_mul(double x, double y)
{
    // Dekker (5.12) and mul12()
    DoubleLength xx = dl_split(x);
    DoubleLength yy = dl_split(y);
    double p = xx.hi * yy.hi;
    double q = xx.hi * yy.lo + xx.lo * yy.hi;
    double z = p + q;
    double zz = p - z + q + xx.lo * yy.lo;
    return (DoubleLength) {z, zz};
}

#endif
typedef struct { double hi; double lo; double tiny; } TripleLength;

static const TripleLength tl_zero = {0.0, 0.0, 0.0};

static inline TripleLength
tl_fma(double x, double y, TripleLength total)
{
    /* Algorithm 5.10 with SumKVert for K=3 */
    DoubleLength pr = dl_mul(x, y);
    DoubleLength sm = dl_sum(total.hi, pr.hi);
    DoubleLength r1 = dl_sum(total.lo, pr.lo);
    DoubleLength r2 = dl_sum(r1.hi, sm.lo);
    return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
}

static inline double
tl_to_d(TripleLength total)
{
    DoubleLength last = dl_sum(total.lo, total.hi);
    return total.tiny + last.lo + last.hi;
}
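/*
   Illustrative sketch (hypothetical, not part of the build): the intended
   usage pattern for the helpers above is to fold products into a
   TripleLength accumulator with tl_fma() and round to double only once,
   at the very end, with tl_to_d().
*/
#if 0
static double
demo_dot_product(const double *p, const double *q, Py_ssize_t n)
{
    TripleLength total = tl_zero;
    for (Py_ssize_t i = 0; i < n; i++) {
        /* each step folds p[i]*q[i] into the extended-precision total */
        total = tl_fma(p[i], q[i], total);
    }
    /* a single rounding to double at the end */
    return tl_to_d(total);
}
#endif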
/*
   sin(pi*x), giving accurate results for all finite x (especially x
   integral or close to an integer).  This is here for use in the
   reflection formula for the gamma function.  It conforms to IEEE
   754-2008 for finite arguments, but not for infinities or nans.
*/

static const double pi = 3.141592653589793238462643383279502884197;
static const double logpi = 1.144729885849400174143427351353058711647;
/* Version of PyFloat_AsDouble() with in-line fast paths
   for exact floats and integers.  Gives a substantial
   speed improvement for extracting float arguments.
*/
#define ASSIGN_DOUBLE(target_var, obj, error_label)        \
    if (PyFloat_CheckExact(obj)) {                          \
        target_var = PyFloat_AS_DOUBLE(obj);                \
    }                                                       \
    else if (PyLong_CheckExact(obj)) {                      \
        target_var = PyLong_AsDouble(obj);                  \
        if (target_var == -1.0 && PyErr_Occurred()) {       \
            goto error_label;                               \
        }                                                   \
    }                                                       \
    else {                                                  \
        target_var = PyFloat_AsDouble(obj);                 \
        if (target_var == -1.0 && PyErr_Occurred()) {       \
            goto error_label;                               \
        }                                                   \
    }
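/*
   Illustrative sketch (hypothetical, not part of the build): ASSIGN_DOUBLE()
   expects an error label in the enclosing function, e.g.
*/
#if 0
static PyObject *
demo_use_assign_double(PyObject *obj)
{
    double x;
    ASSIGN_DOUBLE(x, obj, error);   /* jumps to 'error' on conversion failure */
    return PyFloat_FromDouble(x * 2.0);
  error:
    return NULL;
}
#endif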
static double
m_sinpi(double x)
{
    double y, r;
    int n;
    /* this function should only ever be called for finite arguments */
    assert(isfinite(x));
    y = fmod(fabs(x), 2.0);
    n = (int)round(2.0*y);
    assert(0 <= n && n <= 4);
    switch (n) {
    case 0:
        r = sin(pi*y);
        break;
    case 1:
        r = cos(pi*(y-0.5));
        break;
    case 2:
        /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
           -0.0 instead of 0.0 when y == 1.0. */
        r = sin(pi*(1.0-y));
        break;
    case 3:
        r = -cos(pi*(y-1.5));
        break;
    case 4:
        r = sin(pi*(y-2.0));
        break;
    default:
        Py_UNREACHABLE();
    }
    return copysign(1.0, x)*r;
}
/* Implementation of the real gamma function.  Kept here to work around
   issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
   on various platforms (Windows, MacOS).  In extensive but non-exhaustive
   random tests, this function proved accurate to within <= 10 ulps across the
   entire float domain.  Note that accuracy may depend on the quality of the
   system math functions, the pow function in particular.  Special cases
   follow C99 annex F.  The parameters and method are tailored to platforms
   whose double format is the IEEE 754 binary64 format.

   Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
   and g=6.024680040776729583740234375; these parameters are amongst those
   used by the Boost library.  Following Boost (again), we re-express the
   Lanczos sum as a rational function, and compute it that way.  The
   coefficients below were computed independently using MPFR, and have been
   double-checked against the coefficients in the Boost source code.

   For x < 0.0 we use the reflection formula.

   There's one minor tweak that deserves explanation: Lanczos' formula for
   Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
   values, x+g-0.5 can be represented exactly.  However, in cases where it
   can't be represented exactly the small error in x+g-0.5 can be magnified
   significantly by the pow and exp calls, especially for large x.  A cheap
   correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
   involved in the computation of x+g-0.5 (that is, e = computed value of
   x+g-0.5 - exact value of x+g-0.5).  Here's the proof:

   Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
   double, and e is tiny.  Then:

     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
                                     = pow(y, x-0.5)/exp(y) * C,

   where the correction_factor C is given by

     C = pow(1-e/y, x-0.5) * exp(e)

   Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(e) ~ 1+e, so:

     C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y

   But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and

     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),

   Note that for accuracy, when computing r*C it's better to do

     r + e*g/y*r;

   than

     r * (1 + e*g/y);

   since the addition in the latter throws away most of the bits of
   information in e*g/y.
*/
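/*
   Illustrative sketch (hypothetical, not part of the build): computing the
   error term e and applying the correction described above, using the
   lanczos_g / lanczos_g_minus_half constants defined just below.  The real
   gamma implementation further down does the equivalent inline, for both
   branches of the reflection formula.
*/
#if 0
static double
demo_lanczos_correction(double absx)
{
    double y = absx + lanczos_g_minus_half;   /* computed value of x+g-0.5 */
    /* e approximates (computed x+g-0.5) - (exact x+g-0.5); this form is
       for absx > lanczos_g_minus_half. */
    double e = (y - absx) - lanczos_g_minus_half;
    double r = pow(y, absx - 0.5) / exp(y);
    /* apply the correction as r += e*g/y * r rather than r *= (1 + e*g/y) */
    r += e * lanczos_g / y * r;
    return r;
}
#endif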
#define LANCZOS_N 13
static const double lanczos_g = 6.024680040776729583740234375;
static const double lanczos_g_minus_half = 5.524680040776729583740234375;
static const double lanczos_num_coeffs[LANCZOS_N] = {
    23531376880.410759688572007674451636754734846804940,
    42919803642.649098768957899047001988850926355848959,
    35711959237.355668049440185451547166705960488635843,
    17921034426.037209699919755754458931112671403265390,
    6039542586.3520280050642916443072979210699388420708,
    1439720407.3117216736632230727949123939715485786772,
    248874557.86205415651146038641322942321632125127801,
    31426415.585400194380614231628318205362874684987640,
    2876370.6289353724412254090516208496135991145378768,
    186056.26539522349504029498971604569928220784236328,
    8071.6720023658162106380029022722506138218516325024,
    210.82427775157934587250973392071336271166969580291,
    2.5066282746310002701649081771338373386264310793408
};

/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
static const double lanczos_den_coeffs[LANCZOS_N] = {
    0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
    13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};

/* gamma values for small positive integers, 1 through NGAMMA_INTEGRAL */
#define NGAMMA_INTEGRAL 23
static const double gamma_integral[NGAMMA_INTEGRAL] = {
    1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
    3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
    1307674368000.0, 20922789888000.0, 355687428096000.0,
    6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
    51090942171709440000.0, 1124000727777607680000.0,
};
/* Lanczos' sum L_g(x), for positive x */

static double
lanczos_sum(double x)
{
    double num = 0.0, den = 0.0;
    int i;
    assert(x > 0.0);
    /* evaluate the rational function lanczos_sum(x).  For large
       x, the obvious algorithm risks overflow, so we instead
       rescale the denominator and numerator of the rational
       function by x**(1-LANCZOS_N) and treat this as a
       rational function in 1/x.  This also reduces the error for
       larger x values.  The choice of cutoff point (5.0 below) is
       somewhat arbitrary; in tests, smaller cutoff values than
       this resulted in lower accuracy. */
    if (x < 5.0) {
        for (i = LANCZOS_N; --i >= 0; ) {
            num = num * x + lanczos_num_coeffs[i];
            den = den * x + lanczos_den_coeffs[i];
        }
    }
    else {
        for (i = 0; i < LANCZOS_N; i++) {
            num = num / x + lanczos_num_coeffs[i];
            den = den / x + lanczos_den_coeffs[i];
        }
    }
    return num/den;
}
static double
m_tgamma(double x)
{
    double absx, r, y, z, sqrtpow;

    /* special cases */
    if (!isfinite(x)) {
        if (isnan(x) || x > 0.0)
            return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
        else {
            errno = EDOM;
            return Py_NAN;  /* tgamma(-inf) = nan, invalid */
        }
    }
    if (x == 0.0) {
        errno = EDOM;
        /* tgamma(+-0.0) = +-inf, divide-by-zero */
        return copysign(Py_INFINITY, x);
    }

    /* integer arguments */
    if (x == floor(x)) {
        if (x < 0.0) {
            errno = EDOM;   /* tgamma(n) = nan, invalid for */
            return Py_NAN;  /* negative integers n */
        }
        if (x <= NGAMMA_INTEGRAL)
            return gamma_integral[(int)x - 1];
    }
    absx = fabs(x);

    /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
    if (absx < 1e-20) {
        r = 1.0/x;
        if (isinf(r))
            errno = ERANGE;
        return r;
    }

    /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
       x > 200, and underflows to +-0.0 for x < -200, not a negative
       integer. */
    if (absx > 200.0) {
        if (x < 0.0) {
            return 0.0/m_sinpi(x);
        }
        else {
            errno = ERANGE;
            return Py_INFINITY;
        }
    }

    y = absx + lanczos_g_minus_half;
    /* compute error in sum */
    if (absx > lanczos_g_minus_half) {
        /* note: the correction can be foiled by an optimizing
           compiler that (incorrectly) thinks that an expression like
           a + b - a - b can be optimized to 0.0.  This shouldn't
           happen in a standards-conforming compiler. */
        double q = y - absx;
        z = q - lanczos_g_minus_half;
    }
    else {
        double q = y - lanczos_g_minus_half;
        z = q - absx;
    }
    z = z * lanczos_g / y;
    if (x < 0.0) {
        r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
        r -= z * r;
        if (absx < 140.0) {
            r /= pow(y, absx - 0.5);
        }
        else {
            sqrtpow = pow(y, absx / 2.0 - 0.25);
            r /= sqrtpow;
            r /= sqrtpow;
        }
    }
    else {
        r = lanczos_sum(absx) / exp(y);
        r += z * r;
        if (absx < 140.0) {
            r *= pow(y, absx - 0.5);
        }
        else {
            sqrtpow = pow(y, absx / 2.0 - 0.25);
            r *= sqrtpow;
            r *= sqrtpow;
        }
    }
    if (isinf(r))
        errno = ERANGE;
    return r;
}
/*
   lgamma: natural log of the absolute value of the Gamma function.
   For large arguments, Lanczos' formula works extremely well here.
*/

static double
m_lgamma(double x)
{
    double r;
    double absx;

    /* special cases */
    if (!isfinite(x)) {
        if (isnan(x))
            return x;  /* lgamma(nan) = nan */
        else
            return Py_INFINITY;  /* lgamma(+-inf) = +inf */
    }

    /* integer arguments */
    if (x == floor(x) && x <= 2.0) {
        if (x <= 0.0) {
            errno = EDOM;        /* lgamma(n) = inf, divide-by-zero for */
            return Py_INFINITY;  /* integers n <= 0 */
        }
        else {
            return 0.0;  /* lgamma(1) = lgamma(2) = 0.0 */
        }
    }

    absx = fabs(x);
    /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
    if (absx < 1e-20)
        return -log(absx);

    /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
       having a second set of numerator coefficients for lanczos_sum that
       absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
       subtraction below; it's probably not worth it. */
    r = log(lanczos_sum(absx)) - lanczos_g;
    r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);

    if (x < 0.0)
        /* Use reflection formula to get value for negative x. */
        r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;

    if (isinf(r))
        errno = ERANGE;
    return r;
}
/* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
binary floating-point format, the result is always exact. */
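/*
   Illustrative check (hypothetical, not part of the build): the ties-to-even
   rule described above means remainder(7.0, 2.0) == -1.0 (the nearest
   multiples of 2 are 6 and 8; the tie picks n == 4), while
   remainder(5.0, 2.0) == 1.0 (the tie picks n == 2).  m_remainder() is
   defined just below.
*/
#if 0
static void
demo_remainder_ties(void)
{
    assert(m_remainder(7.0, 2.0) == -1.0);
    assert(m_remainder(5.0, 2.0) == 1.0);
}
#endif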
static double
m_remainder(double x, double y)
{
    /* Deal with most common case first. */
    if (isfinite(x) && isfinite(y)) {
        double absx, absy, c, m, r;

        if (y == 0.0) {
            return Py_NAN;
        }

        absx = fabs(x);
        absy = fabs(y);
        m = fmod(absx, absy);

        /*
           Warning: some subtlety here. What we *want* to know at this point is
           whether the remainder m is less than, equal to, or greater than half
           of absy. However, we can't do that comparison directly because we
           can't be sure that 0.5*absy is representable (the multiplication
           might incur precision loss due to underflow). So instead we compare
           m with the complement c = absy - m: m < 0.5*absy if and only if m <
           c, and so on. The catch is that absy - m might also not be
           representable, but it turns out that it doesn't matter:

           - if m > 0.5*absy then absy - m is exactly representable, by
             Sterbenz's lemma, so m > c
           - if m == 0.5*absy then again absy - m is exactly representable
             and m == c
           - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
             in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
             c, or (ii) absy is tiny, either subnormal or in the lowest normal
             binade. Then absy - m is exactly representable and again m < c.
        */

        c = absy - m;
        if (m < c) {
            r = m;
        }
        else if (m > c) {
            r = -c;
        }
        else {
            /*
               Here absx is exactly halfway between two multiples of absy,
               and we need to choose the even multiple. x now has the form

                   absx = n * absy + m

               for some integer n (recalling that m = 0.5*absy at this point).
               If n is even we want to return m; if n is odd, we need to
               return -m.

               So

                   0.5 * (absx - m) = (n/2) * absy

               and now reducing modulo absy gives us:

                                                  | m, if n is odd
                   fmod(0.5 * (absx - m), absy) = |
                                                  | 0, if n is even

               Now m - 2.0 * fmod(...) gives the desired result: m
               if n is even, -m if n is odd.

               Note that all steps in fmod(0.5 * (absx - m), absy)
               will be computed exactly, with no rounding error
               introduced.
            */
            assert(m == c);
            r = m - 2.0 * fmod(0.5 * (absx - m), absy);
        }
        return copysign(1.0, x) * r;
    }

    /* Special values. */
    if (isnan(x)) {
        return x;
    }
    if (isnan(y)) {
        return y;
    }
    if (isinf(x)) {
        return Py_NAN;
    }
    assert(isinf(y));
    return x;
}
/*
   Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
   log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
   special values directly, passing positive non-special values through to
   the system log/log10.
*/

static double
m_log(double x)
{
    if (isfinite(x)) {
        if (x > 0.0)
            return log(x);
        errno = EDOM;
        if (x == 0.0)
            return -Py_INFINITY;  /* log(0) = -inf */
        else
            return Py_NAN;  /* log(-ve) = nan */
    }
    else if (isnan(x))
        return x;  /* log(nan) = nan */
    else if (x > 0.0)
        return x;  /* log(inf) = inf */
    else {
        errno = EDOM;
        return Py_NAN;  /* log(-inf) = nan */
    }
}
/*
   log2: log to base 2.

   Uses an algorithm that should:

     (a) produce exact results for powers of 2, and
     (b) give a monotonic log2 (for positive finite floats),
         assuming that the system log is monotonic.
*/

static double
m_log2(double x)
{
    if (!isfinite(x)) {
        if (isnan(x))
            return x;  /* log2(nan) = nan */
        else if (x > 0.0)
            return x;  /* log2(+inf) = +inf */
        else {
            errno = EDOM;
            return Py_NAN;  /* log2(-inf) = nan, invalid-operation */
        }
    }

    if (x > 0.0) {
        return log2(x);
    }
    else if (x == 0.0) {
        errno = EDOM;
        return -Py_INFINITY;  /* log2(0) = -inf, divide-by-zero */
    }
    else {
        errno = EDOM;
        return Py_NAN;  /* log2(-ve) = nan, invalid-operation */
    }
}
static double
m_log10(double x)
{
    if (isfinite(x)) {
        if (x > 0.0)
            return log10(x);
        errno = EDOM;
        if (x == 0.0)
            return -Py_INFINITY;  /* log10(0) = -inf */
        else
            return Py_NAN;  /* log10(-ve) = nan */
    }
    else if (isnan(x))
        return x;  /* log10(nan) = nan */
    else if (x > 0.0)
        return x;  /* log10(inf) = inf */
    else {
        errno = EDOM;
        return Py_NAN;  /* log10(-inf) = nan */
    }
}
/*[clinic input]
math.gcd

    *integers as args: array

Greatest Common Divisor.
[clinic start generated code]*/

static PyObject *
math_gcd_impl(PyObject *module, PyObject * const *args,
              Py_ssize_t args_length)
/*[clinic end generated code: output=a26c95907374ffb4 input=ded7f0ea3850c05c]*/
{
    // Fast-path for the common case: gcd(int, int)
    if (args_length == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
    {
        return _PyLong_GCD(args[0], args[1]);
    }

    if (args_length == 0) {
        return PyLong_FromLong(0);
    }

    PyObject *res = PyNumber_Index(args[0]);
    if (res == NULL) {
        return NULL;
    }
    if (args_length == 1) {
        Py_SETREF(res, PyNumber_Absolute(res));
        return res;
    }

    PyObject *one = _PyLong_GetOne();  // borrowed ref
    for (Py_ssize_t i = 1; i < args_length; i++) {
        PyObject *x = _PyNumber_Index(args[i]);
        if (x == NULL) {
            Py_DECREF(res);
            return NULL;
        }
        if (res == one) {
            /* Fast path: just check arguments.
               It is okay to use identity comparison here. */
            Py_DECREF(x);
            continue;
        }
        Py_SETREF(res, _PyLong_GCD(res, x));
        Py_DECREF(x);
        if (res == NULL) {
            return NULL;
        }
    }
    return res;
}
static PyObject *
long_lcm(PyObject *a, PyObject *b)
{
    PyObject *g, *m, *f, *ab;

    if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
        return PyLong_FromLong(0);
    }
    g = _PyLong_GCD(a, b);
    if (g == NULL) {
        return NULL;
    }
    f = PyNumber_FloorDivide(a, g);
    Py_DECREF(g);
    if (f == NULL) {
        return NULL;
    }
    m = PyNumber_Multiply(f, b);
    Py_DECREF(f);
    if (m == NULL) {
        return NULL;
    }
    ab = PyNumber_Absolute(m);
    Py_DECREF(m);
    return ab;
}
/*[clinic input]
math.lcm

    *integers as args: array

Least Common Multiple.
[clinic start generated code]*/

static PyObject *
math_lcm_impl(PyObject *module, PyObject * const *args,
              Py_ssize_t args_length)
/*[clinic end generated code: output=c8a59a5c2e55c816 input=3e4f4b7cdf948a98]*/
{
    PyObject *res, *x;
    Py_ssize_t i;

    if (args_length == 0) {
        return PyLong_FromLong(1);
    }
    res = PyNumber_Index(args[0]);
    if (res == NULL) {
        return NULL;
    }
    if (args_length == 1) {
        Py_SETREF(res, PyNumber_Absolute(res));
        return res;
    }

    PyObject *zero = _PyLong_GetZero();  // borrowed ref
    for (i = 1; i < args_length; i++) {
        x = PyNumber_Index(args[i]);
        if (x == NULL) {
            Py_DECREF(res);
            return NULL;
        }
        if (res == zero) {
            /* Fast path: just check arguments.
               It is okay to use identity comparison here. */
            Py_DECREF(x);
            continue;
        }
        Py_SETREF(res, long_lcm(res, x));
        Py_DECREF(x);
        if (res == NULL) {
            return NULL;
        }
    }
    return res;
}
/* Call is_error when errno != 0, and where x is the result libm
 * returned.  is_error will usually set up an exception and return
 * true (1), but may return false (0) without setting up an exception.
 */
static int
is_error(double x, int raise_edom)
{
    int result = 1;     /* presumption of guilt */
    assert(errno);      /* non-zero errno is a precondition for calling */
    if (errno == EDOM) {
        if (raise_edom) {
            PyErr_SetString(PyExc_ValueError, "math domain error");
        }
    }
    else if (errno == ERANGE) {
        /* ANSI C generally requires libm functions to set ERANGE
         * on overflow, but also generally *allows* them to set
         * ERANGE on underflow too.  There's no consistency about
         * the latter across platforms.
         * Alas, C99 never requires that errno be set.
         * Here we suppress the underflow errors (libm functions
         * should return a zero on underflow, and +- HUGE_VAL on
         * overflow, so testing the result for zero suffices to
         * distinguish the cases).
         *
         * On some platforms (Ubuntu/ia64) it seems that errno can be
         * set to ERANGE for subnormal results that do *not* underflow
         * to zero.  So to be safe, we'll ignore ERANGE whenever the
         * function result is less than 1.5 in absolute value.
         *
         * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
         * are correctly detected, since the function may underflow
         * toward -1.0 rather than 0.0.
         */
        if (fabs(x) < 1.5)
            result = 0;
        else {
            PyErr_SetString(PyExc_OverflowError,
                            "math range error");
        }
    }
    else {
        /* Unexpected math error */
        PyErr_SetFromErrno(PyExc_ValueError);
    }
    return result;
}
/*
   math_1 is used to wrap a libm function f that takes a double
   argument and returns a double.

   The error reporting follows these rules, which are designed to do
   the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
   platforms.

   - a NaN result from non-NaN inputs causes ValueError to be raised
   - an infinite result from finite inputs causes OverflowError to be
     raised if can_overflow is 1, or raises ValueError if can_overflow
     is 0.
   - if the result is finite and errno == EDOM then ValueError is
     raised
   - if the result is finite and nonzero and errno == ERANGE then
     OverflowError is raised

   The last rule is used to catch overflow on platforms which follow
   C89 but for which HUGE_VAL is not an infinity.

   For the majority of one-argument functions these rules are enough
   to ensure that Python's functions behave as specified in 'Annex F'
   of the C99 standard, with the 'invalid' and 'divide-by-zero'
   floating-point exceptions mapping to Python's ValueError and the
   'overflow' floating-point exception mapping to OverflowError.
   math_1 only works for functions that don't have singularities *and*
   the possibility of overflow; fortunately, that covers everything we
   care about right now.
*/

static PyObject *
math_1(PyObject *arg, double (*func) (double), int can_overflow,
       const char *err_msg)
{
    double x, r;
    x = PyFloat_AsDouble(arg);
    if (x == -1.0 && PyErr_Occurred())
        return NULL;
    errno = 0;
    r = (*func)(x);
    if (isnan(r) && !isnan(x))
        goto domain_err;  /* domain error */
    if (isinf(r) && isfinite(x)) {
        if (can_overflow)
            PyErr_SetString(PyExc_OverflowError,
                            "math range error");  /* overflow */
        else
            goto domain_err;  /* singularity */
        return NULL;
    }
    if (isfinite(r) && errno && is_error(r, 1))
        /* this branch unnecessary on most platforms */
        return NULL;

    return PyFloat_FromDouble(r);

domain_err:
    if (err_msg) {
        char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
        if (buf) {
            PyErr_Format(PyExc_ValueError, err_msg, buf);
            PyMem_Free(buf);
        }
    }
    else {
        PyErr_SetString(PyExc_ValueError, "math domain error");
    }
    return NULL;
}
/* variant of math_1, to be used when the function being wrapped is known to
   set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
   errno = ERANGE for overflow). */

static PyObject *
math_1a(PyObject *arg, double (*func) (double), const char *err_msg)
{
    double x, r;
    x = PyFloat_AsDouble(arg);
    if (x == -1.0 && PyErr_Occurred())
        return NULL;
    errno = 0;
    r = (*func)(x);
    if (errno && is_error(r, err_msg ? 0 : 1)) {
        if (err_msg && errno == EDOM) {
            assert(!PyErr_Occurred());  /* exception is not set by is_error() */
            char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
            if (buf) {
                PyErr_Format(PyExc_ValueError, err_msg, buf);
                PyMem_Free(buf);
            }
        }
        return NULL;
    }
    return PyFloat_FromDouble(r);
}
/*
   math_2 is used to wrap a libm function f that takes two double
   arguments and returns a double.

   The error reporting follows these rules, which are designed to do
   the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
   platforms.

   - a NaN result from non-NaN inputs causes ValueError to be raised
   - an infinite result from finite inputs causes OverflowError to be
     raised.
   - if the result is finite and errno == EDOM then ValueError is
     raised
   - if the result is finite and nonzero and errno == ERANGE then
     OverflowError is raised

   The last rule is used to catch overflow on platforms which follow
   C89 but for which HUGE_VAL is not an infinity.

   For most two-argument functions (copysign, fmod, hypot, atan2)
   these rules are enough to ensure that Python's functions behave as
   specified in 'Annex F' of the C99 standard, with the 'invalid' and
   'divide-by-zero' floating-point exceptions mapping to Python's
   ValueError and the 'overflow' floating-point exception mapping to
   OverflowError.
*/

static PyObject *
math_2(PyObject *const *args, Py_ssize_t nargs,
       double (*func) (double, double), const char *funcname)
{
    double x, y, r;
    if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
        return NULL;
    x = PyFloat_AsDouble(args[0]);
    if (x == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    y = PyFloat_AsDouble(args[1]);
    if (y == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    errno = 0;
    r = (*func)(x, y);
    if (isnan(r)) {
        if (!isnan(x) && !isnan(y))
            errno = EDOM;
        else
            errno = 0;
    }
    else if (isinf(r)) {
        if (isfinite(x) && isfinite(y))
            errno = ERANGE;
        else
            errno = 0;
    }
    if (errno && is_error(r, 1))
        return NULL;
    else
        return PyFloat_FromDouble(r);
}
#define FUNC1(funcname, func, can_overflow, docstring)                  \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1(args, func, can_overflow, NULL);                  \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1D(funcname, func, can_overflow, docstring, err_msg)        \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1(args, func, can_overflow, err_msg);               \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1A(funcname, func, docstring)                               \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1a(args, func, NULL);                               \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC1AD(funcname, func, docstring, err_msg)                     \
    static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
        return math_1a(args, func, err_msg);                            \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);

#define FUNC2(funcname, func, docstring)                                \
    static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
        return math_2(args, nargs, func, #funcname); \
    }\
    PyDoc_STRVAR(math_##funcname##_doc, docstring);
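/*
   Illustrative note (not from the original source): an invocation such as
   FUNC1(atan, atan, 0, "...docstring...") expands to roughly

       static PyObject * math_atan(PyObject *self, PyObject *args) {
           return math_1(args, atan, 0, NULL);
       }
       PyDoc_STRVAR(math_atan_doc, "...docstring...");

   i.e. one module-level wrapper plus its docstring per libm function.
*/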
FUNC1D(acos, acos, 0,
      "acos($module, x, /)\n--\n\n"
      "Return the arc cosine (measured in radians) of x.\n\n"
      "The result is between 0 and pi.",
      "expected a number in range from -1 up to 1, got %s")
FUNC1D(acosh, acosh, 0,
      "acosh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic cosine of x.",
      "expected argument value not less than 1, got %s")
FUNC1D(asin, asin, 0,
      "asin($module, x, /)\n--\n\n"
      "Return the arc sine (measured in radians) of x.\n\n"
      "The result is between -pi/2 and pi/2.",
      "expected a number in range from -1 up to 1, got %s")
FUNC1(asinh, asinh, 0,
      "asinh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic sine of x.")
FUNC1(atan, atan, 0,
      "atan($module, x, /)\n--\n\n"
      "Return the arc tangent (measured in radians) of x.\n\n"
      "The result is between -pi/2 and pi/2.")
FUNC2(atan2, atan2,
      "atan2($module, y, x, /)\n--\n\n"
      "Return the arc tangent (measured in radians) of y/x.\n\n"
      "Unlike atan(y/x), the signs of both x and y are considered.")
FUNC1D(atanh, atanh, 0,
      "atanh($module, x, /)\n--\n\n"
      "Return the inverse hyperbolic tangent of x.",
      "expected a number between -1 and 1, got %s")
FUNC1(cbrt, cbrt, 0,
      "cbrt($module, x, /)\n--\n\n"
      "Return the cube root of x.")
/*[clinic input]
math.ceil

    x as number: object
    /

Return the ceiling of x as an Integral.

This is the smallest integer >= x.
[clinic start generated code]*/

static PyObject *
math_ceil(PyObject *module, PyObject *number)
/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
{
    double x;

    if (PyFloat_CheckExact(number)) {
        x = PyFloat_AS_DOUBLE(number);
    }
    else {
        PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__ceil__));
        if (result != NULL) {
            return result;
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }
        x = PyFloat_AsDouble(number);
        if (x == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
    }
    return PyLong_FromDouble(ceil(x));
}
FUNC2(copysign, copysign,
      "copysign($module, x, y, /)\n--\n\n"
      "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
      "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
      "returns -1.0.\n")
FUNC1D(cos, cos, 0,
      "cos($module, x, /)\n--\n\n"
      "Return the cosine of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(cosh, cosh, 1,
      "cosh($module, x, /)\n--\n\n"
      "Return the hyperbolic cosine of x.")
FUNC1A(erf, erf,
       "erf($module, x, /)\n--\n\n"
       "Error function at x.")
FUNC1A(erfc, erfc,
       "erfc($module, x, /)\n--\n\n"
       "Complementary error function at x.")
FUNC1(exp, exp, 1,
      "exp($module, x, /)\n--\n\n"
      "Return e raised to the power of x.")
FUNC1(exp2, exp2, 1,
      "exp2($module, x, /)\n--\n\n"
      "Return 2 raised to the power of x.")
FUNC1(expm1, expm1, 1,
      "expm1($module, x, /)\n--\n\n"
      "Return exp(x)-1.\n\n"
      "This function avoids the loss of precision involved in the direct "
      "evaluation of exp(x)-1 for small x.")
FUNC1(fabs, fabs, 0,
      "fabs($module, x, /)\n--\n\n"
      "Return the absolute value of the float x.")
/*[clinic input]
math.floor

    x as number: object
    /

Return the floor of x as an Integral.

This is the largest integer <= x.
[clinic start generated code]*/

static PyObject *
math_floor(PyObject *module, PyObject *number)
/*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
{
    double x;

    if (PyFloat_CheckExact(number)) {
        x = PyFloat_AS_DOUBLE(number);
    }
    else {
        PyObject *result = _PyObject_MaybeCallSpecialNoArgs(number, &_Py_ID(__floor__));
        if (result != NULL) {
            return result;
        }
        else if (PyErr_Occurred()) {
            return NULL;
        }
        x = PyFloat_AsDouble(number);
        if (x == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
    }
    return PyLong_FromDouble(floor(x));
}
/*[clinic input]
math.fmax -> double

    x: double
    y: double
    /

Return the larger of two floating-point arguments.
[clinic start generated code]*/

static double
math_fmax_impl(PyObject *module, double x, double y)
/*[clinic end generated code: output=00692358d312fee2 input=021596c027336ffe]*/
{
    return fmax(x, y);
}

/*[clinic input]
math.fmin -> double

    x: double
    y: double
    /

Return the smaller of two floating-point arguments.
[clinic start generated code]*/

static double
math_fmin_impl(PyObject *module, double x, double y)
/*[clinic end generated code: output=3d5b7826bd292dd9 input=d12e64ccc33f878a]*/
{
    return fmin(x, y);
}
FUNC1AD(gamma, m_tgamma,
      "gamma($module, x, /)\n--\n\n"
      "Gamma function at x.",
      "expected a noninteger or positive integer, got %s")
FUNC1AD(lgamma, m_lgamma,
      "lgamma($module, x, /)\n--\n\n"
      "Natural logarithm of absolute value of Gamma function at x.",
      "expected a noninteger or positive integer, got %s")
FUNC1D(log1p, m_log1p, 0,
      "log1p($module, x, /)\n--\n\n"
      "Return the natural logarithm of 1+x (base e).\n\n"
      "The result is computed in a way which is accurate for x near zero.",
      "expected argument value > -1, got %s")
FUNC2(remainder, m_remainder,
      "remainder($module, x, y, /)\n--\n\n"
      "Difference between x and the closest integer multiple of y.\n\n"
      "Return x - n*y where n*y is the closest integer multiple of y.\n"
      "In the case where x is exactly halfway between two multiples of\n"
      "y, the nearest even value of n is used. The result is always exact.")

/*[clinic input]
math.signbit

    x: double
    /

Return True if the sign of x is negative and False otherwise.
[clinic start generated code]*/

static PyObject *
math_signbit_impl(PyObject *module, double x)
/*[clinic end generated code: output=20c5f20156a9b871 input=3d3493fbcb5bdb3e]*/
{
    return PyBool_FromLong(signbit(x));
}

FUNC1D(sin, sin, 0,
      "sin($module, x, /)\n--\n\n"
      "Return the sine of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(sinh, sinh, 1,
      "sinh($module, x, /)\n--\n\n"
      "Return the hyperbolic sine of x.")
FUNC1D(sqrt, sqrt, 0,
      "sqrt($module, x, /)\n--\n\n"
      "Return the square root of x.",
      "expected a nonnegative input, got %s")
FUNC1D(tan, tan, 0,
      "tan($module, x, /)\n--\n\n"
      "Return the tangent of x (measured in radians).",
      "expected a finite input, got %s")
FUNC1(tanh, tanh, 0,
      "tanh($module, x, /)\n--\n\n"
      "Return the hyperbolic tangent of x.")
/* Precision summation function as msum() by Raymond Hettinger in
   <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
   enhanced with the exact partials sum and roundoff from Mark
   Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
   See those links for more details, proofs and other references.

   Note 1: IEEE 754 floating-point semantics with a rounding mode of
   roundTiesToEven are assumed.

   Note 2: No provision is made for intermediate overflow handling;
   therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
   fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
   overflow of the first partial sum.

   Note 3: The algorithm has two potential sources of fragility.  First, C
   permits arithmetic operations on `double`s to be performed in an
   intermediate format whose range and precision may be greater than those of
   `double` (see for example C99 §5.2.4.2.2, paragraph 8).  This can happen for
   example on machines using the now largely historical x87 FPUs.  In this case,
   `fsum` can produce incorrect results.  If `FLT_EVAL_METHOD` is `0` or `1`, or
   `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
   should be safe from this source of errors.  Second, an aggressively
   optimizing compiler can re-associate operations so that (for example) the
   statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
   re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`.  That
   re-association would be in violation of the C standard, and should not occur
   except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
   -fassociative-math).  Such optimizations should be avoided for this module.

   Note 4: The signature of math.fsum() differs from builtins.sum()
   because the start argument doesn't make sense in the context of
   accurate summation.  Since the partials table is collapsed before
   returning a result, sum(seq2, start=sum(seq1)) may not equal the
   accurate result returned by sum(itertools.chain(seq1, seq2)).
*/
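/*
   Illustrative sketch (hypothetical, not part of the build): the exact
   "hi/lo" step that the fsum() inner loop relies on.  Under IEEE 754
   round-to-nearest, hi is the rounded sum and lo the roundoff, so that
   hi + lo == x + y exactly.  It is the same transformation as
   dl_fast_sum() above, with the swap making |x| >= |y| first.
*/
#if 0
static void
demo_hi_lo_step(double x, double y, double *hi, double *lo)
{
    if (fabs(x) < fabs(y)) {
        double t = x; x = y; y = t;   /* ensure |x| >= |y| */
    }
    *hi = x + y;
    *lo = y - (*hi - x);              /* exact roundoff of x + y */
}
#endif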
#define NUM_PARTIALS  32  /* initial partials array size, on stack */

/* Extend the partials array p[] by doubling its size. */
static int                          /* non-zero on error */
_fsum_realloc(double **p_ptr, Py_ssize_t  n,
              double  *ps, Py_ssize_t *m_ptr)
{
    void *v = NULL;
    Py_ssize_t m = *m_ptr;

    m += m;  /* double */
    if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
        double *p = *p_ptr;
        if (p == ps) {
            v = PyMem_Malloc(sizeof(double) * m);
            if (v != NULL)
                memcpy(v, ps, sizeof(double) * n);
        }
        else
            v = PyMem_Realloc(p, sizeof(double) * m);
    }
    if (v == NULL) {        /* size overflow or no memory */
        PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
        return 1;
    }
    *p_ptr = (double*) v;
    *m_ptr = m;
    return 0;
}
/* Full precision summation of a sequence of floats.

   def msum(iterable):
       partials = []               # sorted, non-overlapping partial sums
       for x in iterable:
           i = 0
           for y in partials:
               if abs(x) < abs(y):
                   x, y = y, x
               hi = x + y
               lo = y - (hi - x)
               if lo:
                   partials[i] = lo
                   i += 1
               x = hi
           partials[i:] = [x]
       return sum_exact(partials)

   Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
   are exactly equal to x+y.  The inner loop applies hi/lo summation to each
   partial so that the list of partial sums remains exact.

   Sum_exact() adds the partial sums exactly and correctly rounds the final
   result (using the round-half-to-even rule).  The items in partials remain
   non-zero, non-special, non-overlapping and strictly increasing in
   magnitude, but possibly not all having the same sign.

   Depends on IEEE 754 arithmetic guarantees and half-even rounding.
*/

/*[clinic input]
math.fsum

    seq: object
    /

Return an accurate floating-point sum of values in the iterable seq.

Assumes IEEE-754 floating-point arithmetic.
[clinic start generated code]*/
static PyObject *
math_fsum(PyObject *module, PyObject *seq)
/*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
{
    PyObject *item, *iter, *sum = NULL;
    Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
    double x, y, t, ps[NUM_PARTIALS], *p = ps;
    double xsave, special_sum = 0.0, inf_sum = 0.0;
    double hi, yr, lo = 0.0;

    iter = PyObject_GetIter(seq);
    if (iter == NULL)
        return NULL;

    for(;;) {           /* for x in iterable */
        assert(0 <= n && n <= m);
        assert((m == NUM_PARTIALS && p == ps) ||
               (m >  NUM_PARTIALS && p != NULL));

        item = PyIter_Next(iter);
        if (item == NULL) {
            if (PyErr_Occurred())
                goto _fsum_error;
            break;
        }
        ASSIGN_DOUBLE(x, item, error_with_item);
        Py_DECREF(item);

        xsave = x;
        for (i = j = 0; j < n; j++) {       /* for y in partials */
            y = p[j];
            if (fabs(x) < fabs(y)) {
                t = x; x = y; y = t;
            }
            hi = x + y;
            yr = hi - x;
            lo = y - yr;
            if (lo != 0.0)
                p[i++] = lo;
            x = hi;
        }

        n = i;                              /* ps[i:] = [x] */
        if (x != 0.0) {
            if (! isfinite(x)) {
                /* a nonfinite x could arise either as
                   a result of intermediate overflow, or
                   as a result of a nan or inf in the
                   summands */
                if (isfinite(xsave)) {
                    PyErr_SetString(PyExc_OverflowError,
                                    "intermediate overflow in fsum");
                    goto _fsum_error;
                }
                if (isinf(xsave))
                    inf_sum += xsave;
                special_sum += xsave;
                /* reset partials */
                n = 0;
            }
            else if (n >= m && _fsum_realloc(&p, n, ps, &m))
                goto _fsum_error;
            else
                p[n++] = x;
        }
    }

    if (special_sum != 0.0) {
        if (isnan(inf_sum))
            PyErr_SetString(PyExc_ValueError,
                            "-inf + inf in fsum");
        else
            sum = PyFloat_FromDouble(special_sum);
        goto _fsum_error;
    }

    hi = 0.0;
    if (n > 0) {
        hi = p[--n];
        /* sum_exact(ps, hi) from the top, stop when the sum becomes
           inexact. */
        while (n > 0) {
            x = hi;
            y = p[--n];
            assert(fabs(y) < fabs(x));
            hi = x + y;
            yr = hi - x;
            lo = y - yr;
            if (lo != 0.0)
                break;
        }
        /* Make half-even rounding work across multiple partials.
           Needed so that sum([1e-16, 1, 1e16]) will round-up the last
           digit to two instead of down to zero (the 1e-16 makes the 1
           slightly closer to two).  With a potential 1 ULP rounding
           error fixed-up, math.fsum() can guarantee commutativity. */
        if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
                      (lo > 0.0 && p[n-1] > 0.0))) {
            y = lo * 2.0;
            x = hi + y;
            yr = x - hi;
            if (y == yr)
                hi = x;
        }
    }
    sum = PyFloat_FromDouble(hi);

  _fsum_error:
    Py_DECREF(iter);
    if (p != ps)
        PyMem_Free(p);
    return sum;

  error_with_item:
    Py_DECREF(item);
    goto _fsum_error;
}
static unsigned long
count_set_bits(unsigned long n)
{
    unsigned long count = 0;
    while (n != 0) {
        ++count;
        n &= n - 1;  /* clear least significant bit */
    }
    return count;
}
/* Integer square root

   Given a nonnegative integer `n`, we want to compute the largest integer
   `a` for which `a * a <= n`, or equivalently the integer part of the exact
   square root of `n`.

   We use an adaptive-precision pure-integer version of Newton's iteration. Given
   a positive integer `n`, the algorithm produces at each iteration an integer
   approximation `a` to the square root of `n >> s` for some even integer `s`,
   with `s` decreasing as the iterations progress. On the final iteration, `s` is
   zero and we have an approximation to the square root of `n` itself.

   At every step, the approximation `a` is strictly within 1.0 of the true square
   root, so we have

       (a - 1)**2 < (n >> s) < (a + 1)**2

   After the final iteration, a check-and-correct step is needed to determine
   whether `a` or `a - 1` gives the desired integer square root of `n`.

   The algorithm is remarkable in its simplicity. There's no need for a
   per-iteration check-and-correct step, and termination is straightforward: the
   number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
   for `n > 1`). The only tricky part of the correctness proof is in establishing
   that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
   iteration to the next. A sketch of the proof of this is given below.

   In addition to the proof sketch, a formal, computer-verified proof
   of correctness (using Lean) of an equivalent recursive algorithm can be found
   here:

       https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean

   Here's Python code equivalent to the C implementation below:

       import operator

       def isqrt(n):
           """
           Return the integer part of the square root of the input.
           """
           n = operator.index(n)

           if n < 0:
               raise ValueError("isqrt() argument must be nonnegative")
           if n == 0:
               return 0

           c = (n.bit_length() - 1) // 2
           a = 1
           d = 0
           for s in reversed(range(c.bit_length())):
               # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
               e = d
               d = c >> s
               a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

           return a - (a*a > n)

   Sketch of proof of correctness
   ------------------------------

   The delicate part of the correctness proof is showing that the loop invariant
   is preserved from one iteration to the next. That is, just before the line

       a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

   is executed in the above code, we know that

       (1) (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.

   (since `e` is always the value of `d` from the previous iteration). We must
   prove that after that line is executed, we have

       (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2

   To facilitate the proof, we make some changes of notation. Write `m` for
   `n >> 2*(c-d)`, and write `b` for the new value of `a`, so

       b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a

   or equivalently:

       (2) b = (a << d - e - 1) + (m >> d - e + 1) // a

   Then we can rewrite (1) as:

       (3) (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2

   and we must show that (b - 1)**2 < m < (b + 1)**2.

   From this point on, we switch to mathematical notation, so `/` means exact
   division rather than integer division and `^` is used for exponentiation. We
   use the `√` symbol for the exact square root. In (3), we can remove the
   implicit floor operation to give:

       (4) (a - 1)^2 < m / 4^(d - e) < (a + 1)^2

   Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives

       (5) 0 <= | 2^(d-e)a - √m | < 2^(d-e)

   Squaring and dividing through by `2^(d-e+1) a` gives

       (6) 0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a

   We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
   right-hand side of (6) with `1`, and now replacing the central
   term `m / (2^(d-e+1) a)` with its floor in (6) gives

       (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1

   Or equivalently, from (2):

       (8) -1 < b - √m < 1

   and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
   to prove.

   We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
   a` that was used to get line (7) above. From the definition of `c`, we have
   `4^c <= n`, which implies

       4^d <= m

   also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
   that `2d - 2e - 1 <= d` and hence that

       (9) 4^(2d - 2e - 1) <= m

   Dividing both sides by `4^(d - e)` gives

       (10) 4^(d - e - 1) <= m / 4^(d - e)

   But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence

       (11) 4^(d - e - 1) < (a + 1)^2

   Now taking square roots of both sides and observing that both `2^(d-e-1)` and
   `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
   completes the proof sketch.
*/
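/*
   Illustrative sketch (hypothetical, not part of the build): a direct C
   translation of the reference Python code above, restricted to unsigned
   64-bit inputs and using GCC/Clang bit-count builtins for bit_length().
   The module's real implementation below uses a lookup table plus a fixed
   number of Newton steps instead.
*/
#if 0
static uint64_t
demo_isqrt_u64(uint64_t n)
{
    if (n == 0) {
        return 0;
    }
    /* c = (bit_length(n) - 1) // 2 */
    int c = (63 - __builtin_clzll(n)) / 2;
    uint64_t a = 1;
    int d = 0;
    /* bit_length(c); c <= 31, so the 32-bit builtin suffices */
    int c_bits = (c == 0) ? 0 : 32 - __builtin_clz((unsigned)c);
    for (int s = c_bits - 1; s >= 0; s--) {
        /* Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2 */
        int e = d;
        d = c >> s;
        a = (a << (d - e - 1)) + (n >> (2*c - e - d + 1)) / a;
    }
    /* a*a > n  <=>  a > n/a; phrased this way to avoid 64-bit overflow */
    return a - (a > n / a);
}
#endif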
/*
   The _approximate_isqrt_tab table provides approximate square roots for
   16-bit integers. For any n in the range 2**14 <= n < 2**16, the value

       a = _approximate_isqrt_tab[(n >> 8) - 64]

   is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.

   The table was computed in Python using the expression:

       [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
*/

static const uint8_t _approximate_isqrt_tab[192] = {
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
    140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
    151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
    160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
    170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
    179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
    187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
    195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
    203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
    210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
    217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
    224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
    231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
    238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
    244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
    250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
};
/* Approximate square root of a large 64-bit integer.

   Given `n` satisfying `2**62 <= n < 2**64`, return `a`
   satisfying `(a - 1)**2 < n < (a + 1)**2`. */

static inline uint32_t
_approximate_isqrt(uint64_t n)
{
    uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
    u = (u << 7) + (uint32_t)(n >> 41) / u;
    return (u << 15) + (uint32_t)((n >> 17) / u);
}
/*[clinic input]
math.isqrt

    n: object
    /

Return the integer part of the square root of the input.
[clinic start generated code]*/

static PyObject *
math_isqrt(PyObject *module, PyObject *n)
/*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
{
    int a_too_large, c_bit_length;
    int64_t c, d;
    uint64_t m;
    uint32_t u;
    PyObject *a = NULL, *b;

    n = _PyNumber_Index(n);
    if (n == NULL) {
        return NULL;
    }

    if (_PyLong_IsNegative((PyLongObject *)n)) {
        PyErr_SetString(
            PyExc_ValueError,
            "isqrt() argument must be nonnegative");
        goto error;
    }
    if (_PyLong_IsZero((PyLongObject *)n)) {
        Py_DECREF(n);
        return PyLong_FromLong(0);
    }

    /* c = (n.bit_length() - 1) // 2 */
    c = _PyLong_NumBits(n);
    assert(c > 0);
    assert(!PyErr_Occurred());
    c = (c - 1) / 2;

    /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
       fast, almost branch-free algorithm. */
    if (c <= 31) {
        int shift = 31 - (int)c;
        m = (uint64_t)PyLong_AsUnsignedLongLong(n);
        Py_DECREF(n);
        if (m == (uint64_t)(-1) && PyErr_Occurred()) {
            return NULL;
        }
        u = _approximate_isqrt(m << 2*shift) >> shift;
        u -= (uint64_t)u * u > m;
        return PyLong_FromUnsignedLong(u);
    }

    /* Slow path: n >= 2**64. We perform the first five iterations in C integer
       arithmetic, then switch to using Python long integers. */

    /* From n >= 2**64 it follows that c.bit_length() >= 6. */
    c_bit_length = 6;
    while ((c >> c_bit_length) > 0) {
        ++c_bit_length;
    }

    /* Initialise d and a. */
    d = c >> (c_bit_length - 5);
    b = _PyLong_Rshift(n, 2*c - 62);
    if (b == NULL) {
        goto error;
    }
    m = (uint64_t)PyLong_AsUnsignedLongLong(b);
    Py_DECREF(b);
    if (m == (uint64_t)(-1) && PyErr_Occurred()) {
        goto error;
    }
    u = _approximate_isqrt(m) >> (31U - d);
    a = PyLong_FromUnsignedLong(u);
    if (a == NULL) {
        goto error;
    }

    for (int s = c_bit_length - 6; s >= 0; --s) {
        PyObject *q;
        int64_t e = d;

        d = c >> s;

        /* q = (n >> 2*c - e - d + 1) // a */
        q = _PyLong_Rshift(n, 2*c - d - e + 1);
        if (q == NULL) {
            goto error;
        }
        Py_SETREF(q, PyNumber_FloorDivide(q, a));
        if (q == NULL) {
            goto error;
        }

        /* a = (a << d - 1 - e) + q */
        Py_SETREF(a, _PyLong_Lshift(a, d - 1 - e));
        if (a == NULL) {
            Py_DECREF(q);
            goto error;
        }
        Py_SETREF(a, PyNumber_Add(a, q));
        Py_DECREF(q);
        if (a == NULL) {
            goto error;
        }
    }

    /* The correct result is either a or a - 1. Figure out which, and
       decrement a if necessary. */

    /* a_too_large = n < a * a */
    b = PyNumber_Multiply(a, a);
    if (b == NULL) {
        goto error;
    }
    a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
    Py_DECREF(b);
    if (a_too_large == -1) {
        goto error;
    }
    if (a_too_large) {
        Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
    }
    Py_DECREF(n);
    return a;

  error:
    Py_XDECREF(a);
    Py_DECREF(n);
    return NULL;
}
/* Divide-and-conquer factorial algorithm
 *
 * Based on the formula and pseudo-code provided at:
 * http://www.luschny.de/math/factorial/binarysplitfact.html
 *
 * Faster algorithms exist, but they're more complicated and depend on
 * a fast prime factorization algorithm.
 *
 * Notes on the algorithm
 * ----------------------
 *
 * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
 * computed separately, and then combined using a left shift.
 *
 * The function factorial_odd_part computes the odd part m (i.e., the greatest
 * odd divisor) of factorial(n), using the formula:
 *
 *   factorial_odd_part(n) =
 *
 *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
 *
 * Example: factorial_odd_part(20) =
 *
 *        (1) *
 *        (1) *
 *        (1 * 3 * 5) *
 *        (1 * 3 * 5 * 7 * 9) *
 *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
 *
 * Here i goes from large to small: the first term corresponds to i=4 (any
 * larger i gives an empty product), and the last term corresponds to i=0.
 * Each term can be computed from the last by multiplying by the extra odd
 * numbers required: e.g., to get from the penultimate term to the last one,
 * we multiply by (11 * 13 * 15 * 17 * 19).
 *
 * To see a hint of why this formula works, here are the same numbers as above
 * but with the even parts (i.e., the appropriate powers of 2) included.  For
 * each subterm in the product for i, we multiply that subterm by 2**i:
 *
 *   factorial(20) =
 *
 *        (16) *
 *        (8) *
 *        (4 * 12 * 20) *
 *        (2 * 6 * 10 * 14 * 18) *
 *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
 *
 * The factorial_partial_product function computes the product of all odd j in
 * range(start, stop) for given start and stop.  It's used to compute the
 * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
 * operates recursively, repeatedly splitting the range into two roughly equal
 * pieces until the subranges are small enough to be computed using only C
 * integer arithmetic.
 *
 * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
 * the factorial) is computed independently in the main math_factorial
 * function.  By standard results, its value is:
 *
 *    two_valuation = n//2 + n//4 + n//8 + ....
 *
 * It can be shown (e.g., by complete induction on n) that two_valuation is
 * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
 * '1'-bits in the binary expansion of n.
 */
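/*
   Illustrative check (hypothetical, not part of the build): for n = 20 the
   two-valuation is 20//2 + 20//4 + 20//8 + 20//16 = 10 + 5 + 2 + 1 = 18,
   which indeed equals n - count_set_bits(n) = 20 - 2.
*/
#if 0
static void
demo_two_valuation(void)
{
    unsigned long n = 20;
    unsigned long by_division = n/2 + n/4 + n/8 + n/16;
    assert(by_division == n - count_set_bits(n));   /* 18 == 18 */
}
#endif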
/* factorial_partial_product: Compute product(range(start, stop, 2)) using
 * divide and conquer.  Assumes start and stop are odd and stop > start.
 * max_bits must be >= bit_length(stop - 2). */

static PyObject *
factorial_partial_product(unsigned long start, unsigned long stop,
                          unsigned long max_bits)
{
    unsigned long midpoint, num_operands;
    PyObject *left = NULL, *right = NULL, *result = NULL;

    /* If the return value will fit an unsigned long, then we can
     * multiply in a tight, fast loop where each multiply is O(1).
     * Compute an upper bound on the number of bits required to store
     * the answer.
     *
     * Storing some integer z requires floor(lg(z))+1 bits, which is
     * conveniently the value returned by bit_length(z).  The
     * product x*y will require at most
     * bit_length(x) + bit_length(y) bits to store, based
     * on the idea that lg product = lg x + lg y.
     *
     * We know that stop - 2 is the largest number to be multiplied.  From
     * there, we have: bit_length(answer) <= num_operands *
     * bit_length(stop - 2)
     */

    num_operands = (stop - start) / 2;
    /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
     * unlikely case of an overflow in num_operands * max_bits. */
    if (num_operands <= 8 * SIZEOF_LONG &&
        num_operands * max_bits <= 8 * SIZEOF_LONG) {
        unsigned long j, total;
        for (total = start, j = start + 2; j < stop; j += 2)
            total *= j;
        return PyLong_FromUnsignedLong(total);
    }

    /* find midpoint of range(start, stop), rounded up to next odd number. */
    midpoint = (start + num_operands) | 1;
    left = factorial_partial_product(start, midpoint,
                                     _Py_bit_length(midpoint - 2));
    if (left == NULL)
        goto error;
    right = factorial_partial_product(midpoint, stop, max_bits);
    if (right == NULL)
        goto error;
    result = PyNumber_Multiply(left, right);

  error:
    Py_XDECREF(left);
    Py_XDECREF(right);
    return result;
}
/* factorial_odd_part: compute the odd part of factorial(n). */

static PyObject *
factorial_odd_part(unsigned long n)
{
    long i;
    unsigned long v, lower, upper;
    PyObject *partial, *tmp, *inner, *outer;

    inner = PyLong_FromLong(1);
    if (inner == NULL)
        return NULL;
    outer = Py_NewRef(inner);

    upper = 3;
    for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
        v = n >> i;
        if (v <= 2)
            continue;
        lower = upper;
        /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
        upper = (v + 1) | 1;
        /* Here inner is the product of all odd integers j in the range (0,
           n/2**(i+1)].  The factorial_partial_product call below gives the
           product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
        partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
        /* inner *= partial */
        if (partial == NULL)
            goto error;
        tmp = PyNumber_Multiply(inner, partial);
        Py_DECREF(partial);
        if (tmp == NULL)
            goto error;
        Py_SETREF(inner, tmp);
        /* Now inner is the product of all odd integers j in the range (0,
           n/2**i], giving the inner product in the formula above. */

        /* outer *= inner; */
        tmp = PyNumber_Multiply(outer, inner);
        if (tmp == NULL)
            goto error;
        Py_SETREF(outer, tmp);
    }
    Py_DECREF(inner);
    return outer;

  error:
    Py_DECREF(outer);
    Py_DECREF(inner);
    return NULL;
}
/* Lookup table for small factorial values */

static const unsigned long SmallFactorials[] = {
    1, 1, 2, 6, 24, 120, 720, 5040, 40320,
    362880, 3628800, 39916800, 479001600,
#if SIZEOF_LONG >= 8
    6227020800, 87178291200, 1307674368000,
    20922789888000, 355687428096000, 6402373705728000,
    121645100408832000, 2432902008176640000
#endif
};
2065 math_factorial(PyObject
*module
, PyObject
*arg
)
2066 /*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/
2068 long x
, two_valuation
;
2070 PyObject
*result
, *odd_part
;
2072 x
= PyLong_AsLongAndOverflow(arg
, &overflow
);
2073 if (x
== -1 && PyErr_Occurred()) {
2076 else if (overflow
== 1) {
2077 PyErr_Format(PyExc_OverflowError
,
2078 "factorial() argument should not exceed %ld",
2082 else if (overflow
== -1 || x
< 0) {
2083 PyErr_SetString(PyExc_ValueError
,
2084 "factorial() not defined for negative values");
2088 /* use lookup table if x is small */
2089 if (x
< (long)Py_ARRAY_LENGTH(SmallFactorials
))
2090 return PyLong_FromUnsignedLong(SmallFactorials
[x
]);
2092 /* else express in the form odd_part * 2**two_valuation, and compute as
2093 odd_part << two_valuation. */
2094 odd_part
= factorial_odd_part(x
);
2095 if (odd_part
== NULL
)
2097 two_valuation
= x
- count_set_bits(x
);
2098 result
= _PyLong_Lshift(odd_part
, two_valuation
);
2099 Py_DECREF(odd_part
);
/*[clinic input]
math.trunc

    x: object
    /

Truncates the Real x to the nearest Integral toward 0.

Uses the __trunc__ magic method.
[clinic start generated code]*/

static PyObject *
math_trunc(PyObject *module, PyObject *x)
/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
{
    if (PyFloat_CheckExact(x)) {
        return PyFloat_Type.tp_as_number->nb_int(x);
    }

    PyObject *result = _PyObject_MaybeCallSpecialNoArgs(x, &_Py_ID(__trunc__));
    if (result != NULL) {
        return result;
    }
    else if (!PyErr_Occurred()) {
        PyErr_Format(PyExc_TypeError,
                     "type %.100s doesn't define __trunc__ method",
                     Py_TYPE(x)->tp_name);
    }
    return NULL;
}
/*[clinic input]
math.frexp

    x: double
    /

Return the mantissa and exponent of x, as pair (m, e).

m is a float and e is an int, such that x = m * 2.**e.
If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
[clinic start generated code]*/

static PyObject *
math_frexp_impl(PyObject *module, double x)
/*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
{
    int i;
    /* deal with special cases directly, to sidestep platform
       differences */
    if (isnan(x) || isinf(x) || !x) {
        i = 0;
    }
    else {
        x = frexp(x, &i);
    }
    return Py_BuildValue("(di)", x, i);
}
/*[clinic input]
math.ldexp

    x: double
    i: object
    /

Return x * (2**i).

This is essentially the inverse of frexp().
[clinic start generated code]*/

static PyObject *
math_ldexp_impl(PyObject *module, double x, PyObject *i)
/*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
{
    double r;
    long exp;
    int overflow;

    if (PyLong_Check(i)) {
        /* on overflow, replace exponent with either LONG_MAX
           or LONG_MIN, depending on the sign. */
        exp = PyLong_AsLongAndOverflow(i, &overflow);
        if (exp == -1 && PyErr_Occurred())
            return NULL;
        if (overflow)
            exp = overflow < 0 ? LONG_MIN : LONG_MAX;
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "Expected an int as second argument to ldexp.");
        return NULL;
    }

    if (x == 0. || !isfinite(x)) {
        /* NaNs, zeros and infinities are returned unchanged */
        r = x;
        errno = 0;
    } else if (exp > INT_MAX) {
        /* overflow */
        r = copysign(Py_INFINITY, x);
        errno = ERANGE;
    } else if (exp < INT_MIN) {
        /* underflow to +-0 */
        r = copysign(0., x);
        errno = 0;
    } else {
        errno = 0;
        r = ldexp(x, (int)exp);
#ifdef _MSC_VER
        if (DBL_MIN > r && r > -DBL_MIN) {
            /* Denormal (or zero) results can be incorrectly rounded here (rather,
               truncated).  Fixed in newer versions of the C runtime, included
               with Windows 11. */
            int original_exp;
            frexp(x, &original_exp);
            if (original_exp > DBL_MIN_EXP) {
                /* Shift down to the smallest normal binade.  No bits lost. */
                int shift = DBL_MIN_EXP - original_exp;
                x = ldexp(x, shift);
                exp -= shift;
            }
            /* Multiplying by 2**exp finishes the job, and the HW will round as
               appropriate.  Note: if exp < -DBL_MANT_DIG, all of x is shifted
               to be < 0.5ULP of smallest denorm, so should be thrown away.  If
               exp is so very negative that ldexp underflows to 0, that's fine;
               no need to check in advance. */
            r = x*ldexp(1.0, (int)exp);
        }
#endif
        if (isinf(r))
            errno = ERANGE;
    }

    if (errno && is_error(r, 1))
        return NULL;
    return PyFloat_FromDouble(r);
}
/*[clinic input]
math.modf

    x: double
    /

Return the fractional and integer parts of x.

Both results carry the sign of x and are floats.
[clinic start generated code]*/

static PyObject *
math_modf_impl(PyObject *module, double x)
/*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
{
    double y;
    /* some platforms don't do the right thing for NaNs and
       infinities, so we take care of special cases directly. */
    if (isinf(x))
        return Py_BuildValue("(dd)", copysign(0., x), x);
    else if (isnan(x))
        return Py_BuildValue("(dd)", x, x);

    errno = 0;
    x = modf(x, &y);
    return Py_BuildValue("(dd)", x, y);
}
/* A decent logarithm is easy to compute even for huge ints, but libm can't
   do that by itself -- loghelper can.  func is log or log10, and name is
   "log" or "log10".  Note that overflow of the result isn't possible: an int
   can contain no more than INT_MAX * SHIFT bits, so has value certainly less
   than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
   small enough to fit in an IEEE single.  log and log10 are even smaller.
   However, intermediate overflow is possible for an int if the number of bits
   in that int is larger than PY_SSIZE_T_MAX. */
2285 loghelper(PyObject
* arg
, double (*func
)(double))
2287 /* If it is int, do it ourselves. */
2288 if (PyLong_Check(arg
)) {
2292 /* Negative or zero inputs give a ValueError. */
2293 if (!_PyLong_IsPositive((PyLongObject
*)arg
)) {
2294 /* The input can be an arbitrary large integer, so we
2295 don't include it's value in the error message. */
2296 PyErr_SetString(PyExc_ValueError
,
2297 "expected a positive input");
2301 x
= PyLong_AsDouble(arg
);
2302 if (x
== -1.0 && PyErr_Occurred()) {
2303 if (!PyErr_ExceptionMatches(PyExc_OverflowError
))
2305 /* Here the conversion to double overflowed, but it's possible
2306 to compute the log anyway. Clear the exception and continue. */
2308 x
= _PyLong_Frexp((PyLongObject
*)arg
, &e
);
2310 assert(!PyErr_Occurred());
2311 /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2312 result
= func(x
) + func(2.0) * e
;
2315 /* Successfully converted x to a double. */
2317 return PyFloat_FromDouble(result
);
2320 /* Else let libm handle it by itself. */
2321 return math_1(arg
, func
, 0, "expected a positive input, got %s");
2325 /* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2326 for support of multiple signatures */
2328 math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
2330 PyObject *num, *den;
2333 if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2336 num = loghelper(args[0], m_log);
2337 if (num == NULL || nargs == 1)
2340 den = loghelper(args[1], m_log);
2346 ans = PyNumber_TrueDivide(num, den);
2352 PyDoc_STRVAR(math_log_doc,
2353 "log(x, [base=math.e])\n\
2354 Return the logarithm of x to the given base.\n\n\
2355 If the base is not specified, returns the natural logarithm (base e) of x.");
2363 Return the base 2 logarithm of x.
2364 [clinic start generated code]*/
2367 math_log2(PyObject *module, PyObject *x)
2368 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2370 return loghelper(x, m_log2);
2380 Return the base 10 logarithm of x.
2381 [clinic start generated code]*/
2384 math_log10(PyObject *module, PyObject *x)
2385 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2387 return loghelper(x, m_log10);
2399 Fused multiply-add operation.
2401 Compute (x * y) + z with a single round.
2402 [clinic start generated code]*/
2405 math_fma_impl(PyObject *module, double x, double y, double z)
2406 /*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2408 double r = fma(x, y, z);
2410 /* Fast path: if we got a finite result, we're done. */
2412 return PyFloat_FromDouble(r);
2415 /* Non-finite result. Raise an exception if appropriate, else return r. */
2417 if (!isnan(x) && !isnan(y) && !isnan(z)) {
2418 /* NaN result from non-NaN inputs. */
2419 PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2423 else if (isfinite(x) && isfinite(y) && isfinite(z)) {
2424 /* Infinite result from finite inputs. */
2425 PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2429 return PyFloat_FromDouble(r);
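/* Besides being a single correctly rounded operation, fma() also makes it
   easy to recover the exact rounding error of a double product -- the same
   error-free transformation the accurate hypot()/dist()/sumprod() paths rely
   on.  A minimal sketch with illustrative names:

       #include <math.h>

       static void
       exact_product(double x, double y, double *hi, double *lo)
       {
           *hi = x * y;             // rounded product
           *lo = fma(x, y, -*hi);   // exact remainder: x*y == *hi + *lo
       }

   For example, with x == y == 1e16 the rounded product *hi is 1e32 and *lo
   recovers the nonzero part lost to rounding. */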
2440 Return fmod(x, y), according to platform C.
2443 [clinic start generated code]*/
2446 math_fmod_impl(PyObject *module, double x, double y)
2447 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2450 /* fmod(x, +/-Inf) returns x for finite x. */
2451 if (isinf(y) && isfinite(x))
2452 return PyFloat_FromDouble(x);
2456 /* Windows (e.g. Windows 10 with MSC v.1916) loses the sign
2457 of a zero result. But C99+ says: "if y is nonzero, the result
2458 has the same sign as x". */
2460 if (r == 0.0 && y != 0.0) {
2465 if (!isnan(x) && !isnan(y))
2470 if (errno && is_error(r, 1))
2473 return PyFloat_FromDouble(r);
2477 Given a *vec* of values, compute the vector norm:
2479 sqrt(sum(x ** 2 for x in vec))
2481 The *max* variable should be equal to the largest fabs(x).
2482 The *n* variable is the length of *vec*.
2483 If n==0, then *max* should be 0.0.
2484 If an infinity is present in the vec, *max* should be INF.
2485 The *found_nan* variable indicates whether some member of the *vec* is a NaN.
2488 To avoid overflow/underflow and to achieve high accuracy giving results
2489 that are almost always correctly rounded, four techniques are used:
2491 * lossless scaling using a power-of-two scaling factor
2492 * accurate squaring using Veltkamp-Dekker splitting [1]
2493 or an equivalent with an fma() call
2494 * compensated summation using a variant of the Neumaier algorithm [2]
2495 * differential correction of the square root [3]
2497 The usual presentation of the Neumaier summation algorithm has an
2498 expensive branch depending on which operand has the larger
2499 magnitude. We avoid this cost by arranging the calculation so that
2500 fabs(csum) is always as large as fabs(x).
2502 To establish the invariant, *csum* is initialized to 1.0 which is
2503 always larger than x**2 after scaling or after division by *max*.
2504 After the loop is finished, the initial 1.0 is subtracted out for a
2505 net zero effect on the final sum. Since *csum* will be greater than
2506 1.0, the subtraction of 1.0 will not cause fractional digits to be
2507 dropped from *csum*.
2509 To get the full benefit from compensated summation, the largest
2510 addend should be in the range: 0.5 <= |x| <= 1.0. Accordingly,
2511 scaling or division by *max* should not be skipped even if not
2512 otherwise needed to prevent overflow or loss of precision.
2514 The assertion that hi*hi <= 1.0 is a bit subtle. Each vector element
2515 gets scaled to a magnitude below 1.0. The Veltkamp-Dekker splitting
2516 algorithm gives a *hi* value that is correctly rounded to half
2517 precision. When a value at or below 1.0 is correctly rounded, it
2518 never goes above 1.0. And when values at or below 1.0 are squared,
2519 they remain at or below 1.0, thus preserving the summation invariant.
2521 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2522 each scaled vector element has a magnitude less than 1.0. After the
2523 Veltkamp split, *lo* has a maximum value of 2**-27. So the maximum
2524 value of *lo* squared is 2**-54. The value of ulp(1.0)/2.0 is 2**-53.
2525 Given that csum >= 1.0, we have:
2526 lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2527 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2529 To minimize loss of information during the accumulation of fractional
2530 values, each term has a separate accumulator. This also breaks up
2531 sequential dependencies in the inner loop so the CPU can maximize
2532 floating-point throughput. [4] On an Apple M1 Max, hypot(*vec)
2533 takes only 3.33 µsec when len(vec) == 1000.
2535 The square root differential correction is needed because a
2536 correctly rounded square root of a correctly rounded sum of
2537 squares can still be off by as much as one ulp.
2539 The differential correction starts with a value *x* that is
2540 the difference between the square of *h*, the possibly inaccurately
2541 rounded square root, and the accurately computed sum of squares.
2542 The correction is the first order term of the Maclaurin series
2543 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2545 Essentially, this differential correction is equivalent to one
2546 refinement step in Newton's divide-and-average square root
2547 algorithm, effectively doubling the number of accurate bits.
2548 This technique is used in Dekker's SQRT2 algorithm and again in
2549 Borges' ALGORITHM 4 and 5.
2551 The hypot() function is faithfully rounded (less than 1 ulp error)
2552 and usually correctly rounded (within 1/2 ulp). The squaring
2553 step is exact. The Neumaier summation computes as if in doubled
2554 precision (106 bits) and has the advantage that its input squares
2555 are non-negative so that the condition number of the sum is one.
2556 The square root with a differential correction is likewise computed
2557 as if in doubled precision.
2559 For n <= 1000, prior to the final addition that rounds the overall
2560 result, the internal accuracy of "h" together with its correction of
2561 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2562 against a Decimal implementation with prec=300. After 100 million
2563 trials, no incorrectly rounded examples were found. In addition,
2564 perfect commutativity (all permutations are exactly equal) was
2565 verified for 1 billion random inputs with n=5. [7]
2569 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2570 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2571 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
2572 4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
2573 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2574 6. Analysis of internal accuracy: https://bugs.python.org/file49484/best_frac.py
2575 7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
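/* A minimal, self-contained sketch (not the module's implementation) of the
   first three techniques listed above: scale by a power of two, square
   losslessly with fma(), and accumulate with a branch-free compensated sum
   whose running total stays >= 1.0 so that the fast two-sum precondition
   fabs(csum) >= fabs(x*x) always holds.  It assumes max is the largest
   fabs() of the inputs and is normal and nonzero; names are illustrative.

       #include <math.h>

       static double
       scaled_sum_of_squares(const double *v, int n, double max)
       {
           int max_e;
           (void)frexp(max, &max_e);            // max == m * 2**max_e
           double scale = ldexp(1.0, -max_e);   // power-of-two factor: lossless
           double csum = 1.0, frac1 = 0.0, frac2 = 0.0;
           for (int i = 0; i < n; i++) {
               double x = v[i] * scale;         // now fabs(x) < 1.0
               double hi = x * x;
               double lo = fma(x, x, -hi);      // exact squaring error
               double t = csum + hi;            // fast two-sum, csum >= hi
               frac2 += (csum - t) + hi;        // rounding error of the sum
               frac1 += lo;
               csum = t;
           }
           return csum - 1.0 + (frac1 + frac2); // sum of squares of scaled values
       }

   A caller would take sqrt() of this and divide by scale to undo the scaling,
   plus the differential correction described above for the last half ulp. */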
2579 static inline double
2580 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2582 double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2583 DoubleLength pr, sm;
2593 if (max == 0.0 || n <= 1) {
2597 if (max_e < -1023) {
2598 /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
2599 for (i=0 ; i < n ; i++) {
2600 vec[i] /= DBL_MIN; // convert subnormals to normals
2602 return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2604 scale = ldexp(1.0, -max_e);
2605 assert(max * scale >= 0.5);
2606 assert(max * scale < 1.0);
2607 for (i=0 ; i < n ; i++) {
2609 assert(isfinite(x) && fabs(x) <= max);
2610 x *= scale; // lossless scaling
2611 assert(fabs(x) < 1.0);
2612 pr = dl_mul(x, x); // lossless squaring
2613 assert(pr.hi <= 1.0);
2614 sm = dl_fast_sum(csum, pr.hi); // lossless addition
2616 frac1 += pr.lo; // lossy addition
2617 frac2 += sm.lo; // lossy addition
2619 h = sqrt(csum - 1.0 + (frac1 + frac2));
2621 sm = dl_fast_sum(csum, pr.hi);
2625 x = csum - 1.0 + (frac1 + frac2);
2626 h += x / (2.0 * h); // differential correction
2630 #define NUM_STACK_ELEMS 16
2639 Return the Euclidean distance between two points p and q.
2641 The points should be specified as sequences (or iterables) of
2642 coordinates. Both inputs must have the same dimension.
2644 Roughly equivalent to:
2645 sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2646 [clinic start generated code]*/
2649 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2650 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2654 double x, px, qx, result;
2656 int found_nan = 0, p_allocated = 0, q_allocated = 0;
2657 double diffs_on_stack[NUM_STACK_ELEMS];
2658 double *diffs = diffs_on_stack;
2660 if (!PyTuple_Check(p)) {
2661 p = PySequence_Tuple(p);
2667 if (!PyTuple_Check(q)) {
2668 q = PySequence_Tuple(q);
2678 m = PyTuple_GET_SIZE(p);
2679 n = PyTuple_GET_SIZE(q);
2681 PyErr_SetString(PyExc_ValueError,
2682 "both points must have the same number of dimensions");
2685 if (n > NUM_STACK_ELEMS) {
2686 diffs = (double *) PyMem_Malloc(n * sizeof(double));
2687 if (diffs == NULL) {
2692 for (i=0 ; i<n ; i++) {
2693 item = PyTuple_GET_ITEM(p, i);
2694 ASSIGN_DOUBLE(px, item, error_exit);
2695 item = PyTuple_GET_ITEM(q, i);
2696 ASSIGN_DOUBLE(qx, item, error_exit);
2699 found_nan |= isnan(x);
2704 result = vector_norm(n, diffs, max, found_nan);
2705 if (diffs != diffs_on_stack) {
2714 return PyFloat_FromDouble(result);
2717 if (diffs != diffs_on_stack) {
2732 *coordinates as args: array
2734 Multidimensional Euclidean distance from the origin to a point.
2736 Roughly equivalent to:
2737 sqrt(sum(x**2 for x in coordinates))
2739 For a two dimensional point (x, y), gives the hypotenuse
2740 using the Pythagorean theorem: sqrt(x*x + y*y).
2742 For example, the hypotenuse of a 3/4/5 right triangle is:
2746 [clinic start generated code]*/
2749 math_hypot_impl(PyObject *module, PyObject * const *args,
2750 Py_ssize_t args_length)
2751 /*[clinic end generated code: output=c9de404e24370068 input=1bceaf7d4fdcd9c2]*/
2758 double coord_on_stack[NUM_STACK_ELEMS];
2759 double *coordinates = coord_on_stack;
2761 if (args_length > NUM_STACK_ELEMS) {
2762 coordinates = (double *) PyMem_Malloc(args_length * sizeof(double));
2763 if (coordinates == NULL) {
2764 return PyErr_NoMemory();
2767 for (i = 0; i < args_length; i++) {
2769 ASSIGN_DOUBLE(x, item, error_exit);
2772 found_nan |= isnan(x);
2777 result = vector_norm(args_length, coordinates, max, found_nan);
2778 if (coordinates != coord_on_stack) {
2779 PyMem_Free(coordinates);
2781 return PyFloat_FromDouble(result);
2784 if (coordinates != coord_on_stack) {
2785 PyMem_Free(coordinates);
2790 #undef NUM_STACK_ELEMS
2792 /** sumprod() ***************************************************************/
2794 /* Forward declaration */
2795 static inline int _check_long_mult_overflow(long a, long b);
2798 long_add_would_overflow(long a, long b)
2800 return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2810 Return the sum of products of values from two iterables p and q.
2812 Roughly equivalent to:
2814 sum(map(operator.mul, p, q, strict=True))
2816 For float and mixed int/float inputs, the intermediate products
2817 and sums are computed with extended precision.
2818 [clinic start generated code]*/
2821 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2822 /*[clinic end generated code: output=6722dbfe60664554 input=a2880317828c61d2]*/
2824 PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2825 PyObject *p_it, *q_it, *total;
2826 iternextfunc p_next, q_next;
2827 bool p_stopped = false, q_stopped = false;
2828 bool int_path_enabled = true, int_total_in_use = false;
2829 bool flt_path_enabled = true, flt_total_in_use = false;
2831 TripleLength flt_total = tl_zero;
2833 p_it = PyObject_GetIter(p);
2837 q_it = PyObject_GetIter(q);
2842 total = PyLong_FromLong(0);
2843 if (total == NULL) {
2848 p_next = *Py_TYPE(p_it)->tp_iternext;
2849 q_next = *Py_TYPE(q_it)->tp_iternext;
2853 assert (p_i == NULL);
2854 assert (q_i == NULL);
2855 assert (term_i == NULL);
2856 assert (new_total == NULL);
2858 assert (p_it != NULL);
2859 assert (q_it != NULL);
2860 assert (total != NULL);
2864 if (PyErr_Occurred()) {
2865 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2874 if (PyErr_Occurred()) {
2875 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2882 if (p_stopped != q_stopped) {
2883 PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2886 finished = p_stopped & q_stopped;
2888 if (int_path_enabled) {
2890 if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2892 long int_p, int_q, int_prod;
2894 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2896 goto finalize_int_path;
2898 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2900 goto finalize_int_path;
2902 if (_check_long_mult_overflow(int_p, int_q)) {
2903 goto finalize_int_path;
2905 int_prod = int_p * int_q;
2906 if (long_add_would_overflow(int_total, int_prod)) {
2907 goto finalize_int_path;
2909 int_total += int_prod;
2910 int_total_in_use = true;
2917 // We're finished, overflowed, or have a non-int
2918 int_path_enabled = false;
2919 if (int_total_in_use) {
2920 term_i = PyLong_FromLong(int_total);
2921 if (term_i == NULL) {
2924 new_total = PyNumber_Add(total, term_i);
2925 if (new_total == NULL) {
2928 Py_SETREF(total, new_total);
2931 int_total = 0; // An ounce of prevention, ...
2932 int_total_in_use = false;
2936 if (flt_path_enabled) {
2939 double flt_p, flt_q;
2940 bool p_type_float = PyFloat_CheckExact(p_i);
2941 bool q_type_float = PyFloat_CheckExact(q_i);
2942 if (p_type_float && q_type_float) {
2943 flt_p = PyFloat_AS_DOUBLE(p_i);
2944 flt_q = PyFloat_AS_DOUBLE(q_i);
2945 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2946 /* We care about float/int pairs and int/float pairs because
2947 they arise naturally in several use cases such as price
2948 times quantity, measurements with integer weights, or
2949 data selected by a vector of bools. */
2950 flt_p = PyFloat_AS_DOUBLE(p_i);
2951 flt_q = PyLong_AsDouble(q_i);
2952 if (flt_q == -1.0 && PyErr_Occurred()) {
2954 goto finalize_flt_path;
2956 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
2957 flt_q = PyFloat_AS_DOUBLE(q_i);
2958 flt_p = PyLong_AsDouble(p_i);
2959 if (flt_p == -1.0 && PyErr_Occurred()) {
2961 goto finalize_flt_path;
2964 goto finalize_flt_path;
2966 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2967 if (isfinite(new_flt_total.hi)) {
2968 flt_total = new_flt_total;
2969 flt_total_in_use = true;
2977 // We're finished, overflowed, have a non-float, or got a non-finite value
2978 flt_path_enabled = false;
2979 if (flt_total_in_use) {
2980 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2981 if (term_i == NULL) {
2984 new_total = PyNumber_Add(total, term_i);
2985 if (new_total == NULL) {
2988 Py_SETREF(total, new_total);
2991 flt_total = tl_zero;
2992 flt_total_in_use = false;
2996 assert(!int_total_in_use);
2997 assert(!flt_total_in_use);
3001 term_i = PyNumber_Multiply(p_i, q_i);
3002 if (term_i == NULL) {
3005 new_total = PyNumber_Add(total, term_i);
3006 if (new_total == NULL) {
3009 Py_SETREF(total, new_total);
3028 Py_XDECREF(new_total);
3033 /* pow can't use math_2, but needs its own wrapper: the problem is
3034 that an infinite result can arise either as a result of overflow
3035 (in which case OverflowError should be raised) or as a result of
3036 e.g. 0.**-5. (for which ValueError needs to be raised.)
3046 Return x**y (x to the power of y).
3047 [clinic start generated code]*/
3050 math_pow_impl(PyObject *module, double x, double y)
3051 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
3056 /* deal directly with IEEE specials, to cope with problems on various
3057 platforms whose semantics don't exactly match C99 */
3058 r = 0.; /* silence compiler warning */
3059 if (!isfinite(x) || !isfinite(y)) {
3062 r = y == 0. ? 1. : x; /* NaN**0 = 1 */
3064 r = x == 1. ? 1. : y; /* 1**NaN = 1 */
3065 else if (isinf(x)) {
3066 odd_y = isfinite(y) && fmod(fabs(y), 2.0) == 1.0;
3068 r = odd_y ? x : fabs(x);
3072 r = odd_y ? copysign(0., x) : 0.;
3078 else if (y > 0. && fabs(x) > 1.0)
3080 else if (y < 0. && fabs(x) < 1.0) {
3081 r = -y; /* result is +inf */
3088 /* let libm handle finite**finite */
3091 /* a NaN result should arise only from (-ve)**(finite
3092 non-integer); in this case we want to raise ValueError. */
3098 an infinite result here arises either from:
3099 (A) (+/-0.)**negative (-> divide-by-zero)
3100 (B) overflow of x**y with x and y finite
3102 else if (isinf(r)) {
3111 if (errno && is_error(r, 1))
3114 return PyFloat_FromDouble(r);
3118 static const double degToRad = Py_MATH_PI / 180.0;
3119 static const double radToDeg = 180.0 / Py_MATH_PI;
3127 Convert angle x from radians to degrees.
3128 [clinic start generated code]*/
3131 math_degrees_impl(PyObject *module, double x)
3132 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3134 return PyFloat_FromDouble(x * radToDeg);
3144 Convert angle x from degrees to radians.
3145 [clinic start generated code]*/
3148 math_radians_impl(PyObject *module, double x)
3149 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3151 return PyFloat_FromDouble(x * degToRad);
3161 Return True if x is neither an infinity nor a NaN, and False otherwise.
3162 [clinic start generated code]*/
3165 math_isfinite_impl(PyObject *module, double x)
3166 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3168 return PyBool_FromLong((long)isfinite(x));
3178 Return True if x is normal, and False otherwise.
3179 [clinic start generated code]*/
3182 math_isnormal_impl(PyObject *module, double x)
3183 /*[clinic end generated code: output=c7b302b5b89c3541 input=fdaa00c58aa7bc17]*/
3185 return PyBool_FromLong(isnormal(x));
3195 Return True if x is subnormal, and False otherwise.
3196 [clinic start generated code]*/
3199 math_issubnormal_impl(PyObject *module, double x)
3200 /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/
3202 #if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
3203 return PyBool_FromLong(issubnormal(x));
3205 return PyBool_FromLong(isfinite(x) && x && !isnormal(x));
3216 Return True if x is a NaN (not a number), and False otherwise.
3217 [clinic start generated code]*/
3220 math_isnan_impl(PyObject *module, double x)
3221 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3223 return PyBool_FromLong((long)isnan(x));
3233 Return True if x is a positive or negative infinity, and False otherwise.
3234 [clinic start generated code]*/
3237 math_isinf_impl(PyObject *module, double x)
3238 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3240 return PyBool_FromLong((long)isinf(x));
3245 math.isclose -> bool
3250 rel_tol: double = 1e-09
3251 maximum difference for being considered "close", relative to the
3252 magnitude of the input values
3253 abs_tol: double = 0.0
3254 maximum difference for being considered "close", regardless of the
3255 magnitude of the input values
3257 Determine whether two floating-point numbers are close in value.
3259 Return True if a is close in value to b, and False otherwise.
3261 For the values to be considered close, the difference between them
3262 must be smaller than at least one of the tolerances.
3264 -inf, inf and NaN behave similarly to the IEEE 754 Standard. That
3265 is, NaN is not close to anything, even itself. inf and -inf are
3266 only close to themselves.
3267 [clinic start generated code]*/
3270 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3271 double abs_tol)
3272 /*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
3276 /* sanity check on the inputs */
3277 if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "tolerances must be non-negative");
3284 /* short circuit exact equality -- needed to catch two infinities of
3285 the same sign. And perhaps speeds things up a bit sometimes.
3290 /* This catches the case of two infinities of opposite sign, or
3291 one infinity and one finite number. Two infinities of opposite
3292 sign would otherwise have an infinite relative tolerance.
3293 Two infinities of the same sign are caught by the equality check
3297 if (isinf(a) || isinf(b)) {
3301 /* now do the regular computation
3302 this is essentially the "weak" test from the Boost library
3307 return (((diff <= fabs(rel_tol * b)) ||
3308 (diff <= fabs(rel_tol * a))) ||
3309 (diff <= abs_tol));
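/* The three-way test above is equivalent to the symmetric form
       fabs(a-b) <= MAX(rel_tol * MAX(fabs(a), fabs(b)), abs_tol)
   For example, with the default rel_tol of 1e-09, isclose(1.0, 1.0 + 1e-10)
   is true while isclose(1.0, 1.0 + 1e-06) is not. */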
3313 _check_long_mult_overflow(long a, long b) {
3315 /* From Python2's int_mul code:
3317 Integer overflow checking for * is painful: Python tried a couple ways, but
3318 they didn't work on all platforms, or failed in endcases (a product of
3319 -sys.maxint-1 has been a particular pain).
3323 The native long product x*y is either exactly right or *way* off, being
3324 just the last n bits of the true product, where n is the number of bits
3325 in a long (the delivered product is the true product plus i*2**n for
3328 The native double product (double)x * (double)y is subject to three
3329 rounding errors: on a sizeof(long)==8 box, each cast to double can lose
3330 info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3331 But, unlike the native long product, it's not in *range* trouble: even
3332 if sizeof(long)==32 (256-bit longs), the product easily fits in the
3333 dynamic range of a double. So the leading 50 (or so) bits of the double
3334 product are correct.
3336 We check these two ways against each other, and declare victory if they're
3337 approximately the same. Else, because the native long product is the only
3338 one that can lose catastrophic amounts of information, it's the native long
3339 product that must have overflowed.
3343 long longprod = (long)((unsigned long)a * b);
3344 double doubleprod = (double)a * (double)b;
3345 double doubled_longprod = (double)longprod;
3347 if (doubled_longprod == doubleprod) {
3351 const double diff = doubled_longprod - doubleprod;
3352 const double absdiff = diff >= 0.0 ? diff : -diff;
3353 const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3355 if (32.0 * absdiff <= absprod) {
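/* Worked example of the check above (64-bit long assumed): for a == 3 and
   b == 5, longprod == 15 and doubleprod == 15.0 agree exactly, so no overflow
   is reported.  For a == b == 2**32, the wrapped long product is 0 while the
   double product is about 1.84e19; the difference dwarfs absprod/32, so the
   multiplication is reported as having overflowed. */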
3368 start: object(c_default="NULL") = 1
3370 Calculate the product of all the elements in the input iterable.
3372 The default start value for the product is 1.
3374 When the iterable is empty, return the start value. This function is
3375 intended specifically for use with numeric values and may reject non-numeric types.
3377 [clinic start generated code]*/
3380 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3381 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3383 PyObject *result = start;
3384 PyObject *temp, *item, *iter;
3386 iter = PyObject_GetIter(iterable);
3391 if (result == NULL) {
3392 result = _PyLong_GetOne();
3396 /* Fast paths for integers keeping temporary products in C.
3397 * Assumes all inputs are the same type.
3398 * If the assumption fails, default to use PyObjects instead.
3400 if (PyLong_CheckExact(result)) {
3402 long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3403 /* If this already overflowed, don't even enter the loop. */
3404 if (overflow == 0) {
3405 Py_SETREF(result, NULL);
3407 /* Loop over all the items in the iterable until we finish, we overflow
3408 * or we find a non-integer element */
3409 while (result == NULL) {
3410 item = PyIter_Next(iter);
3413 if (PyErr_Occurred()) {
3416 return PyLong_FromLong(i_result);
3418 if (PyLong_CheckExact(item)) {
3419 long b = PyLong_AsLongAndOverflow(item, &overflow);
3420 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3421 long x = i_result * b;
3427 /* Either overflowed or is not an int.
3428 * Restore real objects and process normally */
3429 result = PyLong_FromLong(i_result);
3430 if (result == NULL) {
3435 temp = PyNumber_Multiply(result, item);
3439 if (result == NULL) {
3446 /* Fast paths for floats keeping temporary products in C.
3447 * Assumes all inputs are the same type.
3448 * If the assumption fails, default to use PyObjects instead.
3450 if (PyFloat_CheckExact(result)) {
3451 double f_result = PyFloat_AS_DOUBLE(result);
3452 Py_SETREF(result, NULL);
3453 while (result == NULL) {
3454 item = PyIter_Next(iter);
3457 if (PyErr_Occurred()) {
3460 return PyFloat_FromDouble(f_result);
3462 if (PyFloat_CheckExact(item)) {
3463 f_result *= PyFloat_AS_DOUBLE(item);
3467 if (PyLong_CheckExact(item)) {
3470 value = PyLong_AsLongAndOverflow(item, &overflow);
3472 f_result *= (double)value;
3477 result = PyFloat_FromDouble(f_result);
3478 if (result == NULL) {
3483 temp = PyNumber_Multiply(result, item);
3487 if (result == NULL) {
3494 /* Consume rest of the iterable (if any) that could not be handled
3495 * by specialized functions above. */
3497 item = PyIter_Next(iter);
3499 /* error, or end-of-sequence */
3500 if (PyErr_Occurred()) {
3501 Py_SETREF(result, NULL);
3505 temp = PyNumber_Multiply(result, item);
3517 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3519 Python code to generate the values:
3521 import math
3523 for n in range(128):
3524 fac = math.factorial(n)
3525 fac_odd_part = fac // (fac & -fac)
3526 reduced_fac_odd_part = fac_odd_part % (2**64)
3527 print(f"{reduced_fac_odd_part:#018x}u")
3529 static const uint64_t reduced_factorial_odd_part[] = {
3530 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3531 0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3532 0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3533 0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3534 0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3535 0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3536 0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3537 0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3538 0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3539 0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3540 0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3541 0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3542 0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3543 0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3544 0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3545 0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3546 0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3547 0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3548 0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3549 0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3550 0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3551 0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3552 0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3553 0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3554 0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3555 0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3556 0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3557 0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3558 0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3559 0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3560 0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3561 0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3564 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3566 Python code to generate the values:
3568 import math
3570 for n in range(128):
3571 fac = math.factorial(n)
3572 fac_odd_part = fac // (fac & -fac)
3573 inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3574 print(f"{inverted_fac_odd_part:#018x}u")
3576 static const uint64_t inverted_factorial_odd_part[] = {
3577 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3578 0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3579 0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3580 0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3581 0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3582 0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3583 0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3584 0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3585 0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3586 0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3587 0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3588 0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3589 0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3590 0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3591 0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3592 0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3593 0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3594 0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3595 0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3596 0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3597 0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3598 0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3599 0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3600 0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3601 0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3602 0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3603 0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3604 0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3605 0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3606 0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3607 0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3608 0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3611 /* exponent of the largest power of 2 dividing factorial(n), for n in range(128)
3613 Python code to generate the values:
3615 import math
3617 for n in range(128):
3618 fac = math.factorial(n)
3619 fac_trailing_zeros = (fac & -fac).bit_length() - 1
3620 print(fac_trailing_zeros)
3623 static const uint8_t factorial_trailing_zeros[] = {
3624 0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3625 15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3626 31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3627 46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3628 63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
3629 78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
3630 94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
3631 109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
3634 /* Number of permutations and combinations.
3635 * P(n, k) = n! / (n-k)!
3636 * C(n, k) = P(n, k) / k!
3639 /* Calculate C(n, k) for n in the 63-bit range. */
3641 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3645 /* For small enough n and k the result fits in the 64-bit range and can
3646 * be calculated without allocating intermediate PyLong objects. */
3648 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3649 * fits into a uint64_t. Exclude k = 1, because the second fast
3650 * path is faster for this case.*/
3651 static const unsigned char fast_comb_limits1[] = {
3652 0, 0, 127, 127, 127, 127, 127, 127, // 0-7
3653 127, 127, 127, 127, 127, 127, 127, 127, // 8-15
3654 116, 105, 97, 91, 86, 82, 78, 76, // 16-23
3655 74, 72, 71, 70, 69, 68, 68, 67, // 24-31
3656 67, 67, 67, // 32-34
3658 if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3660 comb(n, k) fits into a uint64_t. We compute it as
3662 comb_odd_part << shift
3664 where 2**shift is the largest power of two dividing comb(n, k)
3665 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3666 calculated efficiently via arithmetic modulo 2**64, using three
3667 lookups and two uint64_t multiplications.
3669 uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3670 * inverted_factorial_odd_part[k]
3671 * inverted_factorial_odd_part[n - k];
3672 int shift = factorial_trailing_zeros[n]
3673 - factorial_trailing_zeros[k]
3674 - factorial_trailing_zeros[n - k];
3675 return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
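/* Worked example of this fast path: comb(10, 3) == 120 == 15 << 3.  The
   shift is factorial_trailing_zeros[10] - factorial_trailing_zeros[3]
   - factorial_trailing_zeros[7] == 8 - 1 - 4 == 3, and
   reduced_factorial_odd_part[10] * inverted_factorial_odd_part[3]
   * inverted_factorial_odd_part[7] is congruent to 15 modulo 2**64,
   which is the odd part of 120. */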
3678 /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3679 * fits into a long long (which is at least 64 bit). Only contains
3680 * items larger than in fast_comb_limits1. */
3681 static const unsigned long long fast_comb_limits2[] = {
3682 0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
3683 746, 453, 308, 227, 178, 147, // 8-13
3685 if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3686 /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3687 unsigned long long result = n;
3688 for (unsigned long long i = 1; i < k;) {
3692 return PyLong_FromUnsignedLongLong(result);
3696 /* Maps k to the maximal n so that k <= n and P(n, k)
3697 * fits into a long long (which is at least 64 bit). */
3698 static const unsigned long long fast_perm_limits[] = {
3699 0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
3700 259, 142, 88, 61, 45, 36, 30, 26, // 8-15
3701 24, 22, 21, 20, 20, // 16-20
3703 if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3705 /* P(n, k) fits into a uint64_t. */
3706 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3707 * inverted_factorial_odd_part[n - k];
3708 int shift = factorial_trailing_zeros[n]
3709 - factorial_trailing_zeros[n - k];
3710 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3713 /* P(n, k) = P(n, k-1) * (n-k+1) */
3714 unsigned long long result = n;
3715 for (unsigned long long i = 1; i < k;) {
3719 return PyLong_FromUnsignedLongLong(result);
3723 /* For larger n use recursive formulas:
3725 * P(n, k) = P(n, j) * P(n-j, k-j)
3726 * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3728 unsigned long long j = k / 2;
3730 a = perm_comb_small(n, j, iscomb);
3734 b = perm_comb_small(n - j, k - j, iscomb);
3738 Py_SETREF(a, PyNumber_Multiply(a, b));
3740 if (iscomb && a != NULL) {
3741 b = perm_comb_small(k, j, 1);
3745 Py_SETREF(a, PyNumber_FloorDivide(a, b));
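/* Worked example of the recursive formulas with n = 10, k = 4, j = k/2 = 2:
   C(10, 4) == C(10, 2) * C(8, 2) // C(4, 2) == 45 * 28 // 6 == 210, and
   P(10, 4) == P(10, 2) * P(8, 2) == 90 * 56 == 5040. */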
3755 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3756 * It is more efficient than sequential multiplication thanks to
3757 * Karatsuba multiplication.
3760 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3763 return PyLong_FromLong(1);
3766 return Py_NewRef(n);
3769 /* P(n, k) = P(n, j) * P(n-j, k-j) */
3770 /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3771 unsigned long long j = k / 2;
3773 a = perm_comb(n, j, iscomb);
3777 PyObject *t = PyLong_FromUnsignedLongLong(j);
3781 n = PyNumber_Subtract(n, t);
3786 b = perm_comb(n, k - j, iscomb);
3791 Py_SETREF(a, PyNumber_Multiply(a, b));
3793 if (iscomb && a != NULL) {
3794 b = perm_comb_small(k, j, 1);
3798 Py_SETREF(a, PyNumber_FloorDivide(a, b));
3815 Number of ways to choose k items from n items without repetition and with order.
3817 Evaluates to n! / (n - k)! when k <= n and evaluates to zero when k > n.
3820 If k is not specified or is None, then k defaults to n
3821 and the function returns n!.
3823 Raises TypeError if either of the arguments are not integers.
3824 Raises ValueError if either of the arguments are negative.
3825 [clinic start generated code]*/
3828 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3829 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3831 PyObject *result = NULL;
3836 return math_factorial(module, n);
3838 n = PyNumber_Index(n);
3842 k = PyNumber_Index(k);
3847 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3849 if (_PyLong_IsNegative((PyLongObject *)n)) {
3850 PyErr_SetString(PyExc_ValueError,
3851 "n must be a non-negative integer");
3854 if (_PyLong_IsNegative((PyLongObject *)k)) {
3855 PyErr_SetString(PyExc_ValueError,
3856 "k must be a non-negative integer");
3860 cmp = PyObject_RichCompareBool(n, k, Py_LT);
3863 result = PyLong_FromLong(0);
3869 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3870 assert(overflow >= 0 && !PyErr_Occurred());
3872 PyErr_Format(PyExc_OverflowError,
3873 "k must not exceed %lld",
3879 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3880 assert(overflow >= 0 && !PyErr_Occurred());
3881 if (!overflow && ki > 1) {
3883 result = perm_comb_small((unsigned long long)ni,
3884 (unsigned long long)ki, 0);
3887 result = perm_comb(n, (unsigned long long)ki, 0);
3908 Number of ways to choose k items from n items without repetition and without order.
3910 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates to zero when k > n.
3913 Also called the binomial coefficient because it is equivalent
3914 to the coefficient of k-th term in polynomial expansion of the
3915 expression (1 + x)**n.
3917 Raises TypeError if either of the arguments are not integers.
3918 Raises ValueError if either of the arguments are negative.
3920 [clinic start generated code]*/
3923 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3924 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3926 PyObject *result = NULL, *temp;
3930 n = PyNumber_Index(n);
3934 k = PyNumber_Index(k);
3939 assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3941 if (_PyLong_IsNegative((PyLongObject *)n)) {
3942 PyErr_SetString(PyExc_ValueError,
3943 "n must be a non-negative integer");
3946 if (_PyLong_IsNegative((PyLongObject *)k)) {
3947 PyErr_SetString(PyExc_ValueError,
3948 "k must be a non-negative integer");
3952 ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3953 assert(overflow >= 0 && !PyErr_Occurred());
3956 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3957 assert(overflow >= 0 && !PyErr_Occurred());
3958 if (overflow || ki > ni) {
3959 result = PyLong_FromLong(0);
3964 ki = Py_MIN(ki, ni - ki);
3966 result = perm_comb_small((unsigned long long)ni,
3967 (unsigned long long)ki, 1);
3970 /* For k == 1 just return the original n in perm_comb(). */
3973 /* k = min(k, n - k) */
3974 temp = PyNumber_Subtract(n, k);
3978 assert(PyLong_Check(temp));
3979 if (_PyLong_IsNegative((PyLongObject *)temp)) {
3981 result = PyLong_FromLong(0);
3984 cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3995 ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3996 assert(overflow >= 0 && !PyErr_Occurred());
3998 PyErr_Format(PyExc_OverflowError,
3999 "min(n - k, k) must not exceed %lld",
4006 result = perm_comb(n, (unsigned long long)ki, 1);
4027 steps: object = None
4029 Return the floating-point value the given number of steps after x towards y.
4031 If steps is not specified or is None, it defaults to 1.
4033 Raises a TypeError, if x or y is not a double, or if steps is not an integer.
4034 Raises ValueError if steps is negative.
4035 [clinic start generated code]*/
4038 math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
4039 /*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
4043 /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
4044 Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
4045 return PyFloat_FromDouble(y);
4048 return PyFloat_FromDouble(x);
4051 return PyFloat_FromDouble(y);
4054 if (steps == Py_None) {
4055 // fast path: we default to one step.
4056 return PyFloat_FromDouble(nextafter(x, y));
4058 steps = PyNumber_Index(steps);
4059 if (steps == NULL) {
4062 assert(PyLong_CheckExact(steps));
4063 if (_PyLong_IsNegative((PyLongObject *)steps)) {
4064 PyErr_SetString(PyExc_ValueError,
4065 "steps must be a non-negative integer");
4070 unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
4071 // Conveniently, uint64_t and double have the same number of bits
4072 // on all the platforms we care about.
4073 // So if an overflow occurs, we can just use UINT64_MAX.
4075 if (usteps_ull >= UINT64_MAX) {
4076 // This branch includes the case where an error occurred, since
4077 // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
4078 // usteps_ull can be strictly larger than UINT64_MAX on a machine
4079 // where unsigned long long has width > 64 bits.
4080 if (PyErr_Occurred()) {
4081 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
4088 usteps_ull = UINT64_MAX;
4090 assert(usteps_ull <= UINT64_MAX);
4091 uint64_t usteps = (uint64_t)usteps_ull;
4094 return PyFloat_FromDouble(x);
4097 return PyFloat_FromDouble(x);
4100 return PyFloat_FromDouble(y);
4103 // We assume that double and uint64_t have the same endianness.
4104 // This is not guaranteed by the C-standard, but it is true for
4105 // all platforms we care about. (The most likely form of violation
4106 // would be a "mixed-endian" double.)
4107 union pun {double f; uint64_t i;};
4108 union pun ux = {x}, uy = {y};
4110 return PyFloat_FromDouble(x);
4113 const uint64_t sign_bit = 1ULL<<63;
4115 uint64_t ax = ux.i & ~sign_bit;
4116 uint64_t ay = uy.i & ~sign_bit;
4119 if (((ux.i ^ uy.i) & sign_bit)) {
4120 // NOTE: ax + ay can never overflow, because their most significant bit is always zero.
4122 if (ax + ay <= usteps) {
4123 return PyFloat_FromDouble(uy.f);
4124 // This comparison has to use <, because <= would get +0.0 vs -0.0 wrong.
4126 } else if (ax < usteps) {
4127 union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4128 return PyFloat_FromDouble(result.f);
4131 return PyFloat_FromDouble(ux.f);
4134 } else if (ax > ay) {
4135 if (ax - ay >= usteps) {
4137 return PyFloat_FromDouble(ux.f);
4139 return PyFloat_FromDouble(uy.f);
4142 if (ay - ax >= usteps) {
4144 return PyFloat_FromDouble(ux.f);
4146 return PyFloat_FromDouble(uy.f);
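/* A minimal standalone illustration (not the module's code path) of the
   ordered-integer view of IEEE doubles used above: for a finite positive x,
   reinterpreting the bits as a uint64_t and adding 1 yields the next
   representable double, i.e. one step towards +infinity.

       #include <stdint.h>

       static double
       next_up_positive(double x)   // assumes x is finite and > 0
       {
           union {double f; uint64_t i;} u = {x};
           u.i += 1;                // adjacent bit pattern == adjacent double
           return u.f;
       }

   For example, next_up_positive(1.0) returns 1.0 + 2**-52, the same value as
   nextafter(1.0, INFINITY). */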
4158 Return the value of the least significant bit of the float x.
4159 [clinic start generated code]*/
4162 math_ulp_impl(PyObject *module, double x)
4163 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4172 double inf = Py_INFINITY;
4173 double x2 = nextafter(x, inf);
4175 /* special case: x is the largest positive representable float */
4176 x2 = nextafter(x, -inf);
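/* For example, ulp(1.0) is 2**-52 (DBL_EPSILON, about 2.22e-16) and ulp(0.0)
   is the smallest subnormal double, about 5e-324. */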
4183 math_exec(PyObject *module)
4186 if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
4189 if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
4193 if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
4196 if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
4199 if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
4205 static PyMethodDef math_methods[] = {
4206 {"acos", math_acos, METH_O, math_acos_doc},
4207 {"acosh", math_acosh, METH_O, math_acosh_doc},
4208 {"asin", math_asin, METH_O, math_asin_doc},
4209 {"asinh", math_asinh, METH_O, math_asinh_doc},
4210 {"atan", math_atan, METH_O, math_atan_doc},
4211 {"atan2", _PyCFunction_CAST(math_atan2), METH_FASTCALL, math_atan2_doc},
4212 {"atanh", math_atanh, METH_O, math_atanh_doc},
4213 {"cbrt", math_cbrt, METH_O, math_cbrt_doc},
4215 {"copysign", _PyCFunction_CAST(math_copysign), METH_FASTCALL, math_copysign_doc},
4216 {"cos", math_cos, METH_O, math_cos_doc},
4217 {"cosh", math_cosh, METH_O, math_cosh_doc},
4218 MATH_DEGREES_METHODDEF
4220 {"erf", math_erf, METH_O, math_erf_doc},
4221 {"erfc", math_erfc, METH_O, math_erfc_doc},
4222 {"exp", math_exp, METH_O, math_exp_doc},
4223 {"exp2", math_exp2, METH_O, math_exp2_doc},
4224 {"expm1", math_expm1, METH_O, math_expm1_doc},
4225 {"fabs", math_fabs, METH_O, math_fabs_doc},
4226 MATH_FACTORIAL_METHODDEF
4227 MATH_FLOOR_METHODDEF
4232 MATH_FREXP_METHODDEF
4234 {"gamma", math_gamma, METH_O, math_gamma_doc},
4236 MATH_HYPOT_METHODDEF
4237 MATH_ISCLOSE_METHODDEF
4238 MATH_ISFINITE_METHODDEF
4239 MATH_ISNORMAL_METHODDEF
4240 MATH_ISSUBNORMAL_METHODDEF
4241 MATH_ISINF_METHODDEF
4242 MATH_ISNAN_METHODDEF
4243 MATH_ISQRT_METHODDEF
4245 MATH_LDEXP_METHODDEF
4246 {"lgamma", math_lgamma, METH_O, math_lgamma_doc},
4247 {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
4248 {"log1p", math_log1p, METH_O, math_log1p_doc},
4249 MATH_LOG10_METHODDEF
4253 MATH_RADIANS_METHODDEF
4254 {"remainder", _PyCFunction_CAST(math_remainder), METH_FASTCALL, math_remainder_doc},
4255 MATH_SIGNBIT_METHODDEF
4256 {"sin", math_sin, METH_O, math_sin_doc},
4257 {"sinh", math_sinh, METH_O, math_sinh_doc},
4258 {"sqrt", math_sqrt, METH_O, math_sqrt_doc},
4259 {"tan", math_tan, METH_O, math_tan_doc},
4260 {"tanh", math_tanh, METH_O, math_tanh_doc},
4261 MATH_SUMPROD_METHODDEF
4262 MATH_TRUNC_METHODDEF
4266 MATH_NEXTAFTER_METHODDEF
4268 {NULL, NULL} /* sentinel */
4271 static PyModuleDef_Slot math_slots[] = {
4272 {Py_mod_exec, math_exec},
4273 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
4274 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
4278 PyDoc_STRVAR(module_doc,
4279 "This module provides access to the mathematical functions\n"
4280 "defined by the C standard.");
4282 static struct PyModuleDef mathmodule = {
4283 PyModuleDef_HEAD_INIT,
4285 .m_doc = module_doc,
4287 .m_methods = math_methods,
4288 .m_slots = math_slots,
4294 return PyModuleDef_Init(&mathmodule);