}
/*
-** Two inputs are multiplied to get a 128-bit result. Return
-** the high-order 64 bits of that result.
+** Two inputs are multiplied to get a 128-bit result. Write the
+** lower 64-bits of the result into *pLo, and return the high-order
+** 64 bits.
*/
-static u64 sqlite3Multiply128(u64 a, u64 b){
+static u64 sqlite3Multiply128(u64 a, u64 b, u64 *pLo){
#if (defined(__GNUC__) || defined(__clang__)) \
&& (defined(__x86_64__) || defined(__aarch64__) || defined(__riscv))
- return ((__uint128_t)a * b) >> 64;
+ __uint128_t r = (__int128_t)a * b;
+ *pLo = (u64)r;
+ return (u64)(r>>64);
#elif defined(_MSC_VER) && defined(_M_X64)
+ *pLo = a*b;
return __umulh(a, b);
#else
- u64 a1 = (u32)a;
- u64 a2 = a >> 32;
- u64 b1 = (u32)b;
- u64 b2 = b >> 32;
- u64 p0 = a1 * b1;
- u64 p1 = a1 * b2;
- u64 p2 = a2 * b1;
- u64 p3 = a2 * b2;
- u64 carry = ((p0 >> 32) + (u32)p1 + (u32)p2) >> 32;
- return p3 + (p1 >> 32) + (p2 >> 32) + carry;
+ u64 a0 = (u32)a;
+ u64 a1 = a >> 32;
+ u64 b0 = (u32)b;
+ u64 b1 = b >> 32;
+ u64 a0b0 = a0 * b0;
+ u64 a1b1 = a1 * b1;
+ u64 a0b1 = a0 * b1;
+ u64 a1b0 = a1 * b0;
+ u64 t = (a0b0 >> 32) + (u32)a0b1 + (u32)a1b0;
+ *pLo = (a0b0 & UINT64_C(0xffffffff)) | (t << 32);
+ return a1b1 + (a0b1>>32) + (a1b0>>32) + (t>>32);
#endif
}
+/*
+** A is an unsigned 96-bit integer formed by (a<<32)+aLo.
+** B is an unsigned 64-bit integer.
+**
+** Compute the upper 96 bits of 160-bit result of A*B.
+**
+** Write ((A*B)>>64 & 0xffffffff) (the middle 32 bits of A*B)
+** into *pLo. Return the upper 64 bits of A*B.
+**
+** The lower 64 bits of A*B are discarded.
+*/
+static u64 sqlite3Multiply160(u64 a, u32 aLo, u64 b, u32 *pLo){
+ u64 x2 = a>>32;
+ u64 x1 = a&0xffffffff;
+ u64 x0 = aLo;
+ u64 y1 = b>>32;
+ u64 y0 = b&0xffffffff;
+ u64 x2y1 = x2*y1;
+ u64 r4 = x2y1>>32;
+ u64 x2y0 = x2*y0;
+ u64 x1y1 = x1*y1;
+ u64 r3 = (x2y1 & 0xffffffff) + (x2y0 >>32) + (x1y1 >>32);
+ u64 x1y0 = x1*y0;
+ u64 x0y1 = x0*y1;
+ u64 r2 = (x2y0 & 0xffffffff) + (x1y1 & 0xffffffff) +
+ (x1y0 >>32) + (x0y1>>32);
+ u64 x0y0 = x0*y0;
+ u64 r1 = (x1y0 & 0xffffffff) + (x0y1 & 0xffffffff) +
+ (x0y0 >>32);
+ r2 += r1>>32;
+ r3 += r2>>32;
+ *pLo = r2&0xffffffff;
+ return (r4<<32) + r3;
+}
+
/*
** Return a u64 with the N-th bit set.
*/
** as appropriate so the most significant 64 bits fit exactly into a
** 64-bit unsigned integer.
**
+** Write into *pLo the next 32 significant bits of the answer after
+** the first 64.
+**
** Algorithm:
**
** (1) For p between 0 and 26, return the value directly from the aBase[]
** then refine that result (if necessary) by a single multiplication
** against aBase[].
*/
-static u64 powerOfTen(int p){
+static u64 powerOfTen(int p, u32 *pLo){
static const u64 aBase[] = {
UINT64_C(0x8000000000000000), /* 0: 1.0e+0 << 63 */
UINT64_C(0xa000000000000000), /* 1: 1.0e+1 << 60 */
static const u64 aScale[] = {
UINT64_C(0x8049a4ac0c5811ae), /* 0: 1.0e-351 << 1229 */
UINT64_C(0xcf42894a5dce35ea), /* 1: 1.0e-324 << 1140 */
- UINT64_C(0xa76c582338ed2622), /* 2: 1.0e-297 << 1050 */
+ UINT64_C(0xa76c582338ed2621), /* 2: 1.0e-297 << 1050 */
UINT64_C(0x873e4f75e2224e68), /* 3: 1.0e-270 << 960 */
- UINT64_C(0xda7f5bf590966849), /* 4: 1.0e-243 << 871 */
- UINT64_C(0xb080392cc4349ded), /* 5: 1.0e-216 << 781 */
+ UINT64_C(0xda7f5bf590966848), /* 4: 1.0e-243 << 871 */
+ UINT64_C(0xb080392cc4349dec), /* 5: 1.0e-216 << 781 */
UINT64_C(0x8e938662882af53e), /* 6: 1.0e-189 << 691 */
UINT64_C(0xe65829b3046b0afa), /* 7: 1.0e-162 << 602 */
- UINT64_C(0xba121a4650e4ddec), /* 8: 1.0e-135 << 512 */
+ UINT64_C(0xba121a4650e4ddeb), /* 8: 1.0e-135 << 512 */
UINT64_C(0x964e858c91ba2655), /* 9: 1.0e-108 << 422 */
- UINT64_C(0xf2d56790ab41c2a3), /* 10: 1.0e-81 << 333 */
- UINT64_C(0xc428d05aa4751e4d), /* 11: 1.0e-54 << 243 */
+ UINT64_C(0xf2d56790ab41c2a2), /* 10: 1.0e-81 << 333 */
+ UINT64_C(0xc428d05aa4751e4c), /* 11: 1.0e-54 << 243 */
UINT64_C(0x9e74d1b791e07e48), /* 12: 1.0e-27 << 153 */
- UINT64_C(0x8000000000000000), /* 13: 1.0e+0 << 63 */
+ UINT64_C(0xcccccccccccccccc), /* 13: 1.0e+0 << 63 (Special case) */
UINT64_C(0xcecb8f27f4200f3a), /* 14: 1.0e+27 >> 26 */
- UINT64_C(0xa70c3c40a64e6c52), /* 15: 1.0e+54 >> 116 */
+ UINT64_C(0xa70c3c40a64e6c51), /* 15: 1.0e+54 >> 116 */
UINT64_C(0x86f0ac99b4e8dafd), /* 16: 1.0e+81 >> 206 */
- UINT64_C(0xda01ee641a708dea), /* 17: 1.0e+108 >> 295 */
+ UINT64_C(0xda01ee641a708de9), /* 17: 1.0e+108 >> 295 */
UINT64_C(0xb01ae745b101e9e4), /* 18: 1.0e+135 >> 385 */
UINT64_C(0x8e41ade9fbebc27d), /* 19: 1.0e+162 >> 475 */
- UINT64_C(0xe5d3ef282a242e82), /* 20: 1.0e+189 >> 564 */
+ UINT64_C(0xe5d3ef282a242e81), /* 20: 1.0e+189 >> 564 */
UINT64_C(0xb9a74a0637ce2ee1), /* 21: 1.0e+216 >> 654 */
UINT64_C(0x95f83d0a1fb69cd9), /* 22: 1.0e+243 >> 744 */
- UINT64_C(0xf24a01a73cf2dcd0), /* 23: 1.0e+270 >> 833 */
+ UINT64_C(0xf24a01a73cf2dccf), /* 23: 1.0e+270 >> 833 */
UINT64_C(0xc3b8358109e84f07), /* 24: 1.0e+297 >> 923 */
UINT64_C(0x9e19db92b4e31ba9), /* 25: 1.0e+324 >> 1013 */
};
+ static const unsigned int aScaleLo[] = {
+ 0x205b896d, /* 0: 1.0e-351 << 1229 */
+ 0x52064cad, /* 1: 1.0e-324 << 1140 */
+ 0xaf2af2b8, /* 2: 1.0e-297 << 1050 */
+ 0x5a7744a7, /* 3: 1.0e-270 << 960 */
+ 0xaf39a475, /* 4: 1.0e-243 << 871 */
+ 0xbd8d794e, /* 5: 1.0e-216 << 781 */
+ 0x547eb47b, /* 6: 1.0e-189 << 691 */
+ 0x0cb4a5a3, /* 7: 1.0e-162 << 602 */
+ 0x92f34d62, /* 8: 1.0e-135 << 512 */
+ 0x3a6a07f9, /* 9: 1.0e-108 << 422 */
+ 0xfae27299, /* 10: 1.0e-81 << 333 */
+ 0xaa97e14c, /* 11: 1.0e-54 << 243 */
+ 0x775ea265, /* 12: 1.0e-27 << 153 */
+ 0xcccccccc, /* 13: 1.0e-1 << 67 (Special case) */
+ 0x00000000, /* 14: 1.0e+27 >> 26 */
+ 0x999090b6, /* 15: 1.0e+54 >> 116 */
+ 0x69a028bb, /* 16: 1.0e+81 >> 206 */
+ 0xe80e6f48, /* 17: 1.0e+108 >> 295 */
+ 0x5ec05dd0, /* 18: 1.0e+135 >> 385 */
+ 0x14588f14, /* 19: 1.0e+162 >> 475 */
+ 0x8f1668c9, /* 20: 1.0e+189 >> 564 */
+ 0x6d953e2c, /* 21: 1.0e+216 >> 654 */
+ 0x4abdaf10, /* 22: 1.0e+243 >> 744 */
+ 0xbc633b39, /* 23: 1.0e+270 >> 833 */
+ 0x0a862f81, /* 24: 1.0e+297 >> 923 */
+ 0x6c07a2c2, /* 25: 1.0e+324 >> 1013 */
+ };
int g, n;
- u64 x, y;
+ u64 s, x;
+ u32 lo;
assert( p>=POWERSOF10_FIRST && p<=POWERSOF10_LAST );
if( p<0 ){
+ if( p==(-1) ){
+ *pLo = aScaleLo[13];
+ return aScale[13];
+ }
g = p/27;
n = p%27;
if( n ){
n += 27;
}
}else if( p<27 ){
+ *pLo = 0;
return aBase[p];
}else{
g = p/27;
n = p%27;
}
- y = aScale[g+13];
+ s = aScale[g+13];
if( n==0 ){
- return y;
+ *pLo = aScaleLo[g+13];
+ return s;
}
- x = sqlite3Multiply128(aBase[n],y);
+ x = sqlite3Multiply160(s,aScaleLo[g+13],aBase[n],&lo);
if( (U64_BIT(63) & x)==0 ){
- x = (x<<1)|1;
+ x = x<<1 | ((lo>>31)&1);
+ lo = (lo<<1) | 1;
}
+ *pLo = lo;
return x;
}
*/
static void sqlite3Fp2Convert10(u64 m, int e, int n, u64 *pD, int *pP){
int p;
- u64 h;
+ u64 h, d1;
+ u32 d2;
assert( n>=1 && n<=18 );
p = n - 1 - pwr2to10(e+63);
- h = sqlite3Multiply128(m, powerOfTen(p));
+ h = sqlite3Multiply128(m, powerOfTen(p,&d2), &d1);
assert( -(e + pwr10to2(p) + 2) >= 0 );
assert( -(e + pwr10to2(p) + 1) <= 63 );
if( n==18 ){
** Return an IEEE754 floating point value that approximates d*pow(10,p).
*/
static double sqlite3Fp10Convert2(u64 d, int p){
- u64 out;
- int e1;
- int lz;
- int lp;
- int x;
- u64 h;
+ int b = 64 - countLeadingZeros(d);
+ int lp = pwr10to2(p);
+ int e = 53 - b - lp;
+ if( e > 1074 ){
+ if( e>=1130 ) return 0.0;
+ e = 1074;
+ }
+ if( p<POWERSOF10_FIRST ) return 0.0;
+ if( p>POWERSOF10_LAST ) return INFINITY;
+ int s = -(e-(64-b) + lp + 3);
+ u32 pwr10l;
+ u64 pwr10h = powerOfTen(p, &pwr10l);
+ if( pwr10l!=0 ){
+ pwr10h++;
+ pwr10l = ~pwr10l;
+ }
+ u64 x = d<<(64-b);
+ u64 lo;
+ u64 hi = sqlite3Multiply128(x,pwr10h,&lo);
+ u32 mid1 = lo>>32;
+ u64 sticky = 1;
+ if( (hi & (U64_BIT(s)-1))==0 ) {
+ u32 mid2 = sqlite3Multiply128(x,((u64)pwr10l)<<32,&lo)>>32;
+ sticky = (mid1-mid2 > 1);
+ hi -= mid1 < mid2;
+ }
+ u64 u = (hi>>s) | sticky;
+ int adj = (u >= U64_BIT(55)-2);
+ if( adj ){
+ u = (u>>adj) | (u&1);
+ e -= adj;
+ }
+ u64 m = (u + 1 + ((u>>2)&1)) >> 2;
+ if( e<=(-972) ) return INFINITY;
+ if((m & U64_BIT(52)) != 0){
+ m = (m & ~U64_BIT(52)) | ((u64)(1075-e)<<52);
+ }
double r;
- assert( (d & U64_BIT(63))==0 );
- assert( d!=0 );
- if( p<POWERSOF10_FIRST ){
- return 0.0;
- }
- if( p>POWERSOF10_LAST ){
- return INFINITY;
- }
- lz = countLeadingZeros(d);
- lp = pwr10to2(p);
- e1 = lz - (lp + 11);
- if( e1>1074 ){
- if( e1>=1130 ) return 0.0;
- e1 = 1074;
- }
- h = sqlite3Multiply128(d<<lz, powerOfTen(p));
- x = lz - (e1 + lp + 3);
- assert( x >= 0 );
- assert( x <= 63 );
- out = h >> x;
- if( out >= U64_BIT(55)-2 ){
- out >>= 1;
- e1--;
- }
- if( e1<=(-972) ){
- return INFINITY;
- }
- out = (out + 2) >> 2;
- if( (out & U64_BIT(52))!=0 ){
- out = (out & ~U64_BIT(52)) | ((u64)(1075-e1)<<52);
- }
- memcpy(&r, &out, 8);
+ memcpy(&r,&m,8);
return r;
}