From: drh Date: Wed, 1 Feb 2017 15:24:32 +0000 (+0000) Subject: Use compiler intrinsic functions (when available) for byteswapping in RTREE. X-Git-Tag: version-3.17.0~42 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=03626e38127d4d0667eb23243c479719c1d30b9a;p=thirdparty%2Fsqlite.git Use compiler intrinsic functions (when available) for byteswapping in RTREE. FossilOrigin-Name: 82fcd54a5941c20895ffc22d8009c1ebdae44eda --- diff --git a/ext/rtree/rtree.c b/ext/rtree/rtree.c index f1db60b22f..637fb79d05 100644 --- a/ext/rtree/rtree.c +++ b/ext/rtree/rtree.c @@ -362,6 +362,65 @@ struct RtreeMatchArg { # define MIN(x,y) ((x) > (y) ? (y) : (x)) #endif +/* What version of GCC is being used. 0 means GCC is not being used */ +#ifndef GCC_VERSION +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif +#endif + +/* What version of CLANG is being used. 0 means CLANG is not being used */ +#ifndef CLANG_VERSION +#if defined(__clang__) && !defined(_WIN32) +# define CLANG_VERSION \ + (__clang_major__*1000000+__clang_minor__*1000+__clang_patchlevel__) +#else +# define CLANG_VERSION 0 +#endif +#endif + +/* The testcase() macro should already be defined in the amalgamation. If +** it is not, make it a no-op. +*/ +#ifndef SQLITE_AMALGMATION +# define testcase(X) +#endif + +/* +** Macros to determine whether the machine is big or little endian, +** and whether or not that determination is run-time or compile-time. +** +** For best performance, an attempt is made to guess at the byte-order +** using C-preprocessor macros. If that is unsuccessful, or if +** -DSQLITE_RUNTIME_BYTEORDER=1 is set, then byte-order is determined +** at run-time. +*/ +#ifndef SQLITE_BYTEORDER +#if (defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ + defined(__arm__)) && !defined(SQLITE_RUNTIME_BYTEORDER) +# define SQLITE_BYTEORDER 1234 +#endif +#if (defined(sparc) || defined(__ppc__)) \ + && !defined(SQLITE_RUNTIME_BYTEORDER) +# define SQLITE_BYTEORDER 4321 +#endif +# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ +#endif + + +/* What version of MSVC is being used. 0 means MSVC is not being used */ +#ifndef MSVC_VERSION +#if defined(_MSC_VER) +# define MSVC_VERSION _MSC_VER +#else +# define MSVC_VERSION 0 +#endif +#endif + /* ** Functions to deserialize a 16 bit integer, 32 bit real number and ** 64 bit integer. The deserialized value is returned. @@ -370,12 +429,16 @@ static int readInt16(u8 *p){ return (p[0]<<8) + p[1]; } static void readCoord(u8 *p, RtreeCoord *pCoord){ -#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 - memcpy(&pCoord->u, p, 4); + assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + pCoord->u = _byteswap_ulong(*(u32*)p); +#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000) + pCoord->u = __builtin_bswap32(*(u32*)p); +#elif SQLITE_BYTEORDER==1234 pCoord->u = ((pCoord->u>>24)&0xff)|((pCoord->u>>8)&0xff00)| ((pCoord->u&0xff)<<24)|((pCoord->u&0xff00)<<8); -#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 - memcpy(&pCoord->u, p, 4); +#elif SQLITE_BYTEORDER==4321 + pCoord->u = *(u32*)p; #else pCoord->u = ( (((u32)p[0]) << 24) + @@ -386,6 +449,20 @@ static void readCoord(u8 *p, RtreeCoord *pCoord){ #endif } static i64 readInt64(u8 *p){ + testcase( ((((char*)p) - (char*)0)&7)!=0 ); /* not always 8-byte aligned */ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + u64 x; + memcpy(&x, p, 8); + return (i64)_byteswap_uint64(x); +#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000) + u64 x; + memcpy(&x, p, 8); + return (i64)__builtin_bswap64(x); +#elif SQLITE_BYTEORDER==4321 + i64 x; + memcpy(&x, p, 8); + return x; +#else return ( (((i64)p[0]) << 56) + (((i64)p[1]) << 48) + @@ -396,6 +473,7 @@ static i64 readInt64(u8 *p){ (((i64)p[6]) << 8) + (((i64)p[7]) << 0) ); +#endif } /* @@ -410,16 +488,38 @@ static int writeInt16(u8 *p, int i){ } static int writeCoord(u8 *p, RtreeCoord *pCoord){ u32 i; + assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ assert( sizeof(RtreeCoord)==4 ); assert( sizeof(u32)==4 ); +#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000) + i = __builtin_bswap32(pCoord->u); + memcpy(p, &i, 4); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + i = _byteswap_ulong(pCoord->u); + memcpy(p, &i, 4); +#elif SQLITE_BYTEORDER==4321 + i = pCoord->u; + memcpy(p, &i, 4); +#else i = pCoord->u; p[0] = (i>>24)&0xFF; p[1] = (i>>16)&0xFF; p[2] = (i>> 8)&0xFF; p[3] = (i>> 0)&0xFF; +#endif return 4; } static int writeInt64(u8 *p, i64 i){ + testcase( ((((char*)p) - (char*)0)&7)!=0 ); /* Not always 8-byte aligned */ +#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000) + i = (i64)__builtin_bswap64((u64)i); + memcpy(p, &i, 8); +#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 + i = (i64)_byteswap_uint64((u64)i); + memcpy(p, &i, 8); +#elif SQLITE_BYTEORDER==4321 + memcpy(p, &i, 8); +#else p[0] = (i>>56)&0xFF; p[1] = (i>>48)&0xFF; p[2] = (i>>40)&0xFF; @@ -428,6 +528,7 @@ static int writeInt64(u8 *p, i64 i){ p[5] = (i>>16)&0xFF; p[6] = (i>> 8)&0xFF; p[7] = (i>> 0)&0xFF; +#endif return 8; } @@ -754,11 +855,6 @@ static void nodeGetCell( pData += 8; ii += 2; }while( iinDim*2 ); -#if 0 - for(ii=0; iinDim*2; ii++){ - readCoord(&pData[ii*4], &pCoord[ii]); - } -#endif } @@ -927,15 +1023,22 @@ static int rtreeEof(sqlite3_vtab_cursor *cur){ ** false. a[] is the four bytes of the on-disk record to be decoded. ** Store the results in "r". ** -** There are three versions of this macro, one each for little-endian and -** big-endian processors and a third generic implementation. The endian- -** specific implementations are much faster and are preferred if the -** processor endianness is known at compile-time. The SQLITE_BYTEORDER -** macro is part of sqliteInt.h and hence the endian-specific -** implementation will only be used if this module is compiled as part -** of the amalgamation. -*/ -#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 +** There are five versions of this macro. The last one is generic. The +** other four are various architectures-specific optimizations. +*/ +#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = _byteswap_ulong(*(u32*)a); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000) +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = __builtin_bswap32(*(u32*)a); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif SQLITE_BYTEORDER==1234 #define RTREE_DECODE_COORD(eInt, a, r) { \ RtreeCoord c; /* Coordinate decoded */ \ memcpy(&c.u,a,4); \ @@ -943,7 +1046,7 @@ static int rtreeEof(sqlite3_vtab_cursor *cur){ ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ } -#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 +#elif SQLITE_BYTEORDER==4321 #define RTREE_DECODE_COORD(eInt, a, r) { \ RtreeCoord c; /* Coordinate decoded */ \ memcpy(&c.u,a,4); \ @@ -986,6 +1089,7 @@ static int rtreeCallbackConstraint( i = 0; do{ pCellData += 8; + assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]); RTREE_DECODE_COORD(eInt, (pCellData+4), aCoord[i+1]); i+= 2; @@ -1029,6 +1133,7 @@ static void rtreeNonleafConstraint( assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE || p->op==RTREE_GT || p->op==RTREE_EQ ); + assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ switch( p->op ){ case RTREE_LE: case RTREE_LT: @@ -1069,6 +1174,7 @@ static void rtreeLeafConstraint( assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE || p->op==RTREE_GT || p->op==RTREE_EQ ); pCellData += 8 + p->iCoord*4; + assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ RTREE_DECODE_COORD(eInt, pCellData, xN); switch( p->op ){ case RTREE_LE: if( xN <= p->u.rValue ) return; break; diff --git a/manifest b/manifest index 8145330051..bc185a2197 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sbuild\sby\smaking\sthe\sOPFLAG_ISNOOP\smacro\savailable\sunconditionally. -D 2017-02-01T15:19:29.202 +C Use\scompiler\sintrinsic\sfunctions\s(when\savailable)\sfor\sbyteswapping\sin\sRTREE. +D 2017-02-01T15:24:32.835 F Makefile.in 5f415e7867296d678fed2e6779aea10c1318b4bc F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc b8ca53350ae545e3562403d5da2a69cec79308da @@ -264,7 +264,7 @@ F ext/rbu/sqlite3rbu.c bb0de6cdbdb14a7d55a097238a434b7e99caf318 F ext/rbu/sqlite3rbu.h 6fb6294c34a9ca93b5894a33bca530c6f08decba F ext/rbu/test_rbu.c 5aa22616afac6f71ebd3d9bc9bf1006cfabcca88 F ext/rtree/README 6315c0d73ebf0ec40dedb5aa0e942bc8b54e3761 -F ext/rtree/rtree.c f66b3d232ea98285548107caaac55f110a0b6709 +F ext/rtree/rtree.c d8ef14e964a9390197ba7e511aa47b89dc39416c F ext/rtree/rtree.h 834dbcb82dc85b2481cde6a07cdadfddc99e9b9e F ext/rtree/rtree1.test 42dadfc7b44a436cd74a1bebc0b9b689e4eaf7ec F ext/rtree/rtree2.test acbb3a4ce0f4fbc2c304d2b4b784cfa161856bba @@ -1552,7 +1552,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 7f8570208c06c056d426e9299d9930181a0464f8 -R bbb1f490f7311ec1c087c3ef4607e5e0 +P 510933cb24c5bf883265af3a6075e60a4b5ffa37 +R 13778722a0ec2443f74339612adbbcf9 U drh -Z 645035379b970696e5aed344f8a4be19 +Z b1e5b219635e474580d8132f194fff51 diff --git a/manifest.uuid b/manifest.uuid index 0823e8bab6..6e2e6a9d62 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -510933cb24c5bf883265af3a6075e60a4b5ffa37 \ No newline at end of file +82fcd54a5941c20895ffc22d8009c1ebdae44eda \ No newline at end of file