From: drh Date: Fri, 3 Jun 2016 01:01:57 +0000 (+0000) Subject: Performance optimizations on the CSV virtual table. Disallow WITHOUT ROWID X-Git-Tag: version-3.14.0~115^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ac9c3d2c1805e7266fe53b9dcb4d75a22fec0335;p=thirdparty%2Fsqlite.git Performance optimizations on the CSV virtual table. Disallow WITHOUT ROWID virtual tables that have an xUpdate method, for now. FossilOrigin-Name: 3134b3266c36c9d018e8d365ef46ef638c0792f4 --- diff --git a/ext/misc/csv.c b/ext/misc/csv.c index 343c866c95..c0d8ecd3fc 100644 --- a/ext/misc/csv.c +++ b/ext/misc/csv.c @@ -27,6 +27,17 @@ ** filename = "../http.log", ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" ** ); +** +** Instead of specifying a file, the text of the CSV can be loaded using +** the data= parameter. +** +** If the columns=N parameter is supplied, then the CSV file is assumed to have +** N columns. If the columns parameter is omitted, the CSV file is opened +** as soon as the virtual table is constructed and the first row of the CSV +** is read in order to count the columns. +** +** Some extra debugging features (used for testing virtual tables) are available +** if this module is compiled with -DSQLITE_TEST. 
*/ #include <sqlite3ext.h> SQLITE_EXTENSION_INIT1 @@ -202,42 +213,43 @@ static char *csv_read_one_field(CsvReader *p){ if( c=='"' ){ int pc, ppc; int startLine = p->nLine; - int cQuote = c; pc = ppc = 0; while( 1 ){ c = csv_getc(p); - if( c=='\n' ) p->nLine++; - if( c==cQuote ){ - if( pc==cQuote ){ - pc = 0; - continue; + if( c<='"' || pc=='"' ){ + if( c=='\n' ) p->nLine++; + if( c=='"' ){ + if( pc=='"' ){ + pc = 0; + continue; + } + } + if( (c==',' && pc=='"') + || (c=='\n' && pc=='"') + || (c=='\n' && pc=='\r' && ppc=='"') + || (c==EOF && pc=='"') + ){ + do{ p->n--; }while( p->z[p->n]!='"' ); + p->cTerm = c; + break; + } + if( pc=='"' && c!='\r' ){ + csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); + break; + } + if( c==EOF ){ + csv_errmsg(p, "line %d: unterminated %c-quoted field\n", + startLine, '"'); + p->cTerm = c; + break; } - } - if( (c==',' && pc==cQuote) - || (c=='\n' && pc==cQuote) - || (c=='\n' && pc=='\r' && ppc==cQuote) - || (c==EOF && pc==cQuote) - ){ - do{ p->n--; }while( p->z[p->n]!=cQuote ); - p->cTerm = c; - break; - } - if( pc==cQuote && c!='\r' ){ - csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote); - break; - } - if( c==EOF ){ - csv_errmsg(p, "line %d: unterminated %c-quoted field\n", - startLine, cQuote); - p->cTerm = c; - break; } if( csv_append(p, (char)c) ) return 0; ppc = pc; pc = c; } }else{ - while( c!=EOF && c!=',' && c!='\n' ){ + while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ if( csv_append(p, (char)c) ) return 0; c = csv_getc(p); } @@ -287,6 +299,7 @@ typedef struct CsvCursor { sqlite3_vtab_cursor base; /* Base class. Must be first */ CsvReader rdr; /* The CsvReader object */ char **azVal; /* Value of the current row */ + int *aLen; /* Length of each entry */ sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ } CsvCursor; @@ -410,6 +423,9 @@ static int csv_boolean(const char *z){ ** header=YES|NO First row of CSV defines the names of ** columns if "yes". Default "no". 
** columns=N Assume the CSV file contains N columns. +** +** Only available if compiled with SQLITE_TEST: +** ** testflags=N Bitmask of test flags. Optional ** ** If schema= is omitted, then the columns are named "c0", "c1", "c2", @@ -428,7 +444,9 @@ static int csvtabConnect( int bHeader = -1; /* header= flags. -1 means not seen yet */ int rc = SQLITE_OK; /* Result code from this routine */ int i, j; /* Loop counters */ +#ifdef SQLITE_TEST int tstFlags = 0; /* Value for testflags=N parameter */ +#endif int nCol = -99; /* Value of the columns= parameter */ CsvReader sRdr; /* A CSV file reader used to store an error ** message and/or to count the number of columns */ @@ -469,9 +487,11 @@ static int csvtabConnect( goto csvtab_connect_error; } }else +#ifdef SQLITE_TEST if( (zValue = csv_parameter("testflags",9,z))!=0 ){ tstFlags = (unsigned int)atoi(zValue); }else +#endif if( (zValue = csv_parameter("columns",7,z))!=0 ){ if( nCol>0 ){ csv_errmsg(&sRdr, "more than one 'columns' parameter"); @@ -510,7 +530,9 @@ static int csvtabConnect( } pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; pNew->zData = CSV_DATA; CSV_DATA = 0; +#ifdef SQLITE_TEST pNew->tstFlags = tstFlags; +#endif pNew->iStart = bHeader==1 ? 
ftell(sRdr.in) : 0; csv_reader_reset(&sRdr); if( CSV_SCHEMA==0 ){ @@ -557,6 +579,7 @@ static void csvtabCursorRowReset(CsvCursor *pCur){ for(i=0; i<pTab->nCol; i++){ sqlite3_free(pCur->azVal[i]); pCur->azVal[i] = 0; + pCur->aLen[i] = 0; } } @@ -591,10 +614,13 @@ static int csvtabClose(sqlite3_vtab_cursor *cur){ static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ CsvTable *pTab = (CsvTable*)p; CsvCursor *pCur; - pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); + size_t nByte; + nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; + pCur = sqlite3_malloc( nByte ); if( pCur==0 ) return SQLITE_NOMEM; - memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); + memset(pCur, 0, nByte); pCur->azVal = (char**)&pCur[1]; + pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; *ppCursor = &pCur->base; if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ csv_xfer_error(pTab, &pCur->rdr); @@ -613,23 +639,33 @@ static int csvtabNext(sqlite3_vtab_cursor *cur){ CsvTable *pTab = (CsvTable*)cur->pVtab; int i = 0; char *z; - csvtabCursorRowReset(pCur); do{ z = csv_read_one_field(&pCur->rdr); if( z==0 ){ csv_xfer_error(pTab, &pCur->rdr); break; } - z = sqlite3_mprintf("%s", z); - if( z==0 ){ - csv_errmsg(&pCur->rdr, "out of memory"); - csv_xfer_error(pTab, &pCur->rdr); - break; - } if( i<pTab->nCol ){ - pCur->azVal[i++] = z; + if( pCur->aLen[i] < pCur->rdr.n+1 ){ + char *zNew = sqlite3_realloc(pCur->azVal[i], pCur->rdr.n+1); + if( zNew==0 ){ + csv_errmsg(&pCur->rdr, "out of memory"); + csv_xfer_error(pTab, &pCur->rdr); + break; + } + pCur->azVal[i] = zNew; + pCur->aLen[i] = pCur->rdr.n+1; + } + memcpy(pCur->azVal[i], z, pCur->rdr.n+1); + i++; } - }while( z!=0 && pCur->rdr.cTerm==',' ); + }while( pCur->rdr.cTerm==',' ); + while( i<pTab->nCol ){ + sqlite3_free(pCur->azVal[i]); + pCur->azVal[i] = 0; + pCur->aLen[i] = 0; + i++; + } if( z==0 || pCur->rdr.cTerm==EOF ){ pCur->iRowid = -1; }else{ @@ -707,37 +743,37 @@ static int csvtabBestIndex( sqlite3_vtab 
*tab, sqlite3_index_info *pIdxInfo ){ - CsvTable *pTab = (CsvTable*)tab; - int i; - int nConst = 0; pIdxInfo->estimatedCost = 1000000; - if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){ - return SQLITE_OK; - } - /* The usual (and sensible) case is to take the "return SQLITE_OK" above. - ** The code below only runs when testflags=1. The code below - ** generates an artifical and unrealistic plan which is useful - ** for testing virtual table logic but is not helpfulto real applications. - ** - ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual - ** table (even though it is not) and the cost of running the virtual table - ** is reduced from 1 million to just 10. The constraints are *not* marked - ** as omittable, however, so the query planner should still generate a - ** plan that gives a correct answer, even if they plan is not optimal. - */ - for(i=0; i<pIdxInfo->nConstraint; i++){ - unsigned char op; - if( pIdxInfo->aConstraint[i].usable==0 ) continue; - op = pIdxInfo->aConstraint[i].op; - if( op==SQLITE_INDEX_CONSTRAINT_EQ - || op==SQLITE_INDEX_CONSTRAINT_LIKE - || op==SQLITE_INDEX_CONSTRAINT_GLOB - ){ - pIdxInfo->estimatedCost = 10; - pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; - nConst++; +#ifdef SQLITE_TEST + if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ + /* The usual (and sensible) case is to always do a full table scan. + ** The code in this branch only runs when testflags=1. This code + ** generates an artifical and unrealistic plan which is useful + ** for testing virtual table logic but is not helpful to real applications. + ** + ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual + ** table (even though it is not) and the cost of running the virtual table + ** is reduced from 1 million to just 10. The constraints are *not* marked + ** as omittable, however, so the query planner should still generate a + ** plan that gives a correct answer, even if they plan is not optimal. 
+ */ + int i; + int nConst = 0; + for(i=0; i<pIdxInfo->nConstraint; i++){ + unsigned char op; + if( pIdxInfo->aConstraint[i].usable==0 ) continue; + op = pIdxInfo->aConstraint[i].op; + if( op==SQLITE_INDEX_CONSTRAINT_EQ + || op==SQLITE_INDEX_CONSTRAINT_LIKE + || op==SQLITE_INDEX_CONSTRAINT_GLOB + ){ + pIdxInfo->estimatedCost = 10; + pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; + nConst++; + } } } +#endif return SQLITE_OK; } @@ -765,6 +801,41 @@ static sqlite3_module CsvModule = { 0, /* xRename */ }; +#ifdef SQLITE_TEST +/* +** For virtual table testing, make a version of the CSV virtual table +** available that has an xUpdate function. But the xUpdate always returns +** SQLITE_READONLY since the CSV file is not really writable. +*/ +static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ + return SQLITE_READONLY; +} +static sqlite3_module CsvModuleFauxWrite = { + 0, /* iVersion */ + csvtabCreate, /* xCreate */ + csvtabConnect, /* xConnect */ + csvtabBestIndex, /* xBestIndex */ + csvtabDisconnect, /* xDisconnect */ + csvtabDisconnect, /* xDestroy */ + csvtabOpen, /* xOpen - open a cursor */ + csvtabClose, /* xClose - close a cursor */ + csvtabFilter, /* xFilter - configure scan constraints */ + csvtabNext, /* xNext - advance a cursor */ + csvtabEof, /* xEof - check for end of scan */ + csvtabColumn, /* xColumn - read data */ + csvtabRowid, /* xRowid - read data */ + csvtabUpdate, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ +}; +#endif /* SQLITE_TEST */ + + + #ifdef _WIN32 __declspec(dllexport) #endif @@ -778,6 +849,13 @@ int sqlite3_csv_init( char **pzErrMsg, const sqlite3_api_routines *pApi ){ + int rc; SQLITE_EXTENSION_INIT2(pApi); - return sqlite3_create_module(db, "csv", &CsvModule, 0); + rc = sqlite3_create_module(db, "csv", &CsvModule, 0); +#ifdef SQLITE_TEST + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); 
+ } +#endif + return rc; } diff --git a/manifest b/manifest index 2da7d447f7..21eb765e24 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sCSV\sextension\sto\sthe\stest\sfixture.\s\sFix\sa\smemory\sleak\sin\sthe\sCSV\nextension.\s\sAdd\stest\scases\sfor\sthe\sCSV\sextension,\sincluding\sone\sthat\suses\na\sWITHOUT\sROWID\svirtual\stable\sparticipating\sin\sthe\sOR\soptimization. -D 2016-06-02T23:13:21.510 +C Performance\soptimizations\son\sthe\sCSV\svirtual\stable.\s\sDisallow\sWITHOUT\sROWID\nvirtual\stables\sthat\shave\san\sxUpdate\smethod,\sfor\snow. +D 2016-06-03T01:01:57.592 F Makefile.in 7321ef0b584224781ec7731408857fa8962c32cc F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 831503fc4e988f571590af1405645fff121b5f1e @@ -206,7 +206,7 @@ F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 F ext/misc/amatch.c 211108e201105e4bb0c076527b8cfd34330fc234 F ext/misc/closure.c 0d2a038df8fbae7f19de42e7c7d71f2e4dc88704 F ext/misc/compress.c 122faa92d25033d6c3f07c39231de074ab3d2e83 -F ext/misc/csv.c 58ad4e9eb25310a2712e0cb78592dc15924f0379 +F ext/misc/csv.c f01126ba170fd4ef7c752b156568a80c912d4441 F ext/misc/eval.c f971962e92ebb8b0a4e6b62949463ee454d88fa2 F ext/misc/fileio.c d4171c815d6543a9edef8308aab2951413cd8d0f F ext/misc/fuzzer.c 7c64b8197bb77b7d64eff7cac7848870235d4c25 @@ -456,7 +456,7 @@ F src/vdbeblob.c c9f2f494b911c6fa34efd9803f0a10807da80f77 F src/vdbemem.c 5cfef60e60e19cab6275d1b975bf4c791d575beb F src/vdbesort.c 91fda3909326860382b0ca8aa251e609c6a9d62c F src/vdbetrace.c f75c5455d8cf389ef86a8bfdfd3177e0e3692484 -F src/vtab.c a80b4e40ed8687daa3303e970d938b1f613a3eeb +F src/vtab.c a9e8175477539660814a4704e8eaba6df3678651 F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 02eeecc265f6ffd0597378f5d8ae9070b62a406a F src/wal.h 2f7c831cf3b071fa548bf2d5cac640846a7ff19c @@ -612,7 +612,7 @@ F test/crashM.test d95f59046fa749b0d0822edf18a717788c8f318d F test/crashtest1.c 
09c1c7d728ccf4feb9e481671e29dda5669bbcc2 F test/createtab.test b5de160630b209c4b8925bdcbbaf48cc90b67fe8 F test/cse.test 277350a26264495e86b1785f34d2d0c8600e021c -F test/csv01.test 44a9786f6cb4dcf40f3d5d59844dbb88d2265e5d +F test/csv01.test 69aecc17f11f597390b47700aec8d748b8364140 F test/ctime.test 7bd009071e242aac4f18521581536b652b789a47 F test/cursorhint.test 7bc346788390475e77a345da2b92270d04d35856 F test/date.test 984ac1e3e5e031386866f034006148d3972b4a65 @@ -1498,7 +1498,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 769191042aa14e6eccdfe2391fc1011171d5c9ad -R 7cb009be35227902926fe0d852282f91 +P 95f483e86e30ae68108904400e18ed41d389446b +R 110a9ed97c9cb7052ae396116c46b04d U drh -Z 7f2a8b3dbd34984ea991a68c56b4587b +Z 863bf523824cd922d795a07ca8451a6d diff --git a/manifest.uuid b/manifest.uuid index 816aa44fdf..935a081c06 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -95f483e86e30ae68108904400e18ed41d389446b \ No newline at end of file +3134b3266c36c9d018e8d365ef46ef638c0792f4 \ No newline at end of file diff --git a/src/vtab.c b/src/vtab.c index b4268536a0..30cdd43e4b 100644 --- a/src/vtab.c +++ b/src/vtab.c @@ -762,6 +762,9 @@ int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ pNew->nCol = 0; pNew->aCol = 0; assert( pTab->pIndex==0 ); + if( !HasRowid(pNew) && pCtx->pVTable->pMod->pModule->xUpdate!=0 ){ + rc = SQLITE_ERROR; + } pIdx = pNew->pIndex; if( pIdx ){ assert( pIdx->pNext==0 ); diff --git a/test/csv01.test b/test/csv01.test index aaec857992..ba3a947dfc 100644 --- a/test/csv01.test +++ b/test/csv01.test @@ -88,4 +88,20 @@ do_execsql_test 3.1 { SELECT a FROM t3 WHERE +b=6 OR c=7 OR d=12 ORDER BY +a; } {5 9} +do_catchsql_test 4.0 { + DROP TABLE t3; + CREATE VIRTUAL TABLE temp.t4 
USING csv_wr( + data= +'1,2,3,4 +5,6,7,8 +9,10,11,12 +13,14,15,16 +', + columns=4, + schema= + 'CREATE TABLE t3(a PRIMARY KEY,b TEXT,c TEXT,d TEXT) WITHOUT ROWID', + testflags=1 + ); +} {1 {vtable constructor failed: t4}} + finish_test