From: drh Date: Tue, 31 May 2016 16:22:48 +0000 (+0000) Subject: Add the "csv" virtual table for reading CSV files, as an extension in X-Git-Tag: version-3.14.0~121 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=724b18966deb18ae6b1559c31de419e13fea465d;p=thirdparty%2Fsqlite.git Add the "csv" virtual table for reading CSV files, as an extension in the ext/misc/ subfolder. FossilOrigin-Name: 00d3570c8bb96469c984903e20de589e998d4636 --- diff --git a/ext/misc/csv.c b/ext/misc/csv.c new file mode 100644 index 0000000000..62521d81f7 --- /dev/null +++ b/ext/misc/csv.c @@ -0,0 +1,622 @@ +/* +** 2016-05-28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains the implementation of an SQLite virtual table for +** reading CSV files. +** +** Usage: +** +** .load ./csv +** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); +** SELECT * FROM csv; +** +** The columns are named "c1", "c2", "c3", ... by default. But the +** application can define its own CREATE TABLE statement as an additional +** parameter. For example: +** +** CREATE VIRTUAL TABLE temp.csv2 USING csv( +** filename = "../http.log", +** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" +** ); +*/ +#include +SQLITE_EXTENSION_INIT1 +#include +#include +#include +#include +#include +#include + +/* +** A macro to hint to the compiler that a function should not be +** inlined. +*/ +#if defined(__GNUC__) +# define CSV_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && _MSC_VER>=1310 +# define CSV_NOINLINE __declspec(noinline) +#else +# define CSV_NOINLINE +#endif + + +/* Max size of the error message in a CsvReader */ +#define CSV_MXERR 200 + +/* A context object used when read a CSV file. */ +typedef struct CsvReader CsvReader; +struct CsvReader { + FILE *in; /* Read the CSV text from this input stream */ + char *z; /* Accumulated text for a field */ + int n; /* Number of bytes in z */ + int nAlloc; /* Space allocated for z[] */ + int nLine; /* Current line number */ + int cTerm; /* Character that terminated the most recent field */ + char zErr[CSV_MXERR]; /* Error message */ +}; + +/* Initialize a CsvReader object */ +static void csv_reader_init(CsvReader *p){ + memset(p, 0, sizeof(*p)); +} + +/* Close and reset a CsvReader object */ +static void csv_reader_reset(CsvReader *p){ + if( p->in ) fclose(p->in); + sqlite3_free(p->z); + csv_reader_init(p); +} + +/* Report an error on a CsvReader */ +static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ + va_list ap; + va_start(ap, zFormat); + sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); + va_end(ap); +} + +/* Open the file associated with a CsvReader +** Return the number of errors. +*/ +static int csv_reader_open(CsvReader *p, const char *zFilename){ + p->in = fopen(zFilename, "rb"); + if( p->in==0 ){ + csv_errmsg(p, "cannot open '%s' for reading", zFilename); + return 1; + } + return 0; +} + +/* Increase the size of p->z and append character c to the end. +** Return 0 on success and non-zero if there is an OOM error */ +static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ + char *zNew; + int nNew = p->nAlloc*2 + 100; + zNew = sqlite3_realloc64(p->z, nNew); + if( zNew ){ + p->z = zNew; + p->nAlloc = nNew; + p->z[p->n++] = c; + return 0; + }else{ + csv_errmsg(p, "out of memory"); + return 1; + } +} + +/* Append a single character to the CsvReader.z[] array. +** Return 0 on success and non-zero if there is an OOM error */ +static int csv_append(CsvReader *p, char c){ + if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); + p->z[p->n++] = c; + return 0; +} + +/* Read a single field of CSV text. Compatible with rfc4180 and extended +** with the option of having a separator other than ",". +** +** + Input comes from p->in. +** + Store results in p->z of length p->n. Space to hold p->z comes +** from sqlite3_malloc64(). +** + Keep track of the line number in p->nLine. +** + Store the character that terminates the field in p->cTerm. Store +** EOF on end-of-file. +** +** Return "" at EOF. Return 0 on an OOM error. +*/ +static char *csv_read_one_field(CsvReader *p){ + int c; + p->n = 0; + c = fgetc(p->in); + if( c==EOF ){ + p->cTerm = EOF; + return ""; + } + if( c=='"' ){ + int pc, ppc; + int startLine = p->nLine; + int cQuote = c; + pc = ppc = 0; + while( 1 ){ + c = fgetc(p->in); + if( c=='\n' ) p->nLine++; + if( c==cQuote ){ + if( pc==cQuote ){ + pc = 0; + continue; + } + } + if( (c==',' && pc==cQuote) + || (c=='\n' && pc==cQuote) + || (c=='\n' && pc=='\r' && ppc==cQuote) + || (c==EOF && pc==cQuote) + ){ + do{ p->n--; }while( p->z[p->n]!=cQuote ); + p->cTerm = c; + break; + } + if( pc==cQuote && c!='\r' ){ + csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote); + break; + } + if( c==EOF ){ + csv_errmsg(p, "line %d: unterminated %c-quoted field\n", + startLine, cQuote); + p->cTerm = c; + break; + } + if( csv_append(p, (char)c) ) return 0; + ppc = pc; + pc = c; + } + }else{ + while( c!=EOF && c!=',' && c!='\n' ){ + if( csv_append(p, (char)c) ) return 0; + c = fgetc(p->in); + } + if( c=='\n' ){ + p->nLine++; + if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; + } + p->cTerm = c; + } + if( p->z ) p->z[p->n] = 0; + return p->z; +} + + +/* Forward references to the various virtual table methods implemented +** in this file. */ +static int csvtabCreate(sqlite3*, void*, int, const char*const*, + sqlite3_vtab**,char**); +static int csvtabConnect(sqlite3*, void*, int, const char*const*, + sqlite3_vtab**,char**); +static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); +static int csvtabDisconnect(sqlite3_vtab*); +static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); +static int csvtabClose(sqlite3_vtab_cursor*); +static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, + int argc, sqlite3_value **argv); +static int csvtabNext(sqlite3_vtab_cursor*); +static int csvtabEof(sqlite3_vtab_cursor*); +static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); +static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); + +/* An instance of the CSV virtual table */ +typedef struct CsvTable { + sqlite3_vtab base; /* Base class. Must be first */ + char *zFilename; /* Name of the CSV file */ + long iStart; /* Offset to start of data in zFilename */ + int nCol; /* Number of columns in the CSV file */ +} CsvTable; + +/* A cursor for the CSV virtual table */ +typedef struct CsvCursor { + sqlite3_vtab_cursor base; /* Base class. Must be first */ + CsvReader rdr; /* The CsvReader object */ + char **azVal; /* Value of the current row */ + sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ +} CsvCursor; + +/* Transfer error message text from a reader into a CsvTable */ +static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ + sqlite3_free(pTab->base.zErrMsg); + pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); +} + +/* +** This method is the destructor fo a CsvTable object. +*/ +static int csvtabDisconnect(sqlite3_vtab *pVtab){ + CsvTable *p = (CsvTable*)pVtab; + sqlite3_free(p->zFilename); + sqlite3_free(p); + return SQLITE_OK; +} + +/* Skip leading whitespace. Return a pointer to the first non-whitespace +** character, or to the zero terminator if the string has only whitespace */ +static const char *csv_skip_whitespace(const char *z){ + while( isspace((unsigned char)z[0]) ) z++; + return z; +} + +/* Remove trailing whitespace from the end of string z[] */ +static void csv_trim_whitespace(char *z){ + size_t n = strlen(z); + while( n>0 && isspace((unsigned char)z[n]) ) n--; + z[n] = 0; +} + +/* Dequote the string */ +static void csv_dequote(char *z){ + int i, j; + char cQuote = z[0]; + size_t n; + + if( cQuote!='\'' && cQuote!='"' ) return; + n = strlen(z); + if( n<2 || z[n-1]!=z[0] ) return; + for(i=1, j=0; i=0 ){ + csv_errmsg(&sRdr, "more than one 'header' parameter"); + goto csvtab_connect_error; + } + x = csv_boolean(zValue); + if( x==1 ){ + bHeader = 1; + }else if( x==0 ){ + bHeader = 0; + }else{ + csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); + goto csvtab_connect_error; + } + }else + { + csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); + goto csvtab_connect_error; + } + } + if( zFilename==0 ){ + csv_errmsg(&sRdr, "missing 'filename' parameter"); + goto csvtab_connect_error; + } + if( csv_reader_open(&sRdr, zFilename) ){ + goto csvtab_connect_error; + } + pNew = sqlite3_malloc( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) goto csvtab_connect_oom; + memset(pNew, 0, sizeof(*pNew)); + do{ + const char *z = csv_read_one_field(&sRdr); + if( z==0 ) goto csvtab_connect_oom; + pNew->nCol++; + }while( sRdr.cTerm==',' ); + pNew->zFilename = zFilename; + zFilename = 0; + pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; + csv_reader_reset(&sRdr); + if( zSchema==0 ){ + char *zSep = ""; + zSchema = sqlite3_mprintf("CREATE TABLE x("); + if( zSchema==0 ) goto csvtab_connect_oom; + for(i=0; inCol; i++){ + zSchema = sqlite3_mprintf("%z%sc%d TEXT",zSchema, zSep, i); + zSep = ","; + } + zSchema = sqlite3_mprintf("%z);", zSchema); + } + rc = sqlite3_declare_vtab(db, zSchema); + if( rc ) goto csvtab_connect_error; + sqlite3_free(zSchema); + return SQLITE_OK; + +csvtab_connect_oom: + rc = SQLITE_NOMEM; + csv_errmsg(&sRdr, "out of memory"); + +csvtab_connect_error: + if( pNew ) csvtabDisconnect(&pNew->base); + sqlite3_free(zFilename); + sqlite3_free(zSchema); + if( sRdr.zErr[0] ){ + sqlite3_free(*pzErr); + *pzErr = sqlite3_mprintf("%s", sRdr.zErr); + } + csv_reader_reset(&sRdr); + return rc; +} + +/* +** Reset the current row content held by a CsvCursor. +*/ +static void csvtabCursorRowReset(CsvCursor *pCur){ + CsvTable *pTab = (CsvTable*)pCur->base.pVtab; + int i; + for(i=0; inCol; i++){ + sqlite3_free(pCur->azVal[i]); + pCur->azVal[i] = 0; + } +} + +/* +** The xConnect and xCreate methods do the same thing, but they must be +** different so that the virtual table is not an eponymous virtual table. +*/ +static int csvtabCreate( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Destructor for a CsvCursor. +*/ +static int csvtabClose(sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + csvtabCursorRowReset(pCur); + csv_reader_reset(&pCur->rdr); + sqlite3_free(cur); + return SQLITE_OK; +} + +/* +** Constructor for a new CsvTable cursor object. +*/ +static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + CsvTable *pTab = (CsvTable*)p; + CsvCursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); + pCur->azVal = (char**)&pCur[1]; + *ppCursor = &pCur->base; + if( csv_reader_open(&pCur->rdr, pTab->zFilename) ){ + csv_xfer_error(pTab, &pCur->rdr); + return SQLITE_ERROR; + } + return SQLITE_OK; +} + + +/* +** Advance a CsvCursor to its next row of input. +** Set the EOF marker if we reach the end of input. +*/ +static int csvtabNext(sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + CsvTable *pTab = (CsvTable*)cur->pVtab; + int i = 0; + char *z; + csvtabCursorRowReset(pCur); + do{ + z = csv_read_one_field(&pCur->rdr); + if( z==0 ){ + csv_xfer_error(pTab, &pCur->rdr); + break; + } + z = sqlite3_mprintf("%s", z); + if( z==0 ){ + csv_errmsg(&pCur->rdr, "out of memory"); + csv_xfer_error(pTab, &pCur->rdr); + break; + } + if( inCol ){ + pCur->azVal[i++] = z; + } + }while( z!=0 && pCur->rdr.cTerm==',' ); + if( z==0 || pCur->rdr.cTerm==EOF ){ + pCur->iRowid = -1; + }else{ + pCur->iRowid++; + } + return SQLITE_OK; +} + +/* +** Return values of columns for the row at which the CsvCursor +** is currently pointing. +*/ +static int csvtabColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + CsvCursor *pCur = (CsvCursor*)cur; + CsvTable *pTab = (CsvTable*)cur->pVtab; + if( i>=0 && inCol && pCur->azVal[i]!=0 ){ + sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. +*/ +static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + CsvCursor *pCur = (CsvCursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int csvtabEof(sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + return pCur->iRowid<0; +} + +/* +** Only a full table scan is supported. So xFilter simply rewinds to +** the beginning. +*/ +static int csvtabFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + CsvCursor *pCur = (CsvCursor*)pVtabCursor; + CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; + pCur->iRowid = 0; + fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); + return csvtabNext(pVtabCursor); +} + +/* +** Only a forwards full table scan is supported. xBestIndex is a no-op. +*/ +static int csvtabBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + return SQLITE_OK; +} + + +static sqlite3_module CsvModule = { + 0, /* iVersion */ + csvtabCreate, /* xCreate */ + csvtabConnect, /* xConnect */ + csvtabBestIndex, /* xBestIndex */ + csvtabDisconnect, /* xDisconnect */ + csvtabDisconnect, /* xDestroy */ + csvtabOpen, /* xOpen - open a cursor */ + csvtabClose, /* xClose - close a cursor */ + csvtabFilter, /* xFilter - configure scan constraints */ + csvtabNext, /* xNext - advance a cursor */ + csvtabEof, /* xEof - check for end of scan */ + csvtabColumn, /* xColumn - read data */ + csvtabRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ +}; + +#ifdef _WIN32 +__declspec(dllexport) +#endif +/* +** This routine is called when the extension is loaded. The new +** CSV virtual table module is registered with the calling database +** connection. +*/ +int sqlite3_csv_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi); + return sqlite3_create_module(db, "csv", &CsvModule, 0); +} diff --git a/manifest b/manifest index ce51d87c84..e2d03ea381 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sFTS5\sproblem\s(segfault\sor\sincorrect\squery\sresults)\swith\s"...\sMATCH\s'x\sOR\sy'\sORDER\sBY\srank"\squeries\swhen\seither\stoken\s'x'\sor\s'y'\sis\scompletely\sabsent\sfrom\sthe\sdataset. -D 2016-05-30T08:28:21.370 +C Add\sthe\s"csv"\svirtual\stable\sfor\sreading\sCSV\sfiles,\sas\san\sextension\sin\nthe\sext/misc/\ssubfolder. +D 2016-05-31T16:22:48.849 F Makefile.in f59e0763ff448719fc1bd25513882b0567286317 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 306d73e854b1a92ea06e5d1e637faa5c44de53c7 @@ -206,6 +206,7 @@ F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 F ext/misc/amatch.c 211108e201105e4bb0c076527b8cfd34330fc234 F ext/misc/closure.c 0d2a038df8fbae7f19de42e7c7d71f2e4dc88704 F ext/misc/compress.c 122faa92d25033d6c3f07c39231de074ab3d2e83 +F ext/misc/csv.c 386ea82a7eeac9850000b43913a20453bc816a70 F ext/misc/eval.c f971962e92ebb8b0a4e6b62949463ee454d88fa2 F ext/misc/fileio.c d4171c815d6543a9edef8308aab2951413cd8d0f F ext/misc/fuzzer.c 7c64b8197bb77b7d64eff7cac7848870235d4c25 @@ -1496,7 +1497,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 24f258c2392290168cf34622b89a4a406a3dd853 -R 09bf6149ab89919173c2bf6834d1442a -U dan -Z 067d36e8e3bb6a90972bf218c0cabd70 +P 64ca1a835a89fd211078d2cd8f9b649e89be528d +R b775d969691f2ff7971a8ef71faf9f8c +U drh +Z 58655b63021cdf0ee9fb657659454c01 diff --git a/manifest.uuid b/manifest.uuid index 15daff8194..649908c68d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -64ca1a835a89fd211078d2cd8f9b649e89be528d \ No newline at end of file +00d3570c8bb96469c984903e20de589e998d4636 \ No newline at end of file