From: drh Date: Mon, 22 Sep 2014 14:30:51 +0000 (+0000) Subject: Tune the query planner to be more aggressive about using automatic indexes X-Git-Tag: version-3.8.7~84 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7e07433fe72598b932a2115d2774a98e12f50789;p=thirdparty%2Fsqlite.git Tune the query planner to be more aggressive about using automatic indexes on views and subqueries for which there is not opportunity to declare a persistent schema index. FossilOrigin-Name: 41de1643bfc9ae25e20790d707e2789b665baa2b --- diff --git a/manifest b/manifest index b51e6af8f7..926eedc817 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Disable\sshared\smemory\soperations\susing\sthe\sunix-nolock\sVFS. -D 2014-09-22T03:22:27.824 +C Tune\sthe\squery\splanner\sto\sbe\smore\saggressive\sabout\susing\sautomatic\sindexes\non\sviews\sand\ssubqueries\sfor\swhich\sthere\sis\snot\sopportunity\sto\sdeclare\sa\npersistent\sschema\sindex. +D 2014-09-22T14:30:51.911 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -302,7 +302,7 @@ F src/vtab.c 019dbfd0406a7447c990e1f7bd1dfcdb8895697f F src/wal.c 10e7de7ce90865a68153f001a61f1d985cd17983 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c c253b95b4ee44b21c406e2a1052636c31ea27804 -F src/where.c 0888567c0e01a41b6001647e333f8ccfd3ae7d36 +F src/where.c 3f859ecfada8643ce8255f20d481b5a3e0921662 F src/whereInt.h 124d970450955a6982e174b07c320ae6d62a595c F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -341,7 +341,7 @@ F test/auth.test 855233ef26eb3601b6886567ea4e326c72959360 F test/auth2.test 264c6af53cad9aba5218c68bbe18036e39007bfa F test/auth3.test 5cfa94ed90c6617c42b7ba4b133fd79678b251c7 F test/autoinc.test c58912526998a39e11f66b533e23cfabea7f25b7 -F test/autoindex1.test 762ff3f8e25d852aae55c6462ca166a80c0cde61 +F test/autoindex1.test 6ff78b94f43a59616c06c11c55b12935173506d7 F test/autoindex2.test 60d2fc6f38364308ce73a9beb01b47ded38697de F test/autoindex3.test 8254f689c3241081fad52b7bea18ba53e07e14a2 F test/autovacuum.test 941892505d2c0f410a0cb5970dfa1c7c4e5f6e74 @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d5880abd63c83c88e135257373afa0a3fd88297e -R 78a2249f9b04bb5d20ea2c24b3aed300 +P 10a6e510497b471d67ac3dfb19ff256a7d18adf4 +R f6ce79b0bd0352edf4a2c65db2a400a7 U drh -Z 8d87b3613ef02056468b7e3fc8779d9f +Z b1246ec5b6570c51d45a58350d46840c diff --git a/manifest.uuid b/manifest.uuid index 87021972ad..a03afa027d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -10a6e510497b471d67ac3dfb19ff256a7d18adf4 \ No newline at end of file +41de1643bfc9ae25e20790d707e2789b665baa2b \ No newline at end of file diff --git a/src/where.c b/src/where.c index 318065d486..d1862b499a 100644 --- a/src/where.c +++ b/src/where.c @@ -4718,9 +4718,17 @@ static int whereLoopAddBtree( pNew->nLTerm = 1; pNew->aLTerm[0] = pTerm; /* TUNING: One-time cost for computing the automatic index is - ** approximately 7*N*log2(N) where N is the number of rows in - ** the table being indexed. */ - pNew->rSetup = rLogSize + rSize + 28; assert( 28==sqlite3LogEst(7) ); + ** estimated to be X*N*log2(N) where N is the number of rows in + ** the table being indexed and where X is 7 (LogEst=28) for normal + ** tables or 1.375 (LogEst=4) for views and subqueries. The value + ** of X is smaller for views and subqueries so that the query planner + ** will be more aggressive about generating automatic indexes for + ** those objects, since there is no opportunity to add schema + ** indexes on subqueries and views. */ + pNew->rSetup = rLogSize + rSize + 4; + if( pTab->pSelect==0 && (pTab->tabFlags & TF_Ephemeral)==0 ){ + pNew->rSetup += 24; + } ApplyCostMultiplier(pNew->rSetup, pTab->costMult); /* TUNING: Each index lookup yields 20 rows in the table. This ** is more than the usual guess of 10 rows, since we have no way diff --git a/test/autoindex1.test b/test/autoindex1.test index 6cb0ab146a..bcde5bc2e7 100644 --- a/test/autoindex1.test +++ b/test/autoindex1.test @@ -413,4 +413,101 @@ do_execsql_test autoindex1-801 { WHERE mimetypes._id=10 AND data14 IS NOT NULL; } {/SEARCH TABLE data .*SEARCH TABLE raw_contacts/} +# Another test case from an important user of SQLite. The key feature of +# this test is that the "aggindex" subquery should make use of an +# automatic index. If it does, the query is fast. If it does not, the +# query is deathly slow. It worked OK in 3.7.17 but started going slow +# with version 3.8.0. The problem was fixed for 3.8.7 by reducing the +# cost estimate for automatic indexes on views and subqueries. +# +db close +forcedelete test.db +sqlite3 db test.db +do_execsql_test autoindex1-900 { + CREATE TABLE messages (ROWID INTEGER PRIMARY KEY AUTOINCREMENT, message_id, document_id BLOB, in_reply_to, remote_id INTEGER, sender INTEGER, subject_prefix, subject INTEGER, date_sent INTEGER, date_received INTEGER, date_created INTEGER, date_last_viewed INTEGER, mailbox INTEGER, remote_mailbox INTEGER, original_mailbox INTEGER, flags INTEGER, read, flagged, size INTEGER, color, encoding, type INTEGER, pad, conversation_id INTEGER DEFAULT -1, snippet TEXT DEFAULT NULL, fuzzy_ancestor INTEGER DEFAULT NULL, automated_conversation INTEGER DEFAULT 0, root_status INTEGER DEFAULT -1, conversation_position INTEGER DEFAULT -1); + CREATE INDEX date_index ON messages(date_received); + CREATE INDEX date_last_viewed_index ON messages(date_last_viewed); + CREATE INDEX date_created_index ON messages(date_created); + CREATE INDEX message_message_id_mailbox_index ON messages(message_id, mailbox); + CREATE INDEX message_document_id_index ON messages(document_id); + CREATE INDEX message_read_index ON messages(read); + CREATE INDEX message_flagged_index ON messages(flagged); + CREATE INDEX message_mailbox_index ON messages(mailbox, date_received); + CREATE INDEX message_remote_mailbox_index ON messages(remote_mailbox, remote_id); + CREATE INDEX message_type_index ON messages(type); + CREATE INDEX message_conversation_id_conversation_position_index ON messages(conversation_id, conversation_position); + CREATE INDEX message_fuzzy_ancestor_index ON messages(fuzzy_ancestor); + CREATE INDEX message_subject_fuzzy_ancestor_index ON messages(subject, fuzzy_ancestor); + CREATE INDEX message_sender_subject_automated_conversation_index ON messages(sender, subject, automated_conversation); + CREATE INDEX message_sender_index ON messages(sender); + CREATE INDEX message_root_status ON messages(root_status); + CREATE TABLE subjects (ROWID INTEGER PRIMARY KEY, subject COLLATE RTRIM, normalized_subject COLLATE RTRIM); + CREATE INDEX subject_subject_index ON subjects(subject); + CREATE INDEX subject_normalized_subject_index ON subjects(normalized_subject); + CREATE TABLE addresses (ROWID INTEGER PRIMARY KEY, address COLLATE NOCASE, comment, UNIQUE(address, comment)); + CREATE INDEX addresses_address_index ON addresses(address); + CREATE TABLE mailboxes (ROWID INTEGER PRIMARY KEY, url UNIQUE, total_count INTEGER DEFAULT 0, unread_count INTEGER DEFAULT 0, unseen_count INTEGER DEFAULT 0, deleted_count INTEGER DEFAULT 0, unread_count_adjusted_for_duplicates INTEGER DEFAULT 0, change_identifier, source INTEGER, alleged_change_identifier); + CREATE INDEX mailboxes_source_index ON mailboxes(source); + CREATE TABLE labels (ROWID INTEGER PRIMARY KEY, message_id INTEGER NOT NULL, mailbox_id INTEGER NOT NULL, UNIQUE(message_id, mailbox_id)); + CREATE INDEX labels_message_id_mailbox_id_index ON labels(message_id, mailbox_id); + CREATE INDEX labels_mailbox_id_index ON labels(mailbox_id); + + explain query plan + SELECT messages.ROWID, + messages.message_id, + messages.remote_id, + messages.date_received, + messages.date_sent, + messages.flags, + messages.size, + messages.color, + messages.date_last_viewed, + messages.subject_prefix, + subjects.subject, + sender.comment, + sender.address, + NULL, + messages.mailbox, + messages.original_mailbox, + NULL, + NULL, + messages.type, + messages.document_id, + sender, + NULL, + messages.conversation_id, + messages.conversation_position, + agglabels.labels + FROM mailboxes AS mailbox + JOIN messages ON mailbox.ROWID = messages.mailbox + LEFT OUTER JOIN subjects ON messages.subject = subjects.ROWID + LEFT OUTER JOIN addresses AS sender ON messages.sender = sender.ROWID + LEFT OUTER JOIN ( + SELECT message_id, group_concat(mailbox_id) as labels + FROM labels GROUP BY message_id + ) AS agglabels ON messages.ROWID = agglabels.message_id + WHERE (mailbox.url = 'imap://email.app@imap.gmail.com/%5BGmail%5D/All%20Mail') + AND (messages.ROWID IN ( + SELECT labels.message_id + FROM labels JOIN mailboxes ON labels.mailbox_id = mailboxes.ROWID + WHERE mailboxes.url = 'imap://email.app@imap.gmail.com/INBOX')) + AND messages.mailbox in (6,12,18,24,30,36,42,1,7,13,19,25,31,37,43,2,8, + 14,20,26,32,38,3,9,15,21,27,33,39,4,10,16,22,28, + 34,40,5,11,17,23,35,41) + ORDER BY date_received DESC; +} {/agglabels USING AUTOMATIC COVERING INDEX/} + +# A test case for VIEWs +# +do_execsql_test autoindex1-901 { + CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z); + CREATE TABLE t2(a, b); + CREATE VIEW agg2 AS SELECT a, sum(b) AS m FROM t2 GROUP BY a; + EXPLAIN QUERY PLAN + SELECT t1.z, agg2.m + FROM t1 JOIN agg2 ON t1.y=agg2.m + WHERE t1.x IN (1,2,3); +} {/USING AUTOMATIC COVERING INDEX/} + + finish_test