sb->s[0] = '\0';
}
-void append(StringBuffer *sb, const char *zFrom){
- int nFrom = strlen(zFrom);
+void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
if( sb->len + nFrom >= sb->alloced ){
sb->alloced = sb->len + nFrom + 100;
sb->s = realloc(sb->s, sb->alloced+1);
return;
}
}
- strcpy(sb->s + sb->len, zFrom);
+ memcpy(sb->s + sb->len, zFrom, nFrom);
sb->len += nFrom;
+ sb->s[sb->len] = 0;
+}
+void append(StringBuffer *sb, const char *zFrom){
+ nappend(sb, zFrom, strlen(zFrom));
}
/* We encode variable-length integers in little-endian order using seven bits
int nMatch; /* Total number of matches */
int nAlloc; /* Space allocated for aMatch[] */
struct snippetMatch { /* One entry for each matching term */
- char exemplar; /* True if this match should be shown in the snippet */
+ char snStatus; /* Status flag for use while constructing snippets */
short int iCol; /* The column that contains the match */
short int iTerm; /* The index in Query.pTerms[] of the matching term */
short int nByte; /* Number of bytes in the term */
- short int nContext; /* Number of bytes of context for this match */
int iStart; /* The offset to the first character of the term */
- int iContext; /* Start of the context */
} *aMatch; /* Points to space obtained from malloc */
char *zOffset; /* Text rendering of aMatch[] */
int nOffset; /* strlen(zOffset) */
+ char *zSnippet; /* Snippet text */
+ int nSnippet; /* strlen(zSnippet) */
} Snippet;
static void snippetClear(Snippet *p){
free(p->aMatch);
free(p->zOffset);
+ free(p->zSnippet);
memset(p, 0, sizeof(*p));
}
/*
}
i = p->nMatch++;
pMatch = &p->aMatch[i];
- pMatch->exemplar = 0;
pMatch->iCol = iCol;
pMatch->iTerm = iTerm;
pMatch->iStart = iStart;
}
/*
-** Scan all matches in Snippet and mark the exemplars. Exemplars are
-** matches that we definitely want to include in the snippet.
+** zDoc[0..nDoc-1] is a phrase of text.  aMatch[0..nMatch-1] is a set
+** of matching words, some of which might be in zDoc.  zDoc is the text
+** of column number iCol.
**
-** Generally speaking, each keyword in the search phrase will have
-** a single exemplar. When a keyword matches at multiple points
-** within the document, the trick is figuring which of these matches
-** should be the examplar.
+** iBreak is a suggested spot in zDoc where we could begin or end an
+** excerpt.  Return a value similar to iBreak but possibly adjusted
+** a little to the left or right so that the break point is better.
+*/
+static int wordBoundary(
+  int iBreak,                   /* The suggested break point */
+  const char *zDoc,             /* Document text */
+  int nDoc,                     /* Number of bytes in zDoc[] */
+  struct snippetMatch *aMatch,  /* Matching words */
+  int nMatch,                   /* Number of entries in aMatch[] */
+  int iCol                      /* The column number for zDoc[] */
+){
+  int i;
+
+  /* A break point within 10 bytes of either end of the text snaps to
+  ** that end. */
+  if( iBreak<=10 ){
+    return 0;
+  }
+  if( iBreak>=nDoc-10 ){
+    return nDoc;
+  }
+
+  /* Skip the matches that belong to columns before iCol, then the matches
+  ** that end before iBreak.
+  ** NOTE(review): the second loop does not test aMatch[i].iCol, so it can
+  ** run on into matches that belong to a later column - confirm whether
+  ** that is intended. */
+  for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
+  while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
+  if( i<nMatch ){
+    /* Break at the start of a match that begins less than 10 bytes past
+    ** iBreak, so that the match is not cut in two. */
+    if( aMatch[i].iStart<iBreak+10 ){
+      return aMatch[i].iStart;
+    }
+    /* Otherwise, if the preceding match reaches iBreak, break at the
+    ** start of that match. */
+    if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
+      return aMatch[i-1].iStart;
+    }
+  }
+
+  /* Look up to 10 bytes to either side for whitespace and break just
+  ** after it.  Because 10<iBreak<nDoc-10 here, both indexes stay inside
+  ** zDoc[1..nDoc-1].  The casts are required: handing isspace() a negative
+  ** char value (any byte >= 0x80 where char is signed) is undefined. */
+  for(i=1; i<=10; i++){
+    if( isspace((unsigned char)zDoc[iBreak-i]) ){
+      return iBreak - i + 1;
+    }
+    if( isspace((unsigned char)zDoc[iBreak+i]) ){
+      return iBreak + i + 1;
+    }
+  }
+  return iBreak;
+}
+
+/*
+** Allowed values for Snippet.aMatch[].snStatus
*/
-static void snippetFindExemplars(Snippet *p, Query *pQ){
+#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */
+#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */
+
+/*
+** Generate the text of a snippet for pCursor and store it in
+** pCursor->snippet.zSnippet, with its length in pCursor->snippet.nSnippet.
+** Any snippet text previously held there is freed first.
+**
+** The first match of each query term is marked SNIPPET_DESIRED.  For each
+** desired match an excerpt reaching about 40 bytes to either side of the
+** match is emitted, with both ends adjusted by wordBoundary().  An excerpt
+** that would begin no more than 20 bytes after the end of the previous
+** excerpt in the same column is joined onto that excerpt instead.  Each
+** match of column iCol that falls inside an excerpt is wrapped in
+** zStartMark and zEndMark.  zEllipsis is written where an excerpt starts
+** or stops short of the ends of its column text.
+*/
+static void snippetText(
+  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
+  const char *zStartMark,     /* Markup to appear before each match */
+  const char *zEndMark,       /* Markup to appear after each match */
+  const char *zEllipsis       /* Ellipsis mark */
+){
int i, j;
- for(i=0; i<pQ->nTerms; i++){
- for(j=0; j<p->nMatch; j++){
- if( p->aMatch[j].iTerm==i ){
- p->aMatch[j].exemplar = 1;
+  struct snippetMatch *aMatch;
+  int nMatch;
+  int nDesired;
+  StringBuffer sb;
+  int tailCol = -1;
+  int tailOffset = -1;
+  int iCol;
+  int nDoc;
+  const char *zDoc;
+  int iStart, iEnd;
+  int wantEllipsis;
+  int tailEllipsis = 0;
+  int iMatch;
+
+
+  free(pCursor->snippet.zSnippet);
+  pCursor->snippet.zSnippet = 0;
+  aMatch = pCursor->snippet.aMatch;
+  nMatch = pCursor->snippet.nMatch;
+  initStringBuffer(&sb);
+
+  /* Reset every match, then mark the first match of each query term as
+  ** one that we want to see in the snippet. */
+  for(i=0; i<nMatch; i++){
+    aMatch[i].snStatus = SNIPPET_IGNORE;
+  }
+  nDesired = 0;
+  for(i=0; i<pCursor->q.nTerms; i++){
+    for(j=0; j<nMatch; j++){
+      if( aMatch[j].iTerm==i ){
+        aMatch[j].snStatus = SNIPPET_DESIRED;
+        nDesired++;
break;
}
}
}
-}
-static void snippetText(Snippet *p, Query *pQ){
-
+  iMatch = 0;
+  for(i=0; i<nMatch && nDesired>0; i++){
+    if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
+    nDesired--;
+    iCol = aMatch[i].iCol;
+    zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
+    nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
+
+    /* Choose where the excerpt begins. */
+    iStart = aMatch[i].iStart - 40;
+    iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
+    if( iStart<=10 ){
+      iStart = 0;
+      wantEllipsis = 0;
+    }else{
+      wantEllipsis = 1;
+    }
+    /* Close to the end of the previous excerpt in the same column:
+    ** continue from where that excerpt stopped, with no ellipsis. */
+    if( iCol==tailCol && iStart<=tailOffset+20 ){
+      iStart = tailOffset;
+      wantEllipsis = 0;
+      tailEllipsis = 0;
+    }
+    if( wantEllipsis || tailEllipsis ){
+      append(&sb, zEllipsis);
+    }
+
+    /* Choose where the excerpt ends. */
+    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
+    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
+    if( iEnd>=nDoc-10 ){
+      iEnd = nDoc;
+      tailEllipsis = 0;
+    }else{
+      tailEllipsis = 1;
+    }
+
+    /* Copy zDoc[iStart..iEnd-1], wrapping each match in the markup. */
+    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
+    while( iStart<iEnd ){
+      /* Only matches of column iCol hold offsets into zDoc.  Without the
+      ** column tests below, iMatch could run on into the matches of a
+      ** later column: those would be marked up at the wrong place in
+      ** this text (possibly reading past nDoc) and then be missed when
+      ** their own column is rendered. */
+      while( iMatch<nMatch && aMatch[iMatch].iCol<=iCol
+             && aMatch[iMatch].iStart<iStart ){
+        iMatch++;
+      }
+      if( iMatch<nMatch && aMatch[iMatch].iCol==iCol
+             && aMatch[iMatch].iStart<iEnd ){
+        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
+        iStart = aMatch[iMatch].iStart;
+        append(&sb, zStartMark);
+        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
+        append(&sb, zEndMark);
+        iStart += aMatch[iMatch].nByte;
+        /* This term has now been shown, so later matches of the same
+        ** term no longer need an excerpt of their own. */
+        for(j=iMatch+1; j<nMatch; j++){
+          if( aMatch[j].iTerm==aMatch[iMatch].iTerm
+              && aMatch[j].snStatus==SNIPPET_DESIRED ){
+            nDesired--;
+            aMatch[j].snStatus = SNIPPET_IGNORE;
+          }
+        }
+      }else{
+        nappend(&sb, &zDoc[iStart], iEnd - iStart);
+        iStart = iEnd;
+      }
+    }
+    tailCol = iCol;
+    tailOffset = iEnd;
+  }
+  if( tailEllipsis ){
+    append(&sb, zEllipsis);
+  }
+  /* The cursor's Snippet takes over the buffer built in sb. */
+  pCursor->snippet.zSnippet = sb.s;
+  pCursor->snippet.nSnippet = sb.len;
}
sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
}else{
+ const char *zStart = "<b>";
+ const char *zEnd = "</b>";
+ const char *zEllipsis = "<b>...</b>";
memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
- /* TODO: Return the snippet */
+ if( argc>=2 ){
+ zStart = (const char*)sqlite3_value_text(argv[1]);
+ if( argc>=3 ){
+ zEnd = (const char*)sqlite3_value_text(argv[2]);
+ if( argc>=4 ){
+ zEllipsis = (const char*)sqlite3_value_text(argv[3]);
+ }
+ }
+ }
+ snippetAllOffsets(pCursor);
+ snippetText(pCursor, zStart, zEnd, zEllipsis);
+ sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
+ pCursor->snippet.nSnippet, SQLITE_STATIC);
}
}
-C Fixed\sa\sbuild\sproblem\sin\ssqlite3_extension_init().\s(CVS\s3430)
-D 2006-09-18T21:14:40
+C Implementation\sof\sthe\ssnippet()\sfunction\sfor\sFTS1.\s\sIncludes\sa\sfew\nsimple\stest\scases\sbut\smore\stesting\sis\sneeded.\s(CVS\s3431)
+D 2006-09-21T02:03:09
F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
-F ext/fts1/fts1.c 50770451c8d3c693f7819dad33d397246f44ea90
+F ext/fts1/fts1.c 02c5b614ff8055b374b88acaf5cae3a834da3150
F ext/fts1/fts1.h 6060b8f62c1d925ea8356cb1a6598073eb9159a6
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
F test/format4.test bf3bed3b13c63abfb3cfec232597a319a31d0bcc
F test/fts1a.test 54fd9451c00fb91074d5abdc207b05dcba6d2d65
F test/fts1b.test 5d8a01aefbecc8b7442b36c94c05eb7a845462d5
-F test/fts1c.test 4d84cfcacce229e4802fd676462f4616fabadad3
+F test/fts1c.test a57cb192d59ddacba64d17c326ff99393c181dc6
F test/func.test 0ed54b5aeaad319f68016c033acfebef56f5874a
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P cd4e1de896ef715c444071f758b74dbb607e0572
-R cdcaff3d2acee8f53c20d70e6e102e11
-U adamd
-Z 00727001b33fedbb26b4905a3830ed9f
+P bb2e1871cb10b470f96c793bb137c043ef30e1da
+R 2733a08fd53c5688fa09fb4ef51647ba
+U drh
+Z 50f7084cc6542485b2db3e3ffa3bb7c3