]>
git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
4 * Online help index routines for CUPS.
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
17 * Include necessary headers...
20 #include "cgi-private.h"
25 * List of common English words that should not be indexed...
28 static char help_common_words
[][6] =
/*
 * Forward declarations for the file-local (static) helpers that are
 * defined further down in this file.
 * NOTE(review): source lines 142 and 146 (the tail of the help_load_file()
 * and help_new_node() parameter lists) are missing from this extract.
 */
133 static help_word_t
*help_add_word(help_node_t
*n
, const char *text
);
134 static void help_delete_node(help_node_t
*n
);
135 static void help_delete_word(help_word_t
*w
);
136 static int help_load_directory(help_index_t
*hi
,
137 const char *directory
,
138 const char *relative
);
139 static int help_load_file(help_index_t
*hi
,
140 const char *filename
,
141 const char *relative
,
143 static help_node_t
*help_new_node(const char *filename
, const char *anchor
,
144 const char *section
, const char *text
,
145 time_t mtime
, off_t offset
,
/* nonnull(1,3,4): filename, section and text must not be NULL; anchor (argument 2) is not covered. */
147 __attribute__((nonnull(1,3,4)));
148 static int help_sort_by_name(help_node_t
*p1
, help_node_t
*p2
);
149 static int help_sort_by_score(help_node_t
*p1
, help_node_t
*p2
);
150 static int help_sort_words(help_word_t
*w1
, help_word_t
*w2
);
154 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
/*
 * Visible steps: trace the call, walk hi->nodes from first to last handing
 * nodes to help_delete_node(), then delete the "nodes" and "sorted" arrays.
 * NOTE(review): source lines 164-167, 171-172 and 178 onward are missing
 * from this extract, so any NULL check on "hi", any condition guarding the
 * per-node delete, and the release of "hi" itself cannot be confirmed here.
 */
158 helpDeleteIndex(help_index_t
*hi
) /* I - Help index */
160 help_node_t
*node
; /* Current node */
163 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi
));
168 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
170 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
173 help_delete_node(node
);
/* The arrays themselves are released only after the node walk above. */
176 cupsArrayDelete(hi
->nodes
);
177 cupsArrayDelete(hi
->sorted
);
184 * 'helpFindNode()' - Find a node in an index.
187 help_node_t
* /* O - Node pointer or NULL */
188 helpFindNode(help_index_t
*hi
, /* I - Index */
189 const char *filename
, /* I - Filename */
190 const char *anchor
) /* I - Anchor */
192 help_node_t key
; /* Search key */
195 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
196 hi
, filename
, anchor
));
199 * Range check input...
202 if (!hi
|| !filename
)
206 * Initialize the search key...
209 key
.filename
= (char *)filename
;
210 key
.anchor
= (char *)anchor
;
213 * Return any match...
216 return ((help_node_t
*)cupsArrayFind(hi
->nodes
, &key
));
221 * 'helpLoadIndex()' - Load a help index from disk.
/*
 * helpLoadIndex() - build an in-memory help index from a saved index file
 * and a scan of the help directory.
 *
 * Steps visible in this extract:
 *   1. calloc() a help_index_t and create its "nodes" array (ordered by
 *      help_sort_by_name) and "sorted" array (ordered by help_sort_by_score).
 *   2. Open hifile for reading and require the first line to be "HELPV2".
 *   3. Parse each following line as a word entry for the current node or as
 *      a node entry; new nodes are added to hi->nodes.
 *   4. Call help_load_directory() on "directory", keeping the result in
 *      "update".
 *   5. Remove and delete nodes from hi->nodes, copy the remaining nodes into
 *      hi->sorted, and call helpSaveIndex().
 *
 * NOTE(review): many source lines (braces, returns, several conditions) are
 * missing from this extract. In particular the test that chooses between
 * the word and node branches, the test that selects which nodes are removed,
 * and the guard around helpSaveIndex() are not visible - confirm against
 * the full file.
 */
224 help_index_t
* /* O - Index pointer or NULL */
225 helpLoadIndex(const char *hifile
, /* I - Index filename */
226 const char *directory
) /* I - Directory that is indexed */
228 help_index_t
*hi
; /* Help index */
229 cups_file_t
*fp
; /* Current file */
230 char line
[2048], /* Line from file */
231 *ptr
, /* Pointer into line */
232 *filename
, /* Filename in line */
233 *anchor
, /* Anchor in line */
234 *sectptr
, /* Section pointer in line */
235 section
[1024], /* Section name */
236 *text
; /* Text in line */
237 time_t mtime
; /* Modification time */
238 off_t offset
; /* Offset into file */
239 size_t length
; /* Length in bytes */
240 int update
; /* Update? */
241 help_node_t
*node
; /* Current node */
242 help_word_t
*word
; /* Current word */
245 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
249 * Create a new, empty index.
252 if ((hi
= (help_index_t
*)calloc(1, sizeof(help_index_t
))) == NULL
)
255 hi
->nodes
= cupsArrayNew((cups_array_func_t
)help_sort_by_name
, NULL
);
256 hi
->sorted
= cupsArrayNew((cups_array_func_t
)help_sort_by_score
, NULL
);
/* If either array could not be created, both are deleted again. */
258 if (!hi
->nodes
|| !hi
->sorted
)
260 cupsArrayDelete(hi
->nodes
);
261 cupsArrayDelete(hi
->sorted
);
267 * Try loading the existing index file...
270 if ((fp
= cupsFileOpen(hifile
, "r")) != NULL
)
273 * Lock the file and then read the first line...
/* Only a file whose first line is exactly "HELPV2" is parsed further. */
278 if (cupsFileGets(fp
, line
, sizeof(line
)) && !strcmp(line
, "HELPV2"))
281 * Got a valid header line, now read the data lines...
286 while (cupsFileGets(fp
, line
, sizeof(line
)))
289 * Each line looks like one of the following:
291 * filename mtime offset length "section" "text"
292 * filename#anchor offset length "text"
299 * Read a word in the current node...
/*
 * Word entry: the word is the text after the last space and the count is
 * parsed from line + 1 (helpSaveIndex() writes these lines as " %d %s").
 */
302 if (!node
|| (ptr
= strrchr(line
, ' ')) == NULL
)
305 if ((word
= help_add_word(node
, ptr
+ 1)) != NULL
)
306 word
->count
= atoi(line
+ 1);
/* Node entry: the filename field ends at the first space. */
316 if ((ptr
= strchr(line
, ' ')) == NULL
)
319 while (isspace(*ptr
& 255))
/* A '#' in the filename field introduces an anchor. */
322 if ((anchor
= strrchr(filename
, '#')) != NULL
)
/* The numeric fields are parsed in order, each call advancing ptr. */
328 mtime
= strtol(ptr
, &ptr
, 10);
330 offset
= strtoll(ptr
, &ptr
, 10);
331 length
= (size_t)strtoll(ptr
, &ptr
, 10);
333 while (isspace(*ptr
& 255))
/* Scan forward to the closing double quote (or end of line). */
348 while (*ptr
&& *ptr
!= '\"')
356 strlcpy(section
, sectptr
, sizeof(section
));
358 while (isspace(*ptr
& 255))
368 while (*ptr
&& *ptr
!= '\"')
376 if ((node
= help_new_node(filename
, anchor
, section
, text
,
377 mtime
, offset
, length
)) == NULL
)
382 cupsArrayAdd(hi
->nodes
, node
);
391 * Scan for new/updated files...
394 update
= help_load_directory(hi
, directory
, NULL
);
397 * Remove any files that are no longer installed...
400 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
402 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
406 * Delete this node...
409 cupsArrayRemove(hi
->nodes
, node
);
410 help_delete_node(node
);
414 * Add nodes to the sorted array...
417 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
419 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
420 cupsArrayAdd(hi
->sorted
, node
);
423 * Save the index if we updated it...
427 helpSaveIndex(hi
, hifile
);
430 * Return the index...
438 * 'helpSaveIndex()' - Save a help index to disk.
/*
 * helpSaveIndex() - write the index to hifile.
 *
 * Visible steps: open hifile with mode "w9", write the "HELPV2" header
 * line, then for every node in hi->nodes write one node line (two formats,
 * with or without "#anchor") followed by one " count word" line per entry
 * in node->words, and finally close the file. Negative results from
 * cupsFilePrintf() and cupsFileClose() are tested.
 *
 * NOTE(review): the statements executed when those tests fail, the test
 * that picks the anchor vs. non-anchor format, and the final return are on
 * source lines missing from this extract.
 */
441 int /* O - 0 on success, -1 on error */
442 helpSaveIndex(help_index_t
*hi
, /* I - Index */
443 const char *hifile
) /* I - Index filename */
445 cups_file_t
*fp
; /* Index file */
446 help_node_t
*node
; /* Current node */
447 help_word_t
*word
; /* Current word */
450 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi
, hifile
));
453 * Try creating a new index file...
456 if ((fp
= cupsFileOpen(hifile
, "w9")) == NULL
)
460 * Lock the file while we write it...
/* Header line that helpLoadIndex() checks for. */
465 cupsFilePuts(fp
, "HELPV2\n");
467 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
469 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
472 * Write the current node with/without the anchor...
/* Anchor form: filename#anchor offset length "text" */
477 if (cupsFilePrintf(fp
, "%s#%s " CUPS_LLFMT
" " CUPS_LLFMT
" \"%s\"\n",
478 node
->filename
, node
->anchor
,
479 CUPS_LLCAST node
->offset
, CUPS_LLCAST node
->length
,
/*
 * File form: filename mtime offset length "section" "text"
 * NOTE(review): mtime is narrowed to int for the %d conversion, so a
 * time_t value outside the int range would not round-trip through the file.
 */
485 if (cupsFilePrintf(fp
, "%s %d " CUPS_LLFMT
" " CUPS_LLFMT
" \"%s\" \"%s\"\n",
486 node
->filename
, (int)node
->mtime
,
487 CUPS_LLCAST node
->offset
, CUPS_LLCAST node
->length
,
/* A NULL section is written as an empty string. */
488 node
->section
? node
->section
: "", node
->text
) < 0)
493 * Then write the words associated with the node...
496 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
498 word
= (help_word_t
*)cupsArrayNext(node
->words
))
/* Word lines start with a space, which distinguishes them from node lines. */
499 if (cupsFilePrintf(fp
, " %d %s\n", word
->count
, word
->text
) < 0)
505 if (cupsFileClose(fp
) < 0)
515 * 'helpSearchIndex()' - Search an index.
/*
 * helpSearchIndex() - build a new index holding the nodes that match a query.
 *
 * Visible steps: walk hi->nodes (the comment says scores are reset to 0),
 * pick a starting node via helpFindNode(hi, filename, NULL) or the first
 * node of hi->nodes, compile the query with cgiCompileSearch(), calloc() a
 * result index with its own "nodes" and "sorted" arrays, then for each node
 * from the starting point apply the section and filename filters, count
 * matches in node->text plus the counts of matching words, store the total
 * in node->score and add the node to both result arrays.
 *
 * The result arrays hold the same node pointers as "hi"; nodes are not
 * copied here.
 *
 * NOTE(review): argument checks, the score-reset statement, the branch that
 * selects the starting node, the "matches > 0" style test, freeing of the
 * search context and the return are on source lines missing from this
 * extract.
 */
518 help_index_t
* /* O - Search index */
519 helpSearchIndex(help_index_t
*hi
, /* I - Index */
520 const char *query
, /* I - Query string */
521 const char *section
, /* I - Limit search to this section */
522 const char *filename
) /* I - Limit search to this file */
524 help_index_t
*search
; /* Search index */
525 help_node_t
*node
; /* Current node */
526 help_word_t
*word
; /* Current word */
527 void *sc
; /* Search context */
528 int matches
; /* Number of matches */
531 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
532 hi
, query
, filename
));
542 * Reset the scores of all nodes to 0...
545 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
547 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
551 * Find the first node to search in...
/* A NULL anchor asks helpFindNode() for the file-level node of "filename". */
556 node
= helpFindNode(hi
, filename
, NULL
);
561 node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
564 * Convert the query into a regular expression...
567 sc
= cgiCompileSearch(query
);
572 * Allocate a search index...
575 search
= calloc(1, sizeof(help_index_t
));
582 search
->nodes
= cupsArrayNew((cups_array_func_t
)help_sort_by_name
, NULL
);
583 search
->sorted
= cupsArrayNew((cups_array_func_t
)help_sort_by_score
, NULL
);
585 if (!search
->nodes
|| !search
->sorted
)
587 cupsArrayDelete(search
->nodes
);
588 cupsArrayDelete(search
->sorted
);
597 * Check each node in the index, adding matching nodes to the
/* Continues from the starting node chosen above using the array's current position. */
601 for (; node
; node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
/*
 * NOTE(review): node->section is passed to strcmp() with no visible NULL
 * check, while help_new_node() stores NULL for an empty section - confirm
 * a NULL section cannot reach this comparison.
 */
602 if (section
&& strcmp(node
->section
, section
))
604 else if (filename
&& strcmp(node
->filename
, filename
))
608 matches
= cgiDoSearch(sc
, node
->text
);
610 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
612 word
= (help_word_t
*)cupsArrayNext(node
->words
))
/* Each matching word contributes its stored occurrence count. */
613 if (cgiDoSearch(sc
, word
->text
) > 0)
614 matches
+= word
->count
;
619 * Found a match, add the node to the search index...
622 node
->score
= matches
;
624 cupsArrayAdd(search
->nodes
, node
);
625 cupsArrayAdd(search
->sorted
, node
);
630 * Free the search context...
636 * Return the results...
644 * 'help_add_word()' - Add a word to a node.
/*
 * help_add_word() - find or create the entry for "text" in n->words.
 *
 * Visible steps: create n->words (ordered by help_sort_words) when needed,
 * look the word up with a stack key whose only initialised member is
 * "text", and when it is not found calloc() a help_word_t, strdup() the
 * text and add the new entry to n->words.
 *
 * NOTE(review): the test guarding the array creation, the failure paths for
 * calloc()/strdup(), the counter update mentioned in the last comment and
 * the return are on source lines missing from this extract.
 */
647 static help_word_t
* /* O - New word */
648 help_add_word(help_node_t
*n
, /* I - Node */
649 const char *text
) /* I - Word text */
651 help_word_t
*w
, /* New word */
652 key
; /* Search key */
655 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n
, text
));
658 * Create the words array as needed...
662 n
->words
= cupsArrayNew((cups_array_func_t
)help_sort_words
, NULL
);
665 * See if the word is already added...
/* help_sort_words() reads only the "text" member, so that is all the key needs. */
668 key
.text
= (char *)text
;
670 if ((w
= (help_word_t
*)cupsArrayFind(n
->words
, &key
)) == NULL
)
673 * Create a new word...
676 if ((w
= calloc(1, sizeof(help_word_t
))) == NULL
)
/* The word keeps its own copy of the text. */
679 if ((w
->text
= strdup(text
)) == NULL
)
685 cupsArrayAdd(n
->words
, w
);
689 * Bump the counter for this word and return it...
699 * 'help_delete_node()' - Free all memory used by a node.
/*
 * Visible steps: trace the call, walk n->words from first to last, then
 * delete the n->words array.
 * NOTE(review): source lines 709-724 and 728-729 are missing from this
 * extract, so the release of the node's strings, the loop body (presumably
 * help_delete_word() per word) and the release of "n" itself cannot be
 * confirmed here.
 */
703 help_delete_node(help_node_t
*n
) /* I - Node */
705 help_word_t
*w
; /* Current word */
708 DEBUG_printf(("2help_delete_node(n=%p)", n
));
725 for (w
= (help_word_t
*)cupsArrayFirst(n
->words
);
727 w
= (help_word_t
*)cupsArrayNext(n
->words
))
730 cupsArrayDelete(n
->words
);
737 * 'help_delete_word()' - Free all memory used by a word.
/*
 * Only the signature and the debug trace are visible in this extract.
 * NOTE(review): the statements that release the word (source lines 744-753)
 * are missing - confirm against the full file.
 */
741 help_delete_word(help_word_t
*w
) /* I - Word */
743 DEBUG_printf(("2help_delete_word(w=%p)", w
));
756 * 'help_load_directory()' - Load a directory of files into an index.
/*
 * help_load_directory() - scan one directory (recursively) for help files.
 *
 * Visible steps: open the directory, skip entries whose name starts with
 * '.', build the absolute name ("directory/name") and the relative name
 * ("relative/name" or just the entry name), then:
 *   - for names containing ".html" followed by nothing or by ".gz", look up
 *     the file-level node with helpFindNode(hi, relname, NULL); when its
 *     mtime equals the directory entry's st_mtime, walk the following nodes
 *     comparing filenames, otherwise call help_load_file();
 *   - for sub-directories, recurse and test for a result of 1.
 *
 * NOTE(review): the function name line, the choice between the two relname
 * forms, what is done to nodes found up-to-date, how "update" is set, the
 * directory close and the return are on source lines missing from this
 * extract.
 */
759 static int /* O - 0 = success, -1 = error, 1 = updated */
761 help_index_t
*hi
, /* I - Index */
762 const char *directory
, /* I - Directory */
763 const char *relative
) /* I - Relative path */
765 cups_dir_t
*dir
; /* Directory file */
766 cups_dentry_t
*dent
; /* Directory entry */
767 char *ext
, /* Pointer to extension */
768 filename
[1024], /* Full filename */
769 relname
[1024]; /* Relative filename */
770 int update
; /* Updated? */
771 help_node_t
*node
; /* Current node */
774 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
775 hi
, directory
, relative
));
778 * Open the directory and scan it...
781 if ((dir
= cupsDirOpen(directory
)) == NULL
)
786 while ((dent
= cupsDirRead(dir
)) != NULL
)
/* Names beginning with '.' are tested here (hidden files, "." and ".."). */
792 if (dent
->filename
[0] == '.')
796 * Get absolute and relative filenames...
799 snprintf(filename
, sizeof(filename
), "%s/%s", directory
, dent
->filename
);
801 snprintf(relname
, sizeof(relname
), "%s/%s", relative
, dent
->filename
);
803 strlcpy(relname
, dent
->filename
, sizeof(relname
));
806 * Check if we have a HTML file...
/*
 * strstr() finds the first ".html" in the name; it counts only when it is
 * followed by the end of the name or by exactly ".gz".
 */
809 if ((ext
= strstr(dent
->filename
, ".html")) != NULL
&&
810 (!ext
[5] || !strcmp(ext
+ 5, ".gz")))
813 * HTML file, see if we have already indexed the file...
816 if ((node
= helpFindNode(hi
, relname
, NULL
)) != NULL
)
819 * File already indexed - check dates to confirm that the
820 * index is up-to-date...
823 if (node
->mtime
== dent
->fileinfo
.st_mtime
)
826 * Same modification time, so mark all of the nodes
827 * for this file as up-to-date...
/* Continues from the node just found, relying on the array's current position. */
830 for (; node
; node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
831 if (!strcmp(node
->filename
, relname
))
842 help_load_file(hi
, filename
, relname
, dent
->fileinfo
.st_mtime
);
844 else if (S_ISDIR(dent
->fileinfo
.st_mode
))
847 * Process sub-directory...
/* The sub-directory's relative name becomes the "relative" prefix one level down. */
850 if (help_load_directory(hi
, filename
, relname
) == 1)
862 * 'help_load_file()' - Load an HTML file into an index.
/*
 * help_load_file() - index one HTML file.
 *
 * Visible steps: open the file, start with the section name "Other", and
 * for each line read:
 *   - a line starting with "<!-- SECTION:" replaces the section name;
 *   - otherwise each '<' is examined for "TITLE>" or "A NAME=" to obtain an
 *     anchor (quoted or unquoted) and its link text, reading further lines
 *     into the same buffer until a '<' is found; the matching node is
 *     updated in place when helpFindNode() finds it, or created with
 *     help_new_node(), its text is whitespace-normalised, and it is
 *     (re)added to hi->nodes;
 *   - the line is then scanned for words, skipping comments, elements and
 *     entities, and words longer than one character that are not found in
 *     help_common_words are passed to help_add_word();
 *   - the file offset of the next line is recorded with cupsFileTell().
 * node->length is set from the difference between the current offset and
 * node->offset.
 *
 * NOTE(review): the function name line and a large number of statements
 * (branch bodies, assignments to "anchor", "text" and "quote", loop exits,
 * the file close and the return) are on source lines missing from this
 * extract - confirm details against the full file.
 */
865 static int /* O - 0 = success, -1 = error */
867 help_index_t
*hi
, /* I - Index */
868 const char *filename
, /* I - Filename */
869 const char *relative
, /* I - Relative path */
870 time_t mtime
) /* I - Modification time */
872 cups_file_t
*fp
; /* HTML file */
873 help_node_t
*node
; /* Current node */
874 char line
[1024], /* Line from file */
875 temp
[1024], /* Temporary word */
876 section
[1024], /* Section */
877 *ptr
, /* Pointer into line */
878 *anchor
, /* Anchor name */
879 *text
; /* Text for anchor */
880 off_t offset
; /* File offset */
881 char quote
; /* Quote character */
882 help_word_t
*word
; /* Current word */
883 int wordlen
; /* Length of word */
886 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
887 "mtime=%ld)", hi
, filename
, relative
, (long)mtime
));
889 if ((fp
= cupsFileOpen(filename
, "r")) == NULL
)
/* Section name used until a SECTION comment says otherwise. */
895 strlcpy(section
, "Other", sizeof(section
));
897 while (cupsFileGets(fp
, line
, sizeof(line
)))
900 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
903 if (!_cups_strncasecmp(line
, "<!-- SECTION:", 13))
906 * Got section line, copy it!
/* Skip whitespace after the 13-character prefix. */
909 for (ptr
= line
+ 13; isspace(*ptr
& 255); ptr
++);
911 strlcpy(section
, ptr
, sizeof(section
));
912 if ((ptr
= strstr(section
, "-->")) != NULL
)
915 * Strip comment stuff from end of line...
/*
 * NOTE(review): ptr points into "section" here (it came from
 * strstr(section, ...)), but the loop bound compares it with "line", a
 * different array. The bound looks like it should be "section" - confirm.
 */
918 for (*ptr
-- = '\0'; ptr
> line
&& isspace(*ptr
& 255); *ptr
-- = '\0');
920 if (isspace(*ptr
& 255))
/* Examine every '<' on the line. */
926 for (ptr
= line
; (ptr
= strchr(ptr
, '<')) != NULL
;)
930 if (!_cups_strncasecmp(ptr
, "TITLE>", 6))
939 else if (!_cups_strncasecmp(ptr
, "A NAME=", 7))
947 if (*ptr
== '\"' || *ptr
== '\'')
950 * Get quoted anchor...
955 if ((ptr
= strchr(anchor
, quote
)) != NULL
)
963 * Get unquoted anchor...
/* An unquoted anchor ends at '>' or whitespace. */
968 for (ptr
= anchor
; *ptr
&& *ptr
!= '>' && !isspace(*ptr
& 255); ptr
++);
977 * Got the anchor, now lets find the end...
980 while (*ptr
&& *ptr
!= '>')
992 * Now collect text for the link...
/*
 * While the text has no '<' yet, more input is read onto the end of the
 * same line buffer, as long as room remains in it.
 */
996 while ((ptr
= strchr(text
, '<')) == NULL
)
998 ptr
= text
+ strlen(text
);
999 if (ptr
>= (line
+ sizeof(line
) - 2))
1004 if (!cupsFileGets(fp
, ptr
, sizeof(line
) - (size_t)(ptr
- line
) - 1))
1011 node
->length
= (size_t)(offset
- node
->offset
);
1019 if ((node
= helpFindNode(hi
, relative
, anchor
)) != NULL
)
1022 * Node already in the index, so replace the text and other
/* The node is taken out of the array while its contents are replaced. */
1026 cupsArrayRemove(hi
->nodes
, node
);
1029 free(node
->section
);
1036 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
1038 word
= (help_word_t
*)cupsArrayNext(node
->words
))
1039 help_delete_word(word
);
1041 cupsArrayDelete(node
->words
);
/* An empty section name is stored as NULL. */
1045 node
->section
= section
[0] ? strdup(section
) : NULL
;
1046 node
->text
= strdup(text
);
1047 node
->mtime
= mtime
;
1048 node
->offset
= offset
;
/* New nodes start with length 0. */
1057 node
= help_new_node(relative
, anchor
, section
, text
, mtime
, offset
, 0);
1061 * Go through the text value and replace tabs and newlines with
1062 * whitespace and eliminate extra whitespace...
/* Two cursors over node->text: ptr reads and text is the second cursor. */
1065 for (ptr
= node
->text
, text
= node
->text
; *ptr
;)
1066 if (isspace(*ptr
& 255))
1068 while (isspace(*ptr
& 255))
1073 else if (text
!= ptr
)
1084 * (Re)add the node to the array...
1087 cupsArrayAdd(hi
->nodes
, node
);
1097 * Scan this line for words...
1100 for (ptr
= line
; *ptr
; ptr
++)
1103 * Skip HTML stuff...
1108 if (!strncmp(ptr
, "<!--", 4))
1111 * Skip HTML comment...
/* With no closing "-->" on this line, ptr is moved to the last character of the line. */
1114 if ((text
= strstr(ptr
+ 4, "-->")) == NULL
)
1115 ptr
+= strlen(ptr
) - 1;
1122 * Skip HTML element...
1125 for (ptr
++; *ptr
&& *ptr
!= '>'; ptr
++)
/* A quoted attribute value is skipped as a unit. */
1127 if (*ptr
== '\"' || *ptr
== '\'')
1129 for (quote
= *ptr
++; *ptr
&& *ptr
!= quote
; ptr
++);
1142 else if (*ptr
== '&')
1145 * Skip HTML entity...
1148 for (ptr
++; *ptr
&& *ptr
!= ';'; ptr
++);
1155 else if (!isalnum(*ptr
& 255))
1159 * Found the start of a word, search until we find the end...
1162 for (text
= ptr
, ptr
++; *ptr
&& isalnum(*ptr
& 255); ptr
++);
1164 wordlen
= (int)(ptr
- text
);
/* The word comes from "line", which is declared with the same size as "temp". */
1166 memcpy(temp
, text
, (size_t)wordlen
);
1167 temp
[wordlen
] = '\0';
/* Words of one character, or found in help_common_words by bsearch(), are not indexed. */
1171 if (wordlen
> 1 && !bsearch(temp
, help_common_words
,
1172 (sizeof(help_common_words
) /
1173 sizeof(help_common_words
[0])),
1174 sizeof(help_common_words
[0]),
1175 (int (*)(const void *, const void *))
1177 help_add_word(node
, temp
);
1182 * Get the offset of the next line...
1185 offset
= cupsFileTell(fp
);
1191 node
->length
= (size_t)(offset
- node
->offset
);
1198 * 'help_new_node()' - Create a new node (the caller adds it to an index).
/*
 * Visible steps: trace the call, calloc() a zeroed help_node_t, and store
 * strdup() copies of filename, anchor (when not NULL), section (when not
 * NULL and not empty) and text.
 * NOTE(review): source lines 1218-1220 and 1225 onward are missing from
 * this extract, so the calloc() failure check, the stores of mtime, offset
 * and length, and the return cannot be confirmed here. No check of the
 * strdup() results is visible.
 */
1201 static help_node_t
* /* O - Node pointer or NULL on error */
1202 help_new_node(const char *filename
, /* I - Filename */
1203 const char *anchor
, /* I - Anchor */
1204 const char *section
, /* I - Section */
1205 const char *text
, /* I - Text */
1206 time_t mtime
, /* I - Modification time */
1207 off_t offset
, /* I - Offset in file */
1208 size_t length
) /* I - Length in bytes */
1210 help_node_t
*n
; /* Node */
1213 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1214 "mtime=%ld, offset=%ld, length=%ld)", filename
, anchor
, text
,
1215 (long)mtime
, (long)offset
, (long)length
));
1217 n
= (help_node_t
*)calloc(1, sizeof(help_node_t
));
1221 n
->filename
= strdup(filename
);
/* A NULL anchor stays NULL. */
1222 n
->anchor
= anchor
? strdup(anchor
) : NULL
;
/* An empty section is stored as NULL, the same as a missing one. */
1223 n
->section
= (section
&& *section
) ? strdup(section
) : NULL
;
1224 n
->text
= strdup(text
);
1234 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1237 static int /* O - Difference */
1238 help_sort_by_name(help_node_t
*n1
, /* I - First node */
1239 help_node_t
*n2
) /* I - Second node */
1241 int diff
; /* Difference */
1244 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1245 n1
, n1
->filename
, n1
->anchor
,
1246 n2
, n2
->filename
, n2
->anchor
));
1248 if ((diff
= strcmp(n1
->filename
, n2
->filename
)) != 0)
1251 if (!n1
->anchor
&& !n2
->anchor
)
1253 else if (!n1
->anchor
)
1255 else if (!n2
->anchor
)
1258 return (strcmp(n1
->anchor
, n2
->anchor
));
1263 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
/*
 * Comparison order visible here: score first (n2->score - n1->score, so the
 * higher score sorts first), then the presence and value of the section
 * strings, and finally a case-insensitive comparison of the text.
 * NOTE(review): the return statements for the three section branches
 * (source lines 1282, 1284 and 1287) are missing from this extract, so the
 * direction in which a missing section sorts cannot be confirmed here.
 */
1266 static int /* O - Difference */
1267 help_sort_by_score(help_node_t
*n1
, /* I - First node */
1268 help_node_t
*n2
) /* I - Second node */
1270 int diff
; /* Difference */
1273 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1274 "n2=%p(%d \"%s\" \"%s\")",
1275 n1
, n1
->score
, n1
->section
, n1
->text
,
1276 n2
, n2
->score
, n2
->section
, n2
->text
));
1278 if (n1
->score
!= n2
->score
)
1279 return (n2
->score
- n1
->score
);
/* Equal scores: nodes where only one side has a section are handled first. */
1281 if (n1
->section
&& !n2
->section
)
1283 else if (!n1
->section
&& n2
->section
)
/* strcmp() on the sections runs only when both are non-NULL. */
1285 else if (n1
->section
&& n2
->section
&&
1286 (diff
= strcmp(n1
->section
, n2
->section
)) != 0)
1289 return (_cups_strcasecmp(n1
->text
, n2
->text
));
1294 * 'help_sort_words()' - Sort words alphabetically.
1297 static int /* O - Difference */
1298 help_sort_words(help_word_t
*w1
, /* I - Second word */
1299 help_word_t
*w2
) /* I - Second word */
1301 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1302 w1
, w1
->text
, w2
, w2
->text
));
1304 return (_cups_strcasecmp(w1
->text
, w2
->text
));