]>
git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
9dbc23cb7bae7a6c2c1dd40a1aa97fab8caf64d7
2 * Online help index routines for CUPS.
4 * Copyright © 2020-2024 by OpenPrinting.
5 * Copyright © 2007-2019 by Apple Inc.
6 * Copyright © 1997-2007 by Easy Software Products.
8 * Licensed under Apache License v2.0. See the file "LICENSE" for more
13 * Include necessary headers...
16 #include "cgi-private.h"
21 * List of common English words that should not be indexed...
24 static char help_common_words
[][6] =
129 static help_word_t
*help_add_word(help_node_t
*n
, const char *text
);
130 static void help_delete_node(help_node_t
*n
);
131 static void help_delete_word(help_word_t
*w
);
132 static int help_load_directory(help_index_t
*hi
,
133 const char *directory
,
134 const char *relative
);
135 static int help_load_file(help_index_t
*hi
,
136 const char *filename
,
137 const char *relative
,
139 static help_node_t
*help_new_node(const char *filename
, const char *anchor
, const char *section
, const char *text
, time_t mtime
, off_t offset
, size_t length
) _CUPS_NONNULL(1,3,4);
140 static int help_sort_by_name(help_node_t
*p1
, help_node_t
*p2
, void *data
);
141 static int help_sort_by_score(help_node_t
*p1
, help_node_t
*p2
, void *data
);
142 static int help_sort_words(help_word_t
*w1
, help_word_t
*w2
, void *data
);
146 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
150 helpDeleteIndex(help_index_t
*hi
) /* I - Help index */
152 help_node_t
*node
; /* Current node */
158 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
160 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
163 help_delete_node(node
);
166 cupsArrayDelete(hi
->nodes
);
167 cupsArrayDelete(hi
->sorted
);
174 * 'helpFindNode()' - Find a node in an index.
177 help_node_t
* /* O - Node pointer or NULL */
178 helpFindNode(help_index_t
*hi
, /* I - Index */
179 const char *filename
, /* I - Filename */
180 const char *anchor
) /* I - Anchor */
182 help_node_t key
; /* Search key */
186 * Range check input...
189 if (!hi
|| !filename
)
193 * Initialize the search key...
196 key
.filename
= (char *)filename
;
197 key
.anchor
= (char *)anchor
;
200 * Return any match...
203 return ((help_node_t
*)cupsArrayFind(hi
->nodes
, &key
));
208 * 'helpLoadIndex()' - Load a help index from disk.
211 help_index_t
* /* O - Index pointer or NULL */
212 helpLoadIndex(const char *hifile
, /* I - Index filename */
213 const char *directory
) /* I - Directory that is indexed */
215 help_index_t
*hi
; /* Help index */
216 cups_file_t
*fp
; /* Current file */
217 char line
[2048], /* Line from file */
218 *ptr
, /* Pointer into line */
219 *filename
, /* Filename in line */
220 *anchor
, /* Anchor in line */
221 *sectptr
, /* Section pointer in line */
222 section
[1024], /* Section name */
223 *text
; /* Text in line */
224 time_t mtime
; /* Modification time */
225 off_t offset
; /* Offset into file */
226 size_t length
; /* Length in bytes */
227 int update
; /* Update? */
228 help_node_t
*node
; /* Current node */
229 help_word_t
*word
; /* Current word */
233 * Create a new, empty index.
236 if ((hi
= (help_index_t
*)calloc(1, sizeof(help_index_t
))) == NULL
)
239 hi
->nodes
= cupsArrayNew((cups_array_func_t
)help_sort_by_name
, NULL
);
240 hi
->sorted
= cupsArrayNew((cups_array_func_t
)help_sort_by_score
, NULL
);
242 if (!hi
->nodes
|| !hi
->sorted
)
244 cupsArrayDelete(hi
->nodes
);
245 cupsArrayDelete(hi
->sorted
);
251 * Try loading the existing index file...
254 if ((fp
= cupsFileOpen(hifile
, "r")) != NULL
)
257 * Lock the file and then read the first line...
262 if (cupsFileGets(fp
, line
, sizeof(line
)) && !strcmp(line
, "HELPV2"))
265 * Got a valid header line, now read the data lines...
270 while (cupsFileGets(fp
, line
, sizeof(line
)))
273 * Each line looks like one of the following:
275 * filename mtime offset length "section" "text"
276 * filename#anchor offset length "text"
283 * Read a word in the current node...
286 if (!node
|| (ptr
= strrchr(line
, ' ')) == NULL
)
289 if ((word
= help_add_word(node
, ptr
+ 1)) != NULL
)
290 word
->count
= atoi(line
+ 1);
300 if ((ptr
= strchr(line
, ' ')) == NULL
)
303 while (isspace(*ptr
& 255))
306 if ((anchor
= strrchr(filename
, '#')) != NULL
)
312 mtime
= strtol(ptr
, &ptr
, 10);
314 offset
= strtoll(ptr
, &ptr
, 10);
315 length
= (size_t)strtoll(ptr
, &ptr
, 10);
317 while (isspace(*ptr
& 255))
332 while (*ptr
&& *ptr
!= '\"')
340 cupsCopyString(section
, sectptr
, sizeof(section
));
342 while (isspace(*ptr
& 255))
354 while (*ptr
&& *ptr
!= '\"')
362 if ((node
= help_new_node(filename
, anchor
, section
, text
,
363 mtime
, offset
, length
)) == NULL
)
368 cupsArrayAdd(hi
->nodes
, node
);
377 * Scan for new/updated files...
380 update
= help_load_directory(hi
, directory
, NULL
);
383 * Remove any files that are no longer installed...
386 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
388 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
392 * Delete this node...
395 cupsArrayRemove(hi
->nodes
, node
);
396 help_delete_node(node
);
400 * Add nodes to the sorted array...
403 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
405 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
406 cupsArrayAdd(hi
->sorted
, node
);
409 * Save the index if we updated it...
413 helpSaveIndex(hi
, hifile
);
416 * Return the index...
424 * 'helpSaveIndex()' - Save a help index to disk.
427 int /* O - 0 on success, -1 on error */
428 helpSaveIndex(help_index_t
*hi
, /* I - Index */
429 const char *hifile
) /* I - Index filename */
431 cups_file_t
*fp
; /* Index file */
432 help_node_t
*node
; /* Current node */
433 help_word_t
*word
; /* Current word */
437 * Try creating a new index file...
440 if ((fp
= cupsFileOpen(hifile
, "w9")) == NULL
)
444 * Lock the file while we write it...
449 cupsFilePuts(fp
, "HELPV2\n");
451 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
453 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
456 * Write the current node with/without the anchor...
461 if (cupsFilePrintf(fp
, "%s#%s " CUPS_LLFMT
" " CUPS_LLFMT
" \"%s\"\n",
462 node
->filename
, node
->anchor
,
463 CUPS_LLCAST node
->offset
, CUPS_LLCAST node
->length
,
469 if (cupsFilePrintf(fp
, "%s %d " CUPS_LLFMT
" " CUPS_LLFMT
" \"%s\" \"%s\"\n",
470 node
->filename
, (int)node
->mtime
,
471 CUPS_LLCAST node
->offset
, CUPS_LLCAST node
->length
,
472 node
->section
? node
->section
: "", node
->text
) < 0)
477 * Then write the words associated with the node...
480 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
482 word
= (help_word_t
*)cupsArrayNext(node
->words
))
483 if (cupsFilePrintf(fp
, " %d %s\n", word
->count
, word
->text
) < 0)
489 if (cupsFileClose(fp
) < 0)
499 * 'helpSearchIndex()' - Search an index.
502 help_index_t
* /* O - Search index */
503 helpSearchIndex(help_index_t
*hi
, /* I - Index */
504 const char *query
, /* I - Query string */
505 const char *section
, /* I - Limit search to this section */
506 const char *filename
) /* I - Limit search to this file */
508 help_index_t
*search
; /* Search index */
509 help_node_t
*node
; /* Current node */
510 help_word_t
*word
; /* Current word */
511 void *sc
; /* Search context */
512 int matches
; /* Number of matches */
523 * Reset the scores of all nodes to 0...
526 for (node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
528 node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
532 * Find the first node to search in...
537 node
= helpFindNode(hi
, filename
, NULL
);
542 node
= (help_node_t
*)cupsArrayFirst(hi
->nodes
);
545 * Convert the query into a regular expression...
548 sc
= cgiCompileSearch(query
);
553 * Allocate a search index...
556 search
= calloc(1, sizeof(help_index_t
));
563 search
->nodes
= cupsArrayNew((cups_array_func_t
)help_sort_by_name
, NULL
);
564 search
->sorted
= cupsArrayNew((cups_array_func_t
)help_sort_by_score
, NULL
);
566 if (!search
->nodes
|| !search
->sorted
)
568 cupsArrayDelete(search
->nodes
);
569 cupsArrayDelete(search
->sorted
);
578 * Check each node in the index, adding matching nodes to the
582 for (; node
; node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
583 if (node
->section
&& section
&& strcmp(node
->section
, section
))
585 else if (filename
&& strcmp(node
->filename
, filename
))
589 matches
= cgiDoSearch(sc
, node
->text
);
591 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
593 word
= (help_word_t
*)cupsArrayNext(node
->words
))
594 if (cgiDoSearch(sc
, word
->text
) > 0)
595 matches
+= word
->count
;
600 * Found a match, add the node to the search index...
603 node
->score
= matches
;
605 cupsArrayAdd(search
->nodes
, node
);
606 cupsArrayAdd(search
->sorted
, node
);
611 * Free the search context...
617 * Return the results...
625 * 'help_add_word()' - Add a word to a node.
628 static help_word_t
* /* O - New word */
629 help_add_word(help_node_t
*n
, /* I - Node */
630 const char *text
) /* I - Word text */
632 help_word_t
*w
, /* New word */
633 key
; /* Search key */
637 * Create the words array as needed...
641 n
->words
= cupsArrayNew((cups_array_func_t
)help_sort_words
, NULL
);
644 * See if the word is already added...
647 key
.text
= (char *)text
;
649 if ((w
= (help_word_t
*)cupsArrayFind(n
->words
, &key
)) == NULL
)
652 * Create a new word...
655 if ((w
= calloc(1, sizeof(help_word_t
))) == NULL
)
658 if ((w
->text
= strdup(text
)) == NULL
)
664 cupsArrayAdd(n
->words
, w
);
668 * Bump the counter for this word and return it...
678 * 'help_delete_node()' - Free all memory used by a node.
682 help_delete_node(help_node_t
*n
) /* I - Node */
684 help_word_t
*w
; /* Current word */
702 for (w
= (help_word_t
*)cupsArrayFirst(n
->words
);
704 w
= (help_word_t
*)cupsArrayNext(n
->words
))
707 cupsArrayDelete(n
->words
);
714 * 'help_delete_word()' - Free all memory used by a word.
718 help_delete_word(help_word_t
*w
) /* I - Word */
731 * 'help_load_directory()' - Load a directory of files into an index.
734 static int /* O - 0 = success, -1 = error, 1 = updated */
736 help_index_t
*hi
, /* I - Index */
737 const char *directory
, /* I - Directory */
738 const char *relative
) /* I - Relative path */
740 cups_dir_t
*dir
; /* Directory file */
741 cups_dentry_t
*dent
; /* Directory entry */
742 char *ext
, /* Pointer to extension */
743 filename
[1024], /* Full filename */
744 relname
[1024]; /* Relative filename */
745 int update
; /* Updated? */
746 help_node_t
*node
; /* Current node */
750 * Open the directory and scan it...
753 if ((dir
= cupsDirOpen(directory
)) == NULL
)
758 while ((dent
= cupsDirRead(dir
)) != NULL
)
764 if (dent
->filename
[0] == '.')
768 * Get absolute and relative filenames...
771 snprintf(filename
, sizeof(filename
), "%s/%s", directory
, dent
->filename
);
773 snprintf(relname
, sizeof(relname
), "%s/%s", relative
, dent
->filename
);
775 cupsCopyString(relname
, dent
->filename
, sizeof(relname
));
778 * Check if we have an HTML file...
781 if ((ext
= strstr(dent
->filename
, ".html")) != NULL
&&
782 (!ext
[5] || !strcmp(ext
+ 5, ".gz")))
785 * HTML file, see if we have already indexed the file...
788 if ((node
= helpFindNode(hi
, relname
, NULL
)) != NULL
)
791 * File already indexed - check dates to confirm that the
792 * index is up-to-date...
795 if (node
->mtime
== dent
->fileinfo
.st_mtime
)
798 * Same modification time, so mark all of the nodes
799 * for this file as up-to-date...
802 for (; node
; node
= (help_node_t
*)cupsArrayNext(hi
->nodes
))
803 if (!strcmp(node
->filename
, relname
))
814 help_load_file(hi
, filename
, relname
, dent
->fileinfo
.st_mtime
);
816 else if (S_ISDIR(dent
->fileinfo
.st_mode
))
819 * Process sub-directory...
822 if (help_load_directory(hi
, filename
, relname
) == 1)
834 * 'help_load_file()' - Load an HTML file into an index.
837 static int /* O - 0 = success, -1 = error */
839 help_index_t
*hi
, /* I - Index */
840 const char *filename
, /* I - Filename */
841 const char *relative
, /* I - Relative path */
842 time_t mtime
) /* I - Modification time */
844 cups_file_t
*fp
; /* HTML file */
845 help_node_t
*node
; /* Current node */
846 char line
[1024], /* Line from file */
847 temp
[1024], /* Temporary word */
848 section
[1024], /* Section */
849 *ptr
, /* Pointer into line */
850 *anchor
, /* Anchor name */
851 *text
; /* Text for anchor */
852 off_t offset
; /* File offset */
853 char quote
; /* Quote character */
854 help_word_t
*word
; /* Current word */
855 size_t wordlen
; /* Length of word */
858 if ((fp
= cupsFileOpen(filename
, "r")) == NULL
)
864 if (strstr(filename
, "/man-") != NULL
)
865 cupsCopyString(section
, "Man Pages", sizeof(section
));
867 cupsCopyString(section
, "Other", sizeof(section
));
869 while (cupsFileGets(fp
, line
, sizeof(line
)))
872 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
875 if ((ptr
= strstr(line
, "<!-- SECTION:")) != NULL
)
878 * Got section line, copy it!
881 for (ptr
+= 13; isspace(*ptr
& 255); ptr
++);
883 cupsCopyString(section
, ptr
, sizeof(section
));
884 if ((ptr
= strstr(section
, "-->")) != NULL
)
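887 * Strip comment stuff from end of line... NOTE(review): ptr points into section[] here (from strstr(section, "-->")), so the "ptr > line" bound in the loop below looks wrong and should probably be "ptr > section" - confirm.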
890 for (*ptr
-- = '\0'; ptr
> line
&& isspace(*ptr
& 255); *ptr
-- = '\0');
892 if (isspace(*ptr
& 255))
898 for (ptr
= line
; (ptr
= strchr(ptr
, '<')) != NULL
;)
902 if (!_cups_strncasecmp(ptr
, "TITLE>", 6))
913 char *idptr
; /* Pointer to ID */
915 if (!_cups_strncasecmp(ptr
, "A NAME=", 7))
917 else if ((idptr
= strstr(ptr
, " ID=")) != NULL
)
919 else if ((idptr
= strstr(ptr
, " id=")) != NULL
)
928 if (*ptr
== '\"' || *ptr
== '\'')
931 * Get quoted anchor...
936 if ((ptr
= strchr(anchor
, quote
)) != NULL
)
944 * Get unquoted anchor...
949 for (ptr
= anchor
; *ptr
&& *ptr
!= '>' && !isspace(*ptr
& 255); ptr
++);
958 * Got the anchor, now let's find the end...
961 while (*ptr
&& *ptr
!= '>')
971 * Now collect text for the link...
975 while ((ptr
= strchr(text
, '<')) == NULL
)
977 ptr
= text
+ strlen(text
);
978 if (ptr
>= (line
+ sizeof(line
) - 2))
983 if (!cupsFileGets(fp
, ptr
, sizeof(line
) - (size_t)(ptr
- line
) - 1))
990 node
->length
= (size_t)(offset
- node
->offset
);
998 if ((node
= helpFindNode(hi
, relative
, anchor
)) != NULL
)
1001 * Node already in the index, so replace the text and other
1005 cupsArrayRemove(hi
->nodes
, node
);
1008 free(node
->section
);
1015 for (word
= (help_word_t
*)cupsArrayFirst(node
->words
);
1017 word
= (help_word_t
*)cupsArrayNext(node
->words
))
1018 help_delete_word(word
);
1020 cupsArrayDelete(node
->words
);
1024 node
->section
= section
[0] ? strdup(section
) : NULL
;
1025 node
->text
= strdup(text
);
1026 node
->mtime
= mtime
;
1027 node
->offset
= offset
;
1036 node
= help_new_node(relative
, anchor
, section
, text
, mtime
, offset
, 0);
1040 * Go through the text value and replace tabs and newlines with
1041 * whitespace and eliminate extra whitespace...
1044 for (ptr
= node
->text
, text
= node
->text
; *ptr
;)
1045 if (isspace(*ptr
& 255))
1047 while (isspace(*ptr
& 255))
1052 else if (text
!= ptr
)
1063 * (Re)add the node to the array...
1066 cupsArrayAdd(hi
->nodes
, node
);
1076 * Scan this line for words...
1079 for (ptr
= line
; *ptr
; ptr
++)
1082 * Skip HTML stuff...
1087 if (!strncmp(ptr
, "<!--", 4))
1090 * Skip HTML comment...
1093 if ((text
= strstr(ptr
+ 4, "-->")) == NULL
)
1094 ptr
+= strlen(ptr
) - 1;
1101 * Skip HTML element...
1104 for (ptr
++; *ptr
&& *ptr
!= '>'; ptr
++)
1106 if (*ptr
== '\"' || *ptr
== '\'')
1108 for (quote
= *ptr
++; *ptr
&& *ptr
!= quote
; ptr
++);
1121 else if (*ptr
== '&')
1124 * Skip HTML entity...
1127 for (ptr
++; *ptr
&& *ptr
!= ';'; ptr
++);
1134 else if (!isalnum(*ptr
& 255))
1138 * Found the start of a word, search until we find the end...
1141 for (text
= ptr
, ptr
++; *ptr
&& isalnum(*ptr
& 255); ptr
++);
1143 wordlen
= (size_t)(ptr
- text
);
1145 memcpy(temp
, text
, wordlen
);
1146 temp
[wordlen
] = '\0';
1150 if (wordlen
> 1 && !bsearch(temp
, help_common_words
,
1151 (sizeof(help_common_words
) /
1152 sizeof(help_common_words
[0])),
1153 sizeof(help_common_words
[0]),
1154 (int (*)(const void *, const void *))
1156 help_add_word(node
, temp
);
1161 * Get the offset of the next line...
1164 offset
= cupsFileTell(fp
);
1170 node
->length
= (size_t)(offset
- node
->offset
);
1177 * 'help_new_node()' - Create a new node; the caller adds it to an index.
1180 static help_node_t
* /* O - Node pointer or NULL on error */
1181 help_new_node(const char *filename
, /* I - Filename */
1182 const char *anchor
, /* I - Anchor */
1183 const char *section
, /* I - Section */
1184 const char *text
, /* I - Text */
1185 time_t mtime
, /* I - Modification time */
1186 off_t offset
, /* I - Offset in file */
1187 size_t length
) /* I - Length in bytes */
1189 help_node_t
*n
; /* Node */
1192 n
= (help_node_t
*)calloc(1, sizeof(help_node_t
));
1196 n
->filename
= strdup(filename
);
1197 n
->anchor
= anchor
? strdup(anchor
) : NULL
;
1198 n
->section
= (section
&& *section
) ? strdup(section
) : NULL
;
1199 n
->text
= strdup(text
);
1209 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1212 static int /* O - Difference */
1214 help_node_t
*n1
, /* I - First node */
1215 help_node_t
*n2
, /* I - Second node */
1216 void *data
) /* Unused */
1218 int diff
; /* Difference */
1223 if ((diff
= strcmp(n1
->filename
, n2
->filename
)) != 0)
1226 if (!n1
->anchor
&& !n2
->anchor
)
1228 else if (!n1
->anchor
)
1230 else if (!n2
->anchor
)
1233 return (strcmp(n1
->anchor
, n2
->anchor
));
1238 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
1241 static int /* O - Difference */
1242 help_sort_by_score(help_node_t
*n1
, /* I - First node */
1243 help_node_t
*n2
, /* I - Second node */
1244 void *data
) /* I - Unused */
1246 int diff
; /* Difference */
1251 if (n1
->score
!= n2
->score
)
1252 return (n2
->score
- n1
->score
);
1254 if (n1
->section
&& !n2
->section
)
1256 else if (!n1
->section
&& n2
->section
)
1258 else if (n1
->section
&& n2
->section
&&
1259 (diff
= strcmp(n1
->section
, n2
->section
)) != 0)
1262 return (_cups_strcasecmp(n1
->text
, n2
->text
));
1267 * 'help_sort_words()' - Sort words alphabetically.
1270 static int /* O - Difference */
1271 help_sort_words(help_word_t
*w1
, /* I - First word */
1272 help_word_t
*w2
, /* I - Second word */
1273 void *data
) /* Unused */
1276 return (_cups_strcasecmp(w1
->text
, w2
->text
));