]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Add man page for and installation of ippeveprinter commands.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright 2007-2017 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
8 */
9
10 /*
11 * Include necessary headers...
12 */
13
14 #include "cgi-private.h"
15 #include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is looked up with bsearch() and a case-insensitive string
 * comparison, so the entries MUST remain in alphabetical order.  Each entry
 * occupies a fixed 6-byte slot, which limits a word to 5 characters plus the
 * terminating nul.  The table is never modified, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121
122
123 /*
124 * Local functions...
125 */
126
127 static help_word_t *help_add_word(help_node_t *n, const char *text);
128 static void help_delete_node(help_node_t *n);
129 static void help_delete_word(help_word_t *w);
130 static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133 static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137 static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
138 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
139 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
140 static int help_sort_words(help_word_t *w1, help_word_t *w2);
141
142
143 /*
144 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
145 */
146
147 void
148 helpDeleteIndex(help_index_t *hi) /* I - Help index */
149 {
150 help_node_t *node; /* Current node */
151
152
153 if (!hi)
154 return;
155
156 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
157 node;
158 node = (help_node_t *)cupsArrayNext(hi->nodes))
159 {
160 if (!hi->search)
161 help_delete_node(node);
162 }
163
164 cupsArrayDelete(hi->nodes);
165 cupsArrayDelete(hi->sorted);
166
167 free(hi);
168 }
169
170
171 /*
172 * 'helpFindNode()' - Find a node in an index.
173 */
174
175 help_node_t * /* O - Node pointer or NULL */
176 helpFindNode(help_index_t *hi, /* I - Index */
177 const char *filename, /* I - Filename */
178 const char *anchor) /* I - Anchor */
179 {
180 help_node_t key; /* Search key */
181
182
183 /*
184 * Range check input...
185 */
186
187 if (!hi || !filename)
188 return (NULL);
189
190 /*
191 * Initialize the search key...
192 */
193
194 key.filename = (char *)filename;
195 key.anchor = (char *)anchor;
196
197 /*
198 * Return any match...
199 */
200
201 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
202 }
203
204
205 /*
206 * 'helpLoadIndex()' - Load a help index from disk.
207 */
208
209 help_index_t * /* O - Index pointer or NULL */
210 helpLoadIndex(const char *hifile, /* I - Index filename */
211 const char *directory) /* I - Directory that is indexed */
212 {
213 help_index_t *hi; /* Help index */
214 cups_file_t *fp; /* Current file */
215 char line[2048], /* Line from file */
216 *ptr, /* Pointer into line */
217 *filename, /* Filename in line */
218 *anchor, /* Anchor in line */
219 *sectptr, /* Section pointer in line */
220 section[1024], /* Section name */
221 *text; /* Text in line */
222 time_t mtime; /* Modification time */
223 off_t offset; /* Offset into file */
224 size_t length; /* Length in bytes */
225 int update; /* Update? */
226 help_node_t *node; /* Current node */
227 help_word_t *word; /* Current word */
228
229
230 /*
231 * Create a new, empty index.
232 */
233
234 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
235 return (NULL);
236
237 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
238 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
239
240 if (!hi->nodes || !hi->sorted)
241 {
242 cupsArrayDelete(hi->nodes);
243 cupsArrayDelete(hi->sorted);
244 free(hi);
245 return (NULL);
246 }
247
248 /*
249 * Try loading the existing index file...
250 */
251
252 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
253 {
254 /*
255 * Lock the file and then read the first line...
256 */
257
258 cupsFileLock(fp, 1);
259
260 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
261 {
262 /*
263 * Got a valid header line, now read the data lines...
264 */
265
266 node = NULL;
267
268 while (cupsFileGets(fp, line, sizeof(line)))
269 {
270 /*
271 * Each line looks like one of the following:
272 *
273 * filename mtime offset length "section" "text"
274 * filename#anchor offset length "text"
275 * SP count word
276 */
277
278 if (line[0] == ' ')
279 {
280 /*
281 * Read a word in the current node...
282 */
283
284 if (!node || (ptr = strrchr(line, ' ')) == NULL)
285 continue;
286
287 if ((word = help_add_word(node, ptr + 1)) != NULL)
288 word->count = atoi(line + 1);
289 }
290 else
291 {
292 /*
293 * Add a node...
294 */
295
296 filename = line;
297
298 if ((ptr = strchr(line, ' ')) == NULL)
299 break;
300
301 while (isspace(*ptr & 255))
302 *ptr++ = '\0';
303
304 if ((anchor = strrchr(filename, '#')) != NULL)
305 {
306 *anchor++ = '\0';
307 mtime = 0;
308 }
309 else
310 mtime = strtol(ptr, &ptr, 10);
311
312 offset = strtoll(ptr, &ptr, 10);
313 length = (size_t)strtoll(ptr, &ptr, 10);
314
315 while (isspace(*ptr & 255))
316 ptr ++;
317
318 if (!anchor)
319 {
320 /*
321 * Get section...
322 */
323
324 if (*ptr != '\"')
325 break;
326
327 ptr ++;
328 sectptr = ptr;
329
330 while (*ptr && *ptr != '\"')
331 ptr ++;
332
333 if (*ptr != '\"')
334 break;
335
336 *ptr++ = '\0';
337
338 strlcpy(section, sectptr, sizeof(section));
339
340 while (isspace(*ptr & 255))
341 ptr ++;
342 }
343
344 if (*ptr != '\"')
345 break;
346
347 ptr ++;
348 text = ptr;
349
350 while (*ptr && *ptr != '\"')
351 ptr ++;
352
353 if (*ptr != '\"')
354 break;
355
356 *ptr++ = '\0';
357
358 if ((node = help_new_node(filename, anchor, section, text,
359 mtime, offset, length)) == NULL)
360 break;
361
362 node->score = -1;
363
364 cupsArrayAdd(hi->nodes, node);
365 }
366 }
367 }
368
369 cupsFileClose(fp);
370 }
371
372 /*
373 * Scan for new/updated files...
374 */
375
376 update = help_load_directory(hi, directory, NULL);
377
378 /*
379 * Remove any files that are no longer installed...
380 */
381
382 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
383 node;
384 node = (help_node_t *)cupsArrayNext(hi->nodes))
385 if (node->score < 0)
386 {
387 /*
388 * Delete this node...
389 */
390
391 cupsArrayRemove(hi->nodes, node);
392 help_delete_node(node);
393 }
394
395 /*
396 * Add nodes to the sorted array...
397 */
398
399 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
400 node;
401 node = (help_node_t *)cupsArrayNext(hi->nodes))
402 cupsArrayAdd(hi->sorted, node);
403
404 /*
405 * Save the index if we updated it...
406 */
407
408 if (update)
409 helpSaveIndex(hi, hifile);
410
411 /*
412 * Return the index...
413 */
414
415 return (hi);
416 }
417
418
419 /*
420 * 'helpSaveIndex()' - Save a help index to disk.
421 */
422
423 int /* O - 0 on success, -1 on error */
424 helpSaveIndex(help_index_t *hi, /* I - Index */
425 const char *hifile) /* I - Index filename */
426 {
427 cups_file_t *fp; /* Index file */
428 help_node_t *node; /* Current node */
429 help_word_t *word; /* Current word */
430
431
432 /*
433 * Try creating a new index file...
434 */
435
436 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
437 return (-1);
438
439 /*
440 * Lock the file while we write it...
441 */
442
443 cupsFileLock(fp, 1);
444
445 cupsFilePuts(fp, "HELPV2\n");
446
447 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
448 node;
449 node = (help_node_t *)cupsArrayNext(hi->nodes))
450 {
451 /*
452 * Write the current node with/without the anchor...
453 */
454
455 if (node->anchor)
456 {
457 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
458 node->filename, node->anchor,
459 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
460 node->text) < 0)
461 break;
462 }
463 else
464 {
465 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
466 node->filename, (int)node->mtime,
467 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
468 node->section ? node->section : "", node->text) < 0)
469 break;
470 }
471
472 /*
473 * Then write the words associated with the node...
474 */
475
476 for (word = (help_word_t *)cupsArrayFirst(node->words);
477 word;
478 word = (help_word_t *)cupsArrayNext(node->words))
479 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
480 break;
481 }
482
483 cupsFileFlush(fp);
484
485 if (cupsFileClose(fp) < 0)
486 return (-1);
487 else if (node)
488 return (-1);
489 else
490 return (0);
491 }
492
493
494 /*
495 * 'helpSearchIndex()' - Search an index.
496 */
497
498 help_index_t * /* O - Search index */
499 helpSearchIndex(help_index_t *hi, /* I - Index */
500 const char *query, /* I - Query string */
501 const char *section, /* I - Limit search to this section */
502 const char *filename) /* I - Limit search to this file */
503 {
504 help_index_t *search; /* Search index */
505 help_node_t *node; /* Current node */
506 help_word_t *word; /* Current word */
507 void *sc; /* Search context */
508 int matches; /* Number of matches */
509
510
511 /*
512 * Range check...
513 */
514
515 if (!hi || !query)
516 return (NULL);
517
518 /*
519 * Reset the scores of all nodes to 0...
520 */
521
522 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
523 node;
524 node = (help_node_t *)cupsArrayNext(hi->nodes))
525 node->score = 0;
526
527 /*
528 * Find the first node to search in...
529 */
530
531 if (filename)
532 {
533 node = helpFindNode(hi, filename, NULL);
534 if (!node)
535 return (NULL);
536 }
537 else
538 node = (help_node_t *)cupsArrayFirst(hi->nodes);
539
540 /*
541 * Convert the query into a regular expression...
542 */
543
544 sc = cgiCompileSearch(query);
545 if (!sc)
546 return (NULL);
547
548 /*
549 * Allocate a search index...
550 */
551
552 search = calloc(1, sizeof(help_index_t));
553 if (!search)
554 {
555 cgiFreeSearch(sc);
556 return (NULL);
557 }
558
559 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
560 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
561
562 if (!search->nodes || !search->sorted)
563 {
564 cupsArrayDelete(search->nodes);
565 cupsArrayDelete(search->sorted);
566 free(search);
567 cgiFreeSearch(sc);
568 return (NULL);
569 }
570
571 search->search = 1;
572
573 /*
574 * Check each node in the index, adding matching nodes to the
575 * search index...
576 */
577
578 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
579 if (section && strcmp(node->section, section))
580 continue;
581 else if (filename && strcmp(node->filename, filename))
582 continue;
583 else
584 {
585 matches = cgiDoSearch(sc, node->text);
586
587 for (word = (help_word_t *)cupsArrayFirst(node->words);
588 word;
589 word = (help_word_t *)cupsArrayNext(node->words))
590 if (cgiDoSearch(sc, word->text) > 0)
591 matches += word->count;
592
593 if (matches > 0)
594 {
595 /*
596 * Found a match, add the node to the search index...
597 */
598
599 node->score = matches;
600
601 cupsArrayAdd(search->nodes, node);
602 cupsArrayAdd(search->sorted, node);
603 }
604 }
605
606 /*
607 * Free the search context...
608 */
609
610 cgiFreeSearch(sc);
611
612 /*
613 * Return the results...
614 */
615
616 return (search);
617 }
618
619
620 /*
621 * 'help_add_word()' - Add a word to a node.
622 */
623
624 static help_word_t * /* O - New word */
625 help_add_word(help_node_t *n, /* I - Node */
626 const char *text) /* I - Word text */
627 {
628 help_word_t *w, /* New word */
629 key; /* Search key */
630
631
632 /*
633 * Create the words array as needed...
634 */
635
636 if (!n->words)
637 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
638
639 /*
640 * See if the word is already added...
641 */
642
643 key.text = (char *)text;
644
645 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
646 {
647 /*
648 * Create a new word...
649 */
650
651 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
652 return (NULL);
653
654 if ((w->text = strdup(text)) == NULL)
655 {
656 free(w);
657 return (NULL);
658 }
659
660 cupsArrayAdd(n->words, w);
661 }
662
663 /*
664 * Bump the counter for this word and return it...
665 */
666
667 w->count ++;
668
669 return (w);
670 }
671
672
673 /*
674 * 'help_delete_node()' - Free all memory used by a node.
675 */
676
677 static void
678 help_delete_node(help_node_t *n) /* I - Node */
679 {
680 help_word_t *w; /* Current word */
681
682
683 if (!n)
684 return;
685
686 if (n->filename)
687 free(n->filename);
688
689 if (n->anchor)
690 free(n->anchor);
691
692 if (n->section)
693 free(n->section);
694
695 if (n->text)
696 free(n->text);
697
698 for (w = (help_word_t *)cupsArrayFirst(n->words);
699 w;
700 w = (help_word_t *)cupsArrayNext(n->words))
701 help_delete_word(w);
702
703 cupsArrayDelete(n->words);
704
705 free(n);
706 }
707
708
709 /*
710 * 'help_delete_word()' - Free all memory used by a word.
711 */
712
713 static void
714 help_delete_word(help_word_t *w) /* I - Word */
715 {
716 if (!w)
717 return;
718
719 if (w->text)
720 free(w->text);
721
722 free(w);
723 }
724
725
726 /*
727 * 'help_load_directory()' - Load a directory of files into an index.
728 */
729
730 static int /* O - 0 = success, -1 = error, 1 = updated */
731 help_load_directory(
732 help_index_t *hi, /* I - Index */
733 const char *directory, /* I - Directory */
734 const char *relative) /* I - Relative path */
735 {
736 cups_dir_t *dir; /* Directory file */
737 cups_dentry_t *dent; /* Directory entry */
738 char *ext, /* Pointer to extension */
739 filename[1024], /* Full filename */
740 relname[1024]; /* Relative filename */
741 int update; /* Updated? */
742 help_node_t *node; /* Current node */
743
744
745 /*
746 * Open the directory and scan it...
747 */
748
749 if ((dir = cupsDirOpen(directory)) == NULL)
750 return (0);
751
752 update = 0;
753
754 while ((dent = cupsDirRead(dir)) != NULL)
755 {
756 /*
757 * Skip "." files...
758 */
759
760 if (dent->filename[0] == '.')
761 continue;
762
763 /*
764 * Get absolute and relative filenames...
765 */
766
767 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
768 if (relative)
769 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
770 else
771 strlcpy(relname, dent->filename, sizeof(relname));
772
773 /*
774 * Check if we have a HTML file...
775 */
776
777 if ((ext = strstr(dent->filename, ".html")) != NULL &&
778 (!ext[5] || !strcmp(ext + 5, ".gz")))
779 {
780 /*
781 * HTML file, see if we have already indexed the file...
782 */
783
784 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
785 {
786 /*
787 * File already indexed - check dates to confirm that the
788 * index is up-to-date...
789 */
790
791 if (node->mtime == dent->fileinfo.st_mtime)
792 {
793 /*
794 * Same modification time, so mark all of the nodes
795 * for this file as up-to-date...
796 */
797
798 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
799 if (!strcmp(node->filename, relname))
800 node->score = 0;
801 else
802 break;
803
804 continue;
805 }
806 }
807
808 update = 1;
809
810 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
811 }
812 else if (S_ISDIR(dent->fileinfo.st_mode))
813 {
814 /*
815 * Process sub-directory...
816 */
817
818 if (help_load_directory(hi, filename, relname) == 1)
819 update = 1;
820 }
821 }
822
823 cupsDirClose(dir);
824
825 return (update);
826 }
827
828
829 /*
830 * 'help_load_file()' - Load a HTML files into an index.
831 */
832
833 static int /* O - 0 = success, -1 = error */
834 help_load_file(
835 help_index_t *hi, /* I - Index */
836 const char *filename, /* I - Filename */
837 const char *relative, /* I - Relative path */
838 time_t mtime) /* I - Modification time */
839 {
840 cups_file_t *fp; /* HTML file */
841 help_node_t *node; /* Current node */
842 char line[1024], /* Line from file */
843 temp[1024], /* Temporary word */
844 section[1024], /* Section */
845 *ptr, /* Pointer into line */
846 *anchor, /* Anchor name */
847 *text; /* Text for anchor */
848 off_t offset; /* File offset */
849 char quote; /* Quote character */
850 help_word_t *word; /* Current word */
851 int wordlen; /* Length of word */
852
853
854 if ((fp = cupsFileOpen(filename, "r")) == NULL)
855 return (-1);
856
857 node = NULL;
858 offset = 0;
859
860 strlcpy(section, "Other", sizeof(section));
861
862 while (cupsFileGets(fp, line, sizeof(line)))
863 {
864 /*
865 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
866 */
867
868 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
869 {
870 /*
871 * Got section line, copy it!
872 */
873
874 for (ptr += 13; isspace(*ptr & 255); ptr ++);
875
876 strlcpy(section, ptr, sizeof(section));
877 if ((ptr = strstr(section, "-->")) != NULL)
878 {
879 /*
880 * Strip comment stuff from end of line...
881 */
882
883 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
884
885 if (isspace(*ptr & 255))
886 *ptr = '\0';
887 }
888 continue;
889 }
890
891 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
892 {
893 ptr ++;
894
895 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
896 {
897 /*
898 * Found the title...
899 */
900
901 anchor = NULL;
902 ptr += 6;
903 }
904 else
905 {
906 char *idptr; /* Pointer to ID */
907
908 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
909 ptr += 7;
910 else if ((idptr = strstr(ptr, " ID=")) != NULL)
911 ptr = idptr + 4;
912 else if ((idptr = strstr(ptr, " id=")) != NULL)
913 ptr = idptr + 4;
914 else
915 continue;
916
917 /*
918 * Found an anchor...
919 */
920
921 if (*ptr == '\"' || *ptr == '\'')
922 {
923 /*
924 * Get quoted anchor...
925 */
926
927 quote = *ptr;
928 anchor = ptr + 1;
929 if ((ptr = strchr(anchor, quote)) != NULL)
930 *ptr++ = '\0';
931 else
932 break;
933 }
934 else
935 {
936 /*
937 * Get unquoted anchor...
938 */
939
940 anchor = ptr + 1;
941
942 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
943
944 if (*ptr != '>')
945 *ptr++ = '\0';
946 else
947 break;
948 }
949
950 /*
951 * Got the anchor, now lets find the end...
952 */
953
954 while (*ptr && *ptr != '>')
955 ptr ++;
956
957 if (*ptr != '>')
958 break;
959
960 *ptr++ = '\0';
961 }
962
963 /*
964 * Now collect text for the link...
965 */
966
967 text = ptr;
968 while ((ptr = strchr(text, '<')) == NULL)
969 {
970 ptr = text + strlen(text);
971 if (ptr >= (line + sizeof(line) - 2))
972 break;
973
974 *ptr++ = ' ';
975
976 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
977 break;
978 }
979
980 *ptr = '\0';
981
982 if (node)
983 node->length = (size_t)(offset - node->offset);
984
985 if (!*text)
986 {
987 node = NULL;
988 break;
989 }
990
991 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
992 {
993 /*
994 * Node already in the index, so replace the text and other
995 * data...
996 */
997
998 cupsArrayRemove(hi->nodes, node);
999
1000 if (node->section)
1001 free(node->section);
1002
1003 if (node->text)
1004 free(node->text);
1005
1006 if (node->words)
1007 {
1008 for (word = (help_word_t *)cupsArrayFirst(node->words);
1009 word;
1010 word = (help_word_t *)cupsArrayNext(node->words))
1011 help_delete_word(word);
1012
1013 cupsArrayDelete(node->words);
1014 node->words = NULL;
1015 }
1016
1017 node->section = section[0] ? strdup(section) : NULL;
1018 node->text = strdup(text);
1019 node->mtime = mtime;
1020 node->offset = offset;
1021 node->score = 0;
1022 }
1023 else
1024 {
1025 /*
1026 * New node...
1027 */
1028
1029 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1030 }
1031
1032 /*
1033 * Go through the text value and replace tabs and newlines with
1034 * whitespace and eliminate extra whitespace...
1035 */
1036
1037 for (ptr = node->text, text = node->text; *ptr;)
1038 if (isspace(*ptr & 255))
1039 {
1040 while (isspace(*ptr & 255))
1041 ptr ++;
1042
1043 *text++ = ' ';
1044 }
1045 else if (text != ptr)
1046 *text++ = *ptr++;
1047 else
1048 {
1049 text ++;
1050 ptr ++;
1051 }
1052
1053 *text = '\0';
1054
1055 /*
1056 * (Re)add the node to the array...
1057 */
1058
1059 cupsArrayAdd(hi->nodes, node);
1060
1061 if (!anchor)
1062 node = NULL;
1063 break;
1064 }
1065
1066 if (node)
1067 {
1068 /*
1069 * Scan this line for words...
1070 */
1071
1072 for (ptr = line; *ptr; ptr ++)
1073 {
1074 /*
1075 * Skip HTML stuff...
1076 */
1077
1078 if (*ptr == '<')
1079 {
1080 if (!strncmp(ptr, "<!--", 4))
1081 {
1082 /*
1083 * Skip HTML comment...
1084 */
1085
1086 if ((text = strstr(ptr + 4, "-->")) == NULL)
1087 ptr += strlen(ptr) - 1;
1088 else
1089 ptr = text + 2;
1090 }
1091 else
1092 {
1093 /*
1094 * Skip HTML element...
1095 */
1096
1097 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1098 {
1099 if (*ptr == '\"' || *ptr == '\'')
1100 {
1101 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1102
1103 if (!*ptr)
1104 ptr --;
1105 }
1106 }
1107
1108 if (!*ptr)
1109 ptr --;
1110 }
1111
1112 continue;
1113 }
1114 else if (*ptr == '&')
1115 {
1116 /*
1117 * Skip HTML entity...
1118 */
1119
1120 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1121
1122 if (!*ptr)
1123 ptr --;
1124
1125 continue;
1126 }
1127 else if (!isalnum(*ptr & 255))
1128 continue;
1129
1130 /*
1131 * Found the start of a word, search until we find the end...
1132 */
1133
1134 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1135
1136 wordlen = (int)(ptr - text);
1137
1138 memcpy(temp, text, (size_t)wordlen);
1139 temp[wordlen] = '\0';
1140
1141 ptr --;
1142
1143 if (wordlen > 1 && !bsearch(temp, help_common_words,
1144 (sizeof(help_common_words) /
1145 sizeof(help_common_words[0])),
1146 sizeof(help_common_words[0]),
1147 (int (*)(const void *, const void *))
1148 _cups_strcasecmp))
1149 help_add_word(node, temp);
1150 }
1151 }
1152
1153 /*
1154 * Get the offset of the next line...
1155 */
1156
1157 offset = cupsFileTell(fp);
1158 }
1159
1160 cupsFileClose(fp);
1161
1162 if (node)
1163 node->length = (size_t)(offset - node->offset);
1164
1165 return (0);
1166 }
1167
1168
1169 /*
1170 * 'help_new_node()' - Create a new node and add it to an index.
1171 */
1172
1173 static help_node_t * /* O - Node pointer or NULL on error */
1174 help_new_node(const char *filename, /* I - Filename */
1175 const char *anchor, /* I - Anchor */
1176 const char *section, /* I - Section */
1177 const char *text, /* I - Text */
1178 time_t mtime, /* I - Modification time */
1179 off_t offset, /* I - Offset in file */
1180 size_t length) /* I - Length in bytes */
1181 {
1182 help_node_t *n; /* Node */
1183
1184
1185 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1186 if (!n)
1187 return (NULL);
1188
1189 n->filename = strdup(filename);
1190 n->anchor = anchor ? strdup(anchor) : NULL;
1191 n->section = *section ? strdup(section) : NULL;
1192 n->text = strdup(text);
1193 n->mtime = mtime;
1194 n->offset = offset;
1195 n->length = length;
1196
1197 return (n);
1198 }
1199
1200
1201 /*
1202 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1203 */
1204
1205 static int /* O - Difference */
1206 help_sort_by_name(help_node_t *n1, /* I - First node */
1207 help_node_t *n2) /* I - Second node */
1208 {
1209 int diff; /* Difference */
1210
1211
1212 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1213 return (diff);
1214
1215 if (!n1->anchor && !n2->anchor)
1216 return (0);
1217 else if (!n1->anchor)
1218 return (-1);
1219 else if (!n2->anchor)
1220 return (1);
1221 else
1222 return (strcmp(n1->anchor, n2->anchor));
1223 }
1224
1225
1226 /*
1227 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1228 */
1229
1230 static int /* O - Difference */
1231 help_sort_by_score(help_node_t *n1, /* I - First node */
1232 help_node_t *n2) /* I - Second node */
1233 {
1234 int diff; /* Difference */
1235
1236
1237 if (n1->score != n2->score)
1238 return (n2->score - n1->score);
1239
1240 if (n1->section && !n2->section)
1241 return (1);
1242 else if (!n1->section && n2->section)
1243 return (-1);
1244 else if (n1->section && n2->section &&
1245 (diff = strcmp(n1->section, n2->section)) != 0)
1246 return (diff);
1247
1248 return (_cups_strcasecmp(n1->text, n2->text));
1249 }
1250
1251
1252 /*
1253 * 'help_sort_words()' - Sort words alphabetically.
1254 */
1255
1256 static int /* O - Difference */
1257 help_sort_words(help_word_t *w1, /* I - Second word */
1258 help_word_t *w2) /* I - Second word */
1259 {
1260 return (_cups_strcasecmp(w1->text, w2->text));
1261 }