]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
9dbc23cb7bae7a6c2c1dd40a1aa97fab8caf64d7
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright © 2020-2024 by OpenPrinting.
5 * Copyright © 2007-2019 by Apple Inc.
6 * Copyright © 1997-2007 by Easy Software Products.
7 *
8 * Licensed under Apache License v2.0. See the file "LICENSE" for more
9 * information.
10 */
11
12 /*
13 * Include necessary headers...
14 */
15
16 #include "cgi-private.h"
17 #include <cups/dir.h>
18
19
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries must stay in sorted order.  Each entry is a
 * fixed 6-byte slot, which limits words to 5 characters plus the nul.
 * The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
123
124
125 /*
126 * Local functions...
127 */
128
129 static help_word_t *help_add_word(help_node_t *n, const char *text);
130 static void help_delete_node(help_node_t *n);
131 static void help_delete_word(help_word_t *w);
132 static int help_load_directory(help_index_t *hi,
133 const char *directory,
134 const char *relative);
135 static int help_load_file(help_index_t *hi,
136 const char *filename,
137 const char *relative,
138 time_t mtime);
139 static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
140 static int help_sort_by_name(help_node_t *p1, help_node_t *p2, void *data);
141 static int help_sort_by_score(help_node_t *p1, help_node_t *p2, void *data);
142 static int help_sort_words(help_word_t *w1, help_word_t *w2, void *data);
143
144
145 /*
146 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
147 */
148
149 void
150 helpDeleteIndex(help_index_t *hi) /* I - Help index */
151 {
152 help_node_t *node; /* Current node */
153
154
155 if (!hi)
156 return;
157
158 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159 node;
160 node = (help_node_t *)cupsArrayNext(hi->nodes))
161 {
162 if (!hi->search)
163 help_delete_node(node);
164 }
165
166 cupsArrayDelete(hi->nodes);
167 cupsArrayDelete(hi->sorted);
168
169 free(hi);
170 }
171
172
173 /*
174 * 'helpFindNode()' - Find a node in an index.
175 */
176
177 help_node_t * /* O - Node pointer or NULL */
178 helpFindNode(help_index_t *hi, /* I - Index */
179 const char *filename, /* I - Filename */
180 const char *anchor) /* I - Anchor */
181 {
182 help_node_t key; /* Search key */
183
184
185 /*
186 * Range check input...
187 */
188
189 if (!hi || !filename)
190 return (NULL);
191
192 /*
193 * Initialize the search key...
194 */
195
196 key.filename = (char *)filename;
197 key.anchor = (char *)anchor;
198
199 /*
200 * Return any match...
201 */
202
203 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
204 }
205
206
207 /*
208 * 'helpLoadIndex()' - Load a help index from disk.
209 */
210
211 help_index_t * /* O - Index pointer or NULL */
212 helpLoadIndex(const char *hifile, /* I - Index filename */
213 const char *directory) /* I - Directory that is indexed */
214 {
215 help_index_t *hi; /* Help index */
216 cups_file_t *fp; /* Current file */
217 char line[2048], /* Line from file */
218 *ptr, /* Pointer into line */
219 *filename, /* Filename in line */
220 *anchor, /* Anchor in line */
221 *sectptr, /* Section pointer in line */
222 section[1024], /* Section name */
223 *text; /* Text in line */
224 time_t mtime; /* Modification time */
225 off_t offset; /* Offset into file */
226 size_t length; /* Length in bytes */
227 int update; /* Update? */
228 help_node_t *node; /* Current node */
229 help_word_t *word; /* Current word */
230
231
232 /*
233 * Create a new, empty index.
234 */
235
236 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
237 return (NULL);
238
239 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
240 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
241
242 if (!hi->nodes || !hi->sorted)
243 {
244 cupsArrayDelete(hi->nodes);
245 cupsArrayDelete(hi->sorted);
246 free(hi);
247 return (NULL);
248 }
249
250 /*
251 * Try loading the existing index file...
252 */
253
254 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
255 {
256 /*
257 * Lock the file and then read the first line...
258 */
259
260 cupsFileLock(fp, 1);
261
262 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
263 {
264 /*
265 * Got a valid header line, now read the data lines...
266 */
267
268 node = NULL;
269
270 while (cupsFileGets(fp, line, sizeof(line)))
271 {
272 /*
273 * Each line looks like one of the following:
274 *
275 * filename mtime offset length "section" "text"
276 * filename#anchor offset length "text"
277 * SP count word
278 */
279
280 if (line[0] == ' ')
281 {
282 /*
283 * Read a word in the current node...
284 */
285
286 if (!node || (ptr = strrchr(line, ' ')) == NULL)
287 continue;
288
289 if ((word = help_add_word(node, ptr + 1)) != NULL)
290 word->count = atoi(line + 1);
291 }
292 else
293 {
294 /*
295 * Add a node...
296 */
297
298 filename = line;
299
300 if ((ptr = strchr(line, ' ')) == NULL)
301 break;
302
303 while (isspace(*ptr & 255))
304 *ptr++ = '\0';
305
306 if ((anchor = strrchr(filename, '#')) != NULL)
307 {
308 *anchor++ = '\0';
309 mtime = 0;
310 }
311 else
312 mtime = strtol(ptr, &ptr, 10);
313
314 offset = strtoll(ptr, &ptr, 10);
315 length = (size_t)strtoll(ptr, &ptr, 10);
316
317 while (isspace(*ptr & 255))
318 ptr ++;
319
320 if (!anchor)
321 {
322 /*
323 * Get section...
324 */
325
326 if (*ptr != '\"')
327 break;
328
329 ptr ++;
330 sectptr = ptr;
331
332 while (*ptr && *ptr != '\"')
333 ptr ++;
334
335 if (*ptr != '\"')
336 break;
337
338 *ptr++ = '\0';
339
340 cupsCopyString(section, sectptr, sizeof(section));
341
342 while (isspace(*ptr & 255))
343 ptr ++;
344 }
345 else
346 section[0] = '\0';
347
348 if (*ptr != '\"')
349 break;
350
351 ptr ++;
352 text = ptr;
353
354 while (*ptr && *ptr != '\"')
355 ptr ++;
356
357 if (*ptr != '\"')
358 break;
359
360 *ptr++ = '\0';
361
362 if ((node = help_new_node(filename, anchor, section, text,
363 mtime, offset, length)) == NULL)
364 break;
365
366 node->score = -1;
367
368 cupsArrayAdd(hi->nodes, node);
369 }
370 }
371 }
372
373 cupsFileClose(fp);
374 }
375
376 /*
377 * Scan for new/updated files...
378 */
379
380 update = help_load_directory(hi, directory, NULL);
381
382 /*
383 * Remove any files that are no longer installed...
384 */
385
386 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
387 node;
388 node = (help_node_t *)cupsArrayNext(hi->nodes))
389 if (node->score < 0)
390 {
391 /*
392 * Delete this node...
393 */
394
395 cupsArrayRemove(hi->nodes, node);
396 help_delete_node(node);
397 }
398
399 /*
400 * Add nodes to the sorted array...
401 */
402
403 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
404 node;
405 node = (help_node_t *)cupsArrayNext(hi->nodes))
406 cupsArrayAdd(hi->sorted, node);
407
408 /*
409 * Save the index if we updated it...
410 */
411
412 if (update)
413 helpSaveIndex(hi, hifile);
414
415 /*
416 * Return the index...
417 */
418
419 return (hi);
420 }
421
422
423 /*
424 * 'helpSaveIndex()' - Save a help index to disk.
425 */
426
427 int /* O - 0 on success, -1 on error */
428 helpSaveIndex(help_index_t *hi, /* I - Index */
429 const char *hifile) /* I - Index filename */
430 {
431 cups_file_t *fp; /* Index file */
432 help_node_t *node; /* Current node */
433 help_word_t *word; /* Current word */
434
435
436 /*
437 * Try creating a new index file...
438 */
439
440 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
441 return (-1);
442
443 /*
444 * Lock the file while we write it...
445 */
446
447 cupsFileLock(fp, 1);
448
449 cupsFilePuts(fp, "HELPV2\n");
450
451 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
452 node;
453 node = (help_node_t *)cupsArrayNext(hi->nodes))
454 {
455 /*
456 * Write the current node with/without the anchor...
457 */
458
459 if (node->anchor)
460 {
461 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
462 node->filename, node->anchor,
463 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
464 node->text) < 0)
465 break;
466 }
467 else
468 {
469 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
470 node->filename, (int)node->mtime,
471 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
472 node->section ? node->section : "", node->text) < 0)
473 break;
474 }
475
476 /*
477 * Then write the words associated with the node...
478 */
479
480 for (word = (help_word_t *)cupsArrayFirst(node->words);
481 word;
482 word = (help_word_t *)cupsArrayNext(node->words))
483 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
484 break;
485 }
486
487 cupsFileFlush(fp);
488
489 if (cupsFileClose(fp) < 0)
490 return (-1);
491 else if (node)
492 return (-1);
493 else
494 return (0);
495 }
496
497
498 /*
499 * 'helpSearchIndex()' - Search an index.
500 */
501
502 help_index_t * /* O - Search index */
503 helpSearchIndex(help_index_t *hi, /* I - Index */
504 const char *query, /* I - Query string */
505 const char *section, /* I - Limit search to this section */
506 const char *filename) /* I - Limit search to this file */
507 {
508 help_index_t *search; /* Search index */
509 help_node_t *node; /* Current node */
510 help_word_t *word; /* Current word */
511 void *sc; /* Search context */
512 int matches; /* Number of matches */
513
514
515 /*
516 * Range check...
517 */
518
519 if (!hi || !query)
520 return (NULL);
521
522 /*
523 * Reset the scores of all nodes to 0...
524 */
525
526 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
527 node;
528 node = (help_node_t *)cupsArrayNext(hi->nodes))
529 node->score = 0;
530
531 /*
532 * Find the first node to search in...
533 */
534
535 if (filename)
536 {
537 node = helpFindNode(hi, filename, NULL);
538 if (!node)
539 return (NULL);
540 }
541 else
542 node = (help_node_t *)cupsArrayFirst(hi->nodes);
543
544 /*
545 * Convert the query into a regular expression...
546 */
547
548 sc = cgiCompileSearch(query);
549 if (!sc)
550 return (NULL);
551
552 /*
553 * Allocate a search index...
554 */
555
556 search = calloc(1, sizeof(help_index_t));
557 if (!search)
558 {
559 cgiFreeSearch(sc);
560 return (NULL);
561 }
562
563 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
564 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
565
566 if (!search->nodes || !search->sorted)
567 {
568 cupsArrayDelete(search->nodes);
569 cupsArrayDelete(search->sorted);
570 free(search);
571 cgiFreeSearch(sc);
572 return (NULL);
573 }
574
575 search->search = 1;
576
577 /*
578 * Check each node in the index, adding matching nodes to the
579 * search index...
580 */
581
582 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
583 if (node->section && section && strcmp(node->section, section))
584 continue;
585 else if (filename && strcmp(node->filename, filename))
586 continue;
587 else
588 {
589 matches = cgiDoSearch(sc, node->text);
590
591 for (word = (help_word_t *)cupsArrayFirst(node->words);
592 word;
593 word = (help_word_t *)cupsArrayNext(node->words))
594 if (cgiDoSearch(sc, word->text) > 0)
595 matches += word->count;
596
597 if (matches > 0)
598 {
599 /*
600 * Found a match, add the node to the search index...
601 */
602
603 node->score = matches;
604
605 cupsArrayAdd(search->nodes, node);
606 cupsArrayAdd(search->sorted, node);
607 }
608 }
609
610 /*
611 * Free the search context...
612 */
613
614 cgiFreeSearch(sc);
615
616 /*
617 * Return the results...
618 */
619
620 return (search);
621 }
622
623
624 /*
625 * 'help_add_word()' - Add a word to a node.
626 */
627
628 static help_word_t * /* O - New word */
629 help_add_word(help_node_t *n, /* I - Node */
630 const char *text) /* I - Word text */
631 {
632 help_word_t *w, /* New word */
633 key; /* Search key */
634
635
636 /*
637 * Create the words array as needed...
638 */
639
640 if (!n->words)
641 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
642
643 /*
644 * See if the word is already added...
645 */
646
647 key.text = (char *)text;
648
649 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
650 {
651 /*
652 * Create a new word...
653 */
654
655 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
656 return (NULL);
657
658 if ((w->text = strdup(text)) == NULL)
659 {
660 free(w);
661 return (NULL);
662 }
663
664 cupsArrayAdd(n->words, w);
665 }
666
667 /*
668 * Bump the counter for this word and return it...
669 */
670
671 w->count ++;
672
673 return (w);
674 }
675
676
677 /*
678 * 'help_delete_node()' - Free all memory used by a node.
679 */
680
681 static void
682 help_delete_node(help_node_t *n) /* I - Node */
683 {
684 help_word_t *w; /* Current word */
685
686
687 if (!n)
688 return;
689
690 if (n->filename)
691 free(n->filename);
692
693 if (n->anchor)
694 free(n->anchor);
695
696 if (n->section)
697 free(n->section);
698
699 if (n->text)
700 free(n->text);
701
702 for (w = (help_word_t *)cupsArrayFirst(n->words);
703 w;
704 w = (help_word_t *)cupsArrayNext(n->words))
705 help_delete_word(w);
706
707 cupsArrayDelete(n->words);
708
709 free(n);
710 }
711
712
713 /*
714 * 'help_delete_word()' - Free all memory used by a word.
715 */
716
717 static void
718 help_delete_word(help_word_t *w) /* I - Word */
719 {
720 if (!w)
721 return;
722
723 if (w->text)
724 free(w->text);
725
726 free(w);
727 }
728
729
730 /*
731 * 'help_load_directory()' - Load a directory of files into an index.
732 */
733
734 static int /* O - 0 = success, -1 = error, 1 = updated */
735 help_load_directory(
736 help_index_t *hi, /* I - Index */
737 const char *directory, /* I - Directory */
738 const char *relative) /* I - Relative path */
739 {
740 cups_dir_t *dir; /* Directory file */
741 cups_dentry_t *dent; /* Directory entry */
742 char *ext, /* Pointer to extension */
743 filename[1024], /* Full filename */
744 relname[1024]; /* Relative filename */
745 int update; /* Updated? */
746 help_node_t *node; /* Current node */
747
748
749 /*
750 * Open the directory and scan it...
751 */
752
753 if ((dir = cupsDirOpen(directory)) == NULL)
754 return (0);
755
756 update = 0;
757
758 while ((dent = cupsDirRead(dir)) != NULL)
759 {
760 /*
761 * Skip "." files...
762 */
763
764 if (dent->filename[0] == '.')
765 continue;
766
767 /*
768 * Get absolute and relative filenames...
769 */
770
771 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
772 if (relative)
773 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
774 else
775 cupsCopyString(relname, dent->filename, sizeof(relname));
776
777 /*
778 * Check if we have a HTML file...
779 */
780
781 if ((ext = strstr(dent->filename, ".html")) != NULL &&
782 (!ext[5] || !strcmp(ext + 5, ".gz")))
783 {
784 /*
785 * HTML file, see if we have already indexed the file...
786 */
787
788 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
789 {
790 /*
791 * File already indexed - check dates to confirm that the
792 * index is up-to-date...
793 */
794
795 if (node->mtime == dent->fileinfo.st_mtime)
796 {
797 /*
798 * Same modification time, so mark all of the nodes
799 * for this file as up-to-date...
800 */
801
802 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
803 if (!strcmp(node->filename, relname))
804 node->score = 0;
805 else
806 break;
807
808 continue;
809 }
810 }
811
812 update = 1;
813
814 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
815 }
816 else if (S_ISDIR(dent->fileinfo.st_mode))
817 {
818 /*
819 * Process sub-directory...
820 */
821
822 if (help_load_directory(hi, filename, relname) == 1)
823 update = 1;
824 }
825 }
826
827 cupsDirClose(dir);
828
829 return (update);
830 }
831
832
833 /*
834 * 'help_load_file()' - Load a HTML files into an index.
835 */
836
837 static int /* O - 0 = success, -1 = error */
838 help_load_file(
839 help_index_t *hi, /* I - Index */
840 const char *filename, /* I - Filename */
841 const char *relative, /* I - Relative path */
842 time_t mtime) /* I - Modification time */
843 {
844 cups_file_t *fp; /* HTML file */
845 help_node_t *node; /* Current node */
846 char line[1024], /* Line from file */
847 temp[1024], /* Temporary word */
848 section[1024], /* Section */
849 *ptr, /* Pointer into line */
850 *anchor, /* Anchor name */
851 *text; /* Text for anchor */
852 off_t offset; /* File offset */
853 char quote; /* Quote character */
854 help_word_t *word; /* Current word */
855 size_t wordlen; /* Length of word */
856
857
858 if ((fp = cupsFileOpen(filename, "r")) == NULL)
859 return (-1);
860
861 node = NULL;
862 offset = 0;
863
864 if (strstr(filename, "/man-") != NULL)
865 cupsCopyString(section, "Man Pages", sizeof(section));
866 else
867 cupsCopyString(section, "Other", sizeof(section));
868
869 while (cupsFileGets(fp, line, sizeof(line)))
870 {
871 /*
872 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
873 */
874
875 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
876 {
877 /*
878 * Got section line, copy it!
879 */
880
881 for (ptr += 13; isspace(*ptr & 255); ptr ++);
882
883 cupsCopyString(section, ptr, sizeof(section));
884 if ((ptr = strstr(section, "-->")) != NULL)
885 {
886 /*
887 * Strip comment stuff from end of line...
888 */
889
890 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
891
892 if (isspace(*ptr & 255))
893 *ptr = '\0';
894 }
895 continue;
896 }
897
898 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
899 {
900 ptr ++;
901
902 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
903 {
904 /*
905 * Found the title...
906 */
907
908 anchor = NULL;
909 ptr += 6;
910 }
911 else
912 {
913 char *idptr; /* Pointer to ID */
914
915 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
916 ptr += 7;
917 else if ((idptr = strstr(ptr, " ID=")) != NULL)
918 ptr = idptr + 4;
919 else if ((idptr = strstr(ptr, " id=")) != NULL)
920 ptr = idptr + 4;
921 else
922 continue;
923
924 /*
925 * Found an anchor...
926 */
927
928 if (*ptr == '\"' || *ptr == '\'')
929 {
930 /*
931 * Get quoted anchor...
932 */
933
934 quote = *ptr;
935 anchor = ptr + 1;
936 if ((ptr = strchr(anchor, quote)) != NULL)
937 *ptr++ = '\0';
938 else
939 break;
940 }
941 else
942 {
943 /*
944 * Get unquoted anchor...
945 */
946
947 anchor = ptr + 1;
948
949 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
950
951 if (*ptr != '>')
952 *ptr++ = '\0';
953 else
954 break;
955 }
956
957 /*
958 * Got the anchor, now lets find the end...
959 */
960
961 while (*ptr && *ptr != '>')
962 ptr ++;
963
964 if (*ptr != '>')
965 break;
966
967 *ptr++ = '\0';
968 }
969
970 /*
971 * Now collect text for the link...
972 */
973
974 text = ptr;
975 while ((ptr = strchr(text, '<')) == NULL)
976 {
977 ptr = text + strlen(text);
978 if (ptr >= (line + sizeof(line) - 2))
979 break;
980
981 *ptr++ = ' ';
982
983 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
984 break;
985 }
986
987 *ptr = '\0';
988
989 if (node)
990 node->length = (size_t)(offset - node->offset);
991
992 if (!*text)
993 {
994 node = NULL;
995 break;
996 }
997
998 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
999 {
1000 /*
1001 * Node already in the index, so replace the text and other
1002 * data...
1003 */
1004
1005 cupsArrayRemove(hi->nodes, node);
1006
1007 if (node->section)
1008 free(node->section);
1009
1010 if (node->text)
1011 free(node->text);
1012
1013 if (node->words)
1014 {
1015 for (word = (help_word_t *)cupsArrayFirst(node->words);
1016 word;
1017 word = (help_word_t *)cupsArrayNext(node->words))
1018 help_delete_word(word);
1019
1020 cupsArrayDelete(node->words);
1021 node->words = NULL;
1022 }
1023
1024 node->section = section[0] ? strdup(section) : NULL;
1025 node->text = strdup(text);
1026 node->mtime = mtime;
1027 node->offset = offset;
1028 node->score = 0;
1029 }
1030 else
1031 {
1032 /*
1033 * New node...
1034 */
1035
1036 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1037 }
1038
1039 /*
1040 * Go through the text value and replace tabs and newlines with
1041 * whitespace and eliminate extra whitespace...
1042 */
1043
1044 for (ptr = node->text, text = node->text; *ptr;)
1045 if (isspace(*ptr & 255))
1046 {
1047 while (isspace(*ptr & 255))
1048 ptr ++;
1049
1050 *text++ = ' ';
1051 }
1052 else if (text != ptr)
1053 *text++ = *ptr++;
1054 else
1055 {
1056 text ++;
1057 ptr ++;
1058 }
1059
1060 *text = '\0';
1061
1062 /*
1063 * (Re)add the node to the array...
1064 */
1065
1066 cupsArrayAdd(hi->nodes, node);
1067
1068 if (!anchor)
1069 node = NULL;
1070 break;
1071 }
1072
1073 if (node)
1074 {
1075 /*
1076 * Scan this line for words...
1077 */
1078
1079 for (ptr = line; *ptr; ptr ++)
1080 {
1081 /*
1082 * Skip HTML stuff...
1083 */
1084
1085 if (*ptr == '<')
1086 {
1087 if (!strncmp(ptr, "<!--", 4))
1088 {
1089 /*
1090 * Skip HTML comment...
1091 */
1092
1093 if ((text = strstr(ptr + 4, "-->")) == NULL)
1094 ptr += strlen(ptr) - 1;
1095 else
1096 ptr = text + 2;
1097 }
1098 else
1099 {
1100 /*
1101 * Skip HTML element...
1102 */
1103
1104 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1105 {
1106 if (*ptr == '\"' || *ptr == '\'')
1107 {
1108 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1109
1110 if (!*ptr)
1111 ptr --;
1112 }
1113 }
1114
1115 if (!*ptr)
1116 ptr --;
1117 }
1118
1119 continue;
1120 }
1121 else if (*ptr == '&')
1122 {
1123 /*
1124 * Skip HTML entity...
1125 */
1126
1127 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1128
1129 if (!*ptr)
1130 ptr --;
1131
1132 continue;
1133 }
1134 else if (!isalnum(*ptr & 255))
1135 continue;
1136
1137 /*
1138 * Found the start of a word, search until we find the end...
1139 */
1140
1141 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1142
1143 wordlen = (size_t)(ptr - text);
1144
1145 memcpy(temp, text, wordlen);
1146 temp[wordlen] = '\0';
1147
1148 ptr --;
1149
1150 if (wordlen > 1 && !bsearch(temp, help_common_words,
1151 (sizeof(help_common_words) /
1152 sizeof(help_common_words[0])),
1153 sizeof(help_common_words[0]),
1154 (int (*)(const void *, const void *))
1155 _cups_strcasecmp))
1156 help_add_word(node, temp);
1157 }
1158 }
1159
1160 /*
1161 * Get the offset of the next line...
1162 */
1163
1164 offset = cupsFileTell(fp);
1165 }
1166
1167 cupsFileClose(fp);
1168
1169 if (node)
1170 node->length = (size_t)(offset - node->offset);
1171
1172 return (0);
1173 }
1174
1175
1176 /*
1177 * 'help_new_node()' - Create a new node and add it to an index.
1178 */
1179
1180 static help_node_t * /* O - Node pointer or NULL on error */
1181 help_new_node(const char *filename, /* I - Filename */
1182 const char *anchor, /* I - Anchor */
1183 const char *section, /* I - Section */
1184 const char *text, /* I - Text */
1185 time_t mtime, /* I - Modification time */
1186 off_t offset, /* I - Offset in file */
1187 size_t length) /* I - Length in bytes */
1188 {
1189 help_node_t *n; /* Node */
1190
1191
1192 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1193 if (!n)
1194 return (NULL);
1195
1196 n->filename = strdup(filename);
1197 n->anchor = anchor ? strdup(anchor) : NULL;
1198 n->section = (section && *section) ? strdup(section) : NULL;
1199 n->text = strdup(text);
1200 n->mtime = mtime;
1201 n->offset = offset;
1202 n->length = length;
1203
1204 return (n);
1205 }
1206
1207
1208 /*
1209 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1210 */
1211
1212 static int /* O - Difference */
1213 help_sort_by_name(
1214 help_node_t *n1, /* I - First node */
1215 help_node_t *n2, /* I - Second node */
1216 void *data) /* Unused */
1217 {
1218 int diff; /* Difference */
1219
1220
1221 (void)data;
1222
1223 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1224 return (diff);
1225
1226 if (!n1->anchor && !n2->anchor)
1227 return (0);
1228 else if (!n1->anchor)
1229 return (-1);
1230 else if (!n2->anchor)
1231 return (1);
1232 else
1233 return (strcmp(n1->anchor, n2->anchor));
1234 }
1235
1236
1237 /*
1238 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1239 */
1240
1241 static int /* O - Difference */
1242 help_sort_by_score(help_node_t *n1, /* I - First node */
1243 help_node_t *n2, /* I - Second node */
1244 void *data) /* I - Unused */
1245 {
1246 int diff; /* Difference */
1247
1248
1249 (void)data;
1250
1251 if (n1->score != n2->score)
1252 return (n2->score - n1->score);
1253
1254 if (n1->section && !n2->section)
1255 return (1);
1256 else if (!n1->section && n2->section)
1257 return (-1);
1258 else if (n1->section && n2->section &&
1259 (diff = strcmp(n1->section, n2->section)) != 0)
1260 return (diff);
1261
1262 return (_cups_strcasecmp(n1->text, n2->text));
1263 }
1264
1265
1266 /*
1267 * 'help_sort_words()' - Sort words alphabetically.
1268 */
1269
1270 static int /* O - Difference */
1271 help_sort_words(help_word_t *w1, /* I - Second word */
1272 help_word_t *w2, /* I - Second word */
1273 void *data) /* Unused */
1274 {
1275 (void)data;
1276 return (_cups_strcasecmp(w1->text, w2->text));
1277 }