git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
License change: Apache License, Version 2.0.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright 2007-2015 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
8 */
9
10 /*
11 * Include necessary headers...
12 */
13
14 #include "cgi-private.h"
15 #include <cups/dir.h>
16
17
18 /*
19 * List of common English words that should not be indexed...
20 */
21
/*
 * Stop-word table.  It is searched with bsearch() using a case-insensitive
 * comparison, so the entries MUST remain in ascending alphabetical order.
 * Each row is 6 bytes wide, which limits entries to 5 characters plus the
 * terminating nul.  The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121
122
123 /*
124 * Local functions...
125 */
126
127 static help_word_t *help_add_word(help_node_t *n, const char *text);
128 static void help_delete_node(help_node_t *n);
129 static void help_delete_word(help_word_t *w);
130 static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133 static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137 static help_node_t *help_new_node(const char *filename, const char *anchor,
138 const char *section, const char *text,
139 time_t mtime, off_t offset,
140 size_t length)
141 __attribute__((nonnull(1,3,4)));
142 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
143 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
144 static int help_sort_words(help_word_t *w1, help_word_t *w2);
145
146
147 /*
148 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
149 */
150
151 void
152 helpDeleteIndex(help_index_t *hi) /* I - Help index */
153 {
154 help_node_t *node; /* Current node */
155
156
157 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
158
159 if (!hi)
160 return;
161
162 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
163 node;
164 node = (help_node_t *)cupsArrayNext(hi->nodes))
165 {
166 if (!hi->search)
167 help_delete_node(node);
168 }
169
170 cupsArrayDelete(hi->nodes);
171 cupsArrayDelete(hi->sorted);
172
173 free(hi);
174 }
175
176
177 /*
178 * 'helpFindNode()' - Find a node in an index.
179 */
180
181 help_node_t * /* O - Node pointer or NULL */
182 helpFindNode(help_index_t *hi, /* I - Index */
183 const char *filename, /* I - Filename */
184 const char *anchor) /* I - Anchor */
185 {
186 help_node_t key; /* Search key */
187
188
189 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
190 hi, filename, anchor));
191
192 /*
193 * Range check input...
194 */
195
196 if (!hi || !filename)
197 return (NULL);
198
199 /*
200 * Initialize the search key...
201 */
202
203 key.filename = (char *)filename;
204 key.anchor = (char *)anchor;
205
206 /*
207 * Return any match...
208 */
209
210 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
211 }
212
213
214 /*
215 * 'helpLoadIndex()' - Load a help index from disk.
216 */
217
218 help_index_t * /* O - Index pointer or NULL */
219 helpLoadIndex(const char *hifile, /* I - Index filename */
220 const char *directory) /* I - Directory that is indexed */
221 {
222 help_index_t *hi; /* Help index */
223 cups_file_t *fp; /* Current file */
224 char line[2048], /* Line from file */
225 *ptr, /* Pointer into line */
226 *filename, /* Filename in line */
227 *anchor, /* Anchor in line */
228 *sectptr, /* Section pointer in line */
229 section[1024], /* Section name */
230 *text; /* Text in line */
231 time_t mtime; /* Modification time */
232 off_t offset; /* Offset into file */
233 size_t length; /* Length in bytes */
234 int update; /* Update? */
235 help_node_t *node; /* Current node */
236 help_word_t *word; /* Current word */
237
238
239 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
240 hifile, directory));
241
242 /*
243 * Create a new, empty index.
244 */
245
246 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
247 return (NULL);
248
249 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
250 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
251
252 if (!hi->nodes || !hi->sorted)
253 {
254 cupsArrayDelete(hi->nodes);
255 cupsArrayDelete(hi->sorted);
256 free(hi);
257 return (NULL);
258 }
259
260 /*
261 * Try loading the existing index file...
262 */
263
264 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
265 {
266 /*
267 * Lock the file and then read the first line...
268 */
269
270 cupsFileLock(fp, 1);
271
272 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
273 {
274 /*
275 * Got a valid header line, now read the data lines...
276 */
277
278 node = NULL;
279
280 while (cupsFileGets(fp, line, sizeof(line)))
281 {
282 /*
283 * Each line looks like one of the following:
284 *
285 * filename mtime offset length "section" "text"
286 * filename#anchor offset length "text"
287 * SP count word
288 */
289
290 if (line[0] == ' ')
291 {
292 /*
293 * Read a word in the current node...
294 */
295
296 if (!node || (ptr = strrchr(line, ' ')) == NULL)
297 continue;
298
299 if ((word = help_add_word(node, ptr + 1)) != NULL)
300 word->count = atoi(line + 1);
301 }
302 else
303 {
304 /*
305 * Add a node...
306 */
307
308 filename = line;
309
310 if ((ptr = strchr(line, ' ')) == NULL)
311 break;
312
313 while (isspace(*ptr & 255))
314 *ptr++ = '\0';
315
316 if ((anchor = strrchr(filename, '#')) != NULL)
317 {
318 *anchor++ = '\0';
319 mtime = 0;
320 }
321 else
322 mtime = strtol(ptr, &ptr, 10);
323
324 offset = strtoll(ptr, &ptr, 10);
325 length = (size_t)strtoll(ptr, &ptr, 10);
326
327 while (isspace(*ptr & 255))
328 ptr ++;
329
330 if (!anchor)
331 {
332 /*
333 * Get section...
334 */
335
336 if (*ptr != '\"')
337 break;
338
339 ptr ++;
340 sectptr = ptr;
341
342 while (*ptr && *ptr != '\"')
343 ptr ++;
344
345 if (*ptr != '\"')
346 break;
347
348 *ptr++ = '\0';
349
350 strlcpy(section, sectptr, sizeof(section));
351
352 while (isspace(*ptr & 255))
353 ptr ++;
354 }
355
356 if (*ptr != '\"')
357 break;
358
359 ptr ++;
360 text = ptr;
361
362 while (*ptr && *ptr != '\"')
363 ptr ++;
364
365 if (*ptr != '\"')
366 break;
367
368 *ptr++ = '\0';
369
370 if ((node = help_new_node(filename, anchor, section, text,
371 mtime, offset, length)) == NULL)
372 break;
373
374 node->score = -1;
375
376 cupsArrayAdd(hi->nodes, node);
377 }
378 }
379 }
380
381 cupsFileClose(fp);
382 }
383
384 /*
385 * Scan for new/updated files...
386 */
387
388 update = help_load_directory(hi, directory, NULL);
389
390 /*
391 * Remove any files that are no longer installed...
392 */
393
394 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
395 node;
396 node = (help_node_t *)cupsArrayNext(hi->nodes))
397 if (node->score < 0)
398 {
399 /*
400 * Delete this node...
401 */
402
403 cupsArrayRemove(hi->nodes, node);
404 help_delete_node(node);
405 }
406
407 /*
408 * Add nodes to the sorted array...
409 */
410
411 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
412 node;
413 node = (help_node_t *)cupsArrayNext(hi->nodes))
414 cupsArrayAdd(hi->sorted, node);
415
416 /*
417 * Save the index if we updated it...
418 */
419
420 if (update)
421 helpSaveIndex(hi, hifile);
422
423 /*
424 * Return the index...
425 */
426
427 return (hi);
428 }
429
430
431 /*
432 * 'helpSaveIndex()' - Save a help index to disk.
433 */
434
435 int /* O - 0 on success, -1 on error */
436 helpSaveIndex(help_index_t *hi, /* I - Index */
437 const char *hifile) /* I - Index filename */
438 {
439 cups_file_t *fp; /* Index file */
440 help_node_t *node; /* Current node */
441 help_word_t *word; /* Current word */
442
443
444 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
445
446 /*
447 * Try creating a new index file...
448 */
449
450 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
451 return (-1);
452
453 /*
454 * Lock the file while we write it...
455 */
456
457 cupsFileLock(fp, 1);
458
459 cupsFilePuts(fp, "HELPV2\n");
460
461 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
462 node;
463 node = (help_node_t *)cupsArrayNext(hi->nodes))
464 {
465 /*
466 * Write the current node with/without the anchor...
467 */
468
469 if (node->anchor)
470 {
471 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
472 node->filename, node->anchor,
473 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
474 node->text) < 0)
475 break;
476 }
477 else
478 {
479 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
480 node->filename, (int)node->mtime,
481 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
482 node->section ? node->section : "", node->text) < 0)
483 break;
484 }
485
486 /*
487 * Then write the words associated with the node...
488 */
489
490 for (word = (help_word_t *)cupsArrayFirst(node->words);
491 word;
492 word = (help_word_t *)cupsArrayNext(node->words))
493 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
494 break;
495 }
496
497 cupsFileFlush(fp);
498
499 if (cupsFileClose(fp) < 0)
500 return (-1);
501 else if (node)
502 return (-1);
503 else
504 return (0);
505 }
506
507
508 /*
509 * 'helpSearchIndex()' - Search an index.
510 */
511
512 help_index_t * /* O - Search index */
513 helpSearchIndex(help_index_t *hi, /* I - Index */
514 const char *query, /* I - Query string */
515 const char *section, /* I - Limit search to this section */
516 const char *filename) /* I - Limit search to this file */
517 {
518 help_index_t *search; /* Search index */
519 help_node_t *node; /* Current node */
520 help_word_t *word; /* Current word */
521 void *sc; /* Search context */
522 int matches; /* Number of matches */
523
524
525 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
526 hi, query, filename));
527
528 /*
529 * Range check...
530 */
531
532 if (!hi || !query)
533 return (NULL);
534
535 /*
536 * Reset the scores of all nodes to 0...
537 */
538
539 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
540 node;
541 node = (help_node_t *)cupsArrayNext(hi->nodes))
542 node->score = 0;
543
544 /*
545 * Find the first node to search in...
546 */
547
548 if (filename)
549 {
550 node = helpFindNode(hi, filename, NULL);
551 if (!node)
552 return (NULL);
553 }
554 else
555 node = (help_node_t *)cupsArrayFirst(hi->nodes);
556
557 /*
558 * Convert the query into a regular expression...
559 */
560
561 sc = cgiCompileSearch(query);
562 if (!sc)
563 return (NULL);
564
565 /*
566 * Allocate a search index...
567 */
568
569 search = calloc(1, sizeof(help_index_t));
570 if (!search)
571 {
572 cgiFreeSearch(sc);
573 return (NULL);
574 }
575
576 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
577 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
578
579 if (!search->nodes || !search->sorted)
580 {
581 cupsArrayDelete(search->nodes);
582 cupsArrayDelete(search->sorted);
583 free(search);
584 cgiFreeSearch(sc);
585 return (NULL);
586 }
587
588 search->search = 1;
589
590 /*
591 * Check each node in the index, adding matching nodes to the
592 * search index...
593 */
594
595 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
596 if (section && strcmp(node->section, section))
597 continue;
598 else if (filename && strcmp(node->filename, filename))
599 continue;
600 else
601 {
602 matches = cgiDoSearch(sc, node->text);
603
604 for (word = (help_word_t *)cupsArrayFirst(node->words);
605 word;
606 word = (help_word_t *)cupsArrayNext(node->words))
607 if (cgiDoSearch(sc, word->text) > 0)
608 matches += word->count;
609
610 if (matches > 0)
611 {
612 /*
613 * Found a match, add the node to the search index...
614 */
615
616 node->score = matches;
617
618 cupsArrayAdd(search->nodes, node);
619 cupsArrayAdd(search->sorted, node);
620 }
621 }
622
623 /*
624 * Free the search context...
625 */
626
627 cgiFreeSearch(sc);
628
629 /*
630 * Return the results...
631 */
632
633 return (search);
634 }
635
636
637 /*
638 * 'help_add_word()' - Add a word to a node.
639 */
640
641 static help_word_t * /* O - New word */
642 help_add_word(help_node_t *n, /* I - Node */
643 const char *text) /* I - Word text */
644 {
645 help_word_t *w, /* New word */
646 key; /* Search key */
647
648
649 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
650
651 /*
652 * Create the words array as needed...
653 */
654
655 if (!n->words)
656 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
657
658 /*
659 * See if the word is already added...
660 */
661
662 key.text = (char *)text;
663
664 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
665 {
666 /*
667 * Create a new word...
668 */
669
670 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
671 return (NULL);
672
673 if ((w->text = strdup(text)) == NULL)
674 {
675 free(w);
676 return (NULL);
677 }
678
679 cupsArrayAdd(n->words, w);
680 }
681
682 /*
683 * Bump the counter for this word and return it...
684 */
685
686 w->count ++;
687
688 return (w);
689 }
690
691
692 /*
693 * 'help_delete_node()' - Free all memory used by a node.
694 */
695
696 static void
697 help_delete_node(help_node_t *n) /* I - Node */
698 {
699 help_word_t *w; /* Current word */
700
701
702 DEBUG_printf(("2help_delete_node(n=%p)", n));
703
704 if (!n)
705 return;
706
707 if (n->filename)
708 free(n->filename);
709
710 if (n->anchor)
711 free(n->anchor);
712
713 if (n->section)
714 free(n->section);
715
716 if (n->text)
717 free(n->text);
718
719 for (w = (help_word_t *)cupsArrayFirst(n->words);
720 w;
721 w = (help_word_t *)cupsArrayNext(n->words))
722 help_delete_word(w);
723
724 cupsArrayDelete(n->words);
725
726 free(n);
727 }
728
729
730 /*
731 * 'help_delete_word()' - Free all memory used by a word.
732 */
733
734 static void
735 help_delete_word(help_word_t *w) /* I - Word */
736 {
737 DEBUG_printf(("2help_delete_word(w=%p)", w));
738
739 if (!w)
740 return;
741
742 if (w->text)
743 free(w->text);
744
745 free(w);
746 }
747
748
749 /*
750 * 'help_load_directory()' - Load a directory of files into an index.
751 */
752
753 static int /* O - 0 = success, -1 = error, 1 = updated */
754 help_load_directory(
755 help_index_t *hi, /* I - Index */
756 const char *directory, /* I - Directory */
757 const char *relative) /* I - Relative path */
758 {
759 cups_dir_t *dir; /* Directory file */
760 cups_dentry_t *dent; /* Directory entry */
761 char *ext, /* Pointer to extension */
762 filename[1024], /* Full filename */
763 relname[1024]; /* Relative filename */
764 int update; /* Updated? */
765 help_node_t *node; /* Current node */
766
767
768 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
769 hi, directory, relative));
770
771 /*
772 * Open the directory and scan it...
773 */
774
775 if ((dir = cupsDirOpen(directory)) == NULL)
776 return (0);
777
778 update = 0;
779
780 while ((dent = cupsDirRead(dir)) != NULL)
781 {
782 /*
783 * Skip "." files...
784 */
785
786 if (dent->filename[0] == '.')
787 continue;
788
789 /*
790 * Get absolute and relative filenames...
791 */
792
793 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
794 if (relative)
795 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
796 else
797 strlcpy(relname, dent->filename, sizeof(relname));
798
799 /*
800 * Check if we have a HTML file...
801 */
802
803 if ((ext = strstr(dent->filename, ".html")) != NULL &&
804 (!ext[5] || !strcmp(ext + 5, ".gz")))
805 {
806 /*
807 * HTML file, see if we have already indexed the file...
808 */
809
810 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
811 {
812 /*
813 * File already indexed - check dates to confirm that the
814 * index is up-to-date...
815 */
816
817 if (node->mtime == dent->fileinfo.st_mtime)
818 {
819 /*
820 * Same modification time, so mark all of the nodes
821 * for this file as up-to-date...
822 */
823
824 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
825 if (!strcmp(node->filename, relname))
826 node->score = 0;
827 else
828 break;
829
830 continue;
831 }
832 }
833
834 update = 1;
835
836 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
837 }
838 else if (S_ISDIR(dent->fileinfo.st_mode))
839 {
840 /*
841 * Process sub-directory...
842 */
843
844 if (help_load_directory(hi, filename, relname) == 1)
845 update = 1;
846 }
847 }
848
849 cupsDirClose(dir);
850
851 return (update);
852 }
853
854
855 /*
856 * 'help_load_file()' - Load an HTML file into an index.
857 */
858
859 static int /* O - 0 = success, -1 = error */
860 help_load_file(
861 help_index_t *hi, /* I - Index */
862 const char *filename, /* I - Filename */
863 const char *relative, /* I - Relative path */
864 time_t mtime) /* I - Modification time */
865 {
866 cups_file_t *fp; /* HTML file */
867 help_node_t *node; /* Current node */
868 char line[1024], /* Line from file */
869 temp[1024], /* Temporary word */
870 section[1024], /* Section */
871 *ptr, /* Pointer into line */
872 *anchor, /* Anchor name */
873 *text; /* Text for anchor */
874 off_t offset; /* File offset */
875 char quote; /* Quote character */
876 help_word_t *word; /* Current word */
877 int wordlen; /* Length of word */
878
879
880 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
881 "mtime=%ld)", hi, filename, relative, (long)mtime));
882
883 if ((fp = cupsFileOpen(filename, "r")) == NULL)
884 return (-1);
885
886 node = NULL;
887 offset = 0;
888
889 strlcpy(section, "Other", sizeof(section));
890
891 while (cupsFileGets(fp, line, sizeof(line)))
892 {
893 /*
894 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
895 */
896
897 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
898 {
899 /*
900 * Got section line, copy it!
901 */
902
903 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
904
905 strlcpy(section, ptr, sizeof(section));
906 if ((ptr = strstr(section, "-->")) != NULL)
907 {
908 /*
909 * Strip comment stuff from end of line...
910 */
911
912 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
913
914 if (isspace(*ptr & 255))
915 *ptr = '\0';
916 }
917 continue;
918 }
919
920 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
921 {
922 ptr ++;
923
924 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
925 {
926 /*
927 * Found the title...
928 */
929
930 anchor = NULL;
931 ptr += 6;
932 }
933 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
934 {
935 /*
936 * Found an anchor...
937 */
938
939 ptr += 7;
940
941 if (*ptr == '\"' || *ptr == '\'')
942 {
943 /*
944 * Get quoted anchor...
945 */
946
947 quote = *ptr;
948 anchor = ptr + 1;
949 if ((ptr = strchr(anchor, quote)) != NULL)
950 *ptr++ = '\0';
951 else
952 break;
953 }
954 else
955 {
956 /*
957 * Get unquoted anchor...
958 */
959
960 anchor = ptr + 1;
961
962 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
963
964 if (*ptr)
965 *ptr++ = '\0';
966 else
967 break;
968 }
969
970 /*
971 * Got the anchor, now lets find the end...
972 */
973
974 while (*ptr && *ptr != '>')
975 ptr ++;
976
977 if (*ptr != '>')
978 break;
979
980 ptr ++;
981 }
982 else
983 continue;
984
985 /*
986 * Now collect text for the link...
987 */
988
989 text = ptr;
990 while ((ptr = strchr(text, '<')) == NULL)
991 {
992 ptr = text + strlen(text);
993 if (ptr >= (line + sizeof(line) - 2))
994 break;
995
996 *ptr++ = ' ';
997
998 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
999 break;
1000 }
1001
1002 *ptr = '\0';
1003
1004 if (node)
1005 node->length = (size_t)(offset - node->offset);
1006
1007 if (!*text)
1008 {
1009 node = NULL;
1010 break;
1011 }
1012
1013 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1014 {
1015 /*
1016 * Node already in the index, so replace the text and other
1017 * data...
1018 */
1019
1020 cupsArrayRemove(hi->nodes, node);
1021
1022 if (node->section)
1023 free(node->section);
1024
1025 if (node->text)
1026 free(node->text);
1027
1028 if (node->words)
1029 {
1030 for (word = (help_word_t *)cupsArrayFirst(node->words);
1031 word;
1032 word = (help_word_t *)cupsArrayNext(node->words))
1033 help_delete_word(word);
1034
1035 cupsArrayDelete(node->words);
1036 node->words = NULL;
1037 }
1038
1039 node->section = section[0] ? strdup(section) : NULL;
1040 node->text = strdup(text);
1041 node->mtime = mtime;
1042 node->offset = offset;
1043 node->score = 0;
1044 }
1045 else
1046 {
1047 /*
1048 * New node...
1049 */
1050
1051 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1052 }
1053
1054 /*
1055 * Go through the text value and replace tabs and newlines with
1056 * whitespace and eliminate extra whitespace...
1057 */
1058
1059 for (ptr = node->text, text = node->text; *ptr;)
1060 if (isspace(*ptr & 255))
1061 {
1062 while (isspace(*ptr & 255))
1063 ptr ++;
1064
1065 *text++ = ' ';
1066 }
1067 else if (text != ptr)
1068 *text++ = *ptr++;
1069 else
1070 {
1071 text ++;
1072 ptr ++;
1073 }
1074
1075 *text = '\0';
1076
1077 /*
1078 * (Re)add the node to the array...
1079 */
1080
1081 cupsArrayAdd(hi->nodes, node);
1082
1083 if (!anchor)
1084 node = NULL;
1085 break;
1086 }
1087
1088 if (node)
1089 {
1090 /*
1091 * Scan this line for words...
1092 */
1093
1094 for (ptr = line; *ptr; ptr ++)
1095 {
1096 /*
1097 * Skip HTML stuff...
1098 */
1099
1100 if (*ptr == '<')
1101 {
1102 if (!strncmp(ptr, "<!--", 4))
1103 {
1104 /*
1105 * Skip HTML comment...
1106 */
1107
1108 if ((text = strstr(ptr + 4, "-->")) == NULL)
1109 ptr += strlen(ptr) - 1;
1110 else
1111 ptr = text + 2;
1112 }
1113 else
1114 {
1115 /*
1116 * Skip HTML element...
1117 */
1118
1119 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1120 {
1121 if (*ptr == '\"' || *ptr == '\'')
1122 {
1123 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1124
1125 if (!*ptr)
1126 ptr --;
1127 }
1128 }
1129
1130 if (!*ptr)
1131 ptr --;
1132 }
1133
1134 continue;
1135 }
1136 else if (*ptr == '&')
1137 {
1138 /*
1139 * Skip HTML entity...
1140 */
1141
1142 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1143
1144 if (!*ptr)
1145 ptr --;
1146
1147 continue;
1148 }
1149 else if (!isalnum(*ptr & 255))
1150 continue;
1151
1152 /*
1153 * Found the start of a word, search until we find the end...
1154 */
1155
1156 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1157
1158 wordlen = (int)(ptr - text);
1159
1160 memcpy(temp, text, (size_t)wordlen);
1161 temp[wordlen] = '\0';
1162
1163 ptr --;
1164
1165 if (wordlen > 1 && !bsearch(temp, help_common_words,
1166 (sizeof(help_common_words) /
1167 sizeof(help_common_words[0])),
1168 sizeof(help_common_words[0]),
1169 (int (*)(const void *, const void *))
1170 _cups_strcasecmp))
1171 help_add_word(node, temp);
1172 }
1173 }
1174
1175 /*
1176 * Get the offset of the next line...
1177 */
1178
1179 offset = cupsFileTell(fp);
1180 }
1181
1182 cupsFileClose(fp);
1183
1184 if (node)
1185 node->length = (size_t)(offset - node->offset);
1186
1187 return (0);
1188 }
1189
1190
1191 /*
1192 * 'help_new_node()' - Create a new node.
1193 */
1194
1195 static help_node_t * /* O - Node pointer or NULL on error */
1196 help_new_node(const char *filename, /* I - Filename */
1197 const char *anchor, /* I - Anchor */
1198 const char *section, /* I - Section */
1199 const char *text, /* I - Text */
1200 time_t mtime, /* I - Modification time */
1201 off_t offset, /* I - Offset in file */
1202 size_t length) /* I - Length in bytes */
1203 {
1204 help_node_t *n; /* Node */
1205
1206
1207 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1208 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1209 (long)mtime, (long)offset, (long)length));
1210
1211 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1212 if (!n)
1213 return (NULL);
1214
1215 n->filename = strdup(filename);
1216 n->anchor = anchor ? strdup(anchor) : NULL;
1217 n->section = *section ? strdup(section) : NULL;
1218 n->text = strdup(text);
1219 n->mtime = mtime;
1220 n->offset = offset;
1221 n->length = length;
1222
1223 return (n);
1224 }
1225
1226
1227 /*
1228 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1229 */
1230
1231 static int /* O - Difference */
1232 help_sort_by_name(help_node_t *n1, /* I - First node */
1233 help_node_t *n2) /* I - Second node */
1234 {
1235 int diff; /* Difference */
1236
1237
1238 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1239 n1, n1->filename, n1->anchor,
1240 n2, n2->filename, n2->anchor));
1241
1242 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1243 return (diff);
1244
1245 if (!n1->anchor && !n2->anchor)
1246 return (0);
1247 else if (!n1->anchor)
1248 return (-1);
1249 else if (!n2->anchor)
1250 return (1);
1251 else
1252 return (strcmp(n1->anchor, n2->anchor));
1253 }
1254
1255
1256 /*
1257 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
1258 */
1259
1260 static int /* O - Difference */
1261 help_sort_by_score(help_node_t *n1, /* I - First node */
1262 help_node_t *n2) /* I - Second node */
1263 {
1264 int diff; /* Difference */
1265
1266
1267 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1268 "n2=%p(%d \"%s\" \"%s\")",
1269 n1, n1->score, n1->section, n1->text,
1270 n2, n2->score, n2->section, n2->text));
1271
1272 if (n1->score != n2->score)
1273 return (n2->score - n1->score);
1274
1275 if (n1->section && !n2->section)
1276 return (1);
1277 else if (!n1->section && n2->section)
1278 return (-1);
1279 else if (n1->section && n2->section &&
1280 (diff = strcmp(n1->section, n2->section)) != 0)
1281 return (diff);
1282
1283 return (_cups_strcasecmp(n1->text, n2->text));
1284 }
1285
1286
1287 /*
1288 * 'help_sort_words()' - Sort words alphabetically.
1289 */
1290
1291 static int /* O - Difference */
1292 help_sort_words(help_word_t *w1, /* I - Second word */
1293 help_word_t *w2) /* I - Second word */
1294 {
1295 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1296 w1, w1->text, w2, w2->text));
1297
1298 return (_cups_strcasecmp(w1->text, w2->text));
1299 }