]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
5f76d7aa55ab9993a89027d39dd77f261e5a6abc
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright 2007-2017 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
8 */
9
10 /*
11 * Include necessary headers...
12 */
13
14 #include "cgi-private.h"
15 #include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries MUST remain in ascending alphabetical order.
 * Each entry holds at most 5 characters plus the terminating nul.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121
122
123 /*
124 * Local functions...
125 */
126
127 static help_word_t *help_add_word(help_node_t *n, const char *text);
128 static void help_delete_node(help_node_t *n);
129 static void help_delete_word(help_word_t *w);
130 static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133 static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137 static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
138 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
139 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
140 static int help_sort_words(help_word_t *w1, help_word_t *w2);
141
142
143 /*
144 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
145 */
146
147 void
148 helpDeleteIndex(help_index_t *hi) /* I - Help index */
149 {
150 help_node_t *node; /* Current node */
151
152
153 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
154
155 if (!hi)
156 return;
157
158 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159 node;
160 node = (help_node_t *)cupsArrayNext(hi->nodes))
161 {
162 if (!hi->search)
163 help_delete_node(node);
164 }
165
166 cupsArrayDelete(hi->nodes);
167 cupsArrayDelete(hi->sorted);
168
169 free(hi);
170 }
171
172
173 /*
174 * 'helpFindNode()' - Find a node in an index.
175 */
176
177 help_node_t * /* O - Node pointer or NULL */
178 helpFindNode(help_index_t *hi, /* I - Index */
179 const char *filename, /* I - Filename */
180 const char *anchor) /* I - Anchor */
181 {
182 help_node_t key; /* Search key */
183
184
185 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
186 hi, filename, anchor));
187
188 /*
189 * Range check input...
190 */
191
192 if (!hi || !filename)
193 return (NULL);
194
195 /*
196 * Initialize the search key...
197 */
198
199 key.filename = (char *)filename;
200 key.anchor = (char *)anchor;
201
202 /*
203 * Return any match...
204 */
205
206 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
207 }
208
209
210 /*
211 * 'helpLoadIndex()' - Load a help index from disk.
212 */
213
214 help_index_t * /* O - Index pointer or NULL */
215 helpLoadIndex(const char *hifile, /* I - Index filename */
216 const char *directory) /* I - Directory that is indexed */
217 {
218 help_index_t *hi; /* Help index */
219 cups_file_t *fp; /* Current file */
220 char line[2048], /* Line from file */
221 *ptr, /* Pointer into line */
222 *filename, /* Filename in line */
223 *anchor, /* Anchor in line */
224 *sectptr, /* Section pointer in line */
225 section[1024], /* Section name */
226 *text; /* Text in line */
227 time_t mtime; /* Modification time */
228 off_t offset; /* Offset into file */
229 size_t length; /* Length in bytes */
230 int update; /* Update? */
231 help_node_t *node; /* Current node */
232 help_word_t *word; /* Current word */
233
234
235 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
236 hifile, directory));
237
238 /*
239 * Create a new, empty index.
240 */
241
242 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
243 return (NULL);
244
245 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
246 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
247
248 if (!hi->nodes || !hi->sorted)
249 {
250 cupsArrayDelete(hi->nodes);
251 cupsArrayDelete(hi->sorted);
252 free(hi);
253 return (NULL);
254 }
255
256 /*
257 * Try loading the existing index file...
258 */
259
260 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
261 {
262 /*
263 * Lock the file and then read the first line...
264 */
265
266 cupsFileLock(fp, 1);
267
268 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
269 {
270 /*
271 * Got a valid header line, now read the data lines...
272 */
273
274 node = NULL;
275
276 while (cupsFileGets(fp, line, sizeof(line)))
277 {
278 /*
279 * Each line looks like one of the following:
280 *
281 * filename mtime offset length "section" "text"
282 * filename#anchor offset length "text"
283 * SP count word
284 */
285
286 if (line[0] == ' ')
287 {
288 /*
289 * Read a word in the current node...
290 */
291
292 if (!node || (ptr = strrchr(line, ' ')) == NULL)
293 continue;
294
295 if ((word = help_add_word(node, ptr + 1)) != NULL)
296 word->count = atoi(line + 1);
297 }
298 else
299 {
300 /*
301 * Add a node...
302 */
303
304 filename = line;
305
306 if ((ptr = strchr(line, ' ')) == NULL)
307 break;
308
309 while (isspace(*ptr & 255))
310 *ptr++ = '\0';
311
312 if ((anchor = strrchr(filename, '#')) != NULL)
313 {
314 *anchor++ = '\0';
315 mtime = 0;
316 }
317 else
318 mtime = strtol(ptr, &ptr, 10);
319
320 offset = strtoll(ptr, &ptr, 10);
321 length = (size_t)strtoll(ptr, &ptr, 10);
322
323 while (isspace(*ptr & 255))
324 ptr ++;
325
326 if (!anchor)
327 {
328 /*
329 * Get section...
330 */
331
332 if (*ptr != '\"')
333 break;
334
335 ptr ++;
336 sectptr = ptr;
337
338 while (*ptr && *ptr != '\"')
339 ptr ++;
340
341 if (*ptr != '\"')
342 break;
343
344 *ptr++ = '\0';
345
346 strlcpy(section, sectptr, sizeof(section));
347
348 while (isspace(*ptr & 255))
349 ptr ++;
350 }
351
352 if (*ptr != '\"')
353 break;
354
355 ptr ++;
356 text = ptr;
357
358 while (*ptr && *ptr != '\"')
359 ptr ++;
360
361 if (*ptr != '\"')
362 break;
363
364 *ptr++ = '\0';
365
366 if ((node = help_new_node(filename, anchor, section, text,
367 mtime, offset, length)) == NULL)
368 break;
369
370 node->score = -1;
371
372 cupsArrayAdd(hi->nodes, node);
373 }
374 }
375 }
376
377 cupsFileClose(fp);
378 }
379
380 /*
381 * Scan for new/updated files...
382 */
383
384 update = help_load_directory(hi, directory, NULL);
385
386 /*
387 * Remove any files that are no longer installed...
388 */
389
390 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
391 node;
392 node = (help_node_t *)cupsArrayNext(hi->nodes))
393 if (node->score < 0)
394 {
395 /*
396 * Delete this node...
397 */
398
399 cupsArrayRemove(hi->nodes, node);
400 help_delete_node(node);
401 }
402
403 /*
404 * Add nodes to the sorted array...
405 */
406
407 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
408 node;
409 node = (help_node_t *)cupsArrayNext(hi->nodes))
410 cupsArrayAdd(hi->sorted, node);
411
412 /*
413 * Save the index if we updated it...
414 */
415
416 if (update)
417 helpSaveIndex(hi, hifile);
418
419 /*
420 * Return the index...
421 */
422
423 return (hi);
424 }
425
426
427 /*
428 * 'helpSaveIndex()' - Save a help index to disk.
429 */
430
431 int /* O - 0 on success, -1 on error */
432 helpSaveIndex(help_index_t *hi, /* I - Index */
433 const char *hifile) /* I - Index filename */
434 {
435 cups_file_t *fp; /* Index file */
436 help_node_t *node; /* Current node */
437 help_word_t *word; /* Current word */
438
439
440 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
441
442 /*
443 * Try creating a new index file...
444 */
445
446 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
447 return (-1);
448
449 /*
450 * Lock the file while we write it...
451 */
452
453 cupsFileLock(fp, 1);
454
455 cupsFilePuts(fp, "HELPV2\n");
456
457 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
458 node;
459 node = (help_node_t *)cupsArrayNext(hi->nodes))
460 {
461 /*
462 * Write the current node with/without the anchor...
463 */
464
465 if (node->anchor)
466 {
467 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
468 node->filename, node->anchor,
469 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
470 node->text) < 0)
471 break;
472 }
473 else
474 {
475 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
476 node->filename, (int)node->mtime,
477 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
478 node->section ? node->section : "", node->text) < 0)
479 break;
480 }
481
482 /*
483 * Then write the words associated with the node...
484 */
485
486 for (word = (help_word_t *)cupsArrayFirst(node->words);
487 word;
488 word = (help_word_t *)cupsArrayNext(node->words))
489 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
490 break;
491 }
492
493 cupsFileFlush(fp);
494
495 if (cupsFileClose(fp) < 0)
496 return (-1);
497 else if (node)
498 return (-1);
499 else
500 return (0);
501 }
502
503
504 /*
505 * 'helpSearchIndex()' - Search an index.
506 */
507
508 help_index_t * /* O - Search index */
509 helpSearchIndex(help_index_t *hi, /* I - Index */
510 const char *query, /* I - Query string */
511 const char *section, /* I - Limit search to this section */
512 const char *filename) /* I - Limit search to this file */
513 {
514 help_index_t *search; /* Search index */
515 help_node_t *node; /* Current node */
516 help_word_t *word; /* Current word */
517 void *sc; /* Search context */
518 int matches; /* Number of matches */
519
520
521 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
522 hi, query, filename));
523
524 /*
525 * Range check...
526 */
527
528 if (!hi || !query)
529 return (NULL);
530
531 /*
532 * Reset the scores of all nodes to 0...
533 */
534
535 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
536 node;
537 node = (help_node_t *)cupsArrayNext(hi->nodes))
538 node->score = 0;
539
540 /*
541 * Find the first node to search in...
542 */
543
544 if (filename)
545 {
546 node = helpFindNode(hi, filename, NULL);
547 if (!node)
548 return (NULL);
549 }
550 else
551 node = (help_node_t *)cupsArrayFirst(hi->nodes);
552
553 /*
554 * Convert the query into a regular expression...
555 */
556
557 sc = cgiCompileSearch(query);
558 if (!sc)
559 return (NULL);
560
561 /*
562 * Allocate a search index...
563 */
564
565 search = calloc(1, sizeof(help_index_t));
566 if (!search)
567 {
568 cgiFreeSearch(sc);
569 return (NULL);
570 }
571
572 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
573 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
574
575 if (!search->nodes || !search->sorted)
576 {
577 cupsArrayDelete(search->nodes);
578 cupsArrayDelete(search->sorted);
579 free(search);
580 cgiFreeSearch(sc);
581 return (NULL);
582 }
583
584 search->search = 1;
585
586 /*
587 * Check each node in the index, adding matching nodes to the
588 * search index...
589 */
590
591 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
592 if (section && strcmp(node->section, section))
593 continue;
594 else if (filename && strcmp(node->filename, filename))
595 continue;
596 else
597 {
598 matches = cgiDoSearch(sc, node->text);
599
600 for (word = (help_word_t *)cupsArrayFirst(node->words);
601 word;
602 word = (help_word_t *)cupsArrayNext(node->words))
603 if (cgiDoSearch(sc, word->text) > 0)
604 matches += word->count;
605
606 if (matches > 0)
607 {
608 /*
609 * Found a match, add the node to the search index...
610 */
611
612 node->score = matches;
613
614 cupsArrayAdd(search->nodes, node);
615 cupsArrayAdd(search->sorted, node);
616 }
617 }
618
619 /*
620 * Free the search context...
621 */
622
623 cgiFreeSearch(sc);
624
625 /*
626 * Return the results...
627 */
628
629 return (search);
630 }
631
632
633 /*
634 * 'help_add_word()' - Add a word to a node.
635 */
636
637 static help_word_t * /* O - New word */
638 help_add_word(help_node_t *n, /* I - Node */
639 const char *text) /* I - Word text */
640 {
641 help_word_t *w, /* New word */
642 key; /* Search key */
643
644
645 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
646
647 /*
648 * Create the words array as needed...
649 */
650
651 if (!n->words)
652 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
653
654 /*
655 * See if the word is already added...
656 */
657
658 key.text = (char *)text;
659
660 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
661 {
662 /*
663 * Create a new word...
664 */
665
666 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
667 return (NULL);
668
669 if ((w->text = strdup(text)) == NULL)
670 {
671 free(w);
672 return (NULL);
673 }
674
675 cupsArrayAdd(n->words, w);
676 }
677
678 /*
679 * Bump the counter for this word and return it...
680 */
681
682 w->count ++;
683
684 return (w);
685 }
686
687
688 /*
689 * 'help_delete_node()' - Free all memory used by a node.
690 */
691
692 static void
693 help_delete_node(help_node_t *n) /* I - Node */
694 {
695 help_word_t *w; /* Current word */
696
697
698 DEBUG_printf(("2help_delete_node(n=%p)", n));
699
700 if (!n)
701 return;
702
703 if (n->filename)
704 free(n->filename);
705
706 if (n->anchor)
707 free(n->anchor);
708
709 if (n->section)
710 free(n->section);
711
712 if (n->text)
713 free(n->text);
714
715 for (w = (help_word_t *)cupsArrayFirst(n->words);
716 w;
717 w = (help_word_t *)cupsArrayNext(n->words))
718 help_delete_word(w);
719
720 cupsArrayDelete(n->words);
721
722 free(n);
723 }
724
725
726 /*
727 * 'help_delete_word()' - Free all memory used by a word.
728 */
729
730 static void
731 help_delete_word(help_word_t *w) /* I - Word */
732 {
733 DEBUG_printf(("2help_delete_word(w=%p)", w));
734
735 if (!w)
736 return;
737
738 if (w->text)
739 free(w->text);
740
741 free(w);
742 }
743
744
745 /*
746 * 'help_load_directory()' - Load a directory of files into an index.
747 */
748
749 static int /* O - 0 = success, -1 = error, 1 = updated */
750 help_load_directory(
751 help_index_t *hi, /* I - Index */
752 const char *directory, /* I - Directory */
753 const char *relative) /* I - Relative path */
754 {
755 cups_dir_t *dir; /* Directory file */
756 cups_dentry_t *dent; /* Directory entry */
757 char *ext, /* Pointer to extension */
758 filename[1024], /* Full filename */
759 relname[1024]; /* Relative filename */
760 int update; /* Updated? */
761 help_node_t *node; /* Current node */
762
763
764 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
765 hi, directory, relative));
766
767 /*
768 * Open the directory and scan it...
769 */
770
771 if ((dir = cupsDirOpen(directory)) == NULL)
772 return (0);
773
774 update = 0;
775
776 while ((dent = cupsDirRead(dir)) != NULL)
777 {
778 /*
779 * Skip "." files...
780 */
781
782 if (dent->filename[0] == '.')
783 continue;
784
785 /*
786 * Get absolute and relative filenames...
787 */
788
789 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
790 if (relative)
791 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
792 else
793 strlcpy(relname, dent->filename, sizeof(relname));
794
795 /*
796 * Check if we have a HTML file...
797 */
798
799 if ((ext = strstr(dent->filename, ".html")) != NULL &&
800 (!ext[5] || !strcmp(ext + 5, ".gz")))
801 {
802 /*
803 * HTML file, see if we have already indexed the file...
804 */
805
806 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
807 {
808 /*
809 * File already indexed - check dates to confirm that the
810 * index is up-to-date...
811 */
812
813 if (node->mtime == dent->fileinfo.st_mtime)
814 {
815 /*
816 * Same modification time, so mark all of the nodes
817 * for this file as up-to-date...
818 */
819
820 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
821 if (!strcmp(node->filename, relname))
822 node->score = 0;
823 else
824 break;
825
826 continue;
827 }
828 }
829
830 update = 1;
831
832 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
833 }
834 else if (S_ISDIR(dent->fileinfo.st_mode))
835 {
836 /*
837 * Process sub-directory...
838 */
839
840 if (help_load_directory(hi, filename, relname) == 1)
841 update = 1;
842 }
843 }
844
845 cupsDirClose(dir);
846
847 return (update);
848 }
849
850
851 /*
852 * 'help_load_file()' - Load a HTML files into an index.
853 */
854
855 static int /* O - 0 = success, -1 = error */
856 help_load_file(
857 help_index_t *hi, /* I - Index */
858 const char *filename, /* I - Filename */
859 const char *relative, /* I - Relative path */
860 time_t mtime) /* I - Modification time */
861 {
862 cups_file_t *fp; /* HTML file */
863 help_node_t *node; /* Current node */
864 char line[1024], /* Line from file */
865 temp[1024], /* Temporary word */
866 section[1024], /* Section */
867 *ptr, /* Pointer into line */
868 *anchor, /* Anchor name */
869 *text; /* Text for anchor */
870 off_t offset; /* File offset */
871 char quote; /* Quote character */
872 help_word_t *word; /* Current word */
873 int wordlen; /* Length of word */
874
875
876 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
877 "mtime=%ld)", hi, filename, relative, (long)mtime));
878
879 if ((fp = cupsFileOpen(filename, "r")) == NULL)
880 return (-1);
881
882 node = NULL;
883 offset = 0;
884
885 strlcpy(section, "Other", sizeof(section));
886
887 while (cupsFileGets(fp, line, sizeof(line)))
888 {
889 /*
890 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
891 */
892
893 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
894 {
895 /*
896 * Got section line, copy it!
897 */
898
899 for (ptr += 13; isspace(*ptr & 255); ptr ++);
900
901 strlcpy(section, ptr, sizeof(section));
902 if ((ptr = strstr(section, "-->")) != NULL)
903 {
904 /*
905 * Strip comment stuff from end of line...
906 */
907
908 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
909
910 if (isspace(*ptr & 255))
911 *ptr = '\0';
912 }
913 continue;
914 }
915
916 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
917 {
918 ptr ++;
919
920 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
921 {
922 /*
923 * Found the title...
924 */
925
926 anchor = NULL;
927 ptr += 6;
928 }
929 else
930 {
931 char *idptr; /* Pointer to ID */
932
933 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
934 ptr += 7;
935 else if ((idptr = strstr(ptr, " ID=")) != NULL)
936 ptr = idptr + 4;
937 else if ((idptr = strstr(ptr, " id=")) != NULL)
938 ptr = idptr + 4;
939 else
940 continue;
941
942 /*
943 * Found an anchor...
944 */
945
946 if (*ptr == '\"' || *ptr == '\'')
947 {
948 /*
949 * Get quoted anchor...
950 */
951
952 quote = *ptr;
953 anchor = ptr + 1;
954 if ((ptr = strchr(anchor, quote)) != NULL)
955 *ptr++ = '\0';
956 else
957 break;
958 }
959 else
960 {
961 /*
962 * Get unquoted anchor...
963 */
964
965 anchor = ptr + 1;
966
967 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
968
969 if (*ptr != '>')
970 *ptr++ = '\0';
971 else
972 break;
973 }
974
975 /*
976 * Got the anchor, now lets find the end...
977 */
978
979 while (*ptr && *ptr != '>')
980 ptr ++;
981
982 if (*ptr != '>')
983 break;
984
985 *ptr++ = '\0';
986 }
987
988 /*
989 * Now collect text for the link...
990 */
991
992 text = ptr;
993 while ((ptr = strchr(text, '<')) == NULL)
994 {
995 ptr = text + strlen(text);
996 if (ptr >= (line + sizeof(line) - 2))
997 break;
998
999 *ptr++ = ' ';
1000
1001 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1002 break;
1003 }
1004
1005 *ptr = '\0';
1006
1007 if (node)
1008 node->length = (size_t)(offset - node->offset);
1009
1010 if (!*text)
1011 {
1012 node = NULL;
1013 break;
1014 }
1015
1016 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1017 {
1018 /*
1019 * Node already in the index, so replace the text and other
1020 * data...
1021 */
1022
1023 cupsArrayRemove(hi->nodes, node);
1024
1025 if (node->section)
1026 free(node->section);
1027
1028 if (node->text)
1029 free(node->text);
1030
1031 if (node->words)
1032 {
1033 for (word = (help_word_t *)cupsArrayFirst(node->words);
1034 word;
1035 word = (help_word_t *)cupsArrayNext(node->words))
1036 help_delete_word(word);
1037
1038 cupsArrayDelete(node->words);
1039 node->words = NULL;
1040 }
1041
1042 node->section = section[0] ? strdup(section) : NULL;
1043 node->text = strdup(text);
1044 node->mtime = mtime;
1045 node->offset = offset;
1046 node->score = 0;
1047 }
1048 else
1049 {
1050 /*
1051 * New node...
1052 */
1053
1054 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1055 }
1056
1057 /*
1058 * Go through the text value and replace tabs and newlines with
1059 * whitespace and eliminate extra whitespace...
1060 */
1061
1062 for (ptr = node->text, text = node->text; *ptr;)
1063 if (isspace(*ptr & 255))
1064 {
1065 while (isspace(*ptr & 255))
1066 ptr ++;
1067
1068 *text++ = ' ';
1069 }
1070 else if (text != ptr)
1071 *text++ = *ptr++;
1072 else
1073 {
1074 text ++;
1075 ptr ++;
1076 }
1077
1078 *text = '\0';
1079
1080 /*
1081 * (Re)add the node to the array...
1082 */
1083
1084 cupsArrayAdd(hi->nodes, node);
1085
1086 if (!anchor)
1087 node = NULL;
1088 break;
1089 }
1090
1091 if (node)
1092 {
1093 /*
1094 * Scan this line for words...
1095 */
1096
1097 for (ptr = line; *ptr; ptr ++)
1098 {
1099 /*
1100 * Skip HTML stuff...
1101 */
1102
1103 if (*ptr == '<')
1104 {
1105 if (!strncmp(ptr, "<!--", 4))
1106 {
1107 /*
1108 * Skip HTML comment...
1109 */
1110
1111 if ((text = strstr(ptr + 4, "-->")) == NULL)
1112 ptr += strlen(ptr) - 1;
1113 else
1114 ptr = text + 2;
1115 }
1116 else
1117 {
1118 /*
1119 * Skip HTML element...
1120 */
1121
1122 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1123 {
1124 if (*ptr == '\"' || *ptr == '\'')
1125 {
1126 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1127
1128 if (!*ptr)
1129 ptr --;
1130 }
1131 }
1132
1133 if (!*ptr)
1134 ptr --;
1135 }
1136
1137 continue;
1138 }
1139 else if (*ptr == '&')
1140 {
1141 /*
1142 * Skip HTML entity...
1143 */
1144
1145 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1146
1147 if (!*ptr)
1148 ptr --;
1149
1150 continue;
1151 }
1152 else if (!isalnum(*ptr & 255))
1153 continue;
1154
1155 /*
1156 * Found the start of a word, search until we find the end...
1157 */
1158
1159 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1160
1161 wordlen = (int)(ptr - text);
1162
1163 memcpy(temp, text, (size_t)wordlen);
1164 temp[wordlen] = '\0';
1165
1166 ptr --;
1167
1168 if (wordlen > 1 && !bsearch(temp, help_common_words,
1169 (sizeof(help_common_words) /
1170 sizeof(help_common_words[0])),
1171 sizeof(help_common_words[0]),
1172 (int (*)(const void *, const void *))
1173 _cups_strcasecmp))
1174 help_add_word(node, temp);
1175 }
1176 }
1177
1178 /*
1179 * Get the offset of the next line...
1180 */
1181
1182 offset = cupsFileTell(fp);
1183 }
1184
1185 cupsFileClose(fp);
1186
1187 if (node)
1188 node->length = (size_t)(offset - node->offset);
1189
1190 return (0);
1191 }
1192
1193
1194 /*
1195 * 'help_new_node()' - Create a new node and add it to an index.
1196 */
1197
1198 static help_node_t * /* O - Node pointer or NULL on error */
1199 help_new_node(const char *filename, /* I - Filename */
1200 const char *anchor, /* I - Anchor */
1201 const char *section, /* I - Section */
1202 const char *text, /* I - Text */
1203 time_t mtime, /* I - Modification time */
1204 off_t offset, /* I - Offset in file */
1205 size_t length) /* I - Length in bytes */
1206 {
1207 help_node_t *n; /* Node */
1208
1209
1210 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1211 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1212 (long)mtime, (long)offset, (long)length));
1213
1214 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1215 if (!n)
1216 return (NULL);
1217
1218 n->filename = strdup(filename);
1219 n->anchor = anchor ? strdup(anchor) : NULL;
1220 n->section = *section ? strdup(section) : NULL;
1221 n->text = strdup(text);
1222 n->mtime = mtime;
1223 n->offset = offset;
1224 n->length = length;
1225
1226 return (n);
1227 }
1228
1229
1230 /*
1231 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1232 */
1233
1234 static int /* O - Difference */
1235 help_sort_by_name(help_node_t *n1, /* I - First node */
1236 help_node_t *n2) /* I - Second node */
1237 {
1238 int diff; /* Difference */
1239
1240
1241 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1242 n1, n1->filename, n1->anchor,
1243 n2, n2->filename, n2->anchor));
1244
1245 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1246 return (diff);
1247
1248 if (!n1->anchor && !n2->anchor)
1249 return (0);
1250 else if (!n1->anchor)
1251 return (-1);
1252 else if (!n2->anchor)
1253 return (1);
1254 else
1255 return (strcmp(n1->anchor, n2->anchor));
1256 }
1257
1258
1259 /*
1260 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1261 */
1262
1263 static int /* O - Difference */
1264 help_sort_by_score(help_node_t *n1, /* I - First node */
1265 help_node_t *n2) /* I - Second node */
1266 {
1267 int diff; /* Difference */
1268
1269
1270 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1271 "n2=%p(%d \"%s\" \"%s\")",
1272 n1, n1->score, n1->section, n1->text,
1273 n2, n2->score, n2->section, n2->text));
1274
1275 if (n1->score != n2->score)
1276 return (n2->score - n1->score);
1277
1278 if (n1->section && !n2->section)
1279 return (1);
1280 else if (!n1->section && n2->section)
1281 return (-1);
1282 else if (n1->section && n2->section &&
1283 (diff = strcmp(n1->section, n2->section)) != 0)
1284 return (diff);
1285
1286 return (_cups_strcasecmp(n1->text, n2->text));
1287 }
1288
1289
1290 /*
1291 * 'help_sort_words()' - Sort words alphabetically.
1292 */
1293
1294 static int /* O - Difference */
1295 help_sort_words(help_word_t *w1, /* I - Second word */
1296 help_word_t *w2) /* I - Second word */
1297 {
1298 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1299 w1, w1->text, w2, w2->text));
1300
1301 return (_cups_strcasecmp(w1->text, w2->text));
1302 }