]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Merge pull request #5297 from FedericoYundt/patch-1
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright 2007-2017 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
8 */
9
10 /*
11 * Include necessary headers...
12 */
13
14 #include "cgi-private.h"
15 #include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries MUST stay in ascending alphabetical order.
 * Each entry is at most 5 characters plus the terminating nul.  The table
 * is never modified, so it is declared const.
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",    "at",
  "be",    "been",  "but",   "by",
  "call",  "can",   "come",  "could",
  "day",   "did",   "do",    "down",
  "each",
  "find",  "first", "for",   "from",
  "go",
  "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",
  "if",    "in",    "is",    "it",
  "know",
  "like",  "long",  "look",
  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",
  "of",    "on",    "one",   "or",    "other", "out",   "over",
  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there",
  "these", "they",  "thing", "this",  "time",  "to",    "two",
  "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",
  "which", "who",   "will",  "with",  "word",  "would", "write",
  "you",   "your"
};
121
122
123 /*
124 * Local functions...
125 */
126
127 static help_word_t *help_add_word(help_node_t *n, const char *text);
128 static void help_delete_node(help_node_t *n);
129 static void help_delete_word(help_word_t *w);
130 static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133 static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137 static help_node_t *help_new_node(const char *filename, const char *anchor,
138 const char *section, const char *text,
139 time_t mtime, off_t offset,
140 size_t length)
141 __attribute__((nonnull(1,3,4)));
142 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
143 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
144 static int help_sort_words(help_word_t *w1, help_word_t *w2);
145
146
147 /*
148 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
149 */
150
151 void
152 helpDeleteIndex(help_index_t *hi) /* I - Help index */
153 {
154 help_node_t *node; /* Current node */
155
156
157 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
158
159 if (!hi)
160 return;
161
162 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
163 node;
164 node = (help_node_t *)cupsArrayNext(hi->nodes))
165 {
166 if (!hi->search)
167 help_delete_node(node);
168 }
169
170 cupsArrayDelete(hi->nodes);
171 cupsArrayDelete(hi->sorted);
172
173 free(hi);
174 }
175
176
177 /*
178 * 'helpFindNode()' - Find a node in an index.
179 */
180
181 help_node_t * /* O - Node pointer or NULL */
182 helpFindNode(help_index_t *hi, /* I - Index */
183 const char *filename, /* I - Filename */
184 const char *anchor) /* I - Anchor */
185 {
186 help_node_t key; /* Search key */
187
188
189 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
190 hi, filename, anchor));
191
192 /*
193 * Range check input...
194 */
195
196 if (!hi || !filename)
197 return (NULL);
198
199 /*
200 * Initialize the search key...
201 */
202
203 key.filename = (char *)filename;
204 key.anchor = (char *)anchor;
205
206 /*
207 * Return any match...
208 */
209
210 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
211 }
212
213
214 /*
215 * 'helpLoadIndex()' - Load a help index from disk.
216 */
217
218 help_index_t * /* O - Index pointer or NULL */
219 helpLoadIndex(const char *hifile, /* I - Index filename */
220 const char *directory) /* I - Directory that is indexed */
221 {
222 help_index_t *hi; /* Help index */
223 cups_file_t *fp; /* Current file */
224 char line[2048], /* Line from file */
225 *ptr, /* Pointer into line */
226 *filename, /* Filename in line */
227 *anchor, /* Anchor in line */
228 *sectptr, /* Section pointer in line */
229 section[1024], /* Section name */
230 *text; /* Text in line */
231 time_t mtime; /* Modification time */
232 off_t offset; /* Offset into file */
233 size_t length; /* Length in bytes */
234 int update; /* Update? */
235 help_node_t *node; /* Current node */
236 help_word_t *word; /* Current word */
237
238
239 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
240 hifile, directory));
241
242 /*
243 * Create a new, empty index.
244 */
245
246 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
247 return (NULL);
248
249 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
250 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
251
252 if (!hi->nodes || !hi->sorted)
253 {
254 cupsArrayDelete(hi->nodes);
255 cupsArrayDelete(hi->sorted);
256 free(hi);
257 return (NULL);
258 }
259
260 /*
261 * Try loading the existing index file...
262 */
263
264 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
265 {
266 /*
267 * Lock the file and then read the first line...
268 */
269
270 cupsFileLock(fp, 1);
271
272 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
273 {
274 /*
275 * Got a valid header line, now read the data lines...
276 */
277
278 node = NULL;
279
280 while (cupsFileGets(fp, line, sizeof(line)))
281 {
282 /*
283 * Each line looks like one of the following:
284 *
285 * filename mtime offset length "section" "text"
286 * filename#anchor offset length "text"
287 * SP count word
288 */
289
290 if (line[0] == ' ')
291 {
292 /*
293 * Read a word in the current node...
294 */
295
296 if (!node || (ptr = strrchr(line, ' ')) == NULL)
297 continue;
298
299 if ((word = help_add_word(node, ptr + 1)) != NULL)
300 word->count = atoi(line + 1);
301 }
302 else
303 {
304 /*
305 * Add a node...
306 */
307
308 filename = line;
309
310 if ((ptr = strchr(line, ' ')) == NULL)
311 break;
312
313 while (isspace(*ptr & 255))
314 *ptr++ = '\0';
315
316 if ((anchor = strrchr(filename, '#')) != NULL)
317 {
318 *anchor++ = '\0';
319 mtime = 0;
320 }
321 else
322 mtime = strtol(ptr, &ptr, 10);
323
324 offset = strtoll(ptr, &ptr, 10);
325 length = (size_t)strtoll(ptr, &ptr, 10);
326
327 while (isspace(*ptr & 255))
328 ptr ++;
329
330 if (!anchor)
331 {
332 /*
333 * Get section...
334 */
335
336 if (*ptr != '\"')
337 break;
338
339 ptr ++;
340 sectptr = ptr;
341
342 while (*ptr && *ptr != '\"')
343 ptr ++;
344
345 if (*ptr != '\"')
346 break;
347
348 *ptr++ = '\0';
349
350 strlcpy(section, sectptr, sizeof(section));
351
352 while (isspace(*ptr & 255))
353 ptr ++;
354 }
355
356 if (*ptr != '\"')
357 break;
358
359 ptr ++;
360 text = ptr;
361
362 while (*ptr && *ptr != '\"')
363 ptr ++;
364
365 if (*ptr != '\"')
366 break;
367
368 *ptr++ = '\0';
369
370 if ((node = help_new_node(filename, anchor, section, text,
371 mtime, offset, length)) == NULL)
372 break;
373
374 node->score = -1;
375
376 cupsArrayAdd(hi->nodes, node);
377 }
378 }
379 }
380
381 cupsFileClose(fp);
382 }
383
384 /*
385 * Scan for new/updated files...
386 */
387
388 update = help_load_directory(hi, directory, NULL);
389
390 /*
391 * Remove any files that are no longer installed...
392 */
393
394 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
395 node;
396 node = (help_node_t *)cupsArrayNext(hi->nodes))
397 if (node->score < 0)
398 {
399 /*
400 * Delete this node...
401 */
402
403 cupsArrayRemove(hi->nodes, node);
404 help_delete_node(node);
405 }
406
407 /*
408 * Add nodes to the sorted array...
409 */
410
411 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
412 node;
413 node = (help_node_t *)cupsArrayNext(hi->nodes))
414 cupsArrayAdd(hi->sorted, node);
415
416 /*
417 * Save the index if we updated it...
418 */
419
420 if (update)
421 helpSaveIndex(hi, hifile);
422
423 /*
424 * Return the index...
425 */
426
427 return (hi);
428 }
429
430
431 /*
432 * 'helpSaveIndex()' - Save a help index to disk.
433 */
434
435 int /* O - 0 on success, -1 on error */
436 helpSaveIndex(help_index_t *hi, /* I - Index */
437 const char *hifile) /* I - Index filename */
438 {
439 cups_file_t *fp; /* Index file */
440 help_node_t *node; /* Current node */
441 help_word_t *word; /* Current word */
442
443
444 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
445
446 /*
447 * Try creating a new index file...
448 */
449
450 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
451 return (-1);
452
453 /*
454 * Lock the file while we write it...
455 */
456
457 cupsFileLock(fp, 1);
458
459 cupsFilePuts(fp, "HELPV2\n");
460
461 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
462 node;
463 node = (help_node_t *)cupsArrayNext(hi->nodes))
464 {
465 /*
466 * Write the current node with/without the anchor...
467 */
468
469 if (node->anchor)
470 {
471 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
472 node->filename, node->anchor,
473 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
474 node->text) < 0)
475 break;
476 }
477 else
478 {
479 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
480 node->filename, (int)node->mtime,
481 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
482 node->section ? node->section : "", node->text) < 0)
483 break;
484 }
485
486 /*
487 * Then write the words associated with the node...
488 */
489
490 for (word = (help_word_t *)cupsArrayFirst(node->words);
491 word;
492 word = (help_word_t *)cupsArrayNext(node->words))
493 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
494 break;
495 }
496
497 cupsFileFlush(fp);
498
499 if (cupsFileClose(fp) < 0)
500 return (-1);
501 else if (node)
502 return (-1);
503 else
504 return (0);
505 }
506
507
508 /*
509 * 'helpSearchIndex()' - Search an index.
510 */
511
512 help_index_t * /* O - Search index */
513 helpSearchIndex(help_index_t *hi, /* I - Index */
514 const char *query, /* I - Query string */
515 const char *section, /* I - Limit search to this section */
516 const char *filename) /* I - Limit search to this file */
517 {
518 help_index_t *search; /* Search index */
519 help_node_t *node; /* Current node */
520 help_word_t *word; /* Current word */
521 void *sc; /* Search context */
522 int matches; /* Number of matches */
523
524
525 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
526 hi, query, filename));
527
528 /*
529 * Range check...
530 */
531
532 if (!hi || !query)
533 return (NULL);
534
535 /*
536 * Reset the scores of all nodes to 0...
537 */
538
539 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
540 node;
541 node = (help_node_t *)cupsArrayNext(hi->nodes))
542 node->score = 0;
543
544 /*
545 * Find the first node to search in...
546 */
547
548 if (filename)
549 {
550 node = helpFindNode(hi, filename, NULL);
551 if (!node)
552 return (NULL);
553 }
554 else
555 node = (help_node_t *)cupsArrayFirst(hi->nodes);
556
557 /*
558 * Convert the query into a regular expression...
559 */
560
561 sc = cgiCompileSearch(query);
562 if (!sc)
563 return (NULL);
564
565 /*
566 * Allocate a search index...
567 */
568
569 search = calloc(1, sizeof(help_index_t));
570 if (!search)
571 {
572 cgiFreeSearch(sc);
573 return (NULL);
574 }
575
576 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
577 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
578
579 if (!search->nodes || !search->sorted)
580 {
581 cupsArrayDelete(search->nodes);
582 cupsArrayDelete(search->sorted);
583 free(search);
584 cgiFreeSearch(sc);
585 return (NULL);
586 }
587
588 search->search = 1;
589
590 /*
591 * Check each node in the index, adding matching nodes to the
592 * search index...
593 */
594
595 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
596 if (section && strcmp(node->section, section))
597 continue;
598 else if (filename && strcmp(node->filename, filename))
599 continue;
600 else
601 {
602 matches = cgiDoSearch(sc, node->text);
603
604 for (word = (help_word_t *)cupsArrayFirst(node->words);
605 word;
606 word = (help_word_t *)cupsArrayNext(node->words))
607 if (cgiDoSearch(sc, word->text) > 0)
608 matches += word->count;
609
610 if (matches > 0)
611 {
612 /*
613 * Found a match, add the node to the search index...
614 */
615
616 node->score = matches;
617
618 cupsArrayAdd(search->nodes, node);
619 cupsArrayAdd(search->sorted, node);
620 }
621 }
622
623 /*
624 * Free the search context...
625 */
626
627 cgiFreeSearch(sc);
628
629 /*
630 * Return the results...
631 */
632
633 return (search);
634 }
635
636
637 /*
638 * 'help_add_word()' - Add a word to a node.
639 */
640
641 static help_word_t * /* O - New word */
642 help_add_word(help_node_t *n, /* I - Node */
643 const char *text) /* I - Word text */
644 {
645 help_word_t *w, /* New word */
646 key; /* Search key */
647
648
649 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
650
651 /*
652 * Create the words array as needed...
653 */
654
655 if (!n->words)
656 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
657
658 /*
659 * See if the word is already added...
660 */
661
662 key.text = (char *)text;
663
664 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
665 {
666 /*
667 * Create a new word...
668 */
669
670 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
671 return (NULL);
672
673 if ((w->text = strdup(text)) == NULL)
674 {
675 free(w);
676 return (NULL);
677 }
678
679 cupsArrayAdd(n->words, w);
680 }
681
682 /*
683 * Bump the counter for this word and return it...
684 */
685
686 w->count ++;
687
688 return (w);
689 }
690
691
692 /*
693 * 'help_delete_node()' - Free all memory used by a node.
694 */
695
696 static void
697 help_delete_node(help_node_t *n) /* I - Node */
698 {
699 help_word_t *w; /* Current word */
700
701
702 DEBUG_printf(("2help_delete_node(n=%p)", n));
703
704 if (!n)
705 return;
706
707 if (n->filename)
708 free(n->filename);
709
710 if (n->anchor)
711 free(n->anchor);
712
713 if (n->section)
714 free(n->section);
715
716 if (n->text)
717 free(n->text);
718
719 for (w = (help_word_t *)cupsArrayFirst(n->words);
720 w;
721 w = (help_word_t *)cupsArrayNext(n->words))
722 help_delete_word(w);
723
724 cupsArrayDelete(n->words);
725
726 free(n);
727 }
728
729
730 /*
731 * 'help_delete_word()' - Free all memory used by a word.
732 */
733
734 static void
735 help_delete_word(help_word_t *w) /* I - Word */
736 {
737 DEBUG_printf(("2help_delete_word(w=%p)", w));
738
739 if (!w)
740 return;
741
742 if (w->text)
743 free(w->text);
744
745 free(w);
746 }
747
748
749 /*
750 * 'help_load_directory()' - Load a directory of files into an index.
751 */
752
753 static int /* O - 0 = success, -1 = error, 1 = updated */
754 help_load_directory(
755 help_index_t *hi, /* I - Index */
756 const char *directory, /* I - Directory */
757 const char *relative) /* I - Relative path */
758 {
759 cups_dir_t *dir; /* Directory file */
760 cups_dentry_t *dent; /* Directory entry */
761 char *ext, /* Pointer to extension */
762 filename[1024], /* Full filename */
763 relname[1024]; /* Relative filename */
764 int update; /* Updated? */
765 help_node_t *node; /* Current node */
766
767
768 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
769 hi, directory, relative));
770
771 /*
772 * Open the directory and scan it...
773 */
774
775 if ((dir = cupsDirOpen(directory)) == NULL)
776 return (0);
777
778 update = 0;
779
780 while ((dent = cupsDirRead(dir)) != NULL)
781 {
782 /*
783 * Skip "." files...
784 */
785
786 if (dent->filename[0] == '.')
787 continue;
788
789 /*
790 * Get absolute and relative filenames...
791 */
792
793 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
794 if (relative)
795 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
796 else
797 strlcpy(relname, dent->filename, sizeof(relname));
798
799 /*
800 * Check if we have a HTML file...
801 */
802
803 if ((ext = strstr(dent->filename, ".html")) != NULL &&
804 (!ext[5] || !strcmp(ext + 5, ".gz")))
805 {
806 /*
807 * HTML file, see if we have already indexed the file...
808 */
809
810 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
811 {
812 /*
813 * File already indexed - check dates to confirm that the
814 * index is up-to-date...
815 */
816
817 if (node->mtime == dent->fileinfo.st_mtime)
818 {
819 /*
820 * Same modification time, so mark all of the nodes
821 * for this file as up-to-date...
822 */
823
824 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
825 if (!strcmp(node->filename, relname))
826 node->score = 0;
827 else
828 break;
829
830 continue;
831 }
832 }
833
834 update = 1;
835
836 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
837 }
838 else if (S_ISDIR(dent->fileinfo.st_mode))
839 {
840 /*
841 * Process sub-directory...
842 */
843
844 if (help_load_directory(hi, filename, relname) == 1)
845 update = 1;
846 }
847 }
848
849 cupsDirClose(dir);
850
851 return (update);
852 }
853
854
855 /*
856 * 'help_load_file()' - Load a HTML files into an index.
857 */
858
859 static int /* O - 0 = success, -1 = error */
860 help_load_file(
861 help_index_t *hi, /* I - Index */
862 const char *filename, /* I - Filename */
863 const char *relative, /* I - Relative path */
864 time_t mtime) /* I - Modification time */
865 {
866 cups_file_t *fp; /* HTML file */
867 help_node_t *node; /* Current node */
868 char line[1024], /* Line from file */
869 temp[1024], /* Temporary word */
870 section[1024], /* Section */
871 *ptr, /* Pointer into line */
872 *anchor, /* Anchor name */
873 *text; /* Text for anchor */
874 off_t offset; /* File offset */
875 char quote; /* Quote character */
876 help_word_t *word; /* Current word */
877 int wordlen; /* Length of word */
878
879
880 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
881 "mtime=%ld)", hi, filename, relative, (long)mtime));
882
883 if ((fp = cupsFileOpen(filename, "r")) == NULL)
884 return (-1);
885
886 node = NULL;
887 offset = 0;
888
889 strlcpy(section, "Other", sizeof(section));
890
891 while (cupsFileGets(fp, line, sizeof(line)))
892 {
893 /*
894 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
895 */
896
897 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
898 {
899 /*
900 * Got section line, copy it!
901 */
902
903 for (ptr += 13; isspace(*ptr & 255); ptr ++);
904
905 strlcpy(section, ptr, sizeof(section));
906 if ((ptr = strstr(section, "-->")) != NULL)
907 {
908 /*
909 * Strip comment stuff from end of line...
910 */
911
912 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
913
914 if (isspace(*ptr & 255))
915 *ptr = '\0';
916 }
917 continue;
918 }
919
920 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
921 {
922 ptr ++;
923
924 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
925 {
926 /*
927 * Found the title...
928 */
929
930 anchor = NULL;
931 ptr += 6;
932 }
933 else
934 {
935 char *idptr; /* Pointer to ID */
936
937 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
938 ptr += 7;
939 else if ((idptr = strstr(ptr, " ID=")) != NULL)
940 ptr = idptr + 4;
941 else if ((idptr = strstr(ptr, " id=")) != NULL)
942 ptr = idptr + 4;
943 else
944 continue;
945
946 /*
947 * Found an anchor...
948 */
949
950 if (*ptr == '\"' || *ptr == '\'')
951 {
952 /*
953 * Get quoted anchor...
954 */
955
956 quote = *ptr;
957 anchor = ptr + 1;
958 if ((ptr = strchr(anchor, quote)) != NULL)
959 *ptr++ = '\0';
960 else
961 break;
962 }
963 else
964 {
965 /*
966 * Get unquoted anchor...
967 */
968
969 anchor = ptr + 1;
970
971 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
972
973 if (*ptr != '>')
974 *ptr++ = '\0';
975 else
976 break;
977 }
978
979 /*
980 * Got the anchor, now lets find the end...
981 */
982
983 while (*ptr && *ptr != '>')
984 ptr ++;
985
986 if (*ptr != '>')
987 break;
988
989 *ptr++ = '\0';
990 }
991
992 /*
993 * Now collect text for the link...
994 */
995
996 text = ptr;
997 while ((ptr = strchr(text, '<')) == NULL)
998 {
999 ptr = text + strlen(text);
1000 if (ptr >= (line + sizeof(line) - 2))
1001 break;
1002
1003 *ptr++ = ' ';
1004
1005 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1006 break;
1007 }
1008
1009 *ptr = '\0';
1010
1011 if (node)
1012 node->length = (size_t)(offset - node->offset);
1013
1014 if (!*text)
1015 {
1016 node = NULL;
1017 break;
1018 }
1019
1020 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1021 {
1022 /*
1023 * Node already in the index, so replace the text and other
1024 * data...
1025 */
1026
1027 cupsArrayRemove(hi->nodes, node);
1028
1029 if (node->section)
1030 free(node->section);
1031
1032 if (node->text)
1033 free(node->text);
1034
1035 if (node->words)
1036 {
1037 for (word = (help_word_t *)cupsArrayFirst(node->words);
1038 word;
1039 word = (help_word_t *)cupsArrayNext(node->words))
1040 help_delete_word(word);
1041
1042 cupsArrayDelete(node->words);
1043 node->words = NULL;
1044 }
1045
1046 node->section = section[0] ? strdup(section) : NULL;
1047 node->text = strdup(text);
1048 node->mtime = mtime;
1049 node->offset = offset;
1050 node->score = 0;
1051 }
1052 else
1053 {
1054 /*
1055 * New node...
1056 */
1057
1058 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1059 }
1060
1061 /*
1062 * Go through the text value and replace tabs and newlines with
1063 * whitespace and eliminate extra whitespace...
1064 */
1065
1066 for (ptr = node->text, text = node->text; *ptr;)
1067 if (isspace(*ptr & 255))
1068 {
1069 while (isspace(*ptr & 255))
1070 ptr ++;
1071
1072 *text++ = ' ';
1073 }
1074 else if (text != ptr)
1075 *text++ = *ptr++;
1076 else
1077 {
1078 text ++;
1079 ptr ++;
1080 }
1081
1082 *text = '\0';
1083
1084 /*
1085 * (Re)add the node to the array...
1086 */
1087
1088 cupsArrayAdd(hi->nodes, node);
1089
1090 if (!anchor)
1091 node = NULL;
1092 break;
1093 }
1094
1095 if (node)
1096 {
1097 /*
1098 * Scan this line for words...
1099 */
1100
1101 for (ptr = line; *ptr; ptr ++)
1102 {
1103 /*
1104 * Skip HTML stuff...
1105 */
1106
1107 if (*ptr == '<')
1108 {
1109 if (!strncmp(ptr, "<!--", 4))
1110 {
1111 /*
1112 * Skip HTML comment...
1113 */
1114
1115 if ((text = strstr(ptr + 4, "-->")) == NULL)
1116 ptr += strlen(ptr) - 1;
1117 else
1118 ptr = text + 2;
1119 }
1120 else
1121 {
1122 /*
1123 * Skip HTML element...
1124 */
1125
1126 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1127 {
1128 if (*ptr == '\"' || *ptr == '\'')
1129 {
1130 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1131
1132 if (!*ptr)
1133 ptr --;
1134 }
1135 }
1136
1137 if (!*ptr)
1138 ptr --;
1139 }
1140
1141 continue;
1142 }
1143 else if (*ptr == '&')
1144 {
1145 /*
1146 * Skip HTML entity...
1147 */
1148
1149 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1150
1151 if (!*ptr)
1152 ptr --;
1153
1154 continue;
1155 }
1156 else if (!isalnum(*ptr & 255))
1157 continue;
1158
1159 /*
1160 * Found the start of a word, search until we find the end...
1161 */
1162
1163 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1164
1165 wordlen = (int)(ptr - text);
1166
1167 memcpy(temp, text, (size_t)wordlen);
1168 temp[wordlen] = '\0';
1169
1170 ptr --;
1171
1172 if (wordlen > 1 && !bsearch(temp, help_common_words,
1173 (sizeof(help_common_words) /
1174 sizeof(help_common_words[0])),
1175 sizeof(help_common_words[0]),
1176 (int (*)(const void *, const void *))
1177 _cups_strcasecmp))
1178 help_add_word(node, temp);
1179 }
1180 }
1181
1182 /*
1183 * Get the offset of the next line...
1184 */
1185
1186 offset = cupsFileTell(fp);
1187 }
1188
1189 cupsFileClose(fp);
1190
1191 if (node)
1192 node->length = (size_t)(offset - node->offset);
1193
1194 return (0);
1195 }
1196
1197
1198 /*
1199 * 'help_new_node()' - Create a new node and add it to an index.
1200 */
1201
1202 static help_node_t * /* O - Node pointer or NULL on error */
1203 help_new_node(const char *filename, /* I - Filename */
1204 const char *anchor, /* I - Anchor */
1205 const char *section, /* I - Section */
1206 const char *text, /* I - Text */
1207 time_t mtime, /* I - Modification time */
1208 off_t offset, /* I - Offset in file */
1209 size_t length) /* I - Length in bytes */
1210 {
1211 help_node_t *n; /* Node */
1212
1213
1214 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1215 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1216 (long)mtime, (long)offset, (long)length));
1217
1218 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1219 if (!n)
1220 return (NULL);
1221
1222 n->filename = strdup(filename);
1223 n->anchor = anchor ? strdup(anchor) : NULL;
1224 n->section = *section ? strdup(section) : NULL;
1225 n->text = strdup(text);
1226 n->mtime = mtime;
1227 n->offset = offset;
1228 n->length = length;
1229
1230 return (n);
1231 }
1232
1233
1234 /*
1235 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1236 */
1237
1238 static int /* O - Difference */
1239 help_sort_by_name(help_node_t *n1, /* I - First node */
1240 help_node_t *n2) /* I - Second node */
1241 {
1242 int diff; /* Difference */
1243
1244
1245 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1246 n1, n1->filename, n1->anchor,
1247 n2, n2->filename, n2->anchor));
1248
1249 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1250 return (diff);
1251
1252 if (!n1->anchor && !n2->anchor)
1253 return (0);
1254 else if (!n1->anchor)
1255 return (-1);
1256 else if (!n2->anchor)
1257 return (1);
1258 else
1259 return (strcmp(n1->anchor, n2->anchor));
1260 }
1261
1262
1263 /*
1264 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1265 */
1266
1267 static int /* O - Difference */
1268 help_sort_by_score(help_node_t *n1, /* I - First node */
1269 help_node_t *n2) /* I - Second node */
1270 {
1271 int diff; /* Difference */
1272
1273
1274 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1275 "n2=%p(%d \"%s\" \"%s\")",
1276 n1, n1->score, n1->section, n1->text,
1277 n2, n2->score, n2->section, n2->text));
1278
1279 if (n1->score != n2->score)
1280 return (n2->score - n1->score);
1281
1282 if (n1->section && !n2->section)
1283 return (1);
1284 else if (!n1->section && n2->section)
1285 return (-1);
1286 else if (n1->section && n2->section &&
1287 (diff = strcmp(n1->section, n2->section)) != 0)
1288 return (diff);
1289
1290 return (_cups_strcasecmp(n1->text, n2->text));
1291 }
1292
1293
1294 /*
1295 * 'help_sort_words()' - Sort words alphabetically.
1296 */
1297
1298 static int /* O - Difference */
1299 help_sort_words(help_word_t *w1, /* I - Second word */
1300 help_word_t *w2) /* I - Second word */
1301 {
1302 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1303 w1, w1->text, w2, w2->text));
1304
1305 return (_cups_strcasecmp(w1->text, w2->text));
1306 }