]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Fix source file header text duplication text duplication.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * Online help index routines for CUPS.
3 *
4 * Copyright 2007-2015 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * These coded instructions, statements, and computer programs are the
8 * property of Apple Inc. and are protected by Federal copyright
9 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
10 * which should have been included with this file. If this file is
11 * missing or damaged, see the license at "http://www.cups.org/".
12 */
13
14 /*
15 * Include necessary headers...
16 */
17
18 #include "cgi-private.h"
19 #include <cups/dir.h>
20
21
/*
 * Common English words that are never added to the index.
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries MUST stay in ascending alphabetical order.
 * Each entry has to fit in 6 bytes including the terminating nul.
 */

static const char help_common_words[][6] =
{
  "about", "all", "an", "and", "are", "as", "at",
  "be", "been", "but", "by",
  "call", "can", "come", "could",
  "day", "did", "do", "down",
  "each",
  "find", "first", "for", "from",
  "go",
  "had", "has", "have", "he", "her", "him", "his", "hot", "how",
  "if", "in", "is", "it",
  "know",
  "like", "long", "look",
  "make", "many", "may", "more", "most", "my",
  "no", "now",
  "of", "on", "one", "or", "other", "out", "over",
  "said", "see", "she", "side", "so", "some", "sound",
  "than", "that", "the", "their", "them", "then", "there", "these", "they",
  "thing", "this", "time", "to", "two",
  "up", "use",
  "was", "water", "way", "we", "were", "what", "when", "which", "who",
  "will", "with", "word", "would", "write",
  "you", "your"
};
125
126
/*
 * Local functions...
 *
 * Forward declarations for the file-private helpers defined further down.
 */

/* Add a word to a node, or count another occurrence of an existing one */
static help_word_t *help_add_word(help_node_t *n, const char *text);
/* Free a node together with its strings and words */
static void help_delete_node(help_node_t *n);
/* Free a single word */
static void help_delete_word(help_word_t *w);
/* Scan a directory (recursively) for HTML files to index */
static int help_load_directory(help_index_t *hi,
                               const char *directory,
                               const char *relative);
/* Index the title and named anchors of one HTML file */
static int help_load_file(help_index_t *hi,
                          const char *filename,
                          const char *relative,
                          time_t mtime);
/* Allocate a node; filename, section, and text must not be NULL (anchor may be) */
static help_node_t *help_new_node(const char *filename, const char *anchor,
                                  const char *section, const char *text,
                                  time_t mtime, off_t offset,
                                  size_t length)
                                  __attribute__((nonnull(1,3,4)));
/* Array comparison callbacks */
static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
static int help_sort_words(help_word_t *w1, help_word_t *w2);
149
150
151 /*
152 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
153 */
154
155 void
156 helpDeleteIndex(help_index_t *hi) /* I - Help index */
157 {
158 help_node_t *node; /* Current node */
159
160
161 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
162
163 if (!hi)
164 return;
165
166 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
167 node;
168 node = (help_node_t *)cupsArrayNext(hi->nodes))
169 {
170 if (!hi->search)
171 help_delete_node(node);
172 }
173
174 cupsArrayDelete(hi->nodes);
175 cupsArrayDelete(hi->sorted);
176
177 free(hi);
178 }
179
180
181 /*
182 * 'helpFindNode()' - Find a node in an index.
183 */
184
185 help_node_t * /* O - Node pointer or NULL */
186 helpFindNode(help_index_t *hi, /* I - Index */
187 const char *filename, /* I - Filename */
188 const char *anchor) /* I - Anchor */
189 {
190 help_node_t key; /* Search key */
191
192
193 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
194 hi, filename, anchor));
195
196 /*
197 * Range check input...
198 */
199
200 if (!hi || !filename)
201 return (NULL);
202
203 /*
204 * Initialize the search key...
205 */
206
207 key.filename = (char *)filename;
208 key.anchor = (char *)anchor;
209
210 /*
211 * Return any match...
212 */
213
214 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
215 }
216
217
218 /*
219 * 'helpLoadIndex()' - Load a help index from disk.
220 */
221
222 help_index_t * /* O - Index pointer or NULL */
223 helpLoadIndex(const char *hifile, /* I - Index filename */
224 const char *directory) /* I - Directory that is indexed */
225 {
226 help_index_t *hi; /* Help index */
227 cups_file_t *fp; /* Current file */
228 char line[2048], /* Line from file */
229 *ptr, /* Pointer into line */
230 *filename, /* Filename in line */
231 *anchor, /* Anchor in line */
232 *sectptr, /* Section pointer in line */
233 section[1024], /* Section name */
234 *text; /* Text in line */
235 time_t mtime; /* Modification time */
236 off_t offset; /* Offset into file */
237 size_t length; /* Length in bytes */
238 int update; /* Update? */
239 help_node_t *node; /* Current node */
240 help_word_t *word; /* Current word */
241
242
243 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
244 hifile, directory));
245
246 /*
247 * Create a new, empty index.
248 */
249
250 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
251 return (NULL);
252
253 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
254 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
255
256 if (!hi->nodes || !hi->sorted)
257 {
258 cupsArrayDelete(hi->nodes);
259 cupsArrayDelete(hi->sorted);
260 free(hi);
261 return (NULL);
262 }
263
264 /*
265 * Try loading the existing index file...
266 */
267
268 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
269 {
270 /*
271 * Lock the file and then read the first line...
272 */
273
274 cupsFileLock(fp, 1);
275
276 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
277 {
278 /*
279 * Got a valid header line, now read the data lines...
280 */
281
282 node = NULL;
283
284 while (cupsFileGets(fp, line, sizeof(line)))
285 {
286 /*
287 * Each line looks like one of the following:
288 *
289 * filename mtime offset length "section" "text"
290 * filename#anchor offset length "text"
291 * SP count word
292 */
293
294 if (line[0] == ' ')
295 {
296 /*
297 * Read a word in the current node...
298 */
299
300 if (!node || (ptr = strrchr(line, ' ')) == NULL)
301 continue;
302
303 if ((word = help_add_word(node, ptr + 1)) != NULL)
304 word->count = atoi(line + 1);
305 }
306 else
307 {
308 /*
309 * Add a node...
310 */
311
312 filename = line;
313
314 if ((ptr = strchr(line, ' ')) == NULL)
315 break;
316
317 while (isspace(*ptr & 255))
318 *ptr++ = '\0';
319
320 if ((anchor = strrchr(filename, '#')) != NULL)
321 {
322 *anchor++ = '\0';
323 mtime = 0;
324 }
325 else
326 mtime = strtol(ptr, &ptr, 10);
327
328 offset = strtoll(ptr, &ptr, 10);
329 length = (size_t)strtoll(ptr, &ptr, 10);
330
331 while (isspace(*ptr & 255))
332 ptr ++;
333
334 if (!anchor)
335 {
336 /*
337 * Get section...
338 */
339
340 if (*ptr != '\"')
341 break;
342
343 ptr ++;
344 sectptr = ptr;
345
346 while (*ptr && *ptr != '\"')
347 ptr ++;
348
349 if (*ptr != '\"')
350 break;
351
352 *ptr++ = '\0';
353
354 strlcpy(section, sectptr, sizeof(section));
355
356 while (isspace(*ptr & 255))
357 ptr ++;
358 }
359
360 if (*ptr != '\"')
361 break;
362
363 ptr ++;
364 text = ptr;
365
366 while (*ptr && *ptr != '\"')
367 ptr ++;
368
369 if (*ptr != '\"')
370 break;
371
372 *ptr++ = '\0';
373
374 if ((node = help_new_node(filename, anchor, section, text,
375 mtime, offset, length)) == NULL)
376 break;
377
378 node->score = -1;
379
380 cupsArrayAdd(hi->nodes, node);
381 }
382 }
383 }
384
385 cupsFileClose(fp);
386 }
387
388 /*
389 * Scan for new/updated files...
390 */
391
392 update = help_load_directory(hi, directory, NULL);
393
394 /*
395 * Remove any files that are no longer installed...
396 */
397
398 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
399 node;
400 node = (help_node_t *)cupsArrayNext(hi->nodes))
401 if (node->score < 0)
402 {
403 /*
404 * Delete this node...
405 */
406
407 cupsArrayRemove(hi->nodes, node);
408 help_delete_node(node);
409 }
410
411 /*
412 * Add nodes to the sorted array...
413 */
414
415 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
416 node;
417 node = (help_node_t *)cupsArrayNext(hi->nodes))
418 cupsArrayAdd(hi->sorted, node);
419
420 /*
421 * Save the index if we updated it...
422 */
423
424 if (update)
425 helpSaveIndex(hi, hifile);
426
427 /*
428 * Return the index...
429 */
430
431 return (hi);
432 }
433
434
435 /*
436 * 'helpSaveIndex()' - Save a help index to disk.
437 */
438
439 int /* O - 0 on success, -1 on error */
440 helpSaveIndex(help_index_t *hi, /* I - Index */
441 const char *hifile) /* I - Index filename */
442 {
443 cups_file_t *fp; /* Index file */
444 help_node_t *node; /* Current node */
445 help_word_t *word; /* Current word */
446
447
448 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
449
450 /*
451 * Try creating a new index file...
452 */
453
454 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
455 return (-1);
456
457 /*
458 * Lock the file while we write it...
459 */
460
461 cupsFileLock(fp, 1);
462
463 cupsFilePuts(fp, "HELPV2\n");
464
465 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
466 node;
467 node = (help_node_t *)cupsArrayNext(hi->nodes))
468 {
469 /*
470 * Write the current node with/without the anchor...
471 */
472
473 if (node->anchor)
474 {
475 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
476 node->filename, node->anchor,
477 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
478 node->text) < 0)
479 break;
480 }
481 else
482 {
483 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
484 node->filename, (int)node->mtime,
485 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
486 node->section ? node->section : "", node->text) < 0)
487 break;
488 }
489
490 /*
491 * Then write the words associated with the node...
492 */
493
494 for (word = (help_word_t *)cupsArrayFirst(node->words);
495 word;
496 word = (help_word_t *)cupsArrayNext(node->words))
497 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
498 break;
499 }
500
501 cupsFileFlush(fp);
502
503 if (cupsFileClose(fp) < 0)
504 return (-1);
505 else if (node)
506 return (-1);
507 else
508 return (0);
509 }
510
511
512 /*
513 * 'helpSearchIndex()' - Search an index.
514 */
515
516 help_index_t * /* O - Search index */
517 helpSearchIndex(help_index_t *hi, /* I - Index */
518 const char *query, /* I - Query string */
519 const char *section, /* I - Limit search to this section */
520 const char *filename) /* I - Limit search to this file */
521 {
522 help_index_t *search; /* Search index */
523 help_node_t *node; /* Current node */
524 help_word_t *word; /* Current word */
525 void *sc; /* Search context */
526 int matches; /* Number of matches */
527
528
529 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
530 hi, query, filename));
531
532 /*
533 * Range check...
534 */
535
536 if (!hi || !query)
537 return (NULL);
538
539 /*
540 * Reset the scores of all nodes to 0...
541 */
542
543 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
544 node;
545 node = (help_node_t *)cupsArrayNext(hi->nodes))
546 node->score = 0;
547
548 /*
549 * Find the first node to search in...
550 */
551
552 if (filename)
553 {
554 node = helpFindNode(hi, filename, NULL);
555 if (!node)
556 return (NULL);
557 }
558 else
559 node = (help_node_t *)cupsArrayFirst(hi->nodes);
560
561 /*
562 * Convert the query into a regular expression...
563 */
564
565 sc = cgiCompileSearch(query);
566 if (!sc)
567 return (NULL);
568
569 /*
570 * Allocate a search index...
571 */
572
573 search = calloc(1, sizeof(help_index_t));
574 if (!search)
575 {
576 cgiFreeSearch(sc);
577 return (NULL);
578 }
579
580 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
581 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
582
583 if (!search->nodes || !search->sorted)
584 {
585 cupsArrayDelete(search->nodes);
586 cupsArrayDelete(search->sorted);
587 free(search);
588 cgiFreeSearch(sc);
589 return (NULL);
590 }
591
592 search->search = 1;
593
594 /*
595 * Check each node in the index, adding matching nodes to the
596 * search index...
597 */
598
599 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
600 if (section && strcmp(node->section, section))
601 continue;
602 else if (filename && strcmp(node->filename, filename))
603 continue;
604 else
605 {
606 matches = cgiDoSearch(sc, node->text);
607
608 for (word = (help_word_t *)cupsArrayFirst(node->words);
609 word;
610 word = (help_word_t *)cupsArrayNext(node->words))
611 if (cgiDoSearch(sc, word->text) > 0)
612 matches += word->count;
613
614 if (matches > 0)
615 {
616 /*
617 * Found a match, add the node to the search index...
618 */
619
620 node->score = matches;
621
622 cupsArrayAdd(search->nodes, node);
623 cupsArrayAdd(search->sorted, node);
624 }
625 }
626
627 /*
628 * Free the search context...
629 */
630
631 cgiFreeSearch(sc);
632
633 /*
634 * Return the results...
635 */
636
637 return (search);
638 }
639
640
641 /*
642 * 'help_add_word()' - Add a word to a node.
643 */
644
645 static help_word_t * /* O - New word */
646 help_add_word(help_node_t *n, /* I - Node */
647 const char *text) /* I - Word text */
648 {
649 help_word_t *w, /* New word */
650 key; /* Search key */
651
652
653 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
654
655 /*
656 * Create the words array as needed...
657 */
658
659 if (!n->words)
660 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
661
662 /*
663 * See if the word is already added...
664 */
665
666 key.text = (char *)text;
667
668 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
669 {
670 /*
671 * Create a new word...
672 */
673
674 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
675 return (NULL);
676
677 if ((w->text = strdup(text)) == NULL)
678 {
679 free(w);
680 return (NULL);
681 }
682
683 cupsArrayAdd(n->words, w);
684 }
685
686 /*
687 * Bump the counter for this word and return it...
688 */
689
690 w->count ++;
691
692 return (w);
693 }
694
695
696 /*
697 * 'help_delete_node()' - Free all memory used by a node.
698 */
699
700 static void
701 help_delete_node(help_node_t *n) /* I - Node */
702 {
703 help_word_t *w; /* Current word */
704
705
706 DEBUG_printf(("2help_delete_node(n=%p)", n));
707
708 if (!n)
709 return;
710
711 if (n->filename)
712 free(n->filename);
713
714 if (n->anchor)
715 free(n->anchor);
716
717 if (n->section)
718 free(n->section);
719
720 if (n->text)
721 free(n->text);
722
723 for (w = (help_word_t *)cupsArrayFirst(n->words);
724 w;
725 w = (help_word_t *)cupsArrayNext(n->words))
726 help_delete_word(w);
727
728 cupsArrayDelete(n->words);
729
730 free(n);
731 }
732
733
734 /*
735 * 'help_delete_word()' - Free all memory used by a word.
736 */
737
738 static void
739 help_delete_word(help_word_t *w) /* I - Word */
740 {
741 DEBUG_printf(("2help_delete_word(w=%p)", w));
742
743 if (!w)
744 return;
745
746 if (w->text)
747 free(w->text);
748
749 free(w);
750 }
751
752
753 /*
754 * 'help_load_directory()' - Load a directory of files into an index.
755 */
756
757 static int /* O - 0 = success, -1 = error, 1 = updated */
758 help_load_directory(
759 help_index_t *hi, /* I - Index */
760 const char *directory, /* I - Directory */
761 const char *relative) /* I - Relative path */
762 {
763 cups_dir_t *dir; /* Directory file */
764 cups_dentry_t *dent; /* Directory entry */
765 char *ext, /* Pointer to extension */
766 filename[1024], /* Full filename */
767 relname[1024]; /* Relative filename */
768 int update; /* Updated? */
769 help_node_t *node; /* Current node */
770
771
772 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
773 hi, directory, relative));
774
775 /*
776 * Open the directory and scan it...
777 */
778
779 if ((dir = cupsDirOpen(directory)) == NULL)
780 return (0);
781
782 update = 0;
783
784 while ((dent = cupsDirRead(dir)) != NULL)
785 {
786 /*
787 * Skip "." files...
788 */
789
790 if (dent->filename[0] == '.')
791 continue;
792
793 /*
794 * Get absolute and relative filenames...
795 */
796
797 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
798 if (relative)
799 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
800 else
801 strlcpy(relname, dent->filename, sizeof(relname));
802
803 /*
804 * Check if we have a HTML file...
805 */
806
807 if ((ext = strstr(dent->filename, ".html")) != NULL &&
808 (!ext[5] || !strcmp(ext + 5, ".gz")))
809 {
810 /*
811 * HTML file, see if we have already indexed the file...
812 */
813
814 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
815 {
816 /*
817 * File already indexed - check dates to confirm that the
818 * index is up-to-date...
819 */
820
821 if (node->mtime == dent->fileinfo.st_mtime)
822 {
823 /*
824 * Same modification time, so mark all of the nodes
825 * for this file as up-to-date...
826 */
827
828 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
829 if (!strcmp(node->filename, relname))
830 node->score = 0;
831 else
832 break;
833
834 continue;
835 }
836 }
837
838 update = 1;
839
840 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
841 }
842 else if (S_ISDIR(dent->fileinfo.st_mode))
843 {
844 /*
845 * Process sub-directory...
846 */
847
848 if (help_load_directory(hi, filename, relname) == 1)
849 update = 1;
850 }
851 }
852
853 cupsDirClose(dir);
854
855 return (update);
856 }
857
858
859 /*
860 * 'help_load_file()' - Load a HTML files into an index.
861 */
862
863 static int /* O - 0 = success, -1 = error */
864 help_load_file(
865 help_index_t *hi, /* I - Index */
866 const char *filename, /* I - Filename */
867 const char *relative, /* I - Relative path */
868 time_t mtime) /* I - Modification time */
869 {
870 cups_file_t *fp; /* HTML file */
871 help_node_t *node; /* Current node */
872 char line[1024], /* Line from file */
873 temp[1024], /* Temporary word */
874 section[1024], /* Section */
875 *ptr, /* Pointer into line */
876 *anchor, /* Anchor name */
877 *text; /* Text for anchor */
878 off_t offset; /* File offset */
879 char quote; /* Quote character */
880 help_word_t *word; /* Current word */
881 int wordlen; /* Length of word */
882
883
884 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
885 "mtime=%ld)", hi, filename, relative, (long)mtime));
886
887 if ((fp = cupsFileOpen(filename, "r")) == NULL)
888 return (-1);
889
890 node = NULL;
891 offset = 0;
892
893 strlcpy(section, "Other", sizeof(section));
894
895 while (cupsFileGets(fp, line, sizeof(line)))
896 {
897 /*
898 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
899 */
900
901 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
902 {
903 /*
904 * Got section line, copy it!
905 */
906
907 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
908
909 strlcpy(section, ptr, sizeof(section));
910 if ((ptr = strstr(section, "-->")) != NULL)
911 {
912 /*
913 * Strip comment stuff from end of line...
914 */
915
916 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
917
918 if (isspace(*ptr & 255))
919 *ptr = '\0';
920 }
921 continue;
922 }
923
924 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
925 {
926 ptr ++;
927
928 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
929 {
930 /*
931 * Found the title...
932 */
933
934 anchor = NULL;
935 ptr += 6;
936 }
937 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
938 {
939 /*
940 * Found an anchor...
941 */
942
943 ptr += 7;
944
945 if (*ptr == '\"' || *ptr == '\'')
946 {
947 /*
948 * Get quoted anchor...
949 */
950
951 quote = *ptr;
952 anchor = ptr + 1;
953 if ((ptr = strchr(anchor, quote)) != NULL)
954 *ptr++ = '\0';
955 else
956 break;
957 }
958 else
959 {
960 /*
961 * Get unquoted anchor...
962 */
963
964 anchor = ptr + 1;
965
966 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
967
968 if (*ptr)
969 *ptr++ = '\0';
970 else
971 break;
972 }
973
974 /*
975 * Got the anchor, now lets find the end...
976 */
977
978 while (*ptr && *ptr != '>')
979 ptr ++;
980
981 if (*ptr != '>')
982 break;
983
984 ptr ++;
985 }
986 else
987 continue;
988
989 /*
990 * Now collect text for the link...
991 */
992
993 text = ptr;
994 while ((ptr = strchr(text, '<')) == NULL)
995 {
996 ptr = text + strlen(text);
997 if (ptr >= (line + sizeof(line) - 2))
998 break;
999
1000 *ptr++ = ' ';
1001
1002 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1003 break;
1004 }
1005
1006 *ptr = '\0';
1007
1008 if (node)
1009 node->length = (size_t)(offset - node->offset);
1010
1011 if (!*text)
1012 {
1013 node = NULL;
1014 break;
1015 }
1016
1017 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1018 {
1019 /*
1020 * Node already in the index, so replace the text and other
1021 * data...
1022 */
1023
1024 cupsArrayRemove(hi->nodes, node);
1025
1026 if (node->section)
1027 free(node->section);
1028
1029 if (node->text)
1030 free(node->text);
1031
1032 if (node->words)
1033 {
1034 for (word = (help_word_t *)cupsArrayFirst(node->words);
1035 word;
1036 word = (help_word_t *)cupsArrayNext(node->words))
1037 help_delete_word(word);
1038
1039 cupsArrayDelete(node->words);
1040 node->words = NULL;
1041 }
1042
1043 node->section = section[0] ? strdup(section) : NULL;
1044 node->text = strdup(text);
1045 node->mtime = mtime;
1046 node->offset = offset;
1047 node->score = 0;
1048 }
1049 else
1050 {
1051 /*
1052 * New node...
1053 */
1054
1055 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1056 }
1057
1058 /*
1059 * Go through the text value and replace tabs and newlines with
1060 * whitespace and eliminate extra whitespace...
1061 */
1062
1063 for (ptr = node->text, text = node->text; *ptr;)
1064 if (isspace(*ptr & 255))
1065 {
1066 while (isspace(*ptr & 255))
1067 ptr ++;
1068
1069 *text++ = ' ';
1070 }
1071 else if (text != ptr)
1072 *text++ = *ptr++;
1073 else
1074 {
1075 text ++;
1076 ptr ++;
1077 }
1078
1079 *text = '\0';
1080
1081 /*
1082 * (Re)add the node to the array...
1083 */
1084
1085 cupsArrayAdd(hi->nodes, node);
1086
1087 if (!anchor)
1088 node = NULL;
1089 break;
1090 }
1091
1092 if (node)
1093 {
1094 /*
1095 * Scan this line for words...
1096 */
1097
1098 for (ptr = line; *ptr; ptr ++)
1099 {
1100 /*
1101 * Skip HTML stuff...
1102 */
1103
1104 if (*ptr == '<')
1105 {
1106 if (!strncmp(ptr, "<!--", 4))
1107 {
1108 /*
1109 * Skip HTML comment...
1110 */
1111
1112 if ((text = strstr(ptr + 4, "-->")) == NULL)
1113 ptr += strlen(ptr) - 1;
1114 else
1115 ptr = text + 2;
1116 }
1117 else
1118 {
1119 /*
1120 * Skip HTML element...
1121 */
1122
1123 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1124 {
1125 if (*ptr == '\"' || *ptr == '\'')
1126 {
1127 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1128
1129 if (!*ptr)
1130 ptr --;
1131 }
1132 }
1133
1134 if (!*ptr)
1135 ptr --;
1136 }
1137
1138 continue;
1139 }
1140 else if (*ptr == '&')
1141 {
1142 /*
1143 * Skip HTML entity...
1144 */
1145
1146 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1147
1148 if (!*ptr)
1149 ptr --;
1150
1151 continue;
1152 }
1153 else if (!isalnum(*ptr & 255))
1154 continue;
1155
1156 /*
1157 * Found the start of a word, search until we find the end...
1158 */
1159
1160 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1161
1162 wordlen = (int)(ptr - text);
1163
1164 memcpy(temp, text, (size_t)wordlen);
1165 temp[wordlen] = '\0';
1166
1167 ptr --;
1168
1169 if (wordlen > 1 && !bsearch(temp, help_common_words,
1170 (sizeof(help_common_words) /
1171 sizeof(help_common_words[0])),
1172 sizeof(help_common_words[0]),
1173 (int (*)(const void *, const void *))
1174 _cups_strcasecmp))
1175 help_add_word(node, temp);
1176 }
1177 }
1178
1179 /*
1180 * Get the offset of the next line...
1181 */
1182
1183 offset = cupsFileTell(fp);
1184 }
1185
1186 cupsFileClose(fp);
1187
1188 if (node)
1189 node->length = (size_t)(offset - node->offset);
1190
1191 return (0);
1192 }
1193
1194
1195 /*
1196 * 'help_new_node()' - Create a new node and add it to an index.
1197 */
1198
1199 static help_node_t * /* O - Node pointer or NULL on error */
1200 help_new_node(const char *filename, /* I - Filename */
1201 const char *anchor, /* I - Anchor */
1202 const char *section, /* I - Section */
1203 const char *text, /* I - Text */
1204 time_t mtime, /* I - Modification time */
1205 off_t offset, /* I - Offset in file */
1206 size_t length) /* I - Length in bytes */
1207 {
1208 help_node_t *n; /* Node */
1209
1210
1211 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1212 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1213 (long)mtime, (long)offset, (long)length));
1214
1215 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1216 if (!n)
1217 return (NULL);
1218
1219 n->filename = strdup(filename);
1220 n->anchor = anchor ? strdup(anchor) : NULL;
1221 n->section = *section ? strdup(section) : NULL;
1222 n->text = strdup(text);
1223 n->mtime = mtime;
1224 n->offset = offset;
1225 n->length = length;
1226
1227 return (n);
1228 }
1229
1230
1231 /*
1232 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1233 */
1234
1235 static int /* O - Difference */
1236 help_sort_by_name(help_node_t *n1, /* I - First node */
1237 help_node_t *n2) /* I - Second node */
1238 {
1239 int diff; /* Difference */
1240
1241
1242 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1243 n1, n1->filename, n1->anchor,
1244 n2, n2->filename, n2->anchor));
1245
1246 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1247 return (diff);
1248
1249 if (!n1->anchor && !n2->anchor)
1250 return (0);
1251 else if (!n1->anchor)
1252 return (-1);
1253 else if (!n2->anchor)
1254 return (1);
1255 else
1256 return (strcmp(n1->anchor, n2->anchor));
1257 }
1258
1259
1260 /*
1261 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1262 */
1263
1264 static int /* O - Difference */
1265 help_sort_by_score(help_node_t *n1, /* I - First node */
1266 help_node_t *n2) /* I - Second node */
1267 {
1268 int diff; /* Difference */
1269
1270
1271 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1272 "n2=%p(%d \"%s\" \"%s\")",
1273 n1, n1->score, n1->section, n1->text,
1274 n2, n2->score, n2->section, n2->text));
1275
1276 if (n1->score != n2->score)
1277 return (n2->score - n1->score);
1278
1279 if (n1->section && !n2->section)
1280 return (1);
1281 else if (!n1->section && n2->section)
1282 return (-1);
1283 else if (n1->section && n2->section &&
1284 (diff = strcmp(n1->section, n2->section)) != 0)
1285 return (diff);
1286
1287 return (_cups_strcasecmp(n1->text, n2->text));
1288 }
1289
1290
1291 /*
1292 * 'help_sort_words()' - Sort words alphabetically.
1293 */
1294
1295 static int /* O - Difference */
1296 help_sort_words(help_word_t *w1, /* I - Second word */
1297 help_word_t *w2) /* I - Second word */
1298 {
1299 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1300 w1, w1->text, w2, w2->text));
1301
1302 return (_cups_strcasecmp(w1->text, w2->text));
1303 }