]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Fix build errors on Fedora.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * "$Id$"
3 *
4 * Online help index routines for CUPS.
5 *
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
14 */
15
16 /*
17 * Include necessary headers...
18 */
19
20 #include "cgi-private.h"
21 #include <cups/dir.h>
22
23
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries must remain in ascending alphabetical order.
 * Every entry must also fit in 6 bytes including the terminating nul.  The
 * table is never modified, so it is declared const.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
127
128
129 /*
130 * Local functions...
131 */
132
133 static help_word_t *help_add_word(help_node_t *n, const char *text);
134 static void help_delete_node(help_node_t *n);
135 static void help_delete_word(help_word_t *w);
136 static int help_load_directory(help_index_t *hi,
137 const char *directory,
138 const char *relative);
139 static int help_load_file(help_index_t *hi,
140 const char *filename,
141 const char *relative,
142 time_t mtime);
143 static help_node_t *help_new_node(const char *filename, const char *anchor,
144 const char *section, const char *text,
145 time_t mtime, off_t offset,
146 size_t length)
147 __attribute__((nonnull(1,3,4)));
148 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
149 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
150 static int help_sort_words(help_word_t *w1, help_word_t *w2);
151
152
153 /*
154 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
155 */
156
157 void
158 helpDeleteIndex(help_index_t *hi) /* I - Help index */
159 {
160 help_node_t *node; /* Current node */
161
162
163 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
164
165 if (!hi)
166 return;
167
168 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
169 node;
170 node = (help_node_t *)cupsArrayNext(hi->nodes))
171 {
172 if (!hi->search)
173 help_delete_node(node);
174 }
175
176 cupsArrayDelete(hi->nodes);
177 cupsArrayDelete(hi->sorted);
178
179 free(hi);
180 }
181
182
183 /*
184 * 'helpFindNode()' - Find a node in an index.
185 */
186
187 help_node_t * /* O - Node pointer or NULL */
188 helpFindNode(help_index_t *hi, /* I - Index */
189 const char *filename, /* I - Filename */
190 const char *anchor) /* I - Anchor */
191 {
192 help_node_t key; /* Search key */
193
194
195 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
196 hi, filename, anchor));
197
198 /*
199 * Range check input...
200 */
201
202 if (!hi || !filename)
203 return (NULL);
204
205 /*
206 * Initialize the search key...
207 */
208
209 key.filename = (char *)filename;
210 key.anchor = (char *)anchor;
211
212 /*
213 * Return any match...
214 */
215
216 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
217 }
218
219
220 /*
221 * 'helpLoadIndex()' - Load a help index from disk.
222 */
223
224 help_index_t * /* O - Index pointer or NULL */
225 helpLoadIndex(const char *hifile, /* I - Index filename */
226 const char *directory) /* I - Directory that is indexed */
227 {
228 help_index_t *hi; /* Help index */
229 cups_file_t *fp; /* Current file */
230 char line[2048], /* Line from file */
231 *ptr, /* Pointer into line */
232 *filename, /* Filename in line */
233 *anchor, /* Anchor in line */
234 *sectptr, /* Section pointer in line */
235 section[1024], /* Section name */
236 *text; /* Text in line */
237 time_t mtime; /* Modification time */
238 off_t offset; /* Offset into file */
239 size_t length; /* Length in bytes */
240 int update; /* Update? */
241 help_node_t *node; /* Current node */
242 help_word_t *word; /* Current word */
243
244
245 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
246 hifile, directory));
247
248 /*
249 * Create a new, empty index.
250 */
251
252 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
253 return (NULL);
254
255 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
256 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
257
258 if (!hi->nodes || !hi->sorted)
259 {
260 cupsArrayDelete(hi->nodes);
261 cupsArrayDelete(hi->sorted);
262 free(hi);
263 return (NULL);
264 }
265
266 /*
267 * Try loading the existing index file...
268 */
269
270 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
271 {
272 /*
273 * Lock the file and then read the first line...
274 */
275
276 cupsFileLock(fp, 1);
277
278 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
279 {
280 /*
281 * Got a valid header line, now read the data lines...
282 */
283
284 node = NULL;
285
286 while (cupsFileGets(fp, line, sizeof(line)))
287 {
288 /*
289 * Each line looks like one of the following:
290 *
291 * filename mtime offset length "section" "text"
292 * filename#anchor offset length "text"
293 * SP count word
294 */
295
296 if (line[0] == ' ')
297 {
298 /*
299 * Read a word in the current node...
300 */
301
302 if (!node || (ptr = strrchr(line, ' ')) == NULL)
303 continue;
304
305 if ((word = help_add_word(node, ptr + 1)) != NULL)
306 word->count = atoi(line + 1);
307 }
308 else
309 {
310 /*
311 * Add a node...
312 */
313
314 filename = line;
315
316 if ((ptr = strchr(line, ' ')) == NULL)
317 break;
318
319 while (isspace(*ptr & 255))
320 *ptr++ = '\0';
321
322 if ((anchor = strrchr(filename, '#')) != NULL)
323 {
324 *anchor++ = '\0';
325 mtime = 0;
326 }
327 else
328 mtime = strtol(ptr, &ptr, 10);
329
330 offset = strtoll(ptr, &ptr, 10);
331 length = (size_t)strtoll(ptr, &ptr, 10);
332
333 while (isspace(*ptr & 255))
334 ptr ++;
335
336 if (!anchor)
337 {
338 /*
339 * Get section...
340 */
341
342 if (*ptr != '\"')
343 break;
344
345 ptr ++;
346 sectptr = ptr;
347
348 while (*ptr && *ptr != '\"')
349 ptr ++;
350
351 if (*ptr != '\"')
352 break;
353
354 *ptr++ = '\0';
355
356 strlcpy(section, sectptr, sizeof(section));
357
358 while (isspace(*ptr & 255))
359 ptr ++;
360 }
361
362 if (*ptr != '\"')
363 break;
364
365 ptr ++;
366 text = ptr;
367
368 while (*ptr && *ptr != '\"')
369 ptr ++;
370
371 if (*ptr != '\"')
372 break;
373
374 *ptr++ = '\0';
375
376 if ((node = help_new_node(filename, anchor, section, text,
377 mtime, offset, length)) == NULL)
378 break;
379
380 node->score = -1;
381
382 cupsArrayAdd(hi->nodes, node);
383 }
384 }
385 }
386
387 cupsFileClose(fp);
388 }
389
390 /*
391 * Scan for new/updated files...
392 */
393
394 update = help_load_directory(hi, directory, NULL);
395
396 /*
397 * Remove any files that are no longer installed...
398 */
399
400 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
401 node;
402 node = (help_node_t *)cupsArrayNext(hi->nodes))
403 if (node->score < 0)
404 {
405 /*
406 * Delete this node...
407 */
408
409 cupsArrayRemove(hi->nodes, node);
410 help_delete_node(node);
411 }
412
413 /*
414 * Add nodes to the sorted array...
415 */
416
417 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
418 node;
419 node = (help_node_t *)cupsArrayNext(hi->nodes))
420 cupsArrayAdd(hi->sorted, node);
421
422 /*
423 * Save the index if we updated it...
424 */
425
426 if (update)
427 helpSaveIndex(hi, hifile);
428
429 /*
430 * Return the index...
431 */
432
433 return (hi);
434 }
435
436
437 /*
438 * 'helpSaveIndex()' - Save a help index to disk.
439 */
440
441 int /* O - 0 on success, -1 on error */
442 helpSaveIndex(help_index_t *hi, /* I - Index */
443 const char *hifile) /* I - Index filename */
444 {
445 cups_file_t *fp; /* Index file */
446 help_node_t *node; /* Current node */
447 help_word_t *word; /* Current word */
448
449
450 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
451
452 /*
453 * Try creating a new index file...
454 */
455
456 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
457 return (-1);
458
459 /*
460 * Lock the file while we write it...
461 */
462
463 cupsFileLock(fp, 1);
464
465 cupsFilePuts(fp, "HELPV2\n");
466
467 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
468 node;
469 node = (help_node_t *)cupsArrayNext(hi->nodes))
470 {
471 /*
472 * Write the current node with/without the anchor...
473 */
474
475 if (node->anchor)
476 {
477 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
478 node->filename, node->anchor,
479 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
480 node->text) < 0)
481 break;
482 }
483 else
484 {
485 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
486 node->filename, (int)node->mtime,
487 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
488 node->section ? node->section : "", node->text) < 0)
489 break;
490 }
491
492 /*
493 * Then write the words associated with the node...
494 */
495
496 for (word = (help_word_t *)cupsArrayFirst(node->words);
497 word;
498 word = (help_word_t *)cupsArrayNext(node->words))
499 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
500 break;
501 }
502
503 cupsFileFlush(fp);
504
505 if (cupsFileClose(fp) < 0)
506 return (-1);
507 else if (node)
508 return (-1);
509 else
510 return (0);
511 }
512
513
514 /*
515 * 'helpSearchIndex()' - Search an index.
516 */
517
518 help_index_t * /* O - Search index */
519 helpSearchIndex(help_index_t *hi, /* I - Index */
520 const char *query, /* I - Query string */
521 const char *section, /* I - Limit search to this section */
522 const char *filename) /* I - Limit search to this file */
523 {
524 help_index_t *search; /* Search index */
525 help_node_t *node; /* Current node */
526 help_word_t *word; /* Current word */
527 void *sc; /* Search context */
528 int matches; /* Number of matches */
529
530
531 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
532 hi, query, filename));
533
534 /*
535 * Range check...
536 */
537
538 if (!hi || !query)
539 return (NULL);
540
541 /*
542 * Reset the scores of all nodes to 0...
543 */
544
545 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
546 node;
547 node = (help_node_t *)cupsArrayNext(hi->nodes))
548 node->score = 0;
549
550 /*
551 * Find the first node to search in...
552 */
553
554 if (filename)
555 {
556 node = helpFindNode(hi, filename, NULL);
557 if (!node)
558 return (NULL);
559 }
560 else
561 node = (help_node_t *)cupsArrayFirst(hi->nodes);
562
563 /*
564 * Convert the query into a regular expression...
565 */
566
567 sc = cgiCompileSearch(query);
568 if (!sc)
569 return (NULL);
570
571 /*
572 * Allocate a search index...
573 */
574
575 search = calloc(1, sizeof(help_index_t));
576 if (!search)
577 {
578 cgiFreeSearch(sc);
579 return (NULL);
580 }
581
582 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
583 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
584
585 if (!search->nodes || !search->sorted)
586 {
587 cupsArrayDelete(search->nodes);
588 cupsArrayDelete(search->sorted);
589 free(search);
590 cgiFreeSearch(sc);
591 return (NULL);
592 }
593
594 search->search = 1;
595
596 /*
597 * Check each node in the index, adding matching nodes to the
598 * search index...
599 */
600
601 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
602 if (section && strcmp(node->section, section))
603 continue;
604 else if (filename && strcmp(node->filename, filename))
605 continue;
606 else
607 {
608 matches = cgiDoSearch(sc, node->text);
609
610 for (word = (help_word_t *)cupsArrayFirst(node->words);
611 word;
612 word = (help_word_t *)cupsArrayNext(node->words))
613 if (cgiDoSearch(sc, word->text) > 0)
614 matches += word->count;
615
616 if (matches > 0)
617 {
618 /*
619 * Found a match, add the node to the search index...
620 */
621
622 node->score = matches;
623
624 cupsArrayAdd(search->nodes, node);
625 cupsArrayAdd(search->sorted, node);
626 }
627 }
628
629 /*
630 * Free the search context...
631 */
632
633 cgiFreeSearch(sc);
634
635 /*
636 * Return the results...
637 */
638
639 return (search);
640 }
641
642
643 /*
644 * 'help_add_word()' - Add a word to a node.
645 */
646
647 static help_word_t * /* O - New word */
648 help_add_word(help_node_t *n, /* I - Node */
649 const char *text) /* I - Word text */
650 {
651 help_word_t *w, /* New word */
652 key; /* Search key */
653
654
655 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
656
657 /*
658 * Create the words array as needed...
659 */
660
661 if (!n->words)
662 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
663
664 /*
665 * See if the word is already added...
666 */
667
668 key.text = (char *)text;
669
670 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
671 {
672 /*
673 * Create a new word...
674 */
675
676 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
677 return (NULL);
678
679 if ((w->text = strdup(text)) == NULL)
680 {
681 free(w);
682 return (NULL);
683 }
684
685 cupsArrayAdd(n->words, w);
686 }
687
688 /*
689 * Bump the counter for this word and return it...
690 */
691
692 w->count ++;
693
694 return (w);
695 }
696
697
698 /*
699 * 'help_delete_node()' - Free all memory used by a node.
700 */
701
702 static void
703 help_delete_node(help_node_t *n) /* I - Node */
704 {
705 help_word_t *w; /* Current word */
706
707
708 DEBUG_printf(("2help_delete_node(n=%p)", n));
709
710 if (!n)
711 return;
712
713 if (n->filename)
714 free(n->filename);
715
716 if (n->anchor)
717 free(n->anchor);
718
719 if (n->section)
720 free(n->section);
721
722 if (n->text)
723 free(n->text);
724
725 for (w = (help_word_t *)cupsArrayFirst(n->words);
726 w;
727 w = (help_word_t *)cupsArrayNext(n->words))
728 help_delete_word(w);
729
730 cupsArrayDelete(n->words);
731
732 free(n);
733 }
734
735
736 /*
737 * 'help_delete_word()' - Free all memory used by a word.
738 */
739
740 static void
741 help_delete_word(help_word_t *w) /* I - Word */
742 {
743 DEBUG_printf(("2help_delete_word(w=%p)", w));
744
745 if (!w)
746 return;
747
748 if (w->text)
749 free(w->text);
750
751 free(w);
752 }
753
754
755 /*
756 * 'help_load_directory()' - Load a directory of files into an index.
757 */
758
759 static int /* O - 0 = success, -1 = error, 1 = updated */
760 help_load_directory(
761 help_index_t *hi, /* I - Index */
762 const char *directory, /* I - Directory */
763 const char *relative) /* I - Relative path */
764 {
765 cups_dir_t *dir; /* Directory file */
766 cups_dentry_t *dent; /* Directory entry */
767 char *ext, /* Pointer to extension */
768 filename[1024], /* Full filename */
769 relname[1024]; /* Relative filename */
770 int update; /* Updated? */
771 help_node_t *node; /* Current node */
772
773
774 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
775 hi, directory, relative));
776
777 /*
778 * Open the directory and scan it...
779 */
780
781 if ((dir = cupsDirOpen(directory)) == NULL)
782 return (0);
783
784 update = 0;
785
786 while ((dent = cupsDirRead(dir)) != NULL)
787 {
788 /*
789 * Skip "." files...
790 */
791
792 if (dent->filename[0] == '.')
793 continue;
794
795 /*
796 * Get absolute and relative filenames...
797 */
798
799 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
800 if (relative)
801 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
802 else
803 strlcpy(relname, dent->filename, sizeof(relname));
804
805 /*
806 * Check if we have a HTML file...
807 */
808
809 if ((ext = strstr(dent->filename, ".html")) != NULL &&
810 (!ext[5] || !strcmp(ext + 5, ".gz")))
811 {
812 /*
813 * HTML file, see if we have already indexed the file...
814 */
815
816 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
817 {
818 /*
819 * File already indexed - check dates to confirm that the
820 * index is up-to-date...
821 */
822
823 if (node->mtime == dent->fileinfo.st_mtime)
824 {
825 /*
826 * Same modification time, so mark all of the nodes
827 * for this file as up-to-date...
828 */
829
830 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
831 if (!strcmp(node->filename, relname))
832 node->score = 0;
833 else
834 break;
835
836 continue;
837 }
838 }
839
840 update = 1;
841
842 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
843 }
844 else if (S_ISDIR(dent->fileinfo.st_mode))
845 {
846 /*
847 * Process sub-directory...
848 */
849
850 if (help_load_directory(hi, filename, relname) == 1)
851 update = 1;
852 }
853 }
854
855 cupsDirClose(dir);
856
857 return (update);
858 }
859
860
861 /*
862 * 'help_load_file()' - Load a HTML files into an index.
863 */
864
865 static int /* O - 0 = success, -1 = error */
866 help_load_file(
867 help_index_t *hi, /* I - Index */
868 const char *filename, /* I - Filename */
869 const char *relative, /* I - Relative path */
870 time_t mtime) /* I - Modification time */
871 {
872 cups_file_t *fp; /* HTML file */
873 help_node_t *node; /* Current node */
874 char line[1024], /* Line from file */
875 temp[1024], /* Temporary word */
876 section[1024], /* Section */
877 *ptr, /* Pointer into line */
878 *anchor, /* Anchor name */
879 *text; /* Text for anchor */
880 off_t offset; /* File offset */
881 char quote; /* Quote character */
882 help_word_t *word; /* Current word */
883 int wordlen; /* Length of word */
884
885
886 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
887 "mtime=%ld)", hi, filename, relative, mtime));
888
889 if ((fp = cupsFileOpen(filename, "r")) == NULL)
890 return (-1);
891
892 node = NULL;
893 offset = 0;
894
895 strlcpy(section, "Other", sizeof(section));
896
897 while (cupsFileGets(fp, line, sizeof(line)))
898 {
899 /*
900 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
901 */
902
903 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
904 {
905 /*
906 * Got section line, copy it!
907 */
908
909 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
910
911 strlcpy(section, ptr, sizeof(section));
912 if ((ptr = strstr(section, "-->")) != NULL)
913 {
914 /*
915 * Strip comment stuff from end of line...
916 */
917
918 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
919
920 if (isspace(*ptr & 255))
921 *ptr = '\0';
922 }
923 continue;
924 }
925
926 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
927 {
928 ptr ++;
929
930 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
931 {
932 /*
933 * Found the title...
934 */
935
936 anchor = NULL;
937 ptr += 6;
938 }
939 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
940 {
941 /*
942 * Found an anchor...
943 */
944
945 ptr += 7;
946
947 if (*ptr == '\"' || *ptr == '\'')
948 {
949 /*
950 * Get quoted anchor...
951 */
952
953 quote = *ptr;
954 anchor = ptr + 1;
955 if ((ptr = strchr(anchor, quote)) != NULL)
956 *ptr++ = '\0';
957 else
958 break;
959 }
960 else
961 {
962 /*
963 * Get unquoted anchor...
964 */
965
966 anchor = ptr + 1;
967
968 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
969
970 if (*ptr)
971 *ptr++ = '\0';
972 else
973 break;
974 }
975
976 /*
977 * Got the anchor, now lets find the end...
978 */
979
980 while (*ptr && *ptr != '>')
981 ptr ++;
982
983 if (*ptr != '>')
984 break;
985
986 ptr ++;
987 }
988 else
989 continue;
990
991 /*
992 * Now collect text for the link...
993 */
994
995 text = ptr;
996 while ((ptr = strchr(text, '<')) == NULL)
997 {
998 ptr = text + strlen(text);
999 if (ptr >= (line + sizeof(line) - 2))
1000 break;
1001
1002 *ptr++ = ' ';
1003
1004 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1005 break;
1006 }
1007
1008 *ptr = '\0';
1009
1010 if (node)
1011 node->length = (size_t)(offset - node->offset);
1012
1013 if (!*text)
1014 {
1015 node = NULL;
1016 break;
1017 }
1018
1019 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1020 {
1021 /*
1022 * Node already in the index, so replace the text and other
1023 * data...
1024 */
1025
1026 cupsArrayRemove(hi->nodes, node);
1027
1028 if (node->section)
1029 free(node->section);
1030
1031 if (node->text)
1032 free(node->text);
1033
1034 if (node->words)
1035 {
1036 for (word = (help_word_t *)cupsArrayFirst(node->words);
1037 word;
1038 word = (help_word_t *)cupsArrayNext(node->words))
1039 help_delete_word(word);
1040
1041 cupsArrayDelete(node->words);
1042 node->words = NULL;
1043 }
1044
1045 node->section = section[0] ? strdup(section) : NULL;
1046 node->text = strdup(text);
1047 node->mtime = mtime;
1048 node->offset = offset;
1049 node->score = 0;
1050 }
1051 else
1052 {
1053 /*
1054 * New node...
1055 */
1056
1057 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1058 }
1059
1060 /*
1061 * Go through the text value and replace tabs and newlines with
1062 * whitespace and eliminate extra whitespace...
1063 */
1064
1065 for (ptr = node->text, text = node->text; *ptr;)
1066 if (isspace(*ptr & 255))
1067 {
1068 while (isspace(*ptr & 255))
1069 ptr ++;
1070
1071 *text++ = ' ';
1072 }
1073 else if (text != ptr)
1074 *text++ = *ptr++;
1075 else
1076 {
1077 text ++;
1078 ptr ++;
1079 }
1080
1081 *text = '\0';
1082
1083 /*
1084 * (Re)add the node to the array...
1085 */
1086
1087 cupsArrayAdd(hi->nodes, node);
1088
1089 if (!anchor)
1090 node = NULL;
1091 break;
1092 }
1093
1094 if (node)
1095 {
1096 /*
1097 * Scan this line for words...
1098 */
1099
1100 for (ptr = line; *ptr; ptr ++)
1101 {
1102 /*
1103 * Skip HTML stuff...
1104 */
1105
1106 if (*ptr == '<')
1107 {
1108 if (!strncmp(ptr, "<!--", 4))
1109 {
1110 /*
1111 * Skip HTML comment...
1112 */
1113
1114 if ((text = strstr(ptr + 4, "-->")) == NULL)
1115 ptr += strlen(ptr) - 1;
1116 else
1117 ptr = text + 2;
1118 }
1119 else
1120 {
1121 /*
1122 * Skip HTML element...
1123 */
1124
1125 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1126 {
1127 if (*ptr == '\"' || *ptr == '\'')
1128 {
1129 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1130
1131 if (!*ptr)
1132 ptr --;
1133 }
1134 }
1135
1136 if (!*ptr)
1137 ptr --;
1138 }
1139
1140 continue;
1141 }
1142 else if (*ptr == '&')
1143 {
1144 /*
1145 * Skip HTML entity...
1146 */
1147
1148 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1149
1150 if (!*ptr)
1151 ptr --;
1152
1153 continue;
1154 }
1155 else if (!isalnum(*ptr & 255))
1156 continue;
1157
1158 /*
1159 * Found the start of a word, search until we find the end...
1160 */
1161
1162 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1163
1164 wordlen = (int)(ptr - text);
1165
1166 memcpy(temp, text, (size_t)wordlen);
1167 temp[wordlen] = '\0';
1168
1169 ptr --;
1170
1171 if (wordlen > 1 && !bsearch(temp, help_common_words,
1172 (sizeof(help_common_words) /
1173 sizeof(help_common_words[0])),
1174 sizeof(help_common_words[0]),
1175 (int (*)(const void *, const void *))
1176 _cups_strcasecmp))
1177 help_add_word(node, temp);
1178 }
1179 }
1180
1181 /*
1182 * Get the offset of the next line...
1183 */
1184
1185 offset = cupsFileTell(fp);
1186 }
1187
1188 cupsFileClose(fp);
1189
1190 if (node)
1191 node->length = (size_t)(offset - node->offset);
1192
1193 return (0);
1194 }
1195
1196
1197 /*
1198 * 'help_new_node()' - Create a new node and add it to an index.
1199 */
1200
1201 static help_node_t * /* O - Node pointer or NULL on error */
1202 help_new_node(const char *filename, /* I - Filename */
1203 const char *anchor, /* I - Anchor */
1204 const char *section, /* I - Section */
1205 const char *text, /* I - Text */
1206 time_t mtime, /* I - Modification time */
1207 off_t offset, /* I - Offset in file */
1208 size_t length) /* I - Length in bytes */
1209 {
1210 help_node_t *n; /* Node */
1211
1212
1213 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1214 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1215 (long)mtime, (long)offset, (long)length));
1216
1217 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1218 if (!n)
1219 return (NULL);
1220
1221 n->filename = strdup(filename);
1222 n->anchor = anchor ? strdup(anchor) : NULL;
1223 n->section = (section && *section) ? strdup(section) : NULL;
1224 n->text = strdup(text);
1225 n->mtime = mtime;
1226 n->offset = offset;
1227 n->length = length;
1228
1229 return (n);
1230 }
1231
1232
1233 /*
1234 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1235 */
1236
1237 static int /* O - Difference */
1238 help_sort_by_name(help_node_t *n1, /* I - First node */
1239 help_node_t *n2) /* I - Second node */
1240 {
1241 int diff; /* Difference */
1242
1243
1244 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1245 n1, n1->filename, n1->anchor,
1246 n2, n2->filename, n2->anchor));
1247
1248 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1249 return (diff);
1250
1251 if (!n1->anchor && !n2->anchor)
1252 return (0);
1253 else if (!n1->anchor)
1254 return (-1);
1255 else if (!n2->anchor)
1256 return (1);
1257 else
1258 return (strcmp(n1->anchor, n2->anchor));
1259 }
1260
1261
1262 /*
1263 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1264 */
1265
1266 static int /* O - Difference */
1267 help_sort_by_score(help_node_t *n1, /* I - First node */
1268 help_node_t *n2) /* I - Second node */
1269 {
1270 int diff; /* Difference */
1271
1272
1273 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1274 "n2=%p(%d \"%s\" \"%s\")",
1275 n1, n1->score, n1->section, n1->text,
1276 n2, n2->score, n2->section, n2->text));
1277
1278 if (n1->score != n2->score)
1279 return (n2->score - n1->score);
1280
1281 if (n1->section && !n2->section)
1282 return (1);
1283 else if (!n1->section && n2->section)
1284 return (-1);
1285 else if (n1->section && n2->section &&
1286 (diff = strcmp(n1->section, n2->section)) != 0)
1287 return (diff);
1288
1289 return (_cups_strcasecmp(n1->text, n2->text));
1290 }
1291
1292
1293 /*
1294 * 'help_sort_words()' - Sort words alphabetically.
1295 */
1296
1297 static int /* O - Difference */
1298 help_sort_words(help_word_t *w1, /* I - Second word */
1299 help_word_t *w2) /* I - Second word */
1300 {
1301 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1302 w1, w1->text, w2, w2->text));
1303
1304 return (_cups_strcasecmp(w1->text, w2->text));
1305 }
1306
1307
1308 /*
1309 * End of "$Id$".
1310 */