]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Load cups into easysw/current.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * "$Id: help-index.c 6649 2007-07-11 21:46:42Z mike $"
3 *
4 * On-line help index routines for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 2007 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * Contents:
16 *
17 * helpDeleteIndex() - Delete an index, freeing all memory used.
18 * helpFindNode() - Find a node in an index.
19 * helpLoadIndex() - Load a help index from disk.
20 * helpSaveIndex() - Save a help index to disk.
21 * helpSearchIndex() - Search an index.
22 * help_add_word() - Add a word to a node.
23 * help_compile_search() - Convert a search string into a regular expression.
24 * help_delete_node() - Free all memory used by a node.
25 * help_delete_word() - Free all memory used by a word.
26 * help_load_directory() - Load a directory of files into an index.
27 * help_load_file() - Load an HTML file into an index.
28 * help_new_node() - Create a new node and add it to an index.
29 * help_sort_by_name() - Sort nodes by filename and anchor.
30 * help_sort_by_score() - Sort nodes by score, section, and text.
31 * help_sort_words() - Sort words alphabetically.
32 */
33
34 /*
35 * Include necessary headers...
36 */
37
38 #include "cgi-private.h"
39 #include <cups/dir.h>
40
41
/*
 * List of common English words that should not be indexed...
 *
 * The table is looked up with bsearch() and a case-insensitive string
 * compare, so the entries MUST remain in ascending alphabetical order.
 * Each entry occupies a fixed 6-byte slot, which limits words to 5
 * characters plus the terminating nul.  The table is read-only, hence
 * "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
145
146
147 /*
148 * Local functions...
149 */
150
151 static help_word_t *help_add_word(help_node_t *n, const char *text);
152 static void help_delete_node(help_node_t *n);
153 static void help_delete_word(help_word_t *w);
154 static int help_load_directory(help_index_t *hi,
155 const char *directory,
156 const char *relative);
157 static int help_load_file(help_index_t *hi,
158 const char *filename,
159 const char *relative,
160 time_t mtime);
161 static help_node_t *help_new_node(const char *filename, const char *anchor,
162 const char *section, const char *text,
163 time_t mtime, off_t offset,
164 size_t length);
165 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
166 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
167 static int help_sort_words(help_word_t *w1, help_word_t *w2);
168
169
170 /*
171 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
172 */
173
174 void
175 helpDeleteIndex(help_index_t *hi) /* I - Help index */
176 {
177 help_node_t *node; /* Current node */
178
179
180 DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
181
182 if (!hi)
183 return;
184
185 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
186 node;
187 node = (help_node_t *)cupsArrayNext(hi->nodes))
188 {
189 if (!hi->search)
190 help_delete_node(node);
191 }
192
193 cupsArrayDelete(hi->nodes);
194 cupsArrayDelete(hi->sorted);
195
196 free(hi);
197 }
198
199
200 /*
201 * 'helpFindNode()' - Find a node in an index.
202 */
203
204 help_node_t * /* O - Node pointer or NULL */
205 helpFindNode(help_index_t *hi, /* I - Index */
206 const char *filename, /* I - Filename */
207 const char *anchor) /* I - Anchor */
208 {
209 help_node_t key; /* Search key */
210
211
212 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
213 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
214
215 /*
216 * Range check input...
217 */
218
219 if (!hi || !filename)
220 return (NULL);
221
222 /*
223 * Initialize the search key...
224 */
225
226 key.filename = (char *)filename;
227 key.anchor = (char *)anchor;
228
229 /*
230 * Return any match...
231 */
232
233 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
234 }
235
236
237 /*
238 * 'helpLoadIndex()' - Load a help index from disk.
239 */
240
241 help_index_t * /* O - Index pointer or NULL */
242 helpLoadIndex(const char *hifile, /* I - Index filename */
243 const char *directory) /* I - Directory that is indexed */
244 {
245 help_index_t *hi; /* Help index */
246 cups_file_t *fp; /* Current file */
247 char line[2048], /* Line from file */
248 *ptr, /* Pointer into line */
249 *filename, /* Filename in line */
250 *anchor, /* Anchor in line */
251 *sectptr, /* Section pointer in line */
252 section[1024], /* Section name */
253 *text; /* Text in line */
254 time_t mtime; /* Modification time */
255 off_t offset; /* Offset into file */
256 size_t length; /* Length in bytes */
257 int update; /* Update? */
258 help_node_t *node; /* Current node */
259 help_word_t *word; /* Current word */
260
261
262 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
263 hifile, directory));
264
265 /*
266 * Create a new, empty index.
267 */
268
269 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
270 return (NULL);
271
272 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
273 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
274
275 if (!hi->nodes || !hi->sorted)
276 {
277 cupsArrayDelete(hi->nodes);
278 cupsArrayDelete(hi->sorted);
279 free(hi);
280 return (NULL);
281 }
282
283 /*
284 * Try loading the existing index file...
285 */
286
287 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
288 {
289 /*
290 * Lock the file and then read the first line...
291 */
292
293 cupsFileLock(fp, 1);
294
295 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
296 {
297 /*
298 * Got a valid header line, now read the data lines...
299 */
300
301 node = NULL;
302
303 while (cupsFileGets(fp, line, sizeof(line)))
304 {
305 /*
306 * Each line looks like one of the following:
307 *
308 * filename mtime offset length "section" "text"
309 * filename#anchor offset length "text"
310 * SP count word
311 */
312
313 if (line[0] == ' ')
314 {
315 /*
316 * Read a word in the current node...
317 */
318
319 if (!node || (ptr = strrchr(line, ' ')) == NULL)
320 continue;
321
322 if ((word = help_add_word(node, ptr + 1)) != NULL)
323 word->count = atoi(line + 1);
324 }
325 else
326 {
327 /*
328 * Add a node...
329 */
330
331 filename = line;
332
333 if ((ptr = strchr(line, ' ')) == NULL)
334 break;
335
336 while (isspace(*ptr & 255))
337 *ptr++ = '\0';
338
339 if ((anchor = strrchr(filename, '#')) != NULL)
340 {
341 *anchor++ = '\0';
342 mtime = 0;
343 }
344 else
345 mtime = strtol(ptr, &ptr, 10);
346
347 offset = strtoll(ptr, &ptr, 10);
348 length = strtoll(ptr, &ptr, 10);
349
350 while (isspace(*ptr & 255))
351 ptr ++;
352
353 if (!anchor)
354 {
355 /*
356 * Get section...
357 */
358
359 if (*ptr != '\"')
360 break;
361
362 ptr ++;
363 sectptr = ptr;
364
365 while (*ptr && *ptr != '\"')
366 ptr ++;
367
368 if (*ptr != '\"')
369 break;
370
371 *ptr++ = '\0';
372
373 strlcpy(section, sectptr, sizeof(section));
374
375 while (isspace(*ptr & 255))
376 ptr ++;
377 }
378
379 if (*ptr != '\"')
380 break;
381
382 ptr ++;
383 text = ptr;
384
385 while (*ptr && *ptr != '\"')
386 ptr ++;
387
388 if (*ptr != '\"')
389 break;
390
391 *ptr++ = '\0';
392
393 if ((node = help_new_node(filename, anchor, section, text,
394 mtime, offset, length)) == NULL)
395 break;
396
397 node->score = -1;
398
399 cupsArrayAdd(hi->nodes, node);
400 }
401 }
402 }
403
404 cupsFileClose(fp);
405 }
406
407 /*
408 * Scan for new/updated files...
409 */
410
411 update = help_load_directory(hi, directory, NULL);
412
413 /*
414 * Remove any files that are no longer installed...
415 */
416
417 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
418 node;
419 node = (help_node_t *)cupsArrayNext(hi->nodes))
420 if (node->score < 0)
421 {
422 /*
423 * Delete this node...
424 */
425
426 cupsArrayRemove(hi->nodes, node);
427 help_delete_node(node);
428 }
429
430 /*
431 * Add nodes to the sorted array...
432 */
433
434 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
435 node;
436 node = (help_node_t *)cupsArrayNext(hi->nodes))
437 cupsArrayAdd(hi->sorted, node);
438
439 /*
440 * Save the index if we updated it...
441 */
442
443 if (update)
444 helpSaveIndex(hi, hifile);
445
446 /*
447 * Return the index...
448 */
449
450 return (hi);
451 }
452
453
454 /*
455 * 'helpSaveIndex()' - Save a help index to disk.
456 */
457
458 int /* O - 0 on success, -1 on error */
459 helpSaveIndex(help_index_t *hi, /* I - Index */
460 const char *hifile) /* I - Index filename */
461 {
462 cups_file_t *fp; /* Index file */
463 help_node_t *node; /* Current node */
464 help_word_t *word; /* Current word */
465
466
467 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
468
469 /*
470 * Try creating a new index file...
471 */
472
473 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
474 return (-1);
475
476 /*
477 * Lock the file while we write it...
478 */
479
480 cupsFileLock(fp, 1);
481
482 cupsFilePuts(fp, "HELPV2\n");
483
484 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
485 node;
486 node = (help_node_t *)cupsArrayNext(hi->nodes))
487 {
488 /*
489 * Write the current node with/without the anchor...
490 */
491
492 if (node->anchor)
493 {
494 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
495 node->filename, node->anchor,
496 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
497 node->text) < 0)
498 break;
499 }
500 else
501 {
502 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
503 node->filename, node->mtime,
504 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
505 node->section ? node->section : "", node->text) < 0)
506 break;
507 }
508
509 /*
510 * Then write the words associated with the node...
511 */
512
513 for (word = (help_word_t *)cupsArrayFirst(node->words);
514 word;
515 word = (help_word_t *)cupsArrayNext(node->words))
516 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
517 break;
518 }
519
520 cupsFileFlush(fp);
521
522 if (cupsFileClose(fp) < 0)
523 return (-1);
524 else if (node)
525 return (-1);
526 else
527 return (0);
528 }
529
530
531 /*
532 * 'helpSearchIndex()' - Search an index.
533 */
534
535 help_index_t * /* O - Search index */
536 helpSearchIndex(help_index_t *hi, /* I - Index */
537 const char *query, /* I - Query string */
538 const char *section, /* I - Limit search to this section */
539 const char *filename) /* I - Limit search to this file */
540 {
541 help_index_t *search; /* Search index */
542 help_node_t *node; /* Current node */
543 help_word_t *word; /* Current word */
544 void *sc; /* Search context */
545 int matches; /* Number of matches */
546
547
548 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
549 hi, query ? query : "(nil)",
550 filename ? filename : "(nil)"));
551
552 /*
553 * Range check...
554 */
555
556 if (!hi || !query)
557 return (NULL);
558
559 /*
560 * Reset the scores of all nodes to 0...
561 */
562
563 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
564 node;
565 node = (help_node_t *)cupsArrayNext(hi->nodes))
566 node->score = 0;
567
568 /*
569 * Find the first node to search in...
570 */
571
572 if (filename)
573 {
574 node = helpFindNode(hi, filename, NULL);
575 if (!node)
576 return (NULL);
577 }
578 else
579 node = (help_node_t *)cupsArrayFirst(hi->nodes);
580
581 /*
582 * Convert the query into a regular expression...
583 */
584
585 sc = cgiCompileSearch(query);
586 if (!sc)
587 return (NULL);
588
589 /*
590 * Allocate a search index...
591 */
592
593 search = calloc(1, sizeof(help_index_t));
594 if (!search)
595 {
596 cgiFreeSearch(sc);
597 return (NULL);
598 }
599
600 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
601 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
602
603 if (!search->nodes || !search->sorted)
604 {
605 cupsArrayDelete(search->nodes);
606 cupsArrayDelete(search->sorted);
607 free(search);
608 cgiFreeSearch(sc);
609 return (NULL);
610 }
611
612 search->search = 1;
613
614 /*
615 * Check each node in the index, adding matching nodes to the
616 * search index...
617 */
618
619 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
620 if (section && strcmp(node->section, section))
621 continue;
622 else if (filename && strcmp(node->filename, filename))
623 continue;
624 else
625 {
626 matches = cgiDoSearch(sc, node->text);
627
628 for (word = (help_word_t *)cupsArrayFirst(node->words);
629 word;
630 word = (help_word_t *)cupsArrayNext(node->words))
631 if (cgiDoSearch(sc, word->text) > 0)
632 matches += word->count;
633
634 if (matches > 0)
635 {
636 /*
637 * Found a match, add the node to the search index...
638 */
639
640 node->score = matches;
641
642 cupsArrayAdd(search->nodes, node);
643 cupsArrayAdd(search->sorted, node);
644 }
645 }
646
647 /*
648 * Free the search context...
649 */
650
651 cgiFreeSearch(sc);
652
653 /*
654 * Return the results...
655 */
656
657 return (search);
658 }
659
660
661 /*
662 * 'help_add_word()' - Add a word to a node.
663 */
664
665 static help_word_t * /* O - New word */
666 help_add_word(help_node_t *n, /* I - Node */
667 const char *text) /* I - Word text */
668 {
669 help_word_t *w, /* New word */
670 key; /* Search key */
671
672
673 DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
674
675 /*
676 * Create the words array as needed...
677 */
678
679 if (!n->words)
680 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
681
682 /*
683 * See if the word is already added...
684 */
685
686 key.text = (char *)text;
687
688 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
689 {
690 /*
691 * Create a new word...
692 */
693
694 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
695 return (NULL);
696
697 if ((w->text = strdup(text)) == NULL)
698 {
699 free(w);
700 return (NULL);
701 }
702
703 cupsArrayAdd(n->words, w);
704 }
705
706 /*
707 * Bump the counter for this word and return it...
708 */
709
710 w->count ++;
711
712 return (w);
713 }
714
715
716 /*
717 * 'help_delete_node()' - Free all memory used by a node.
718 */
719
720 static void
721 help_delete_node(help_node_t *n) /* I - Node */
722 {
723 help_word_t *w; /* Current word */
724
725
726 DEBUG_printf(("help_delete_node(n=%p)\n", n));
727
728 if (!n)
729 return;
730
731 if (n->filename)
732 free(n->filename);
733
734 if (n->anchor)
735 free(n->anchor);
736
737 if (n->section)
738 free(n->section);
739
740 if (n->text)
741 free(n->text);
742
743 for (w = (help_word_t *)cupsArrayFirst(n->words);
744 w;
745 w = (help_word_t *)cupsArrayNext(n->words))
746 help_delete_word(w);
747
748 cupsArrayDelete(n->words);
749
750 free(n);
751 }
752
753
754 /*
755 * 'help_delete_word()' - Free all memory used by a word.
756 */
757
758 static void
759 help_delete_word(help_word_t *w) /* I - Word */
760 {
761 DEBUG_printf(("help_delete_word(w=%p)\n", w));
762
763 if (!w)
764 return;
765
766 if (w->text)
767 free(w->text);
768
769 free(w);
770 }
771
772
773 /*
774 * 'help_load_directory()' - Load a directory of files into an index.
775 */
776
777 static int /* O - 0 = success, -1 = error, 1 = updated */
778 help_load_directory(
779 help_index_t *hi, /* I - Index */
780 const char *directory, /* I - Directory */
781 const char *relative) /* I - Relative path */
782 {
783 cups_dir_t *dir; /* Directory file */
784 cups_dentry_t *dent; /* Directory entry */
785 char *ext, /* Pointer to extension */
786 filename[1024], /* Full filename */
787 relname[1024]; /* Relative filename */
788 int update; /* Updated? */
789 help_node_t *node; /* Current node */
790
791
792 DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
793 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
794
795 /*
796 * Open the directory and scan it...
797 */
798
799 if ((dir = cupsDirOpen(directory)) == NULL)
800 return (0);
801
802 update = 0;
803
804 while ((dent = cupsDirRead(dir)) != NULL)
805 {
806 /*
807 * Skip "." files...
808 */
809
810 if (dent->filename[0] == '.')
811 continue;
812
813 /*
814 * Get absolute and relative filenames...
815 */
816
817 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
818 if (relative)
819 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
820 else
821 strlcpy(relname, dent->filename, sizeof(relname));
822
823 /*
824 * Check if we have a HTML file...
825 */
826
827 if ((ext = strstr(dent->filename, ".html")) != NULL &&
828 (!ext[5] || !strcmp(ext + 5, ".gz")))
829 {
830 /*
831 * HTML file, see if we have already indexed the file...
832 */
833
834 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
835 {
836 /*
837 * File already indexed - check dates to confirm that the
838 * index is up-to-date...
839 */
840
841 if (node->mtime == dent->fileinfo.st_mtime)
842 {
843 /*
844 * Same modification time, so mark all of the nodes
845 * for this file as up-to-date...
846 */
847
848 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
849 if (!strcmp(node->filename, relname))
850 node->score = 0;
851 else
852 break;
853
854 continue;
855 }
856 }
857
858 update = 1;
859
860 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
861 }
862 else if (S_ISDIR(dent->fileinfo.st_mode))
863 {
864 /*
865 * Process sub-directory...
866 */
867
868 if (help_load_directory(hi, filename, relname) == 1)
869 update = 1;
870 }
871 }
872
873 cupsDirClose(dir);
874
875 return (update);
876 }
877
878
879 /*
880 * 'help_load_file()' - Load a HTML files into an index.
881 */
882
883 static int /* O - 0 = success, -1 = error */
884 help_load_file(
885 help_index_t *hi, /* I - Index */
886 const char *filename, /* I - Filename */
887 const char *relative, /* I - Relative path */
888 time_t mtime) /* I - Modification time */
889 {
890 cups_file_t *fp; /* HTML file */
891 help_node_t *node; /* Current node */
892 char line[1024], /* Line from file */
893 temp[1024], /* Temporary word */
894 section[1024], /* Section */
895 *ptr, /* Pointer into line */
896 *anchor, /* Anchor name */
897 *text; /* Text for anchor */
898 off_t offset; /* File offset */
899 char quote; /* Quote character */
900 help_word_t *word; /* Current word */
901 int wordlen; /* Length of word */
902
903
904 DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
905 hi, filename ? filename : "(nil)",
906 relative ? relative : "(nil)", mtime));
907
908 if ((fp = cupsFileOpen(filename, "r")) == NULL)
909 return (-1);
910
911 node = NULL;
912 offset = 0;
913
914 strcpy(section, "Other");
915
916 while (cupsFileGets(fp, line, sizeof(line)))
917 {
918 /*
919 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
920 */
921
922 if (!strncasecmp(line, "<!-- SECTION:", 13))
923 {
924 /*
925 * Got section line, copy it!
926 */
927
928 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
929
930 strlcpy(section, ptr, sizeof(section));
931 if ((ptr = strstr(section, "-->")) != NULL)
932 {
933 /*
934 * Strip comment stuff from end of line...
935 */
936
937 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
938
939 if (isspace(*ptr & 255))
940 *ptr = '\0';
941 }
942 continue;
943 }
944
945 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
946 {
947 ptr ++;
948
949 if (!strncasecmp(ptr, "TITLE>", 6))
950 {
951 /*
952 * Found the title...
953 */
954
955 anchor = NULL;
956 ptr += 6;
957 }
958 else if (!strncasecmp(ptr, "A NAME=", 7))
959 {
960 /*
961 * Found an anchor...
962 */
963
964 ptr += 7;
965
966 if (*ptr == '\"' || *ptr == '\'')
967 {
968 /*
969 * Get quoted anchor...
970 */
971
972 quote = *ptr;
973 anchor = ptr + 1;
974 if ((ptr = strchr(anchor, quote)) != NULL)
975 *ptr++ = '\0';
976 else
977 break;
978 }
979 else
980 {
981 /*
982 * Get unquoted anchor...
983 */
984
985 anchor = ptr + 1;
986
987 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
988
989 if (*ptr)
990 *ptr++ = '\0';
991 else
992 break;
993 }
994
995 /*
996 * Got the anchor, now lets find the end...
997 */
998
999 while (*ptr && *ptr != '>')
1000 ptr ++;
1001
1002 if (*ptr != '>')
1003 break;
1004
1005 ptr ++;
1006 }
1007 else
1008 continue;
1009
1010 /*
1011 * Now collect text for the link...
1012 */
1013
1014 text = ptr;
1015 while ((ptr = strchr(text, '<')) == NULL)
1016 {
1017 ptr = text + strlen(text);
1018 if (ptr >= (line + sizeof(line) - 2))
1019 break;
1020
1021 *ptr++ = ' ';
1022
1023 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1024 break;
1025 }
1026
1027 *ptr = '\0';
1028
1029 if (node)
1030 node->length = offset - node->offset;
1031
1032 if (!*text)
1033 {
1034 node = NULL;
1035 break;
1036 }
1037
1038 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1039 {
1040 /*
1041 * Node already in the index, so replace the text and other
1042 * data...
1043 */
1044
1045 cupsArrayRemove(hi->nodes, node);
1046
1047 if (node->section)
1048 free(node->section);
1049
1050 if (node->text)
1051 free(node->text);
1052
1053 if (node->words)
1054 {
1055 for (word = (help_word_t *)cupsArrayFirst(node->words);
1056 word;
1057 word = (help_word_t *)cupsArrayNext(node->words))
1058 help_delete_word(word);
1059
1060 cupsArrayDelete(node->words);
1061 node->words = NULL;
1062 }
1063
1064 node->section = section[0] ? strdup(section) : NULL;
1065 node->text = strdup(text);
1066 node->mtime = mtime;
1067 node->offset = offset;
1068 node->score = 0;
1069 }
1070 else
1071 {
1072 /*
1073 * New node...
1074 */
1075
1076 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1077 }
1078
1079 /*
1080 * Go through the text value and replace tabs and newlines with
1081 * whitespace and eliminate extra whitespace...
1082 */
1083
1084 for (ptr = node->text, text = node->text; *ptr;)
1085 if (isspace(*ptr & 255))
1086 {
1087 while (isspace(*ptr & 255))
1088 ptr ++;
1089
1090 *text++ = ' ';
1091 }
1092 else if (text != ptr)
1093 *text++ = *ptr++;
1094 else
1095 {
1096 text ++;
1097 ptr ++;
1098 }
1099
1100 *text = '\0';
1101
1102 /*
1103 * (Re)add the node to the array...
1104 */
1105
1106 cupsArrayAdd(hi->nodes, node);
1107
1108 if (!anchor)
1109 node = NULL;
1110 break;
1111 }
1112
1113 if (node)
1114 {
1115 /*
1116 * Scan this line for words...
1117 */
1118
1119 for (ptr = line; *ptr; ptr ++)
1120 {
1121 /*
1122 * Skip HTML stuff...
1123 */
1124
1125 if (*ptr == '<')
1126 {
1127 if (!strncmp(ptr, "<!--", 4))
1128 {
1129 /*
1130 * Skip HTML comment...
1131 */
1132
1133 if ((text = strstr(ptr + 4, "-->")) == NULL)
1134 ptr += strlen(ptr) - 1;
1135 else
1136 ptr = text + 2;
1137 }
1138 else
1139 {
1140 /*
1141 * Skip HTML element...
1142 */
1143
1144 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1145 {
1146 if (*ptr == '\"' || *ptr == '\'')
1147 {
1148 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1149
1150 if (!*ptr)
1151 ptr --;
1152 }
1153 }
1154
1155 if (!*ptr)
1156 ptr --;
1157 }
1158
1159 continue;
1160 }
1161 else if (*ptr == '&')
1162 {
1163 /*
1164 * Skip HTML entity...
1165 */
1166
1167 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1168
1169 if (!*ptr)
1170 ptr --;
1171
1172 continue;
1173 }
1174 else if (!isalnum(*ptr & 255))
1175 continue;
1176
1177 /*
1178 * Found the start of a word, search until we find the end...
1179 */
1180
1181 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1182
1183 wordlen = ptr - text;
1184
1185 memcpy(temp, text, wordlen);
1186 temp[wordlen] = '\0';
1187
1188 ptr --;
1189
1190 if (wordlen > 1 && !bsearch(temp, help_common_words,
1191 (sizeof(help_common_words) /
1192 sizeof(help_common_words[0])),
1193 sizeof(help_common_words[0]),
1194 (int (*)(const void *, const void *))
1195 strcasecmp))
1196 help_add_word(node, temp);
1197 }
1198 }
1199
1200 /*
1201 * Get the offset of the next line...
1202 */
1203
1204 offset = cupsFileTell(fp);
1205 }
1206
1207 cupsFileClose(fp);
1208
1209 if (node)
1210 node->length = offset - node->offset;
1211
1212 return (0);
1213 }
1214
1215
1216 /*
1217 * 'help_new_node()' - Create a new node and add it to an index.
1218 */
1219
1220 static help_node_t * /* O - Node pointer or NULL on error */
1221 help_new_node(const char *filename, /* I - Filename */
1222 const char *anchor, /* I - Anchor */
1223 const char *section, /* I - Section */
1224 const char *text, /* I - Text */
1225 time_t mtime, /* I - Modification time */
1226 off_t offset, /* I - Offset in file */
1227 size_t length) /* I - Length in bytes */
1228 {
1229 help_node_t *n; /* Node */
1230
1231
1232 DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1233 "mtime=%ld, offset=%ld, length=%ld)\n",
1234 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
1235 text ? text : "(nil)", (long)mtime, (long)offset,
1236 (long)length));
1237
1238 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1239 if (!n)
1240 return (NULL);
1241
1242 n->filename = strdup(filename);
1243 n->anchor = anchor ? strdup(anchor) : NULL;
1244 n->section = (section && *section) ? strdup(section) : NULL;
1245 n->text = strdup(text);
1246 n->mtime = mtime;
1247 n->offset = offset;
1248 n->length = length;
1249
1250 return (n);
1251 }
1252
1253
1254 /*
1255 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1256 */
1257
1258 static int /* O - Difference */
1259 help_sort_by_name(help_node_t *n1, /* I - First node */
1260 help_node_t *n2) /* I - Second node */
1261 {
1262 int diff; /* Difference */
1263
1264
1265 DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1266 n1, n1->filename, n1->anchor ? n1->anchor : "",
1267 n2, n2->filename, n2->anchor ? n2->anchor : ""));
1268
1269 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1270 return (diff);
1271
1272 if (!n1->anchor && !n2->anchor)
1273 return (0);
1274 else if (!n1->anchor)
1275 return (-1);
1276 else if (!n2->anchor)
1277 return (1);
1278 else
1279 return (strcmp(n1->anchor, n2->anchor));
1280 }
1281
1282
1283 /*
1284 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1285 */
1286
1287 static int /* O - Difference */
1288 help_sort_by_score(help_node_t *n1, /* I - First node */
1289 help_node_t *n2) /* I - Second node */
1290 {
1291 int diff; /* Difference */
1292
1293
1294 DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1295 "n2=%p(%d \"%s\" \"%s\")\n",
1296 n1, n1->score, n1->section ? n1->section : "", n1->text,
1297 n2, n2->score, n2->section ? n2->section : "", n2->text));
1298
1299 if (n1->score != n2->score)
1300 return (n1->score - n2->score);
1301
1302 if (n1->section && !n2->section)
1303 return (1);
1304 else if (!n1->section && n2->section)
1305 return (-1);
1306 else if (n1->section && n2->section &&
1307 (diff = strcmp(n1->section, n2->section)) != 0)
1308 return (diff);
1309
1310 return (strcasecmp(n1->text, n2->text));
1311 }
1312
1313
1314 /*
1315 * 'help_sort_words()' - Sort words alphabetically.
1316 */
1317
1318 static int /* O - Difference */
1319 help_sort_words(help_word_t *w1, /* I - Second word */
1320 help_word_t *w2) /* I - Second word */
1321 {
1322 DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1323 w1, w1->text, w2, w2->text));
1324
1325 return (strcasecmp(w1->text, w2->text));
1326 }
1327
1328
1329 /*
1330 * End of "$Id: help-index.c 6649 2007-07-11 21:46:42Z mike $".
1331 */