/*
 * Retrieved from: git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
 * Load cups into easysw/current.
 * [thirdparty/cups.git] / cgi-bin / help-index.c
 */
1 /*
2 * "$Id: help-index.c 6394 2007-03-25 00:01:14Z mike $"
3 *
4 * On-line help index routines for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2007 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are the
9 * property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the file
11 * "LICENSE.txt" which should have been included with this file. If this
12 * file is missing or damaged please contact Easy Software Products
13 * at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * helpDeleteIndex() - Delete an index, freeing all memory used.
27 * helpFindNode() - Find a node in an index.
28 * helpLoadIndex() - Load a help index from disk.
29 * helpSaveIndex() - Save a help index to disk.
30 * helpSearchIndex() - Search an index.
31 * help_add_word() - Add a word to a node.
32 * help_compile_search() - Convert a search string into a regular expression.
33 * help_delete_node() - Free all memory used by a node.
34 * help_delete_word() - Free all memory used by a word.
35 * help_load_directory() - Load a directory of files into an index.
36 * help_load_file() - Load an HTML file into an index.
37 * help_new_node() - Create a new node.
38 * help_sort_by_name() - Sort nodes by filename and anchor.
39 * help_sort_by_score() - Sort nodes by score, section, and text.
40 * help_sort_words() - Sort words alphabetically.
41 */
42
43 /*
44 * Include necessary headers...
45 */
46
47 #include "cgi-private.h"
48 #include <cups/dir.h>
49
50
/*
 * List of common English words that should not be indexed...
 *
 * help_load_file() looks words up in this table with bsearch() and a
 * case-insensitive comparison, so the entries MUST stay in ascending
 * alphabetical order.  Each row holds 6 bytes, so a word may be at most
 * 5 characters long (plus the terminating nul).  The table is only ever
 * read, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
154
155
156 /*
157 * Local functions...
158 */
159
160 static help_word_t *help_add_word(help_node_t *n, const char *text);
161 static void help_delete_node(help_node_t *n);
162 static void help_delete_word(help_word_t *w);
163 static int help_load_directory(help_index_t *hi,
164 const char *directory,
165 const char *relative);
166 static int help_load_file(help_index_t *hi,
167 const char *filename,
168 const char *relative,
169 time_t mtime);
170 static help_node_t *help_new_node(const char *filename, const char *anchor,
171 const char *section, const char *text,
172 time_t mtime, off_t offset,
173 size_t length);
174 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
175 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
176 static int help_sort_words(help_word_t *w1, help_word_t *w2);
177
178
179 /*
180 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
181 */
182
183 void
184 helpDeleteIndex(help_index_t *hi) /* I - Help index */
185 {
186 help_node_t *node; /* Current node */
187
188
189 DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
190
191 if (!hi)
192 return;
193
194 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
195 node;
196 node = (help_node_t *)cupsArrayNext(hi->nodes))
197 {
198 if (!hi->search)
199 help_delete_node(node);
200 }
201
202 cupsArrayDelete(hi->nodes);
203 cupsArrayDelete(hi->sorted);
204
205 free(hi);
206 }
207
208
209 /*
210 * 'helpFindNode()' - Find a node in an index.
211 */
212
213 help_node_t * /* O - Node pointer or NULL */
214 helpFindNode(help_index_t *hi, /* I - Index */
215 const char *filename, /* I - Filename */
216 const char *anchor) /* I - Anchor */
217 {
218 help_node_t key; /* Search key */
219
220
221 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
222 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
223
224 /*
225 * Range check input...
226 */
227
228 if (!hi || !filename)
229 return (NULL);
230
231 /*
232 * Initialize the search key...
233 */
234
235 key.filename = (char *)filename;
236 key.anchor = (char *)anchor;
237
238 /*
239 * Return any match...
240 */
241
242 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
243 }
244
245
246 /*
247 * 'helpLoadIndex()' - Load a help index from disk.
248 */
249
250 help_index_t * /* O - Index pointer or NULL */
251 helpLoadIndex(const char *hifile, /* I - Index filename */
252 const char *directory) /* I - Directory that is indexed */
253 {
254 help_index_t *hi; /* Help index */
255 cups_file_t *fp; /* Current file */
256 char line[2048], /* Line from file */
257 *ptr, /* Pointer into line */
258 *filename, /* Filename in line */
259 *anchor, /* Anchor in line */
260 *sectptr, /* Section pointer in line */
261 section[1024], /* Section name */
262 *text; /* Text in line */
263 time_t mtime; /* Modification time */
264 off_t offset; /* Offset into file */
265 size_t length; /* Length in bytes */
266 int update; /* Update? */
267 help_node_t *node; /* Current node */
268 help_word_t *word; /* Current word */
269
270
271 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
272 hifile, directory));
273
274 /*
275 * Create a new, empty index.
276 */
277
278 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
279 return (NULL);
280
281 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
282 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
283
284 if (!hi->nodes || !hi->sorted)
285 {
286 cupsArrayDelete(hi->nodes);
287 cupsArrayDelete(hi->sorted);
288 free(hi);
289 return (NULL);
290 }
291
292 /*
293 * Try loading the existing index file...
294 */
295
296 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
297 {
298 /*
299 * Lock the file and then read the first line...
300 */
301
302 cupsFileLock(fp, 1);
303
304 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
305 {
306 /*
307 * Got a valid header line, now read the data lines...
308 */
309
310 node = NULL;
311
312 while (cupsFileGets(fp, line, sizeof(line)))
313 {
314 /*
315 * Each line looks like one of the following:
316 *
317 * filename mtime offset length "section" "text"
318 * filename#anchor offset length "text"
319 * SP count word
320 */
321
322 if (line[0] == ' ')
323 {
324 /*
325 * Read a word in the current node...
326 */
327
328 if (!node || (ptr = strrchr(line, ' ')) == NULL)
329 continue;
330
331 if ((word = help_add_word(node, ptr + 1)) != NULL)
332 word->count = atoi(line + 1);
333 }
334 else
335 {
336 /*
337 * Add a node...
338 */
339
340 filename = line;
341
342 if ((ptr = strchr(line, ' ')) == NULL)
343 break;
344
345 while (isspace(*ptr & 255))
346 *ptr++ = '\0';
347
348 if ((anchor = strrchr(filename, '#')) != NULL)
349 {
350 *anchor++ = '\0';
351 mtime = 0;
352 }
353 else
354 mtime = strtol(ptr, &ptr, 10);
355
356 offset = strtoll(ptr, &ptr, 10);
357 length = strtoll(ptr, &ptr, 10);
358
359 while (isspace(*ptr & 255))
360 ptr ++;
361
362 if (!anchor)
363 {
364 /*
365 * Get section...
366 */
367
368 if (*ptr != '\"')
369 break;
370
371 ptr ++;
372 sectptr = ptr;
373
374 while (*ptr && *ptr != '\"')
375 ptr ++;
376
377 if (*ptr != '\"')
378 break;
379
380 *ptr++ = '\0';
381
382 strlcpy(section, sectptr, sizeof(section));
383
384 while (isspace(*ptr & 255))
385 ptr ++;
386 }
387
388 if (*ptr != '\"')
389 break;
390
391 ptr ++;
392 text = ptr;
393
394 while (*ptr && *ptr != '\"')
395 ptr ++;
396
397 if (*ptr != '\"')
398 break;
399
400 *ptr++ = '\0';
401
402 if ((node = help_new_node(filename, anchor, section, text,
403 mtime, offset, length)) == NULL)
404 break;
405
406 node->score = -1;
407
408 cupsArrayAdd(hi->nodes, node);
409 }
410 }
411 }
412
413 cupsFileClose(fp);
414 }
415
416 /*
417 * Scan for new/updated files...
418 */
419
420 update = help_load_directory(hi, directory, NULL);
421
422 /*
423 * Remove any files that are no longer installed...
424 */
425
426 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
427 node;
428 node = (help_node_t *)cupsArrayNext(hi->nodes))
429 if (node->score < 0)
430 {
431 /*
432 * Delete this node...
433 */
434
435 cupsArrayRemove(hi->nodes, node);
436 help_delete_node(node);
437 }
438
439 /*
440 * Add nodes to the sorted array...
441 */
442
443 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
444 node;
445 node = (help_node_t *)cupsArrayNext(hi->nodes))
446 cupsArrayAdd(hi->sorted, node);
447
448 /*
449 * Save the index if we updated it...
450 */
451
452 if (update)
453 helpSaveIndex(hi, hifile);
454
455 /*
456 * Return the index...
457 */
458
459 return (hi);
460 }
461
462
463 /*
464 * 'helpSaveIndex()' - Save a help index to disk.
465 */
466
467 int /* O - 0 on success, -1 on error */
468 helpSaveIndex(help_index_t *hi, /* I - Index */
469 const char *hifile) /* I - Index filename */
470 {
471 cups_file_t *fp; /* Index file */
472 help_node_t *node; /* Current node */
473 help_word_t *word; /* Current word */
474
475
476 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
477
478 /*
479 * Try creating a new index file...
480 */
481
482 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
483 return (-1);
484
485 /*
486 * Lock the file while we write it...
487 */
488
489 cupsFileLock(fp, 1);
490
491 cupsFilePuts(fp, "HELPV2\n");
492
493 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
494 node;
495 node = (help_node_t *)cupsArrayNext(hi->nodes))
496 {
497 /*
498 * Write the current node with/without the anchor...
499 */
500
501 if (node->anchor)
502 {
503 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
504 node->filename, node->anchor,
505 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
506 node->text) < 0)
507 break;
508 }
509 else
510 {
511 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
512 node->filename, node->mtime,
513 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
514 node->section ? node->section : "", node->text) < 0)
515 break;
516 }
517
518 /*
519 * Then write the words associated with the node...
520 */
521
522 for (word = (help_word_t *)cupsArrayFirst(node->words);
523 word;
524 word = (help_word_t *)cupsArrayNext(node->words))
525 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
526 break;
527 }
528
529 cupsFileFlush(fp);
530
531 if (cupsFileClose(fp) < 0)
532 return (-1);
533 else if (node)
534 return (-1);
535 else
536 return (0);
537 }
538
539
540 /*
541 * 'helpSearchIndex()' - Search an index.
542 */
543
544 help_index_t * /* O - Search index */
545 helpSearchIndex(help_index_t *hi, /* I - Index */
546 const char *query, /* I - Query string */
547 const char *section, /* I - Limit search to this section */
548 const char *filename) /* I - Limit search to this file */
549 {
550 help_index_t *search; /* Search index */
551 help_node_t *node; /* Current node */
552 help_word_t *word; /* Current word */
553 void *sc; /* Search context */
554 int matches; /* Number of matches */
555
556
557 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
558 hi, query ? query : "(nil)",
559 filename ? filename : "(nil)"));
560
561 /*
562 * Range check...
563 */
564
565 if (!hi || !query)
566 return (NULL);
567
568 /*
569 * Reset the scores of all nodes to 0...
570 */
571
572 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
573 node;
574 node = (help_node_t *)cupsArrayNext(hi->nodes))
575 node->score = 0;
576
577 /*
578 * Find the first node to search in...
579 */
580
581 if (filename)
582 {
583 node = helpFindNode(hi, filename, NULL);
584 if (!node)
585 return (NULL);
586 }
587 else
588 node = (help_node_t *)cupsArrayFirst(hi->nodes);
589
590 /*
591 * Convert the query into a regular expression...
592 */
593
594 sc = cgiCompileSearch(query);
595 if (!sc)
596 return (NULL);
597
598 /*
599 * Allocate a search index...
600 */
601
602 search = calloc(1, sizeof(help_index_t));
603 if (!search)
604 {
605 cgiFreeSearch(sc);
606 return (NULL);
607 }
608
609 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
610 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
611
612 if (!search->nodes || !search->sorted)
613 {
614 cupsArrayDelete(search->nodes);
615 cupsArrayDelete(search->sorted);
616 free(search);
617 cgiFreeSearch(sc);
618 return (NULL);
619 }
620
621 search->search = 1;
622
623 /*
624 * Check each node in the index, adding matching nodes to the
625 * search index...
626 */
627
628 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
629 if (section && strcmp(node->section, section))
630 continue;
631 else if (filename && strcmp(node->filename, filename))
632 continue;
633 else
634 {
635 matches = cgiDoSearch(sc, node->text);
636
637 for (word = (help_word_t *)cupsArrayFirst(node->words);
638 word;
639 word = (help_word_t *)cupsArrayNext(node->words))
640 if (cgiDoSearch(sc, word->text) > 0)
641 matches += word->count;
642
643 if (matches > 0)
644 {
645 /*
646 * Found a match, add the node to the search index...
647 */
648
649 node->score = matches;
650
651 cupsArrayAdd(search->nodes, node);
652 cupsArrayAdd(search->sorted, node);
653 }
654 }
655
656 /*
657 * Free the search context...
658 */
659
660 cgiFreeSearch(sc);
661
662 /*
663 * Return the results...
664 */
665
666 return (search);
667 }
668
669
670 /*
671 * 'help_add_word()' - Add a word to a node.
672 */
673
674 static help_word_t * /* O - New word */
675 help_add_word(help_node_t *n, /* I - Node */
676 const char *text) /* I - Word text */
677 {
678 help_word_t *w, /* New word */
679 key; /* Search key */
680
681
682 DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
683
684 /*
685 * Create the words array as needed...
686 */
687
688 if (!n->words)
689 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
690
691 /*
692 * See if the word is already added...
693 */
694
695 key.text = (char *)text;
696
697 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
698 {
699 /*
700 * Create a new word...
701 */
702
703 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
704 return (NULL);
705
706 if ((w->text = strdup(text)) == NULL)
707 {
708 free(w);
709 return (NULL);
710 }
711
712 cupsArrayAdd(n->words, w);
713 }
714
715 /*
716 * Bump the counter for this word and return it...
717 */
718
719 w->count ++;
720
721 return (w);
722 }
723
724
725 /*
726 * 'help_delete_node()' - Free all memory used by a node.
727 */
728
729 static void
730 help_delete_node(help_node_t *n) /* I - Node */
731 {
732 help_word_t *w; /* Current word */
733
734
735 DEBUG_printf(("help_delete_node(n=%p)\n", n));
736
737 if (!n)
738 return;
739
740 if (n->filename)
741 free(n->filename);
742
743 if (n->anchor)
744 free(n->anchor);
745
746 if (n->section)
747 free(n->section);
748
749 if (n->text)
750 free(n->text);
751
752 for (w = (help_word_t *)cupsArrayFirst(n->words);
753 w;
754 w = (help_word_t *)cupsArrayNext(n->words))
755 help_delete_word(w);
756
757 cupsArrayDelete(n->words);
758
759 free(n);
760 }
761
762
763 /*
764 * 'help_delete_word()' - Free all memory used by a word.
765 */
766
767 static void
768 help_delete_word(help_word_t *w) /* I - Word */
769 {
770 DEBUG_printf(("help_delete_word(w=%p)\n", w));
771
772 if (!w)
773 return;
774
775 if (w->text)
776 free(w->text);
777
778 free(w);
779 }
780
781
782 /*
783 * 'help_load_directory()' - Load a directory of files into an index.
784 */
785
786 static int /* O - 0 = success, -1 = error, 1 = updated */
787 help_load_directory(
788 help_index_t *hi, /* I - Index */
789 const char *directory, /* I - Directory */
790 const char *relative) /* I - Relative path */
791 {
792 cups_dir_t *dir; /* Directory file */
793 cups_dentry_t *dent; /* Directory entry */
794 char *ext, /* Pointer to extension */
795 filename[1024], /* Full filename */
796 relname[1024]; /* Relative filename */
797 int update; /* Updated? */
798 help_node_t *node; /* Current node */
799
800
801 DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
802 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
803
804 /*
805 * Open the directory and scan it...
806 */
807
808 if ((dir = cupsDirOpen(directory)) == NULL)
809 return (0);
810
811 update = 0;
812
813 while ((dent = cupsDirRead(dir)) != NULL)
814 {
815 /*
816 * Skip "." files...
817 */
818
819 if (dent->filename[0] == '.')
820 continue;
821
822 /*
823 * Get absolute and relative filenames...
824 */
825
826 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
827 if (relative)
828 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
829 else
830 strlcpy(relname, dent->filename, sizeof(relname));
831
832 /*
833 * Check if we have a HTML file...
834 */
835
836 if ((ext = strstr(dent->filename, ".html")) != NULL &&
837 (!ext[5] || !strcmp(ext + 5, ".gz")))
838 {
839 /*
840 * HTML file, see if we have already indexed the file...
841 */
842
843 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
844 {
845 /*
846 * File already indexed - check dates to confirm that the
847 * index is up-to-date...
848 */
849
850 if (node->mtime == dent->fileinfo.st_mtime)
851 {
852 /*
853 * Same modification time, so mark all of the nodes
854 * for this file as up-to-date...
855 */
856
857 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
858 if (!strcmp(node->filename, relname))
859 node->score = 0;
860 else
861 break;
862
863 continue;
864 }
865 }
866
867 update = 1;
868
869 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
870 }
871 else if (S_ISDIR(dent->fileinfo.st_mode))
872 {
873 /*
874 * Process sub-directory...
875 */
876
877 if (help_load_directory(hi, filename, relname) == 1)
878 update = 1;
879 }
880 }
881
882 cupsDirClose(dir);
883
884 return (update);
885 }
886
887
888 /*
889 * 'help_load_file()' - Load a HTML files into an index.
890 */
891
892 static int /* O - 0 = success, -1 = error */
893 help_load_file(
894 help_index_t *hi, /* I - Index */
895 const char *filename, /* I - Filename */
896 const char *relative, /* I - Relative path */
897 time_t mtime) /* I - Modification time */
898 {
899 cups_file_t *fp; /* HTML file */
900 help_node_t *node; /* Current node */
901 char line[1024], /* Line from file */
902 temp[1024], /* Temporary word */
903 section[1024], /* Section */
904 *ptr, /* Pointer into line */
905 *anchor, /* Anchor name */
906 *text; /* Text for anchor */
907 off_t offset; /* File offset */
908 char quote; /* Quote character */
909 help_word_t *word; /* Current word */
910 int wordlen; /* Length of word */
911
912
913 DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
914 hi, filename ? filename : "(nil)",
915 relative ? relative : "(nil)", mtime));
916
917 if ((fp = cupsFileOpen(filename, "r")) == NULL)
918 return (-1);
919
920 node = NULL;
921 offset = 0;
922
923 strcpy(section, "Other");
924
925 while (cupsFileGets(fp, line, sizeof(line)))
926 {
927 /*
928 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
929 */
930
931 if (!strncasecmp(line, "<!-- SECTION:", 13))
932 {
933 /*
934 * Got section line, copy it!
935 */
936
937 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
938
939 strlcpy(section, ptr, sizeof(section));
940 if ((ptr = strstr(section, "-->")) != NULL)
941 {
942 /*
943 * Strip comment stuff from end of line...
944 */
945
946 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
947
948 if (isspace(*ptr & 255))
949 *ptr = '\0';
950 }
951 continue;
952 }
953
954 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
955 {
956 ptr ++;
957
958 if (!strncasecmp(ptr, "TITLE>", 6))
959 {
960 /*
961 * Found the title...
962 */
963
964 anchor = NULL;
965 ptr += 6;
966 }
967 else if (!strncasecmp(ptr, "A NAME=", 7))
968 {
969 /*
970 * Found an anchor...
971 */
972
973 ptr += 7;
974
975 if (*ptr == '\"' || *ptr == '\'')
976 {
977 /*
978 * Get quoted anchor...
979 */
980
981 quote = *ptr;
982 anchor = ptr + 1;
983 if ((ptr = strchr(anchor, quote)) != NULL)
984 *ptr++ = '\0';
985 else
986 break;
987 }
988 else
989 {
990 /*
991 * Get unquoted anchor...
992 */
993
994 anchor = ptr + 1;
995
996 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
997
998 if (*ptr)
999 *ptr++ = '\0';
1000 else
1001 break;
1002 }
1003
1004 /*
1005 * Got the anchor, now lets find the end...
1006 */
1007
1008 while (*ptr && *ptr != '>')
1009 ptr ++;
1010
1011 if (*ptr != '>')
1012 break;
1013
1014 ptr ++;
1015 }
1016 else
1017 continue;
1018
1019 /*
1020 * Now collect text for the link...
1021 */
1022
1023 text = ptr;
1024 while ((ptr = strchr(text, '<')) == NULL)
1025 {
1026 ptr = text + strlen(text);
1027 if (ptr >= (line + sizeof(line) - 2))
1028 break;
1029
1030 *ptr++ = ' ';
1031
1032 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1033 break;
1034 }
1035
1036 *ptr = '\0';
1037
1038 if (node)
1039 node->length = offset - node->offset;
1040
1041 if (!*text)
1042 {
1043 node = NULL;
1044 break;
1045 }
1046
1047 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1048 {
1049 /*
1050 * Node already in the index, so replace the text and other
1051 * data...
1052 */
1053
1054 cupsArrayRemove(hi->nodes, node);
1055
1056 if (node->section)
1057 free(node->section);
1058
1059 if (node->text)
1060 free(node->text);
1061
1062 if (node->words)
1063 {
1064 for (word = (help_word_t *)cupsArrayFirst(node->words);
1065 word;
1066 word = (help_word_t *)cupsArrayNext(node->words))
1067 help_delete_word(word);
1068
1069 cupsArrayDelete(node->words);
1070 node->words = NULL;
1071 }
1072
1073 node->section = section[0] ? strdup(section) : NULL;
1074 node->text = strdup(text);
1075 node->mtime = mtime;
1076 node->offset = offset;
1077 node->score = 0;
1078 }
1079 else
1080 {
1081 /*
1082 * New node...
1083 */
1084
1085 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1086 }
1087
1088 /*
1089 * Go through the text value and replace tabs and newlines with
1090 * whitespace and eliminate extra whitespace...
1091 */
1092
1093 for (ptr = node->text, text = node->text; *ptr;)
1094 if (isspace(*ptr & 255))
1095 {
1096 while (isspace(*ptr & 255))
1097 ptr ++;
1098
1099 *text++ = ' ';
1100 }
1101 else if (text != ptr)
1102 *text++ = *ptr++;
1103 else
1104 {
1105 text ++;
1106 ptr ++;
1107 }
1108
1109 *text = '\0';
1110
1111 /*
1112 * (Re)add the node to the array...
1113 */
1114
1115 cupsArrayAdd(hi->nodes, node);
1116
1117 if (!anchor)
1118 node = NULL;
1119 break;
1120 }
1121
1122 if (node)
1123 {
1124 /*
1125 * Scan this line for words...
1126 */
1127
1128 for (ptr = line; *ptr; ptr ++)
1129 {
1130 /*
1131 * Skip HTML stuff...
1132 */
1133
1134 if (*ptr == '<')
1135 {
1136 if (!strncmp(ptr, "<!--", 4))
1137 {
1138 /*
1139 * Skip HTML comment...
1140 */
1141
1142 if ((text = strstr(ptr + 4, "-->")) == NULL)
1143 ptr += strlen(ptr) - 1;
1144 else
1145 ptr = text + 2;
1146 }
1147 else
1148 {
1149 /*
1150 * Skip HTML element...
1151 */
1152
1153 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1154 {
1155 if (*ptr == '\"' || *ptr == '\'')
1156 {
1157 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1158
1159 if (!*ptr)
1160 ptr --;
1161 }
1162 }
1163
1164 if (!*ptr)
1165 ptr --;
1166 }
1167
1168 continue;
1169 }
1170 else if (*ptr == '&')
1171 {
1172 /*
1173 * Skip HTML entity...
1174 */
1175
1176 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1177
1178 if (!*ptr)
1179 ptr --;
1180
1181 continue;
1182 }
1183 else if (!isalnum(*ptr & 255))
1184 continue;
1185
1186 /*
1187 * Found the start of a word, search until we find the end...
1188 */
1189
1190 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1191
1192 wordlen = ptr - text;
1193
1194 memcpy(temp, text, wordlen);
1195 temp[wordlen] = '\0';
1196
1197 ptr --;
1198
1199 if (wordlen > 1 && !bsearch(temp, help_common_words,
1200 (sizeof(help_common_words) /
1201 sizeof(help_common_words[0])),
1202 sizeof(help_common_words[0]),
1203 (int (*)(const void *, const void *))
1204 strcasecmp))
1205 help_add_word(node, temp);
1206 }
1207 }
1208
1209 /*
1210 * Get the offset of the next line...
1211 */
1212
1213 offset = cupsFileTell(fp);
1214 }
1215
1216 cupsFileClose(fp);
1217
1218 if (node)
1219 node->length = offset - node->offset;
1220
1221 return (0);
1222 }
1223
1224
1225 /*
1226 * 'help_new_node()' - Create a new node and add it to an index.
1227 */
1228
1229 static help_node_t * /* O - Node pointer or NULL on error */
1230 help_new_node(const char *filename, /* I - Filename */
1231 const char *anchor, /* I - Anchor */
1232 const char *section, /* I - Section */
1233 const char *text, /* I - Text */
1234 time_t mtime, /* I - Modification time */
1235 off_t offset, /* I - Offset in file */
1236 size_t length) /* I - Length in bytes */
1237 {
1238 help_node_t *n; /* Node */
1239
1240
1241 DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1242 "mtime=%ld, offset=%ld, length=%ld)\n",
1243 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
1244 text ? text : "(nil)", (long)mtime, (long)offset,
1245 (long)length));
1246
1247 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1248 if (!n)
1249 return (NULL);
1250
1251 n->filename = strdup(filename);
1252 n->anchor = anchor ? strdup(anchor) : NULL;
1253 n->section = (section && *section) ? strdup(section) : NULL;
1254 n->text = strdup(text);
1255 n->mtime = mtime;
1256 n->offset = offset;
1257 n->length = length;
1258
1259 return (n);
1260 }
1261
1262
1263 /*
1264 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1265 */
1266
1267 static int /* O - Difference */
1268 help_sort_by_name(help_node_t *n1, /* I - First node */
1269 help_node_t *n2) /* I - Second node */
1270 {
1271 int diff; /* Difference */
1272
1273
1274 DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1275 n1, n1->filename, n1->anchor ? n1->anchor : "",
1276 n2, n2->filename, n2->anchor ? n2->anchor : ""));
1277
1278 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1279 return (diff);
1280
1281 if (!n1->anchor && !n2->anchor)
1282 return (0);
1283 else if (!n1->anchor)
1284 return (-1);
1285 else if (!n2->anchor)
1286 return (1);
1287 else
1288 return (strcmp(n1->anchor, n2->anchor));
1289 }
1290
1291
1292 /*
1293 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1294 */
1295
1296 static int /* O - Difference */
1297 help_sort_by_score(help_node_t *n1, /* I - First node */
1298 help_node_t *n2) /* I - Second node */
1299 {
1300 int diff; /* Difference */
1301
1302
1303 DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1304 "n2=%p(%d \"%s\" \"%s\")\n",
1305 n1, n1->score, n1->section ? n1->section : "", n1->text,
1306 n2, n2->score, n2->section ? n2->section : "", n2->text));
1307
1308 if (n1->score != n2->score)
1309 return (n1->score - n2->score);
1310
1311 if (n1->section && !n2->section)
1312 return (1);
1313 else if (!n1->section && n2->section)
1314 return (-1);
1315 else if (n1->section && n2->section &&
1316 (diff = strcmp(n1->section, n2->section)) != 0)
1317 return (diff);
1318
1319 return (strcasecmp(n1->text, n2->text));
1320 }
1321
1322
1323 /*
1324 * 'help_sort_words()' - Sort words alphabetically.
1325 */
1326
1327 static int /* O - Difference */
1328 help_sort_words(help_word_t *w1, /* I - Second word */
1329 help_word_t *w2) /* I - Second word */
1330 {
1331 DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1332 w1, w1->text, w2, w2->text));
1333
1334 return (strcasecmp(w1->text, w2->text));
1335 }
1336
1337
1338 /*
1339 * End of "$Id: help-index.c 6394 2007-03-25 00:01:14Z mike $".
1340 */