]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
df909a40615e39e59a3eac45e9f585695f945c80
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $"
3 *
4 * On-line help index routines for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2007 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are the
9 * property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the file
11 * "LICENSE.txt" which should have been included with this file. If this
12 * file is missing or damaged please contact Easy Software Products
13 * at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * helpDeleteIndex() - Delete an index, freeing all memory used.
27 * helpFindNode() - Find a node in an index.
28 * helpLoadIndex() - Load a help index from disk.
29 * helpSaveIndex() - Save a help index to disk.
30 * helpSearchIndex() - Search an index.
31 * help_add_word() - Add a word to a node.
32 * help_compile_search() - Convert a search string into a regular expression.
33 * help_delete_node() - Free all memory used by a node.
34 * help_delete_word() - Free all memory used by a word.
35 * help_load_directory() - Load a directory of files into an index.
36 * help_load_file() - Load an HTML file into an index.
37 * help_new_node() - Create a new node.
38 * help_sort_by_name() - Sort nodes by filename and anchor.
39 * help_sort_by_score() - Sort nodes by score, section, and text.
40 * help_sort_words() - Sort words alphabetically.
41 */
42
43 /*
44 * Include necessary headers...
45 */
46
47 #include "cgi-private.h"
48 #include <cups/dir.h>
49
50
/*
 * List of common English words that should not be indexed...
 *
 * help_load_file() looks words up in this table with bsearch() and
 * strcasecmp(), so the entries MUST remain in ascending alphabetical
 * order.  Every entry must also fit in 5 characters plus the nul
 * terminator.  The table is never modified, so it is declared const.
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",    "at",
  "be",    "been",  "but",   "by",
  "call",  "can",   "come",  "could",
  "day",   "did",   "do",    "down",
  "each",
  "find",  "first", "for",   "from",
  "go",
  "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",
  "if",    "in",    "is",    "it",
  "know",
  "like",  "long",  "look",
  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",
  "of",    "on",    "one",   "or",    "other", "out",   "over",
  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there",
  "these", "they",  "thing", "this",  "time",  "to",    "two",
  "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",
  "which", "who",   "will",  "with",  "word",  "would", "write",
  "you",   "your"
};
154
155
156 /*
157 * Local functions...
158 */
159
160 static help_word_t *help_add_word(help_node_t *n, const char *text);
161 static void help_delete_node(help_node_t *n);
162 static void help_delete_word(help_word_t *w);
163 static int help_load_directory(help_index_t *hi,
164 const char *directory,
165 const char *relative);
166 static int help_load_file(help_index_t *hi,
167 const char *filename,
168 const char *relative,
169 time_t mtime);
170 static help_node_t *help_new_node(const char *filename, const char *anchor,
171 const char *section, const char *text,
172 time_t mtime, off_t offset,
173 size_t length);
174 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
175 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
176 static int help_sort_words(help_word_t *w1, help_word_t *w2);
177
178
179 /*
180 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
181 */
182
183 void
184 helpDeleteIndex(help_index_t *hi) /* I - Help index */
185 {
186 help_node_t *node; /* Current node */
187
188
189 DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
190
191 if (!hi)
192 return;
193
194 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
195 node;
196 node = (help_node_t *)cupsArrayNext(hi->nodes))
197 {
198 if (!hi->search)
199 help_delete_node(node);
200 }
201
202 cupsArrayDelete(hi->nodes);
203 cupsArrayDelete(hi->sorted);
204
205 free(hi);
206 }
207
208
209 /*
210 * 'helpFindNode()' - Find a node in an index.
211 */
212
213 help_node_t * /* O - Node pointer or NULL */
214 helpFindNode(help_index_t *hi, /* I - Index */
215 const char *filename, /* I - Filename */
216 const char *anchor) /* I - Anchor */
217 {
218 help_node_t key; /* Search key */
219
220
221 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
222 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
223
224 /*
225 * Range check input...
226 */
227
228 if (!hi || !filename)
229 return (NULL);
230
231 /*
232 * Initialize the search key...
233 */
234
235 key.filename = (char *)filename;
236 key.anchor = (char *)anchor;
237
238 /*
239 * Return any match...
240 */
241
242 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
243 }
244
245
246 /*
247 * 'helpLoadIndex()' - Load a help index from disk.
248 */
249
250 help_index_t * /* O - Index pointer or NULL */
251 helpLoadIndex(const char *hifile, /* I - Index filename */
252 const char *directory) /* I - Directory that is indexed */
253 {
254 help_index_t *hi; /* Help index */
255 cups_file_t *fp; /* Current file */
256 char line[2048], /* Line from file */
257 *ptr, /* Pointer into line */
258 *filename, /* Filename in line */
259 *anchor, /* Anchor in line */
260 *sectptr, /* Section pointer in line */
261 section[1024], /* Section name */
262 *text; /* Text in line */
263 time_t mtime; /* Modification time */
264 off_t offset; /* Offset into file */
265 size_t length; /* Length in bytes */
266 int update; /* Update? */
267 help_node_t *node; /* Current node */
268 help_word_t *word; /* Current word */
269
270
271 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
272 hifile, directory));
273
274 /*
275 * Create a new, empty index.
276 */
277
278 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
279 return (NULL);
280
281 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
282 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
283
284 if (!hi->nodes || !hi->sorted)
285 {
286 cupsArrayDelete(hi->nodes);
287 cupsArrayDelete(hi->sorted);
288 free(hi);
289 return (NULL);
290 }
291
292 /*
293 * Try loading the existing index file...
294 */
295
296 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
297 {
298 /*
299 * Lock the file and then read the first line...
300 */
301
302 cupsFileLock(fp, 1);
303
304 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
305 {
306 /*
307 * Got a valid header line, now read the data lines...
308 */
309
310 node = NULL;
311
312 while (cupsFileGets(fp, line, sizeof(line)))
313 {
314 /*
315 * Each line looks like one of the following:
316 *
317 * filename mtime offset length "section" "text"
318 * filename#anchor offset length "text"
319 * SP count word
320 */
321
322 if (line[0] == ' ')
323 {
324 /*
325 * Read a word in the current node...
326 */
327
328 if (!node || (ptr = strrchr(line, ' ')) == NULL)
329 continue;
330
331 if ((word = help_add_word(node, ptr + 1)) != NULL)
332 word->count = atoi(line + 1);
333 }
334 else
335 {
336 /*
337 * Add a node...
338 */
339
340 filename = line;
341
342 if ((ptr = strchr(line, ' ')) == NULL)
343 break;
344
345 while (isspace(*ptr & 255))
346 *ptr++ = '\0';
347
348 if ((anchor = strrchr(filename, '#')) != NULL)
349 {
350 *anchor++ = '\0';
351 mtime = 0;
352 }
353 else
354 mtime = strtol(ptr, &ptr, 10);
355
356 offset = strtoll(ptr, &ptr, 10);
357 length = strtoll(ptr, &ptr, 10);
358
359 while (isspace(*ptr & 255))
360 ptr ++;
361
362 if (!anchor)
363 {
364 /*
365 * Get section...
366 */
367
368 if (*ptr != '\"')
369 break;
370
371 ptr ++;
372 sectptr = ptr;
373
374 while (*ptr && *ptr != '\"')
375 ptr ++;
376
377 if (*ptr != '\"')
378 break;
379
380 *ptr++ = '\0';
381
382 strlcpy(section, sectptr, sizeof(section));
383
384 while (isspace(*ptr & 255))
385 ptr ++;
386 }
387
388 if (*ptr != '\"')
389 break;
390
391 ptr ++;
392 text = ptr;
393
394 while (*ptr && *ptr != '\"')
395 ptr ++;
396
397 if (*ptr != '\"')
398 break;
399
400 *ptr++ = '\0';
401
402 if ((node = help_new_node(filename, anchor, section, text,
403 mtime, offset, length)) == NULL)
404 break;
405
406 node->score = -1;
407
408 cupsArrayAdd(hi->nodes, node);
409 }
410 }
411 }
412
413 cupsFileClose(fp);
414 }
415
416 /*
417 * Scan for new/updated files...
418 */
419
420 update = help_load_directory(hi, directory, NULL);
421
422 /*
423 * Remove any files that are no longer installed...
424 */
425
426 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
427 node;
428 node = (help_node_t *)cupsArrayNext(hi->nodes))
429 if (node->score < 0)
430 {
431 /*
432 * Delete this node...
433 */
434
435 cupsArrayRemove(hi->nodes, node);
436 help_delete_node(node);
437 }
438
439 /*
440 * Add nodes to the sorted array...
441 */
442
443 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
444 node;
445 node = (help_node_t *)cupsArrayNext(hi->nodes))
446 cupsArrayAdd(hi->sorted, node);
447
448 /*
449 * Save the index if we updated it...
450 */
451
452 if (update)
453 helpSaveIndex(hi, hifile);
454
455 /*
456 * Return the index...
457 */
458
459 return (hi);
460 }
461
462
463 /*
464 * 'helpSaveIndex()' - Save a help index to disk.
465 */
466
467 int /* O - 0 on success, -1 on error */
468 helpSaveIndex(help_index_t *hi, /* I - Index */
469 const char *hifile) /* I - Index filename */
470 {
471 cups_file_t *fp; /* Index file */
472 help_node_t *node; /* Current node */
473 help_word_t *word; /* Current word */
474
475
476 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
477
478 /*
479 * Try creating a new index file...
480 */
481
482 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
483 return (-1);
484
485 /*
486 * Lock the file while we write it...
487 */
488
489 cupsFileLock(fp, 1);
490
491 cupsFilePuts(fp, "HELPV2\n");
492
493 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
494 node;
495 node = (help_node_t *)cupsArrayNext(hi->nodes))
496 {
497 /*
498 * Write the current node with/without the anchor...
499 */
500
501 if (node->anchor)
502 {
503 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
504 node->filename, node->anchor,
505 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
506 node->text) < 0)
507 break;
508 }
509 else
510 {
511 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
512 node->filename, node->mtime,
513 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
514 node->section ? node->section : "", node->text) < 0)
515 break;
516 }
517
518 /*
519 * Then write the words associated with the node...
520 */
521
522 for (word = (help_word_t *)cupsArrayFirst(node->words);
523 word;
524 word = (help_word_t *)cupsArrayNext(node->words))
525 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
526 break;
527 }
528
529 cupsFileFlush(fp);
530
531 if (cupsFileClose(fp) < 0)
532 return (-1);
533 else if (node)
534 return (-1);
535 else
536 return (0);
537 }
538
539
540 /*
541 * 'helpSearchIndex()' - Search an index.
542 */
543
544 help_index_t * /* O - Search index */
545 helpSearchIndex(help_index_t *hi, /* I - Index */
546 const char *query, /* I - Query string */
547 const char *section, /* I - Limit search to this section */
548 const char *filename) /* I - Limit search to this file */
549 {
550 help_index_t *search; /* Search index */
551 help_node_t *node; /* Current node */
552 help_word_t *word; /* Current word */
553 void *sc; /* Search context */
554 int matches; /* Number of matches */
555
556
557 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
558 hi, query ? query : "(nil)",
559 filename ? filename : "(nil)"));
560
561 /*
562 * Range check...
563 */
564
565 if (!hi || !query)
566 return (NULL);
567
568 /*
569 * Reset the scores of all nodes to 0...
570 */
571
572 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
573 node;
574 node = (help_node_t *)cupsArrayNext(hi->nodes))
575 node->score = 0;
576
577 /*
578 * Find the first node to search in...
579 */
580
581 if (filename)
582 {
583 node = helpFindNode(hi, filename, NULL);
584 if (!node)
585 return (NULL);
586 }
587 else
588 node = (help_node_t *)cupsArrayFirst(hi->nodes);
589
590 /*
591 * Convert the query into a regular expression...
592 */
593
594 sc = cgiCompileSearch(query);
595 if (!sc)
596 return (NULL);
597
598 /*
599 * Allocate a search index...
600 */
601
602 search = calloc(1, sizeof(help_index_t));
603 if (!search)
604 {
605 cgiFreeSearch(sc);
606 return (NULL);
607 }
608
609 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
610 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
611
612 if (!search->nodes || !search->sorted)
613 {
614 cupsArrayDelete(search->nodes);
615 cupsArrayDelete(search->sorted);
616 free(search);
617 cgiFreeSearch(sc);
618 return (NULL);
619 }
620
621 search->search = 1;
622
623 /*
624 * Check each node in the index, adding matching nodes to the
625 * search index...
626 */
627
628 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
629 if (section && strcmp(node->section, section))
630 continue;
631 else if (filename && strcmp(node->filename, filename))
632 continue;
633 else
634 {
635 matches = cgiDoSearch(sc, node->text);
636
637 for (word = (help_word_t *)cupsArrayFirst(node->words);
638 word;
639 word = (help_word_t *)cupsArrayNext(node->words))
640 if (cgiDoSearch(sc, word->text) > 0)
641 matches += word->count;
642
643 if (matches > 0)
644 {
645 /*
646 * Found a match, add the node to the search index...
647 */
648
649 node->score = matches;
650
651 cupsArrayAdd(search->nodes, node);
652 cupsArrayAdd(search->sorted, node);
653 }
654 }
655
656 /*
657 * Free the search context...
658 */
659
660 cgiFreeSearch(sc);
661
662 /*
663 * Return the results...
664 */
665
666 return (search);
667 }
668
669
670 /*
671 * 'help_add_word()' - Add a word to a node.
672 */
673
674 static help_word_t * /* O - New word */
675 help_add_word(help_node_t *n, /* I - Node */
676 const char *text) /* I - Word text */
677 {
678 help_word_t *w, /* New word */
679 key; /* Search key */
680
681
682 DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
683
684 /*
685 * Create the words array as needed...
686 */
687
688 if (!n->words)
689 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
690
691 /*
692 * See if the word is already added...
693 */
694
695 key.text = (char *)text;
696
697 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
698 {
699 /*
700 * Create a new word...
701 */
702
703 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
704 return (NULL);
705
706 if ((w->text = strdup(text)) == NULL)
707 {
708 free(w);
709 return (NULL);
710 }
711
712 cupsArrayAdd(n->words, w);
713 }
714
715 /*
716 * Bump the counter for this word and return it...
717 */
718
719 w->count ++;
720
721 return (w);
722 }
723
724
725 /*
726 * 'help_delete_node()' - Free all memory used by a node.
727 */
728
729 static void
730 help_delete_node(help_node_t *n) /* I - Node */
731 {
732 help_word_t *w; /* Current word */
733
734
735 DEBUG_printf(("help_delete_node(n=%p)\n", n));
736
737 if (!n)
738 return;
739
740 if (n->filename)
741 free(n->filename);
742
743 if (n->anchor)
744 free(n->anchor);
745
746 if (n->section)
747 free(n->section);
748
749 if (n->text)
750 free(n->text);
751
752 for (w = (help_word_t *)cupsArrayFirst(n->words);
753 w;
754 w = (help_word_t *)cupsArrayNext(n->words))
755 help_delete_word(w);
756
757 cupsArrayDelete(n->words);
758
759 free(n);
760 }
761
762
763 /*
764 * 'help_delete_word()' - Free all memory used by a word.
765 */
766
767 static void
768 help_delete_word(help_word_t *w) /* I - Word */
769 {
770 DEBUG_printf(("help_delete_word(w=%p)\n", w));
771
772 if (!w)
773 return;
774
775 if (w->text)
776 free(w->text);
777
778 free(w);
779 }
780
781
782 /*
783 * 'help_load_directory()' - Load a directory of files into an index.
784 */
785
786 static int /* O - 0 = success, -1 = error, 1 = updated */
787 help_load_directory(
788 help_index_t *hi, /* I - Index */
789 const char *directory, /* I - Directory */
790 const char *relative) /* I - Relative path */
791 {
792 cups_dir_t *dir; /* Directory file */
793 cups_dentry_t *dent; /* Directory entry */
794 char *ext, /* Pointer to extension */
795 filename[1024], /* Full filename */
796 relname[1024]; /* Relative filename */
797 int update; /* Updated? */
798 help_node_t *node; /* Current node */
799
800
801 DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
802 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
803
804 /*
805 * Open the directory and scan it...
806 */
807
808 if ((dir = cupsDirOpen(directory)) == NULL)
809 return (0);
810
811 update = 0;
812
813 while ((dent = cupsDirRead(dir)) != NULL)
814 {
815 /*
816 * Skip "." files...
817 */
818
819 if (dent->filename[0] == '.')
820 continue;
821
822 /*
823 * Get absolute and relative filenames...
824 */
825
826 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
827 if (relative)
828 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
829 else
830 strlcpy(relname, dent->filename, sizeof(relname));
831
832 /*
833 * Check if we have a HTML file...
834 */
835
836 if ((ext = strstr(dent->filename, ".html")) != NULL &&
837 (!ext[5] || !strcmp(ext + 5, ".gz")))
838 {
839 /*
840 * HTML file, see if we have already indexed the file...
841 */
842
843 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
844 {
845 /*
846 * File already indexed - check dates to confirm that the
847 * index is up-to-date...
848 */
849
850 if (node->mtime == dent->fileinfo.st_mtime)
851 {
852 /*
853 * Same modification time, so mark all of the nodes
854 * for this file as up-to-date...
855 */
856
857 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
858 if (!strcmp(node->filename, relname))
859 node->score = 0;
860 else
861 break;
862
863 continue;
864 }
865 }
866
867 update = 1;
868
869 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
870 }
871 else if (S_ISDIR(dent->fileinfo.st_mode))
872 {
873 /*
874 * Process sub-directory...
875 */
876
877 if (help_load_directory(hi, filename, relname) == 1)
878 update = 1;
879 }
880 }
881
882 cupsDirClose(dir);
883
884 return (update);
885 }
886
887
888 /*
889 * 'help_load_file()' - Load a HTML files into an index.
890 */
891
892 static int /* O - 0 = success, -1 = error */
893 help_load_file(
894 help_index_t *hi, /* I - Index */
895 const char *filename, /* I - Filename */
896 const char *relative, /* I - Relative path */
897 time_t mtime) /* I - Modification time */
898 {
899 cups_file_t *fp; /* HTML file */
900 help_node_t *node; /* Current node */
901 char line[1024], /* Line from file */
902 section[1024], /* Section */
903 *ptr, /* Pointer into line */
904 *anchor, /* Anchor name */
905 *text; /* Text for anchor */
906 off_t offset; /* File offset */
907 char quote; /* Quote character */
908 help_word_t *word; /* Current word */
909 int wordlen; /* Length of word */
910
911
912 DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
913 hi, filename ? filename : "(nil)",
914 relative ? relative : "(nil)", mtime));
915
916 if ((fp = cupsFileOpen(filename, "r")) == NULL)
917 return (-1);
918
919 node = NULL;
920 offset = 0;
921
922 strcpy(section, "Other");
923
924 while (cupsFileGets(fp, line, sizeof(line)))
925 {
926 /*
927 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
928 */
929
930 if (!strncasecmp(line, "<!-- SECTION:", 13))
931 {
932 /*
933 * Got section line, copy it!
934 */
935
936 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
937
938 strlcpy(section, ptr, sizeof(section));
939 if ((ptr = strstr(section, "-->")) != NULL)
940 {
941 /*
942 * Strip comment stuff from end of line...
943 */
944
945 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
946
947 if (isspace(*ptr & 255))
948 *ptr = '\0';
949 }
950 continue;
951 }
952
953 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
954 {
955 ptr ++;
956
957 if (!strncasecmp(ptr, "TITLE>", 6))
958 {
959 /*
960 * Found the title...
961 */
962
963 anchor = NULL;
964 ptr += 6;
965 }
966 else if (!strncasecmp(ptr, "A NAME=", 7))
967 {
968 /*
969 * Found an anchor...
970 */
971
972 ptr += 7;
973
974 if (*ptr == '\"' || *ptr == '\'')
975 {
976 /*
977 * Get quoted anchor...
978 */
979
980 quote = *ptr;
981 anchor = ptr + 1;
982 if ((ptr = strchr(anchor, quote)) != NULL)
983 *ptr++ = '\0';
984 else
985 break;
986 }
987 else
988 {
989 /*
990 * Get unquoted anchor...
991 */
992
993 anchor = ptr + 1;
994
995 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
996
997 if (*ptr)
998 *ptr++ = '\0';
999 else
1000 break;
1001 }
1002
1003 /*
1004 * Got the anchor, now lets find the end...
1005 */
1006
1007 while (*ptr && *ptr != '>')
1008 ptr ++;
1009
1010 if (*ptr != '>')
1011 break;
1012
1013 ptr ++;
1014 }
1015 else
1016 continue;
1017
1018 /*
1019 * Now collect text for the link...
1020 */
1021
1022 text = ptr;
1023 while ((ptr = strchr(text, '<')) == NULL)
1024 {
1025 ptr = text + strlen(text);
1026 if (ptr >= (line + sizeof(line) - 2))
1027 break;
1028
1029 *ptr++ = ' ';
1030
1031 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1032 break;
1033 }
1034
1035 *ptr = '\0';
1036
1037 if (node)
1038 node->length = offset - node->offset;
1039
1040 if (!*text)
1041 {
1042 node = NULL;
1043 break;
1044 }
1045
1046 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1047 {
1048 /*
1049 * Node already in the index, so replace the text and other
1050 * data...
1051 */
1052
1053 cupsArrayRemove(hi->nodes, node);
1054
1055 if (node->section)
1056 free(node->section);
1057
1058 if (node->text)
1059 free(node->text);
1060
1061 if (node->words)
1062 {
1063 for (word = (help_word_t *)cupsArrayFirst(node->words);
1064 word;
1065 word = (help_word_t *)cupsArrayNext(node->words))
1066 help_delete_word(word);
1067
1068 cupsArrayDelete(node->words);
1069 node->words = NULL;
1070 }
1071
1072 node->section = section[0] ? strdup(section) : NULL;
1073 node->text = strdup(text);
1074 node->mtime = mtime;
1075 node->offset = offset;
1076 node->score = 0;
1077 }
1078 else
1079 {
1080 /*
1081 * New node...
1082 */
1083
1084 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1085 }
1086
1087 /*
1088 * Go through the text value and replace tabs and newlines with
1089 * whitespace and eliminate extra whitespace...
1090 */
1091
1092 for (ptr = node->text, text = node->text; *ptr;)
1093 if (isspace(*ptr & 255))
1094 {
1095 while (isspace(*ptr & 255))
1096 ptr ++;
1097
1098 *text++ = ' ';
1099 }
1100 else if (text != ptr)
1101 *text++ = *ptr++;
1102 else
1103 {
1104 text ++;
1105 ptr ++;
1106 }
1107
1108 *text = '\0';
1109
1110 /*
1111 * (Re)add the node to the array...
1112 */
1113
1114 cupsArrayAdd(hi->nodes, node);
1115
1116 if (!anchor)
1117 node = NULL;
1118 break;
1119 }
1120
1121 if (node)
1122 {
1123 /*
1124 * Scan this line for words...
1125 */
1126
1127 for (ptr = line; *ptr; ptr ++)
1128 {
1129 /*
1130 * Skip HTML stuff...
1131 */
1132
1133 if (*ptr == '<')
1134 {
1135 if (!strncmp(ptr, "<!--", 4))
1136 {
1137 /*
1138 * Skip HTML comment...
1139 */
1140
1141 if ((text = strstr(ptr + 4, "-->")) == NULL)
1142 ptr += strlen(ptr) - 1;
1143 else
1144 ptr = text + 2;
1145 }
1146 else
1147 {
1148 /*
1149 * Skip HTML element...
1150 */
1151
1152 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1153 if (*ptr == '\"' || *ptr == '\'')
1154 {
1155 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1156
1157 if (!*ptr)
1158 ptr --;
1159 }
1160
1161 if (!*ptr)
1162 ptr --;
1163 }
1164
1165 continue;
1166 }
1167 else if (*ptr == '&')
1168 {
1169 /*
1170 * Skip HTML entity...
1171 */
1172
1173 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1174
1175 if (!*ptr)
1176 ptr --;
1177
1178 continue;
1179 }
1180 else if (!isalnum(*ptr & 255))
1181 continue;
1182
1183 /*
1184 * Found the start of a word, search until we find the end...
1185 */
1186
1187 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1188
1189 wordlen = ptr - text;
1190
1191 if (*ptr)
1192 *ptr = '\0';
1193 else
1194 ptr --;
1195
1196 if (wordlen > 1 && !bsearch(text, help_common_words,
1197 (sizeof(help_common_words) /
1198 sizeof(help_common_words[0])),
1199 sizeof(help_common_words[0]),
1200 (int (*)(const void *, const void *))
1201 strcasecmp))
1202 help_add_word(node, text);
1203 }
1204 }
1205
1206 /*
1207 * Get the offset of the next line...
1208 */
1209
1210 offset = cupsFileTell(fp);
1211 }
1212
1213 cupsFileClose(fp);
1214
1215 if (node)
1216 node->length = offset - node->offset;
1217
1218 return (0);
1219 }
1220
1221
1222 /*
1223 * 'help_new_node()' - Create a new node and add it to an index.
1224 */
1225
1226 static help_node_t * /* O - Node pointer or NULL on error */
1227 help_new_node(const char *filename, /* I - Filename */
1228 const char *anchor, /* I - Anchor */
1229 const char *section, /* I - Section */
1230 const char *text, /* I - Text */
1231 time_t mtime, /* I - Modification time */
1232 off_t offset, /* I - Offset in file */
1233 size_t length) /* I - Length in bytes */
1234 {
1235 help_node_t *n; /* Node */
1236
1237
1238 DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1239 "mtime=%ld, offset=%ld, length=%ld)\n",
1240 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
1241 text ? text : "(nil)", (long)mtime, (long)offset,
1242 (long)length));
1243
1244 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1245 if (!n)
1246 return (NULL);
1247
1248 n->filename = strdup(filename);
1249 n->anchor = anchor ? strdup(anchor) : NULL;
1250 n->section = (section && *section) ? strdup(section) : NULL;
1251 n->text = strdup(text);
1252 n->mtime = mtime;
1253 n->offset = offset;
1254 n->length = length;
1255
1256 return (n);
1257 }
1258
1259
1260 /*
1261 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1262 */
1263
1264 static int /* O - Difference */
1265 help_sort_by_name(help_node_t *n1, /* I - First node */
1266 help_node_t *n2) /* I - Second node */
1267 {
1268 int diff; /* Difference */
1269
1270
1271 DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1272 n1, n1->filename, n1->anchor ? n1->anchor : "",
1273 n2, n2->filename, n2->anchor ? n2->anchor : ""));
1274
1275 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1276 return (diff);
1277
1278 if (!n1->anchor && !n2->anchor)
1279 return (0);
1280 else if (!n1->anchor)
1281 return (-1);
1282 else if (!n2->anchor)
1283 return (1);
1284 else
1285 return (strcmp(n1->anchor, n2->anchor));
1286 }
1287
1288
1289 /*
1290 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1291 */
1292
1293 static int /* O - Difference */
1294 help_sort_by_score(help_node_t *n1, /* I - First node */
1295 help_node_t *n2) /* I - Second node */
1296 {
1297 int diff; /* Difference */
1298
1299
1300 DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1301 "n2=%p(%d \"%s\" \"%s\")\n",
1302 n1, n1->score, n1->section ? n1->section : "", n1->text,
1303 n2, n2->score, n2->section ? n2->section : "", n2->text));
1304
1305 if (n1->score != n2->score)
1306 return (n1->score - n2->score);
1307
1308 if (n1->section && !n2->section)
1309 return (1);
1310 else if (!n1->section && n2->section)
1311 return (-1);
1312 else if (n1->section && n2->section &&
1313 (diff = strcmp(n1->section, n2->section)) != 0)
1314 return (diff);
1315
1316 return (strcasecmp(n1->text, n2->text));
1317 }
1318
1319
1320 /*
1321 * 'help_sort_words()' - Sort words alphabetically.
1322 */
1323
1324 static int /* O - Difference */
1325 help_sort_words(help_word_t *w1, /* I - Second word */
1326 help_word_t *w2) /* I - Second word */
1327 {
1328 DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1329 w1, w1->text, w2, w2->text));
1330
1331 return (strcasecmp(w1->text, w2->text));
1332 }
1333
1334
1335 /*
1336 * End of "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $".
1337 */