]> git.ipfire.org Git - thirdparty/cups.git/blob - cgi-bin/help-index.c
Merge changes from CUPS 1.6svn-r9968.
[thirdparty/cups.git] / cgi-bin / help-index.c
1 /*
2 * "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $"
3 *
4 * Online help index routines for CUPS.
5 *
6 * Copyright 2007-2011 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * file is missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * Contents:
16 *
17 * helpDeleteIndex() - Delete an index, freeing all memory used.
18 * helpFindNode() - Find a node in an index.
19 * helpLoadIndex() - Load a help index from disk.
20 * helpSaveIndex() - Save a help index to disk.
21 * helpSearchIndex() - Search an index.
22 * help_add_word() - Add a word to a node.
23 * help_compile_search() - Convert a search string into a regular expression.
24 * help_delete_node() - Free all memory used by a node.
25 * help_delete_word() - Free all memory used by a word.
26 * help_load_directory() - Load a directory of files into an index.
27 * help_load_file() - Load an HTML file into an index.
28 * help_new_node() - Create a new node and add it to an index.
29 * help_sort_by_name() - Sort nodes by filename and anchor.
30 * help_sort_by_score() - Sort nodes by score, section, and text.
31 * help_sort_words() - Sort words alphabetically.
32 */
33
34 /*
35 * Include necessary headers...
36 */
37
38 #include "cgi-private.h"
39 #include <cups/dir.h>
40
41
/*
 * List of common English words that should not be indexed...
 *
 * This table is searched with bsearch() and a case-insensitive string
 * comparison, so the entries MUST remain in ascending alphabetical order.
 * Each row is 6 bytes wide, so a word may be at most 5 characters long
 * (plus the terminating nul).  The table is never modified at run time.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
145
146
147 /*
148 * Local functions...
149 */
150
151 static help_word_t *help_add_word(help_node_t *n, const char *text);
152 static void help_delete_node(help_node_t *n);
153 static void help_delete_word(help_word_t *w);
154 static int help_load_directory(help_index_t *hi,
155 const char *directory,
156 const char *relative);
157 static int help_load_file(help_index_t *hi,
158 const char *filename,
159 const char *relative,
160 time_t mtime);
161 static help_node_t *help_new_node(const char *filename, const char *anchor,
162 const char *section, const char *text,
163 time_t mtime, off_t offset,
164 size_t length)
165 __attribute__((nonnull(1,3,4)));
166 static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
167 static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
168 static int help_sort_words(help_word_t *w1, help_word_t *w2);
169
170
171 /*
172 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
173 */
174
175 void
176 helpDeleteIndex(help_index_t *hi) /* I - Help index */
177 {
178 help_node_t *node; /* Current node */
179
180
181 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
182
183 if (!hi)
184 return;
185
186 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
187 node;
188 node = (help_node_t *)cupsArrayNext(hi->nodes))
189 {
190 if (!hi->search)
191 help_delete_node(node);
192 }
193
194 cupsArrayDelete(hi->nodes);
195 cupsArrayDelete(hi->sorted);
196
197 free(hi);
198 }
199
200
201 /*
202 * 'helpFindNode()' - Find a node in an index.
203 */
204
205 help_node_t * /* O - Node pointer or NULL */
206 helpFindNode(help_index_t *hi, /* I - Index */
207 const char *filename, /* I - Filename */
208 const char *anchor) /* I - Anchor */
209 {
210 help_node_t key; /* Search key */
211
212
213 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
214 hi, filename, anchor));
215
216 /*
217 * Range check input...
218 */
219
220 if (!hi || !filename)
221 return (NULL);
222
223 /*
224 * Initialize the search key...
225 */
226
227 key.filename = (char *)filename;
228 key.anchor = (char *)anchor;
229
230 /*
231 * Return any match...
232 */
233
234 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
235 }
236
237
238 /*
239 * 'helpLoadIndex()' - Load a help index from disk.
240 */
241
242 help_index_t * /* O - Index pointer or NULL */
243 helpLoadIndex(const char *hifile, /* I - Index filename */
244 const char *directory) /* I - Directory that is indexed */
245 {
246 help_index_t *hi; /* Help index */
247 cups_file_t *fp; /* Current file */
248 char line[2048], /* Line from file */
249 *ptr, /* Pointer into line */
250 *filename, /* Filename in line */
251 *anchor, /* Anchor in line */
252 *sectptr, /* Section pointer in line */
253 section[1024], /* Section name */
254 *text; /* Text in line */
255 time_t mtime; /* Modification time */
256 off_t offset; /* Offset into file */
257 size_t length; /* Length in bytes */
258 int update; /* Update? */
259 help_node_t *node; /* Current node */
260 help_word_t *word; /* Current word */
261
262
263 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
264 hifile, directory));
265
266 /*
267 * Create a new, empty index.
268 */
269
270 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
271 return (NULL);
272
273 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
274 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
275
276 if (!hi->nodes || !hi->sorted)
277 {
278 cupsArrayDelete(hi->nodes);
279 cupsArrayDelete(hi->sorted);
280 free(hi);
281 return (NULL);
282 }
283
284 /*
285 * Try loading the existing index file...
286 */
287
288 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
289 {
290 /*
291 * Lock the file and then read the first line...
292 */
293
294 cupsFileLock(fp, 1);
295
296 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
297 {
298 /*
299 * Got a valid header line, now read the data lines...
300 */
301
302 node = NULL;
303
304 while (cupsFileGets(fp, line, sizeof(line)))
305 {
306 /*
307 * Each line looks like one of the following:
308 *
309 * filename mtime offset length "section" "text"
310 * filename#anchor offset length "text"
311 * SP count word
312 */
313
314 if (line[0] == ' ')
315 {
316 /*
317 * Read a word in the current node...
318 */
319
320 if (!node || (ptr = strrchr(line, ' ')) == NULL)
321 continue;
322
323 if ((word = help_add_word(node, ptr + 1)) != NULL)
324 word->count = atoi(line + 1);
325 }
326 else
327 {
328 /*
329 * Add a node...
330 */
331
332 filename = line;
333
334 if ((ptr = strchr(line, ' ')) == NULL)
335 break;
336
337 while (isspace(*ptr & 255))
338 *ptr++ = '\0';
339
340 if ((anchor = strrchr(filename, '#')) != NULL)
341 {
342 *anchor++ = '\0';
343 mtime = 0;
344 }
345 else
346 mtime = strtol(ptr, &ptr, 10);
347
348 offset = strtoll(ptr, &ptr, 10);
349 length = strtoll(ptr, &ptr, 10);
350
351 while (isspace(*ptr & 255))
352 ptr ++;
353
354 if (!anchor)
355 {
356 /*
357 * Get section...
358 */
359
360 if (*ptr != '\"')
361 break;
362
363 ptr ++;
364 sectptr = ptr;
365
366 while (*ptr && *ptr != '\"')
367 ptr ++;
368
369 if (*ptr != '\"')
370 break;
371
372 *ptr++ = '\0';
373
374 strlcpy(section, sectptr, sizeof(section));
375
376 while (isspace(*ptr & 255))
377 ptr ++;
378 }
379
380 if (*ptr != '\"')
381 break;
382
383 ptr ++;
384 text = ptr;
385
386 while (*ptr && *ptr != '\"')
387 ptr ++;
388
389 if (*ptr != '\"')
390 break;
391
392 *ptr++ = '\0';
393
394 if ((node = help_new_node(filename, anchor, section, text,
395 mtime, offset, length)) == NULL)
396 break;
397
398 node->score = -1;
399
400 cupsArrayAdd(hi->nodes, node);
401 }
402 }
403 }
404
405 cupsFileClose(fp);
406 }
407
408 /*
409 * Scan for new/updated files...
410 */
411
412 update = help_load_directory(hi, directory, NULL);
413
414 /*
415 * Remove any files that are no longer installed...
416 */
417
418 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
419 node;
420 node = (help_node_t *)cupsArrayNext(hi->nodes))
421 if (node->score < 0)
422 {
423 /*
424 * Delete this node...
425 */
426
427 cupsArrayRemove(hi->nodes, node);
428 help_delete_node(node);
429 }
430
431 /*
432 * Add nodes to the sorted array...
433 */
434
435 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
436 node;
437 node = (help_node_t *)cupsArrayNext(hi->nodes))
438 cupsArrayAdd(hi->sorted, node);
439
440 /*
441 * Save the index if we updated it...
442 */
443
444 if (update)
445 helpSaveIndex(hi, hifile);
446
447 /*
448 * Return the index...
449 */
450
451 return (hi);
452 }
453
454
455 /*
456 * 'helpSaveIndex()' - Save a help index to disk.
457 */
458
459 int /* O - 0 on success, -1 on error */
460 helpSaveIndex(help_index_t *hi, /* I - Index */
461 const char *hifile) /* I - Index filename */
462 {
463 cups_file_t *fp; /* Index file */
464 help_node_t *node; /* Current node */
465 help_word_t *word; /* Current word */
466
467
468 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
469
470 /*
471 * Try creating a new index file...
472 */
473
474 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
475 return (-1);
476
477 /*
478 * Lock the file while we write it...
479 */
480
481 cupsFileLock(fp, 1);
482
483 cupsFilePuts(fp, "HELPV2\n");
484
485 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
486 node;
487 node = (help_node_t *)cupsArrayNext(hi->nodes))
488 {
489 /*
490 * Write the current node with/without the anchor...
491 */
492
493 if (node->anchor)
494 {
495 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
496 node->filename, node->anchor,
497 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
498 node->text) < 0)
499 break;
500 }
501 else
502 {
503 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
504 node->filename, (int)node->mtime,
505 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
506 node->section ? node->section : "", node->text) < 0)
507 break;
508 }
509
510 /*
511 * Then write the words associated with the node...
512 */
513
514 for (word = (help_word_t *)cupsArrayFirst(node->words);
515 word;
516 word = (help_word_t *)cupsArrayNext(node->words))
517 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
518 break;
519 }
520
521 cupsFileFlush(fp);
522
523 if (cupsFileClose(fp) < 0)
524 return (-1);
525 else if (node)
526 return (-1);
527 else
528 return (0);
529 }
530
531
532 /*
533 * 'helpSearchIndex()' - Search an index.
534 */
535
536 help_index_t * /* O - Search index */
537 helpSearchIndex(help_index_t *hi, /* I - Index */
538 const char *query, /* I - Query string */
539 const char *section, /* I - Limit search to this section */
540 const char *filename) /* I - Limit search to this file */
541 {
542 help_index_t *search; /* Search index */
543 help_node_t *node; /* Current node */
544 help_word_t *word; /* Current word */
545 void *sc; /* Search context */
546 int matches; /* Number of matches */
547
548
549 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
550 hi, query, filename));
551
552 /*
553 * Range check...
554 */
555
556 if (!hi || !query)
557 return (NULL);
558
559 /*
560 * Reset the scores of all nodes to 0...
561 */
562
563 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
564 node;
565 node = (help_node_t *)cupsArrayNext(hi->nodes))
566 node->score = 0;
567
568 /*
569 * Find the first node to search in...
570 */
571
572 if (filename)
573 {
574 node = helpFindNode(hi, filename, NULL);
575 if (!node)
576 return (NULL);
577 }
578 else
579 node = (help_node_t *)cupsArrayFirst(hi->nodes);
580
581 /*
582 * Convert the query into a regular expression...
583 */
584
585 sc = cgiCompileSearch(query);
586 if (!sc)
587 return (NULL);
588
589 /*
590 * Allocate a search index...
591 */
592
593 search = calloc(1, sizeof(help_index_t));
594 if (!search)
595 {
596 cgiFreeSearch(sc);
597 return (NULL);
598 }
599
600 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
601 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
602
603 if (!search->nodes || !search->sorted)
604 {
605 cupsArrayDelete(search->nodes);
606 cupsArrayDelete(search->sorted);
607 free(search);
608 cgiFreeSearch(sc);
609 return (NULL);
610 }
611
612 search->search = 1;
613
614 /*
615 * Check each node in the index, adding matching nodes to the
616 * search index...
617 */
618
619 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
620 if (section && strcmp(node->section, section))
621 continue;
622 else if (filename && strcmp(node->filename, filename))
623 continue;
624 else
625 {
626 matches = cgiDoSearch(sc, node->text);
627
628 for (word = (help_word_t *)cupsArrayFirst(node->words);
629 word;
630 word = (help_word_t *)cupsArrayNext(node->words))
631 if (cgiDoSearch(sc, word->text) > 0)
632 matches += word->count;
633
634 if (matches > 0)
635 {
636 /*
637 * Found a match, add the node to the search index...
638 */
639
640 node->score = matches;
641
642 cupsArrayAdd(search->nodes, node);
643 cupsArrayAdd(search->sorted, node);
644 }
645 }
646
647 /*
648 * Free the search context...
649 */
650
651 cgiFreeSearch(sc);
652
653 /*
654 * Return the results...
655 */
656
657 return (search);
658 }
659
660
661 /*
662 * 'help_add_word()' - Add a word to a node.
663 */
664
665 static help_word_t * /* O - New word */
666 help_add_word(help_node_t *n, /* I - Node */
667 const char *text) /* I - Word text */
668 {
669 help_word_t *w, /* New word */
670 key; /* Search key */
671
672
673 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
674
675 /*
676 * Create the words array as needed...
677 */
678
679 if (!n->words)
680 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
681
682 /*
683 * See if the word is already added...
684 */
685
686 key.text = (char *)text;
687
688 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
689 {
690 /*
691 * Create a new word...
692 */
693
694 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
695 return (NULL);
696
697 if ((w->text = strdup(text)) == NULL)
698 {
699 free(w);
700 return (NULL);
701 }
702
703 cupsArrayAdd(n->words, w);
704 }
705
706 /*
707 * Bump the counter for this word and return it...
708 */
709
710 w->count ++;
711
712 return (w);
713 }
714
715
716 /*
717 * 'help_delete_node()' - Free all memory used by a node.
718 */
719
720 static void
721 help_delete_node(help_node_t *n) /* I - Node */
722 {
723 help_word_t *w; /* Current word */
724
725
726 DEBUG_printf(("2help_delete_node(n=%p)", n));
727
728 if (!n)
729 return;
730
731 if (n->filename)
732 free(n->filename);
733
734 if (n->anchor)
735 free(n->anchor);
736
737 if (n->section)
738 free(n->section);
739
740 if (n->text)
741 free(n->text);
742
743 for (w = (help_word_t *)cupsArrayFirst(n->words);
744 w;
745 w = (help_word_t *)cupsArrayNext(n->words))
746 help_delete_word(w);
747
748 cupsArrayDelete(n->words);
749
750 free(n);
751 }
752
753
754 /*
755 * 'help_delete_word()' - Free all memory used by a word.
756 */
757
758 static void
759 help_delete_word(help_word_t *w) /* I - Word */
760 {
761 DEBUG_printf(("2help_delete_word(w=%p)", w));
762
763 if (!w)
764 return;
765
766 if (w->text)
767 free(w->text);
768
769 free(w);
770 }
771
772
773 /*
774 * 'help_load_directory()' - Load a directory of files into an index.
775 */
776
777 static int /* O - 0 = success, -1 = error, 1 = updated */
778 help_load_directory(
779 help_index_t *hi, /* I - Index */
780 const char *directory, /* I - Directory */
781 const char *relative) /* I - Relative path */
782 {
783 cups_dir_t *dir; /* Directory file */
784 cups_dentry_t *dent; /* Directory entry */
785 char *ext, /* Pointer to extension */
786 filename[1024], /* Full filename */
787 relname[1024]; /* Relative filename */
788 int update; /* Updated? */
789 help_node_t *node; /* Current node */
790
791
792 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
793 hi, directory, relative));
794
795 /*
796 * Open the directory and scan it...
797 */
798
799 if ((dir = cupsDirOpen(directory)) == NULL)
800 return (0);
801
802 update = 0;
803
804 while ((dent = cupsDirRead(dir)) != NULL)
805 {
806 /*
807 * Skip "." files...
808 */
809
810 if (dent->filename[0] == '.')
811 continue;
812
813 /*
814 * Get absolute and relative filenames...
815 */
816
817 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
818 if (relative)
819 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
820 else
821 strlcpy(relname, dent->filename, sizeof(relname));
822
823 /*
824 * Check if we have a HTML file...
825 */
826
827 if ((ext = strstr(dent->filename, ".html")) != NULL &&
828 (!ext[5] || !strcmp(ext + 5, ".gz")))
829 {
830 /*
831 * HTML file, see if we have already indexed the file...
832 */
833
834 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
835 {
836 /*
837 * File already indexed - check dates to confirm that the
838 * index is up-to-date...
839 */
840
841 if (node->mtime == dent->fileinfo.st_mtime)
842 {
843 /*
844 * Same modification time, so mark all of the nodes
845 * for this file as up-to-date...
846 */
847
848 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
849 if (!strcmp(node->filename, relname))
850 node->score = 0;
851 else
852 break;
853
854 continue;
855 }
856 }
857
858 update = 1;
859
860 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
861 }
862 else if (S_ISDIR(dent->fileinfo.st_mode))
863 {
864 /*
865 * Process sub-directory...
866 */
867
868 if (help_load_directory(hi, filename, relname) == 1)
869 update = 1;
870 }
871 }
872
873 cupsDirClose(dir);
874
875 return (update);
876 }
877
878
879 /*
880 * 'help_load_file()' - Load a HTML files into an index.
881 */
882
883 static int /* O - 0 = success, -1 = error */
884 help_load_file(
885 help_index_t *hi, /* I - Index */
886 const char *filename, /* I - Filename */
887 const char *relative, /* I - Relative path */
888 time_t mtime) /* I - Modification time */
889 {
890 cups_file_t *fp; /* HTML file */
891 help_node_t *node; /* Current node */
892 char line[1024], /* Line from file */
893 temp[1024], /* Temporary word */
894 section[1024], /* Section */
895 *ptr, /* Pointer into line */
896 *anchor, /* Anchor name */
897 *text; /* Text for anchor */
898 off_t offset; /* File offset */
899 char quote; /* Quote character */
900 help_word_t *word; /* Current word */
901 int wordlen; /* Length of word */
902
903
904 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
905 "mtime=%ld)", hi, filename, relative, mtime));
906
907 if ((fp = cupsFileOpen(filename, "r")) == NULL)
908 return (-1);
909
910 node = NULL;
911 offset = 0;
912
913 strcpy(section, "Other");
914
915 while (cupsFileGets(fp, line, sizeof(line)))
916 {
917 /*
918 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
919 */
920
921 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
922 {
923 /*
924 * Got section line, copy it!
925 */
926
927 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
928
929 strlcpy(section, ptr, sizeof(section));
930 if ((ptr = strstr(section, "-->")) != NULL)
931 {
932 /*
933 * Strip comment stuff from end of line...
934 */
935
936 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
937
938 if (isspace(*ptr & 255))
939 *ptr = '\0';
940 }
941 continue;
942 }
943
944 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
945 {
946 ptr ++;
947
948 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
949 {
950 /*
951 * Found the title...
952 */
953
954 anchor = NULL;
955 ptr += 6;
956 }
957 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
958 {
959 /*
960 * Found an anchor...
961 */
962
963 ptr += 7;
964
965 if (*ptr == '\"' || *ptr == '\'')
966 {
967 /*
968 * Get quoted anchor...
969 */
970
971 quote = *ptr;
972 anchor = ptr + 1;
973 if ((ptr = strchr(anchor, quote)) != NULL)
974 *ptr++ = '\0';
975 else
976 break;
977 }
978 else
979 {
980 /*
981 * Get unquoted anchor...
982 */
983
984 anchor = ptr + 1;
985
986 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
987
988 if (*ptr)
989 *ptr++ = '\0';
990 else
991 break;
992 }
993
994 /*
995 * Got the anchor, now lets find the end...
996 */
997
998 while (*ptr && *ptr != '>')
999 ptr ++;
1000
1001 if (*ptr != '>')
1002 break;
1003
1004 ptr ++;
1005 }
1006 else
1007 continue;
1008
1009 /*
1010 * Now collect text for the link...
1011 */
1012
1013 text = ptr;
1014 while ((ptr = strchr(text, '<')) == NULL)
1015 {
1016 ptr = text + strlen(text);
1017 if (ptr >= (line + sizeof(line) - 2))
1018 break;
1019
1020 *ptr++ = ' ';
1021
1022 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1023 break;
1024 }
1025
1026 *ptr = '\0';
1027
1028 if (node)
1029 node->length = offset - node->offset;
1030
1031 if (!*text)
1032 {
1033 node = NULL;
1034 break;
1035 }
1036
1037 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1038 {
1039 /*
1040 * Node already in the index, so replace the text and other
1041 * data...
1042 */
1043
1044 cupsArrayRemove(hi->nodes, node);
1045
1046 if (node->section)
1047 free(node->section);
1048
1049 if (node->text)
1050 free(node->text);
1051
1052 if (node->words)
1053 {
1054 for (word = (help_word_t *)cupsArrayFirst(node->words);
1055 word;
1056 word = (help_word_t *)cupsArrayNext(node->words))
1057 help_delete_word(word);
1058
1059 cupsArrayDelete(node->words);
1060 node->words = NULL;
1061 }
1062
1063 node->section = section[0] ? strdup(section) : NULL;
1064 node->text = strdup(text);
1065 node->mtime = mtime;
1066 node->offset = offset;
1067 node->score = 0;
1068 }
1069 else
1070 {
1071 /*
1072 * New node...
1073 */
1074
1075 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1076 }
1077
1078 /*
1079 * Go through the text value and replace tabs and newlines with
1080 * whitespace and eliminate extra whitespace...
1081 */
1082
1083 for (ptr = node->text, text = node->text; *ptr;)
1084 if (isspace(*ptr & 255))
1085 {
1086 while (isspace(*ptr & 255))
1087 ptr ++;
1088
1089 *text++ = ' ';
1090 }
1091 else if (text != ptr)
1092 *text++ = *ptr++;
1093 else
1094 {
1095 text ++;
1096 ptr ++;
1097 }
1098
1099 *text = '\0';
1100
1101 /*
1102 * (Re)add the node to the array...
1103 */
1104
1105 cupsArrayAdd(hi->nodes, node);
1106
1107 if (!anchor)
1108 node = NULL;
1109 break;
1110 }
1111
1112 if (node)
1113 {
1114 /*
1115 * Scan this line for words...
1116 */
1117
1118 for (ptr = line; *ptr; ptr ++)
1119 {
1120 /*
1121 * Skip HTML stuff...
1122 */
1123
1124 if (*ptr == '<')
1125 {
1126 if (!strncmp(ptr, "<!--", 4))
1127 {
1128 /*
1129 * Skip HTML comment...
1130 */
1131
1132 if ((text = strstr(ptr + 4, "-->")) == NULL)
1133 ptr += strlen(ptr) - 1;
1134 else
1135 ptr = text + 2;
1136 }
1137 else
1138 {
1139 /*
1140 * Skip HTML element...
1141 */
1142
1143 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1144 {
1145 if (*ptr == '\"' || *ptr == '\'')
1146 {
1147 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1148
1149 if (!*ptr)
1150 ptr --;
1151 }
1152 }
1153
1154 if (!*ptr)
1155 ptr --;
1156 }
1157
1158 continue;
1159 }
1160 else if (*ptr == '&')
1161 {
1162 /*
1163 * Skip HTML entity...
1164 */
1165
1166 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1167
1168 if (!*ptr)
1169 ptr --;
1170
1171 continue;
1172 }
1173 else if (!isalnum(*ptr & 255))
1174 continue;
1175
1176 /*
1177 * Found the start of a word, search until we find the end...
1178 */
1179
1180 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1181
1182 wordlen = ptr - text;
1183
1184 memcpy(temp, text, wordlen);
1185 temp[wordlen] = '\0';
1186
1187 ptr --;
1188
1189 if (wordlen > 1 && !bsearch(temp, help_common_words,
1190 (sizeof(help_common_words) /
1191 sizeof(help_common_words[0])),
1192 sizeof(help_common_words[0]),
1193 (int (*)(const void *, const void *))
1194 _cups_strcasecmp))
1195 help_add_word(node, temp);
1196 }
1197 }
1198
1199 /*
1200 * Get the offset of the next line...
1201 */
1202
1203 offset = cupsFileTell(fp);
1204 }
1205
1206 cupsFileClose(fp);
1207
1208 if (node)
1209 node->length = offset - node->offset;
1210
1211 return (0);
1212 }
1213
1214
1215 /*
1216 * 'help_new_node()' - Create a new node and add it to an index.
1217 */
1218
1219 static help_node_t * /* O - Node pointer or NULL on error */
1220 help_new_node(const char *filename, /* I - Filename */
1221 const char *anchor, /* I - Anchor */
1222 const char *section, /* I - Section */
1223 const char *text, /* I - Text */
1224 time_t mtime, /* I - Modification time */
1225 off_t offset, /* I - Offset in file */
1226 size_t length) /* I - Length in bytes */
1227 {
1228 help_node_t *n; /* Node */
1229
1230
1231 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1232 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1233 (long)mtime, (long)offset, (long)length));
1234
1235 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1236 if (!n)
1237 return (NULL);
1238
1239 n->filename = strdup(filename);
1240 n->anchor = anchor ? strdup(anchor) : NULL;
1241 n->section = (section && *section) ? strdup(section) : NULL;
1242 n->text = strdup(text);
1243 n->mtime = mtime;
1244 n->offset = offset;
1245 n->length = length;
1246
1247 return (n);
1248 }
1249
1250
1251 /*
1252 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1253 */
1254
1255 static int /* O - Difference */
1256 help_sort_by_name(help_node_t *n1, /* I - First node */
1257 help_node_t *n2) /* I - Second node */
1258 {
1259 int diff; /* Difference */
1260
1261
1262 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1263 n1, n1->filename, n1->anchor,
1264 n2, n2->filename, n2->anchor));
1265
1266 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1267 return (diff);
1268
1269 if (!n1->anchor && !n2->anchor)
1270 return (0);
1271 else if (!n1->anchor)
1272 return (-1);
1273 else if (!n2->anchor)
1274 return (1);
1275 else
1276 return (strcmp(n1->anchor, n2->anchor));
1277 }
1278
1279
1280 /*
1281 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1282 */
1283
1284 static int /* O - Difference */
1285 help_sort_by_score(help_node_t *n1, /* I - First node */
1286 help_node_t *n2) /* I - Second node */
1287 {
1288 int diff; /* Difference */
1289
1290
1291 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1292 "n2=%p(%d \"%s\" \"%s\")",
1293 n1, n1->score, n1->section, n1->text,
1294 n2, n2->score, n2->section, n2->text));
1295
1296 if (n1->score != n2->score)
1297 return (n2->score - n1->score);
1298
1299 if (n1->section && !n2->section)
1300 return (1);
1301 else if (!n1->section && n2->section)
1302 return (-1);
1303 else if (n1->section && n2->section &&
1304 (diff = strcmp(n1->section, n2->section)) != 0)
1305 return (diff);
1306
1307 return (_cups_strcasecmp(n1->text, n2->text));
1308 }
1309
1310
1311 /*
1312 * 'help_sort_words()' - Sort words alphabetically.
1313 */
1314
1315 static int /* O - Difference */
1316 help_sort_words(help_word_t *w1, /* I - Second word */
1317 help_word_t *w2) /* I - Second word */
1318 {
1319 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1320 w1, w1->text, w2, w2->text));
1321
1322 return (_cups_strcasecmp(w1->text, w2->text));
1323 }
1324
1325
1326 /*
1327 * End of "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $".
1328 */