]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Merge changes from CUPS 1.4svn-r7961.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
b19ccc9e 2 * "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $"
ef416fc2 3 *
79e1d494 4 * Online help index routines for the Common UNIX Printing System (CUPS).
ef416fc2 5 *
ae71f5de 6 * Copyright 2007-2008 by Apple Inc.
f7deaa1a 7 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 8 *
9 * These coded instructions, statements, and computer programs are the
bc44d920 10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 14 *
15 * Contents:
16 *
17 * helpDeleteIndex() - Delete an index, freeing all memory used.
18 * helpFindNode() - Find a node in an index.
19 * helpLoadIndex() - Load a help index from disk.
20 * helpSaveIndex() - Save a help index to disk.
21 * helpSearchIndex() - Search an index.
f7deaa1a 22 * help_add_word() - Add a word to a node.
ef416fc2 23 * help_compile_search() - Convert a search string into a regular expression.
ef416fc2 24 * help_delete_node() - Free all memory used by a node.
f7deaa1a 25 * help_delete_word() - Free all memory used by a word.
ef416fc2 26 * help_load_directory() - Load a directory of files into an index.
27 * help_load_file() - Load an HTML file into an index.
28 * help_new_node() - Create a new node.
29 * help_sort_by_name() - Sort nodes by filename and anchor.
30 * help_sort_by_score() - Sort nodes by score, section, and text.
f7deaa1a 31 * help_sort_words() - Sort words alphabetically.
ef416fc2 32 */
33
34/*
35 * Include necessary headers...
36 */
37
38#include "cgi-private.h"
39#include <cups/dir.h>
40
41
/*
 * List of common English words that should not be indexed...
 *
 * The table is looked up with bsearch() in help_load_file(), so the
 * entries MUST remain in ascending alphabetical order.  Every entry is at
 * most 5 characters long so that it fits, with its terminating nul, in the
 * fixed 6-byte element.  The table is never modified, hence "const".
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",    "at",    "be",
  "been",  "but",   "by",    "call",  "can",   "come",  "could", "day",
  "did",   "do",    "down",  "each",  "find",  "first", "for",   "from",
  "go",    "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",   "if",    "in",    "is",    "it",    "know",  "like",
  "long",  "look",  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",   "of",    "on",    "one",   "or",    "other", "out",
  "over",  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there", "these",
  "they",  "thing", "this",  "time",  "to",    "two",   "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",  "which",
  "who",   "will",  "with",  "word",  "would", "write", "you",   "your"
};
145
146
ef416fc2 147/*
148 * Local functions...
149 */
150
f7deaa1a 151static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 152static void help_delete_node(help_node_t *n);
f7deaa1a 153static void help_delete_word(help_word_t *w);
ef416fc2 154static int help_load_directory(help_index_t *hi,
155 const char *directory,
156 const char *relative);
157static int help_load_file(help_index_t *hi,
158 const char *filename,
159 const char *relative,
160 time_t mtime);
161static help_node_t *help_new_node(const char *filename, const char *anchor,
162 const char *section, const char *text,
163 time_t mtime, off_t offset,
164 size_t length);
ecdc0628 165static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
166static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 167static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 168
169
170/*
171 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
172 */
173
174void
ecdc0628 175helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 176{
ecdc0628 177 help_node_t *node; /* Current node */
ef416fc2 178
179
180 DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
181
182 if (!hi)
183 return;
184
ecdc0628 185 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
186 node;
187 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 188 {
ecdc0628 189 if (!hi->search)
190 help_delete_node(node);
191 }
ef416fc2 192
ecdc0628 193 cupsArrayDelete(hi->nodes);
194 cupsArrayDelete(hi->sorted);
ef416fc2 195
196 free(hi);
197}
198
199
200/*
201 * 'helpFindNode()' - Find a node in an index.
202 */
203
ecdc0628 204help_node_t * /* O - Node pointer or NULL */
ef416fc2 205helpFindNode(help_index_t *hi, /* I - Index */
206 const char *filename, /* I - Filename */
207 const char *anchor) /* I - Anchor */
208{
ecdc0628 209 help_node_t key; /* Search key */
ef416fc2 210
211
212 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
213 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
214
215 /*
216 * Range check input...
217 */
218
219 if (!hi || !filename)
220 return (NULL);
221
222 /*
223 * Initialize the search key...
224 */
225
226 key.filename = (char *)filename;
227 key.anchor = (char *)anchor;
ef416fc2 228
229 /*
230 * Return any match...
231 */
232
ecdc0628 233 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 234}
235
236
237/*
238 * 'helpLoadIndex()' - Load a help index from disk.
239 */
240
241help_index_t * /* O - Index pointer or NULL */
242helpLoadIndex(const char *hifile, /* I - Index filename */
243 const char *directory) /* I - Directory that is indexed */
244{
245 help_index_t *hi; /* Help index */
246 cups_file_t *fp; /* Current file */
247 char line[2048], /* Line from file */
248 *ptr, /* Pointer into line */
249 *filename, /* Filename in line */
250 *anchor, /* Anchor in line */
251 *sectptr, /* Section pointer in line */
252 section[1024], /* Section name */
253 *text; /* Text in line */
254 time_t mtime; /* Modification time */
255 off_t offset; /* Offset into file */
256 size_t length; /* Length in bytes */
257 int update; /* Update? */
ef416fc2 258 help_node_t *node; /* Current node */
f7deaa1a 259 help_word_t *word; /* Current word */
ef416fc2 260
261
262 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
263 hifile, directory));
264
265 /*
266 * Create a new, empty index.
267 */
268
ecdc0628 269 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
270 return (NULL);
271
272 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
273 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
274
275 if (!hi->nodes || !hi->sorted)
276 {
277 cupsArrayDelete(hi->nodes);
278 cupsArrayDelete(hi->sorted);
279 free(hi);
280 return (NULL);
281 }
ef416fc2 282
283 /*
284 * Try loading the existing index file...
285 */
286
287 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
288 {
289 /*
290 * Lock the file and then read the first line...
291 */
292
293 cupsFileLock(fp, 1);
294
f7deaa1a 295 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 296 {
297 /*
298 * Got a valid header line, now read the data lines...
299 */
300
f7deaa1a 301 node = NULL;
302
ef416fc2 303 while (cupsFileGets(fp, line, sizeof(line)))
304 {
305 /*
306 * Each line looks like one of the following:
307 *
308 * filename mtime offset length "section" "text"
309 * filename#anchor offset length "text"
f7deaa1a 310 * SP count word
ef416fc2 311 */
312
f7deaa1a 313 if (line[0] == ' ')
ef416fc2 314 {
f7deaa1a 315 /*
316 * Read a word in the current node...
317 */
ef416fc2 318
f7deaa1a 319 if (!node || (ptr = strrchr(line, ' ')) == NULL)
320 continue;
ef416fc2 321
f7deaa1a 322 if ((word = help_add_word(node, ptr + 1)) != NULL)
323 word->count = atoi(line + 1);
324 }
325 else
ef416fc2 326 {
327 /*
f7deaa1a 328 * Add a node...
ef416fc2 329 */
330
f7deaa1a 331 filename = line;
ef416fc2 332
f7deaa1a 333 if ((ptr = strchr(line, ' ')) == NULL)
334 break;
ef416fc2 335
f7deaa1a 336 while (isspace(*ptr & 255))
337 *ptr++ = '\0';
ef416fc2 338
f7deaa1a 339 if ((anchor = strrchr(filename, '#')) != NULL)
340 {
341 *anchor++ = '\0';
342 mtime = 0;
343 }
344 else
345 mtime = strtol(ptr, &ptr, 10);
ef416fc2 346
f7deaa1a 347 offset = strtoll(ptr, &ptr, 10);
348 length = strtoll(ptr, &ptr, 10);
ef416fc2 349
350 while (isspace(*ptr & 255))
351 ptr ++;
ef416fc2 352
f7deaa1a 353 if (!anchor)
354 {
355 /*
356 * Get section...
357 */
ef416fc2 358
f7deaa1a 359 if (*ptr != '\"')
360 break;
ef416fc2 361
f7deaa1a 362 ptr ++;
363 sectptr = ptr;
ef416fc2 364
f7deaa1a 365 while (*ptr && *ptr != '\"')
366 ptr ++;
367
368 if (*ptr != '\"')
369 break;
ef416fc2 370
f7deaa1a 371 *ptr++ = '\0';
ef416fc2 372
f7deaa1a 373 strlcpy(section, sectptr, sizeof(section));
ef416fc2 374
f7deaa1a 375 while (isspace(*ptr & 255))
376 ptr ++;
377 }
ecdc0628 378
f7deaa1a 379 if (*ptr != '\"')
380 break;
381
382 ptr ++;
383 text = ptr;
384
385 while (*ptr && *ptr != '\"')
386 ptr ++;
387
388 if (*ptr != '\"')
389 break;
390
391 *ptr++ = '\0';
392
393 if ((node = help_new_node(filename, anchor, section, text,
394 mtime, offset, length)) == NULL)
395 break;
396
397 node->score = -1;
398
399 cupsArrayAdd(hi->nodes, node);
400 }
ef416fc2 401 }
402 }
403
404 cupsFileClose(fp);
405 }
406
407 /*
408 * Scan for new/updated files...
409 */
410
411 update = help_load_directory(hi, directory, NULL);
412
413 /*
414 * Remove any files that are no longer installed...
415 */
416
ecdc0628 417 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
418 node;
419 node = (help_node_t *)cupsArrayNext(hi->nodes))
420 if (node->score < 0)
ef416fc2 421 {
422 /*
423 * Delete this node...
424 */
425
ecdc0628 426 cupsArrayRemove(hi->nodes, node);
427 help_delete_node(node);
ef416fc2 428 }
ef416fc2 429
430 /*
ecdc0628 431 * Add nodes to the sorted array...
ef416fc2 432 */
433
ecdc0628 434 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
435 node;
436 node = (help_node_t *)cupsArrayNext(hi->nodes))
437 cupsArrayAdd(hi->sorted, node);
ef416fc2 438
439 /*
ecdc0628 440 * Save the index if we updated it...
ef416fc2 441 */
442
ecdc0628 443 if (update)
444 helpSaveIndex(hi, hifile);
ef416fc2 445
446 /*
447 * Return the index...
448 */
449
450 return (hi);
451}
452
453
454/*
455 * 'helpSaveIndex()' - Save a help index to disk.
456 */
457
458int /* O - 0 on success, -1 on error */
459helpSaveIndex(help_index_t *hi, /* I - Index */
460 const char *hifile) /* I - Index filename */
461{
462 cups_file_t *fp; /* Index file */
ef416fc2 463 help_node_t *node; /* Current node */
f7deaa1a 464 help_word_t *word; /* Current word */
ef416fc2 465
466
467 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
468
469 /*
470 * Try creating a new index file...
471 */
472
473 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
474 return (-1);
475
476 /*
477 * Lock the file while we write it...
478 */
479
480 cupsFileLock(fp, 1);
481
f7deaa1a 482 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 483
ecdc0628 484 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
485 node;
486 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 487 {
488 /*
489 * Write the current node with/without the anchor...
490 */
491
ef416fc2 492 if (node->anchor)
493 {
494 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
495 node->filename, node->anchor,
496 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
497 node->text) < 0)
498 break;
499 }
500 else
501 {
502 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 503 node->filename, (int)node->mtime,
ef416fc2 504 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
505 node->section ? node->section : "", node->text) < 0)
506 break;
507 }
f7deaa1a 508
509 /*
510 * Then write the words associated with the node...
511 */
512
513 for (word = (help_word_t *)cupsArrayFirst(node->words);
514 word;
515 word = (help_word_t *)cupsArrayNext(node->words))
516 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
517 break;
ef416fc2 518 }
519
ecdc0628 520 cupsFileFlush(fp);
521
ef416fc2 522 if (cupsFileClose(fp) < 0)
523 return (-1);
ecdc0628 524 else if (node)
ef416fc2 525 return (-1);
526 else
527 return (0);
528}
529
530
531/*
532 * 'helpSearchIndex()' - Search an index.
533 */
534
535help_index_t * /* O - Search index */
536helpSearchIndex(help_index_t *hi, /* I - Index */
537 const char *query, /* I - Query string */
538 const char *section, /* I - Limit search to this section */
539 const char *filename) /* I - Limit search to this file */
540{
ef416fc2 541 help_index_t *search; /* Search index */
ecdc0628 542 help_node_t *node; /* Current node */
f7deaa1a 543 help_word_t *word; /* Current word */
ef416fc2 544 void *sc; /* Search context */
545 int matches; /* Number of matches */
546
547
548 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
549 hi, query ? query : "(nil)",
550 filename ? filename : "(nil)"));
551
552 /*
553 * Range check...
554 */
555
556 if (!hi || !query)
557 return (NULL);
558
ecdc0628 559 /*
560 * Reset the scores of all nodes to 0...
561 */
562
563 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
564 node;
565 node = (help_node_t *)cupsArrayNext(hi->nodes))
566 node->score = 0;
567
568 /*
569 * Find the first node to search in...
570 */
ef416fc2 571
572 if (filename)
573 {
ecdc0628 574 node = helpFindNode(hi, filename, NULL);
575 if (!node)
ef416fc2 576 return (NULL);
577 }
578 else
ecdc0628 579 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 580
581 /*
582 * Convert the query into a regular expression...
583 */
584
585 sc = cgiCompileSearch(query);
586 if (!sc)
587 return (NULL);
588
589 /*
590 * Allocate a search index...
591 */
592
593 search = calloc(1, sizeof(help_index_t));
594 if (!search)
595 {
596 cgiFreeSearch(sc);
597 return (NULL);
598 }
599
ecdc0628 600 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
601 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
602
603 if (!search->nodes || !search->sorted)
604 {
605 cupsArrayDelete(search->nodes);
606 cupsArrayDelete(search->sorted);
607 free(search);
608 cgiFreeSearch(sc);
609 return (NULL);
610 }
611
ef416fc2 612 search->search = 1;
613
614 /*
615 * Check each node in the index, adding matching nodes to the
616 * search index...
617 */
618
ecdc0628 619 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
620 if (section && strcmp(node->section, section))
ef416fc2 621 continue;
ecdc0628 622 else if (filename && strcmp(node->filename, filename))
ef416fc2 623 continue;
f7deaa1a 624 else
ef416fc2 625 {
f7deaa1a 626 matches = cgiDoSearch(sc, node->text);
627
628 for (word = (help_word_t *)cupsArrayFirst(node->words);
629 word;
630 word = (help_word_t *)cupsArrayNext(node->words))
631 if (cgiDoSearch(sc, word->text) > 0)
632 matches += word->count;
ef416fc2 633
f7deaa1a 634 if (matches > 0)
635 {
636 /*
637 * Found a match, add the node to the search index...
638 */
ef416fc2 639
f7deaa1a 640 node->score = matches;
641
642 cupsArrayAdd(search->nodes, node);
643 cupsArrayAdd(search->sorted, node);
644 }
ef416fc2 645 }
646
647 /*
648 * Free the search context...
649 */
650
651 cgiFreeSearch(sc);
652
ef416fc2 653 /*
654 * Return the results...
655 */
656
657 return (search);
658}
659
660
f7deaa1a 661/*
662 * 'help_add_word()' - Add a word to a node.
663 */
664
665static help_word_t * /* O - New word */
666help_add_word(help_node_t *n, /* I - Node */
667 const char *text) /* I - Word text */
668{
669 help_word_t *w, /* New word */
670 key; /* Search key */
671
672
673 DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
674
675 /*
676 * Create the words array as needed...
677 */
678
679 if (!n->words)
680 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
681
682 /*
683 * See if the word is already added...
684 */
685
686 key.text = (char *)text;
687
688 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
689 {
690 /*
691 * Create a new word...
692 */
693
694 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
695 return (NULL);
696
697 if ((w->text = strdup(text)) == NULL)
698 {
699 free(w);
700 return (NULL);
701 }
702
703 cupsArrayAdd(n->words, w);
704 }
705
706 /*
707 * Bump the counter for this word and return it...
708 */
709
710 w->count ++;
711
712 return (w);
713}
714
715
ef416fc2 716/*
717 * 'help_delete_node()' - Free all memory used by a node.
718 */
719
720static void
721help_delete_node(help_node_t *n) /* I - Node */
722{
f7deaa1a 723 help_word_t *w; /* Current word */
724
725
ef416fc2 726 DEBUG_printf(("help_delete_node(n=%p)\n", n));
727
728 if (!n)
729 return;
730
731 if (n->filename)
732 free(n->filename);
733
734 if (n->anchor)
735 free(n->anchor);
736
737 if (n->section)
738 free(n->section);
739
740 if (n->text)
741 free(n->text);
742
f7deaa1a 743 for (w = (help_word_t *)cupsArrayFirst(n->words);
744 w;
745 w = (help_word_t *)cupsArrayNext(n->words))
746 help_delete_word(w);
747
748 cupsArrayDelete(n->words);
749
ef416fc2 750 free(n);
751}
752
753
f7deaa1a 754/*
755 * 'help_delete_word()' - Free all memory used by a word.
756 */
757
758static void
759help_delete_word(help_word_t *w) /* I - Word */
760{
761 DEBUG_printf(("help_delete_word(w=%p)\n", w));
762
763 if (!w)
764 return;
765
766 if (w->text)
767 free(w->text);
768
769 free(w);
770}
771
772
ef416fc2 773/*
774 * 'help_load_directory()' - Load a directory of files into an index.
775 */
776
777static int /* O - 0 = success, -1 = error, 1 = updated */
778help_load_directory(
779 help_index_t *hi, /* I - Index */
780 const char *directory, /* I - Directory */
781 const char *relative) /* I - Relative path */
782{
ef416fc2 783 cups_dir_t *dir; /* Directory file */
784 cups_dentry_t *dent; /* Directory entry */
785 char *ext, /* Pointer to extension */
786 filename[1024], /* Full filename */
787 relname[1024]; /* Relative filename */
788 int update; /* Updated? */
ecdc0628 789 help_node_t *node; /* Current node */
ef416fc2 790
791
792 DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
793 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
794
795 /*
796 * Open the directory and scan it...
797 */
798
799 if ((dir = cupsDirOpen(directory)) == NULL)
800 return (0);
801
802 update = 0;
803
804 while ((dent = cupsDirRead(dir)) != NULL)
805 {
ecdc0628 806 /*
807 * Skip "." files...
808 */
809
810 if (dent->filename[0] == '.')
811 continue;
812
ef416fc2 813 /*
814 * Get absolute and relative filenames...
815 */
816
817 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
818 if (relative)
819 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
820 else
821 strlcpy(relname, dent->filename, sizeof(relname));
822
823 /*
824 * Check if we have a HTML file...
825 */
826
827 if ((ext = strstr(dent->filename, ".html")) != NULL &&
828 (!ext[5] || !strcmp(ext + 5, ".gz")))
829 {
830 /*
831 * HTML file, see if we have already indexed the file...
832 */
833
834 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
835 {
836 /*
837 * File already indexed - check dates to confirm that the
838 * index is up-to-date...
839 */
840
ecdc0628 841 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 842 {
843 /*
844 * Same modification time, so mark all of the nodes
845 * for this file as up-to-date...
846 */
847
ecdc0628 848 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
849 if (!strcmp(node->filename, relname))
850 node->score = 0;
ef416fc2 851 else
852 break;
853
854 continue;
855 }
856 }
857
858 update = 1;
859
860 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
861 }
862 else if (S_ISDIR(dent->fileinfo.st_mode))
863 {
864 /*
865 * Process sub-directory...
866 */
867
868 if (help_load_directory(hi, filename, relname) == 1)
869 update = 1;
870 }
871 }
872
873 cupsDirClose(dir);
874
875 return (update);
876}
877
878
879/*
880 * 'help_load_file()' - Load a HTML files into an index.
881 */
882
883static int /* O - 0 = success, -1 = error */
884help_load_file(
885 help_index_t *hi, /* I - Index */
886 const char *filename, /* I - Filename */
887 const char *relative, /* I - Relative path */
888 time_t mtime) /* I - Modification time */
889{
890 cups_file_t *fp; /* HTML file */
ecdc0628 891 help_node_t *node; /* Current node */
ef416fc2 892 char line[1024], /* Line from file */
f42414bf 893 temp[1024], /* Temporary word */
ef416fc2 894 section[1024], /* Section */
895 *ptr, /* Pointer into line */
896 *anchor, /* Anchor name */
897 *text; /* Text for anchor */
898 off_t offset; /* File offset */
899 char quote; /* Quote character */
f7deaa1a 900 help_word_t *word; /* Current word */
901 int wordlen; /* Length of word */
ef416fc2 902
903
904 DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
905 hi, filename ? filename : "(nil)",
906 relative ? relative : "(nil)", mtime));
907
908 if ((fp = cupsFileOpen(filename, "r")) == NULL)
909 return (-1);
910
911 node = NULL;
912 offset = 0;
913
914 strcpy(section, "Other");
915
916 while (cupsFileGets(fp, line, sizeof(line)))
917 {
918 /*
919 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
920 */
921
922 if (!strncasecmp(line, "<!-- SECTION:", 13))
923 {
924 /*
925 * Got section line, copy it!
926 */
927
928 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
929
930 strlcpy(section, ptr, sizeof(section));
931 if ((ptr = strstr(section, "-->")) != NULL)
932 {
933 /*
934 * Strip comment stuff from end of line...
935 */
936
937 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
938
939 if (isspace(*ptr & 255))
940 *ptr = '\0';
941 }
942 continue;
943 }
944
945 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
946 {
947 ptr ++;
948
949 if (!strncasecmp(ptr, "TITLE>", 6))
950 {
951 /*
952 * Found the title...
953 */
954
955 anchor = NULL;
956 ptr += 6;
957 }
958 else if (!strncasecmp(ptr, "A NAME=", 7))
959 {
960 /*
961 * Found an anchor...
962 */
963
964 ptr += 7;
965
966 if (*ptr == '\"' || *ptr == '\'')
967 {
968 /*
969 * Get quoted anchor...
970 */
971
972 quote = *ptr;
973 anchor = ptr + 1;
974 if ((ptr = strchr(anchor, quote)) != NULL)
975 *ptr++ = '\0';
976 else
977 break;
978 }
979 else
980 {
981 /*
982 * Get unquoted anchor...
983 */
984
985 anchor = ptr + 1;
986
987 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
988
989 if (*ptr)
990 *ptr++ = '\0';
991 else
992 break;
993 }
994
995 /*
996 * Got the anchor, now lets find the end...
997 */
998
999 while (*ptr && *ptr != '>')
1000 ptr ++;
1001
1002 if (*ptr != '>')
1003 break;
1004
1005 ptr ++;
1006 }
1007 else
1008 continue;
1009
1010 /*
1011 * Now collect text for the link...
1012 */
1013
1014 text = ptr;
1015 while ((ptr = strchr(text, '<')) == NULL)
1016 {
1017 ptr = text + strlen(text);
1018 if (ptr >= (line + sizeof(line) - 2))
1019 break;
1020
1021 *ptr++ = ' ';
1022
1023 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1024 break;
1025 }
1026
1027 *ptr = '\0';
1028
1029 if (node)
1030 node->length = offset - node->offset;
1031
1032 if (!*text)
1033 {
1034 node = NULL;
1035 break;
1036 }
1037
ecdc0628 1038 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1039 {
1040 /*
1041 * Node already in the index, so replace the text and other
1042 * data...
1043 */
1044
ecdc0628 1045 cupsArrayRemove(hi->nodes, node);
ef416fc2 1046
1047 if (node->section)
1048 free(node->section);
1049
1050 if (node->text)
1051 free(node->text);
1052
f7deaa1a 1053 if (node->words)
1054 {
1055 for (word = (help_word_t *)cupsArrayFirst(node->words);
1056 word;
1057 word = (help_word_t *)cupsArrayNext(node->words))
1058 help_delete_word(word);
1059
1060 cupsArrayDelete(node->words);
1061 node->words = NULL;
1062 }
1063
ef416fc2 1064 node->section = section[0] ? strdup(section) : NULL;
1065 node->text = strdup(text);
1066 node->mtime = mtime;
1067 node->offset = offset;
1068 node->score = 0;
1069 }
1070 else
1071 {
1072 /*
1073 * New node...
1074 */
1075
1076 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1077 }
1078
1079 /*
1080 * Go through the text value and replace tabs and newlines with
1081 * whitespace and eliminate extra whitespace...
1082 */
1083
1084 for (ptr = node->text, text = node->text; *ptr;)
1085 if (isspace(*ptr & 255))
1086 {
1087 while (isspace(*ptr & 255))
ed486911 1088 ptr ++;
ef416fc2 1089
1090 *text++ = ' ';
1091 }
1092 else if (text != ptr)
1093 *text++ = *ptr++;
1094 else
1095 {
1096 text ++;
1097 ptr ++;
1098 }
1099
1100 *text = '\0';
1101
ecdc0628 1102 /*
1103 * (Re)add the node to the array...
1104 */
1105
1106 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1107
1108 if (!anchor)
1109 node = NULL;
ef416fc2 1110 break;
1111 }
1112
f7deaa1a 1113 if (node)
1114 {
1115 /*
1116 * Scan this line for words...
1117 */
1118
1119 for (ptr = line; *ptr; ptr ++)
1120 {
1121 /*
1122 * Skip HTML stuff...
1123 */
1124
1125 if (*ptr == '<')
1126 {
1127 if (!strncmp(ptr, "<!--", 4))
1128 {
1129 /*
1130 * Skip HTML comment...
1131 */
1132
1133 if ((text = strstr(ptr + 4, "-->")) == NULL)
1134 ptr += strlen(ptr) - 1;
1135 else
1136 ptr = text + 2;
1137 }
1138 else
1139 {
1140 /*
1141 * Skip HTML element...
1142 */
1143
1144 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1145 {
f7deaa1a 1146 if (*ptr == '\"' || *ptr == '\'')
1147 {
1148 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1149
1150 if (!*ptr)
1151 ptr --;
1152 }
f42414bf 1153 }
f7deaa1a 1154
1155 if (!*ptr)
1156 ptr --;
1157 }
1158
1159 continue;
1160 }
1161 else if (*ptr == '&')
1162 {
1163 /*
1164 * Skip HTML entity...
1165 */
1166
1167 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1168
1169 if (!*ptr)
1170 ptr --;
1171
1172 continue;
1173 }
1174 else if (!isalnum(*ptr & 255))
1175 continue;
1176
1177 /*
1178 * Found the start of a word, search until we find the end...
1179 */
1180
1181 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1182
1183 wordlen = ptr - text;
1184
f42414bf 1185 memcpy(temp, text, wordlen);
1186 temp[wordlen] = '\0';
1187
1188 ptr --;
f7deaa1a 1189
f42414bf 1190 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1191 (sizeof(help_common_words) /
1192 sizeof(help_common_words[0])),
1193 sizeof(help_common_words[0]),
1194 (int (*)(const void *, const void *))
1195 strcasecmp))
f42414bf 1196 help_add_word(node, temp);
f7deaa1a 1197 }
1198 }
1199
ef416fc2 1200 /*
1201 * Get the offset of the next line...
1202 */
1203
1204 offset = cupsFileTell(fp);
1205 }
1206
1207 cupsFileClose(fp);
1208
1209 if (node)
1210 node->length = offset - node->offset;
1211
1212 return (0);
1213}
1214
1215
1216/*
1217 * 'help_new_node()' - Create a new node and add it to an index.
1218 */
1219
1220static help_node_t * /* O - Node pointer or NULL on error */
1221help_new_node(const char *filename, /* I - Filename */
1222 const char *anchor, /* I - Anchor */
1223 const char *section, /* I - Section */
1224 const char *text, /* I - Text */
1225 time_t mtime, /* I - Modification time */
1226 off_t offset, /* I - Offset in file */
1227 size_t length) /* I - Length in bytes */
1228{
1229 help_node_t *n; /* Node */
1230
1231
ecdc0628 1232 DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1233 "mtime=%ld, offset=%ld, length=%ld)\n",
ef416fc2 1234 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
ecdc0628 1235 text ? text : "(nil)", (long)mtime, (long)offset,
1236 (long)length));
ef416fc2 1237
1238 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1239 if (!n)
1240 return (NULL);
1241
1242 n->filename = strdup(filename);
1243 n->anchor = anchor ? strdup(anchor) : NULL;
1244 n->section = (section && *section) ? strdup(section) : NULL;
1245 n->text = strdup(text);
1246 n->mtime = mtime;
1247 n->offset = offset;
1248 n->length = length;
1249
1250 return (n);
1251}
1252
1253
1254/*
1255 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1256 */
1257
1258static int /* O - Difference */
ecdc0628 1259help_sort_by_name(help_node_t *n1, /* I - First node */
1260 help_node_t *n2) /* I - Second node */
ef416fc2 1261{
ef416fc2 1262 int diff; /* Difference */
1263
1264
ecdc0628 1265 DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1266 n1, n1->filename, n1->anchor ? n1->anchor : "",
1267 n2, n2->filename, n2->anchor ? n2->anchor : ""));
ef416fc2 1268
ecdc0628 1269 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1270 return (diff);
1271
ecdc0628 1272 if (!n1->anchor && !n2->anchor)
ef416fc2 1273 return (0);
ecdc0628 1274 else if (!n1->anchor)
ef416fc2 1275 return (-1);
ecdc0628 1276 else if (!n2->anchor)
ef416fc2 1277 return (1);
1278 else
ecdc0628 1279 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1280}
1281
1282
1283/*
1284 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1285 */
1286
1287static int /* O - Difference */
ecdc0628 1288help_sort_by_score(help_node_t *n1, /* I - First node */
1289 help_node_t *n2) /* I - Second node */
ef416fc2 1290{
ef416fc2 1291 int diff; /* Difference */
1292
1293
ecdc0628 1294 DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1295 "n2=%p(%d \"%s\" \"%s\")\n",
1296 n1, n1->score, n1->section ? n1->section : "", n1->text,
1297 n2, n2->score, n2->section ? n2->section : "", n2->text));
ef416fc2 1298
ecdc0628 1299 if (n1->score != n2->score)
1f0275e3 1300 return (n2->score - n1->score);
ef416fc2 1301
ecdc0628 1302 if (n1->section && !n2->section)
ef416fc2 1303 return (1);
ecdc0628 1304 else if (!n1->section && n2->section)
ef416fc2 1305 return (-1);
ecdc0628 1306 else if (n1->section && n2->section &&
1307 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1308 return (diff);
1309
ecdc0628 1310 return (strcasecmp(n1->text, n2->text));
ef416fc2 1311}
1312
1313
1314/*
f7deaa1a 1315 * 'help_sort_words()' - Sort words alphabetically.
1316 */
1317
1318static int /* O - Difference */
1319help_sort_words(help_word_t *w1, /* I - Second word */
1320 help_word_t *w2) /* I - Second word */
1321{
1322 DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1323 w1, w1->text, w2, w2->text));
1324
1325 return (strcasecmp(w1->text, w2->text));
1326}
1327
1328
1329/*
b19ccc9e 1330 * End of "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $".
ef416fc2 1331 */