]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Load cups into easysw/current.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
f7deaa1a 2 * "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $"
ef416fc2 3 *
4 * On-line help index routines for the Common UNIX Printing System (CUPS).
5 *
f7deaa1a 6 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 7 *
8 * These coded instructions, statements, and computer programs are the
9 * property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the file
11 * "LICENSE.txt" which should have been included with this file. If this
12 * file is missing or damaged please contact Easy Software Products
13 * at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * helpDeleteIndex() - Delete an index, freeing all memory used.
27 * helpFindNode() - Find a node in an index.
28 * helpLoadIndex() - Load a help index from disk.
29 * helpSaveIndex() - Save a help index to disk.
30 * helpSearchIndex() - Search an index.
f7deaa1a 31 * help_add_word() - Add a word to a node.
ef416fc2 32 * help_compile_search() - Convert a search string into a regular expression.
ef416fc2 33 * help_delete_node() - Free all memory used by a node.
f7deaa1a 34 * help_delete_word() - Free all memory used by a word.
ef416fc2 35 * help_load_directory() - Load a directory of files into an index.
36 * help_load_file() - Load an HTML file into an index.
37 * help_new_node() - Create a new node.
38 * help_sort_by_name() - Sort nodes by filename and anchor.
39 * help_sort_by_score() - Sort nodes by score, section, and text.
f7deaa1a 40 * help_sort_words() - Sort words alphabetically.
ef416fc2 41 */
42
43/*
44 * Include necessary headers...
45 */
46
47#include "cgi-private.h"
48#include <cups/dir.h>
49
50
f7deaa1a 51/*
52 * List of common English words that should not be indexed...
53 */
54
static const char help_common_words[][6] =
{
 /*
  * NOTE: This table is searched with bsearch() using a case-insensitive
  * string comparison, so the entries must stay in ascending alphabetical
  * order.  Each entry is at most 5 characters plus the nul terminator.
  */

  "about", "all",   "an",    "and",   "are",   "as",    "at",    "be",
  "been",  "but",   "by",    "call",  "can",   "come",  "could", "day",
  "did",   "do",    "down",  "each",  "find",  "first", "for",   "from",
  "go",    "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",   "if",    "in",    "is",    "it",    "know",  "like",
  "long",  "look",  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",   "of",    "on",    "one",   "or",    "other", "out",
  "over",  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there", "these",
  "they",  "thing", "this",  "time",  "to",    "two",   "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",  "which",
  "who",   "will",  "with",  "word",  "would", "write", "you",   "your"
};
154
155
ef416fc2 156/*
157 * Local functions...
158 */
159
f7deaa1a 160static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 161static void help_delete_node(help_node_t *n);
f7deaa1a 162static void help_delete_word(help_word_t *w);
ef416fc2 163static int help_load_directory(help_index_t *hi,
164 const char *directory,
165 const char *relative);
166static int help_load_file(help_index_t *hi,
167 const char *filename,
168 const char *relative,
169 time_t mtime);
170static help_node_t *help_new_node(const char *filename, const char *anchor,
171 const char *section, const char *text,
172 time_t mtime, off_t offset,
173 size_t length);
ecdc0628 174static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
175static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 176static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 177
178
179/*
180 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
181 */
182
183void
ecdc0628 184helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 185{
ecdc0628 186 help_node_t *node; /* Current node */
ef416fc2 187
188
189 DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
190
191 if (!hi)
192 return;
193
ecdc0628 194 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
195 node;
196 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 197 {
ecdc0628 198 if (!hi->search)
199 help_delete_node(node);
200 }
ef416fc2 201
ecdc0628 202 cupsArrayDelete(hi->nodes);
203 cupsArrayDelete(hi->sorted);
ef416fc2 204
205 free(hi);
206}
207
208
209/*
210 * 'helpFindNode()' - Find a node in an index.
211 */
212
ecdc0628 213help_node_t * /* O - Node pointer or NULL */
ef416fc2 214helpFindNode(help_index_t *hi, /* I - Index */
215 const char *filename, /* I - Filename */
216 const char *anchor) /* I - Anchor */
217{
ecdc0628 218 help_node_t key; /* Search key */
ef416fc2 219
220
221 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
222 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
223
224 /*
225 * Range check input...
226 */
227
228 if (!hi || !filename)
229 return (NULL);
230
231 /*
232 * Initialize the search key...
233 */
234
235 key.filename = (char *)filename;
236 key.anchor = (char *)anchor;
ef416fc2 237
238 /*
239 * Return any match...
240 */
241
ecdc0628 242 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 243}
244
245
246/*
247 * 'helpLoadIndex()' - Load a help index from disk.
248 */
249
250help_index_t * /* O - Index pointer or NULL */
251helpLoadIndex(const char *hifile, /* I - Index filename */
252 const char *directory) /* I - Directory that is indexed */
253{
254 help_index_t *hi; /* Help index */
255 cups_file_t *fp; /* Current file */
256 char line[2048], /* Line from file */
257 *ptr, /* Pointer into line */
258 *filename, /* Filename in line */
259 *anchor, /* Anchor in line */
260 *sectptr, /* Section pointer in line */
261 section[1024], /* Section name */
262 *text; /* Text in line */
263 time_t mtime; /* Modification time */
264 off_t offset; /* Offset into file */
265 size_t length; /* Length in bytes */
266 int update; /* Update? */
ef416fc2 267 help_node_t *node; /* Current node */
f7deaa1a 268 help_word_t *word; /* Current word */
ef416fc2 269
270
271 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
272 hifile, directory));
273
274 /*
275 * Create a new, empty index.
276 */
277
ecdc0628 278 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
279 return (NULL);
280
281 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
282 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
283
284 if (!hi->nodes || !hi->sorted)
285 {
286 cupsArrayDelete(hi->nodes);
287 cupsArrayDelete(hi->sorted);
288 free(hi);
289 return (NULL);
290 }
ef416fc2 291
292 /*
293 * Try loading the existing index file...
294 */
295
296 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
297 {
298 /*
299 * Lock the file and then read the first line...
300 */
301
302 cupsFileLock(fp, 1);
303
f7deaa1a 304 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 305 {
306 /*
307 * Got a valid header line, now read the data lines...
308 */
309
f7deaa1a 310 node = NULL;
311
ef416fc2 312 while (cupsFileGets(fp, line, sizeof(line)))
313 {
314 /*
315 * Each line looks like one of the following:
316 *
317 * filename mtime offset length "section" "text"
318 * filename#anchor offset length "text"
f7deaa1a 319 * SP count word
ef416fc2 320 */
321
f7deaa1a 322 if (line[0] == ' ')
ef416fc2 323 {
f7deaa1a 324 /*
325 * Read a word in the current node...
326 */
ef416fc2 327
f7deaa1a 328 if (!node || (ptr = strrchr(line, ' ')) == NULL)
329 continue;
ef416fc2 330
f7deaa1a 331 if ((word = help_add_word(node, ptr + 1)) != NULL)
332 word->count = atoi(line + 1);
333 }
334 else
ef416fc2 335 {
336 /*
f7deaa1a 337 * Add a node...
ef416fc2 338 */
339
f7deaa1a 340 filename = line;
ef416fc2 341
f7deaa1a 342 if ((ptr = strchr(line, ' ')) == NULL)
343 break;
ef416fc2 344
f7deaa1a 345 while (isspace(*ptr & 255))
346 *ptr++ = '\0';
ef416fc2 347
f7deaa1a 348 if ((anchor = strrchr(filename, '#')) != NULL)
349 {
350 *anchor++ = '\0';
351 mtime = 0;
352 }
353 else
354 mtime = strtol(ptr, &ptr, 10);
ef416fc2 355
f7deaa1a 356 offset = strtoll(ptr, &ptr, 10);
357 length = strtoll(ptr, &ptr, 10);
ef416fc2 358
359 while (isspace(*ptr & 255))
360 ptr ++;
ef416fc2 361
f7deaa1a 362 if (!anchor)
363 {
364 /*
365 * Get section...
366 */
ef416fc2 367
f7deaa1a 368 if (*ptr != '\"')
369 break;
ef416fc2 370
f7deaa1a 371 ptr ++;
372 sectptr = ptr;
ef416fc2 373
f7deaa1a 374 while (*ptr && *ptr != '\"')
375 ptr ++;
376
377 if (*ptr != '\"')
378 break;
ef416fc2 379
f7deaa1a 380 *ptr++ = '\0';
ef416fc2 381
f7deaa1a 382 strlcpy(section, sectptr, sizeof(section));
ef416fc2 383
f7deaa1a 384 while (isspace(*ptr & 255))
385 ptr ++;
386 }
ecdc0628 387
f7deaa1a 388 if (*ptr != '\"')
389 break;
390
391 ptr ++;
392 text = ptr;
393
394 while (*ptr && *ptr != '\"')
395 ptr ++;
396
397 if (*ptr != '\"')
398 break;
399
400 *ptr++ = '\0';
401
402 if ((node = help_new_node(filename, anchor, section, text,
403 mtime, offset, length)) == NULL)
404 break;
405
406 node->score = -1;
407
408 cupsArrayAdd(hi->nodes, node);
409 }
ef416fc2 410 }
411 }
412
413 cupsFileClose(fp);
414 }
415
416 /*
417 * Scan for new/updated files...
418 */
419
420 update = help_load_directory(hi, directory, NULL);
421
422 /*
423 * Remove any files that are no longer installed...
424 */
425
ecdc0628 426 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
427 node;
428 node = (help_node_t *)cupsArrayNext(hi->nodes))
429 if (node->score < 0)
ef416fc2 430 {
431 /*
432 * Delete this node...
433 */
434
ecdc0628 435 cupsArrayRemove(hi->nodes, node);
436 help_delete_node(node);
ef416fc2 437 }
ef416fc2 438
439 /*
ecdc0628 440 * Add nodes to the sorted array...
ef416fc2 441 */
442
ecdc0628 443 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
444 node;
445 node = (help_node_t *)cupsArrayNext(hi->nodes))
446 cupsArrayAdd(hi->sorted, node);
ef416fc2 447
448 /*
ecdc0628 449 * Save the index if we updated it...
ef416fc2 450 */
451
ecdc0628 452 if (update)
453 helpSaveIndex(hi, hifile);
ef416fc2 454
455 /*
456 * Return the index...
457 */
458
459 return (hi);
460}
461
462
463/*
464 * 'helpSaveIndex()' - Save a help index to disk.
465 */
466
467int /* O - 0 on success, -1 on error */
468helpSaveIndex(help_index_t *hi, /* I - Index */
469 const char *hifile) /* I - Index filename */
470{
471 cups_file_t *fp; /* Index file */
ef416fc2 472 help_node_t *node; /* Current node */
f7deaa1a 473 help_word_t *word; /* Current word */
ef416fc2 474
475
476 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
477
478 /*
479 * Try creating a new index file...
480 */
481
482 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
483 return (-1);
484
485 /*
486 * Lock the file while we write it...
487 */
488
489 cupsFileLock(fp, 1);
490
f7deaa1a 491 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 492
ecdc0628 493 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
494 node;
495 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 496 {
497 /*
498 * Write the current node with/without the anchor...
499 */
500
ef416fc2 501 if (node->anchor)
502 {
503 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
504 node->filename, node->anchor,
505 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
506 node->text) < 0)
507 break;
508 }
509 else
510 {
511 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
512 node->filename, node->mtime,
513 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
514 node->section ? node->section : "", node->text) < 0)
515 break;
516 }
f7deaa1a 517
518 /*
519 * Then write the words associated with the node...
520 */
521
522 for (word = (help_word_t *)cupsArrayFirst(node->words);
523 word;
524 word = (help_word_t *)cupsArrayNext(node->words))
525 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
526 break;
ef416fc2 527 }
528
ecdc0628 529 cupsFileFlush(fp);
530
ef416fc2 531 if (cupsFileClose(fp) < 0)
532 return (-1);
ecdc0628 533 else if (node)
ef416fc2 534 return (-1);
535 else
536 return (0);
537}
538
539
540/*
541 * 'helpSearchIndex()' - Search an index.
542 */
543
544help_index_t * /* O - Search index */
545helpSearchIndex(help_index_t *hi, /* I - Index */
546 const char *query, /* I - Query string */
547 const char *section, /* I - Limit search to this section */
548 const char *filename) /* I - Limit search to this file */
549{
ef416fc2 550 help_index_t *search; /* Search index */
ecdc0628 551 help_node_t *node; /* Current node */
f7deaa1a 552 help_word_t *word; /* Current word */
ef416fc2 553 void *sc; /* Search context */
554 int matches; /* Number of matches */
555
556
557 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
558 hi, query ? query : "(nil)",
559 filename ? filename : "(nil)"));
560
561 /*
562 * Range check...
563 */
564
565 if (!hi || !query)
566 return (NULL);
567
ecdc0628 568 /*
569 * Reset the scores of all nodes to 0...
570 */
571
572 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
573 node;
574 node = (help_node_t *)cupsArrayNext(hi->nodes))
575 node->score = 0;
576
577 /*
578 * Find the first node to search in...
579 */
ef416fc2 580
581 if (filename)
582 {
ecdc0628 583 node = helpFindNode(hi, filename, NULL);
584 if (!node)
ef416fc2 585 return (NULL);
586 }
587 else
ecdc0628 588 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 589
590 /*
591 * Convert the query into a regular expression...
592 */
593
594 sc = cgiCompileSearch(query);
595 if (!sc)
596 return (NULL);
597
598 /*
599 * Allocate a search index...
600 */
601
602 search = calloc(1, sizeof(help_index_t));
603 if (!search)
604 {
605 cgiFreeSearch(sc);
606 return (NULL);
607 }
608
ecdc0628 609 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
610 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
611
612 if (!search->nodes || !search->sorted)
613 {
614 cupsArrayDelete(search->nodes);
615 cupsArrayDelete(search->sorted);
616 free(search);
617 cgiFreeSearch(sc);
618 return (NULL);
619 }
620
ef416fc2 621 search->search = 1;
622
623 /*
624 * Check each node in the index, adding matching nodes to the
625 * search index...
626 */
627
ecdc0628 628 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
629 if (section && strcmp(node->section, section))
ef416fc2 630 continue;
ecdc0628 631 else if (filename && strcmp(node->filename, filename))
ef416fc2 632 continue;
f7deaa1a 633 else
ef416fc2 634 {
f7deaa1a 635 matches = cgiDoSearch(sc, node->text);
636
637 for (word = (help_word_t *)cupsArrayFirst(node->words);
638 word;
639 word = (help_word_t *)cupsArrayNext(node->words))
640 if (cgiDoSearch(sc, word->text) > 0)
641 matches += word->count;
ef416fc2 642
f7deaa1a 643 if (matches > 0)
644 {
645 /*
646 * Found a match, add the node to the search index...
647 */
ef416fc2 648
f7deaa1a 649 node->score = matches;
650
651 cupsArrayAdd(search->nodes, node);
652 cupsArrayAdd(search->sorted, node);
653 }
ef416fc2 654 }
655
656 /*
657 * Free the search context...
658 */
659
660 cgiFreeSearch(sc);
661
ef416fc2 662 /*
663 * Return the results...
664 */
665
666 return (search);
667}
668
669
f7deaa1a 670/*
671 * 'help_add_word()' - Add a word to a node.
672 */
673
674static help_word_t * /* O - New word */
675help_add_word(help_node_t *n, /* I - Node */
676 const char *text) /* I - Word text */
677{
678 help_word_t *w, /* New word */
679 key; /* Search key */
680
681
682 DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
683
684 /*
685 * Create the words array as needed...
686 */
687
688 if (!n->words)
689 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
690
691 /*
692 * See if the word is already added...
693 */
694
695 key.text = (char *)text;
696
697 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
698 {
699 /*
700 * Create a new word...
701 */
702
703 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
704 return (NULL);
705
706 if ((w->text = strdup(text)) == NULL)
707 {
708 free(w);
709 return (NULL);
710 }
711
712 cupsArrayAdd(n->words, w);
713 }
714
715 /*
716 * Bump the counter for this word and return it...
717 */
718
719 w->count ++;
720
721 return (w);
722}
723
724
ef416fc2 725/*
726 * 'help_delete_node()' - Free all memory used by a node.
727 */
728
729static void
730help_delete_node(help_node_t *n) /* I - Node */
731{
f7deaa1a 732 help_word_t *w; /* Current word */
733
734
ef416fc2 735 DEBUG_printf(("help_delete_node(n=%p)\n", n));
736
737 if (!n)
738 return;
739
740 if (n->filename)
741 free(n->filename);
742
743 if (n->anchor)
744 free(n->anchor);
745
746 if (n->section)
747 free(n->section);
748
749 if (n->text)
750 free(n->text);
751
f7deaa1a 752 for (w = (help_word_t *)cupsArrayFirst(n->words);
753 w;
754 w = (help_word_t *)cupsArrayNext(n->words))
755 help_delete_word(w);
756
757 cupsArrayDelete(n->words);
758
ef416fc2 759 free(n);
760}
761
762
f7deaa1a 763/*
764 * 'help_delete_word()' - Free all memory used by a word.
765 */
766
767static void
768help_delete_word(help_word_t *w) /* I - Word */
769{
770 DEBUG_printf(("help_delete_word(w=%p)\n", w));
771
772 if (!w)
773 return;
774
775 if (w->text)
776 free(w->text);
777
778 free(w);
779}
780
781
ef416fc2 782/*
783 * 'help_load_directory()' - Load a directory of files into an index.
784 */
785
786static int /* O - 0 = success, -1 = error, 1 = updated */
787help_load_directory(
788 help_index_t *hi, /* I - Index */
789 const char *directory, /* I - Directory */
790 const char *relative) /* I - Relative path */
791{
ef416fc2 792 cups_dir_t *dir; /* Directory file */
793 cups_dentry_t *dent; /* Directory entry */
794 char *ext, /* Pointer to extension */
795 filename[1024], /* Full filename */
796 relname[1024]; /* Relative filename */
797 int update; /* Updated? */
ecdc0628 798 help_node_t *node; /* Current node */
ef416fc2 799
800
801 DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
802 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
803
804 /*
805 * Open the directory and scan it...
806 */
807
808 if ((dir = cupsDirOpen(directory)) == NULL)
809 return (0);
810
811 update = 0;
812
813 while ((dent = cupsDirRead(dir)) != NULL)
814 {
ecdc0628 815 /*
816 * Skip "." files...
817 */
818
819 if (dent->filename[0] == '.')
820 continue;
821
ef416fc2 822 /*
823 * Get absolute and relative filenames...
824 */
825
826 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
827 if (relative)
828 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
829 else
830 strlcpy(relname, dent->filename, sizeof(relname));
831
832 /*
833 * Check if we have a HTML file...
834 */
835
836 if ((ext = strstr(dent->filename, ".html")) != NULL &&
837 (!ext[5] || !strcmp(ext + 5, ".gz")))
838 {
839 /*
840 * HTML file, see if we have already indexed the file...
841 */
842
843 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
844 {
845 /*
846 * File already indexed - check dates to confirm that the
847 * index is up-to-date...
848 */
849
ecdc0628 850 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 851 {
852 /*
853 * Same modification time, so mark all of the nodes
854 * for this file as up-to-date...
855 */
856
ecdc0628 857 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
858 if (!strcmp(node->filename, relname))
859 node->score = 0;
ef416fc2 860 else
861 break;
862
863 continue;
864 }
865 }
866
867 update = 1;
868
869 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
870 }
871 else if (S_ISDIR(dent->fileinfo.st_mode))
872 {
873 /*
874 * Process sub-directory...
875 */
876
877 if (help_load_directory(hi, filename, relname) == 1)
878 update = 1;
879 }
880 }
881
882 cupsDirClose(dir);
883
884 return (update);
885}
886
887
888/*
889 * 'help_load_file()' - Load an HTML file into an index.
890 */
891
892static int /* O - 0 = success, -1 = error */
893help_load_file(
894 help_index_t *hi, /* I - Index */
895 const char *filename, /* I - Filename */
896 const char *relative, /* I - Relative path */
897 time_t mtime) /* I - Modification time */
898{
899 cups_file_t *fp; /* HTML file */
ecdc0628 900 help_node_t *node; /* Current node */
ef416fc2 901 char line[1024], /* Line from file */
902 section[1024], /* Section */
903 *ptr, /* Pointer into line */
904 *anchor, /* Anchor name */
905 *text; /* Text for anchor */
906 off_t offset; /* File offset */
907 char quote; /* Quote character */
f7deaa1a 908 help_word_t *word; /* Current word */
909 int wordlen; /* Length of word */
ef416fc2 910
911
912 DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
913 hi, filename ? filename : "(nil)",
914 relative ? relative : "(nil)", mtime));
915
916 if ((fp = cupsFileOpen(filename, "r")) == NULL)
917 return (-1);
918
919 node = NULL;
920 offset = 0;
921
922 strcpy(section, "Other");
923
924 while (cupsFileGets(fp, line, sizeof(line)))
925 {
926 /*
927 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
928 */
929
930 if (!strncasecmp(line, "<!-- SECTION:", 13))
931 {
932 /*
933 * Got section line, copy it!
934 */
935
936 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
937
938 strlcpy(section, ptr, sizeof(section));
939 if ((ptr = strstr(section, "-->")) != NULL)
940 {
941 /*
942 * Strip comment stuff from end of line...
943 */
944
945 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
946
947 if (isspace(*ptr & 255))
948 *ptr = '\0';
949 }
950 continue;
951 }
952
953 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
954 {
955 ptr ++;
956
957 if (!strncasecmp(ptr, "TITLE>", 6))
958 {
959 /*
960 * Found the title...
961 */
962
963 anchor = NULL;
964 ptr += 6;
965 }
966 else if (!strncasecmp(ptr, "A NAME=", 7))
967 {
968 /*
969 * Found an anchor...
970 */
971
972 ptr += 7;
973
974 if (*ptr == '\"' || *ptr == '\'')
975 {
976 /*
977 * Get quoted anchor...
978 */
979
980 quote = *ptr;
981 anchor = ptr + 1;
982 if ((ptr = strchr(anchor, quote)) != NULL)
983 *ptr++ = '\0';
984 else
985 break;
986 }
987 else
988 {
989 /*
990 * Get unquoted anchor...
991 */
992
993 anchor = ptr + 1;
994
995 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
996
997 if (*ptr)
998 *ptr++ = '\0';
999 else
1000 break;
1001 }
1002
1003 /*
1004 * Got the anchor, now lets find the end...
1005 */
1006
1007 while (*ptr && *ptr != '>')
1008 ptr ++;
1009
1010 if (*ptr != '>')
1011 break;
1012
1013 ptr ++;
1014 }
1015 else
1016 continue;
1017
1018 /*
1019 * Now collect text for the link...
1020 */
1021
1022 text = ptr;
1023 while ((ptr = strchr(text, '<')) == NULL)
1024 {
1025 ptr = text + strlen(text);
1026 if (ptr >= (line + sizeof(line) - 2))
1027 break;
1028
1029 *ptr++ = ' ';
1030
1031 if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1032 break;
1033 }
1034
1035 *ptr = '\0';
1036
1037 if (node)
1038 node->length = offset - node->offset;
1039
1040 if (!*text)
1041 {
1042 node = NULL;
1043 break;
1044 }
1045
ecdc0628 1046 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1047 {
1048 /*
1049 * Node already in the index, so replace the text and other
1050 * data...
1051 */
1052
ecdc0628 1053 cupsArrayRemove(hi->nodes, node);
ef416fc2 1054
1055 if (node->section)
1056 free(node->section);
1057
1058 if (node->text)
1059 free(node->text);
1060
f7deaa1a 1061 if (node->words)
1062 {
1063 for (word = (help_word_t *)cupsArrayFirst(node->words);
1064 word;
1065 word = (help_word_t *)cupsArrayNext(node->words))
1066 help_delete_word(word);
1067
1068 cupsArrayDelete(node->words);
1069 node->words = NULL;
1070 }
1071
ef416fc2 1072 node->section = section[0] ? strdup(section) : NULL;
1073 node->text = strdup(text);
1074 node->mtime = mtime;
1075 node->offset = offset;
1076 node->score = 0;
1077 }
1078 else
1079 {
1080 /*
1081 * New node...
1082 */
1083
1084 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1085 }
1086
1087 /*
1088 * Go through the text value and replace tabs and newlines with
1089 * whitespace and eliminate extra whitespace...
1090 */
1091
1092 for (ptr = node->text, text = node->text; *ptr;)
1093 if (isspace(*ptr & 255))
1094 {
1095 while (isspace(*ptr & 255))
ed486911 1096 ptr ++;
ef416fc2 1097
1098 *text++ = ' ';
1099 }
1100 else if (text != ptr)
1101 *text++ = *ptr++;
1102 else
1103 {
1104 text ++;
1105 ptr ++;
1106 }
1107
1108 *text = '\0';
1109
ecdc0628 1110 /*
1111 * (Re)add the node to the array...
1112 */
1113
1114 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1115
1116 if (!anchor)
1117 node = NULL;
ef416fc2 1118 break;
1119 }
1120
f7deaa1a 1121 if (node)
1122 {
1123 /*
1124 * Scan this line for words...
1125 */
1126
1127 for (ptr = line; *ptr; ptr ++)
1128 {
1129 /*
1130 * Skip HTML stuff...
1131 */
1132
1133 if (*ptr == '<')
1134 {
1135 if (!strncmp(ptr, "<!--", 4))
1136 {
1137 /*
1138 * Skip HTML comment...
1139 */
1140
1141 if ((text = strstr(ptr + 4, "-->")) == NULL)
1142 ptr += strlen(ptr) - 1;
1143 else
1144 ptr = text + 2;
1145 }
1146 else
1147 {
1148 /*
1149 * Skip HTML element...
1150 */
1151
1152 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1153 if (*ptr == '\"' || *ptr == '\'')
1154 {
1155 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1156
1157 if (!*ptr)
1158 ptr --;
1159 }
1160
1161 if (!*ptr)
1162 ptr --;
1163 }
1164
1165 continue;
1166 }
1167 else if (*ptr == '&')
1168 {
1169 /*
1170 * Skip HTML entity...
1171 */
1172
1173 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1174
1175 if (!*ptr)
1176 ptr --;
1177
1178 continue;
1179 }
1180 else if (!isalnum(*ptr & 255))
1181 continue;
1182
1183 /*
1184 * Found the start of a word, search until we find the end...
1185 */
1186
1187 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1188
1189 wordlen = ptr - text;
1190
1191 if (*ptr)
1192 *ptr = '\0';
1193 else
1194 ptr --;
1195
1196 if (wordlen > 1 && !bsearch(text, help_common_words,
1197 (sizeof(help_common_words) /
1198 sizeof(help_common_words[0])),
1199 sizeof(help_common_words[0]),
1200 (int (*)(const void *, const void *))
1201 strcasecmp))
1202 help_add_word(node, text);
1203 }
1204 }
1205
ef416fc2 1206 /*
1207 * Get the offset of the next line...
1208 */
1209
1210 offset = cupsFileTell(fp);
1211 }
1212
1213 cupsFileClose(fp);
1214
1215 if (node)
1216 node->length = offset - node->offset;
1217
1218 return (0);
1219}
1220
1221
1222/*
1223 * 'help_new_node()' - Create a new node; the caller adds it to an index.
1224 */
1225
1226static help_node_t * /* O - Node pointer or NULL on error */
1227help_new_node(const char *filename, /* I - Filename */
1228 const char *anchor, /* I - Anchor */
1229 const char *section, /* I - Section */
1230 const char *text, /* I - Text */
1231 time_t mtime, /* I - Modification time */
1232 off_t offset, /* I - Offset in file */
1233 size_t length) /* I - Length in bytes */
1234{
1235 help_node_t *n; /* Node */
1236
1237
ecdc0628 1238 DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1239 "mtime=%ld, offset=%ld, length=%ld)\n",
ef416fc2 1240 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
ecdc0628 1241 text ? text : "(nil)", (long)mtime, (long)offset,
1242 (long)length));
ef416fc2 1243
1244 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1245 if (!n)
1246 return (NULL);
1247
1248 n->filename = strdup(filename);
1249 n->anchor = anchor ? strdup(anchor) : NULL;
1250 n->section = (section && *section) ? strdup(section) : NULL;
1251 n->text = strdup(text);
1252 n->mtime = mtime;
1253 n->offset = offset;
1254 n->length = length;
1255
1256 return (n);
1257}
1258
1259
1260/*
1261 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1262 */
1263
1264static int /* O - Difference */
ecdc0628 1265help_sort_by_name(help_node_t *n1, /* I - First node */
1266 help_node_t *n2) /* I - Second node */
ef416fc2 1267{
ef416fc2 1268 int diff; /* Difference */
1269
1270
ecdc0628 1271 DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1272 n1, n1->filename, n1->anchor ? n1->anchor : "",
1273 n2, n2->filename, n2->anchor ? n2->anchor : ""));
ef416fc2 1274
ecdc0628 1275 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1276 return (diff);
1277
ecdc0628 1278 if (!n1->anchor && !n2->anchor)
ef416fc2 1279 return (0);
ecdc0628 1280 else if (!n1->anchor)
ef416fc2 1281 return (-1);
ecdc0628 1282 else if (!n2->anchor)
ef416fc2 1283 return (1);
1284 else
ecdc0628 1285 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1286}
1287
1288
1289/*
1290 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
1291 */
1292
1293static int /* O - Difference */
ecdc0628 1294help_sort_by_score(help_node_t *n1, /* I - First node */
1295 help_node_t *n2) /* I - Second node */
ef416fc2 1296{
ef416fc2 1297 int diff; /* Difference */
1298
1299
ecdc0628 1300 DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1301 "n2=%p(%d \"%s\" \"%s\")\n",
1302 n1, n1->score, n1->section ? n1->section : "", n1->text,
1303 n2, n2->score, n2->section ? n2->section : "", n2->text));
ef416fc2 1304
ecdc0628 1305 if (n1->score != n2->score)
1306 return (n1->score - n2->score);
ef416fc2 1307
ecdc0628 1308 if (n1->section && !n2->section)
ef416fc2 1309 return (1);
ecdc0628 1310 else if (!n1->section && n2->section)
ef416fc2 1311 return (-1);
ecdc0628 1312 else if (n1->section && n2->section &&
1313 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1314 return (diff);
1315
ecdc0628 1316 return (strcasecmp(n1->text, n2->text));
ef416fc2 1317}
1318
1319
1320/*
f7deaa1a 1321 * 'help_sort_words()' - Sort words alphabetically.
1322 */
1323
1324static int /* O - Difference */
1325help_sort_words(help_word_t *w1, /* I - Second word */
1326 help_word_t *w2) /* I - Second word */
1327{
1328 DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1329 w1, w1->text, w2, w2->text));
1330
1331 return (strcasecmp(w1->text, w2->text));
1332}
1333
1334
1335/*
1336 * End of "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $".
ef416fc2 1337 */