]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Fix build errors on Fedora.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
f2d18633 2 * "$Id$"
ef416fc2 3 *
7e86f2f6 4 * Online help index routines for CUPS.
ef416fc2 5 *
7e86f2f6
MS
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 8 *
7e86f2f6
MS
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 14 */
15
16/*
17 * Include necessary headers...
18 */
19
20#include "cgi-private.h"
21#include <cups/dir.h>
22
23
/*
 * List of common English words that should not be indexed...
 *
 * Every entry is at most five characters long so that it fits, with its
 * terminating nul, in a 6-byte slot.  The table is searched with bsearch()
 * and a case-insensitive comparison, so the entries MUST stay lowercase and
 * in ascending alphabetical order.  The table is never modified at run time
 * and is therefore declared const.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
127
128
ef416fc2 129/*
130 * Local functions...
131 */
132
f7deaa1a 133static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 134static void help_delete_node(help_node_t *n);
f7deaa1a 135static void help_delete_word(help_word_t *w);
ef416fc2 136static int help_load_directory(help_index_t *hi,
137 const char *directory,
138 const char *relative);
139static int help_load_file(help_index_t *hi,
140 const char *filename,
141 const char *relative,
142 time_t mtime);
143static help_node_t *help_new_node(const char *filename, const char *anchor,
144 const char *section, const char *text,
145 time_t mtime, off_t offset,
85dda01c
MS
146 size_t length)
147 __attribute__((nonnull(1,3,4)));
ecdc0628 148static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
149static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 150static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 151
152
153/*
154 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
155 */
156
157void
ecdc0628 158helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 159{
ecdc0628 160 help_node_t *node; /* Current node */
ef416fc2 161
162
85dda01c 163 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
ef416fc2 164
165 if (!hi)
166 return;
167
ecdc0628 168 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
169 node;
170 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 171 {
ecdc0628 172 if (!hi->search)
173 help_delete_node(node);
174 }
ef416fc2 175
ecdc0628 176 cupsArrayDelete(hi->nodes);
177 cupsArrayDelete(hi->sorted);
ef416fc2 178
179 free(hi);
180}
181
182
183/*
184 * 'helpFindNode()' - Find a node in an index.
185 */
186
ecdc0628 187help_node_t * /* O - Node pointer or NULL */
ef416fc2 188helpFindNode(help_index_t *hi, /* I - Index */
189 const char *filename, /* I - Filename */
190 const char *anchor) /* I - Anchor */
191{
ecdc0628 192 help_node_t key; /* Search key */
ef416fc2 193
194
85dda01c
MS
195 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
196 hi, filename, anchor));
ef416fc2 197
198 /*
199 * Range check input...
200 */
201
202 if (!hi || !filename)
203 return (NULL);
204
205 /*
206 * Initialize the search key...
207 */
208
209 key.filename = (char *)filename;
210 key.anchor = (char *)anchor;
ef416fc2 211
212 /*
213 * Return any match...
214 */
215
ecdc0628 216 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 217}
218
219
220/*
221 * 'helpLoadIndex()' - Load a help index from disk.
222 */
223
224help_index_t * /* O - Index pointer or NULL */
225helpLoadIndex(const char *hifile, /* I - Index filename */
226 const char *directory) /* I - Directory that is indexed */
227{
228 help_index_t *hi; /* Help index */
229 cups_file_t *fp; /* Current file */
230 char line[2048], /* Line from file */
231 *ptr, /* Pointer into line */
232 *filename, /* Filename in line */
233 *anchor, /* Anchor in line */
234 *sectptr, /* Section pointer in line */
235 section[1024], /* Section name */
236 *text; /* Text in line */
237 time_t mtime; /* Modification time */
238 off_t offset; /* Offset into file */
239 size_t length; /* Length in bytes */
240 int update; /* Update? */
ef416fc2 241 help_node_t *node; /* Current node */
f7deaa1a 242 help_word_t *word; /* Current word */
ef416fc2 243
244
85dda01c 245 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
ef416fc2 246 hifile, directory));
247
248 /*
249 * Create a new, empty index.
250 */
251
ecdc0628 252 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
253 return (NULL);
254
255 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
256 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
257
258 if (!hi->nodes || !hi->sorted)
259 {
260 cupsArrayDelete(hi->nodes);
261 cupsArrayDelete(hi->sorted);
262 free(hi);
263 return (NULL);
264 }
ef416fc2 265
266 /*
267 * Try loading the existing index file...
268 */
269
270 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
271 {
272 /*
273 * Lock the file and then read the first line...
274 */
275
276 cupsFileLock(fp, 1);
277
f7deaa1a 278 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 279 {
280 /*
281 * Got a valid header line, now read the data lines...
282 */
283
f7deaa1a 284 node = NULL;
285
ef416fc2 286 while (cupsFileGets(fp, line, sizeof(line)))
287 {
288 /*
289 * Each line looks like one of the following:
290 *
291 * filename mtime offset length "section" "text"
292 * filename#anchor offset length "text"
f7deaa1a 293 * SP count word
ef416fc2 294 */
295
f7deaa1a 296 if (line[0] == ' ')
ef416fc2 297 {
f7deaa1a 298 /*
299 * Read a word in the current node...
300 */
ef416fc2 301
f7deaa1a 302 if (!node || (ptr = strrchr(line, ' ')) == NULL)
303 continue;
ef416fc2 304
f7deaa1a 305 if ((word = help_add_word(node, ptr + 1)) != NULL)
306 word->count = atoi(line + 1);
307 }
308 else
ef416fc2 309 {
310 /*
f7deaa1a 311 * Add a node...
ef416fc2 312 */
313
f7deaa1a 314 filename = line;
ef416fc2 315
f7deaa1a 316 if ((ptr = strchr(line, ' ')) == NULL)
317 break;
ef416fc2 318
f7deaa1a 319 while (isspace(*ptr & 255))
320 *ptr++ = '\0';
ef416fc2 321
f7deaa1a 322 if ((anchor = strrchr(filename, '#')) != NULL)
323 {
324 *anchor++ = '\0';
325 mtime = 0;
326 }
327 else
328 mtime = strtol(ptr, &ptr, 10);
ef416fc2 329
f7deaa1a 330 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 331 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 332
333 while (isspace(*ptr & 255))
334 ptr ++;
ef416fc2 335
f7deaa1a 336 if (!anchor)
337 {
338 /*
339 * Get section...
340 */
ef416fc2 341
f7deaa1a 342 if (*ptr != '\"')
343 break;
ef416fc2 344
f7deaa1a 345 ptr ++;
346 sectptr = ptr;
ef416fc2 347
f7deaa1a 348 while (*ptr && *ptr != '\"')
349 ptr ++;
350
351 if (*ptr != '\"')
352 break;
ef416fc2 353
f7deaa1a 354 *ptr++ = '\0';
ef416fc2 355
f7deaa1a 356 strlcpy(section, sectptr, sizeof(section));
ef416fc2 357
f7deaa1a 358 while (isspace(*ptr & 255))
359 ptr ++;
360 }
ecdc0628 361
f7deaa1a 362 if (*ptr != '\"')
363 break;
364
365 ptr ++;
366 text = ptr;
367
368 while (*ptr && *ptr != '\"')
369 ptr ++;
370
371 if (*ptr != '\"')
372 break;
373
374 *ptr++ = '\0';
375
376 if ((node = help_new_node(filename, anchor, section, text,
377 mtime, offset, length)) == NULL)
378 break;
379
380 node->score = -1;
381
382 cupsArrayAdd(hi->nodes, node);
383 }
ef416fc2 384 }
385 }
386
387 cupsFileClose(fp);
388 }
389
390 /*
391 * Scan for new/updated files...
392 */
393
394 update = help_load_directory(hi, directory, NULL);
395
396 /*
397 * Remove any files that are no longer installed...
398 */
399
ecdc0628 400 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
401 node;
402 node = (help_node_t *)cupsArrayNext(hi->nodes))
403 if (node->score < 0)
ef416fc2 404 {
405 /*
406 * Delete this node...
407 */
408
ecdc0628 409 cupsArrayRemove(hi->nodes, node);
410 help_delete_node(node);
ef416fc2 411 }
ef416fc2 412
413 /*
ecdc0628 414 * Add nodes to the sorted array...
ef416fc2 415 */
416
ecdc0628 417 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
418 node;
419 node = (help_node_t *)cupsArrayNext(hi->nodes))
420 cupsArrayAdd(hi->sorted, node);
ef416fc2 421
422 /*
ecdc0628 423 * Save the index if we updated it...
ef416fc2 424 */
425
ecdc0628 426 if (update)
427 helpSaveIndex(hi, hifile);
ef416fc2 428
429 /*
430 * Return the index...
431 */
432
433 return (hi);
434}
435
436
437/*
438 * 'helpSaveIndex()' - Save a help index to disk.
439 */
440
441int /* O - 0 on success, -1 on error */
442helpSaveIndex(help_index_t *hi, /* I - Index */
443 const char *hifile) /* I - Index filename */
444{
445 cups_file_t *fp; /* Index file */
ef416fc2 446 help_node_t *node; /* Current node */
f7deaa1a 447 help_word_t *word; /* Current word */
ef416fc2 448
449
85dda01c 450 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
ef416fc2 451
452 /*
453 * Try creating a new index file...
454 */
455
456 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
457 return (-1);
458
459 /*
460 * Lock the file while we write it...
461 */
462
463 cupsFileLock(fp, 1);
464
f7deaa1a 465 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 466
ecdc0628 467 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
468 node;
469 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 470 {
471 /*
472 * Write the current node with/without the anchor...
473 */
474
ef416fc2 475 if (node->anchor)
476 {
477 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
478 node->filename, node->anchor,
479 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
480 node->text) < 0)
481 break;
482 }
483 else
484 {
485 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 486 node->filename, (int)node->mtime,
ef416fc2 487 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
488 node->section ? node->section : "", node->text) < 0)
489 break;
490 }
f7deaa1a 491
492 /*
493 * Then write the words associated with the node...
494 */
495
496 for (word = (help_word_t *)cupsArrayFirst(node->words);
497 word;
498 word = (help_word_t *)cupsArrayNext(node->words))
499 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
500 break;
ef416fc2 501 }
502
ecdc0628 503 cupsFileFlush(fp);
504
ef416fc2 505 if (cupsFileClose(fp) < 0)
506 return (-1);
ecdc0628 507 else if (node)
ef416fc2 508 return (-1);
509 else
510 return (0);
511}
512
513
514/*
515 * 'helpSearchIndex()' - Search an index.
516 */
517
518help_index_t * /* O - Search index */
519helpSearchIndex(help_index_t *hi, /* I - Index */
520 const char *query, /* I - Query string */
521 const char *section, /* I - Limit search to this section */
522 const char *filename) /* I - Limit search to this file */
523{
ef416fc2 524 help_index_t *search; /* Search index */
ecdc0628 525 help_node_t *node; /* Current node */
f7deaa1a 526 help_word_t *word; /* Current word */
ef416fc2 527 void *sc; /* Search context */
528 int matches; /* Number of matches */
529
530
85dda01c
MS
531 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
532 hi, query, filename));
ef416fc2 533
534 /*
535 * Range check...
536 */
537
538 if (!hi || !query)
539 return (NULL);
540
ecdc0628 541 /*
542 * Reset the scores of all nodes to 0...
543 */
544
545 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
546 node;
547 node = (help_node_t *)cupsArrayNext(hi->nodes))
548 node->score = 0;
549
550 /*
551 * Find the first node to search in...
552 */
ef416fc2 553
554 if (filename)
555 {
ecdc0628 556 node = helpFindNode(hi, filename, NULL);
557 if (!node)
ef416fc2 558 return (NULL);
559 }
560 else
ecdc0628 561 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 562
563 /*
564 * Convert the query into a regular expression...
565 */
566
567 sc = cgiCompileSearch(query);
568 if (!sc)
569 return (NULL);
570
571 /*
572 * Allocate a search index...
573 */
574
575 search = calloc(1, sizeof(help_index_t));
576 if (!search)
577 {
578 cgiFreeSearch(sc);
579 return (NULL);
580 }
581
ecdc0628 582 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
583 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 584
ecdc0628 585 if (!search->nodes || !search->sorted)
586 {
587 cupsArrayDelete(search->nodes);
588 cupsArrayDelete(search->sorted);
589 free(search);
590 cgiFreeSearch(sc);
591 return (NULL);
592 }
593
ef416fc2 594 search->search = 1;
595
596 /*
597 * Check each node in the index, adding matching nodes to the
598 * search index...
599 */
600
ecdc0628 601 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
602 if (section && strcmp(node->section, section))
ef416fc2 603 continue;
ecdc0628 604 else if (filename && strcmp(node->filename, filename))
ef416fc2 605 continue;
f7deaa1a 606 else
ef416fc2 607 {
f7deaa1a 608 matches = cgiDoSearch(sc, node->text);
609
610 for (word = (help_word_t *)cupsArrayFirst(node->words);
611 word;
612 word = (help_word_t *)cupsArrayNext(node->words))
613 if (cgiDoSearch(sc, word->text) > 0)
614 matches += word->count;
ef416fc2 615
f7deaa1a 616 if (matches > 0)
617 {
618 /*
619 * Found a match, add the node to the search index...
620 */
ef416fc2 621
f7deaa1a 622 node->score = matches;
623
321d8d57
MS
624 cupsArrayAdd(search->nodes, node);
625 cupsArrayAdd(search->sorted, node);
f7deaa1a 626 }
ef416fc2 627 }
628
629 /*
630 * Free the search context...
631 */
632
633 cgiFreeSearch(sc);
634
ef416fc2 635 /*
636 * Return the results...
637 */
638
639 return (search);
640}
641
642
f7deaa1a 643/*
644 * 'help_add_word()' - Add a word to a node.
645 */
646
647static help_word_t * /* O - New word */
648help_add_word(help_node_t *n, /* I - Node */
649 const char *text) /* I - Word text */
650{
651 help_word_t *w, /* New word */
652 key; /* Search key */
653
654
85dda01c 655 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
f7deaa1a 656
657 /*
658 * Create the words array as needed...
659 */
660
661 if (!n->words)
662 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
663
664 /*
665 * See if the word is already added...
666 */
667
668 key.text = (char *)text;
669
670 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
671 {
672 /*
673 * Create a new word...
674 */
675
676 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
677 return (NULL);
678
679 if ((w->text = strdup(text)) == NULL)
680 {
681 free(w);
682 return (NULL);
683 }
684
685 cupsArrayAdd(n->words, w);
686 }
687
688 /*
689 * Bump the counter for this word and return it...
690 */
691
692 w->count ++;
693
694 return (w);
695}
696
697
ef416fc2 698/*
699 * 'help_delete_node()' - Free all memory used by a node.
700 */
701
702static void
703help_delete_node(help_node_t *n) /* I - Node */
704{
f7deaa1a 705 help_word_t *w; /* Current word */
706
707
85dda01c 708 DEBUG_printf(("2help_delete_node(n=%p)", n));
ef416fc2 709
710 if (!n)
711 return;
712
713 if (n->filename)
714 free(n->filename);
715
716 if (n->anchor)
717 free(n->anchor);
718
719 if (n->section)
720 free(n->section);
721
722 if (n->text)
723 free(n->text);
724
f7deaa1a 725 for (w = (help_word_t *)cupsArrayFirst(n->words);
726 w;
727 w = (help_word_t *)cupsArrayNext(n->words))
728 help_delete_word(w);
729
730 cupsArrayDelete(n->words);
731
ef416fc2 732 free(n);
733}
734
735
f7deaa1a 736/*
737 * 'help_delete_word()' - Free all memory used by a word.
738 */
739
740static void
741help_delete_word(help_word_t *w) /* I - Word */
742{
85dda01c 743 DEBUG_printf(("2help_delete_word(w=%p)", w));
f7deaa1a 744
745 if (!w)
746 return;
747
748 if (w->text)
749 free(w->text);
750
751 free(w);
752}
753
754
ef416fc2 755/*
756 * 'help_load_directory()' - Load a directory of files into an index.
757 */
758
759static int /* O - 0 = success, -1 = error, 1 = updated */
760help_load_directory(
761 help_index_t *hi, /* I - Index */
762 const char *directory, /* I - Directory */
763 const char *relative) /* I - Relative path */
764{
ef416fc2 765 cups_dir_t *dir; /* Directory file */
766 cups_dentry_t *dent; /* Directory entry */
767 char *ext, /* Pointer to extension */
768 filename[1024], /* Full filename */
769 relname[1024]; /* Relative filename */
770 int update; /* Updated? */
ecdc0628 771 help_node_t *node; /* Current node */
ef416fc2 772
773
85dda01c
MS
774 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
775 hi, directory, relative));
ef416fc2 776
777 /*
778 * Open the directory and scan it...
779 */
780
781 if ((dir = cupsDirOpen(directory)) == NULL)
782 return (0);
783
784 update = 0;
785
786 while ((dent = cupsDirRead(dir)) != NULL)
787 {
ecdc0628 788 /*
789 * Skip "." files...
790 */
791
792 if (dent->filename[0] == '.')
793 continue;
794
ef416fc2 795 /*
796 * Get absolute and relative filenames...
797 */
798
799 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
800 if (relative)
801 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
802 else
803 strlcpy(relname, dent->filename, sizeof(relname));
804
805 /*
806 * Check if we have a HTML file...
807 */
808
809 if ((ext = strstr(dent->filename, ".html")) != NULL &&
810 (!ext[5] || !strcmp(ext + 5, ".gz")))
811 {
812 /*
813 * HTML file, see if we have already indexed the file...
814 */
815
816 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
817 {
818 /*
819 * File already indexed - check dates to confirm that the
820 * index is up-to-date...
821 */
822
ecdc0628 823 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 824 {
825 /*
826 * Same modification time, so mark all of the nodes
827 * for this file as up-to-date...
828 */
829
ecdc0628 830 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
831 if (!strcmp(node->filename, relname))
832 node->score = 0;
ef416fc2 833 else
834 break;
835
836 continue;
837 }
838 }
839
840 update = 1;
841
842 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
843 }
844 else if (S_ISDIR(dent->fileinfo.st_mode))
845 {
846 /*
847 * Process sub-directory...
848 */
849
850 if (help_load_directory(hi, filename, relname) == 1)
851 update = 1;
852 }
853 }
854
855 cupsDirClose(dir);
856
857 return (update);
858}
859
860
861/*
862 * 'help_load_file()' - Load a HTML files into an index.
863 */
864
865static int /* O - 0 = success, -1 = error */
866help_load_file(
867 help_index_t *hi, /* I - Index */
868 const char *filename, /* I - Filename */
869 const char *relative, /* I - Relative path */
870 time_t mtime) /* I - Modification time */
871{
872 cups_file_t *fp; /* HTML file */
ecdc0628 873 help_node_t *node; /* Current node */
ef416fc2 874 char line[1024], /* Line from file */
f42414bf 875 temp[1024], /* Temporary word */
ef416fc2 876 section[1024], /* Section */
877 *ptr, /* Pointer into line */
878 *anchor, /* Anchor name */
879 *text; /* Text for anchor */
880 off_t offset; /* File offset */
881 char quote; /* Quote character */
f7deaa1a 882 help_word_t *word; /* Current word */
883 int wordlen; /* Length of word */
ef416fc2 884
885
85dda01c
MS
886 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
887 "mtime=%ld)", hi, filename, relative, mtime));
ef416fc2 888
889 if ((fp = cupsFileOpen(filename, "r")) == NULL)
890 return (-1);
891
892 node = NULL;
893 offset = 0;
894
5a9febac 895 strlcpy(section, "Other", sizeof(section));
ef416fc2 896
897 while (cupsFileGets(fp, line, sizeof(line)))
898 {
899 /*
900 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
901 */
902
88f9aafc 903 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
ef416fc2 904 {
905 /*
906 * Got section line, copy it!
907 */
908
909 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
910
911 strlcpy(section, ptr, sizeof(section));
912 if ((ptr = strstr(section, "-->")) != NULL)
913 {
914 /*
915 * Strip comment stuff from end of line...
916 */
917
918 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
919
920 if (isspace(*ptr & 255))
921 *ptr = '\0';
922 }
923 continue;
924 }
925
926 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
927 {
928 ptr ++;
929
88f9aafc 930 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 931 {
932 /*
933 * Found the title...
934 */
935
936 anchor = NULL;
937 ptr += 6;
938 }
88f9aafc 939 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
ef416fc2 940 {
941 /*
942 * Found an anchor...
943 */
944
945 ptr += 7;
946
947 if (*ptr == '\"' || *ptr == '\'')
948 {
949 /*
950 * Get quoted anchor...
951 */
952
953 quote = *ptr;
954 anchor = ptr + 1;
955 if ((ptr = strchr(anchor, quote)) != NULL)
956 *ptr++ = '\0';
957 else
958 break;
959 }
960 else
961 {
962 /*
963 * Get unquoted anchor...
964 */
965
966 anchor = ptr + 1;
967
968 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
969
970 if (*ptr)
971 *ptr++ = '\0';
972 else
973 break;
974 }
975
976 /*
977 * Got the anchor, now lets find the end...
978 */
979
980 while (*ptr && *ptr != '>')
981 ptr ++;
982
983 if (*ptr != '>')
984 break;
985
986 ptr ++;
987 }
988 else
989 continue;
990
991 /*
992 * Now collect text for the link...
993 */
994
995 text = ptr;
996 while ((ptr = strchr(text, '<')) == NULL)
997 {
998 ptr = text + strlen(text);
999 if (ptr >= (line + sizeof(line) - 2))
1000 break;
1001
1002 *ptr++ = ' ';
1003
7e86f2f6 1004 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 1005 break;
1006 }
1007
1008 *ptr = '\0';
1009
1010 if (node)
7e86f2f6 1011 node->length = (size_t)(offset - node->offset);
ef416fc2 1012
1013 if (!*text)
1014 {
1015 node = NULL;
1016 break;
1017 }
1018
ecdc0628 1019 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1020 {
1021 /*
1022 * Node already in the index, so replace the text and other
1023 * data...
1024 */
1025
ecdc0628 1026 cupsArrayRemove(hi->nodes, node);
ef416fc2 1027
1028 if (node->section)
1029 free(node->section);
1030
1031 if (node->text)
1032 free(node->text);
1033
f7deaa1a 1034 if (node->words)
1035 {
1036 for (word = (help_word_t *)cupsArrayFirst(node->words);
1037 word;
1038 word = (help_word_t *)cupsArrayNext(node->words))
1039 help_delete_word(word);
1040
1041 cupsArrayDelete(node->words);
1042 node->words = NULL;
1043 }
1044
ef416fc2 1045 node->section = section[0] ? strdup(section) : NULL;
1046 node->text = strdup(text);
1047 node->mtime = mtime;
1048 node->offset = offset;
1049 node->score = 0;
1050 }
1051 else
1052 {
1053 /*
1054 * New node...
1055 */
1056
1057 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1058 }
1059
1060 /*
1061 * Go through the text value and replace tabs and newlines with
1062 * whitespace and eliminate extra whitespace...
1063 */
1064
1065 for (ptr = node->text, text = node->text; *ptr;)
1066 if (isspace(*ptr & 255))
1067 {
1068 while (isspace(*ptr & 255))
ed486911 1069 ptr ++;
ef416fc2 1070
1071 *text++ = ' ';
1072 }
1073 else if (text != ptr)
1074 *text++ = *ptr++;
1075 else
1076 {
1077 text ++;
1078 ptr ++;
1079 }
1080
1081 *text = '\0';
1082
ecdc0628 1083 /*
1084 * (Re)add the node to the array...
1085 */
1086
1087 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1088
1089 if (!anchor)
1090 node = NULL;
ef416fc2 1091 break;
1092 }
1093
f7deaa1a 1094 if (node)
1095 {
1096 /*
1097 * Scan this line for words...
1098 */
1099
1100 for (ptr = line; *ptr; ptr ++)
1101 {
1102 /*
1103 * Skip HTML stuff...
1104 */
1105
1106 if (*ptr == '<')
1107 {
1108 if (!strncmp(ptr, "<!--", 4))
1109 {
1110 /*
1111 * Skip HTML comment...
1112 */
1113
1114 if ((text = strstr(ptr + 4, "-->")) == NULL)
1115 ptr += strlen(ptr) - 1;
1116 else
1117 ptr = text + 2;
1118 }
1119 else
1120 {
1121 /*
1122 * Skip HTML element...
1123 */
1124
1125 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1126 {
f7deaa1a 1127 if (*ptr == '\"' || *ptr == '\'')
1128 {
1129 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1130
1131 if (!*ptr)
1132 ptr --;
1133 }
f42414bf 1134 }
f7deaa1a 1135
1136 if (!*ptr)
1137 ptr --;
1138 }
1139
1140 continue;
1141 }
1142 else if (*ptr == '&')
1143 {
1144 /*
1145 * Skip HTML entity...
1146 */
1147
1148 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1149
1150 if (!*ptr)
1151 ptr --;
1152
1153 continue;
1154 }
1155 else if (!isalnum(*ptr & 255))
1156 continue;
1157
1158 /*
1159 * Found the start of a word, search until we find the end...
1160 */
1161
1162 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1163
7e86f2f6 1164 wordlen = (int)(ptr - text);
f7deaa1a 1165
07623986 1166 memcpy(temp, text, (size_t)wordlen);
f42414bf 1167 temp[wordlen] = '\0';
1168
1169 ptr --;
f7deaa1a 1170
f42414bf 1171 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1172 (sizeof(help_common_words) /
1173 sizeof(help_common_words[0])),
1174 sizeof(help_common_words[0]),
1175 (int (*)(const void *, const void *))
88f9aafc 1176 _cups_strcasecmp))
f42414bf 1177 help_add_word(node, temp);
f7deaa1a 1178 }
1179 }
1180
ef416fc2 1181 /*
1182 * Get the offset of the next line...
1183 */
1184
1185 offset = cupsFileTell(fp);
1186 }
1187
1188 cupsFileClose(fp);
1189
1190 if (node)
7e86f2f6 1191 node->length = (size_t)(offset - node->offset);
ef416fc2 1192
1193 return (0);
1194}
1195
1196
1197/*
1198 * 'help_new_node()' - Create a new node and add it to an index.
1199 */
1200
1201static help_node_t * /* O - Node pointer or NULL on error */
1202help_new_node(const char *filename, /* I - Filename */
1203 const char *anchor, /* I - Anchor */
1204 const char *section, /* I - Section */
1205 const char *text, /* I - Text */
1206 time_t mtime, /* I - Modification time */
1207 off_t offset, /* I - Offset in file */
1208 size_t length) /* I - Length in bytes */
1209{
1210 help_node_t *n; /* Node */
1211
1212
85dda01c
MS
1213 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1214 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1215 (long)mtime, (long)offset, (long)length));
ef416fc2 1216
1217 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1218 if (!n)
1219 return (NULL);
1220
1221 n->filename = strdup(filename);
1222 n->anchor = anchor ? strdup(anchor) : NULL;
1223 n->section = (section && *section) ? strdup(section) : NULL;
1224 n->text = strdup(text);
1225 n->mtime = mtime;
1226 n->offset = offset;
1227 n->length = length;
1228
1229 return (n);
1230}
1231
1232
1233/*
1234 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1235 */
1236
1237static int /* O - Difference */
ecdc0628 1238help_sort_by_name(help_node_t *n1, /* I - First node */
1239 help_node_t *n2) /* I - Second node */
ef416fc2 1240{
ef416fc2 1241 int diff; /* Difference */
1242
1243
85dda01c
MS
1244 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1245 n1, n1->filename, n1->anchor,
1246 n2, n2->filename, n2->anchor));
ef416fc2 1247
ecdc0628 1248 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1249 return (diff);
1250
ecdc0628 1251 if (!n1->anchor && !n2->anchor)
ef416fc2 1252 return (0);
ecdc0628 1253 else if (!n1->anchor)
ef416fc2 1254 return (-1);
ecdc0628 1255 else if (!n2->anchor)
ef416fc2 1256 return (1);
1257 else
ecdc0628 1258 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1259}
1260
1261
1262/*
1263 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1264 */
1265
1266static int /* O - Difference */
ecdc0628 1267help_sort_by_score(help_node_t *n1, /* I - First node */
1268 help_node_t *n2) /* I - Second node */
ef416fc2 1269{
ef416fc2 1270 int diff; /* Difference */
1271
1272
85dda01c
MS
1273 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1274 "n2=%p(%d \"%s\" \"%s\")",
1275 n1, n1->score, n1->section, n1->text,
1276 n2, n2->score, n2->section, n2->text));
ef416fc2 1277
ecdc0628 1278 if (n1->score != n2->score)
1f0275e3 1279 return (n2->score - n1->score);
ef416fc2 1280
ecdc0628 1281 if (n1->section && !n2->section)
ef416fc2 1282 return (1);
ecdc0628 1283 else if (!n1->section && n2->section)
ef416fc2 1284 return (-1);
ecdc0628 1285 else if (n1->section && n2->section &&
1286 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1287 return (diff);
1288
88f9aafc 1289 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1290}
1291
1292
1293/*
f7deaa1a 1294 * 'help_sort_words()' - Sort words alphabetically.
1295 */
1296
1297static int /* O - Difference */
1298help_sort_words(help_word_t *w1, /* I - Second word */
1299 help_word_t *w2) /* I - Second word */
1300{
85dda01c 1301 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
f7deaa1a 1302 w1, w1->text, w2, w2->text));
1303
88f9aafc 1304 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1305}
1306
1307
1308/*
f2d18633 1309 * End of "$Id$".
ef416fc2 1310 */