]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
License change: Apache License, Version 2.0.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
3bb59731 4 * Copyright 2007-2015 by Apple Inc.
7e86f2f6 5 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 6 *
e3101897 7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
ef416fc2 8 */
9
10/*
11 * Include necessary headers...
12 */
13
14#include "cgi-private.h"
15#include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch(), so the entries MUST remain in
 * ascending alphabetical order.  Each entry holds at most 5 characters plus
 * the terminating nul.  The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",    "at",    "be",
  "been",  "but",   "by",    "call",  "can",   "come",  "could", "day",
  "did",   "do",    "down",  "each",  "find",  "first", "for",   "from",
  "go",    "had",   "has",   "have",  "he",    "her",   "him",   "his",
  "hot",   "how",   "if",    "in",    "is",    "it",    "know",  "like",
  "long",  "look",  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",   "of",    "on",    "one",   "or",    "other", "out",
  "over",  "said",  "see",   "she",   "side",  "so",    "some",  "sound",
  "than",  "that",  "the",   "their", "them",  "then",  "there", "these",
  "they",  "thing", "this",  "time",  "to",    "two",   "up",    "use",
  "was",   "water", "way",   "we",    "were",  "what",  "when",  "which",
  "who",   "will",  "with",  "word",  "would", "write", "you",   "your"
};
121
122
ef416fc2 123/*
124 * Local functions...
125 */
126
f7deaa1a 127static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 128static void help_delete_node(help_node_t *n);
f7deaa1a 129static void help_delete_word(help_word_t *w);
ef416fc2 130static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137static help_node_t *help_new_node(const char *filename, const char *anchor,
138 const char *section, const char *text,
139 time_t mtime, off_t offset,
85dda01c
MS
140 size_t length)
141 __attribute__((nonnull(1,3,4)));
ecdc0628 142static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
143static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 144static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 145
146
147/*
148 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
149 */
150
151void
ecdc0628 152helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 153{
ecdc0628 154 help_node_t *node; /* Current node */
ef416fc2 155
156
85dda01c 157 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
ef416fc2 158
159 if (!hi)
160 return;
161
ecdc0628 162 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
163 node;
164 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 165 {
ecdc0628 166 if (!hi->search)
167 help_delete_node(node);
168 }
ef416fc2 169
ecdc0628 170 cupsArrayDelete(hi->nodes);
171 cupsArrayDelete(hi->sorted);
ef416fc2 172
173 free(hi);
174}
175
176
177/*
178 * 'helpFindNode()' - Find a node in an index.
179 */
180
ecdc0628 181help_node_t * /* O - Node pointer or NULL */
ef416fc2 182helpFindNode(help_index_t *hi, /* I - Index */
183 const char *filename, /* I - Filename */
184 const char *anchor) /* I - Anchor */
185{
ecdc0628 186 help_node_t key; /* Search key */
ef416fc2 187
188
85dda01c
MS
189 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
190 hi, filename, anchor));
ef416fc2 191
192 /*
193 * Range check input...
194 */
195
196 if (!hi || !filename)
197 return (NULL);
198
199 /*
200 * Initialize the search key...
201 */
202
203 key.filename = (char *)filename;
204 key.anchor = (char *)anchor;
ef416fc2 205
206 /*
207 * Return any match...
208 */
209
ecdc0628 210 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 211}
212
213
214/*
215 * 'helpLoadIndex()' - Load a help index from disk.
216 */
217
218help_index_t * /* O - Index pointer or NULL */
219helpLoadIndex(const char *hifile, /* I - Index filename */
220 const char *directory) /* I - Directory that is indexed */
221{
222 help_index_t *hi; /* Help index */
223 cups_file_t *fp; /* Current file */
224 char line[2048], /* Line from file */
225 *ptr, /* Pointer into line */
226 *filename, /* Filename in line */
227 *anchor, /* Anchor in line */
228 *sectptr, /* Section pointer in line */
229 section[1024], /* Section name */
230 *text; /* Text in line */
231 time_t mtime; /* Modification time */
232 off_t offset; /* Offset into file */
233 size_t length; /* Length in bytes */
234 int update; /* Update? */
ef416fc2 235 help_node_t *node; /* Current node */
f7deaa1a 236 help_word_t *word; /* Current word */
ef416fc2 237
238
85dda01c 239 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
ef416fc2 240 hifile, directory));
241
242 /*
243 * Create a new, empty index.
244 */
245
ecdc0628 246 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
247 return (NULL);
248
249 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
250 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
251
252 if (!hi->nodes || !hi->sorted)
253 {
254 cupsArrayDelete(hi->nodes);
255 cupsArrayDelete(hi->sorted);
256 free(hi);
257 return (NULL);
258 }
ef416fc2 259
260 /*
261 * Try loading the existing index file...
262 */
263
264 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
265 {
266 /*
267 * Lock the file and then read the first line...
268 */
269
270 cupsFileLock(fp, 1);
271
f7deaa1a 272 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 273 {
274 /*
275 * Got a valid header line, now read the data lines...
276 */
277
f7deaa1a 278 node = NULL;
279
ef416fc2 280 while (cupsFileGets(fp, line, sizeof(line)))
281 {
282 /*
283 * Each line looks like one of the following:
284 *
285 * filename mtime offset length "section" "text"
286 * filename#anchor offset length "text"
f7deaa1a 287 * SP count word
ef416fc2 288 */
289
f7deaa1a 290 if (line[0] == ' ')
ef416fc2 291 {
f7deaa1a 292 /*
293 * Read a word in the current node...
294 */
ef416fc2 295
f7deaa1a 296 if (!node || (ptr = strrchr(line, ' ')) == NULL)
297 continue;
ef416fc2 298
f7deaa1a 299 if ((word = help_add_word(node, ptr + 1)) != NULL)
300 word->count = atoi(line + 1);
301 }
302 else
ef416fc2 303 {
304 /*
f7deaa1a 305 * Add a node...
ef416fc2 306 */
307
f7deaa1a 308 filename = line;
ef416fc2 309
f7deaa1a 310 if ((ptr = strchr(line, ' ')) == NULL)
311 break;
ef416fc2 312
f7deaa1a 313 while (isspace(*ptr & 255))
314 *ptr++ = '\0';
ef416fc2 315
f7deaa1a 316 if ((anchor = strrchr(filename, '#')) != NULL)
317 {
318 *anchor++ = '\0';
319 mtime = 0;
320 }
321 else
322 mtime = strtol(ptr, &ptr, 10);
ef416fc2 323
f7deaa1a 324 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 325 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 326
327 while (isspace(*ptr & 255))
328 ptr ++;
ef416fc2 329
f7deaa1a 330 if (!anchor)
331 {
332 /*
333 * Get section...
334 */
ef416fc2 335
f7deaa1a 336 if (*ptr != '\"')
337 break;
ef416fc2 338
f7deaa1a 339 ptr ++;
340 sectptr = ptr;
ef416fc2 341
f7deaa1a 342 while (*ptr && *ptr != '\"')
343 ptr ++;
344
345 if (*ptr != '\"')
346 break;
ef416fc2 347
f7deaa1a 348 *ptr++ = '\0';
ef416fc2 349
f7deaa1a 350 strlcpy(section, sectptr, sizeof(section));
ef416fc2 351
f7deaa1a 352 while (isspace(*ptr & 255))
353 ptr ++;
354 }
ecdc0628 355
f7deaa1a 356 if (*ptr != '\"')
357 break;
358
359 ptr ++;
360 text = ptr;
361
362 while (*ptr && *ptr != '\"')
363 ptr ++;
364
365 if (*ptr != '\"')
366 break;
367
368 *ptr++ = '\0';
369
370 if ((node = help_new_node(filename, anchor, section, text,
371 mtime, offset, length)) == NULL)
372 break;
373
374 node->score = -1;
375
376 cupsArrayAdd(hi->nodes, node);
377 }
ef416fc2 378 }
379 }
380
381 cupsFileClose(fp);
382 }
383
384 /*
385 * Scan for new/updated files...
386 */
387
388 update = help_load_directory(hi, directory, NULL);
389
390 /*
391 * Remove any files that are no longer installed...
392 */
393
ecdc0628 394 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
395 node;
396 node = (help_node_t *)cupsArrayNext(hi->nodes))
397 if (node->score < 0)
ef416fc2 398 {
399 /*
400 * Delete this node...
401 */
402
ecdc0628 403 cupsArrayRemove(hi->nodes, node);
404 help_delete_node(node);
ef416fc2 405 }
ef416fc2 406
407 /*
ecdc0628 408 * Add nodes to the sorted array...
ef416fc2 409 */
410
ecdc0628 411 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
412 node;
413 node = (help_node_t *)cupsArrayNext(hi->nodes))
414 cupsArrayAdd(hi->sorted, node);
ef416fc2 415
416 /*
ecdc0628 417 * Save the index if we updated it...
ef416fc2 418 */
419
ecdc0628 420 if (update)
421 helpSaveIndex(hi, hifile);
ef416fc2 422
423 /*
424 * Return the index...
425 */
426
427 return (hi);
428}
429
430
431/*
432 * 'helpSaveIndex()' - Save a help index to disk.
433 */
434
435int /* O - 0 on success, -1 on error */
436helpSaveIndex(help_index_t *hi, /* I - Index */
437 const char *hifile) /* I - Index filename */
438{
439 cups_file_t *fp; /* Index file */
ef416fc2 440 help_node_t *node; /* Current node */
f7deaa1a 441 help_word_t *word; /* Current word */
ef416fc2 442
443
85dda01c 444 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
ef416fc2 445
446 /*
447 * Try creating a new index file...
448 */
449
450 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
451 return (-1);
452
453 /*
454 * Lock the file while we write it...
455 */
456
457 cupsFileLock(fp, 1);
458
f7deaa1a 459 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 460
ecdc0628 461 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
462 node;
463 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 464 {
465 /*
466 * Write the current node with/without the anchor...
467 */
468
ef416fc2 469 if (node->anchor)
470 {
471 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
472 node->filename, node->anchor,
473 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
474 node->text) < 0)
475 break;
476 }
477 else
478 {
479 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 480 node->filename, (int)node->mtime,
ef416fc2 481 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
482 node->section ? node->section : "", node->text) < 0)
483 break;
484 }
f7deaa1a 485
486 /*
487 * Then write the words associated with the node...
488 */
489
490 for (word = (help_word_t *)cupsArrayFirst(node->words);
491 word;
492 word = (help_word_t *)cupsArrayNext(node->words))
493 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
494 break;
ef416fc2 495 }
496
ecdc0628 497 cupsFileFlush(fp);
498
ef416fc2 499 if (cupsFileClose(fp) < 0)
500 return (-1);
ecdc0628 501 else if (node)
ef416fc2 502 return (-1);
503 else
504 return (0);
505}
506
507
508/*
509 * 'helpSearchIndex()' - Search an index.
510 */
511
512help_index_t * /* O - Search index */
513helpSearchIndex(help_index_t *hi, /* I - Index */
514 const char *query, /* I - Query string */
515 const char *section, /* I - Limit search to this section */
516 const char *filename) /* I - Limit search to this file */
517{
ef416fc2 518 help_index_t *search; /* Search index */
ecdc0628 519 help_node_t *node; /* Current node */
f7deaa1a 520 help_word_t *word; /* Current word */
ef416fc2 521 void *sc; /* Search context */
522 int matches; /* Number of matches */
523
524
85dda01c
MS
525 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
526 hi, query, filename));
ef416fc2 527
528 /*
529 * Range check...
530 */
531
532 if (!hi || !query)
533 return (NULL);
534
ecdc0628 535 /*
536 * Reset the scores of all nodes to 0...
537 */
538
539 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
540 node;
541 node = (help_node_t *)cupsArrayNext(hi->nodes))
542 node->score = 0;
543
544 /*
545 * Find the first node to search in...
546 */
ef416fc2 547
548 if (filename)
549 {
ecdc0628 550 node = helpFindNode(hi, filename, NULL);
551 if (!node)
ef416fc2 552 return (NULL);
553 }
554 else
ecdc0628 555 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 556
557 /*
558 * Convert the query into a regular expression...
559 */
560
561 sc = cgiCompileSearch(query);
562 if (!sc)
563 return (NULL);
564
565 /*
566 * Allocate a search index...
567 */
568
569 search = calloc(1, sizeof(help_index_t));
570 if (!search)
571 {
572 cgiFreeSearch(sc);
573 return (NULL);
574 }
575
ecdc0628 576 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
577 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 578
ecdc0628 579 if (!search->nodes || !search->sorted)
580 {
581 cupsArrayDelete(search->nodes);
582 cupsArrayDelete(search->sorted);
583 free(search);
584 cgiFreeSearch(sc);
585 return (NULL);
586 }
587
ef416fc2 588 search->search = 1;
589
590 /*
591 * Check each node in the index, adding matching nodes to the
592 * search index...
593 */
594
ecdc0628 595 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
596 if (section && strcmp(node->section, section))
ef416fc2 597 continue;
ecdc0628 598 else if (filename && strcmp(node->filename, filename))
ef416fc2 599 continue;
f7deaa1a 600 else
ef416fc2 601 {
f7deaa1a 602 matches = cgiDoSearch(sc, node->text);
603
604 for (word = (help_word_t *)cupsArrayFirst(node->words);
605 word;
606 word = (help_word_t *)cupsArrayNext(node->words))
607 if (cgiDoSearch(sc, word->text) > 0)
608 matches += word->count;
ef416fc2 609
f7deaa1a 610 if (matches > 0)
611 {
612 /*
613 * Found a match, add the node to the search index...
614 */
ef416fc2 615
f7deaa1a 616 node->score = matches;
617
321d8d57
MS
618 cupsArrayAdd(search->nodes, node);
619 cupsArrayAdd(search->sorted, node);
f7deaa1a 620 }
ef416fc2 621 }
622
623 /*
624 * Free the search context...
625 */
626
627 cgiFreeSearch(sc);
628
ef416fc2 629 /*
630 * Return the results...
631 */
632
633 return (search);
634}
635
636
f7deaa1a 637/*
638 * 'help_add_word()' - Add a word to a node.
639 */
640
641static help_word_t * /* O - New word */
642help_add_word(help_node_t *n, /* I - Node */
643 const char *text) /* I - Word text */
644{
645 help_word_t *w, /* New word */
646 key; /* Search key */
647
648
85dda01c 649 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
f7deaa1a 650
651 /*
652 * Create the words array as needed...
653 */
654
655 if (!n->words)
656 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
657
658 /*
659 * See if the word is already added...
660 */
661
662 key.text = (char *)text;
663
664 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
665 {
666 /*
667 * Create a new word...
668 */
669
670 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
671 return (NULL);
672
673 if ((w->text = strdup(text)) == NULL)
674 {
675 free(w);
676 return (NULL);
677 }
678
679 cupsArrayAdd(n->words, w);
680 }
681
682 /*
683 * Bump the counter for this word and return it...
684 */
685
686 w->count ++;
687
688 return (w);
689}
690
691
ef416fc2 692/*
693 * 'help_delete_node()' - Free all memory used by a node.
694 */
695
696static void
697help_delete_node(help_node_t *n) /* I - Node */
698{
f7deaa1a 699 help_word_t *w; /* Current word */
700
701
85dda01c 702 DEBUG_printf(("2help_delete_node(n=%p)", n));
ef416fc2 703
704 if (!n)
705 return;
706
707 if (n->filename)
708 free(n->filename);
709
710 if (n->anchor)
711 free(n->anchor);
712
713 if (n->section)
714 free(n->section);
715
716 if (n->text)
717 free(n->text);
718
f7deaa1a 719 for (w = (help_word_t *)cupsArrayFirst(n->words);
720 w;
721 w = (help_word_t *)cupsArrayNext(n->words))
722 help_delete_word(w);
723
724 cupsArrayDelete(n->words);
725
ef416fc2 726 free(n);
727}
728
729
f7deaa1a 730/*
731 * 'help_delete_word()' - Free all memory used by a word.
732 */
733
734static void
735help_delete_word(help_word_t *w) /* I - Word */
736{
85dda01c 737 DEBUG_printf(("2help_delete_word(w=%p)", w));
f7deaa1a 738
739 if (!w)
740 return;
741
742 if (w->text)
743 free(w->text);
744
745 free(w);
746}
747
748
ef416fc2 749/*
750 * 'help_load_directory()' - Load a directory of files into an index.
751 */
752
753static int /* O - 0 = success, -1 = error, 1 = updated */
754help_load_directory(
755 help_index_t *hi, /* I - Index */
756 const char *directory, /* I - Directory */
757 const char *relative) /* I - Relative path */
758{
ef416fc2 759 cups_dir_t *dir; /* Directory file */
760 cups_dentry_t *dent; /* Directory entry */
761 char *ext, /* Pointer to extension */
762 filename[1024], /* Full filename */
763 relname[1024]; /* Relative filename */
764 int update; /* Updated? */
ecdc0628 765 help_node_t *node; /* Current node */
ef416fc2 766
767
85dda01c
MS
768 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
769 hi, directory, relative));
ef416fc2 770
771 /*
772 * Open the directory and scan it...
773 */
774
775 if ((dir = cupsDirOpen(directory)) == NULL)
776 return (0);
777
778 update = 0;
779
780 while ((dent = cupsDirRead(dir)) != NULL)
781 {
ecdc0628 782 /*
783 * Skip "." files...
784 */
785
786 if (dent->filename[0] == '.')
787 continue;
788
ef416fc2 789 /*
790 * Get absolute and relative filenames...
791 */
792
793 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
794 if (relative)
795 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
796 else
797 strlcpy(relname, dent->filename, sizeof(relname));
798
799 /*
800 * Check if we have a HTML file...
801 */
802
803 if ((ext = strstr(dent->filename, ".html")) != NULL &&
804 (!ext[5] || !strcmp(ext + 5, ".gz")))
805 {
806 /*
807 * HTML file, see if we have already indexed the file...
808 */
809
810 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
811 {
812 /*
813 * File already indexed - check dates to confirm that the
814 * index is up-to-date...
815 */
816
ecdc0628 817 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 818 {
819 /*
820 * Same modification time, so mark all of the nodes
821 * for this file as up-to-date...
822 */
823
ecdc0628 824 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
825 if (!strcmp(node->filename, relname))
826 node->score = 0;
ef416fc2 827 else
828 break;
829
830 continue;
831 }
832 }
833
834 update = 1;
835
836 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
837 }
838 else if (S_ISDIR(dent->fileinfo.st_mode))
839 {
840 /*
841 * Process sub-directory...
842 */
843
844 if (help_load_directory(hi, filename, relname) == 1)
845 update = 1;
846 }
847 }
848
849 cupsDirClose(dir);
850
851 return (update);
852}
853
854
855/*
856 * 'help_load_file()' - Load a HTML files into an index.
857 */
858
859static int /* O - 0 = success, -1 = error */
860help_load_file(
861 help_index_t *hi, /* I - Index */
862 const char *filename, /* I - Filename */
863 const char *relative, /* I - Relative path */
864 time_t mtime) /* I - Modification time */
865{
866 cups_file_t *fp; /* HTML file */
ecdc0628 867 help_node_t *node; /* Current node */
ef416fc2 868 char line[1024], /* Line from file */
f42414bf 869 temp[1024], /* Temporary word */
ef416fc2 870 section[1024], /* Section */
871 *ptr, /* Pointer into line */
872 *anchor, /* Anchor name */
873 *text; /* Text for anchor */
874 off_t offset; /* File offset */
875 char quote; /* Quote character */
f7deaa1a 876 help_word_t *word; /* Current word */
877 int wordlen; /* Length of word */
ef416fc2 878
879
85dda01c 880 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
2274af38 881 "mtime=%ld)", hi, filename, relative, (long)mtime));
ef416fc2 882
883 if ((fp = cupsFileOpen(filename, "r")) == NULL)
884 return (-1);
885
886 node = NULL;
887 offset = 0;
888
5a9febac 889 strlcpy(section, "Other", sizeof(section));
ef416fc2 890
891 while (cupsFileGets(fp, line, sizeof(line)))
892 {
893 /*
894 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
895 */
896
88f9aafc 897 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
ef416fc2 898 {
899 /*
900 * Got section line, copy it!
901 */
902
903 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
904
905 strlcpy(section, ptr, sizeof(section));
906 if ((ptr = strstr(section, "-->")) != NULL)
907 {
908 /*
909 * Strip comment stuff from end of line...
910 */
911
912 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
913
914 if (isspace(*ptr & 255))
915 *ptr = '\0';
916 }
917 continue;
918 }
919
920 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
921 {
922 ptr ++;
923
88f9aafc 924 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 925 {
926 /*
927 * Found the title...
928 */
929
930 anchor = NULL;
931 ptr += 6;
932 }
88f9aafc 933 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
ef416fc2 934 {
935 /*
936 * Found an anchor...
937 */
938
939 ptr += 7;
940
941 if (*ptr == '\"' || *ptr == '\'')
942 {
943 /*
944 * Get quoted anchor...
945 */
946
947 quote = *ptr;
948 anchor = ptr + 1;
949 if ((ptr = strchr(anchor, quote)) != NULL)
950 *ptr++ = '\0';
951 else
952 break;
953 }
954 else
955 {
956 /*
957 * Get unquoted anchor...
958 */
959
960 anchor = ptr + 1;
961
962 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
963
964 if (*ptr)
965 *ptr++ = '\0';
966 else
967 break;
968 }
969
970 /*
971 * Got the anchor, now lets find the end...
972 */
973
974 while (*ptr && *ptr != '>')
975 ptr ++;
976
977 if (*ptr != '>')
978 break;
979
980 ptr ++;
981 }
982 else
983 continue;
984
985 /*
986 * Now collect text for the link...
987 */
988
989 text = ptr;
990 while ((ptr = strchr(text, '<')) == NULL)
991 {
992 ptr = text + strlen(text);
993 if (ptr >= (line + sizeof(line) - 2))
994 break;
995
996 *ptr++ = ' ';
997
7e86f2f6 998 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 999 break;
1000 }
1001
1002 *ptr = '\0';
1003
1004 if (node)
7e86f2f6 1005 node->length = (size_t)(offset - node->offset);
ef416fc2 1006
1007 if (!*text)
1008 {
1009 node = NULL;
1010 break;
1011 }
1012
ecdc0628 1013 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1014 {
1015 /*
1016 * Node already in the index, so replace the text and other
1017 * data...
1018 */
1019
ecdc0628 1020 cupsArrayRemove(hi->nodes, node);
ef416fc2 1021
1022 if (node->section)
1023 free(node->section);
1024
1025 if (node->text)
1026 free(node->text);
1027
f7deaa1a 1028 if (node->words)
1029 {
1030 for (word = (help_word_t *)cupsArrayFirst(node->words);
1031 word;
1032 word = (help_word_t *)cupsArrayNext(node->words))
1033 help_delete_word(word);
1034
1035 cupsArrayDelete(node->words);
1036 node->words = NULL;
1037 }
1038
ef416fc2 1039 node->section = section[0] ? strdup(section) : NULL;
1040 node->text = strdup(text);
1041 node->mtime = mtime;
1042 node->offset = offset;
1043 node->score = 0;
1044 }
1045 else
1046 {
1047 /*
1048 * New node...
1049 */
1050
1051 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1052 }
1053
1054 /*
1055 * Go through the text value and replace tabs and newlines with
1056 * whitespace and eliminate extra whitespace...
1057 */
1058
1059 for (ptr = node->text, text = node->text; *ptr;)
1060 if (isspace(*ptr & 255))
1061 {
1062 while (isspace(*ptr & 255))
ed486911 1063 ptr ++;
ef416fc2 1064
1065 *text++ = ' ';
1066 }
1067 else if (text != ptr)
1068 *text++ = *ptr++;
1069 else
1070 {
1071 text ++;
1072 ptr ++;
1073 }
1074
1075 *text = '\0';
1076
ecdc0628 1077 /*
1078 * (Re)add the node to the array...
1079 */
1080
1081 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1082
1083 if (!anchor)
1084 node = NULL;
ef416fc2 1085 break;
1086 }
1087
f7deaa1a 1088 if (node)
1089 {
1090 /*
1091 * Scan this line for words...
1092 */
1093
1094 for (ptr = line; *ptr; ptr ++)
1095 {
1096 /*
1097 * Skip HTML stuff...
1098 */
1099
1100 if (*ptr == '<')
1101 {
1102 if (!strncmp(ptr, "<!--", 4))
1103 {
1104 /*
1105 * Skip HTML comment...
1106 */
1107
1108 if ((text = strstr(ptr + 4, "-->")) == NULL)
1109 ptr += strlen(ptr) - 1;
1110 else
1111 ptr = text + 2;
1112 }
1113 else
1114 {
1115 /*
1116 * Skip HTML element...
1117 */
1118
1119 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1120 {
f7deaa1a 1121 if (*ptr == '\"' || *ptr == '\'')
1122 {
1123 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1124
1125 if (!*ptr)
1126 ptr --;
1127 }
f42414bf 1128 }
f7deaa1a 1129
1130 if (!*ptr)
1131 ptr --;
1132 }
1133
1134 continue;
1135 }
1136 else if (*ptr == '&')
1137 {
1138 /*
1139 * Skip HTML entity...
1140 */
1141
1142 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1143
1144 if (!*ptr)
1145 ptr --;
1146
1147 continue;
1148 }
1149 else if (!isalnum(*ptr & 255))
1150 continue;
1151
1152 /*
1153 * Found the start of a word, search until we find the end...
1154 */
1155
1156 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1157
7e86f2f6 1158 wordlen = (int)(ptr - text);
f7deaa1a 1159
07623986 1160 memcpy(temp, text, (size_t)wordlen);
f42414bf 1161 temp[wordlen] = '\0';
1162
1163 ptr --;
f7deaa1a 1164
f42414bf 1165 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1166 (sizeof(help_common_words) /
1167 sizeof(help_common_words[0])),
1168 sizeof(help_common_words[0]),
1169 (int (*)(const void *, const void *))
88f9aafc 1170 _cups_strcasecmp))
f42414bf 1171 help_add_word(node, temp);
f7deaa1a 1172 }
1173 }
1174
ef416fc2 1175 /*
1176 * Get the offset of the next line...
1177 */
1178
1179 offset = cupsFileTell(fp);
1180 }
1181
1182 cupsFileClose(fp);
1183
1184 if (node)
7e86f2f6 1185 node->length = (size_t)(offset - node->offset);
ef416fc2 1186
1187 return (0);
1188}
1189
1190
1191/*
1192 * 'help_new_node()' - Create a new node and add it to an index.
1193 */
1194
1195static help_node_t * /* O - Node pointer or NULL on error */
1196help_new_node(const char *filename, /* I - Filename */
1197 const char *anchor, /* I - Anchor */
1198 const char *section, /* I - Section */
1199 const char *text, /* I - Text */
1200 time_t mtime, /* I - Modification time */
1201 off_t offset, /* I - Offset in file */
1202 size_t length) /* I - Length in bytes */
1203{
1204 help_node_t *n; /* Node */
1205
1206
85dda01c
MS
1207 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1208 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1209 (long)mtime, (long)offset, (long)length));
ef416fc2 1210
1211 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1212 if (!n)
1213 return (NULL);
1214
1215 n->filename = strdup(filename);
1216 n->anchor = anchor ? strdup(anchor) : NULL;
3bb59731 1217 n->section = *section ? strdup(section) : NULL;
ef416fc2 1218 n->text = strdup(text);
1219 n->mtime = mtime;
1220 n->offset = offset;
1221 n->length = length;
1222
1223 return (n);
1224}
1225
1226
1227/*
1228 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1229 */
1230
1231static int /* O - Difference */
ecdc0628 1232help_sort_by_name(help_node_t *n1, /* I - First node */
1233 help_node_t *n2) /* I - Second node */
ef416fc2 1234{
ef416fc2 1235 int diff; /* Difference */
1236
1237
85dda01c
MS
1238 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1239 n1, n1->filename, n1->anchor,
1240 n2, n2->filename, n2->anchor));
ef416fc2 1241
ecdc0628 1242 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1243 return (diff);
1244
ecdc0628 1245 if (!n1->anchor && !n2->anchor)
ef416fc2 1246 return (0);
ecdc0628 1247 else if (!n1->anchor)
ef416fc2 1248 return (-1);
ecdc0628 1249 else if (!n2->anchor)
ef416fc2 1250 return (1);
1251 else
ecdc0628 1252 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1253}
1254
1255
1256/*
1257 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1258 */
1259
1260static int /* O - Difference */
ecdc0628 1261help_sort_by_score(help_node_t *n1, /* I - First node */
1262 help_node_t *n2) /* I - Second node */
ef416fc2 1263{
ef416fc2 1264 int diff; /* Difference */
1265
1266
85dda01c
MS
1267 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1268 "n2=%p(%d \"%s\" \"%s\")",
1269 n1, n1->score, n1->section, n1->text,
1270 n2, n2->score, n2->section, n2->text));
ef416fc2 1271
ecdc0628 1272 if (n1->score != n2->score)
1f0275e3 1273 return (n2->score - n1->score);
ef416fc2 1274
ecdc0628 1275 if (n1->section && !n2->section)
ef416fc2 1276 return (1);
ecdc0628 1277 else if (!n1->section && n2->section)
ef416fc2 1278 return (-1);
ecdc0628 1279 else if (n1->section && n2->section &&
1280 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1281 return (diff);
1282
88f9aafc 1283 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1284}
1285
1286
1287/*
f7deaa1a 1288 * 'help_sort_words()' - Sort words alphabetically.
1289 */
1290
1291static int /* O - Difference */
1292help_sort_words(help_word_t *w1, /* I - Second word */
1293 help_word_t *w2) /* I - Second word */
1294{
85dda01c 1295 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
f7deaa1a 1296 w1, w1->text, w2, w2->text));
1297
88f9aafc 1298 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1299}