]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Merge pull request #5297 from FedericoYundt/patch-1
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
cfd375ad 4 * Copyright 2007-2017 by Apple Inc.
7e86f2f6 5 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 6 *
e3101897 7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
ef416fc2 8 */
9
10/*
11 * Include necessary headers...
12 */
13
14#include "cgi-private.h"
15#include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() and a case-insensitive string
 * comparison, so entries MUST stay sorted alphabetically and every word
 * must fit in 5 characters plus the terminating nul.  The table is
 * read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121
122
ef416fc2 123/*
124 * Local functions...
125 */
126
f7deaa1a 127static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 128static void help_delete_node(help_node_t *n);
f7deaa1a 129static void help_delete_word(help_word_t *w);
ef416fc2 130static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
137static help_node_t *help_new_node(const char *filename, const char *anchor,
138 const char *section, const char *text,
139 time_t mtime, off_t offset,
85dda01c
MS
140 size_t length)
141 __attribute__((nonnull(1,3,4)));
ecdc0628 142static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
143static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 144static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 145
146
147/*
148 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
149 */
150
151void
ecdc0628 152helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 153{
ecdc0628 154 help_node_t *node; /* Current node */
ef416fc2 155
156
85dda01c 157 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
ef416fc2 158
159 if (!hi)
160 return;
161
ecdc0628 162 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
163 node;
164 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 165 {
ecdc0628 166 if (!hi->search)
167 help_delete_node(node);
168 }
ef416fc2 169
ecdc0628 170 cupsArrayDelete(hi->nodes);
171 cupsArrayDelete(hi->sorted);
ef416fc2 172
173 free(hi);
174}
175
176
177/*
178 * 'helpFindNode()' - Find a node in an index.
179 */
180
ecdc0628 181help_node_t * /* O - Node pointer or NULL */
ef416fc2 182helpFindNode(help_index_t *hi, /* I - Index */
183 const char *filename, /* I - Filename */
184 const char *anchor) /* I - Anchor */
185{
ecdc0628 186 help_node_t key; /* Search key */
ef416fc2 187
188
85dda01c
MS
189 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
190 hi, filename, anchor));
ef416fc2 191
192 /*
193 * Range check input...
194 */
195
196 if (!hi || !filename)
197 return (NULL);
198
199 /*
200 * Initialize the search key...
201 */
202
203 key.filename = (char *)filename;
204 key.anchor = (char *)anchor;
ef416fc2 205
206 /*
207 * Return any match...
208 */
209
ecdc0628 210 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 211}
212
213
214/*
215 * 'helpLoadIndex()' - Load a help index from disk.
216 */
217
218help_index_t * /* O - Index pointer or NULL */
219helpLoadIndex(const char *hifile, /* I - Index filename */
220 const char *directory) /* I - Directory that is indexed */
221{
222 help_index_t *hi; /* Help index */
223 cups_file_t *fp; /* Current file */
224 char line[2048], /* Line from file */
225 *ptr, /* Pointer into line */
226 *filename, /* Filename in line */
227 *anchor, /* Anchor in line */
228 *sectptr, /* Section pointer in line */
229 section[1024], /* Section name */
230 *text; /* Text in line */
231 time_t mtime; /* Modification time */
232 off_t offset; /* Offset into file */
233 size_t length; /* Length in bytes */
234 int update; /* Update? */
ef416fc2 235 help_node_t *node; /* Current node */
f7deaa1a 236 help_word_t *word; /* Current word */
ef416fc2 237
238
85dda01c 239 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
ef416fc2 240 hifile, directory));
241
242 /*
243 * Create a new, empty index.
244 */
245
ecdc0628 246 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
247 return (NULL);
248
249 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
250 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
251
252 if (!hi->nodes || !hi->sorted)
253 {
254 cupsArrayDelete(hi->nodes);
255 cupsArrayDelete(hi->sorted);
256 free(hi);
257 return (NULL);
258 }
ef416fc2 259
260 /*
261 * Try loading the existing index file...
262 */
263
264 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
265 {
266 /*
267 * Lock the file and then read the first line...
268 */
269
270 cupsFileLock(fp, 1);
271
f7deaa1a 272 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 273 {
274 /*
275 * Got a valid header line, now read the data lines...
276 */
277
f7deaa1a 278 node = NULL;
279
ef416fc2 280 while (cupsFileGets(fp, line, sizeof(line)))
281 {
282 /*
283 * Each line looks like one of the following:
284 *
285 * filename mtime offset length "section" "text"
286 * filename#anchor offset length "text"
f7deaa1a 287 * SP count word
ef416fc2 288 */
289
f7deaa1a 290 if (line[0] == ' ')
ef416fc2 291 {
f7deaa1a 292 /*
293 * Read a word in the current node...
294 */
ef416fc2 295
f7deaa1a 296 if (!node || (ptr = strrchr(line, ' ')) == NULL)
297 continue;
ef416fc2 298
f7deaa1a 299 if ((word = help_add_word(node, ptr + 1)) != NULL)
300 word->count = atoi(line + 1);
301 }
302 else
ef416fc2 303 {
304 /*
f7deaa1a 305 * Add a node...
ef416fc2 306 */
307
f7deaa1a 308 filename = line;
ef416fc2 309
f7deaa1a 310 if ((ptr = strchr(line, ' ')) == NULL)
311 break;
ef416fc2 312
f7deaa1a 313 while (isspace(*ptr & 255))
314 *ptr++ = '\0';
ef416fc2 315
f7deaa1a 316 if ((anchor = strrchr(filename, '#')) != NULL)
317 {
318 *anchor++ = '\0';
319 mtime = 0;
320 }
321 else
322 mtime = strtol(ptr, &ptr, 10);
ef416fc2 323
f7deaa1a 324 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 325 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 326
327 while (isspace(*ptr & 255))
328 ptr ++;
ef416fc2 329
f7deaa1a 330 if (!anchor)
331 {
332 /*
333 * Get section...
334 */
ef416fc2 335
f7deaa1a 336 if (*ptr != '\"')
337 break;
ef416fc2 338
f7deaa1a 339 ptr ++;
340 sectptr = ptr;
ef416fc2 341
f7deaa1a 342 while (*ptr && *ptr != '\"')
343 ptr ++;
344
345 if (*ptr != '\"')
346 break;
ef416fc2 347
f7deaa1a 348 *ptr++ = '\0';
ef416fc2 349
f7deaa1a 350 strlcpy(section, sectptr, sizeof(section));
ef416fc2 351
f7deaa1a 352 while (isspace(*ptr & 255))
353 ptr ++;
354 }
ecdc0628 355
f7deaa1a 356 if (*ptr != '\"')
357 break;
358
359 ptr ++;
360 text = ptr;
361
362 while (*ptr && *ptr != '\"')
363 ptr ++;
364
365 if (*ptr != '\"')
366 break;
367
368 *ptr++ = '\0';
369
370 if ((node = help_new_node(filename, anchor, section, text,
371 mtime, offset, length)) == NULL)
372 break;
373
374 node->score = -1;
375
376 cupsArrayAdd(hi->nodes, node);
377 }
ef416fc2 378 }
379 }
380
381 cupsFileClose(fp);
382 }
383
384 /*
385 * Scan for new/updated files...
386 */
387
388 update = help_load_directory(hi, directory, NULL);
389
390 /*
391 * Remove any files that are no longer installed...
392 */
393
ecdc0628 394 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
395 node;
396 node = (help_node_t *)cupsArrayNext(hi->nodes))
397 if (node->score < 0)
ef416fc2 398 {
399 /*
400 * Delete this node...
401 */
402
ecdc0628 403 cupsArrayRemove(hi->nodes, node);
404 help_delete_node(node);
ef416fc2 405 }
ef416fc2 406
407 /*
ecdc0628 408 * Add nodes to the sorted array...
ef416fc2 409 */
410
ecdc0628 411 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
412 node;
413 node = (help_node_t *)cupsArrayNext(hi->nodes))
414 cupsArrayAdd(hi->sorted, node);
ef416fc2 415
416 /*
ecdc0628 417 * Save the index if we updated it...
ef416fc2 418 */
419
ecdc0628 420 if (update)
421 helpSaveIndex(hi, hifile);
ef416fc2 422
423 /*
424 * Return the index...
425 */
426
427 return (hi);
428}
429
430
431/*
432 * 'helpSaveIndex()' - Save a help index to disk.
433 */
434
435int /* O - 0 on success, -1 on error */
436helpSaveIndex(help_index_t *hi, /* I - Index */
437 const char *hifile) /* I - Index filename */
438{
439 cups_file_t *fp; /* Index file */
ef416fc2 440 help_node_t *node; /* Current node */
f7deaa1a 441 help_word_t *word; /* Current word */
ef416fc2 442
443
85dda01c 444 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
ef416fc2 445
446 /*
447 * Try creating a new index file...
448 */
449
450 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
451 return (-1);
452
453 /*
454 * Lock the file while we write it...
455 */
456
457 cupsFileLock(fp, 1);
458
f7deaa1a 459 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 460
ecdc0628 461 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
462 node;
463 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 464 {
465 /*
466 * Write the current node with/without the anchor...
467 */
468
ef416fc2 469 if (node->anchor)
470 {
471 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
472 node->filename, node->anchor,
473 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
474 node->text) < 0)
475 break;
476 }
477 else
478 {
479 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 480 node->filename, (int)node->mtime,
ef416fc2 481 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
482 node->section ? node->section : "", node->text) < 0)
483 break;
484 }
f7deaa1a 485
486 /*
487 * Then write the words associated with the node...
488 */
489
490 for (word = (help_word_t *)cupsArrayFirst(node->words);
491 word;
492 word = (help_word_t *)cupsArrayNext(node->words))
493 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
494 break;
ef416fc2 495 }
496
ecdc0628 497 cupsFileFlush(fp);
498
ef416fc2 499 if (cupsFileClose(fp) < 0)
500 return (-1);
ecdc0628 501 else if (node)
ef416fc2 502 return (-1);
503 else
504 return (0);
505}
506
507
508/*
509 * 'helpSearchIndex()' - Search an index.
510 */
511
512help_index_t * /* O - Search index */
513helpSearchIndex(help_index_t *hi, /* I - Index */
514 const char *query, /* I - Query string */
515 const char *section, /* I - Limit search to this section */
516 const char *filename) /* I - Limit search to this file */
517{
ef416fc2 518 help_index_t *search; /* Search index */
ecdc0628 519 help_node_t *node; /* Current node */
f7deaa1a 520 help_word_t *word; /* Current word */
ef416fc2 521 void *sc; /* Search context */
522 int matches; /* Number of matches */
523
524
85dda01c
MS
525 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
526 hi, query, filename));
ef416fc2 527
528 /*
529 * Range check...
530 */
531
532 if (!hi || !query)
533 return (NULL);
534
ecdc0628 535 /*
536 * Reset the scores of all nodes to 0...
537 */
538
539 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
540 node;
541 node = (help_node_t *)cupsArrayNext(hi->nodes))
542 node->score = 0;
543
544 /*
545 * Find the first node to search in...
546 */
ef416fc2 547
548 if (filename)
549 {
ecdc0628 550 node = helpFindNode(hi, filename, NULL);
551 if (!node)
ef416fc2 552 return (NULL);
553 }
554 else
ecdc0628 555 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 556
557 /*
558 * Convert the query into a regular expression...
559 */
560
561 sc = cgiCompileSearch(query);
562 if (!sc)
563 return (NULL);
564
565 /*
566 * Allocate a search index...
567 */
568
569 search = calloc(1, sizeof(help_index_t));
570 if (!search)
571 {
572 cgiFreeSearch(sc);
573 return (NULL);
574 }
575
ecdc0628 576 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
577 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 578
ecdc0628 579 if (!search->nodes || !search->sorted)
580 {
581 cupsArrayDelete(search->nodes);
582 cupsArrayDelete(search->sorted);
583 free(search);
584 cgiFreeSearch(sc);
585 return (NULL);
586 }
587
ef416fc2 588 search->search = 1;
589
590 /*
591 * Check each node in the index, adding matching nodes to the
592 * search index...
593 */
594
ecdc0628 595 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
596 if (section && strcmp(node->section, section))
ef416fc2 597 continue;
ecdc0628 598 else if (filename && strcmp(node->filename, filename))
ef416fc2 599 continue;
f7deaa1a 600 else
ef416fc2 601 {
f7deaa1a 602 matches = cgiDoSearch(sc, node->text);
603
604 for (word = (help_word_t *)cupsArrayFirst(node->words);
605 word;
606 word = (help_word_t *)cupsArrayNext(node->words))
607 if (cgiDoSearch(sc, word->text) > 0)
608 matches += word->count;
ef416fc2 609
f7deaa1a 610 if (matches > 0)
611 {
612 /*
613 * Found a match, add the node to the search index...
614 */
ef416fc2 615
f7deaa1a 616 node->score = matches;
617
321d8d57
MS
618 cupsArrayAdd(search->nodes, node);
619 cupsArrayAdd(search->sorted, node);
f7deaa1a 620 }
ef416fc2 621 }
622
623 /*
624 * Free the search context...
625 */
626
627 cgiFreeSearch(sc);
628
ef416fc2 629 /*
630 * Return the results...
631 */
632
633 return (search);
634}
635
636
f7deaa1a 637/*
638 * 'help_add_word()' - Add a word to a node.
639 */
640
641static help_word_t * /* O - New word */
642help_add_word(help_node_t *n, /* I - Node */
643 const char *text) /* I - Word text */
644{
645 help_word_t *w, /* New word */
646 key; /* Search key */
647
648
85dda01c 649 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
f7deaa1a 650
651 /*
652 * Create the words array as needed...
653 */
654
655 if (!n->words)
656 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
657
658 /*
659 * See if the word is already added...
660 */
661
662 key.text = (char *)text;
663
664 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
665 {
666 /*
667 * Create a new word...
668 */
669
670 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
671 return (NULL);
672
673 if ((w->text = strdup(text)) == NULL)
674 {
675 free(w);
676 return (NULL);
677 }
678
679 cupsArrayAdd(n->words, w);
680 }
681
682 /*
683 * Bump the counter for this word and return it...
684 */
685
686 w->count ++;
687
688 return (w);
689}
690
691
ef416fc2 692/*
693 * 'help_delete_node()' - Free all memory used by a node.
694 */
695
696static void
697help_delete_node(help_node_t *n) /* I - Node */
698{
f7deaa1a 699 help_word_t *w; /* Current word */
700
701
85dda01c 702 DEBUG_printf(("2help_delete_node(n=%p)", n));
ef416fc2 703
704 if (!n)
705 return;
706
707 if (n->filename)
708 free(n->filename);
709
710 if (n->anchor)
711 free(n->anchor);
712
713 if (n->section)
714 free(n->section);
715
716 if (n->text)
717 free(n->text);
718
f7deaa1a 719 for (w = (help_word_t *)cupsArrayFirst(n->words);
720 w;
721 w = (help_word_t *)cupsArrayNext(n->words))
722 help_delete_word(w);
723
724 cupsArrayDelete(n->words);
725
ef416fc2 726 free(n);
727}
728
729
f7deaa1a 730/*
731 * 'help_delete_word()' - Free all memory used by a word.
732 */
733
734static void
735help_delete_word(help_word_t *w) /* I - Word */
736{
85dda01c 737 DEBUG_printf(("2help_delete_word(w=%p)", w));
f7deaa1a 738
739 if (!w)
740 return;
741
742 if (w->text)
743 free(w->text);
744
745 free(w);
746}
747
748
ef416fc2 749/*
750 * 'help_load_directory()' - Load a directory of files into an index.
751 */
752
753static int /* O - 0 = success, -1 = error, 1 = updated */
754help_load_directory(
755 help_index_t *hi, /* I - Index */
756 const char *directory, /* I - Directory */
757 const char *relative) /* I - Relative path */
758{
ef416fc2 759 cups_dir_t *dir; /* Directory file */
760 cups_dentry_t *dent; /* Directory entry */
761 char *ext, /* Pointer to extension */
762 filename[1024], /* Full filename */
763 relname[1024]; /* Relative filename */
764 int update; /* Updated? */
ecdc0628 765 help_node_t *node; /* Current node */
ef416fc2 766
767
85dda01c
MS
768 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
769 hi, directory, relative));
ef416fc2 770
771 /*
772 * Open the directory and scan it...
773 */
774
775 if ((dir = cupsDirOpen(directory)) == NULL)
776 return (0);
777
778 update = 0;
779
780 while ((dent = cupsDirRead(dir)) != NULL)
781 {
ecdc0628 782 /*
783 * Skip "." files...
784 */
785
786 if (dent->filename[0] == '.')
787 continue;
788
ef416fc2 789 /*
790 * Get absolute and relative filenames...
791 */
792
793 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
794 if (relative)
795 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
796 else
797 strlcpy(relname, dent->filename, sizeof(relname));
798
799 /*
800 * Check if we have a HTML file...
801 */
802
803 if ((ext = strstr(dent->filename, ".html")) != NULL &&
804 (!ext[5] || !strcmp(ext + 5, ".gz")))
805 {
806 /*
807 * HTML file, see if we have already indexed the file...
808 */
809
810 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
811 {
812 /*
813 * File already indexed - check dates to confirm that the
814 * index is up-to-date...
815 */
816
ecdc0628 817 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 818 {
819 /*
820 * Same modification time, so mark all of the nodes
821 * for this file as up-to-date...
822 */
823
ecdc0628 824 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
825 if (!strcmp(node->filename, relname))
826 node->score = 0;
ef416fc2 827 else
828 break;
829
830 continue;
831 }
832 }
833
834 update = 1;
835
836 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
837 }
838 else if (S_ISDIR(dent->fileinfo.st_mode))
839 {
840 /*
841 * Process sub-directory...
842 */
843
844 if (help_load_directory(hi, filename, relname) == 1)
845 update = 1;
846 }
847 }
848
849 cupsDirClose(dir);
850
851 return (update);
852}
853
854
855/*
856 * 'help_load_file()' - Load a HTML files into an index.
857 */
858
859static int /* O - 0 = success, -1 = error */
860help_load_file(
861 help_index_t *hi, /* I - Index */
862 const char *filename, /* I - Filename */
863 const char *relative, /* I - Relative path */
864 time_t mtime) /* I - Modification time */
865{
866 cups_file_t *fp; /* HTML file */
ecdc0628 867 help_node_t *node; /* Current node */
ef416fc2 868 char line[1024], /* Line from file */
f42414bf 869 temp[1024], /* Temporary word */
ef416fc2 870 section[1024], /* Section */
871 *ptr, /* Pointer into line */
872 *anchor, /* Anchor name */
873 *text; /* Text for anchor */
874 off_t offset; /* File offset */
875 char quote; /* Quote character */
f7deaa1a 876 help_word_t *word; /* Current word */
877 int wordlen; /* Length of word */
ef416fc2 878
879
85dda01c 880 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
2274af38 881 "mtime=%ld)", hi, filename, relative, (long)mtime));
ef416fc2 882
883 if ((fp = cupsFileOpen(filename, "r")) == NULL)
884 return (-1);
885
886 node = NULL;
887 offset = 0;
888
5a9febac 889 strlcpy(section, "Other", sizeof(section));
ef416fc2 890
891 while (cupsFileGets(fp, line, sizeof(line)))
892 {
893 /*
894 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
895 */
896
cfd375ad 897 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
ef416fc2 898 {
899 /*
900 * Got section line, copy it!
901 */
902
cfd375ad 903 for (ptr += 13; isspace(*ptr & 255); ptr ++);
ef416fc2 904
905 strlcpy(section, ptr, sizeof(section));
906 if ((ptr = strstr(section, "-->")) != NULL)
907 {
908 /*
909 * Strip comment stuff from end of line...
910 */
911
912 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
913
914 if (isspace(*ptr & 255))
915 *ptr = '\0';
916 }
917 continue;
918 }
919
920 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
921 {
922 ptr ++;
923
88f9aafc 924 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 925 {
926 /*
927 * Found the title...
928 */
929
930 anchor = NULL;
931 ptr += 6;
932 }
cfd375ad 933 else
ef416fc2 934 {
cfd375ad
MS
935 char *idptr; /* Pointer to ID */
936
937 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
938 ptr += 7;
939 else if ((idptr = strstr(ptr, " ID=")) != NULL)
940 ptr = idptr + 4;
941 else if ((idptr = strstr(ptr, " id=")) != NULL)
942 ptr = idptr + 4;
943 else
944 continue;
945
ef416fc2 946 /*
947 * Found an anchor...
948 */
949
ef416fc2 950 if (*ptr == '\"' || *ptr == '\'')
951 {
952 /*
953 * Get quoted anchor...
954 */
955
956 quote = *ptr;
957 anchor = ptr + 1;
958 if ((ptr = strchr(anchor, quote)) != NULL)
959 *ptr++ = '\0';
960 else
961 break;
962 }
963 else
964 {
965 /*
966 * Get unquoted anchor...
967 */
968
969 anchor = ptr + 1;
970
971 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
972
cfd375ad 973 if (*ptr != '>')
ef416fc2 974 *ptr++ = '\0';
975 else
976 break;
977 }
978
979 /*
980 * Got the anchor, now lets find the end...
981 */
982
983 while (*ptr && *ptr != '>')
984 ptr ++;
985
986 if (*ptr != '>')
987 break;
988
cfd375ad 989 *ptr++ = '\0';
ef416fc2 990 }
ef416fc2 991
992 /*
993 * Now collect text for the link...
994 */
995
996 text = ptr;
997 while ((ptr = strchr(text, '<')) == NULL)
998 {
999 ptr = text + strlen(text);
1000 if (ptr >= (line + sizeof(line) - 2))
1001 break;
1002
1003 *ptr++ = ' ';
1004
7e86f2f6 1005 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 1006 break;
1007 }
1008
1009 *ptr = '\0';
1010
1011 if (node)
7e86f2f6 1012 node->length = (size_t)(offset - node->offset);
ef416fc2 1013
1014 if (!*text)
1015 {
1016 node = NULL;
1017 break;
1018 }
1019
ecdc0628 1020 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1021 {
1022 /*
1023 * Node already in the index, so replace the text and other
1024 * data...
1025 */
1026
ecdc0628 1027 cupsArrayRemove(hi->nodes, node);
ef416fc2 1028
1029 if (node->section)
1030 free(node->section);
1031
1032 if (node->text)
1033 free(node->text);
1034
f7deaa1a 1035 if (node->words)
1036 {
1037 for (word = (help_word_t *)cupsArrayFirst(node->words);
1038 word;
1039 word = (help_word_t *)cupsArrayNext(node->words))
1040 help_delete_word(word);
1041
1042 cupsArrayDelete(node->words);
1043 node->words = NULL;
1044 }
1045
ef416fc2 1046 node->section = section[0] ? strdup(section) : NULL;
1047 node->text = strdup(text);
1048 node->mtime = mtime;
1049 node->offset = offset;
1050 node->score = 0;
1051 }
1052 else
1053 {
1054 /*
1055 * New node...
1056 */
1057
1058 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1059 }
1060
1061 /*
1062 * Go through the text value and replace tabs and newlines with
1063 * whitespace and eliminate extra whitespace...
1064 */
1065
1066 for (ptr = node->text, text = node->text; *ptr;)
1067 if (isspace(*ptr & 255))
1068 {
1069 while (isspace(*ptr & 255))
ed486911 1070 ptr ++;
ef416fc2 1071
1072 *text++ = ' ';
1073 }
1074 else if (text != ptr)
1075 *text++ = *ptr++;
1076 else
1077 {
1078 text ++;
1079 ptr ++;
1080 }
1081
1082 *text = '\0';
1083
ecdc0628 1084 /*
1085 * (Re)add the node to the array...
1086 */
1087
1088 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1089
1090 if (!anchor)
1091 node = NULL;
ef416fc2 1092 break;
1093 }
1094
f7deaa1a 1095 if (node)
1096 {
1097 /*
1098 * Scan this line for words...
1099 */
1100
1101 for (ptr = line; *ptr; ptr ++)
1102 {
1103 /*
1104 * Skip HTML stuff...
1105 */
1106
1107 if (*ptr == '<')
1108 {
1109 if (!strncmp(ptr, "<!--", 4))
1110 {
1111 /*
1112 * Skip HTML comment...
1113 */
1114
1115 if ((text = strstr(ptr + 4, "-->")) == NULL)
1116 ptr += strlen(ptr) - 1;
1117 else
1118 ptr = text + 2;
1119 }
1120 else
1121 {
1122 /*
1123 * Skip HTML element...
1124 */
1125
1126 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1127 {
f7deaa1a 1128 if (*ptr == '\"' || *ptr == '\'')
1129 {
1130 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1131
1132 if (!*ptr)
1133 ptr --;
1134 }
f42414bf 1135 }
f7deaa1a 1136
1137 if (!*ptr)
1138 ptr --;
1139 }
1140
1141 continue;
1142 }
1143 else if (*ptr == '&')
1144 {
1145 /*
1146 * Skip HTML entity...
1147 */
1148
1149 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1150
1151 if (!*ptr)
1152 ptr --;
1153
1154 continue;
1155 }
1156 else if (!isalnum(*ptr & 255))
1157 continue;
1158
1159 /*
1160 * Found the start of a word, search until we find the end...
1161 */
1162
1163 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1164
7e86f2f6 1165 wordlen = (int)(ptr - text);
f7deaa1a 1166
07623986 1167 memcpy(temp, text, (size_t)wordlen);
f42414bf 1168 temp[wordlen] = '\0';
1169
1170 ptr --;
f7deaa1a 1171
f42414bf 1172 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1173 (sizeof(help_common_words) /
1174 sizeof(help_common_words[0])),
1175 sizeof(help_common_words[0]),
1176 (int (*)(const void *, const void *))
88f9aafc 1177 _cups_strcasecmp))
f42414bf 1178 help_add_word(node, temp);
f7deaa1a 1179 }
1180 }
1181
ef416fc2 1182 /*
1183 * Get the offset of the next line...
1184 */
1185
1186 offset = cupsFileTell(fp);
1187 }
1188
1189 cupsFileClose(fp);
1190
1191 if (node)
7e86f2f6 1192 node->length = (size_t)(offset - node->offset);
ef416fc2 1193
1194 return (0);
1195}
1196
1197
1198/*
1199 * 'help_new_node()' - Create a new node and add it to an index.
1200 */
1201
1202static help_node_t * /* O - Node pointer or NULL on error */
1203help_new_node(const char *filename, /* I - Filename */
1204 const char *anchor, /* I - Anchor */
1205 const char *section, /* I - Section */
1206 const char *text, /* I - Text */
1207 time_t mtime, /* I - Modification time */
1208 off_t offset, /* I - Offset in file */
1209 size_t length) /* I - Length in bytes */
1210{
1211 help_node_t *n; /* Node */
1212
1213
85dda01c
MS
1214 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1215 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1216 (long)mtime, (long)offset, (long)length));
ef416fc2 1217
1218 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1219 if (!n)
1220 return (NULL);
1221
1222 n->filename = strdup(filename);
1223 n->anchor = anchor ? strdup(anchor) : NULL;
3bb59731 1224 n->section = *section ? strdup(section) : NULL;
ef416fc2 1225 n->text = strdup(text);
1226 n->mtime = mtime;
1227 n->offset = offset;
1228 n->length = length;
1229
1230 return (n);
1231}
1232
1233
1234/*
1235 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1236 */
1237
1238static int /* O - Difference */
ecdc0628 1239help_sort_by_name(help_node_t *n1, /* I - First node */
1240 help_node_t *n2) /* I - Second node */
ef416fc2 1241{
ef416fc2 1242 int diff; /* Difference */
1243
1244
85dda01c
MS
1245 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1246 n1, n1->filename, n1->anchor,
1247 n2, n2->filename, n2->anchor));
ef416fc2 1248
ecdc0628 1249 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1250 return (diff);
1251
ecdc0628 1252 if (!n1->anchor && !n2->anchor)
ef416fc2 1253 return (0);
ecdc0628 1254 else if (!n1->anchor)
ef416fc2 1255 return (-1);
ecdc0628 1256 else if (!n2->anchor)
ef416fc2 1257 return (1);
1258 else
ecdc0628 1259 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1260}
1261
1262
1263/*
1264 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1265 */
1266
1267static int /* O - Difference */
ecdc0628 1268help_sort_by_score(help_node_t *n1, /* I - First node */
1269 help_node_t *n2) /* I - Second node */
ef416fc2 1270{
ef416fc2 1271 int diff; /* Difference */
1272
1273
85dda01c
MS
1274 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1275 "n2=%p(%d \"%s\" \"%s\")",
1276 n1, n1->score, n1->section, n1->text,
1277 n2, n2->score, n2->section, n2->text));
ef416fc2 1278
ecdc0628 1279 if (n1->score != n2->score)
1f0275e3 1280 return (n2->score - n1->score);
ef416fc2 1281
ecdc0628 1282 if (n1->section && !n2->section)
ef416fc2 1283 return (1);
ecdc0628 1284 else if (!n1->section && n2->section)
ef416fc2 1285 return (-1);
ecdc0628 1286 else if (n1->section && n2->section &&
1287 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1288 return (diff);
1289
88f9aafc 1290 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1291}
1292
1293
1294/*
f7deaa1a 1295 * 'help_sort_words()' - Sort words alphabetically.
1296 */
1297
1298static int /* O - Difference */
1299help_sort_words(help_word_t *w1, /* I - Second word */
1300 help_word_t *w2) /* I - Second word */
1301{
85dda01c 1302 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
f7deaa1a 1303 w1, w1->text, w2, w2->text));
1304
88f9aafc 1305 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1306}