]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Add new dynamic mDNSResponder stub.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
cfd375ad 4 * Copyright 2007-2017 by Apple Inc.
7e86f2f6 5 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 6 *
e3101897 7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
ef416fc2 8 */
9
10/*
11 * Include necessary headers...
12 */
13
14#include "cgi-private.h"
15#include <cups/dir.h>
16
17
/*
 * List of common English words that should not be indexed...
 *
 * The table is looked up with bsearch() in help_load_file(), so it MUST
 * stay sorted alphabetically.  Every entry must also fit in 6 bytes
 * including the terminating nul (i.e. at most 5 letters).  The table is
 * never modified, so it is declared const.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121
122
ef416fc2 123/*
124 * Local functions...
125 */
126
f7deaa1a 127static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 128static void help_delete_node(help_node_t *n);
f7deaa1a 129static void help_delete_word(help_word_t *w);
ef416fc2 130static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
a32af27c 137static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
ecdc0628 138static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
139static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 140static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 141
142
143/*
144 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
145 */
146
147void
ecdc0628 148helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 149{
ecdc0628 150 help_node_t *node; /* Current node */
ef416fc2 151
152
85dda01c 153 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
ef416fc2 154
155 if (!hi)
156 return;
157
ecdc0628 158 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159 node;
160 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 161 {
ecdc0628 162 if (!hi->search)
163 help_delete_node(node);
164 }
ef416fc2 165
ecdc0628 166 cupsArrayDelete(hi->nodes);
167 cupsArrayDelete(hi->sorted);
ef416fc2 168
169 free(hi);
170}
171
172
173/*
174 * 'helpFindNode()' - Find a node in an index.
175 */
176
ecdc0628 177help_node_t * /* O - Node pointer or NULL */
ef416fc2 178helpFindNode(help_index_t *hi, /* I - Index */
179 const char *filename, /* I - Filename */
180 const char *anchor) /* I - Anchor */
181{
ecdc0628 182 help_node_t key; /* Search key */
ef416fc2 183
184
85dda01c
MS
185 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
186 hi, filename, anchor));
ef416fc2 187
188 /*
189 * Range check input...
190 */
191
192 if (!hi || !filename)
193 return (NULL);
194
195 /*
196 * Initialize the search key...
197 */
198
199 key.filename = (char *)filename;
200 key.anchor = (char *)anchor;
ef416fc2 201
202 /*
203 * Return any match...
204 */
205
ecdc0628 206 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 207}
208
209
210/*
211 * 'helpLoadIndex()' - Load a help index from disk.
212 */
213
214help_index_t * /* O - Index pointer or NULL */
215helpLoadIndex(const char *hifile, /* I - Index filename */
216 const char *directory) /* I - Directory that is indexed */
217{
218 help_index_t *hi; /* Help index */
219 cups_file_t *fp; /* Current file */
220 char line[2048], /* Line from file */
221 *ptr, /* Pointer into line */
222 *filename, /* Filename in line */
223 *anchor, /* Anchor in line */
224 *sectptr, /* Section pointer in line */
225 section[1024], /* Section name */
226 *text; /* Text in line */
227 time_t mtime; /* Modification time */
228 off_t offset; /* Offset into file */
229 size_t length; /* Length in bytes */
230 int update; /* Update? */
ef416fc2 231 help_node_t *node; /* Current node */
f7deaa1a 232 help_word_t *word; /* Current word */
ef416fc2 233
234
85dda01c 235 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
ef416fc2 236 hifile, directory));
237
238 /*
239 * Create a new, empty index.
240 */
241
ecdc0628 242 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
243 return (NULL);
244
245 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
246 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
247
248 if (!hi->nodes || !hi->sorted)
249 {
250 cupsArrayDelete(hi->nodes);
251 cupsArrayDelete(hi->sorted);
252 free(hi);
253 return (NULL);
254 }
ef416fc2 255
256 /*
257 * Try loading the existing index file...
258 */
259
260 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
261 {
262 /*
263 * Lock the file and then read the first line...
264 */
265
266 cupsFileLock(fp, 1);
267
f7deaa1a 268 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 269 {
270 /*
271 * Got a valid header line, now read the data lines...
272 */
273
f7deaa1a 274 node = NULL;
275
ef416fc2 276 while (cupsFileGets(fp, line, sizeof(line)))
277 {
278 /*
279 * Each line looks like one of the following:
280 *
281 * filename mtime offset length "section" "text"
282 * filename#anchor offset length "text"
f7deaa1a 283 * SP count word
ef416fc2 284 */
285
f7deaa1a 286 if (line[0] == ' ')
ef416fc2 287 {
f7deaa1a 288 /*
289 * Read a word in the current node...
290 */
ef416fc2 291
f7deaa1a 292 if (!node || (ptr = strrchr(line, ' ')) == NULL)
293 continue;
ef416fc2 294
f7deaa1a 295 if ((word = help_add_word(node, ptr + 1)) != NULL)
296 word->count = atoi(line + 1);
297 }
298 else
ef416fc2 299 {
300 /*
f7deaa1a 301 * Add a node...
ef416fc2 302 */
303
f7deaa1a 304 filename = line;
ef416fc2 305
f7deaa1a 306 if ((ptr = strchr(line, ' ')) == NULL)
307 break;
ef416fc2 308
f7deaa1a 309 while (isspace(*ptr & 255))
310 *ptr++ = '\0';
ef416fc2 311
f7deaa1a 312 if ((anchor = strrchr(filename, '#')) != NULL)
313 {
314 *anchor++ = '\0';
315 mtime = 0;
316 }
317 else
318 mtime = strtol(ptr, &ptr, 10);
ef416fc2 319
f7deaa1a 320 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 321 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 322
323 while (isspace(*ptr & 255))
324 ptr ++;
ef416fc2 325
f7deaa1a 326 if (!anchor)
327 {
328 /*
329 * Get section...
330 */
ef416fc2 331
f7deaa1a 332 if (*ptr != '\"')
333 break;
ef416fc2 334
f7deaa1a 335 ptr ++;
336 sectptr = ptr;
ef416fc2 337
f7deaa1a 338 while (*ptr && *ptr != '\"')
339 ptr ++;
340
341 if (*ptr != '\"')
342 break;
ef416fc2 343
f7deaa1a 344 *ptr++ = '\0';
ef416fc2 345
f7deaa1a 346 strlcpy(section, sectptr, sizeof(section));
ef416fc2 347
f7deaa1a 348 while (isspace(*ptr & 255))
349 ptr ++;
350 }
ecdc0628 351
f7deaa1a 352 if (*ptr != '\"')
353 break;
354
355 ptr ++;
356 text = ptr;
357
358 while (*ptr && *ptr != '\"')
359 ptr ++;
360
361 if (*ptr != '\"')
362 break;
363
364 *ptr++ = '\0';
365
366 if ((node = help_new_node(filename, anchor, section, text,
367 mtime, offset, length)) == NULL)
368 break;
369
370 node->score = -1;
371
372 cupsArrayAdd(hi->nodes, node);
373 }
ef416fc2 374 }
375 }
376
377 cupsFileClose(fp);
378 }
379
380 /*
381 * Scan for new/updated files...
382 */
383
384 update = help_load_directory(hi, directory, NULL);
385
386 /*
387 * Remove any files that are no longer installed...
388 */
389
ecdc0628 390 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
391 node;
392 node = (help_node_t *)cupsArrayNext(hi->nodes))
393 if (node->score < 0)
ef416fc2 394 {
395 /*
396 * Delete this node...
397 */
398
ecdc0628 399 cupsArrayRemove(hi->nodes, node);
400 help_delete_node(node);
ef416fc2 401 }
ef416fc2 402
403 /*
ecdc0628 404 * Add nodes to the sorted array...
ef416fc2 405 */
406
ecdc0628 407 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
408 node;
409 node = (help_node_t *)cupsArrayNext(hi->nodes))
410 cupsArrayAdd(hi->sorted, node);
ef416fc2 411
412 /*
ecdc0628 413 * Save the index if we updated it...
ef416fc2 414 */
415
ecdc0628 416 if (update)
417 helpSaveIndex(hi, hifile);
ef416fc2 418
419 /*
420 * Return the index...
421 */
422
423 return (hi);
424}
425
426
427/*
428 * 'helpSaveIndex()' - Save a help index to disk.
429 */
430
431int /* O - 0 on success, -1 on error */
432helpSaveIndex(help_index_t *hi, /* I - Index */
433 const char *hifile) /* I - Index filename */
434{
435 cups_file_t *fp; /* Index file */
ef416fc2 436 help_node_t *node; /* Current node */
f7deaa1a 437 help_word_t *word; /* Current word */
ef416fc2 438
439
85dda01c 440 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
ef416fc2 441
442 /*
443 * Try creating a new index file...
444 */
445
446 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
447 return (-1);
448
449 /*
450 * Lock the file while we write it...
451 */
452
453 cupsFileLock(fp, 1);
454
f7deaa1a 455 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 456
ecdc0628 457 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
458 node;
459 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 460 {
461 /*
462 * Write the current node with/without the anchor...
463 */
464
ef416fc2 465 if (node->anchor)
466 {
467 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
468 node->filename, node->anchor,
469 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
470 node->text) < 0)
471 break;
472 }
473 else
474 {
475 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 476 node->filename, (int)node->mtime,
ef416fc2 477 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
478 node->section ? node->section : "", node->text) < 0)
479 break;
480 }
f7deaa1a 481
482 /*
483 * Then write the words associated with the node...
484 */
485
486 for (word = (help_word_t *)cupsArrayFirst(node->words);
487 word;
488 word = (help_word_t *)cupsArrayNext(node->words))
489 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
490 break;
ef416fc2 491 }
492
ecdc0628 493 cupsFileFlush(fp);
494
ef416fc2 495 if (cupsFileClose(fp) < 0)
496 return (-1);
ecdc0628 497 else if (node)
ef416fc2 498 return (-1);
499 else
500 return (0);
501}
502
503
504/*
505 * 'helpSearchIndex()' - Search an index.
506 */
507
508help_index_t * /* O - Search index */
509helpSearchIndex(help_index_t *hi, /* I - Index */
510 const char *query, /* I - Query string */
511 const char *section, /* I - Limit search to this section */
512 const char *filename) /* I - Limit search to this file */
513{
ef416fc2 514 help_index_t *search; /* Search index */
ecdc0628 515 help_node_t *node; /* Current node */
f7deaa1a 516 help_word_t *word; /* Current word */
ef416fc2 517 void *sc; /* Search context */
518 int matches; /* Number of matches */
519
520
85dda01c
MS
521 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
522 hi, query, filename));
ef416fc2 523
524 /*
525 * Range check...
526 */
527
528 if (!hi || !query)
529 return (NULL);
530
ecdc0628 531 /*
532 * Reset the scores of all nodes to 0...
533 */
534
535 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
536 node;
537 node = (help_node_t *)cupsArrayNext(hi->nodes))
538 node->score = 0;
539
540 /*
541 * Find the first node to search in...
542 */
ef416fc2 543
544 if (filename)
545 {
ecdc0628 546 node = helpFindNode(hi, filename, NULL);
547 if (!node)
ef416fc2 548 return (NULL);
549 }
550 else
ecdc0628 551 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 552
553 /*
554 * Convert the query into a regular expression...
555 */
556
557 sc = cgiCompileSearch(query);
558 if (!sc)
559 return (NULL);
560
561 /*
562 * Allocate a search index...
563 */
564
565 search = calloc(1, sizeof(help_index_t));
566 if (!search)
567 {
568 cgiFreeSearch(sc);
569 return (NULL);
570 }
571
ecdc0628 572 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
573 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 574
ecdc0628 575 if (!search->nodes || !search->sorted)
576 {
577 cupsArrayDelete(search->nodes);
578 cupsArrayDelete(search->sorted);
579 free(search);
580 cgiFreeSearch(sc);
581 return (NULL);
582 }
583
ef416fc2 584 search->search = 1;
585
586 /*
587 * Check each node in the index, adding matching nodes to the
588 * search index...
589 */
590
ecdc0628 591 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
592 if (section && strcmp(node->section, section))
ef416fc2 593 continue;
ecdc0628 594 else if (filename && strcmp(node->filename, filename))
ef416fc2 595 continue;
f7deaa1a 596 else
ef416fc2 597 {
f7deaa1a 598 matches = cgiDoSearch(sc, node->text);
599
600 for (word = (help_word_t *)cupsArrayFirst(node->words);
601 word;
602 word = (help_word_t *)cupsArrayNext(node->words))
603 if (cgiDoSearch(sc, word->text) > 0)
604 matches += word->count;
ef416fc2 605
f7deaa1a 606 if (matches > 0)
607 {
608 /*
609 * Found a match, add the node to the search index...
610 */
ef416fc2 611
f7deaa1a 612 node->score = matches;
613
321d8d57
MS
614 cupsArrayAdd(search->nodes, node);
615 cupsArrayAdd(search->sorted, node);
f7deaa1a 616 }
ef416fc2 617 }
618
619 /*
620 * Free the search context...
621 */
622
623 cgiFreeSearch(sc);
624
ef416fc2 625 /*
626 * Return the results...
627 */
628
629 return (search);
630}
631
632
f7deaa1a 633/*
634 * 'help_add_word()' - Add a word to a node.
635 */
636
637static help_word_t * /* O - New word */
638help_add_word(help_node_t *n, /* I - Node */
639 const char *text) /* I - Word text */
640{
641 help_word_t *w, /* New word */
642 key; /* Search key */
643
644
85dda01c 645 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
f7deaa1a 646
647 /*
648 * Create the words array as needed...
649 */
650
651 if (!n->words)
652 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
653
654 /*
655 * See if the word is already added...
656 */
657
658 key.text = (char *)text;
659
660 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
661 {
662 /*
663 * Create a new word...
664 */
665
666 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
667 return (NULL);
668
669 if ((w->text = strdup(text)) == NULL)
670 {
671 free(w);
672 return (NULL);
673 }
674
675 cupsArrayAdd(n->words, w);
676 }
677
678 /*
679 * Bump the counter for this word and return it...
680 */
681
682 w->count ++;
683
684 return (w);
685}
686
687
ef416fc2 688/*
689 * 'help_delete_node()' - Free all memory used by a node.
690 */
691
692static void
693help_delete_node(help_node_t *n) /* I - Node */
694{
f7deaa1a 695 help_word_t *w; /* Current word */
696
697
85dda01c 698 DEBUG_printf(("2help_delete_node(n=%p)", n));
ef416fc2 699
700 if (!n)
701 return;
702
703 if (n->filename)
704 free(n->filename);
705
706 if (n->anchor)
707 free(n->anchor);
708
709 if (n->section)
710 free(n->section);
711
712 if (n->text)
713 free(n->text);
714
f7deaa1a 715 for (w = (help_word_t *)cupsArrayFirst(n->words);
716 w;
717 w = (help_word_t *)cupsArrayNext(n->words))
718 help_delete_word(w);
719
720 cupsArrayDelete(n->words);
721
ef416fc2 722 free(n);
723}
724
725
f7deaa1a 726/*
727 * 'help_delete_word()' - Free all memory used by a word.
728 */
729
730static void
731help_delete_word(help_word_t *w) /* I - Word */
732{
85dda01c 733 DEBUG_printf(("2help_delete_word(w=%p)", w));
f7deaa1a 734
735 if (!w)
736 return;
737
738 if (w->text)
739 free(w->text);
740
741 free(w);
742}
743
744
ef416fc2 745/*
746 * 'help_load_directory()' - Load a directory of files into an index.
747 */
748
749static int /* O - 0 = success, -1 = error, 1 = updated */
750help_load_directory(
751 help_index_t *hi, /* I - Index */
752 const char *directory, /* I - Directory */
753 const char *relative) /* I - Relative path */
754{
ef416fc2 755 cups_dir_t *dir; /* Directory file */
756 cups_dentry_t *dent; /* Directory entry */
757 char *ext, /* Pointer to extension */
758 filename[1024], /* Full filename */
759 relname[1024]; /* Relative filename */
760 int update; /* Updated? */
ecdc0628 761 help_node_t *node; /* Current node */
ef416fc2 762
763
85dda01c
MS
764 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
765 hi, directory, relative));
ef416fc2 766
767 /*
768 * Open the directory and scan it...
769 */
770
771 if ((dir = cupsDirOpen(directory)) == NULL)
772 return (0);
773
774 update = 0;
775
776 while ((dent = cupsDirRead(dir)) != NULL)
777 {
ecdc0628 778 /*
779 * Skip "." files...
780 */
781
782 if (dent->filename[0] == '.')
783 continue;
784
ef416fc2 785 /*
786 * Get absolute and relative filenames...
787 */
788
789 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
790 if (relative)
791 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
792 else
793 strlcpy(relname, dent->filename, sizeof(relname));
794
795 /*
796 * Check if we have a HTML file...
797 */
798
799 if ((ext = strstr(dent->filename, ".html")) != NULL &&
800 (!ext[5] || !strcmp(ext + 5, ".gz")))
801 {
802 /*
803 * HTML file, see if we have already indexed the file...
804 */
805
806 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
807 {
808 /*
809 * File already indexed - check dates to confirm that the
810 * index is up-to-date...
811 */
812
ecdc0628 813 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 814 {
815 /*
816 * Same modification time, so mark all of the nodes
817 * for this file as up-to-date...
818 */
819
ecdc0628 820 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
821 if (!strcmp(node->filename, relname))
822 node->score = 0;
ef416fc2 823 else
824 break;
825
826 continue;
827 }
828 }
829
830 update = 1;
831
832 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
833 }
834 else if (S_ISDIR(dent->fileinfo.st_mode))
835 {
836 /*
837 * Process sub-directory...
838 */
839
840 if (help_load_directory(hi, filename, relname) == 1)
841 update = 1;
842 }
843 }
844
845 cupsDirClose(dir);
846
847 return (update);
848}
849
850
851/*
852 * 'help_load_file()' - Load a HTML files into an index.
853 */
854
855static int /* O - 0 = success, -1 = error */
856help_load_file(
857 help_index_t *hi, /* I - Index */
858 const char *filename, /* I - Filename */
859 const char *relative, /* I - Relative path */
860 time_t mtime) /* I - Modification time */
861{
862 cups_file_t *fp; /* HTML file */
ecdc0628 863 help_node_t *node; /* Current node */
ef416fc2 864 char line[1024], /* Line from file */
f42414bf 865 temp[1024], /* Temporary word */
ef416fc2 866 section[1024], /* Section */
867 *ptr, /* Pointer into line */
868 *anchor, /* Anchor name */
869 *text; /* Text for anchor */
870 off_t offset; /* File offset */
871 char quote; /* Quote character */
f7deaa1a 872 help_word_t *word; /* Current word */
873 int wordlen; /* Length of word */
ef416fc2 874
875
85dda01c 876 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
2274af38 877 "mtime=%ld)", hi, filename, relative, (long)mtime));
ef416fc2 878
879 if ((fp = cupsFileOpen(filename, "r")) == NULL)
880 return (-1);
881
882 node = NULL;
883 offset = 0;
884
5a9febac 885 strlcpy(section, "Other", sizeof(section));
ef416fc2 886
887 while (cupsFileGets(fp, line, sizeof(line)))
888 {
889 /*
890 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
891 */
892
cfd375ad 893 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
ef416fc2 894 {
895 /*
896 * Got section line, copy it!
897 */
898
cfd375ad 899 for (ptr += 13; isspace(*ptr & 255); ptr ++);
ef416fc2 900
901 strlcpy(section, ptr, sizeof(section));
902 if ((ptr = strstr(section, "-->")) != NULL)
903 {
904 /*
905 * Strip comment stuff from end of line...
906 */
907
908 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
909
910 if (isspace(*ptr & 255))
911 *ptr = '\0';
912 }
913 continue;
914 }
915
916 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
917 {
918 ptr ++;
919
88f9aafc 920 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 921 {
922 /*
923 * Found the title...
924 */
925
926 anchor = NULL;
927 ptr += 6;
928 }
cfd375ad 929 else
ef416fc2 930 {
cfd375ad
MS
931 char *idptr; /* Pointer to ID */
932
933 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
934 ptr += 7;
935 else if ((idptr = strstr(ptr, " ID=")) != NULL)
936 ptr = idptr + 4;
937 else if ((idptr = strstr(ptr, " id=")) != NULL)
938 ptr = idptr + 4;
939 else
940 continue;
941
ef416fc2 942 /*
943 * Found an anchor...
944 */
945
ef416fc2 946 if (*ptr == '\"' || *ptr == '\'')
947 {
948 /*
949 * Get quoted anchor...
950 */
951
952 quote = *ptr;
953 anchor = ptr + 1;
954 if ((ptr = strchr(anchor, quote)) != NULL)
955 *ptr++ = '\0';
956 else
957 break;
958 }
959 else
960 {
961 /*
962 * Get unquoted anchor...
963 */
964
965 anchor = ptr + 1;
966
967 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
968
cfd375ad 969 if (*ptr != '>')
ef416fc2 970 *ptr++ = '\0';
971 else
972 break;
973 }
974
975 /*
976 * Got the anchor, now lets find the end...
977 */
978
979 while (*ptr && *ptr != '>')
980 ptr ++;
981
982 if (*ptr != '>')
983 break;
984
cfd375ad 985 *ptr++ = '\0';
ef416fc2 986 }
ef416fc2 987
988 /*
989 * Now collect text for the link...
990 */
991
992 text = ptr;
993 while ((ptr = strchr(text, '<')) == NULL)
994 {
995 ptr = text + strlen(text);
996 if (ptr >= (line + sizeof(line) - 2))
997 break;
998
999 *ptr++ = ' ';
1000
7e86f2f6 1001 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 1002 break;
1003 }
1004
1005 *ptr = '\0';
1006
1007 if (node)
7e86f2f6 1008 node->length = (size_t)(offset - node->offset);
ef416fc2 1009
1010 if (!*text)
1011 {
1012 node = NULL;
1013 break;
1014 }
1015
ecdc0628 1016 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1017 {
1018 /*
1019 * Node already in the index, so replace the text and other
1020 * data...
1021 */
1022
ecdc0628 1023 cupsArrayRemove(hi->nodes, node);
ef416fc2 1024
1025 if (node->section)
1026 free(node->section);
1027
1028 if (node->text)
1029 free(node->text);
1030
f7deaa1a 1031 if (node->words)
1032 {
1033 for (word = (help_word_t *)cupsArrayFirst(node->words);
1034 word;
1035 word = (help_word_t *)cupsArrayNext(node->words))
1036 help_delete_word(word);
1037
1038 cupsArrayDelete(node->words);
1039 node->words = NULL;
1040 }
1041
ef416fc2 1042 node->section = section[0] ? strdup(section) : NULL;
1043 node->text = strdup(text);
1044 node->mtime = mtime;
1045 node->offset = offset;
1046 node->score = 0;
1047 }
1048 else
1049 {
1050 /*
1051 * New node...
1052 */
1053
1054 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1055 }
1056
1057 /*
1058 * Go through the text value and replace tabs and newlines with
1059 * whitespace and eliminate extra whitespace...
1060 */
1061
1062 for (ptr = node->text, text = node->text; *ptr;)
1063 if (isspace(*ptr & 255))
1064 {
1065 while (isspace(*ptr & 255))
ed486911 1066 ptr ++;
ef416fc2 1067
1068 *text++ = ' ';
1069 }
1070 else if (text != ptr)
1071 *text++ = *ptr++;
1072 else
1073 {
1074 text ++;
1075 ptr ++;
1076 }
1077
1078 *text = '\0';
1079
ecdc0628 1080 /*
1081 * (Re)add the node to the array...
1082 */
1083
1084 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1085
1086 if (!anchor)
1087 node = NULL;
ef416fc2 1088 break;
1089 }
1090
f7deaa1a 1091 if (node)
1092 {
1093 /*
1094 * Scan this line for words...
1095 */
1096
1097 for (ptr = line; *ptr; ptr ++)
1098 {
1099 /*
1100 * Skip HTML stuff...
1101 */
1102
1103 if (*ptr == '<')
1104 {
1105 if (!strncmp(ptr, "<!--", 4))
1106 {
1107 /*
1108 * Skip HTML comment...
1109 */
1110
1111 if ((text = strstr(ptr + 4, "-->")) == NULL)
1112 ptr += strlen(ptr) - 1;
1113 else
1114 ptr = text + 2;
1115 }
1116 else
1117 {
1118 /*
1119 * Skip HTML element...
1120 */
1121
1122 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1123 {
f7deaa1a 1124 if (*ptr == '\"' || *ptr == '\'')
1125 {
1126 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1127
1128 if (!*ptr)
1129 ptr --;
1130 }
f42414bf 1131 }
f7deaa1a 1132
1133 if (!*ptr)
1134 ptr --;
1135 }
1136
1137 continue;
1138 }
1139 else if (*ptr == '&')
1140 {
1141 /*
1142 * Skip HTML entity...
1143 */
1144
1145 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1146
1147 if (!*ptr)
1148 ptr --;
1149
1150 continue;
1151 }
1152 else if (!isalnum(*ptr & 255))
1153 continue;
1154
1155 /*
1156 * Found the start of a word, search until we find the end...
1157 */
1158
1159 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1160
7e86f2f6 1161 wordlen = (int)(ptr - text);
f7deaa1a 1162
07623986 1163 memcpy(temp, text, (size_t)wordlen);
f42414bf 1164 temp[wordlen] = '\0';
1165
1166 ptr --;
f7deaa1a 1167
f42414bf 1168 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1169 (sizeof(help_common_words) /
1170 sizeof(help_common_words[0])),
1171 sizeof(help_common_words[0]),
1172 (int (*)(const void *, const void *))
88f9aafc 1173 _cups_strcasecmp))
f42414bf 1174 help_add_word(node, temp);
f7deaa1a 1175 }
1176 }
1177
ef416fc2 1178 /*
1179 * Get the offset of the next line...
1180 */
1181
1182 offset = cupsFileTell(fp);
1183 }
1184
1185 cupsFileClose(fp);
1186
1187 if (node)
7e86f2f6 1188 node->length = (size_t)(offset - node->offset);
ef416fc2 1189
1190 return (0);
1191}
1192
1193
1194/*
1195 * 'help_new_node()' - Create a new node and add it to an index.
1196 */
1197
1198static help_node_t * /* O - Node pointer or NULL on error */
1199help_new_node(const char *filename, /* I - Filename */
1200 const char *anchor, /* I - Anchor */
1201 const char *section, /* I - Section */
1202 const char *text, /* I - Text */
1203 time_t mtime, /* I - Modification time */
1204 off_t offset, /* I - Offset in file */
1205 size_t length) /* I - Length in bytes */
1206{
1207 help_node_t *n; /* Node */
1208
1209
85dda01c
MS
1210 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1211 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1212 (long)mtime, (long)offset, (long)length));
ef416fc2 1213
1214 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1215 if (!n)
1216 return (NULL);
1217
1218 n->filename = strdup(filename);
1219 n->anchor = anchor ? strdup(anchor) : NULL;
3bb59731 1220 n->section = *section ? strdup(section) : NULL;
ef416fc2 1221 n->text = strdup(text);
1222 n->mtime = mtime;
1223 n->offset = offset;
1224 n->length = length;
1225
1226 return (n);
1227}
1228
1229
1230/*
1231 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1232 */
1233
1234static int /* O - Difference */
ecdc0628 1235help_sort_by_name(help_node_t *n1, /* I - First node */
1236 help_node_t *n2) /* I - Second node */
ef416fc2 1237{
ef416fc2 1238 int diff; /* Difference */
1239
1240
85dda01c
MS
1241 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1242 n1, n1->filename, n1->anchor,
1243 n2, n2->filename, n2->anchor));
ef416fc2 1244
ecdc0628 1245 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1246 return (diff);
1247
ecdc0628 1248 if (!n1->anchor && !n2->anchor)
ef416fc2 1249 return (0);
ecdc0628 1250 else if (!n1->anchor)
ef416fc2 1251 return (-1);
ecdc0628 1252 else if (!n2->anchor)
ef416fc2 1253 return (1);
1254 else
ecdc0628 1255 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1256}
1257
1258
1259/*
1260 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1261 */
1262
1263static int /* O - Difference */
ecdc0628 1264help_sort_by_score(help_node_t *n1, /* I - First node */
1265 help_node_t *n2) /* I - Second node */
ef416fc2 1266{
ef416fc2 1267 int diff; /* Difference */
1268
1269
85dda01c
MS
1270 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1271 "n2=%p(%d \"%s\" \"%s\")",
1272 n1, n1->score, n1->section, n1->text,
1273 n2, n2->score, n2->section, n2->text));
ef416fc2 1274
ecdc0628 1275 if (n1->score != n2->score)
1f0275e3 1276 return (n2->score - n1->score);
ef416fc2 1277
ecdc0628 1278 if (n1->section && !n2->section)
ef416fc2 1279 return (1);
ecdc0628 1280 else if (!n1->section && n2->section)
ef416fc2 1281 return (-1);
ecdc0628 1282 else if (n1->section && n2->section &&
1283 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1284 return (diff);
1285
88f9aafc 1286 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1287}
1288
1289
1290/*
f7deaa1a 1291 * 'help_sort_words()' - Sort words alphabetically.
1292 */
1293
1294static int /* O - Difference */
1295help_sort_words(help_word_t *w1, /* I - Second word */
1296 help_word_t *w2) /* I - Second word */
1297{
85dda01c 1298 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
f7deaa1a 1299 w1, w1->text, w2, w2->text));
1300
88f9aafc 1301 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1302}