]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Move debug printfs to internal usage only.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
cfd375ad 4 * Copyright 2007-2017 by Apple Inc.
7e86f2f6 5 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 6 *
e3101897 7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
ef416fc2 8 */
9
10/*
11 * Include necessary headers...
12 */
13
14#include "cgi-private.h"
15#include <cups/dir.h>
16
17
/*
 * Common English words that are never indexed.
 *
 * help_load_file() looks words up in this table with bsearch() and a
 * case-insensitive comparison, so the entries must stay lowercase, unique and
 * in ascending alphabetical order.  Each row is a 6-byte string, which limits
 * entries to 5 characters plus the terminating nul.
 *
 * The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about", "all",   "an",    "and",   "are",   "as",
  "at",    "be",    "been",  "but",   "by",    "call",
  "can",   "come",  "could", "day",   "did",   "do",
  "down",  "each",  "find",  "first", "for",   "from",
  "go",    "had",   "has",   "have",  "he",    "her",
  "him",   "his",   "hot",   "how",   "if",    "in",
  "is",    "it",    "know",  "like",  "long",  "look",
  "make",  "many",  "may",   "more",  "most",  "my",
  "no",    "now",   "of",    "on",    "one",   "or",
  "other", "out",   "over",  "said",  "see",   "she",
  "side",  "so",    "some",  "sound", "than",  "that",
  "the",   "their", "them",  "then",  "there", "these",
  "they",  "thing", "this",  "time",  "to",    "two",
  "up",    "use",   "was",   "water", "way",   "we",
  "were",  "what",  "when",  "which", "who",   "will",
  "with",  "word",  "would", "write", "you",   "your"
};
121
122
ef416fc2 123/*
124 * Local functions...
125 */
126
f7deaa1a 127static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 128static void help_delete_node(help_node_t *n);
f7deaa1a 129static void help_delete_word(help_word_t *w);
ef416fc2 130static int help_load_directory(help_index_t *hi,
131 const char *directory,
132 const char *relative);
133static int help_load_file(help_index_t *hi,
134 const char *filename,
135 const char *relative,
136 time_t mtime);
a32af27c 137static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
ecdc0628 138static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
139static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 140static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 141
142
143/*
144 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
145 */
146
147void
ecdc0628 148helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 149{
ecdc0628 150 help_node_t *node; /* Current node */
ef416fc2 151
152
ef416fc2 153 if (!hi)
154 return;
155
ecdc0628 156 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
157 node;
158 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 159 {
ecdc0628 160 if (!hi->search)
161 help_delete_node(node);
162 }
ef416fc2 163
ecdc0628 164 cupsArrayDelete(hi->nodes);
165 cupsArrayDelete(hi->sorted);
ef416fc2 166
167 free(hi);
168}
169
170
171/*
172 * 'helpFindNode()' - Find a node in an index.
173 */
174
ecdc0628 175help_node_t * /* O - Node pointer or NULL */
ef416fc2 176helpFindNode(help_index_t *hi, /* I - Index */
177 const char *filename, /* I - Filename */
178 const char *anchor) /* I - Anchor */
179{
ecdc0628 180 help_node_t key; /* Search key */
ef416fc2 181
182
ef416fc2 183 /*
184 * Range check input...
185 */
186
187 if (!hi || !filename)
188 return (NULL);
189
190 /*
191 * Initialize the search key...
192 */
193
194 key.filename = (char *)filename;
195 key.anchor = (char *)anchor;
ef416fc2 196
197 /*
198 * Return any match...
199 */
200
ecdc0628 201 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 202}
203
204
205/*
206 * 'helpLoadIndex()' - Load a help index from disk.
207 */
208
209help_index_t * /* O - Index pointer or NULL */
210helpLoadIndex(const char *hifile, /* I - Index filename */
211 const char *directory) /* I - Directory that is indexed */
212{
213 help_index_t *hi; /* Help index */
214 cups_file_t *fp; /* Current file */
215 char line[2048], /* Line from file */
216 *ptr, /* Pointer into line */
217 *filename, /* Filename in line */
218 *anchor, /* Anchor in line */
219 *sectptr, /* Section pointer in line */
220 section[1024], /* Section name */
221 *text; /* Text in line */
222 time_t mtime; /* Modification time */
223 off_t offset; /* Offset into file */
224 size_t length; /* Length in bytes */
225 int update; /* Update? */
ef416fc2 226 help_node_t *node; /* Current node */
f7deaa1a 227 help_word_t *word; /* Current word */
ef416fc2 228
229
ef416fc2 230 /*
231 * Create a new, empty index.
232 */
233
ecdc0628 234 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
235 return (NULL);
236
237 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
238 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
239
240 if (!hi->nodes || !hi->sorted)
241 {
242 cupsArrayDelete(hi->nodes);
243 cupsArrayDelete(hi->sorted);
244 free(hi);
245 return (NULL);
246 }
ef416fc2 247
248 /*
249 * Try loading the existing index file...
250 */
251
252 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
253 {
254 /*
255 * Lock the file and then read the first line...
256 */
257
258 cupsFileLock(fp, 1);
259
f7deaa1a 260 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 261 {
262 /*
263 * Got a valid header line, now read the data lines...
264 */
265
f7deaa1a 266 node = NULL;
267
ef416fc2 268 while (cupsFileGets(fp, line, sizeof(line)))
269 {
270 /*
271 * Each line looks like one of the following:
272 *
273 * filename mtime offset length "section" "text"
274 * filename#anchor offset length "text"
f7deaa1a 275 * SP count word
ef416fc2 276 */
277
f7deaa1a 278 if (line[0] == ' ')
ef416fc2 279 {
f7deaa1a 280 /*
281 * Read a word in the current node...
282 */
ef416fc2 283
f7deaa1a 284 if (!node || (ptr = strrchr(line, ' ')) == NULL)
285 continue;
ef416fc2 286
f7deaa1a 287 if ((word = help_add_word(node, ptr + 1)) != NULL)
288 word->count = atoi(line + 1);
289 }
290 else
ef416fc2 291 {
292 /*
f7deaa1a 293 * Add a node...
ef416fc2 294 */
295
f7deaa1a 296 filename = line;
ef416fc2 297
f7deaa1a 298 if ((ptr = strchr(line, ' ')) == NULL)
299 break;
ef416fc2 300
f7deaa1a 301 while (isspace(*ptr & 255))
302 *ptr++ = '\0';
ef416fc2 303
f7deaa1a 304 if ((anchor = strrchr(filename, '#')) != NULL)
305 {
306 *anchor++ = '\0';
307 mtime = 0;
308 }
309 else
310 mtime = strtol(ptr, &ptr, 10);
ef416fc2 311
f7deaa1a 312 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 313 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 314
315 while (isspace(*ptr & 255))
316 ptr ++;
ef416fc2 317
f7deaa1a 318 if (!anchor)
319 {
320 /*
321 * Get section...
322 */
ef416fc2 323
f7deaa1a 324 if (*ptr != '\"')
325 break;
ef416fc2 326
f7deaa1a 327 ptr ++;
328 sectptr = ptr;
ef416fc2 329
f7deaa1a 330 while (*ptr && *ptr != '\"')
331 ptr ++;
332
333 if (*ptr != '\"')
334 break;
ef416fc2 335
f7deaa1a 336 *ptr++ = '\0';
ef416fc2 337
f7deaa1a 338 strlcpy(section, sectptr, sizeof(section));
ef416fc2 339
f7deaa1a 340 while (isspace(*ptr & 255))
341 ptr ++;
342 }
ecdc0628 343
f7deaa1a 344 if (*ptr != '\"')
345 break;
346
347 ptr ++;
348 text = ptr;
349
350 while (*ptr && *ptr != '\"')
351 ptr ++;
352
353 if (*ptr != '\"')
354 break;
355
356 *ptr++ = '\0';
357
358 if ((node = help_new_node(filename, anchor, section, text,
359 mtime, offset, length)) == NULL)
360 break;
361
362 node->score = -1;
363
364 cupsArrayAdd(hi->nodes, node);
365 }
ef416fc2 366 }
367 }
368
369 cupsFileClose(fp);
370 }
371
372 /*
373 * Scan for new/updated files...
374 */
375
376 update = help_load_directory(hi, directory, NULL);
377
378 /*
379 * Remove any files that are no longer installed...
380 */
381
ecdc0628 382 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
383 node;
384 node = (help_node_t *)cupsArrayNext(hi->nodes))
385 if (node->score < 0)
ef416fc2 386 {
387 /*
388 * Delete this node...
389 */
390
ecdc0628 391 cupsArrayRemove(hi->nodes, node);
392 help_delete_node(node);
ef416fc2 393 }
ef416fc2 394
395 /*
ecdc0628 396 * Add nodes to the sorted array...
ef416fc2 397 */
398
ecdc0628 399 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
400 node;
401 node = (help_node_t *)cupsArrayNext(hi->nodes))
402 cupsArrayAdd(hi->sorted, node);
ef416fc2 403
404 /*
ecdc0628 405 * Save the index if we updated it...
ef416fc2 406 */
407
ecdc0628 408 if (update)
409 helpSaveIndex(hi, hifile);
ef416fc2 410
411 /*
412 * Return the index...
413 */
414
415 return (hi);
416}
417
418
419/*
420 * 'helpSaveIndex()' - Save a help index to disk.
421 */
422
423int /* O - 0 on success, -1 on error */
424helpSaveIndex(help_index_t *hi, /* I - Index */
425 const char *hifile) /* I - Index filename */
426{
427 cups_file_t *fp; /* Index file */
ef416fc2 428 help_node_t *node; /* Current node */
f7deaa1a 429 help_word_t *word; /* Current word */
ef416fc2 430
431
ef416fc2 432 /*
433 * Try creating a new index file...
434 */
435
436 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
437 return (-1);
438
439 /*
440 * Lock the file while we write it...
441 */
442
443 cupsFileLock(fp, 1);
444
f7deaa1a 445 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 446
ecdc0628 447 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
448 node;
449 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 450 {
451 /*
452 * Write the current node with/without the anchor...
453 */
454
ef416fc2 455 if (node->anchor)
456 {
457 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
458 node->filename, node->anchor,
459 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
460 node->text) < 0)
461 break;
462 }
463 else
464 {
465 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 466 node->filename, (int)node->mtime,
ef416fc2 467 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
468 node->section ? node->section : "", node->text) < 0)
469 break;
470 }
f7deaa1a 471
472 /*
473 * Then write the words associated with the node...
474 */
475
476 for (word = (help_word_t *)cupsArrayFirst(node->words);
477 word;
478 word = (help_word_t *)cupsArrayNext(node->words))
479 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
480 break;
ef416fc2 481 }
482
ecdc0628 483 cupsFileFlush(fp);
484
ef416fc2 485 if (cupsFileClose(fp) < 0)
486 return (-1);
ecdc0628 487 else if (node)
ef416fc2 488 return (-1);
489 else
490 return (0);
491}
492
493
494/*
495 * 'helpSearchIndex()' - Search an index.
496 */
497
498help_index_t * /* O - Search index */
499helpSearchIndex(help_index_t *hi, /* I - Index */
500 const char *query, /* I - Query string */
501 const char *section, /* I - Limit search to this section */
502 const char *filename) /* I - Limit search to this file */
503{
ef416fc2 504 help_index_t *search; /* Search index */
ecdc0628 505 help_node_t *node; /* Current node */
f7deaa1a 506 help_word_t *word; /* Current word */
ef416fc2 507 void *sc; /* Search context */
508 int matches; /* Number of matches */
509
510
ef416fc2 511 /*
512 * Range check...
513 */
514
515 if (!hi || !query)
516 return (NULL);
517
ecdc0628 518 /*
519 * Reset the scores of all nodes to 0...
520 */
521
522 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
523 node;
524 node = (help_node_t *)cupsArrayNext(hi->nodes))
525 node->score = 0;
526
527 /*
528 * Find the first node to search in...
529 */
ef416fc2 530
531 if (filename)
532 {
ecdc0628 533 node = helpFindNode(hi, filename, NULL);
534 if (!node)
ef416fc2 535 return (NULL);
536 }
537 else
ecdc0628 538 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 539
540 /*
541 * Convert the query into a regular expression...
542 */
543
544 sc = cgiCompileSearch(query);
545 if (!sc)
546 return (NULL);
547
548 /*
549 * Allocate a search index...
550 */
551
552 search = calloc(1, sizeof(help_index_t));
553 if (!search)
554 {
555 cgiFreeSearch(sc);
556 return (NULL);
557 }
558
ecdc0628 559 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
560 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 561
ecdc0628 562 if (!search->nodes || !search->sorted)
563 {
564 cupsArrayDelete(search->nodes);
565 cupsArrayDelete(search->sorted);
566 free(search);
567 cgiFreeSearch(sc);
568 return (NULL);
569 }
570
ef416fc2 571 search->search = 1;
572
573 /*
574 * Check each node in the index, adding matching nodes to the
575 * search index...
576 */
577
ecdc0628 578 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
579 if (section && strcmp(node->section, section))
ef416fc2 580 continue;
ecdc0628 581 else if (filename && strcmp(node->filename, filename))
ef416fc2 582 continue;
f7deaa1a 583 else
ef416fc2 584 {
f7deaa1a 585 matches = cgiDoSearch(sc, node->text);
586
587 for (word = (help_word_t *)cupsArrayFirst(node->words);
588 word;
589 word = (help_word_t *)cupsArrayNext(node->words))
590 if (cgiDoSearch(sc, word->text) > 0)
591 matches += word->count;
ef416fc2 592
f7deaa1a 593 if (matches > 0)
594 {
595 /*
596 * Found a match, add the node to the search index...
597 */
ef416fc2 598
f7deaa1a 599 node->score = matches;
600
321d8d57
MS
601 cupsArrayAdd(search->nodes, node);
602 cupsArrayAdd(search->sorted, node);
f7deaa1a 603 }
ef416fc2 604 }
605
606 /*
607 * Free the search context...
608 */
609
610 cgiFreeSearch(sc);
611
ef416fc2 612 /*
613 * Return the results...
614 */
615
616 return (search);
617}
618
619
f7deaa1a 620/*
621 * 'help_add_word()' - Add a word to a node.
622 */
623
624static help_word_t * /* O - New word */
625help_add_word(help_node_t *n, /* I - Node */
626 const char *text) /* I - Word text */
627{
628 help_word_t *w, /* New word */
629 key; /* Search key */
630
631
f7deaa1a 632 /*
633 * Create the words array as needed...
634 */
635
636 if (!n->words)
637 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
638
639 /*
640 * See if the word is already added...
641 */
642
643 key.text = (char *)text;
644
645 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
646 {
647 /*
648 * Create a new word...
649 */
650
651 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
652 return (NULL);
653
654 if ((w->text = strdup(text)) == NULL)
655 {
656 free(w);
657 return (NULL);
658 }
659
660 cupsArrayAdd(n->words, w);
661 }
662
663 /*
664 * Bump the counter for this word and return it...
665 */
666
667 w->count ++;
668
669 return (w);
670}
671
672
ef416fc2 673/*
674 * 'help_delete_node()' - Free all memory used by a node.
675 */
676
677static void
678help_delete_node(help_node_t *n) /* I - Node */
679{
f7deaa1a 680 help_word_t *w; /* Current word */
681
682
ef416fc2 683 if (!n)
684 return;
685
686 if (n->filename)
687 free(n->filename);
688
689 if (n->anchor)
690 free(n->anchor);
691
692 if (n->section)
693 free(n->section);
694
695 if (n->text)
696 free(n->text);
697
f7deaa1a 698 for (w = (help_word_t *)cupsArrayFirst(n->words);
699 w;
700 w = (help_word_t *)cupsArrayNext(n->words))
701 help_delete_word(w);
702
703 cupsArrayDelete(n->words);
704
ef416fc2 705 free(n);
706}
707
708
f7deaa1a 709/*
710 * 'help_delete_word()' - Free all memory used by a word.
711 */
712
713static void
714help_delete_word(help_word_t *w) /* I - Word */
715{
f7deaa1a 716 if (!w)
717 return;
718
719 if (w->text)
720 free(w->text);
721
722 free(w);
723}
724
725
ef416fc2 726/*
727 * 'help_load_directory()' - Load a directory of files into an index.
728 */
729
730static int /* O - 0 = success, -1 = error, 1 = updated */
731help_load_directory(
732 help_index_t *hi, /* I - Index */
733 const char *directory, /* I - Directory */
734 const char *relative) /* I - Relative path */
735{
ef416fc2 736 cups_dir_t *dir; /* Directory file */
737 cups_dentry_t *dent; /* Directory entry */
738 char *ext, /* Pointer to extension */
739 filename[1024], /* Full filename */
740 relname[1024]; /* Relative filename */
741 int update; /* Updated? */
ecdc0628 742 help_node_t *node; /* Current node */
ef416fc2 743
744
ef416fc2 745 /*
746 * Open the directory and scan it...
747 */
748
749 if ((dir = cupsDirOpen(directory)) == NULL)
750 return (0);
751
752 update = 0;
753
754 while ((dent = cupsDirRead(dir)) != NULL)
755 {
ecdc0628 756 /*
757 * Skip "." files...
758 */
759
760 if (dent->filename[0] == '.')
761 continue;
762
ef416fc2 763 /*
764 * Get absolute and relative filenames...
765 */
766
767 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
768 if (relative)
769 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
770 else
771 strlcpy(relname, dent->filename, sizeof(relname));
772
773 /*
774 * Check if we have a HTML file...
775 */
776
777 if ((ext = strstr(dent->filename, ".html")) != NULL &&
778 (!ext[5] || !strcmp(ext + 5, ".gz")))
779 {
780 /*
781 * HTML file, see if we have already indexed the file...
782 */
783
784 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
785 {
786 /*
787 * File already indexed - check dates to confirm that the
788 * index is up-to-date...
789 */
790
ecdc0628 791 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 792 {
793 /*
794 * Same modification time, so mark all of the nodes
795 * for this file as up-to-date...
796 */
797
ecdc0628 798 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
799 if (!strcmp(node->filename, relname))
800 node->score = 0;
ef416fc2 801 else
802 break;
803
804 continue;
805 }
806 }
807
808 update = 1;
809
810 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
811 }
812 else if (S_ISDIR(dent->fileinfo.st_mode))
813 {
814 /*
815 * Process sub-directory...
816 */
817
818 if (help_load_directory(hi, filename, relname) == 1)
819 update = 1;
820 }
821 }
822
823 cupsDirClose(dir);
824
825 return (update);
826}
827
828
829/*
830 * 'help_load_file()' - Load a HTML files into an index.
831 */
832
833static int /* O - 0 = success, -1 = error */
834help_load_file(
835 help_index_t *hi, /* I - Index */
836 const char *filename, /* I - Filename */
837 const char *relative, /* I - Relative path */
838 time_t mtime) /* I - Modification time */
839{
840 cups_file_t *fp; /* HTML file */
ecdc0628 841 help_node_t *node; /* Current node */
ef416fc2 842 char line[1024], /* Line from file */
f42414bf 843 temp[1024], /* Temporary word */
ef416fc2 844 section[1024], /* Section */
845 *ptr, /* Pointer into line */
846 *anchor, /* Anchor name */
847 *text; /* Text for anchor */
848 off_t offset; /* File offset */
849 char quote; /* Quote character */
f7deaa1a 850 help_word_t *word; /* Current word */
851 int wordlen; /* Length of word */
ef416fc2 852
853
ef416fc2 854 if ((fp = cupsFileOpen(filename, "r")) == NULL)
855 return (-1);
856
857 node = NULL;
858 offset = 0;
859
5a9febac 860 strlcpy(section, "Other", sizeof(section));
ef416fc2 861
862 while (cupsFileGets(fp, line, sizeof(line)))
863 {
864 /*
865 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
866 */
867
cfd375ad 868 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
ef416fc2 869 {
870 /*
871 * Got section line, copy it!
872 */
873
cfd375ad 874 for (ptr += 13; isspace(*ptr & 255); ptr ++);
ef416fc2 875
876 strlcpy(section, ptr, sizeof(section));
877 if ((ptr = strstr(section, "-->")) != NULL)
878 {
879 /*
880 * Strip comment stuff from end of line...
881 */
882
883 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
884
885 if (isspace(*ptr & 255))
886 *ptr = '\0';
887 }
888 continue;
889 }
890
891 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
892 {
893 ptr ++;
894
88f9aafc 895 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 896 {
897 /*
898 * Found the title...
899 */
900
901 anchor = NULL;
902 ptr += 6;
903 }
cfd375ad 904 else
ef416fc2 905 {
cfd375ad
MS
906 char *idptr; /* Pointer to ID */
907
908 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
909 ptr += 7;
910 else if ((idptr = strstr(ptr, " ID=")) != NULL)
911 ptr = idptr + 4;
912 else if ((idptr = strstr(ptr, " id=")) != NULL)
913 ptr = idptr + 4;
914 else
915 continue;
916
ef416fc2 917 /*
918 * Found an anchor...
919 */
920
ef416fc2 921 if (*ptr == '\"' || *ptr == '\'')
922 {
923 /*
924 * Get quoted anchor...
925 */
926
927 quote = *ptr;
928 anchor = ptr + 1;
929 if ((ptr = strchr(anchor, quote)) != NULL)
930 *ptr++ = '\0';
931 else
932 break;
933 }
934 else
935 {
936 /*
937 * Get unquoted anchor...
938 */
939
940 anchor = ptr + 1;
941
942 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
943
cfd375ad 944 if (*ptr != '>')
ef416fc2 945 *ptr++ = '\0';
946 else
947 break;
948 }
949
950 /*
951 * Got the anchor, now lets find the end...
952 */
953
954 while (*ptr && *ptr != '>')
955 ptr ++;
956
957 if (*ptr != '>')
958 break;
959
cfd375ad 960 *ptr++ = '\0';
ef416fc2 961 }
ef416fc2 962
963 /*
964 * Now collect text for the link...
965 */
966
967 text = ptr;
968 while ((ptr = strchr(text, '<')) == NULL)
969 {
970 ptr = text + strlen(text);
971 if (ptr >= (line + sizeof(line) - 2))
972 break;
973
974 *ptr++ = ' ';
975
7e86f2f6 976 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 977 break;
978 }
979
980 *ptr = '\0';
981
982 if (node)
7e86f2f6 983 node->length = (size_t)(offset - node->offset);
ef416fc2 984
985 if (!*text)
986 {
987 node = NULL;
988 break;
989 }
990
ecdc0628 991 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 992 {
993 /*
994 * Node already in the index, so replace the text and other
995 * data...
996 */
997
ecdc0628 998 cupsArrayRemove(hi->nodes, node);
ef416fc2 999
1000 if (node->section)
1001 free(node->section);
1002
1003 if (node->text)
1004 free(node->text);
1005
f7deaa1a 1006 if (node->words)
1007 {
1008 for (word = (help_word_t *)cupsArrayFirst(node->words);
1009 word;
1010 word = (help_word_t *)cupsArrayNext(node->words))
1011 help_delete_word(word);
1012
1013 cupsArrayDelete(node->words);
1014 node->words = NULL;
1015 }
1016
ef416fc2 1017 node->section = section[0] ? strdup(section) : NULL;
1018 node->text = strdup(text);
1019 node->mtime = mtime;
1020 node->offset = offset;
1021 node->score = 0;
1022 }
1023 else
1024 {
1025 /*
1026 * New node...
1027 */
1028
1029 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1030 }
1031
1032 /*
1033 * Go through the text value and replace tabs and newlines with
1034 * whitespace and eliminate extra whitespace...
1035 */
1036
1037 for (ptr = node->text, text = node->text; *ptr;)
1038 if (isspace(*ptr & 255))
1039 {
1040 while (isspace(*ptr & 255))
ed486911 1041 ptr ++;
ef416fc2 1042
1043 *text++ = ' ';
1044 }
1045 else if (text != ptr)
1046 *text++ = *ptr++;
1047 else
1048 {
1049 text ++;
1050 ptr ++;
1051 }
1052
1053 *text = '\0';
1054
ecdc0628 1055 /*
1056 * (Re)add the node to the array...
1057 */
1058
1059 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1060
1061 if (!anchor)
1062 node = NULL;
ef416fc2 1063 break;
1064 }
1065
f7deaa1a 1066 if (node)
1067 {
1068 /*
1069 * Scan this line for words...
1070 */
1071
1072 for (ptr = line; *ptr; ptr ++)
1073 {
1074 /*
1075 * Skip HTML stuff...
1076 */
1077
1078 if (*ptr == '<')
1079 {
1080 if (!strncmp(ptr, "<!--", 4))
1081 {
1082 /*
1083 * Skip HTML comment...
1084 */
1085
1086 if ((text = strstr(ptr + 4, "-->")) == NULL)
1087 ptr += strlen(ptr) - 1;
1088 else
1089 ptr = text + 2;
1090 }
1091 else
1092 {
1093 /*
1094 * Skip HTML element...
1095 */
1096
1097 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1098 {
f7deaa1a 1099 if (*ptr == '\"' || *ptr == '\'')
1100 {
1101 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1102
1103 if (!*ptr)
1104 ptr --;
1105 }
f42414bf 1106 }
f7deaa1a 1107
1108 if (!*ptr)
1109 ptr --;
1110 }
1111
1112 continue;
1113 }
1114 else if (*ptr == '&')
1115 {
1116 /*
1117 * Skip HTML entity...
1118 */
1119
1120 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1121
1122 if (!*ptr)
1123 ptr --;
1124
1125 continue;
1126 }
1127 else if (!isalnum(*ptr & 255))
1128 continue;
1129
1130 /*
1131 * Found the start of a word, search until we find the end...
1132 */
1133
1134 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1135
7e86f2f6 1136 wordlen = (int)(ptr - text);
f7deaa1a 1137
07623986 1138 memcpy(temp, text, (size_t)wordlen);
f42414bf 1139 temp[wordlen] = '\0';
1140
1141 ptr --;
f7deaa1a 1142
f42414bf 1143 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1144 (sizeof(help_common_words) /
1145 sizeof(help_common_words[0])),
1146 sizeof(help_common_words[0]),
1147 (int (*)(const void *, const void *))
88f9aafc 1148 _cups_strcasecmp))
f42414bf 1149 help_add_word(node, temp);
f7deaa1a 1150 }
1151 }
1152
ef416fc2 1153 /*
1154 * Get the offset of the next line...
1155 */
1156
1157 offset = cupsFileTell(fp);
1158 }
1159
1160 cupsFileClose(fp);
1161
1162 if (node)
7e86f2f6 1163 node->length = (size_t)(offset - node->offset);
ef416fc2 1164
1165 return (0);
1166}
1167
1168
1169/*
1170 * 'help_new_node()' - Create a new node and add it to an index.
1171 */
1172
1173static help_node_t * /* O - Node pointer or NULL on error */
1174help_new_node(const char *filename, /* I - Filename */
1175 const char *anchor, /* I - Anchor */
1176 const char *section, /* I - Section */
1177 const char *text, /* I - Text */
1178 time_t mtime, /* I - Modification time */
1179 off_t offset, /* I - Offset in file */
1180 size_t length) /* I - Length in bytes */
1181{
1182 help_node_t *n; /* Node */
1183
1184
ef416fc2 1185 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1186 if (!n)
1187 return (NULL);
1188
1189 n->filename = strdup(filename);
1190 n->anchor = anchor ? strdup(anchor) : NULL;
3bb59731 1191 n->section = *section ? strdup(section) : NULL;
ef416fc2 1192 n->text = strdup(text);
1193 n->mtime = mtime;
1194 n->offset = offset;
1195 n->length = length;
1196
1197 return (n);
1198}
1199
1200
1201/*
1202 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1203 */
1204
1205static int /* O - Difference */
ecdc0628 1206help_sort_by_name(help_node_t *n1, /* I - First node */
1207 help_node_t *n2) /* I - Second node */
ef416fc2 1208{
ef416fc2 1209 int diff; /* Difference */
1210
1211
ecdc0628 1212 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1213 return (diff);
1214
ecdc0628 1215 if (!n1->anchor && !n2->anchor)
ef416fc2 1216 return (0);
ecdc0628 1217 else if (!n1->anchor)
ef416fc2 1218 return (-1);
ecdc0628 1219 else if (!n2->anchor)
ef416fc2 1220 return (1);
1221 else
ecdc0628 1222 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1223}
1224
1225
1226/*
1227 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1228 */
1229
1230static int /* O - Difference */
ecdc0628 1231help_sort_by_score(help_node_t *n1, /* I - First node */
1232 help_node_t *n2) /* I - Second node */
ef416fc2 1233{
ef416fc2 1234 int diff; /* Difference */
1235
1236
ecdc0628 1237 if (n1->score != n2->score)
1f0275e3 1238 return (n2->score - n1->score);
ef416fc2 1239
ecdc0628 1240 if (n1->section && !n2->section)
ef416fc2 1241 return (1);
ecdc0628 1242 else if (!n1->section && n2->section)
ef416fc2 1243 return (-1);
ecdc0628 1244 else if (n1->section && n2->section &&
1245 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1246 return (diff);
1247
88f9aafc 1248 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1249}
1250
1251
1252/*
f7deaa1a 1253 * 'help_sort_words()' - Sort words alphabetically.
1254 */
1255
1256static int /* O - Difference */
1257help_sort_words(help_word_t *w1, /* I - Second word */
1258 help_word_t *w2) /* I - Second word */
1259{
88f9aafc 1260 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1261}