]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Update ipp documentation to reflect the behavior of configuring WiFi on IPP USB printers.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
507c4adc
MS
4 * Copyright © 2007-2019 by Apple Inc.
5 * Copyright © 1997-2007 by Easy Software Products.
ef416fc2 6 *
507c4adc
MS
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more
8 * information.
ef416fc2 9 */
10
11/*
12 * Include necessary headers...
13 */
14
15#include "cgi-private.h"
16#include <cups/dir.h>
17
18
/*
 * List of common English words that should not be indexed.
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries MUST remain in ascending alphabetical order.
 * Every entry must fit in 5 characters plus the terminating nul.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
122
123
ef416fc2 124/*
125 * Local functions...
126 */
127
f7deaa1a 128static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 129static void help_delete_node(help_node_t *n);
f7deaa1a 130static void help_delete_word(help_word_t *w);
ef416fc2 131static int help_load_directory(help_index_t *hi,
132 const char *directory,
133 const char *relative);
134static int help_load_file(help_index_t *hi,
135 const char *filename,
136 const char *relative,
137 time_t mtime);
a32af27c 138static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
ecdc0628 139static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
140static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 141static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 142
143
144/*
145 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
146 */
147
148void
ecdc0628 149helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 150{
ecdc0628 151 help_node_t *node; /* Current node */
ef416fc2 152
153
ef416fc2 154 if (!hi)
155 return;
156
ecdc0628 157 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
158 node;
159 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 160 {
ecdc0628 161 if (!hi->search)
162 help_delete_node(node);
163 }
ef416fc2 164
ecdc0628 165 cupsArrayDelete(hi->nodes);
166 cupsArrayDelete(hi->sorted);
ef416fc2 167
168 free(hi);
169}
170
171
172/*
173 * 'helpFindNode()' - Find a node in an index.
174 */
175
ecdc0628 176help_node_t * /* O - Node pointer or NULL */
ef416fc2 177helpFindNode(help_index_t *hi, /* I - Index */
178 const char *filename, /* I - Filename */
179 const char *anchor) /* I - Anchor */
180{
ecdc0628 181 help_node_t key; /* Search key */
ef416fc2 182
183
ef416fc2 184 /*
185 * Range check input...
186 */
187
188 if (!hi || !filename)
189 return (NULL);
190
191 /*
192 * Initialize the search key...
193 */
194
195 key.filename = (char *)filename;
196 key.anchor = (char *)anchor;
ef416fc2 197
198 /*
199 * Return any match...
200 */
201
ecdc0628 202 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 203}
204
205
206/*
207 * 'helpLoadIndex()' - Load a help index from disk.
208 */
209
210help_index_t * /* O - Index pointer or NULL */
211helpLoadIndex(const char *hifile, /* I - Index filename */
212 const char *directory) /* I - Directory that is indexed */
213{
214 help_index_t *hi; /* Help index */
215 cups_file_t *fp; /* Current file */
216 char line[2048], /* Line from file */
217 *ptr, /* Pointer into line */
218 *filename, /* Filename in line */
219 *anchor, /* Anchor in line */
220 *sectptr, /* Section pointer in line */
221 section[1024], /* Section name */
222 *text; /* Text in line */
223 time_t mtime; /* Modification time */
224 off_t offset; /* Offset into file */
225 size_t length; /* Length in bytes */
226 int update; /* Update? */
ef416fc2 227 help_node_t *node; /* Current node */
f7deaa1a 228 help_word_t *word; /* Current word */
ef416fc2 229
230
ef416fc2 231 /*
232 * Create a new, empty index.
233 */
234
ecdc0628 235 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
236 return (NULL);
237
238 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
239 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
240
241 if (!hi->nodes || !hi->sorted)
242 {
243 cupsArrayDelete(hi->nodes);
244 cupsArrayDelete(hi->sorted);
245 free(hi);
246 return (NULL);
247 }
ef416fc2 248
249 /*
250 * Try loading the existing index file...
251 */
252
253 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
254 {
255 /*
256 * Lock the file and then read the first line...
257 */
258
259 cupsFileLock(fp, 1);
260
f7deaa1a 261 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 262 {
263 /*
264 * Got a valid header line, now read the data lines...
265 */
266
f7deaa1a 267 node = NULL;
268
ef416fc2 269 while (cupsFileGets(fp, line, sizeof(line)))
270 {
271 /*
272 * Each line looks like one of the following:
273 *
274 * filename mtime offset length "section" "text"
275 * filename#anchor offset length "text"
f7deaa1a 276 * SP count word
ef416fc2 277 */
278
f7deaa1a 279 if (line[0] == ' ')
ef416fc2 280 {
f7deaa1a 281 /*
282 * Read a word in the current node...
283 */
ef416fc2 284
f7deaa1a 285 if (!node || (ptr = strrchr(line, ' ')) == NULL)
286 continue;
ef416fc2 287
f7deaa1a 288 if ((word = help_add_word(node, ptr + 1)) != NULL)
289 word->count = atoi(line + 1);
290 }
291 else
ef416fc2 292 {
293 /*
f7deaa1a 294 * Add a node...
ef416fc2 295 */
296
f7deaa1a 297 filename = line;
ef416fc2 298
f7deaa1a 299 if ((ptr = strchr(line, ' ')) == NULL)
300 break;
ef416fc2 301
f7deaa1a 302 while (isspace(*ptr & 255))
303 *ptr++ = '\0';
ef416fc2 304
f7deaa1a 305 if ((anchor = strrchr(filename, '#')) != NULL)
306 {
307 *anchor++ = '\0';
308 mtime = 0;
309 }
310 else
311 mtime = strtol(ptr, &ptr, 10);
ef416fc2 312
f7deaa1a 313 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 314 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 315
316 while (isspace(*ptr & 255))
317 ptr ++;
ef416fc2 318
f7deaa1a 319 if (!anchor)
320 {
321 /*
322 * Get section...
323 */
ef416fc2 324
f7deaa1a 325 if (*ptr != '\"')
326 break;
ef416fc2 327
f7deaa1a 328 ptr ++;
329 sectptr = ptr;
ef416fc2 330
f7deaa1a 331 while (*ptr && *ptr != '\"')
332 ptr ++;
333
334 if (*ptr != '\"')
335 break;
ef416fc2 336
f7deaa1a 337 *ptr++ = '\0';
ef416fc2 338
f7deaa1a 339 strlcpy(section, sectptr, sizeof(section));
ef416fc2 340
f7deaa1a 341 while (isspace(*ptr & 255))
342 ptr ++;
343 }
507c4adc
MS
344 else
345 section[0] = '\0';
ecdc0628 346
f7deaa1a 347 if (*ptr != '\"')
348 break;
349
350 ptr ++;
351 text = ptr;
352
353 while (*ptr && *ptr != '\"')
354 ptr ++;
355
356 if (*ptr != '\"')
357 break;
358
359 *ptr++ = '\0';
360
361 if ((node = help_new_node(filename, anchor, section, text,
362 mtime, offset, length)) == NULL)
363 break;
364
365 node->score = -1;
366
367 cupsArrayAdd(hi->nodes, node);
368 }
ef416fc2 369 }
370 }
371
372 cupsFileClose(fp);
373 }
374
375 /*
376 * Scan for new/updated files...
377 */
378
379 update = help_load_directory(hi, directory, NULL);
380
381 /*
382 * Remove any files that are no longer installed...
383 */
384
ecdc0628 385 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
386 node;
387 node = (help_node_t *)cupsArrayNext(hi->nodes))
388 if (node->score < 0)
ef416fc2 389 {
390 /*
391 * Delete this node...
392 */
393
ecdc0628 394 cupsArrayRemove(hi->nodes, node);
395 help_delete_node(node);
ef416fc2 396 }
ef416fc2 397
398 /*
ecdc0628 399 * Add nodes to the sorted array...
ef416fc2 400 */
401
ecdc0628 402 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
403 node;
404 node = (help_node_t *)cupsArrayNext(hi->nodes))
405 cupsArrayAdd(hi->sorted, node);
ef416fc2 406
407 /*
ecdc0628 408 * Save the index if we updated it...
ef416fc2 409 */
410
ecdc0628 411 if (update)
412 helpSaveIndex(hi, hifile);
ef416fc2 413
414 /*
415 * Return the index...
416 */
417
418 return (hi);
419}
420
421
422/*
423 * 'helpSaveIndex()' - Save a help index to disk.
424 */
425
426int /* O - 0 on success, -1 on error */
427helpSaveIndex(help_index_t *hi, /* I - Index */
428 const char *hifile) /* I - Index filename */
429{
430 cups_file_t *fp; /* Index file */
ef416fc2 431 help_node_t *node; /* Current node */
f7deaa1a 432 help_word_t *word; /* Current word */
ef416fc2 433
434
ef416fc2 435 /*
436 * Try creating a new index file...
437 */
438
439 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
440 return (-1);
441
442 /*
443 * Lock the file while we write it...
444 */
445
446 cupsFileLock(fp, 1);
447
f7deaa1a 448 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 449
ecdc0628 450 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
451 node;
452 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 453 {
454 /*
455 * Write the current node with/without the anchor...
456 */
457
ef416fc2 458 if (node->anchor)
459 {
460 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
461 node->filename, node->anchor,
462 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
463 node->text) < 0)
464 break;
465 }
466 else
467 {
468 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 469 node->filename, (int)node->mtime,
ef416fc2 470 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
471 node->section ? node->section : "", node->text) < 0)
472 break;
473 }
f7deaa1a 474
475 /*
476 * Then write the words associated with the node...
477 */
478
479 for (word = (help_word_t *)cupsArrayFirst(node->words);
480 word;
481 word = (help_word_t *)cupsArrayNext(node->words))
482 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
483 break;
ef416fc2 484 }
485
ecdc0628 486 cupsFileFlush(fp);
487
ef416fc2 488 if (cupsFileClose(fp) < 0)
489 return (-1);
ecdc0628 490 else if (node)
ef416fc2 491 return (-1);
492 else
493 return (0);
494}
495
496
497/*
498 * 'helpSearchIndex()' - Search an index.
499 */
500
501help_index_t * /* O - Search index */
502helpSearchIndex(help_index_t *hi, /* I - Index */
503 const char *query, /* I - Query string */
504 const char *section, /* I - Limit search to this section */
505 const char *filename) /* I - Limit search to this file */
506{
ef416fc2 507 help_index_t *search; /* Search index */
ecdc0628 508 help_node_t *node; /* Current node */
f7deaa1a 509 help_word_t *word; /* Current word */
ef416fc2 510 void *sc; /* Search context */
511 int matches; /* Number of matches */
512
513
ef416fc2 514 /*
515 * Range check...
516 */
517
518 if (!hi || !query)
519 return (NULL);
520
ecdc0628 521 /*
522 * Reset the scores of all nodes to 0...
523 */
524
525 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
526 node;
527 node = (help_node_t *)cupsArrayNext(hi->nodes))
528 node->score = 0;
529
530 /*
531 * Find the first node to search in...
532 */
ef416fc2 533
534 if (filename)
535 {
ecdc0628 536 node = helpFindNode(hi, filename, NULL);
537 if (!node)
ef416fc2 538 return (NULL);
539 }
540 else
ecdc0628 541 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 542
543 /*
544 * Convert the query into a regular expression...
545 */
546
547 sc = cgiCompileSearch(query);
548 if (!sc)
549 return (NULL);
550
551 /*
552 * Allocate a search index...
553 */
554
555 search = calloc(1, sizeof(help_index_t));
556 if (!search)
557 {
558 cgiFreeSearch(sc);
559 return (NULL);
560 }
561
ecdc0628 562 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
563 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 564
ecdc0628 565 if (!search->nodes || !search->sorted)
566 {
567 cupsArrayDelete(search->nodes);
568 cupsArrayDelete(search->sorted);
569 free(search);
570 cgiFreeSearch(sc);
571 return (NULL);
572 }
573
ef416fc2 574 search->search = 1;
575
576 /*
577 * Check each node in the index, adding matching nodes to the
578 * search index...
579 */
580
ecdc0628 581 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
064e50fb 582 if (node->section && section && strcmp(node->section, section))
ef416fc2 583 continue;
ecdc0628 584 else if (filename && strcmp(node->filename, filename))
ef416fc2 585 continue;
f7deaa1a 586 else
ef416fc2 587 {
f7deaa1a 588 matches = cgiDoSearch(sc, node->text);
589
590 for (word = (help_word_t *)cupsArrayFirst(node->words);
591 word;
592 word = (help_word_t *)cupsArrayNext(node->words))
593 if (cgiDoSearch(sc, word->text) > 0)
594 matches += word->count;
ef416fc2 595
f7deaa1a 596 if (matches > 0)
597 {
598 /*
599 * Found a match, add the node to the search index...
600 */
ef416fc2 601
f7deaa1a 602 node->score = matches;
603
321d8d57
MS
604 cupsArrayAdd(search->nodes, node);
605 cupsArrayAdd(search->sorted, node);
f7deaa1a 606 }
ef416fc2 607 }
608
609 /*
610 * Free the search context...
611 */
612
613 cgiFreeSearch(sc);
614
ef416fc2 615 /*
616 * Return the results...
617 */
618
619 return (search);
620}
621
622
f7deaa1a 623/*
624 * 'help_add_word()' - Add a word to a node.
625 */
626
627static help_word_t * /* O - New word */
628help_add_word(help_node_t *n, /* I - Node */
629 const char *text) /* I - Word text */
630{
631 help_word_t *w, /* New word */
632 key; /* Search key */
633
634
f7deaa1a 635 /*
636 * Create the words array as needed...
637 */
638
639 if (!n->words)
640 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
641
642 /*
643 * See if the word is already added...
644 */
645
646 key.text = (char *)text;
647
648 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
649 {
650 /*
651 * Create a new word...
652 */
653
654 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
655 return (NULL);
656
657 if ((w->text = strdup(text)) == NULL)
658 {
659 free(w);
660 return (NULL);
661 }
662
663 cupsArrayAdd(n->words, w);
664 }
665
666 /*
667 * Bump the counter for this word and return it...
668 */
669
670 w->count ++;
671
672 return (w);
673}
674
675
ef416fc2 676/*
677 * 'help_delete_node()' - Free all memory used by a node.
678 */
679
680static void
681help_delete_node(help_node_t *n) /* I - Node */
682{
f7deaa1a 683 help_word_t *w; /* Current word */
684
685
ef416fc2 686 if (!n)
687 return;
688
689 if (n->filename)
690 free(n->filename);
691
692 if (n->anchor)
693 free(n->anchor);
694
695 if (n->section)
696 free(n->section);
697
698 if (n->text)
699 free(n->text);
700
f7deaa1a 701 for (w = (help_word_t *)cupsArrayFirst(n->words);
702 w;
703 w = (help_word_t *)cupsArrayNext(n->words))
704 help_delete_word(w);
705
706 cupsArrayDelete(n->words);
707
ef416fc2 708 free(n);
709}
710
711
f7deaa1a 712/*
713 * 'help_delete_word()' - Free all memory used by a word.
714 */
715
716static void
717help_delete_word(help_word_t *w) /* I - Word */
718{
f7deaa1a 719 if (!w)
720 return;
721
722 if (w->text)
723 free(w->text);
724
725 free(w);
726}
727
728
ef416fc2 729/*
730 * 'help_load_directory()' - Load a directory of files into an index.
731 */
732
733static int /* O - 0 = success, -1 = error, 1 = updated */
734help_load_directory(
735 help_index_t *hi, /* I - Index */
736 const char *directory, /* I - Directory */
737 const char *relative) /* I - Relative path */
738{
ef416fc2 739 cups_dir_t *dir; /* Directory file */
740 cups_dentry_t *dent; /* Directory entry */
741 char *ext, /* Pointer to extension */
742 filename[1024], /* Full filename */
743 relname[1024]; /* Relative filename */
744 int update; /* Updated? */
ecdc0628 745 help_node_t *node; /* Current node */
ef416fc2 746
747
ef416fc2 748 /*
749 * Open the directory and scan it...
750 */
751
752 if ((dir = cupsDirOpen(directory)) == NULL)
753 return (0);
754
755 update = 0;
756
757 while ((dent = cupsDirRead(dir)) != NULL)
758 {
ecdc0628 759 /*
760 * Skip "." files...
761 */
762
763 if (dent->filename[0] == '.')
764 continue;
765
ef416fc2 766 /*
767 * Get absolute and relative filenames...
768 */
769
770 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
771 if (relative)
772 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
773 else
774 strlcpy(relname, dent->filename, sizeof(relname));
775
776 /*
777 * Check if we have a HTML file...
778 */
779
780 if ((ext = strstr(dent->filename, ".html")) != NULL &&
781 (!ext[5] || !strcmp(ext + 5, ".gz")))
782 {
783 /*
784 * HTML file, see if we have already indexed the file...
785 */
786
787 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
788 {
789 /*
790 * File already indexed - check dates to confirm that the
791 * index is up-to-date...
792 */
793
ecdc0628 794 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 795 {
796 /*
797 * Same modification time, so mark all of the nodes
798 * for this file as up-to-date...
799 */
800
ecdc0628 801 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
802 if (!strcmp(node->filename, relname))
803 node->score = 0;
ef416fc2 804 else
805 break;
806
807 continue;
808 }
809 }
810
811 update = 1;
812
813 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
814 }
815 else if (S_ISDIR(dent->fileinfo.st_mode))
816 {
817 /*
818 * Process sub-directory...
819 */
820
821 if (help_load_directory(hi, filename, relname) == 1)
822 update = 1;
823 }
824 }
825
826 cupsDirClose(dir);
827
828 return (update);
829}
830
831
832/*
833 * 'help_load_file()' - Load a HTML files into an index.
834 */
835
836static int /* O - 0 = success, -1 = error */
837help_load_file(
838 help_index_t *hi, /* I - Index */
839 const char *filename, /* I - Filename */
840 const char *relative, /* I - Relative path */
841 time_t mtime) /* I - Modification time */
842{
843 cups_file_t *fp; /* HTML file */
ecdc0628 844 help_node_t *node; /* Current node */
ef416fc2 845 char line[1024], /* Line from file */
f42414bf 846 temp[1024], /* Temporary word */
ef416fc2 847 section[1024], /* Section */
848 *ptr, /* Pointer into line */
849 *anchor, /* Anchor name */
850 *text; /* Text for anchor */
851 off_t offset; /* File offset */
852 char quote; /* Quote character */
f7deaa1a 853 help_word_t *word; /* Current word */
854 int wordlen; /* Length of word */
ef416fc2 855
856
ef416fc2 857 if ((fp = cupsFileOpen(filename, "r")) == NULL)
858 return (-1);
859
860 node = NULL;
861 offset = 0;
862
5a9febac 863 strlcpy(section, "Other", sizeof(section));
ef416fc2 864
865 while (cupsFileGets(fp, line, sizeof(line)))
866 {
867 /*
868 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
869 */
870
cfd375ad 871 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
ef416fc2 872 {
873 /*
874 * Got section line, copy it!
875 */
876
cfd375ad 877 for (ptr += 13; isspace(*ptr & 255); ptr ++);
ef416fc2 878
879 strlcpy(section, ptr, sizeof(section));
880 if ((ptr = strstr(section, "-->")) != NULL)
881 {
882 /*
883 * Strip comment stuff from end of line...
884 */
885
886 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
887
888 if (isspace(*ptr & 255))
889 *ptr = '\0';
890 }
891 continue;
892 }
893
894 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
895 {
896 ptr ++;
897
88f9aafc 898 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 899 {
900 /*
901 * Found the title...
902 */
903
904 anchor = NULL;
905 ptr += 6;
906 }
cfd375ad 907 else
ef416fc2 908 {
cfd375ad
MS
909 char *idptr; /* Pointer to ID */
910
911 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
912 ptr += 7;
913 else if ((idptr = strstr(ptr, " ID=")) != NULL)
914 ptr = idptr + 4;
915 else if ((idptr = strstr(ptr, " id=")) != NULL)
916 ptr = idptr + 4;
917 else
918 continue;
919
ef416fc2 920 /*
921 * Found an anchor...
922 */
923
ef416fc2 924 if (*ptr == '\"' || *ptr == '\'')
925 {
926 /*
927 * Get quoted anchor...
928 */
929
930 quote = *ptr;
931 anchor = ptr + 1;
932 if ((ptr = strchr(anchor, quote)) != NULL)
933 *ptr++ = '\0';
934 else
935 break;
936 }
937 else
938 {
939 /*
940 * Get unquoted anchor...
941 */
942
943 anchor = ptr + 1;
944
945 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
946
cfd375ad 947 if (*ptr != '>')
ef416fc2 948 *ptr++ = '\0';
949 else
950 break;
951 }
952
953 /*
954 * Got the anchor, now lets find the end...
955 */
956
957 while (*ptr && *ptr != '>')
958 ptr ++;
959
960 if (*ptr != '>')
961 break;
962
cfd375ad 963 *ptr++ = '\0';
ef416fc2 964 }
ef416fc2 965
966 /*
967 * Now collect text for the link...
968 */
969
970 text = ptr;
971 while ((ptr = strchr(text, '<')) == NULL)
972 {
973 ptr = text + strlen(text);
974 if (ptr >= (line + sizeof(line) - 2))
975 break;
976
977 *ptr++ = ' ';
978
7e86f2f6 979 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 980 break;
981 }
982
983 *ptr = '\0';
984
985 if (node)
7e86f2f6 986 node->length = (size_t)(offset - node->offset);
ef416fc2 987
988 if (!*text)
989 {
990 node = NULL;
991 break;
992 }
993
ecdc0628 994 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 995 {
996 /*
997 * Node already in the index, so replace the text and other
998 * data...
999 */
1000
ecdc0628 1001 cupsArrayRemove(hi->nodes, node);
ef416fc2 1002
1003 if (node->section)
1004 free(node->section);
1005
1006 if (node->text)
1007 free(node->text);
1008
f7deaa1a 1009 if (node->words)
1010 {
1011 for (word = (help_word_t *)cupsArrayFirst(node->words);
1012 word;
1013 word = (help_word_t *)cupsArrayNext(node->words))
1014 help_delete_word(word);
1015
1016 cupsArrayDelete(node->words);
1017 node->words = NULL;
1018 }
1019
ef416fc2 1020 node->section = section[0] ? strdup(section) : NULL;
1021 node->text = strdup(text);
1022 node->mtime = mtime;
1023 node->offset = offset;
1024 node->score = 0;
1025 }
1026 else
1027 {
1028 /*
1029 * New node...
1030 */
1031
1032 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1033 }
1034
1035 /*
1036 * Go through the text value and replace tabs and newlines with
1037 * whitespace and eliminate extra whitespace...
1038 */
1039
1040 for (ptr = node->text, text = node->text; *ptr;)
1041 if (isspace(*ptr & 255))
1042 {
1043 while (isspace(*ptr & 255))
ed486911 1044 ptr ++;
ef416fc2 1045
1046 *text++ = ' ';
1047 }
1048 else if (text != ptr)
1049 *text++ = *ptr++;
1050 else
1051 {
1052 text ++;
1053 ptr ++;
1054 }
1055
1056 *text = '\0';
1057
ecdc0628 1058 /*
1059 * (Re)add the node to the array...
1060 */
1061
1062 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1063
1064 if (!anchor)
1065 node = NULL;
ef416fc2 1066 break;
1067 }
1068
f7deaa1a 1069 if (node)
1070 {
1071 /*
1072 * Scan this line for words...
1073 */
1074
1075 for (ptr = line; *ptr; ptr ++)
1076 {
1077 /*
1078 * Skip HTML stuff...
1079 */
1080
1081 if (*ptr == '<')
1082 {
1083 if (!strncmp(ptr, "<!--", 4))
1084 {
1085 /*
1086 * Skip HTML comment...
1087 */
1088
1089 if ((text = strstr(ptr + 4, "-->")) == NULL)
1090 ptr += strlen(ptr) - 1;
1091 else
1092 ptr = text + 2;
1093 }
1094 else
1095 {
1096 /*
1097 * Skip HTML element...
1098 */
1099
1100 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1101 {
f7deaa1a 1102 if (*ptr == '\"' || *ptr == '\'')
1103 {
1104 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1105
1106 if (!*ptr)
1107 ptr --;
1108 }
f42414bf 1109 }
f7deaa1a 1110
1111 if (!*ptr)
1112 ptr --;
1113 }
1114
1115 continue;
1116 }
1117 else if (*ptr == '&')
1118 {
1119 /*
1120 * Skip HTML entity...
1121 */
1122
1123 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1124
1125 if (!*ptr)
1126 ptr --;
1127
1128 continue;
1129 }
1130 else if (!isalnum(*ptr & 255))
1131 continue;
1132
1133 /*
1134 * Found the start of a word, search until we find the end...
1135 */
1136
1137 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1138
7e86f2f6 1139 wordlen = (int)(ptr - text);
f7deaa1a 1140
07623986 1141 memcpy(temp, text, (size_t)wordlen);
f42414bf 1142 temp[wordlen] = '\0';
1143
1144 ptr --;
f7deaa1a 1145
f42414bf 1146 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1147 (sizeof(help_common_words) /
1148 sizeof(help_common_words[0])),
1149 sizeof(help_common_words[0]),
1150 (int (*)(const void *, const void *))
88f9aafc 1151 _cups_strcasecmp))
f42414bf 1152 help_add_word(node, temp);
f7deaa1a 1153 }
1154 }
1155
ef416fc2 1156 /*
1157 * Get the offset of the next line...
1158 */
1159
1160 offset = cupsFileTell(fp);
1161 }
1162
1163 cupsFileClose(fp);
1164
1165 if (node)
7e86f2f6 1166 node->length = (size_t)(offset - node->offset);
ef416fc2 1167
1168 return (0);
1169}
1170
1171
1172/*
1173 * 'help_new_node()' - Create a new node and add it to an index.
1174 */
1175
1176static help_node_t * /* O - Node pointer or NULL on error */
1177help_new_node(const char *filename, /* I - Filename */
1178 const char *anchor, /* I - Anchor */
1179 const char *section, /* I - Section */
1180 const char *text, /* I - Text */
1181 time_t mtime, /* I - Modification time */
1182 off_t offset, /* I - Offset in file */
1183 size_t length) /* I - Length in bytes */
1184{
1185 help_node_t *n; /* Node */
1186
1187
ef416fc2 1188 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1189 if (!n)
1190 return (NULL);
1191
1192 n->filename = strdup(filename);
1193 n->anchor = anchor ? strdup(anchor) : NULL;
507c4adc 1194 n->section = (section && *section) ? strdup(section) : NULL;
ef416fc2 1195 n->text = strdup(text);
1196 n->mtime = mtime;
1197 n->offset = offset;
1198 n->length = length;
1199
1200 return (n);
1201}
1202
1203
1204/*
1205 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1206 */
1207
1208static int /* O - Difference */
ecdc0628 1209help_sort_by_name(help_node_t *n1, /* I - First node */
1210 help_node_t *n2) /* I - Second node */
ef416fc2 1211{
ef416fc2 1212 int diff; /* Difference */
1213
1214
ecdc0628 1215 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1216 return (diff);
1217
ecdc0628 1218 if (!n1->anchor && !n2->anchor)
ef416fc2 1219 return (0);
ecdc0628 1220 else if (!n1->anchor)
ef416fc2 1221 return (-1);
ecdc0628 1222 else if (!n2->anchor)
ef416fc2 1223 return (1);
1224 else
ecdc0628 1225 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1226}
1227
1228
1229/*
1230 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1231 */
1232
1233static int /* O - Difference */
ecdc0628 1234help_sort_by_score(help_node_t *n1, /* I - First node */
1235 help_node_t *n2) /* I - Second node */
ef416fc2 1236{
ef416fc2 1237 int diff; /* Difference */
1238
1239
ecdc0628 1240 if (n1->score != n2->score)
1f0275e3 1241 return (n2->score - n1->score);
ef416fc2 1242
ecdc0628 1243 if (n1->section && !n2->section)
ef416fc2 1244 return (1);
ecdc0628 1245 else if (!n1->section && n2->section)
ef416fc2 1246 return (-1);
ecdc0628 1247 else if (n1->section && n2->section &&
1248 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1249 return (diff);
1250
88f9aafc 1251 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1252}
1253
1254
1255/*
f7deaa1a 1256 * 'help_sort_words()' - Sort words alphabetically.
1257 */
1258
1259static int /* O - Difference */
1260help_sort_words(help_word_t *w1, /* I - Second word */
1261 help_word_t *w2) /* I - Second word */
1262{
88f9aafc 1263 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1264}