]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Merge pull request #1311 from weblate/weblate-cups-cups
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
/*
 * Online help index routines for CUPS.
 *
 * Copyright © 2020-2024 by OpenPrinting.
 * Copyright © 2007-2019 by Apple Inc.
 * Copyright © 1997-2007 by Easy Software Products.
 *
 * Licensed under Apache License v2.0.  See the file "LICENSE" for more
 * information.
 */

/*
 * Include necessary headers...
 */

#include "cgi-private.h"
#include <cups/dir.h>


/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch(), so the entries must stay in ascending
 * alphabetical order.  Each entry is limited to 5 characters plus the
 * terminating nul.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};


ef416fc2 125/*
126 * Local functions...
127 */
128
f7deaa1a 129static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 130static void help_delete_node(help_node_t *n);
f7deaa1a 131static void help_delete_word(help_word_t *w);
ef416fc2 132static int help_load_directory(help_index_t *hi,
133 const char *directory,
134 const char *relative);
135static int help_load_file(help_index_t *hi,
136 const char *filename,
137 const char *relative,
138 time_t mtime);
a32af27c 139static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
cfe4c0c3
R
140static int help_sort_by_name(help_node_t *p1, help_node_t *p2, void *data);
141static int help_sort_by_score(help_node_t *p1, help_node_t *p2, void *data);
142static int help_sort_words(help_word_t *w1, help_word_t *w2, void *data);
ef416fc2 143
144
145/*
146 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
147 */
148
149void
ecdc0628 150helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 151{
ecdc0628 152 help_node_t *node; /* Current node */
ef416fc2 153
154
ef416fc2 155 if (!hi)
156 return;
157
ecdc0628 158 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
159 node;
160 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 161 {
ecdc0628 162 if (!hi->search)
163 help_delete_node(node);
164 }
ef416fc2 165
ecdc0628 166 cupsArrayDelete(hi->nodes);
167 cupsArrayDelete(hi->sorted);
ef416fc2 168
169 free(hi);
170}
171
172
173/*
174 * 'helpFindNode()' - Find a node in an index.
175 */
176
ecdc0628 177help_node_t * /* O - Node pointer or NULL */
ef416fc2 178helpFindNode(help_index_t *hi, /* I - Index */
179 const char *filename, /* I - Filename */
180 const char *anchor) /* I - Anchor */
181{
ecdc0628 182 help_node_t key; /* Search key */
ef416fc2 183
184
ef416fc2 185 /*
186 * Range check input...
187 */
188
189 if (!hi || !filename)
190 return (NULL);
191
192 /*
193 * Initialize the search key...
194 */
195
196 key.filename = (char *)filename;
197 key.anchor = (char *)anchor;
ef416fc2 198
199 /*
200 * Return any match...
201 */
202
ecdc0628 203 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 204}
205
206
207/*
208 * 'helpLoadIndex()' - Load a help index from disk.
209 */
210
211help_index_t * /* O - Index pointer or NULL */
212helpLoadIndex(const char *hifile, /* I - Index filename */
213 const char *directory) /* I - Directory that is indexed */
214{
215 help_index_t *hi; /* Help index */
216 cups_file_t *fp; /* Current file */
217 char line[2048], /* Line from file */
218 *ptr, /* Pointer into line */
219 *filename, /* Filename in line */
220 *anchor, /* Anchor in line */
221 *sectptr, /* Section pointer in line */
222 section[1024], /* Section name */
223 *text; /* Text in line */
224 time_t mtime; /* Modification time */
225 off_t offset; /* Offset into file */
226 size_t length; /* Length in bytes */
227 int update; /* Update? */
ef416fc2 228 help_node_t *node; /* Current node */
f7deaa1a 229 help_word_t *word; /* Current word */
ef416fc2 230
231
ef416fc2 232 /*
233 * Create a new, empty index.
234 */
235
ecdc0628 236 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
237 return (NULL);
238
239 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
240 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
241
242 if (!hi->nodes || !hi->sorted)
243 {
244 cupsArrayDelete(hi->nodes);
245 cupsArrayDelete(hi->sorted);
246 free(hi);
247 return (NULL);
248 }
ef416fc2 249
250 /*
251 * Try loading the existing index file...
252 */
253
254 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
255 {
256 /*
257 * Lock the file and then read the first line...
258 */
259
260 cupsFileLock(fp, 1);
261
f7deaa1a 262 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 263 {
264 /*
265 * Got a valid header line, now read the data lines...
266 */
267
f7deaa1a 268 node = NULL;
269
ef416fc2 270 while (cupsFileGets(fp, line, sizeof(line)))
271 {
272 /*
273 * Each line looks like one of the following:
274 *
275 * filename mtime offset length "section" "text"
276 * filename#anchor offset length "text"
f7deaa1a 277 * SP count word
ef416fc2 278 */
279
f7deaa1a 280 if (line[0] == ' ')
ef416fc2 281 {
f7deaa1a 282 /*
283 * Read a word in the current node...
284 */
ef416fc2 285
f7deaa1a 286 if (!node || (ptr = strrchr(line, ' ')) == NULL)
287 continue;
ef416fc2 288
f7deaa1a 289 if ((word = help_add_word(node, ptr + 1)) != NULL)
290 word->count = atoi(line + 1);
291 }
292 else
ef416fc2 293 {
294 /*
f7deaa1a 295 * Add a node...
ef416fc2 296 */
297
f7deaa1a 298 filename = line;
ef416fc2 299
f7deaa1a 300 if ((ptr = strchr(line, ' ')) == NULL)
301 break;
ef416fc2 302
f7deaa1a 303 while (isspace(*ptr & 255))
304 *ptr++ = '\0';
ef416fc2 305
f7deaa1a 306 if ((anchor = strrchr(filename, '#')) != NULL)
307 {
308 *anchor++ = '\0';
309 mtime = 0;
310 }
311 else
312 mtime = strtol(ptr, &ptr, 10);
ef416fc2 313
f7deaa1a 314 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 315 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 316
317 while (isspace(*ptr & 255))
318 ptr ++;
ef416fc2 319
f7deaa1a 320 if (!anchor)
321 {
322 /*
323 * Get section...
324 */
ef416fc2 325
f7deaa1a 326 if (*ptr != '\"')
327 break;
ef416fc2 328
f7deaa1a 329 ptr ++;
330 sectptr = ptr;
ef416fc2 331
f7deaa1a 332 while (*ptr && *ptr != '\"')
333 ptr ++;
334
335 if (*ptr != '\"')
336 break;
ef416fc2 337
f7deaa1a 338 *ptr++ = '\0';
ef416fc2 339
6ac4da6b 340 cupsCopyString(section, sectptr, sizeof(section));
ef416fc2 341
f7deaa1a 342 while (isspace(*ptr & 255))
343 ptr ++;
344 }
507c4adc
MS
345 else
346 section[0] = '\0';
ecdc0628 347
f7deaa1a 348 if (*ptr != '\"')
349 break;
350
351 ptr ++;
352 text = ptr;
353
354 while (*ptr && *ptr != '\"')
355 ptr ++;
356
357 if (*ptr != '\"')
358 break;
359
360 *ptr++ = '\0';
361
362 if ((node = help_new_node(filename, anchor, section, text,
363 mtime, offset, length)) == NULL)
364 break;
365
366 node->score = -1;
367
368 cupsArrayAdd(hi->nodes, node);
369 }
ef416fc2 370 }
371 }
372
373 cupsFileClose(fp);
374 }
375
376 /*
377 * Scan for new/updated files...
378 */
379
380 update = help_load_directory(hi, directory, NULL);
381
382 /*
383 * Remove any files that are no longer installed...
384 */
385
ecdc0628 386 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
387 node;
388 node = (help_node_t *)cupsArrayNext(hi->nodes))
389 if (node->score < 0)
ef416fc2 390 {
391 /*
392 * Delete this node...
393 */
394
ecdc0628 395 cupsArrayRemove(hi->nodes, node);
396 help_delete_node(node);
ef416fc2 397 }
ef416fc2 398
399 /*
ecdc0628 400 * Add nodes to the sorted array...
ef416fc2 401 */
402
ecdc0628 403 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
404 node;
405 node = (help_node_t *)cupsArrayNext(hi->nodes))
406 cupsArrayAdd(hi->sorted, node);
ef416fc2 407
408 /*
ecdc0628 409 * Save the index if we updated it...
ef416fc2 410 */
411
ecdc0628 412 if (update)
413 helpSaveIndex(hi, hifile);
ef416fc2 414
415 /*
416 * Return the index...
417 */
418
419 return (hi);
420}
421
422
423/*
424 * 'helpSaveIndex()' - Save a help index to disk.
425 */
426
427int /* O - 0 on success, -1 on error */
428helpSaveIndex(help_index_t *hi, /* I - Index */
429 const char *hifile) /* I - Index filename */
430{
431 cups_file_t *fp; /* Index file */
ef416fc2 432 help_node_t *node; /* Current node */
f7deaa1a 433 help_word_t *word; /* Current word */
ef416fc2 434
435
ef416fc2 436 /*
437 * Try creating a new index file...
438 */
439
440 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
441 return (-1);
442
443 /*
444 * Lock the file while we write it...
445 */
446
447 cupsFileLock(fp, 1);
448
f7deaa1a 449 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 450
ecdc0628 451 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
452 node;
453 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 454 {
455 /*
456 * Write the current node with/without the anchor...
457 */
458
ef416fc2 459 if (node->anchor)
460 {
461 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
462 node->filename, node->anchor,
463 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
464 node->text) < 0)
465 break;
466 }
467 else
468 {
469 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 470 node->filename, (int)node->mtime,
ef416fc2 471 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
472 node->section ? node->section : "", node->text) < 0)
473 break;
474 }
f7deaa1a 475
476 /*
477 * Then write the words associated with the node...
478 */
479
480 for (word = (help_word_t *)cupsArrayFirst(node->words);
481 word;
482 word = (help_word_t *)cupsArrayNext(node->words))
483 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
484 break;
ef416fc2 485 }
486
ecdc0628 487 cupsFileFlush(fp);
488
ef416fc2 489 if (cupsFileClose(fp) < 0)
490 return (-1);
ecdc0628 491 else if (node)
ef416fc2 492 return (-1);
493 else
494 return (0);
495}
496
497
498/*
499 * 'helpSearchIndex()' - Search an index.
500 */
501
502help_index_t * /* O - Search index */
503helpSearchIndex(help_index_t *hi, /* I - Index */
504 const char *query, /* I - Query string */
505 const char *section, /* I - Limit search to this section */
506 const char *filename) /* I - Limit search to this file */
507{
ef416fc2 508 help_index_t *search; /* Search index */
ecdc0628 509 help_node_t *node; /* Current node */
f7deaa1a 510 help_word_t *word; /* Current word */
ef416fc2 511 void *sc; /* Search context */
512 int matches; /* Number of matches */
513
514
ef416fc2 515 /*
516 * Range check...
517 */
518
519 if (!hi || !query)
520 return (NULL);
521
ecdc0628 522 /*
523 * Reset the scores of all nodes to 0...
524 */
525
526 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
527 node;
528 node = (help_node_t *)cupsArrayNext(hi->nodes))
529 node->score = 0;
530
531 /*
532 * Find the first node to search in...
533 */
ef416fc2 534
535 if (filename)
536 {
ecdc0628 537 node = helpFindNode(hi, filename, NULL);
538 if (!node)
ef416fc2 539 return (NULL);
540 }
541 else
ecdc0628 542 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 543
544 /*
545 * Convert the query into a regular expression...
546 */
547
548 sc = cgiCompileSearch(query);
549 if (!sc)
550 return (NULL);
551
552 /*
553 * Allocate a search index...
554 */
555
556 search = calloc(1, sizeof(help_index_t));
557 if (!search)
558 {
559 cgiFreeSearch(sc);
560 return (NULL);
561 }
562
ecdc0628 563 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
564 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 565
ecdc0628 566 if (!search->nodes || !search->sorted)
567 {
568 cupsArrayDelete(search->nodes);
569 cupsArrayDelete(search->sorted);
570 free(search);
571 cgiFreeSearch(sc);
572 return (NULL);
573 }
574
ef416fc2 575 search->search = 1;
576
577 /*
578 * Check each node in the index, adding matching nodes to the
579 * search index...
580 */
581
ecdc0628 582 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
edeb8af8 583 if (node->section && section && strcmp(node->section, section))
ef416fc2 584 continue;
ecdc0628 585 else if (filename && strcmp(node->filename, filename))
ef416fc2 586 continue;
f7deaa1a 587 else
ef416fc2 588 {
f7deaa1a 589 matches = cgiDoSearch(sc, node->text);
590
591 for (word = (help_word_t *)cupsArrayFirst(node->words);
592 word;
593 word = (help_word_t *)cupsArrayNext(node->words))
594 if (cgiDoSearch(sc, word->text) > 0)
595 matches += word->count;
ef416fc2 596
f7deaa1a 597 if (matches > 0)
598 {
599 /*
600 * Found a match, add the node to the search index...
601 */
ef416fc2 602
f7deaa1a 603 node->score = matches;
604
321d8d57
MS
605 cupsArrayAdd(search->nodes, node);
606 cupsArrayAdd(search->sorted, node);
f7deaa1a 607 }
ef416fc2 608 }
609
610 /*
611 * Free the search context...
612 */
613
614 cgiFreeSearch(sc);
615
ef416fc2 616 /*
617 * Return the results...
618 */
619
620 return (search);
621}
622
623
f7deaa1a 624/*
625 * 'help_add_word()' - Add a word to a node.
626 */
627
628static help_word_t * /* O - New word */
629help_add_word(help_node_t *n, /* I - Node */
630 const char *text) /* I - Word text */
631{
632 help_word_t *w, /* New word */
633 key; /* Search key */
634
635
f7deaa1a 636 /*
637 * Create the words array as needed...
638 */
639
640 if (!n->words)
641 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
642
643 /*
644 * See if the word is already added...
645 */
646
647 key.text = (char *)text;
648
649 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
650 {
651 /*
652 * Create a new word...
653 */
654
655 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
656 return (NULL);
657
658 if ((w->text = strdup(text)) == NULL)
659 {
660 free(w);
661 return (NULL);
662 }
663
664 cupsArrayAdd(n->words, w);
665 }
666
667 /*
668 * Bump the counter for this word and return it...
669 */
670
671 w->count ++;
672
673 return (w);
674}
675
676
ef416fc2 677/*
678 * 'help_delete_node()' - Free all memory used by a node.
679 */
680
681static void
682help_delete_node(help_node_t *n) /* I - Node */
683{
f7deaa1a 684 help_word_t *w; /* Current word */
685
686
ef416fc2 687 if (!n)
688 return;
689
690 if (n->filename)
691 free(n->filename);
692
693 if (n->anchor)
694 free(n->anchor);
695
696 if (n->section)
697 free(n->section);
698
699 if (n->text)
700 free(n->text);
701
f7deaa1a 702 for (w = (help_word_t *)cupsArrayFirst(n->words);
703 w;
704 w = (help_word_t *)cupsArrayNext(n->words))
705 help_delete_word(w);
706
707 cupsArrayDelete(n->words);
708
ef416fc2 709 free(n);
710}
711
712
f7deaa1a 713/*
714 * 'help_delete_word()' - Free all memory used by a word.
715 */
716
717static void
718help_delete_word(help_word_t *w) /* I - Word */
719{
f7deaa1a 720 if (!w)
721 return;
722
723 if (w->text)
724 free(w->text);
725
726 free(w);
727}
728
729
ef416fc2 730/*
731 * 'help_load_directory()' - Load a directory of files into an index.
732 */
733
734static int /* O - 0 = success, -1 = error, 1 = updated */
735help_load_directory(
736 help_index_t *hi, /* I - Index */
737 const char *directory, /* I - Directory */
738 const char *relative) /* I - Relative path */
739{
ef416fc2 740 cups_dir_t *dir; /* Directory file */
741 cups_dentry_t *dent; /* Directory entry */
742 char *ext, /* Pointer to extension */
743 filename[1024], /* Full filename */
744 relname[1024]; /* Relative filename */
745 int update; /* Updated? */
ecdc0628 746 help_node_t *node; /* Current node */
ef416fc2 747
748
ef416fc2 749 /*
750 * Open the directory and scan it...
751 */
752
753 if ((dir = cupsDirOpen(directory)) == NULL)
754 return (0);
755
756 update = 0;
757
758 while ((dent = cupsDirRead(dir)) != NULL)
759 {
ecdc0628 760 /*
761 * Skip "." files...
762 */
763
764 if (dent->filename[0] == '.')
765 continue;
766
ef416fc2 767 /*
768 * Get absolute and relative filenames...
769 */
770
771 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
772 if (relative)
773 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
774 else
6ac4da6b 775 cupsCopyString(relname, dent->filename, sizeof(relname));
ef416fc2 776
777 /*
778 * Check if we have a HTML file...
779 */
780
781 if ((ext = strstr(dent->filename, ".html")) != NULL &&
782 (!ext[5] || !strcmp(ext + 5, ".gz")))
783 {
784 /*
785 * HTML file, see if we have already indexed the file...
786 */
787
788 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
789 {
790 /*
791 * File already indexed - check dates to confirm that the
792 * index is up-to-date...
793 */
794
ecdc0628 795 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 796 {
797 /*
798 * Same modification time, so mark all of the nodes
799 * for this file as up-to-date...
800 */
801
ecdc0628 802 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
803 if (!strcmp(node->filename, relname))
804 node->score = 0;
ef416fc2 805 else
806 break;
807
808 continue;
809 }
810 }
811
812 update = 1;
813
814 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
815 }
816 else if (S_ISDIR(dent->fileinfo.st_mode))
817 {
818 /*
819 * Process sub-directory...
820 */
821
822 if (help_load_directory(hi, filename, relname) == 1)
823 update = 1;
824 }
825 }
826
827 cupsDirClose(dir);
828
829 return (update);
830}
831
832
833/*
834 * 'help_load_file()' - Load a HTML files into an index.
835 */
836
837static int /* O - 0 = success, -1 = error */
838help_load_file(
839 help_index_t *hi, /* I - Index */
840 const char *filename, /* I - Filename */
841 const char *relative, /* I - Relative path */
842 time_t mtime) /* I - Modification time */
843{
844 cups_file_t *fp; /* HTML file */
ecdc0628 845 help_node_t *node; /* Current node */
ef416fc2 846 char line[1024], /* Line from file */
f42414bf 847 temp[1024], /* Temporary word */
ef416fc2 848 section[1024], /* Section */
849 *ptr, /* Pointer into line */
850 *anchor, /* Anchor name */
851 *text; /* Text for anchor */
852 off_t offset; /* File offset */
853 char quote; /* Quote character */
f7deaa1a 854 help_word_t *word; /* Current word */
1d3d3807 855 size_t wordlen; /* Length of word */
ef416fc2 856
857
ef416fc2 858 if ((fp = cupsFileOpen(filename, "r")) == NULL)
859 return (-1);
860
861 node = NULL;
862 offset = 0;
863
cc7359ae
MS
864 if (strstr(filename, "/man-") != NULL)
865 cupsCopyString(section, "Man Pages", sizeof(section));
866 else
867 cupsCopyString(section, "Other", sizeof(section));
ef416fc2 868
869 while (cupsFileGets(fp, line, sizeof(line)))
870 {
871 /*
872 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
873 */
874
cfd375ad 875 if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
ef416fc2 876 {
877 /*
878 * Got section line, copy it!
879 */
880
cfd375ad 881 for (ptr += 13; isspace(*ptr & 255); ptr ++);
ef416fc2 882
6ac4da6b 883 cupsCopyString(section, ptr, sizeof(section));
ef416fc2 884 if ((ptr = strstr(section, "-->")) != NULL)
885 {
886 /*
887 * Strip comment stuff from end of line...
888 */
889
890 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
891
892 if (isspace(*ptr & 255))
893 *ptr = '\0';
894 }
895 continue;
896 }
897
898 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
899 {
900 ptr ++;
901
88f9aafc 902 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 903 {
904 /*
905 * Found the title...
906 */
907
908 anchor = NULL;
909 ptr += 6;
910 }
cfd375ad 911 else
ef416fc2 912 {
cfd375ad
MS
913 char *idptr; /* Pointer to ID */
914
915 if (!_cups_strncasecmp(ptr, "A NAME=", 7))
916 ptr += 7;
917 else if ((idptr = strstr(ptr, " ID=")) != NULL)
918 ptr = idptr + 4;
919 else if ((idptr = strstr(ptr, " id=")) != NULL)
920 ptr = idptr + 4;
921 else
922 continue;
923
ef416fc2 924 /*
925 * Found an anchor...
926 */
927
ef416fc2 928 if (*ptr == '\"' || *ptr == '\'')
929 {
930 /*
931 * Get quoted anchor...
932 */
933
934 quote = *ptr;
935 anchor = ptr + 1;
936 if ((ptr = strchr(anchor, quote)) != NULL)
937 *ptr++ = '\0';
938 else
939 break;
940 }
941 else
942 {
943 /*
944 * Get unquoted anchor...
945 */
946
947 anchor = ptr + 1;
948
949 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
950
cfd375ad 951 if (*ptr != '>')
ef416fc2 952 *ptr++ = '\0';
953 else
954 break;
955 }
956
957 /*
958 * Got the anchor, now lets find the end...
959 */
960
961 while (*ptr && *ptr != '>')
962 ptr ++;
963
964 if (*ptr != '>')
965 break;
966
cfd375ad 967 *ptr++ = '\0';
ef416fc2 968 }
ef416fc2 969
970 /*
971 * Now collect text for the link...
972 */
973
974 text = ptr;
975 while ((ptr = strchr(text, '<')) == NULL)
976 {
977 ptr = text + strlen(text);
978 if (ptr >= (line + sizeof(line) - 2))
979 break;
980
981 *ptr++ = ' ';
982
7e86f2f6 983 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 984 break;
985 }
986
987 *ptr = '\0';
988
989 if (node)
7e86f2f6 990 node->length = (size_t)(offset - node->offset);
ef416fc2 991
992 if (!*text)
993 {
994 node = NULL;
995 break;
996 }
997
ecdc0628 998 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 999 {
1000 /*
1001 * Node already in the index, so replace the text and other
1002 * data...
1003 */
1004
ecdc0628 1005 cupsArrayRemove(hi->nodes, node);
ef416fc2 1006
1007 if (node->section)
1008 free(node->section);
1009
1010 if (node->text)
1011 free(node->text);
1012
f7deaa1a 1013 if (node->words)
1014 {
1015 for (word = (help_word_t *)cupsArrayFirst(node->words);
1016 word;
1017 word = (help_word_t *)cupsArrayNext(node->words))
1018 help_delete_word(word);
1019
1020 cupsArrayDelete(node->words);
1021 node->words = NULL;
1022 }
1023
ef416fc2 1024 node->section = section[0] ? strdup(section) : NULL;
1025 node->text = strdup(text);
1026 node->mtime = mtime;
1027 node->offset = offset;
1028 node->score = 0;
1029 }
1030 else
1031 {
1032 /*
1033 * New node...
1034 */
1035
1036 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1037 }
1038
1039 /*
1040 * Go through the text value and replace tabs and newlines with
1041 * whitespace and eliminate extra whitespace...
1042 */
1043
1044 for (ptr = node->text, text = node->text; *ptr;)
1045 if (isspace(*ptr & 255))
1046 {
1047 while (isspace(*ptr & 255))
ed486911 1048 ptr ++;
ef416fc2 1049
1050 *text++ = ' ';
1051 }
1052 else if (text != ptr)
1053 *text++ = *ptr++;
1054 else
1055 {
1056 text ++;
1057 ptr ++;
1058 }
1059
1060 *text = '\0';
1061
ecdc0628 1062 /*
1063 * (Re)add the node to the array...
1064 */
1065
1066 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1067
1068 if (!anchor)
1069 node = NULL;
ef416fc2 1070 break;
1071 }
1072
f7deaa1a 1073 if (node)
1074 {
1075 /*
1076 * Scan this line for words...
1077 */
1078
1079 for (ptr = line; *ptr; ptr ++)
1080 {
1081 /*
1082 * Skip HTML stuff...
1083 */
1084
1085 if (*ptr == '<')
1086 {
1087 if (!strncmp(ptr, "<!--", 4))
1088 {
1089 /*
1090 * Skip HTML comment...
1091 */
1092
1093 if ((text = strstr(ptr + 4, "-->")) == NULL)
1094 ptr += strlen(ptr) - 1;
1095 else
1096 ptr = text + 2;
1097 }
1098 else
1099 {
1100 /*
1101 * Skip HTML element...
1102 */
1103
1104 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1105 {
f7deaa1a 1106 if (*ptr == '\"' || *ptr == '\'')
1107 {
1108 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1109
1110 if (!*ptr)
1111 ptr --;
1112 }
f42414bf 1113 }
f7deaa1a 1114
1115 if (!*ptr)
1116 ptr --;
1117 }
1118
1119 continue;
1120 }
1121 else if (*ptr == '&')
1122 {
1123 /*
1124 * Skip HTML entity...
1125 */
1126
1127 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1128
1129 if (!*ptr)
1130 ptr --;
1131
1132 continue;
1133 }
1134 else if (!isalnum(*ptr & 255))
1135 continue;
1136
1137 /*
1138 * Found the start of a word, search until we find the end...
1139 */
1140
1141 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1142
1d3d3807 1143 wordlen = (size_t)(ptr - text);
f7deaa1a 1144
1d3d3807 1145 memcpy(temp, text, wordlen);
f42414bf 1146 temp[wordlen] = '\0';
1147
1148 ptr --;
f7deaa1a 1149
f42414bf 1150 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1151 (sizeof(help_common_words) /
1152 sizeof(help_common_words[0])),
1153 sizeof(help_common_words[0]),
1154 (int (*)(const void *, const void *))
88f9aafc 1155 _cups_strcasecmp))
f42414bf 1156 help_add_word(node, temp);
f7deaa1a 1157 }
1158 }
1159
ef416fc2 1160 /*
1161 * Get the offset of the next line...
1162 */
1163
1164 offset = cupsFileTell(fp);
1165 }
1166
1167 cupsFileClose(fp);
1168
1169 if (node)
7e86f2f6 1170 node->length = (size_t)(offset - node->offset);
ef416fc2 1171
1172 return (0);
1173}
1174
1175
1176/*
1177 * 'help_new_node()' - Create a new node and add it to an index.
1178 */
1179
1180static help_node_t * /* O - Node pointer or NULL on error */
1181help_new_node(const char *filename, /* I - Filename */
1182 const char *anchor, /* I - Anchor */
1183 const char *section, /* I - Section */
1184 const char *text, /* I - Text */
1185 time_t mtime, /* I - Modification time */
1186 off_t offset, /* I - Offset in file */
1187 size_t length) /* I - Length in bytes */
1188{
1189 help_node_t *n; /* Node */
1190
1191
ef416fc2 1192 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1193 if (!n)
1194 return (NULL);
1195
1196 n->filename = strdup(filename);
1197 n->anchor = anchor ? strdup(anchor) : NULL;
507c4adc 1198 n->section = (section && *section) ? strdup(section) : NULL;
ef416fc2 1199 n->text = strdup(text);
1200 n->mtime = mtime;
1201 n->offset = offset;
1202 n->length = length;
1203
1204 return (n);
1205}
1206
1207
1208/*
1209 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1210 */
1211
d73371a0
MS
1212static int /* O - Difference */
1213help_sort_by_name(
1214 help_node_t *n1, /* I - First node */
1215 help_node_t *n2, /* I - Second node */
1216 void *data) /* Unused */
ef416fc2 1217{
d73371a0 1218 int diff; /* Difference */
ef416fc2 1219
1220
d73371a0
MS
1221 (void)data;
1222
ecdc0628 1223 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1224 return (diff);
1225
ecdc0628 1226 if (!n1->anchor && !n2->anchor)
ef416fc2 1227 return (0);
ecdc0628 1228 else if (!n1->anchor)
ef416fc2 1229 return (-1);
ecdc0628 1230 else if (!n2->anchor)
ef416fc2 1231 return (1);
1232 else
ecdc0628 1233 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1234}
1235
1236
1237/*
1238 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1239 */
1240
cfe4c0c3
R
1241static int /* O - Difference */
1242help_sort_by_score(help_node_t *n1, /* I - First node */
1243 help_node_t *n2, /* I - Second node */
1244 void *data) /* I - Unused */
ef416fc2 1245{
ef416fc2 1246 int diff; /* Difference */
1247
1248
cfe4c0c3
R
1249 (void)data;
1250
ecdc0628 1251 if (n1->score != n2->score)
1f0275e3 1252 return (n2->score - n1->score);
ef416fc2 1253
ecdc0628 1254 if (n1->section && !n2->section)
ef416fc2 1255 return (1);
ecdc0628 1256 else if (!n1->section && n2->section)
ef416fc2 1257 return (-1);
ecdc0628 1258 else if (n1->section && n2->section &&
1259 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1260 return (diff);
1261
88f9aafc 1262 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1263}
1264
1265
1266/*
f7deaa1a 1267 * 'help_sort_words()' - Sort words alphabetically.
1268 */
1269
cfe4c0c3
R
1270static int /* O - Difference */
1271help_sort_words(help_word_t *w1, /* I - Second word */
1272 help_word_t *w2, /* I - Second word */
1273 void *data) /* Unused */
f7deaa1a 1274{
cfe4c0c3 1275 (void)data;
88f9aafc 1276 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1277}