]> git.ipfire.org Git - thirdparty/cups.git/blame - cgi-bin/help-index.c
Fix source file header text duplication text duplication.
[thirdparty/cups.git] / cgi-bin / help-index.c
CommitLineData
ef416fc2 1/*
7e86f2f6 2 * Online help index routines for CUPS.
ef416fc2 3 *
3bb59731 4 * Copyright 2007-2015 by Apple Inc.
7e86f2f6 5 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 6 *
7e86f2f6
MS
7 * These coded instructions, statements, and computer programs are the
8 * property of Apple Inc. and are protected by Federal copyright
9 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
10 * which should have been included with this file. If this file is
57b7b66b 11 * missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 12 */
13
14/*
15 * Include necessary headers...
16 */
17
18#include "cgi-private.h"
19#include <cups/dir.h>
20
21
/*
 * Common English words that are never added to a node's word list.
 *
 * The table is looked up with bsearch(), so the entries MUST stay in
 * ascending alphabetical order.  Each entry has to fit in 5 characters
 * plus the terminating nul.  The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
125
126
ef416fc2 127/*
128 * Local functions...
129 */
130
f7deaa1a 131static help_word_t *help_add_word(help_node_t *n, const char *text);
ef416fc2 132static void help_delete_node(help_node_t *n);
f7deaa1a 133static void help_delete_word(help_word_t *w);
ef416fc2 134static int help_load_directory(help_index_t *hi,
135 const char *directory,
136 const char *relative);
137static int help_load_file(help_index_t *hi,
138 const char *filename,
139 const char *relative,
140 time_t mtime);
141static help_node_t *help_new_node(const char *filename, const char *anchor,
142 const char *section, const char *text,
143 time_t mtime, off_t offset,
85dda01c
MS
144 size_t length)
145 __attribute__((nonnull(1,3,4)));
ecdc0628 146static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
147static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
f7deaa1a 148static int help_sort_words(help_word_t *w1, help_word_t *w2);
ef416fc2 149
150
151/*
152 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
153 */
154
155void
ecdc0628 156helpDeleteIndex(help_index_t *hi) /* I - Help index */
ef416fc2 157{
ecdc0628 158 help_node_t *node; /* Current node */
ef416fc2 159
160
85dda01c 161 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
ef416fc2 162
163 if (!hi)
164 return;
165
ecdc0628 166 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
167 node;
168 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 169 {
ecdc0628 170 if (!hi->search)
171 help_delete_node(node);
172 }
ef416fc2 173
ecdc0628 174 cupsArrayDelete(hi->nodes);
175 cupsArrayDelete(hi->sorted);
ef416fc2 176
177 free(hi);
178}
179
180
181/*
182 * 'helpFindNode()' - Find a node in an index.
183 */
184
ecdc0628 185help_node_t * /* O - Node pointer or NULL */
ef416fc2 186helpFindNode(help_index_t *hi, /* I - Index */
187 const char *filename, /* I - Filename */
188 const char *anchor) /* I - Anchor */
189{
ecdc0628 190 help_node_t key; /* Search key */
ef416fc2 191
192
85dda01c
MS
193 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
194 hi, filename, anchor));
ef416fc2 195
196 /*
197 * Range check input...
198 */
199
200 if (!hi || !filename)
201 return (NULL);
202
203 /*
204 * Initialize the search key...
205 */
206
207 key.filename = (char *)filename;
208 key.anchor = (char *)anchor;
ef416fc2 209
210 /*
211 * Return any match...
212 */
213
ecdc0628 214 return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
ef416fc2 215}
216
217
218/*
219 * 'helpLoadIndex()' - Load a help index from disk.
220 */
221
222help_index_t * /* O - Index pointer or NULL */
223helpLoadIndex(const char *hifile, /* I - Index filename */
224 const char *directory) /* I - Directory that is indexed */
225{
226 help_index_t *hi; /* Help index */
227 cups_file_t *fp; /* Current file */
228 char line[2048], /* Line from file */
229 *ptr, /* Pointer into line */
230 *filename, /* Filename in line */
231 *anchor, /* Anchor in line */
232 *sectptr, /* Section pointer in line */
233 section[1024], /* Section name */
234 *text; /* Text in line */
235 time_t mtime; /* Modification time */
236 off_t offset; /* Offset into file */
237 size_t length; /* Length in bytes */
238 int update; /* Update? */
ef416fc2 239 help_node_t *node; /* Current node */
f7deaa1a 240 help_word_t *word; /* Current word */
ef416fc2 241
242
85dda01c 243 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
ef416fc2 244 hifile, directory));
245
246 /*
247 * Create a new, empty index.
248 */
249
ecdc0628 250 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
251 return (NULL);
252
253 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
254 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
255
256 if (!hi->nodes || !hi->sorted)
257 {
258 cupsArrayDelete(hi->nodes);
259 cupsArrayDelete(hi->sorted);
260 free(hi);
261 return (NULL);
262 }
ef416fc2 263
264 /*
265 * Try loading the existing index file...
266 */
267
268 if ((fp = cupsFileOpen(hifile, "r")) != NULL)
269 {
270 /*
271 * Lock the file and then read the first line...
272 */
273
274 cupsFileLock(fp, 1);
275
f7deaa1a 276 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
ef416fc2 277 {
278 /*
279 * Got a valid header line, now read the data lines...
280 */
281
f7deaa1a 282 node = NULL;
283
ef416fc2 284 while (cupsFileGets(fp, line, sizeof(line)))
285 {
286 /*
287 * Each line looks like one of the following:
288 *
289 * filename mtime offset length "section" "text"
290 * filename#anchor offset length "text"
f7deaa1a 291 * SP count word
ef416fc2 292 */
293
f7deaa1a 294 if (line[0] == ' ')
ef416fc2 295 {
f7deaa1a 296 /*
297 * Read a word in the current node...
298 */
ef416fc2 299
f7deaa1a 300 if (!node || (ptr = strrchr(line, ' ')) == NULL)
301 continue;
ef416fc2 302
f7deaa1a 303 if ((word = help_add_word(node, ptr + 1)) != NULL)
304 word->count = atoi(line + 1);
305 }
306 else
ef416fc2 307 {
308 /*
f7deaa1a 309 * Add a node...
ef416fc2 310 */
311
f7deaa1a 312 filename = line;
ef416fc2 313
f7deaa1a 314 if ((ptr = strchr(line, ' ')) == NULL)
315 break;
ef416fc2 316
f7deaa1a 317 while (isspace(*ptr & 255))
318 *ptr++ = '\0';
ef416fc2 319
f7deaa1a 320 if ((anchor = strrchr(filename, '#')) != NULL)
321 {
322 *anchor++ = '\0';
323 mtime = 0;
324 }
325 else
326 mtime = strtol(ptr, &ptr, 10);
ef416fc2 327
f7deaa1a 328 offset = strtoll(ptr, &ptr, 10);
7e86f2f6 329 length = (size_t)strtoll(ptr, &ptr, 10);
ef416fc2 330
331 while (isspace(*ptr & 255))
332 ptr ++;
ef416fc2 333
f7deaa1a 334 if (!anchor)
335 {
336 /*
337 * Get section...
338 */
ef416fc2 339
f7deaa1a 340 if (*ptr != '\"')
341 break;
ef416fc2 342
f7deaa1a 343 ptr ++;
344 sectptr = ptr;
ef416fc2 345
f7deaa1a 346 while (*ptr && *ptr != '\"')
347 ptr ++;
348
349 if (*ptr != '\"')
350 break;
ef416fc2 351
f7deaa1a 352 *ptr++ = '\0';
ef416fc2 353
f7deaa1a 354 strlcpy(section, sectptr, sizeof(section));
ef416fc2 355
f7deaa1a 356 while (isspace(*ptr & 255))
357 ptr ++;
358 }
ecdc0628 359
f7deaa1a 360 if (*ptr != '\"')
361 break;
362
363 ptr ++;
364 text = ptr;
365
366 while (*ptr && *ptr != '\"')
367 ptr ++;
368
369 if (*ptr != '\"')
370 break;
371
372 *ptr++ = '\0';
373
374 if ((node = help_new_node(filename, anchor, section, text,
375 mtime, offset, length)) == NULL)
376 break;
377
378 node->score = -1;
379
380 cupsArrayAdd(hi->nodes, node);
381 }
ef416fc2 382 }
383 }
384
385 cupsFileClose(fp);
386 }
387
388 /*
389 * Scan for new/updated files...
390 */
391
392 update = help_load_directory(hi, directory, NULL);
393
394 /*
395 * Remove any files that are no longer installed...
396 */
397
ecdc0628 398 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
399 node;
400 node = (help_node_t *)cupsArrayNext(hi->nodes))
401 if (node->score < 0)
ef416fc2 402 {
403 /*
404 * Delete this node...
405 */
406
ecdc0628 407 cupsArrayRemove(hi->nodes, node);
408 help_delete_node(node);
ef416fc2 409 }
ef416fc2 410
411 /*
ecdc0628 412 * Add nodes to the sorted array...
ef416fc2 413 */
414
ecdc0628 415 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
416 node;
417 node = (help_node_t *)cupsArrayNext(hi->nodes))
418 cupsArrayAdd(hi->sorted, node);
ef416fc2 419
420 /*
ecdc0628 421 * Save the index if we updated it...
ef416fc2 422 */
423
ecdc0628 424 if (update)
425 helpSaveIndex(hi, hifile);
ef416fc2 426
427 /*
428 * Return the index...
429 */
430
431 return (hi);
432}
433
434
435/*
436 * 'helpSaveIndex()' - Save a help index to disk.
437 */
438
439int /* O - 0 on success, -1 on error */
440helpSaveIndex(help_index_t *hi, /* I - Index */
441 const char *hifile) /* I - Index filename */
442{
443 cups_file_t *fp; /* Index file */
ef416fc2 444 help_node_t *node; /* Current node */
f7deaa1a 445 help_word_t *word; /* Current word */
ef416fc2 446
447
85dda01c 448 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
ef416fc2 449
450 /*
451 * Try creating a new index file...
452 */
453
454 if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
455 return (-1);
456
457 /*
458 * Lock the file while we write it...
459 */
460
461 cupsFileLock(fp, 1);
462
f7deaa1a 463 cupsFilePuts(fp, "HELPV2\n");
ef416fc2 464
ecdc0628 465 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
466 node;
467 node = (help_node_t *)cupsArrayNext(hi->nodes))
ef416fc2 468 {
469 /*
470 * Write the current node with/without the anchor...
471 */
472
ef416fc2 473 if (node->anchor)
474 {
475 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
476 node->filename, node->anchor,
477 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
478 node->text) < 0)
479 break;
480 }
481 else
482 {
483 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
ae71f5de 484 node->filename, (int)node->mtime,
ef416fc2 485 CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
486 node->section ? node->section : "", node->text) < 0)
487 break;
488 }
f7deaa1a 489
490 /*
491 * Then write the words associated with the node...
492 */
493
494 for (word = (help_word_t *)cupsArrayFirst(node->words);
495 word;
496 word = (help_word_t *)cupsArrayNext(node->words))
497 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
498 break;
ef416fc2 499 }
500
ecdc0628 501 cupsFileFlush(fp);
502
ef416fc2 503 if (cupsFileClose(fp) < 0)
504 return (-1);
ecdc0628 505 else if (node)
ef416fc2 506 return (-1);
507 else
508 return (0);
509}
510
511
512/*
513 * 'helpSearchIndex()' - Search an index.
514 */
515
516help_index_t * /* O - Search index */
517helpSearchIndex(help_index_t *hi, /* I - Index */
518 const char *query, /* I - Query string */
519 const char *section, /* I - Limit search to this section */
520 const char *filename) /* I - Limit search to this file */
521{
ef416fc2 522 help_index_t *search; /* Search index */
ecdc0628 523 help_node_t *node; /* Current node */
f7deaa1a 524 help_word_t *word; /* Current word */
ef416fc2 525 void *sc; /* Search context */
526 int matches; /* Number of matches */
527
528
85dda01c
MS
529 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
530 hi, query, filename));
ef416fc2 531
532 /*
533 * Range check...
534 */
535
536 if (!hi || !query)
537 return (NULL);
538
ecdc0628 539 /*
540 * Reset the scores of all nodes to 0...
541 */
542
543 for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
544 node;
545 node = (help_node_t *)cupsArrayNext(hi->nodes))
546 node->score = 0;
547
548 /*
549 * Find the first node to search in...
550 */
ef416fc2 551
552 if (filename)
553 {
ecdc0628 554 node = helpFindNode(hi, filename, NULL);
555 if (!node)
ef416fc2 556 return (NULL);
557 }
558 else
ecdc0628 559 node = (help_node_t *)cupsArrayFirst(hi->nodes);
ef416fc2 560
561 /*
562 * Convert the query into a regular expression...
563 */
564
565 sc = cgiCompileSearch(query);
566 if (!sc)
567 return (NULL);
568
569 /*
570 * Allocate a search index...
571 */
572
573 search = calloc(1, sizeof(help_index_t));
574 if (!search)
575 {
576 cgiFreeSearch(sc);
577 return (NULL);
578 }
579
ecdc0628 580 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
581 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
321d8d57 582
ecdc0628 583 if (!search->nodes || !search->sorted)
584 {
585 cupsArrayDelete(search->nodes);
586 cupsArrayDelete(search->sorted);
587 free(search);
588 cgiFreeSearch(sc);
589 return (NULL);
590 }
591
ef416fc2 592 search->search = 1;
593
594 /*
595 * Check each node in the index, adding matching nodes to the
596 * search index...
597 */
598
ecdc0628 599 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
600 if (section && strcmp(node->section, section))
ef416fc2 601 continue;
ecdc0628 602 else if (filename && strcmp(node->filename, filename))
ef416fc2 603 continue;
f7deaa1a 604 else
ef416fc2 605 {
f7deaa1a 606 matches = cgiDoSearch(sc, node->text);
607
608 for (word = (help_word_t *)cupsArrayFirst(node->words);
609 word;
610 word = (help_word_t *)cupsArrayNext(node->words))
611 if (cgiDoSearch(sc, word->text) > 0)
612 matches += word->count;
ef416fc2 613
f7deaa1a 614 if (matches > 0)
615 {
616 /*
617 * Found a match, add the node to the search index...
618 */
ef416fc2 619
f7deaa1a 620 node->score = matches;
621
321d8d57
MS
622 cupsArrayAdd(search->nodes, node);
623 cupsArrayAdd(search->sorted, node);
f7deaa1a 624 }
ef416fc2 625 }
626
627 /*
628 * Free the search context...
629 */
630
631 cgiFreeSearch(sc);
632
ef416fc2 633 /*
634 * Return the results...
635 */
636
637 return (search);
638}
639
640
f7deaa1a 641/*
642 * 'help_add_word()' - Add a word to a node.
643 */
644
645static help_word_t * /* O - New word */
646help_add_word(help_node_t *n, /* I - Node */
647 const char *text) /* I - Word text */
648{
649 help_word_t *w, /* New word */
650 key; /* Search key */
651
652
85dda01c 653 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
f7deaa1a 654
655 /*
656 * Create the words array as needed...
657 */
658
659 if (!n->words)
660 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
661
662 /*
663 * See if the word is already added...
664 */
665
666 key.text = (char *)text;
667
668 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
669 {
670 /*
671 * Create a new word...
672 */
673
674 if ((w = calloc(1, sizeof(help_word_t))) == NULL)
675 return (NULL);
676
677 if ((w->text = strdup(text)) == NULL)
678 {
679 free(w);
680 return (NULL);
681 }
682
683 cupsArrayAdd(n->words, w);
684 }
685
686 /*
687 * Bump the counter for this word and return it...
688 */
689
690 w->count ++;
691
692 return (w);
693}
694
695
ef416fc2 696/*
697 * 'help_delete_node()' - Free all memory used by a node.
698 */
699
700static void
701help_delete_node(help_node_t *n) /* I - Node */
702{
f7deaa1a 703 help_word_t *w; /* Current word */
704
705
85dda01c 706 DEBUG_printf(("2help_delete_node(n=%p)", n));
ef416fc2 707
708 if (!n)
709 return;
710
711 if (n->filename)
712 free(n->filename);
713
714 if (n->anchor)
715 free(n->anchor);
716
717 if (n->section)
718 free(n->section);
719
720 if (n->text)
721 free(n->text);
722
f7deaa1a 723 for (w = (help_word_t *)cupsArrayFirst(n->words);
724 w;
725 w = (help_word_t *)cupsArrayNext(n->words))
726 help_delete_word(w);
727
728 cupsArrayDelete(n->words);
729
ef416fc2 730 free(n);
731}
732
733
f7deaa1a 734/*
735 * 'help_delete_word()' - Free all memory used by a word.
736 */
737
738static void
739help_delete_word(help_word_t *w) /* I - Word */
740{
85dda01c 741 DEBUG_printf(("2help_delete_word(w=%p)", w));
f7deaa1a 742
743 if (!w)
744 return;
745
746 if (w->text)
747 free(w->text);
748
749 free(w);
750}
751
752
ef416fc2 753/*
754 * 'help_load_directory()' - Load a directory of files into an index.
755 */
756
757static int /* O - 0 = success, -1 = error, 1 = updated */
758help_load_directory(
759 help_index_t *hi, /* I - Index */
760 const char *directory, /* I - Directory */
761 const char *relative) /* I - Relative path */
762{
ef416fc2 763 cups_dir_t *dir; /* Directory file */
764 cups_dentry_t *dent; /* Directory entry */
765 char *ext, /* Pointer to extension */
766 filename[1024], /* Full filename */
767 relname[1024]; /* Relative filename */
768 int update; /* Updated? */
ecdc0628 769 help_node_t *node; /* Current node */
ef416fc2 770
771
85dda01c
MS
772 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
773 hi, directory, relative));
ef416fc2 774
775 /*
776 * Open the directory and scan it...
777 */
778
779 if ((dir = cupsDirOpen(directory)) == NULL)
780 return (0);
781
782 update = 0;
783
784 while ((dent = cupsDirRead(dir)) != NULL)
785 {
ecdc0628 786 /*
787 * Skip "." files...
788 */
789
790 if (dent->filename[0] == '.')
791 continue;
792
ef416fc2 793 /*
794 * Get absolute and relative filenames...
795 */
796
797 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
798 if (relative)
799 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
800 else
801 strlcpy(relname, dent->filename, sizeof(relname));
802
803 /*
804 * Check if we have a HTML file...
805 */
806
807 if ((ext = strstr(dent->filename, ".html")) != NULL &&
808 (!ext[5] || !strcmp(ext + 5, ".gz")))
809 {
810 /*
811 * HTML file, see if we have already indexed the file...
812 */
813
814 if ((node = helpFindNode(hi, relname, NULL)) != NULL)
815 {
816 /*
817 * File already indexed - check dates to confirm that the
818 * index is up-to-date...
819 */
820
ecdc0628 821 if (node->mtime == dent->fileinfo.st_mtime)
ef416fc2 822 {
823 /*
824 * Same modification time, so mark all of the nodes
825 * for this file as up-to-date...
826 */
827
ecdc0628 828 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
829 if (!strcmp(node->filename, relname))
830 node->score = 0;
ef416fc2 831 else
832 break;
833
834 continue;
835 }
836 }
837
838 update = 1;
839
840 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
841 }
842 else if (S_ISDIR(dent->fileinfo.st_mode))
843 {
844 /*
845 * Process sub-directory...
846 */
847
848 if (help_load_directory(hi, filename, relname) == 1)
849 update = 1;
850 }
851 }
852
853 cupsDirClose(dir);
854
855 return (update);
856}
857
858
859/*
860 * 'help_load_file()' - Load a HTML files into an index.
861 */
862
863static int /* O - 0 = success, -1 = error */
864help_load_file(
865 help_index_t *hi, /* I - Index */
866 const char *filename, /* I - Filename */
867 const char *relative, /* I - Relative path */
868 time_t mtime) /* I - Modification time */
869{
870 cups_file_t *fp; /* HTML file */
ecdc0628 871 help_node_t *node; /* Current node */
ef416fc2 872 char line[1024], /* Line from file */
f42414bf 873 temp[1024], /* Temporary word */
ef416fc2 874 section[1024], /* Section */
875 *ptr, /* Pointer into line */
876 *anchor, /* Anchor name */
877 *text; /* Text for anchor */
878 off_t offset; /* File offset */
879 char quote; /* Quote character */
f7deaa1a 880 help_word_t *word; /* Current word */
881 int wordlen; /* Length of word */
ef416fc2 882
883
85dda01c 884 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
2274af38 885 "mtime=%ld)", hi, filename, relative, (long)mtime));
ef416fc2 886
887 if ((fp = cupsFileOpen(filename, "r")) == NULL)
888 return (-1);
889
890 node = NULL;
891 offset = 0;
892
5a9febac 893 strlcpy(section, "Other", sizeof(section));
ef416fc2 894
895 while (cupsFileGets(fp, line, sizeof(line)))
896 {
897 /*
898 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
899 */
900
88f9aafc 901 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
ef416fc2 902 {
903 /*
904 * Got section line, copy it!
905 */
906
907 for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
908
909 strlcpy(section, ptr, sizeof(section));
910 if ((ptr = strstr(section, "-->")) != NULL)
911 {
912 /*
913 * Strip comment stuff from end of line...
914 */
915
916 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
917
918 if (isspace(*ptr & 255))
919 *ptr = '\0';
920 }
921 continue;
922 }
923
924 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
925 {
926 ptr ++;
927
88f9aafc 928 if (!_cups_strncasecmp(ptr, "TITLE>", 6))
ef416fc2 929 {
930 /*
931 * Found the title...
932 */
933
934 anchor = NULL;
935 ptr += 6;
936 }
88f9aafc 937 else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
ef416fc2 938 {
939 /*
940 * Found an anchor...
941 */
942
943 ptr += 7;
944
945 if (*ptr == '\"' || *ptr == '\'')
946 {
947 /*
948 * Get quoted anchor...
949 */
950
951 quote = *ptr;
952 anchor = ptr + 1;
953 if ((ptr = strchr(anchor, quote)) != NULL)
954 *ptr++ = '\0';
955 else
956 break;
957 }
958 else
959 {
960 /*
961 * Get unquoted anchor...
962 */
963
964 anchor = ptr + 1;
965
966 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
967
968 if (*ptr)
969 *ptr++ = '\0';
970 else
971 break;
972 }
973
974 /*
975 * Got the anchor, now lets find the end...
976 */
977
978 while (*ptr && *ptr != '>')
979 ptr ++;
980
981 if (*ptr != '>')
982 break;
983
984 ptr ++;
985 }
986 else
987 continue;
988
989 /*
990 * Now collect text for the link...
991 */
992
993 text = ptr;
994 while ((ptr = strchr(text, '<')) == NULL)
995 {
996 ptr = text + strlen(text);
997 if (ptr >= (line + sizeof(line) - 2))
998 break;
999
1000 *ptr++ = ' ';
1001
7e86f2f6 1002 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
ef416fc2 1003 break;
1004 }
1005
1006 *ptr = '\0';
1007
1008 if (node)
7e86f2f6 1009 node->length = (size_t)(offset - node->offset);
ef416fc2 1010
1011 if (!*text)
1012 {
1013 node = NULL;
1014 break;
1015 }
1016
ecdc0628 1017 if ((node = helpFindNode(hi, relative, anchor)) != NULL)
ef416fc2 1018 {
1019 /*
1020 * Node already in the index, so replace the text and other
1021 * data...
1022 */
1023
ecdc0628 1024 cupsArrayRemove(hi->nodes, node);
ef416fc2 1025
1026 if (node->section)
1027 free(node->section);
1028
1029 if (node->text)
1030 free(node->text);
1031
f7deaa1a 1032 if (node->words)
1033 {
1034 for (word = (help_word_t *)cupsArrayFirst(node->words);
1035 word;
1036 word = (help_word_t *)cupsArrayNext(node->words))
1037 help_delete_word(word);
1038
1039 cupsArrayDelete(node->words);
1040 node->words = NULL;
1041 }
1042
ef416fc2 1043 node->section = section[0] ? strdup(section) : NULL;
1044 node->text = strdup(text);
1045 node->mtime = mtime;
1046 node->offset = offset;
1047 node->score = 0;
1048 }
1049 else
1050 {
1051 /*
1052 * New node...
1053 */
1054
1055 node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
ef416fc2 1056 }
1057
1058 /*
1059 * Go through the text value and replace tabs and newlines with
1060 * whitespace and eliminate extra whitespace...
1061 */
1062
1063 for (ptr = node->text, text = node->text; *ptr;)
1064 if (isspace(*ptr & 255))
1065 {
1066 while (isspace(*ptr & 255))
ed486911 1067 ptr ++;
ef416fc2 1068
1069 *text++ = ' ';
1070 }
1071 else if (text != ptr)
1072 *text++ = *ptr++;
1073 else
1074 {
1075 text ++;
1076 ptr ++;
1077 }
1078
1079 *text = '\0';
1080
ecdc0628 1081 /*
1082 * (Re)add the node to the array...
1083 */
1084
1085 cupsArrayAdd(hi->nodes, node);
f7deaa1a 1086
1087 if (!anchor)
1088 node = NULL;
ef416fc2 1089 break;
1090 }
1091
f7deaa1a 1092 if (node)
1093 {
1094 /*
1095 * Scan this line for words...
1096 */
1097
1098 for (ptr = line; *ptr; ptr ++)
1099 {
1100 /*
1101 * Skip HTML stuff...
1102 */
1103
1104 if (*ptr == '<')
1105 {
1106 if (!strncmp(ptr, "<!--", 4))
1107 {
1108 /*
1109 * Skip HTML comment...
1110 */
1111
1112 if ((text = strstr(ptr + 4, "-->")) == NULL)
1113 ptr += strlen(ptr) - 1;
1114 else
1115 ptr = text + 2;
1116 }
1117 else
1118 {
1119 /*
1120 * Skip HTML element...
1121 */
1122
1123 for (ptr ++; *ptr && *ptr != '>'; ptr ++)
f42414bf 1124 {
f7deaa1a 1125 if (*ptr == '\"' || *ptr == '\'')
1126 {
1127 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1128
1129 if (!*ptr)
1130 ptr --;
1131 }
f42414bf 1132 }
f7deaa1a 1133
1134 if (!*ptr)
1135 ptr --;
1136 }
1137
1138 continue;
1139 }
1140 else if (*ptr == '&')
1141 {
1142 /*
1143 * Skip HTML entity...
1144 */
1145
1146 for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1147
1148 if (!*ptr)
1149 ptr --;
1150
1151 continue;
1152 }
1153 else if (!isalnum(*ptr & 255))
1154 continue;
1155
1156 /*
1157 * Found the start of a word, search until we find the end...
1158 */
1159
1160 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1161
7e86f2f6 1162 wordlen = (int)(ptr - text);
f7deaa1a 1163
07623986 1164 memcpy(temp, text, (size_t)wordlen);
f42414bf 1165 temp[wordlen] = '\0';
1166
1167 ptr --;
f7deaa1a 1168
f42414bf 1169 if (wordlen > 1 && !bsearch(temp, help_common_words,
f7deaa1a 1170 (sizeof(help_common_words) /
1171 sizeof(help_common_words[0])),
1172 sizeof(help_common_words[0]),
1173 (int (*)(const void *, const void *))
88f9aafc 1174 _cups_strcasecmp))
f42414bf 1175 help_add_word(node, temp);
f7deaa1a 1176 }
1177 }
1178
ef416fc2 1179 /*
1180 * Get the offset of the next line...
1181 */
1182
1183 offset = cupsFileTell(fp);
1184 }
1185
1186 cupsFileClose(fp);
1187
1188 if (node)
7e86f2f6 1189 node->length = (size_t)(offset - node->offset);
ef416fc2 1190
1191 return (0);
1192}
1193
1194
1195/*
1196 * 'help_new_node()' - Create a new node and add it to an index.
1197 */
1198
1199static help_node_t * /* O - Node pointer or NULL on error */
1200help_new_node(const char *filename, /* I - Filename */
1201 const char *anchor, /* I - Anchor */
1202 const char *section, /* I - Section */
1203 const char *text, /* I - Text */
1204 time_t mtime, /* I - Modification time */
1205 off_t offset, /* I - Offset in file */
1206 size_t length) /* I - Length in bytes */
1207{
1208 help_node_t *n; /* Node */
1209
1210
85dda01c
MS
1211 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1212 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1213 (long)mtime, (long)offset, (long)length));
ef416fc2 1214
1215 n = (help_node_t *)calloc(1, sizeof(help_node_t));
1216 if (!n)
1217 return (NULL);
1218
1219 n->filename = strdup(filename);
1220 n->anchor = anchor ? strdup(anchor) : NULL;
3bb59731 1221 n->section = *section ? strdup(section) : NULL;
ef416fc2 1222 n->text = strdup(text);
1223 n->mtime = mtime;
1224 n->offset = offset;
1225 n->length = length;
1226
1227 return (n);
1228}
1229
1230
1231/*
1232 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1233 */
1234
1235static int /* O - Difference */
ecdc0628 1236help_sort_by_name(help_node_t *n1, /* I - First node */
1237 help_node_t *n2) /* I - Second node */
ef416fc2 1238{
ef416fc2 1239 int diff; /* Difference */
1240
1241
85dda01c
MS
1242 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1243 n1, n1->filename, n1->anchor,
1244 n2, n2->filename, n2->anchor));
ef416fc2 1245
ecdc0628 1246 if ((diff = strcmp(n1->filename, n2->filename)) != 0)
ef416fc2 1247 return (diff);
1248
ecdc0628 1249 if (!n1->anchor && !n2->anchor)
ef416fc2 1250 return (0);
ecdc0628 1251 else if (!n1->anchor)
ef416fc2 1252 return (-1);
ecdc0628 1253 else if (!n2->anchor)
ef416fc2 1254 return (1);
1255 else
ecdc0628 1256 return (strcmp(n1->anchor, n2->anchor));
ef416fc2 1257}
1258
1259
1260/*
1261 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1262 */
1263
1264static int /* O - Difference */
ecdc0628 1265help_sort_by_score(help_node_t *n1, /* I - First node */
1266 help_node_t *n2) /* I - Second node */
ef416fc2 1267{
ef416fc2 1268 int diff; /* Difference */
1269
1270
85dda01c
MS
1271 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1272 "n2=%p(%d \"%s\" \"%s\")",
1273 n1, n1->score, n1->section, n1->text,
1274 n2, n2->score, n2->section, n2->text));
ef416fc2 1275
ecdc0628 1276 if (n1->score != n2->score)
1f0275e3 1277 return (n2->score - n1->score);
ef416fc2 1278
ecdc0628 1279 if (n1->section && !n2->section)
ef416fc2 1280 return (1);
ecdc0628 1281 else if (!n1->section && n2->section)
ef416fc2 1282 return (-1);
ecdc0628 1283 else if (n1->section && n2->section &&
1284 (diff = strcmp(n1->section, n2->section)) != 0)
ef416fc2 1285 return (diff);
1286
88f9aafc 1287 return (_cups_strcasecmp(n1->text, n2->text));
ef416fc2 1288}
1289
1290
1291/*
f7deaa1a 1292 * 'help_sort_words()' - Sort words alphabetically.
1293 */
1294
1295static int /* O - Difference */
1296help_sort_words(help_word_t *w1, /* I - Second word */
1297 help_word_t *w2) /* I - Second word */
1298{
85dda01c 1299 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
f7deaa1a 1300 w1, w1->text, w2, w2->text));
1301
88f9aafc 1302 return (_cups_strcasecmp(w1->text, w2->text));
f7deaa1a 1303}