cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright 2007-2017 by Apple Inc.
   5  * Copyright 1997-2007 by Easy Software Products.
   6  *
   7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
   8  */
   9
  10 /*
  11  * Include necessary headers...
  12  */
  13
  14 #include "cgi-private.h"
  15 #include <cups/dir.h>
  16
  17
  18 /*
  19  * List of common English words that should not be indexed...
  20  */
  21
  22 static char             help_common_words[][6] =
  23                         {
  24                           "about",
  25                           "all",
  26                           "an",
  27                           "and",
  28                           "are",
  29                           "as",
  30                           "at",
  31                           "be",
  32                           "been",
  33                           "but",
  34                           "by",
  35                           "call",
  36                           "can",
  37                           "come",
  38                           "could",
  39                           "day",
  40                           "did",
  41                           "do",
  42                           "down",
  43                           "each",
  44                           "find",
  45                           "first",
  46                           "for",
  47                           "from",
  48                           "go",
  49                           "had",
  50                           "has",
  51                           "have",
  52                           "he",
  53                           "her",
  54                           "him",
  55                           "his",
  56                           "hot",
  57                           "how",
  58                           "if",
  59                           "in",
  60                           "is",
  61                           "it",
  62                           "know",
  63                           "like",
  64                           "long",
  65                           "look",
  66                           "make",
  67                           "many",
  68                           "may",
  69                           "more",
  70                           "most",
  71                           "my",
  72                           "no",
  73                           "now",
  74                           "of",
  75                           "on",
  76                           "one",
  77                           "or",
  78                           "other",
  79                           "out",
  80                           "over",
  81                           "said",
  82                           "see",
  83                           "she",
  84                           "side",
  85                           "so",
  86                           "some",
  87                           "sound",
  88                           "than",
  89                           "that",
  90                           "the",
  91                           "their",
  92                           "them",
  93                           "then",
  94                           "there",
  95                           "these",
  96                           "they",
  97                           "thing",
  98                           "this",
  99                           "time",
 100                           "to",
 101                           "two",
 102                           "up",
 103                           "use",
 104                           "was",
 105                           "water",
 106                           "way",
 107                           "we",
 108                           "were",
 109                           "what",
 110                           "when",
 111                           "which",
 112                           "who",
 113                           "will",
 114                           "with",
 115                           "word",
 116                           "would",
 117                           "write",
 118                           "you",
 119                           "your"
 120                         };
 121
 122
 123 /*
 124  * Local functions...
 125  */
 126
 127 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 128 static void             help_delete_node(help_node_t *n);
 129 static void             help_delete_word(help_word_t *w);
 130 static int              help_load_directory(help_index_t *hi,
 131                                             const char *directory,
 132                                             const char *relative);
 133 static int              help_load_file(help_index_t *hi,
 134                                        const char *filename,
 135                                        const char *relative,
 136                                        time_t     mtime);
 137 static help_node_t      *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
 138 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 139 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 140 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 141
 142
 143 /*
 144  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 145  */
 146
 147 void
 148 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 149 {
 150   help_node_t   *node;                  /* Current node */
 151
 152
 153   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 154
 155   if (!hi)
 156     return;
 157
 158   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 159        node;
 160        node = (help_node_t *)cupsArrayNext(hi->nodes))
 161   {
 162     if (!hi->search)
 163       help_delete_node(node);
 164   }
 165
 166   cupsArrayDelete(hi->nodes);
 167   cupsArrayDelete(hi->sorted);
 168
 169   free(hi);
 170 }
 171
 172
 173 /*
 174  * 'helpFindNode()' - Find a node in an index.
 175  */
 176
 177 help_node_t *                           /* O - Node pointer or NULL */
 178 helpFindNode(help_index_t *hi,          /* I - Index */
 179              const char   *filename,    /* I - Filename */
 180              const char   *anchor)      /* I - Anchor */
 181 {
 182   help_node_t   key;                    /* Search key */
 183
 184
 185   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 186                 hi, filename, anchor));
 187
 188  /*
 189   * Range check input...
 190   */
 191
 192   if (!hi || !filename)
 193     return (NULL);
 194
 195  /*
 196   * Initialize the search key...
 197   */
 198
 199   key.filename = (char *)filename;
 200   key.anchor   = (char *)anchor;
 201
 202  /*
 203   * Return any match...
 204   */
 205
 206   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 207 }
 208
 209
 210 /*
 211  * 'helpLoadIndex()' - Load a help index from disk.
 212  */
 213
 214 help_index_t *                          /* O - Index pointer or NULL */
 215 helpLoadIndex(const char *hifile,       /* I - Index filename */
 216               const char *directory)    /* I - Directory that is indexed */
 217 {
 218   help_index_t  *hi;                    /* Help index */
 219   cups_file_t   *fp;                    /* Current file */
 220   char          line[2048],             /* Line from file */
 221                 *ptr,                   /* Pointer into line */
 222                 *filename,              /* Filename in line */
 223                 *anchor,                /* Anchor in line */
 224                 *sectptr,               /* Section pointer in line */
 225                 section[1024],          /* Section name */
 226                 *text;                  /* Text in line */
 227   time_t        mtime;                  /* Modification time */
 228   off_t         offset;                 /* Offset into file */
 229   size_t        length;                 /* Length in bytes */
 230   int           update;                 /* Update? */
 231   help_node_t   *node;                  /* Current node */
 232   help_word_t   *word;                  /* Current word */
 233
 234
 235   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 236                 hifile, directory));
 237
 238  /*
 239   * Create a new, empty index.
 240   */
 241
 242   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 243     return (NULL);
 244
 245   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 246   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 247
 248   if (!hi->nodes || !hi->sorted)
 249   {
 250     cupsArrayDelete(hi->nodes);
 251     cupsArrayDelete(hi->sorted);
 252     free(hi);
 253     return (NULL);
 254   }
 255
 256  /*
 257   * Try loading the existing index file...
 258   */
 259
 260   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 261   {
 262    /*
 263     * Lock the file and then read the first line...
 264     */
 265
 266     cupsFileLock(fp, 1);
 267
 268     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 269     {
 270      /*
 271       * Got a valid header line, now read the data lines...
 272       */
 273
 274       node = NULL;
 275
 276       while (cupsFileGets(fp, line, sizeof(line)))
 277       {
 278        /*
 279         * Each line looks like one of the following:
 280         *
 281         *     filename mtime offset length "section" "text"
 282         *     filename#anchor offset length "text"
 283         *     SP count word
 284         */
 285
 286         if (line[0] == ' ')
 287         {
 288          /*
 289           * Read a word in the current node...
 290           */
 291
 292           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 293             continue;
 294
 295           if ((word = help_add_word(node, ptr + 1)) != NULL)
 296             word->count = atoi(line + 1);
 297         }
 298         else
 299         {
 300          /*
 301           * Add a node...
 302           */
 303
 304           filename = line;
 305
 306           if ((ptr = strchr(line, ' ')) == NULL)
 307             break;
 308
 309           while (isspace(*ptr & 255))
 310             *ptr++ = '\0';
 311
 312           if ((anchor = strrchr(filename, '#')) != NULL)
 313           {
 314             *anchor++ = '\0';
 315             mtime = 0;
 316           }
 317           else
 318             mtime = strtol(ptr, &ptr, 10);
 319
 320           offset = strtoll(ptr, &ptr, 10);
 321           length = (size_t)strtoll(ptr, &ptr, 10);
 322
 323           while (isspace(*ptr & 255))
 324             ptr ++;
 325
 326           if (!anchor)
 327           {
 328            /*
 329             * Get section...
 330             */
 331
 332             if (*ptr != '\"')
 333               break;
 334
 335             ptr ++;
 336             sectptr = ptr;
 337
 338             while (*ptr && *ptr != '\"')
 339               ptr ++;
 340
 341             if (*ptr != '\"')
 342               break;
 343
 344             *ptr++ = '\0';
 345
 346             strlcpy(section, sectptr, sizeof(section));
 347
 348             while (isspace(*ptr & 255))
 349               ptr ++;
 350           }
 351
 352           if (*ptr != '\"')
 353             break;
 354
 355           ptr ++;
 356           text = ptr;
 357
 358           while (*ptr && *ptr != '\"')
 359             ptr ++;
 360
 361           if (*ptr != '\"')
 362             break;
 363
 364           *ptr++ = '\0';
 365
 366           if ((node = help_new_node(filename, anchor, section, text,
 367                                     mtime, offset, length)) == NULL)
 368             break;
 369
 370           node->score = -1;
 371
 372           cupsArrayAdd(hi->nodes, node);
 373         }
 374       }
 375     }
 376
 377     cupsFileClose(fp);
 378   }
 379
 380  /*
 381   * Scan for new/updated files...
 382   */
 383
 384   update = help_load_directory(hi, directory, NULL);
 385
 386  /*
 387   * Remove any files that are no longer installed...
 388   */
 389
 390   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 391        node;
 392        node = (help_node_t *)cupsArrayNext(hi->nodes))
 393     if (node->score < 0)
 394     {
 395      /*
 396       * Delete this node...
 397       */
 398
 399       cupsArrayRemove(hi->nodes, node);
 400       help_delete_node(node);
 401     }
 402
 403  /*
 404   * Add nodes to the sorted array...
 405   */
 406
 407   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 408        node;
 409        node = (help_node_t *)cupsArrayNext(hi->nodes))
 410     cupsArrayAdd(hi->sorted, node);
 411
 412  /*
 413   * Save the index if we updated it...
 414   */
 415
 416   if (update)
 417     helpSaveIndex(hi, hifile);
 418
 419  /*
 420   * Return the index...
 421   */
 422
 423   return (hi);
 424 }
 425
 426
 427 /*
 428  * 'helpSaveIndex()' - Save a help index to disk.
 429  */
 430
 431 int                                     /* O - 0 on success, -1 on error */
 432 helpSaveIndex(help_index_t *hi,         /* I - Index */
 433               const char   *hifile)     /* I - Index filename */
 434 {
 435   cups_file_t   *fp;                    /* Index file */
 436   help_node_t   *node;                  /* Current node */
 437   help_word_t   *word;                  /* Current word */
 438
 439
 440   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 441
 442  /*
 443   * Try creating a new index file...
 444   */
 445
 446   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 447     return (-1);
 448
 449  /*
 450   * Lock the file while we write it...
 451   */
 452
 453   cupsFileLock(fp, 1);
 454
 455   cupsFilePuts(fp, "HELPV2\n");
 456
 457   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 458        node;
 459        node = (help_node_t *)cupsArrayNext(hi->nodes))
 460   {
 461    /*
 462     * Write the current node with/without the anchor...
 463     */
 464
 465     if (node->anchor)
 466     {
 467       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 468                          node->filename, node->anchor,
 469                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 470                          node->text) < 0)
 471         break;
 472     }
 473     else
 474     {
 475       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 476                          node->filename, (int)node->mtime,
 477                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 478                          node->section ? node->section : "", node->text) < 0)
 479         break;
 480     }
 481
 482    /*
 483     * Then write the words associated with the node...
 484     */
 485
 486     for (word = (help_word_t *)cupsArrayFirst(node->words);
 487          word;
 488          word = (help_word_t *)cupsArrayNext(node->words))
 489       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 490         break;
 491   }
 492
 493   cupsFileFlush(fp);
 494
 495   if (cupsFileClose(fp) < 0)
 496     return (-1);
 497   else if (node)
 498     return (-1);
 499   else
 500     return (0);
 501 }
 502
 503
 504 /*
 505  * 'helpSearchIndex()' - Search an index.
 506  */
 507
 508 help_index_t *                          /* O - Search index */
 509 helpSearchIndex(help_index_t *hi,       /* I - Index */
 510                 const char   *query,    /* I - Query string */
 511                 const char   *section,  /* I - Limit search to this section */
 512                 const char   *filename) /* I - Limit search to this file */
 513 {
 514   help_index_t  *search;                /* Search index */
 515   help_node_t   *node;                  /* Current node */
 516   help_word_t   *word;                  /* Current word */
 517   void          *sc;                    /* Search context */
 518   int           matches;                /* Number of matches */
 519
 520
 521   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 522                 hi, query, filename));
 523
 524  /*
 525   * Range check...
 526   */
 527
 528   if (!hi || !query)
 529     return (NULL);
 530
 531  /*
 532   * Reset the scores of all nodes to 0...
 533   */
 534
 535   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 536        node;
 537        node = (help_node_t *)cupsArrayNext(hi->nodes))
 538     node->score = 0;
 539
 540  /*
 541   * Find the first node to search in...
 542   */
 543
 544   if (filename)
 545   {
 546     node = helpFindNode(hi, filename, NULL);
 547     if (!node)
 548       return (NULL);
 549   }
 550   else
 551     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 552
 553  /*
 554   * Convert the query into a regular expression...
 555   */
 556
 557   sc = cgiCompileSearch(query);
 558   if (!sc)
 559     return (NULL);
 560
 561  /*
 562   * Allocate a search index...
 563   */
 564
 565   search = calloc(1, sizeof(help_index_t));
 566   if (!search)
 567   {
 568     cgiFreeSearch(sc);
 569     return (NULL);
 570   }
 571
 572   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 573   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 574
 575   if (!search->nodes || !search->sorted)
 576   {
 577     cupsArrayDelete(search->nodes);
 578     cupsArrayDelete(search->sorted);
 579     free(search);
 580     cgiFreeSearch(sc);
 581     return (NULL);
 582   }
 583
 584   search->search = 1;
 585
 586  /*
 587   * Check each node in the index, adding matching nodes to the
 588   * search index...
 589   */
 590
 591   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 592     if (section && strcmp(node->section, section))
 593       continue;
 594     else if (filename && strcmp(node->filename, filename))
 595       continue;
 596     else
 597     {
 598       matches = cgiDoSearch(sc, node->text);
 599
 600       for (word = (help_word_t *)cupsArrayFirst(node->words);
 601            word;
 602            word = (help_word_t *)cupsArrayNext(node->words))
 603         if (cgiDoSearch(sc, word->text) > 0)
 604           matches += word->count;
 605
 606       if (matches > 0)
 607       {
 608        /*
 609         * Found a match, add the node to the search index...
 610         */
 611
 612         node->score = matches;
 613
 614         cupsArrayAdd(search->nodes, node);
 615         cupsArrayAdd(search->sorted, node);
 616       }
 617     }
 618
 619  /*
 620   * Free the search context...
 621   */
 622
 623   cgiFreeSearch(sc);
 624
 625  /*
 626   * Return the results...
 627   */
 628
 629   return (search);
 630 }
 631
 632
 633 /*
 634  * 'help_add_word()' - Add a word to a node.
 635  */
 636
 637 static help_word_t *                    /* O - New word */
 638 help_add_word(help_node_t *n,           /* I - Node */
 639               const char  *text)        /* I - Word text */
 640 {
 641   help_word_t   *w,                     /* New word */
 642                 key;                    /* Search key */
 643
 644
 645   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 646
 647  /*
 648   * Create the words array as needed...
 649   */
 650
 651   if (!n->words)
 652     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 653
 654  /*
 655   * See if the word is already added...
 656   */
 657
 658   key.text = (char *)text;
 659
 660   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 661   {
 662    /*
 663     * Create a new word...
 664     */
 665
 666     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 667       return (NULL);
 668
 669     if ((w->text = strdup(text)) == NULL)
 670     {
 671       free(w);
 672       return (NULL);
 673     }
 674
 675     cupsArrayAdd(n->words, w);
 676   }
 677
 678  /*
 679   * Bump the counter for this word and return it...
 680   */
 681
 682   w->count ++;
 683
 684   return (w);
 685 }
 686
 687
 688 /*
 689  * 'help_delete_node()' - Free all memory used by a node.
 690  */
 691
 692 static void
 693 help_delete_node(help_node_t *n)        /* I - Node */
 694 {
 695   help_word_t   *w;                     /* Current word */
 696
 697
 698   DEBUG_printf(("2help_delete_node(n=%p)", n));
 699
 700   if (!n)
 701     return;
 702
 703   if (n->filename)
 704     free(n->filename);
 705
 706   if (n->anchor)
 707     free(n->anchor);
 708
 709   if (n->section)
 710     free(n->section);
 711
 712   if (n->text)
 713     free(n->text);
 714
 715   for (w = (help_word_t *)cupsArrayFirst(n->words);
 716        w;
 717        w = (help_word_t *)cupsArrayNext(n->words))
 718     help_delete_word(w);
 719
 720   cupsArrayDelete(n->words);
 721
 722   free(n);
 723 }
 724
 725
 726 /*
 727  * 'help_delete_word()' - Free all memory used by a word.
 728  */
 729
 730 static void
 731 help_delete_word(help_word_t *w)        /* I - Word */
 732 {
 733   DEBUG_printf(("2help_delete_word(w=%p)", w));
 734
 735   if (!w)
 736     return;
 737
 738   if (w->text)
 739     free(w->text);
 740
 741   free(w);
 742 }
 743
 744
 745 /*
 746  * 'help_load_directory()' - Load a directory of files into an index.
 747  */
 748
 749 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 750 help_load_directory(
 751     help_index_t *hi,                   /* I - Index */
 752     const char   *directory,            /* I - Directory */
 753     const char   *relative)             /* I - Relative path */
 754 {
 755   cups_dir_t    *dir;                   /* Directory file */
 756   cups_dentry_t *dent;                  /* Directory entry */
 757   char          *ext,                   /* Pointer to extension */
 758                 filename[1024],         /* Full filename */
 759                 relname[1024];          /* Relative filename */
 760   int           update;                 /* Updated? */
 761   help_node_t   *node;                  /* Current node */
 762
 763
 764   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 765                 hi, directory, relative));
 766
 767  /*
 768   * Open the directory and scan it...
 769   */
 770
 771   if ((dir = cupsDirOpen(directory)) == NULL)
 772     return (0);
 773
 774   update = 0;
 775
 776   while ((dent = cupsDirRead(dir)) != NULL)
 777   {
 778    /*
 779     * Skip "." files...
 780     */
 781
 782     if (dent->filename[0] == '.')
 783       continue;
 784
 785    /*
 786     * Get absolute and relative filenames...
 787     */
 788
 789     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 790     if (relative)
 791       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 792     else
 793       strlcpy(relname, dent->filename, sizeof(relname));
 794
 795    /*
 796     * Check if we have a HTML file...
 797     */
 798
 799     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 800         (!ext[5] || !strcmp(ext + 5, ".gz")))
 801     {
 802      /*
 803       * HTML file, see if we have already indexed the file...
 804       */
 805
 806       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 807       {
 808        /*
 809         * File already indexed - check dates to confirm that the
 810         * index is up-to-date...
 811         */
 812
 813         if (node->mtime == dent->fileinfo.st_mtime)
 814         {
 815          /*
 816           * Same modification time, so mark all of the nodes
 817           * for this file as up-to-date...
 818           */
 819
 820           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 821             if (!strcmp(node->filename, relname))
 822               node->score = 0;
 823             else
 824               break;
 825
 826           continue;
 827         }
 828       }
 829
 830       update = 1;
 831
 832       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 833     }
 834     else if (S_ISDIR(dent->fileinfo.st_mode))
 835     {
 836      /*
 837       * Process sub-directory...
 838       */
 839
 840       if (help_load_directory(hi, filename, relname) == 1)
 841         update = 1;
 842     }
 843   }
 844
 845   cupsDirClose(dir);
 846
 847   return (update);
 848 }
 849
 850
/*
 * 'help_load_file()' - Load an HTML file into an index.
 */
 854
 855 static int                              /* O - 0 = success, -1 = error */
 856 help_load_file(
 857     help_index_t *hi,                   /* I - Index */
 858     const char   *filename,             /* I - Filename */
 859     const char   *relative,             /* I - Relative path */
 860     time_t       mtime)                 /* I - Modification time */
 861 {
 862   cups_file_t   *fp;                    /* HTML file */
 863   help_node_t   *node;                  /* Current node */
 864   char          line[1024],             /* Line from file */
 865                 temp[1024],             /* Temporary word */
 866                 section[1024],          /* Section */
 867                 *ptr,                   /* Pointer into line */
 868                 *anchor,                /* Anchor name */
 869                 *text;                  /* Text for anchor */
 870   off_t         offset;                 /* File offset */
 871   char          quote;                  /* Quote character */
 872   help_word_t   *word;                  /* Current word */
 873   int           wordlen;                /* Length of word */
 874
 875
 876   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 877                 "mtime=%ld)", hi, filename, relative, (long)mtime));
 878
 879   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 880     return (-1);
 881
 882   node   = NULL;
 883   offset = 0;
 884
 885   strlcpy(section, "Other", sizeof(section));
 886
 887   while (cupsFileGets(fp, line, sizeof(line)))
 888   {
 889    /*
 890     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 891     */
 892
 893     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
 894     {
 895      /*
 896       * Got section line, copy it!
 897       */
 898
 899       for (ptr += 13; isspace(*ptr & 255); ptr ++);
 900
 901       strlcpy(section, ptr, sizeof(section));
 902       if ((ptr = strstr(section, "-->")) != NULL)
 903       {
 904        /*
 905         * Strip comment stuff from end of line...
 906         */
 907
 908         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 909
 910         if (isspace(*ptr & 255))
 911           *ptr = '\0';
 912       }
 913       continue;
 914     }
 915
 916     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 917     {
 918       ptr ++;
 919
 920       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 921       {
 922        /*
 923         * Found the title...
 924         */
 925
 926         anchor = NULL;
 927         ptr += 6;
 928       }
 929       else
 930       {
 931         char *idptr;                    /* Pointer to ID */
 932
 933         if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 934           ptr += 7;
 935         else if ((idptr = strstr(ptr, " ID=")) != NULL)
 936           ptr = idptr + 4;
 937         else if ((idptr = strstr(ptr, " id=")) != NULL)
 938           ptr = idptr + 4;
 939         else
 940           continue;
 941
 942        /*
 943         * Found an anchor...
 944         */
 945
 946         if (*ptr == '\"' || *ptr == '\'')
 947         {
 948          /*
 949           * Get quoted anchor...
 950           */
 951
 952           quote  = *ptr;
 953           anchor = ptr + 1;
 954           if ((ptr = strchr(anchor, quote)) != NULL)
 955             *ptr++ = '\0';
 956           else
 957             break;
 958         }
 959         else
 960         {
 961          /*
 962           * Get unquoted anchor...
 963           */
 964
 965           anchor = ptr + 1;
 966
 967           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 968
 969           if (*ptr != '>')
 970             *ptr++ = '\0';
 971           else
 972             break;
 973         }
 974
 975        /*
 976         * Got the anchor, now lets find the end...
 977         */
 978
 979         while (*ptr && *ptr != '>')
 980           ptr ++;
 981
 982         if (*ptr != '>')
 983           break;
 984
 985         *ptr++ = '\0';
 986       }
 987
 988      /*
 989       * Now collect text for the link...
 990       */
 991
 992       text = ptr;
 993       while ((ptr = strchr(text, '<')) == NULL)
 994       {
 995         ptr = text + strlen(text);
 996         if (ptr >= (line + sizeof(line) - 2))
 997           break;
 998
 999         *ptr++ = ' ';
1000
1001         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1002           break;
1003       }
1004
1005       *ptr = '\0';
1006
1007       if (node)
1008         node->length = (size_t)(offset - node->offset);
1009
1010       if (!*text)
1011       {
1012         node = NULL;
1013         break;
1014       }
1015
1016       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1017       {
1018        /*
1019         * Node already in the index, so replace the text and other
1020         * data...
1021         */
1022
1023         cupsArrayRemove(hi->nodes, node);
1024
1025         if (node->section)
1026           free(node->section);
1027
1028         if (node->text)
1029           free(node->text);
1030
1031         if (node->words)
1032         {
1033           for (word = (help_word_t *)cupsArrayFirst(node->words);
1034                word;
1035                word = (help_word_t *)cupsArrayNext(node->words))
1036             help_delete_word(word);
1037
1038           cupsArrayDelete(node->words);
1039           node->words = NULL;
1040         }
1041
1042         node->section = section[0] ? strdup(section) : NULL;
1043         node->text    = strdup(text);
1044         node->mtime   = mtime;
1045         node->offset  = offset;
1046         node->score   = 0;
1047       }
1048       else
1049       {
1050        /*
1051         * New node...
1052         */
1053
1054         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1055       }
1056
1057      /*
1058       * Go through the text value and replace tabs and newlines with
1059       * whitespace and eliminate extra whitespace...
1060       */
1061
1062       for (ptr = node->text, text = node->text; *ptr;)
1063         if (isspace(*ptr & 255))
1064         {
1065           while (isspace(*ptr & 255))
1066             ptr ++;
1067
1068           *text++ = ' ';
1069         }
1070         else if (text != ptr)
1071           *text++ = *ptr++;
1072         else
1073         {
1074           text ++;
1075           ptr ++;
1076         }
1077
1078       *text = '\0';
1079
1080      /*
1081       * (Re)add the node to the array...
1082       */
1083
1084       cupsArrayAdd(hi->nodes, node);
1085
1086       if (!anchor)
1087         node = NULL;
1088       break;
1089     }
1090
1091     if (node)
1092     {
1093      /*
1094       * Scan this line for words...
1095       */
1096
1097       for (ptr = line; *ptr; ptr ++)
1098       {
1099        /*
1100         * Skip HTML stuff...
1101         */
1102
1103         if (*ptr == '<')
1104         {
1105           if (!strncmp(ptr, "<!--", 4))
1106           {
1107            /*
1108             * Skip HTML comment...
1109             */
1110
1111             if ((text = strstr(ptr + 4, "-->")) == NULL)
1112               ptr += strlen(ptr) - 1;
1113             else
1114               ptr = text + 2;
1115           }
1116           else
1117           {
1118            /*
1119             * Skip HTML element...
1120             */
1121
1122             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1123             {
1124               if (*ptr == '\"' || *ptr == '\'')
1125               {
1126                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1127
1128                 if (!*ptr)
1129                   ptr --;
1130               }
1131             }
1132
1133             if (!*ptr)
1134               ptr --;
1135           }
1136
1137           continue;
1138         }
1139         else if (*ptr == '&')
1140         {
1141          /*
1142           * Skip HTML entity...
1143           */
1144
1145           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1146
1147           if (!*ptr)
1148             ptr --;
1149
1150           continue;
1151         }
1152         else if (!isalnum(*ptr & 255))
1153           continue;
1154
1155        /*
1156         * Found the start of a word, search until we find the end...
1157         */
1158
1159         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1160
1161         wordlen = (int)(ptr - text);
1162
1163         memcpy(temp, text, (size_t)wordlen);
1164         temp[wordlen] = '\0';
1165
1166         ptr --;
1167
1168         if (wordlen > 1 && !bsearch(temp, help_common_words,
1169                                     (sizeof(help_common_words) /
1170                                      sizeof(help_common_words[0])),
1171                                     sizeof(help_common_words[0]),
1172                                     (int (*)(const void *, const void *))
1173                                         _cups_strcasecmp))
1174           help_add_word(node, temp);
1175       }
1176     }
1177
1178    /*
1179     * Get the offset of the next line...
1180     */
1181
1182     offset = cupsFileTell(fp);
1183   }
1184
1185   cupsFileClose(fp);
1186
1187   if (node)
1188     node->length = (size_t)(offset - node->offset);
1189
1190   return (0);
1191 }
1192
1193
1194 /*
1195  * 'help_new_node()' - Create a new node and add it to an index.
1196  */
1197
1198 static help_node_t *                    /* O - Node pointer or NULL on error */
1199 help_new_node(const char   *filename,   /* I - Filename */
1200               const char   *anchor,     /* I - Anchor */
1201               const char   *section,    /* I - Section */
1202               const char   *text,       /* I - Text */
1203               time_t       mtime,       /* I - Modification time */
1204               off_t        offset,      /* I - Offset in file */
1205               size_t       length)      /* I - Length in bytes */
1206 {
1207   help_node_t   *n;                     /* Node */
1208
1209
1210   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1211                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1212                 (long)mtime, (long)offset, (long)length));
1213
1214   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1215   if (!n)
1216     return (NULL);
1217
1218   n->filename = strdup(filename);
1219   n->anchor   = anchor ? strdup(anchor) : NULL;
1220   n->section  = *section ? strdup(section) : NULL;
1221   n->text     = strdup(text);
1222   n->mtime    = mtime;
1223   n->offset   = offset;
1224   n->length   = length;
1225
1226   return (n);
1227 }
1228
1229
1230 /*
1231  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1232  */
1233
1234 static int                              /* O - Difference */
1235 help_sort_by_name(help_node_t *n1,      /* I - First node */
1236                   help_node_t *n2)      /* I - Second node */
1237 {
1238   int           diff;                   /* Difference */
1239
1240
1241   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1242                 n1, n1->filename, n1->anchor,
1243                 n2, n2->filename, n2->anchor));
1244
1245   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1246     return (diff);
1247
1248   if (!n1->anchor && !n2->anchor)
1249     return (0);
1250   else if (!n1->anchor)
1251     return (-1);
1252   else if (!n2->anchor)
1253     return (1);
1254   else
1255     return (strcmp(n1->anchor, n2->anchor));
1256 }
1257
1258
1259 /*
1260  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1261  */
1262
1263 static int                              /* O - Difference */
1264 help_sort_by_score(help_node_t *n1,     /* I - First node */
1265                    help_node_t *n2)     /* I - Second node */
1266 {
1267   int           diff;                   /* Difference */
1268
1269
1270   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1271                 "n2=%p(%d \"%s\" \"%s\")",
1272                 n1, n1->score, n1->section, n1->text,
1273                 n2, n2->score, n2->section, n2->text));
1274
1275   if (n1->score != n2->score)
1276     return (n2->score - n1->score);
1277
1278   if (n1->section && !n2->section)
1279     return (1);
1280   else if (!n1->section && n2->section)
1281     return (-1);
1282   else if (n1->section && n2->section &&
1283            (diff = strcmp(n1->section, n2->section)) != 0)
1284     return (diff);
1285
1286   return (_cups_strcasecmp(n1->text, n2->text));
1287 }
1288
1289
1290 /*
1291  * 'help_sort_words()' - Sort words alphabetically.
1292  */
1293
1294 static int                              /* O - Difference */
1295 help_sort_words(help_word_t *w1,        /* I - Second word */
1296                 help_word_t *w2)        /* I - Second word */
1297 {
1298   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1299                 w1, w1->text, w2, w2->text));
1300
1301   return (_cups_strcasecmp(w1->text, w2->text));
1302 }