cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright © 2007-2019 by Apple Inc.
   5  * Copyright © 1997-2007 by Easy Software Products.
   6  *
   7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more
   8  * information.
   9  */
  10
  11 /*
  12  * Include necessary headers...
  13  */
  14
  15 #include "cgi-private.h"
  16 #include <cups/dir.h>
  17
  18
  19 /*
  20  * List of common English words that should not be indexed...
  21  */
  22
  23 static char             help_common_words[][6] =
  24                         {
  25                           "about",
  26                           "all",
  27                           "an",
  28                           "and",
  29                           "are",
  30                           "as",
  31                           "at",
  32                           "be",
  33                           "been",
  34                           "but",
  35                           "by",
  36                           "call",
  37                           "can",
  38                           "come",
  39                           "could",
  40                           "day",
  41                           "did",
  42                           "do",
  43                           "down",
  44                           "each",
  45                           "find",
  46                           "first",
  47                           "for",
  48                           "from",
  49                           "go",
  50                           "had",
  51                           "has",
  52                           "have",
  53                           "he",
  54                           "her",
  55                           "him",
  56                           "his",
  57                           "hot",
  58                           "how",
  59                           "if",
  60                           "in",
  61                           "is",
  62                           "it",
  63                           "know",
  64                           "like",
  65                           "long",
  66                           "look",
  67                           "make",
  68                           "many",
  69                           "may",
  70                           "more",
  71                           "most",
  72                           "my",
  73                           "no",
  74                           "now",
  75                           "of",
  76                           "on",
  77                           "one",
  78                           "or",
  79                           "other",
  80                           "out",
  81                           "over",
  82                           "said",
  83                           "see",
  84                           "she",
  85                           "side",
  86                           "so",
  87                           "some",
  88                           "sound",
  89                           "than",
  90                           "that",
  91                           "the",
  92                           "their",
  93                           "them",
  94                           "then",
  95                           "there",
  96                           "these",
  97                           "they",
  98                           "thing",
  99                           "this",
 100                           "time",
 101                           "to",
 102                           "two",
 103                           "up",
 104                           "use",
 105                           "was",
 106                           "water",
 107                           "way",
 108                           "we",
 109                           "were",
 110                           "what",
 111                           "when",
 112                           "which",
 113                           "who",
 114                           "will",
 115                           "with",
 116                           "word",
 117                           "would",
 118                           "write",
 119                           "you",
 120                           "your"
 121                         };
 122
 123
/*
 * Local functions...
 *
 * Forward declarations for the file-private helpers used below:
 *
 *   help_add_word()       - add a word to a node's word list
 *   help_delete_node()    - free a node
 *   help_delete_word()    - free a word
 *   help_load_directory() - scan a directory for HTML files to index
 *   help_load_file()      - index a single HTML file
 *   help_new_node()       - allocate a node; arguments 1, 3, and 4
 *                           (filename, section, text) are marked non-NULL
 *   help_sort_by_name()   - comparison callback passed to cupsArrayNew()
 *                           for the "nodes" arrays
 *   help_sort_by_score()  - comparison callback passed to cupsArrayNew()
 *                           for the "sorted" arrays
 *   help_sort_words()     - comparison callback passed to cupsArrayNew()
 *                           for per-node word arrays
 */

static help_word_t      *help_add_word(help_node_t *n, const char *text);
static void             help_delete_node(help_node_t *n);
static void             help_delete_word(help_word_t *w);
static int              help_load_directory(help_index_t *hi,
                                            const char *directory,
                                            const char *relative);
static int              help_load_file(help_index_t *hi,
                                       const char *filename,
                                       const char *relative,
                                       time_t     mtime);
static help_node_t      *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 142
 143
 144 /*
 145  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 146  */
 147
 148 void
 149 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 150 {
 151   help_node_t   *node;                  /* Current node */
 152
 153
 154   if (!hi)
 155     return;
 156
 157   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 158        node;
 159        node = (help_node_t *)cupsArrayNext(hi->nodes))
 160   {
 161     if (!hi->search)
 162       help_delete_node(node);
 163   }
 164
 165   cupsArrayDelete(hi->nodes);
 166   cupsArrayDelete(hi->sorted);
 167
 168   free(hi);
 169 }
 170
 171
 172 /*
 173  * 'helpFindNode()' - Find a node in an index.
 174  */
 175
 176 help_node_t *                           /* O - Node pointer or NULL */
 177 helpFindNode(help_index_t *hi,          /* I - Index */
 178              const char   *filename,    /* I - Filename */
 179              const char   *anchor)      /* I - Anchor */
 180 {
 181   help_node_t   key;                    /* Search key */
 182
 183
 184  /*
 185   * Range check input...
 186   */
 187
 188   if (!hi || !filename)
 189     return (NULL);
 190
 191  /*
 192   * Initialize the search key...
 193   */
 194
 195   key.filename = (char *)filename;
 196   key.anchor   = (char *)anchor;
 197
 198  /*
 199   * Return any match...
 200   */
 201
 202   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 203 }
 204
 205
 206 /*
 207  * 'helpLoadIndex()' - Load a help index from disk.
 208  */
 209
 210 help_index_t *                          /* O - Index pointer or NULL */
 211 helpLoadIndex(const char *hifile,       /* I - Index filename */
 212               const char *directory)    /* I - Directory that is indexed */
 213 {
 214   help_index_t  *hi;                    /* Help index */
 215   cups_file_t   *fp;                    /* Current file */
 216   char          line[2048],             /* Line from file */
 217                 *ptr,                   /* Pointer into line */
 218                 *filename,              /* Filename in line */
 219                 *anchor,                /* Anchor in line */
 220                 *sectptr,               /* Section pointer in line */
 221                 section[1024],          /* Section name */
 222                 *text;                  /* Text in line */
 223   time_t        mtime;                  /* Modification time */
 224   off_t         offset;                 /* Offset into file */
 225   size_t        length;                 /* Length in bytes */
 226   int           update;                 /* Update? */
 227   help_node_t   *node;                  /* Current node */
 228   help_word_t   *word;                  /* Current word */
 229
 230
 231  /*
 232   * Create a new, empty index.
 233   */
 234
 235   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 236     return (NULL);
 237
 238   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 239   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 240
 241   if (!hi->nodes || !hi->sorted)
 242   {
 243     cupsArrayDelete(hi->nodes);
 244     cupsArrayDelete(hi->sorted);
 245     free(hi);
 246     return (NULL);
 247   }
 248
 249  /*
 250   * Try loading the existing index file...
 251   */
 252
 253   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 254   {
 255    /*
 256     * Lock the file and then read the first line...
 257     */
 258
 259     cupsFileLock(fp, 1);
 260
 261     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 262     {
 263      /*
 264       * Got a valid header line, now read the data lines...
 265       */
 266
 267       node = NULL;
 268
 269       while (cupsFileGets(fp, line, sizeof(line)))
 270       {
 271        /*
 272         * Each line looks like one of the following:
 273         *
 274         *     filename mtime offset length "section" "text"
 275         *     filename#anchor offset length "text"
 276         *     SP count word
 277         */
 278
 279         if (line[0] == ' ')
 280         {
 281          /*
 282           * Read a word in the current node...
 283           */
 284
 285           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 286             continue;
 287
 288           if ((word = help_add_word(node, ptr + 1)) != NULL)
 289             word->count = atoi(line + 1);
 290         }
 291         else
 292         {
 293          /*
 294           * Add a node...
 295           */
 296
 297           filename = line;
 298
 299           if ((ptr = strchr(line, ' ')) == NULL)
 300             break;
 301
 302           while (isspace(*ptr & 255))
 303             *ptr++ = '\0';
 304
 305           if ((anchor = strrchr(filename, '#')) != NULL)
 306           {
 307             *anchor++ = '\0';
 308             mtime = 0;
 309           }
 310           else
 311             mtime = strtol(ptr, &ptr, 10);
 312
 313           offset = strtoll(ptr, &ptr, 10);
 314           length = (size_t)strtoll(ptr, &ptr, 10);
 315
 316           while (isspace(*ptr & 255))
 317             ptr ++;
 318
 319           if (!anchor)
 320           {
 321            /*
 322             * Get section...
 323             */
 324
 325             if (*ptr != '\"')
 326               break;
 327
 328             ptr ++;
 329             sectptr = ptr;
 330
 331             while (*ptr && *ptr != '\"')
 332               ptr ++;
 333
 334             if (*ptr != '\"')
 335               break;
 336
 337             *ptr++ = '\0';
 338
 339             strlcpy(section, sectptr, sizeof(section));
 340
 341             while (isspace(*ptr & 255))
 342               ptr ++;
 343           }
 344           else
 345             section[0] = '\0';
 346
 347           if (*ptr != '\"')
 348             break;
 349
 350           ptr ++;
 351           text = ptr;
 352
 353           while (*ptr && *ptr != '\"')
 354             ptr ++;
 355
 356           if (*ptr != '\"')
 357             break;
 358
 359           *ptr++ = '\0';
 360
 361           if ((node = help_new_node(filename, anchor, section, text,
 362                                     mtime, offset, length)) == NULL)
 363             break;
 364
 365           node->score = -1;
 366
 367           cupsArrayAdd(hi->nodes, node);
 368         }
 369       }
 370     }
 371
 372     cupsFileClose(fp);
 373   }
 374
 375  /*
 376   * Scan for new/updated files...
 377   */
 378
 379   update = help_load_directory(hi, directory, NULL);
 380
 381  /*
 382   * Remove any files that are no longer installed...
 383   */
 384
 385   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 386        node;
 387        node = (help_node_t *)cupsArrayNext(hi->nodes))
 388     if (node->score < 0)
 389     {
 390      /*
 391       * Delete this node...
 392       */
 393
 394       cupsArrayRemove(hi->nodes, node);
 395       help_delete_node(node);
 396     }
 397
 398  /*
 399   * Add nodes to the sorted array...
 400   */
 401
 402   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 403        node;
 404        node = (help_node_t *)cupsArrayNext(hi->nodes))
 405     cupsArrayAdd(hi->sorted, node);
 406
 407  /*
 408   * Save the index if we updated it...
 409   */
 410
 411   if (update)
 412     helpSaveIndex(hi, hifile);
 413
 414  /*
 415   * Return the index...
 416   */
 417
 418   return (hi);
 419 }
 420
 421
 422 /*
 423  * 'helpSaveIndex()' - Save a help index to disk.
 424  */
 425
 426 int                                     /* O - 0 on success, -1 on error */
 427 helpSaveIndex(help_index_t *hi,         /* I - Index */
 428               const char   *hifile)     /* I - Index filename */
 429 {
 430   cups_file_t   *fp;                    /* Index file */
 431   help_node_t   *node;                  /* Current node */
 432   help_word_t   *word;                  /* Current word */
 433
 434
 435  /*
 436   * Try creating a new index file...
 437   */
 438
 439   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 440     return (-1);
 441
 442  /*
 443   * Lock the file while we write it...
 444   */
 445
 446   cupsFileLock(fp, 1);
 447
 448   cupsFilePuts(fp, "HELPV2\n");
 449
 450   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 451        node;
 452        node = (help_node_t *)cupsArrayNext(hi->nodes))
 453   {
 454    /*
 455     * Write the current node with/without the anchor...
 456     */
 457
 458     if (node->anchor)
 459     {
 460       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 461                          node->filename, node->anchor,
 462                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 463                          node->text) < 0)
 464         break;
 465     }
 466     else
 467     {
 468       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 469                          node->filename, (int)node->mtime,
 470                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 471                          node->section ? node->section : "", node->text) < 0)
 472         break;
 473     }
 474
 475    /*
 476     * Then write the words associated with the node...
 477     */
 478
 479     for (word = (help_word_t *)cupsArrayFirst(node->words);
 480          word;
 481          word = (help_word_t *)cupsArrayNext(node->words))
 482       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 483         break;
 484   }
 485
 486   cupsFileFlush(fp);
 487
 488   if (cupsFileClose(fp) < 0)
 489     return (-1);
 490   else if (node)
 491     return (-1);
 492   else
 493     return (0);
 494 }
 495
 496
 497 /*
 498  * 'helpSearchIndex()' - Search an index.
 499  */
 500
 501 help_index_t *                          /* O - Search index */
 502 helpSearchIndex(help_index_t *hi,       /* I - Index */
 503                 const char   *query,    /* I - Query string */
 504                 const char   *section,  /* I - Limit search to this section */
 505                 const char   *filename) /* I - Limit search to this file */
 506 {
 507   help_index_t  *search;                /* Search index */
 508   help_node_t   *node;                  /* Current node */
 509   help_word_t   *word;                  /* Current word */
 510   void          *sc;                    /* Search context */
 511   int           matches;                /* Number of matches */
 512
 513
 514  /*
 515   * Range check...
 516   */
 517
 518   if (!hi || !query)
 519     return (NULL);
 520
 521  /*
 522   * Reset the scores of all nodes to 0...
 523   */
 524
 525   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 526        node;
 527        node = (help_node_t *)cupsArrayNext(hi->nodes))
 528     node->score = 0;
 529
 530  /*
 531   * Find the first node to search in...
 532   */
 533
 534   if (filename)
 535   {
 536     node = helpFindNode(hi, filename, NULL);
 537     if (!node)
 538       return (NULL);
 539   }
 540   else
 541     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 542
 543  /*
 544   * Convert the query into a regular expression...
 545   */
 546
 547   sc = cgiCompileSearch(query);
 548   if (!sc)
 549     return (NULL);
 550
 551  /*
 552   * Allocate a search index...
 553   */
 554
 555   search = calloc(1, sizeof(help_index_t));
 556   if (!search)
 557   {
 558     cgiFreeSearch(sc);
 559     return (NULL);
 560   }
 561
 562   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 563   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 564
 565   if (!search->nodes || !search->sorted)
 566   {
 567     cupsArrayDelete(search->nodes);
 568     cupsArrayDelete(search->sorted);
 569     free(search);
 570     cgiFreeSearch(sc);
 571     return (NULL);
 572   }
 573
 574   search->search = 1;
 575
 576  /*
 577   * Check each node in the index, adding matching nodes to the
 578   * search index...
 579   */
 580
 581   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 582     if (section && strcmp(node->section, section))
 583       continue;
 584     else if (filename && strcmp(node->filename, filename))
 585       continue;
 586     else
 587     {
 588       matches = cgiDoSearch(sc, node->text);
 589
 590       for (word = (help_word_t *)cupsArrayFirst(node->words);
 591            word;
 592            word = (help_word_t *)cupsArrayNext(node->words))
 593         if (cgiDoSearch(sc, word->text) > 0)
 594           matches += word->count;
 595
 596       if (matches > 0)
 597       {
 598        /*
 599         * Found a match, add the node to the search index...
 600         */
 601
 602         node->score = matches;
 603
 604         cupsArrayAdd(search->nodes, node);
 605         cupsArrayAdd(search->sorted, node);
 606       }
 607     }
 608
 609  /*
 610   * Free the search context...
 611   */
 612
 613   cgiFreeSearch(sc);
 614
 615  /*
 616   * Return the results...
 617   */
 618
 619   return (search);
 620 }
 621
 622
 623 /*
 624  * 'help_add_word()' - Add a word to a node.
 625  */
 626
 627 static help_word_t *                    /* O - New word */
 628 help_add_word(help_node_t *n,           /* I - Node */
 629               const char  *text)        /* I - Word text */
 630 {
 631   help_word_t   *w,                     /* New word */
 632                 key;                    /* Search key */
 633
 634
 635  /*
 636   * Create the words array as needed...
 637   */
 638
 639   if (!n->words)
 640     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 641
 642  /*
 643   * See if the word is already added...
 644   */
 645
 646   key.text = (char *)text;
 647
 648   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 649   {
 650    /*
 651     * Create a new word...
 652     */
 653
 654     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 655       return (NULL);
 656
 657     if ((w->text = strdup(text)) == NULL)
 658     {
 659       free(w);
 660       return (NULL);
 661     }
 662
 663     cupsArrayAdd(n->words, w);
 664   }
 665
 666  /*
 667   * Bump the counter for this word and return it...
 668   */
 669
 670   w->count ++;
 671
 672   return (w);
 673 }
 674
 675
 676 /*
 677  * 'help_delete_node()' - Free all memory used by a node.
 678  */
 679
 680 static void
 681 help_delete_node(help_node_t *n)        /* I - Node */
 682 {
 683   help_word_t   *w;                     /* Current word */
 684
 685
 686   if (!n)
 687     return;
 688
 689   if (n->filename)
 690     free(n->filename);
 691
 692   if (n->anchor)
 693     free(n->anchor);
 694
 695   if (n->section)
 696     free(n->section);
 697
 698   if (n->text)
 699     free(n->text);
 700
 701   for (w = (help_word_t *)cupsArrayFirst(n->words);
 702        w;
 703        w = (help_word_t *)cupsArrayNext(n->words))
 704     help_delete_word(w);
 705
 706   cupsArrayDelete(n->words);
 707
 708   free(n);
 709 }
 710
 711
 712 /*
 713  * 'help_delete_word()' - Free all memory used by a word.
 714  */
 715
 716 static void
 717 help_delete_word(help_word_t *w)        /* I - Word */
 718 {
 719   if (!w)
 720     return;
 721
 722   if (w->text)
 723     free(w->text);
 724
 725   free(w);
 726 }
 727
 728
 729 /*
 730  * 'help_load_directory()' - Load a directory of files into an index.
 731  */
 732
 733 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 734 help_load_directory(
 735     help_index_t *hi,                   /* I - Index */
 736     const char   *directory,            /* I - Directory */
 737     const char   *relative)             /* I - Relative path */
 738 {
 739   cups_dir_t    *dir;                   /* Directory file */
 740   cups_dentry_t *dent;                  /* Directory entry */
 741   char          *ext,                   /* Pointer to extension */
 742                 filename[1024],         /* Full filename */
 743                 relname[1024];          /* Relative filename */
 744   int           update;                 /* Updated? */
 745   help_node_t   *node;                  /* Current node */
 746
 747
 748  /*
 749   * Open the directory and scan it...
 750   */
 751
 752   if ((dir = cupsDirOpen(directory)) == NULL)
 753     return (0);
 754
 755   update = 0;
 756
 757   while ((dent = cupsDirRead(dir)) != NULL)
 758   {
 759    /*
 760     * Skip "." files...
 761     */
 762
 763     if (dent->filename[0] == '.')
 764       continue;
 765
 766    /*
 767     * Get absolute and relative filenames...
 768     */
 769
 770     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 771     if (relative)
 772       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 773     else
 774       strlcpy(relname, dent->filename, sizeof(relname));
 775
 776    /*
 777     * Check if we have a HTML file...
 778     */
 779
 780     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 781         (!ext[5] || !strcmp(ext + 5, ".gz")))
 782     {
 783      /*
 784       * HTML file, see if we have already indexed the file...
 785       */
 786
 787       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 788       {
 789        /*
 790         * File already indexed - check dates to confirm that the
 791         * index is up-to-date...
 792         */
 793
 794         if (node->mtime == dent->fileinfo.st_mtime)
 795         {
 796          /*
 797           * Same modification time, so mark all of the nodes
 798           * for this file as up-to-date...
 799           */
 800
 801           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 802             if (!strcmp(node->filename, relname))
 803               node->score = 0;
 804             else
 805               break;
 806
 807           continue;
 808         }
 809       }
 810
 811       update = 1;
 812
 813       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 814     }
 815     else if (S_ISDIR(dent->fileinfo.st_mode))
 816     {
 817      /*
 818       * Process sub-directory...
 819       */
 820
 821       if (help_load_directory(hi, filename, relname) == 1)
 822         update = 1;
 823     }
 824   }
 825
 826   cupsDirClose(dir);
 827
 828   return (update);
 829 }
 830
 831
 832 /*
 833  * 'help_load_file()' - Load a HTML files into an index.
 834  */
 835
 836 static int                              /* O - 0 = success, -1 = error */
 837 help_load_file(
 838     help_index_t *hi,                   /* I - Index */
 839     const char   *filename,             /* I - Filename */
 840     const char   *relative,             /* I - Relative path */
 841     time_t       mtime)                 /* I - Modification time */
 842 {
 843   cups_file_t   *fp;                    /* HTML file */
 844   help_node_t   *node;                  /* Current node */
 845   char          line[1024],             /* Line from file */
 846                 temp[1024],             /* Temporary word */
 847                 section[1024],          /* Section */
 848                 *ptr,                   /* Pointer into line */
 849                 *anchor,                /* Anchor name */
 850                 *text;                  /* Text for anchor */
 851   off_t         offset;                 /* File offset */
 852   char          quote;                  /* Quote character */
 853   help_word_t   *word;                  /* Current word */
 854   int           wordlen;                /* Length of word */
 855
 856
 857   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 858     return (-1);
 859
 860   node   = NULL;
 861   offset = 0;
 862
 863   strlcpy(section, "Other", sizeof(section));
 864
 865   while (cupsFileGets(fp, line, sizeof(line)))
 866   {
 867    /*
 868     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 869     */
 870
 871     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
 872     {
 873      /*
 874       * Got section line, copy it!
 875       */
 876
 877       for (ptr += 13; isspace(*ptr & 255); ptr ++);
 878
 879       strlcpy(section, ptr, sizeof(section));
 880       if ((ptr = strstr(section, "-->")) != NULL)
 881       {
 882        /*
 883         * Strip comment stuff from end of line...
 884         */
 885
 886         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 887
 888         if (isspace(*ptr & 255))
 889           *ptr = '\0';
 890       }
 891       continue;
 892     }
 893
 894     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 895     {
 896       ptr ++;
 897
 898       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 899       {
 900        /*
 901         * Found the title...
 902         */
 903
 904         anchor = NULL;
 905         ptr += 6;
 906       }
 907       else
 908       {
 909         char *idptr;                    /* Pointer to ID */
 910
 911         if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 912           ptr += 7;
 913         else if ((idptr = strstr(ptr, " ID=")) != NULL)
 914           ptr = idptr + 4;
 915         else if ((idptr = strstr(ptr, " id=")) != NULL)
 916           ptr = idptr + 4;
 917         else
 918           continue;
 919
 920        /*
 921         * Found an anchor...
 922         */
 923
 924         if (*ptr == '\"' || *ptr == '\'')
 925         {
 926          /*
 927           * Get quoted anchor...
 928           */
 929
 930           quote  = *ptr;
 931           anchor = ptr + 1;
 932           if ((ptr = strchr(anchor, quote)) != NULL)
 933             *ptr++ = '\0';
 934           else
 935             break;
 936         }
 937         else
 938         {
 939          /*
 940           * Get unquoted anchor...
 941           */
 942
 943           anchor = ptr + 1;
 944
 945           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 946
 947           if (*ptr != '>')
 948             *ptr++ = '\0';
 949           else
 950             break;
 951         }
 952
 953        /*
 954         * Got the anchor, now lets find the end...
 955         */
 956
 957         while (*ptr && *ptr != '>')
 958           ptr ++;
 959
 960         if (*ptr != '>')
 961           break;
 962
 963         *ptr++ = '\0';
 964       }
 965
 966      /*
 967       * Now collect text for the link...
 968       */
 969
 970       text = ptr;
 971       while ((ptr = strchr(text, '<')) == NULL)
 972       {
 973         ptr = text + strlen(text);
 974         if (ptr >= (line + sizeof(line) - 2))
 975           break;
 976
 977         *ptr++ = ' ';
 978
 979         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
 980           break;
 981       }
 982
 983       *ptr = '\0';
 984
 985       if (node)
 986         node->length = (size_t)(offset - node->offset);
 987
 988       if (!*text)
 989       {
 990         node = NULL;
 991         break;
 992       }
 993
 994       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
 995       {
 996        /*
 997         * Node already in the index, so replace the text and other
 998         * data...
 999         */
1000
1001         cupsArrayRemove(hi->nodes, node);
1002
1003         if (node->section)
1004           free(node->section);
1005
1006         if (node->text)
1007           free(node->text);
1008
1009         if (node->words)
1010         {
1011           for (word = (help_word_t *)cupsArrayFirst(node->words);
1012                word;
1013                word = (help_word_t *)cupsArrayNext(node->words))
1014             help_delete_word(word);
1015
1016           cupsArrayDelete(node->words);
1017           node->words = NULL;
1018         }
1019
1020         node->section = section[0] ? strdup(section) : NULL;
1021         node->text    = strdup(text);
1022         node->mtime   = mtime;
1023         node->offset  = offset;
1024         node->score   = 0;
1025       }
1026       else
1027       {
1028        /*
1029         * New node...
1030         */
1031
1032         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1033       }
1034
1035      /*
1036       * Go through the text value and replace tabs and newlines with
1037       * whitespace and eliminate extra whitespace...
1038       */
1039
1040       for (ptr = node->text, text = node->text; *ptr;)
1041         if (isspace(*ptr & 255))
1042         {
1043           while (isspace(*ptr & 255))
1044             ptr ++;
1045
1046           *text++ = ' ';
1047         }
1048         else if (text != ptr)
1049           *text++ = *ptr++;
1050         else
1051         {
1052           text ++;
1053           ptr ++;
1054         }
1055
1056       *text = '\0';
1057
1058      /*
1059       * (Re)add the node to the array...
1060       */
1061
1062       cupsArrayAdd(hi->nodes, node);
1063
1064       if (!anchor)
1065         node = NULL;
1066       break;
1067     }
1068
1069     if (node)
1070     {
1071      /*
1072       * Scan this line for words...
1073       */
1074
1075       for (ptr = line; *ptr; ptr ++)
1076       {
1077        /*
1078         * Skip HTML stuff...
1079         */
1080
1081         if (*ptr == '<')
1082         {
1083           if (!strncmp(ptr, "<!--", 4))
1084           {
1085            /*
1086             * Skip HTML comment...
1087             */
1088
1089             if ((text = strstr(ptr + 4, "-->")) == NULL)
1090               ptr += strlen(ptr) - 1;
1091             else
1092               ptr = text + 2;
1093           }
1094           else
1095           {
1096            /*
1097             * Skip HTML element...
1098             */
1099
1100             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1101             {
1102               if (*ptr == '\"' || *ptr == '\'')
1103               {
1104                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1105
1106                 if (!*ptr)
1107                   ptr --;
1108               }
1109             }
1110
1111             if (!*ptr)
1112               ptr --;
1113           }
1114
1115           continue;
1116         }
1117         else if (*ptr == '&')
1118         {
1119          /*
1120           * Skip HTML entity...
1121           */
1122
1123           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1124
1125           if (!*ptr)
1126             ptr --;
1127
1128           continue;
1129         }
1130         else if (!isalnum(*ptr & 255))
1131           continue;
1132
1133        /*
1134         * Found the start of a word, search until we find the end...
1135         */
1136
1137         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1138
1139         wordlen = (int)(ptr - text);
1140
1141         memcpy(temp, text, (size_t)wordlen);
1142         temp[wordlen] = '\0';
1143
1144         ptr --;
1145
1146         if (wordlen > 1 && !bsearch(temp, help_common_words,
1147                                     (sizeof(help_common_words) /
1148                                      sizeof(help_common_words[0])),
1149                                     sizeof(help_common_words[0]),
1150                                     (int (*)(const void *, const void *))
1151                                         _cups_strcasecmp))
1152           help_add_word(node, temp);
1153       }
1154     }
1155
1156    /*
1157     * Get the offset of the next line...
1158     */
1159
1160     offset = cupsFileTell(fp);
1161   }
1162
1163   cupsFileClose(fp);
1164
1165   if (node)
1166     node->length = (size_t)(offset - node->offset);
1167
1168   return (0);
1169 }
1170
1171
1172 /*
1173  * 'help_new_node()' - Create a new node and add it to an index.
1174  */
1175
1176 static help_node_t *                    /* O - Node pointer or NULL on error */
1177 help_new_node(const char   *filename,   /* I - Filename */
1178               const char   *anchor,     /* I - Anchor */
1179               const char   *section,    /* I - Section */
1180               const char   *text,       /* I - Text */
1181               time_t       mtime,       /* I - Modification time */
1182               off_t        offset,      /* I - Offset in file */
1183               size_t       length)      /* I - Length in bytes */
1184 {
1185   help_node_t   *n;                     /* Node */
1186
1187
1188   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1189   if (!n)
1190     return (NULL);
1191
1192   n->filename = strdup(filename);
1193   n->anchor   = anchor ? strdup(anchor) : NULL;
1194   n->section  = (section && *section) ? strdup(section) : NULL;
1195   n->text     = strdup(text);
1196   n->mtime    = mtime;
1197   n->offset   = offset;
1198   n->length   = length;
1199
1200   return (n);
1201 }
1202
1203
1204 /*
1205  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1206  */
1207
1208 static int                              /* O - Difference */
1209 help_sort_by_name(help_node_t *n1,      /* I - First node */
1210                   help_node_t *n2)      /* I - Second node */
1211 {
1212   int           diff;                   /* Difference */
1213
1214
1215   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1216     return (diff);
1217
1218   if (!n1->anchor && !n2->anchor)
1219     return (0);
1220   else if (!n1->anchor)
1221     return (-1);
1222   else if (!n2->anchor)
1223     return (1);
1224   else
1225     return (strcmp(n1->anchor, n2->anchor));
1226 }
1227
1228
1229 /*
1230  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1231  */
1232
1233 static int                              /* O - Difference */
1234 help_sort_by_score(help_node_t *n1,     /* I - First node */
1235                    help_node_t *n2)     /* I - Second node */
1236 {
1237   int           diff;                   /* Difference */
1238
1239
1240   if (n1->score != n2->score)
1241     return (n2->score - n1->score);
1242
1243   if (n1->section && !n2->section)
1244     return (1);
1245   else if (!n1->section && n2->section)
1246     return (-1);
1247   else if (n1->section && n2->section &&
1248            (diff = strcmp(n1->section, n2->section)) != 0)
1249     return (diff);
1250
1251   return (_cups_strcasecmp(n1->text, n2->text));
1252 }
1253
1254
1255 /*
1256  * 'help_sort_words()' - Sort words alphabetically.
1257  */
1258
1259 static int                              /* O - Difference */
1260 help_sort_words(help_word_t *w1,        /* I - Second word */
1261                 help_word_t *w2)        /* I - Second word */
1262 {
1263   return (_cups_strcasecmp(w1->text, w2->text));
1264 }