cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright 2007-2017 by Apple Inc.
   5  * Copyright 1997-2007 by Easy Software Products.
   6  *
   7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
   8  */
   9
  10 /*
  11  * Include necessary headers...
  12  */
  13
  14 #include "cgi-private.h"
  15 #include <cups/dir.h>
  16
  17
  18 /*
  19  * List of common English words that should not be indexed...
  20  */
  21
  22 static char             help_common_words[][6] =
  23                         {
  24                           "about",
  25                           "all",
  26                           "an",
  27                           "and",
  28                           "are",
  29                           "as",
  30                           "at",
  31                           "be",
  32                           "been",
  33                           "but",
  34                           "by",
  35                           "call",
  36                           "can",
  37                           "come",
  38                           "could",
  39                           "day",
  40                           "did",
  41                           "do",
  42                           "down",
  43                           "each",
  44                           "find",
  45                           "first",
  46                           "for",
  47                           "from",
  48                           "go",
  49                           "had",
  50                           "has",
  51                           "have",
  52                           "he",
  53                           "her",
  54                           "him",
  55                           "his",
  56                           "hot",
  57                           "how",
  58                           "if",
  59                           "in",
  60                           "is",
  61                           "it",
  62                           "know",
  63                           "like",
  64                           "long",
  65                           "look",
  66                           "make",
  67                           "many",
  68                           "may",
  69                           "more",
  70                           "most",
  71                           "my",
  72                           "no",
  73                           "now",
  74                           "of",
  75                           "on",
  76                           "one",
  77                           "or",
  78                           "other",
  79                           "out",
  80                           "over",
  81                           "said",
  82                           "see",
  83                           "she",
  84                           "side",
  85                           "so",
  86                           "some",
  87                           "sound",
  88                           "than",
  89                           "that",
  90                           "the",
  91                           "their",
  92                           "them",
  93                           "then",
  94                           "there",
  95                           "these",
  96                           "they",
  97                           "thing",
  98                           "this",
  99                           "time",
 100                           "to",
 101                           "two",
 102                           "up",
 103                           "use",
 104                           "was",
 105                           "water",
 106                           "way",
 107                           "we",
 108                           "were",
 109                           "what",
 110                           "when",
 111                           "which",
 112                           "who",
 113                           "will",
 114                           "with",
 115                           "word",
 116                           "would",
 117                           "write",
 118                           "you",
 119                           "your"
 120                         };
 121
 122
 123 /*
 124  * Local functions...
 125  */
 126
 127 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 128 static void             help_delete_node(help_node_t *n);
 129 static void             help_delete_word(help_word_t *w);
 130 static int              help_load_directory(help_index_t *hi,
 131                                             const char *directory,
 132                                             const char *relative);
 133 static int              help_load_file(help_index_t *hi,
 134                                        const char *filename,
 135                                        const char *relative,
 136                                        time_t     mtime);
 137 static help_node_t      *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
 138 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 139 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 140 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 141
 142
 143 /*
 144  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 145  */
 146
 147 void
 148 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 149 {
 150   help_node_t   *node;                  /* Current node */
 151
 152
 153   if (!hi)
 154     return;
 155
 156   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 157        node;
 158        node = (help_node_t *)cupsArrayNext(hi->nodes))
 159   {
 160     if (!hi->search)
 161       help_delete_node(node);
 162   }
 163
 164   cupsArrayDelete(hi->nodes);
 165   cupsArrayDelete(hi->sorted);
 166
 167   free(hi);
 168 }
 169
 170
 171 /*
 172  * 'helpFindNode()' - Find a node in an index.
 173  */
 174
 175 help_node_t *                           /* O - Node pointer or NULL */
 176 helpFindNode(help_index_t *hi,          /* I - Index */
 177              const char   *filename,    /* I - Filename */
 178              const char   *anchor)      /* I - Anchor */
 179 {
 180   help_node_t   key;                    /* Search key */
 181
 182
 183  /*
 184   * Range check input...
 185   */
 186
 187   if (!hi || !filename)
 188     return (NULL);
 189
 190  /*
 191   * Initialize the search key...
 192   */
 193
 194   key.filename = (char *)filename;
 195   key.anchor   = (char *)anchor;
 196
 197  /*
 198   * Return any match...
 199   */
 200
 201   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 202 }
 203
 204
 205 /*
 206  * 'helpLoadIndex()' - Load a help index from disk.
 207  */
 208
 209 help_index_t *                          /* O - Index pointer or NULL */
 210 helpLoadIndex(const char *hifile,       /* I - Index filename */
 211               const char *directory)    /* I - Directory that is indexed */
 212 {
 213   help_index_t  *hi;                    /* Help index */
 214   cups_file_t   *fp;                    /* Current file */
 215   char          line[2048],             /* Line from file */
 216                 *ptr,                   /* Pointer into line */
 217                 *filename,              /* Filename in line */
 218                 *anchor,                /* Anchor in line */
 219                 *sectptr,               /* Section pointer in line */
 220                 section[1024],          /* Section name */
 221                 *text;                  /* Text in line */
 222   time_t        mtime;                  /* Modification time */
 223   off_t         offset;                 /* Offset into file */
 224   size_t        length;                 /* Length in bytes */
 225   int           update;                 /* Update? */
 226   help_node_t   *node;                  /* Current node */
 227   help_word_t   *word;                  /* Current word */
 228
 229
 230  /*
 231   * Create a new, empty index.
 232   */
 233
 234   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 235     return (NULL);
 236
 237   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 238   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 239
 240   if (!hi->nodes || !hi->sorted)
 241   {
 242     cupsArrayDelete(hi->nodes);
 243     cupsArrayDelete(hi->sorted);
 244     free(hi);
 245     return (NULL);
 246   }
 247
 248  /*
 249   * Try loading the existing index file...
 250   */
 251
 252   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 253   {
 254    /*
 255     * Lock the file and then read the first line...
 256     */
 257
 258     cupsFileLock(fp, 1);
 259
 260     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 261     {
 262      /*
 263       * Got a valid header line, now read the data lines...
 264       */
 265
 266       node = NULL;
 267
 268       while (cupsFileGets(fp, line, sizeof(line)))
 269       {
 270        /*
 271         * Each line looks like one of the following:
 272         *
 273         *     filename mtime offset length "section" "text"
 274         *     filename#anchor offset length "text"
 275         *     SP count word
 276         */
 277
 278         if (line[0] == ' ')
 279         {
 280          /*
 281           * Read a word in the current node...
 282           */
 283
 284           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 285             continue;
 286
 287           if ((word = help_add_word(node, ptr + 1)) != NULL)
 288             word->count = atoi(line + 1);
 289         }
 290         else
 291         {
 292          /*
 293           * Add a node...
 294           */
 295
 296           filename = line;
 297
 298           if ((ptr = strchr(line, ' ')) == NULL)
 299             break;
 300
 301           while (isspace(*ptr & 255))
 302             *ptr++ = '\0';
 303
 304           if ((anchor = strrchr(filename, '#')) != NULL)
 305           {
 306             *anchor++ = '\0';
 307             mtime = 0;
 308           }
 309           else
 310             mtime = strtol(ptr, &ptr, 10);
 311
 312           offset = strtoll(ptr, &ptr, 10);
 313           length = (size_t)strtoll(ptr, &ptr, 10);
 314
 315           while (isspace(*ptr & 255))
 316             ptr ++;
 317
 318           if (!anchor)
 319           {
 320            /*
 321             * Get section...
 322             */
 323
 324             if (*ptr != '\"')
 325               break;
 326
 327             ptr ++;
 328             sectptr = ptr;
 329
 330             while (*ptr && *ptr != '\"')
 331               ptr ++;
 332
 333             if (*ptr != '\"')
 334               break;
 335
 336             *ptr++ = '\0';
 337
 338             strlcpy(section, sectptr, sizeof(section));
 339
 340             while (isspace(*ptr & 255))
 341               ptr ++;
 342           }
 343
 344           if (*ptr != '\"')
 345             break;
 346
 347           ptr ++;
 348           text = ptr;
 349
 350           while (*ptr && *ptr != '\"')
 351             ptr ++;
 352
 353           if (*ptr != '\"')
 354             break;
 355
 356           *ptr++ = '\0';
 357
 358           if ((node = help_new_node(filename, anchor, section, text,
 359                                     mtime, offset, length)) == NULL)
 360             break;
 361
 362           node->score = -1;
 363
 364           cupsArrayAdd(hi->nodes, node);
 365         }
 366       }
 367     }
 368
 369     cupsFileClose(fp);
 370   }
 371
 372  /*
 373   * Scan for new/updated files...
 374   */
 375
 376   update = help_load_directory(hi, directory, NULL);
 377
 378  /*
 379   * Remove any files that are no longer installed...
 380   */
 381
 382   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 383        node;
 384        node = (help_node_t *)cupsArrayNext(hi->nodes))
 385     if (node->score < 0)
 386     {
 387      /*
 388       * Delete this node...
 389       */
 390
 391       cupsArrayRemove(hi->nodes, node);
 392       help_delete_node(node);
 393     }
 394
 395  /*
 396   * Add nodes to the sorted array...
 397   */
 398
 399   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 400        node;
 401        node = (help_node_t *)cupsArrayNext(hi->nodes))
 402     cupsArrayAdd(hi->sorted, node);
 403
 404  /*
 405   * Save the index if we updated it...
 406   */
 407
 408   if (update)
 409     helpSaveIndex(hi, hifile);
 410
 411  /*
 412   * Return the index...
 413   */
 414
 415   return (hi);
 416 }
 417
 418
 419 /*
 420  * 'helpSaveIndex()' - Save a help index to disk.
 421  */
 422
 423 int                                     /* O - 0 on success, -1 on error */
 424 helpSaveIndex(help_index_t *hi,         /* I - Index */
 425               const char   *hifile)     /* I - Index filename */
 426 {
 427   cups_file_t   *fp;                    /* Index file */
 428   help_node_t   *node;                  /* Current node */
 429   help_word_t   *word;                  /* Current word */
 430
 431
 432  /*
 433   * Try creating a new index file...
 434   */
 435
 436   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 437     return (-1);
 438
 439  /*
 440   * Lock the file while we write it...
 441   */
 442
 443   cupsFileLock(fp, 1);
 444
 445   cupsFilePuts(fp, "HELPV2\n");
 446
 447   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 448        node;
 449        node = (help_node_t *)cupsArrayNext(hi->nodes))
 450   {
 451    /*
 452     * Write the current node with/without the anchor...
 453     */
 454
 455     if (node->anchor)
 456     {
 457       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 458                          node->filename, node->anchor,
 459                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 460                          node->text) < 0)
 461         break;
 462     }
 463     else
 464     {
 465       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 466                          node->filename, (int)node->mtime,
 467                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 468                          node->section ? node->section : "", node->text) < 0)
 469         break;
 470     }
 471
 472    /*
 473     * Then write the words associated with the node...
 474     */
 475
 476     for (word = (help_word_t *)cupsArrayFirst(node->words);
 477          word;
 478          word = (help_word_t *)cupsArrayNext(node->words))
 479       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 480         break;
 481   }
 482
 483   cupsFileFlush(fp);
 484
 485   if (cupsFileClose(fp) < 0)
 486     return (-1);
 487   else if (node)
 488     return (-1);
 489   else
 490     return (0);
 491 }
 492
 493
 494 /*
 495  * 'helpSearchIndex()' - Search an index.
 496  */
 497
 498 help_index_t *                          /* O - Search index */
 499 helpSearchIndex(help_index_t *hi,       /* I - Index */
 500                 const char   *query,    /* I - Query string */
 501                 const char   *section,  /* I - Limit search to this section */
 502                 const char   *filename) /* I - Limit search to this file */
 503 {
 504   help_index_t  *search;                /* Search index */
 505   help_node_t   *node;                  /* Current node */
 506   help_word_t   *word;                  /* Current word */
 507   void          *sc;                    /* Search context */
 508   int           matches;                /* Number of matches */
 509
 510
 511  /*
 512   * Range check...
 513   */
 514
 515   if (!hi || !query)
 516     return (NULL);
 517
 518  /*
 519   * Reset the scores of all nodes to 0...
 520   */
 521
 522   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 523        node;
 524        node = (help_node_t *)cupsArrayNext(hi->nodes))
 525     node->score = 0;
 526
 527  /*
 528   * Find the first node to search in...
 529   */
 530
 531   if (filename)
 532   {
 533     node = helpFindNode(hi, filename, NULL);
 534     if (!node)
 535       return (NULL);
 536   }
 537   else
 538     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 539
 540  /*
 541   * Convert the query into a regular expression...
 542   */
 543
 544   sc = cgiCompileSearch(query);
 545   if (!sc)
 546     return (NULL);
 547
 548  /*
 549   * Allocate a search index...
 550   */
 551
 552   search = calloc(1, sizeof(help_index_t));
 553   if (!search)
 554   {
 555     cgiFreeSearch(sc);
 556     return (NULL);
 557   }
 558
 559   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 560   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 561
 562   if (!search->nodes || !search->sorted)
 563   {
 564     cupsArrayDelete(search->nodes);
 565     cupsArrayDelete(search->sorted);
 566     free(search);
 567     cgiFreeSearch(sc);
 568     return (NULL);
 569   }
 570
 571   search->search = 1;
 572
 573  /*
 574   * Check each node in the index, adding matching nodes to the
 575   * search index...
 576   */
 577
 578   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 579     if (section && strcmp(node->section, section))
 580       continue;
 581     else if (filename && strcmp(node->filename, filename))
 582       continue;
 583     else
 584     {
 585       matches = cgiDoSearch(sc, node->text);
 586
 587       for (word = (help_word_t *)cupsArrayFirst(node->words);
 588            word;
 589            word = (help_word_t *)cupsArrayNext(node->words))
 590         if (cgiDoSearch(sc, word->text) > 0)
 591           matches += word->count;
 592
 593       if (matches > 0)
 594       {
 595        /*
 596         * Found a match, add the node to the search index...
 597         */
 598
 599         node->score = matches;
 600
 601         cupsArrayAdd(search->nodes, node);
 602         cupsArrayAdd(search->sorted, node);
 603       }
 604     }
 605
 606  /*
 607   * Free the search context...
 608   */
 609
 610   cgiFreeSearch(sc);
 611
 612  /*
 613   * Return the results...
 614   */
 615
 616   return (search);
 617 }
 618
 619
 620 /*
 621  * 'help_add_word()' - Add a word to a node.
 622  */
 623
 624 static help_word_t *                    /* O - New word */
 625 help_add_word(help_node_t *n,           /* I - Node */
 626               const char  *text)        /* I - Word text */
 627 {
 628   help_word_t   *w,                     /* New word */
 629                 key;                    /* Search key */
 630
 631
 632  /*
 633   * Create the words array as needed...
 634   */
 635
 636   if (!n->words)
 637     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 638
 639  /*
 640   * See if the word is already added...
 641   */
 642
 643   key.text = (char *)text;
 644
 645   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 646   {
 647    /*
 648     * Create a new word...
 649     */
 650
 651     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 652       return (NULL);
 653
 654     if ((w->text = strdup(text)) == NULL)
 655     {
 656       free(w);
 657       return (NULL);
 658     }
 659
 660     cupsArrayAdd(n->words, w);
 661   }
 662
 663  /*
 664   * Bump the counter for this word and return it...
 665   */
 666
 667   w->count ++;
 668
 669   return (w);
 670 }
 671
 672
 673 /*
 674  * 'help_delete_node()' - Free all memory used by a node.
 675  */
 676
 677 static void
 678 help_delete_node(help_node_t *n)        /* I - Node */
 679 {
 680   help_word_t   *w;                     /* Current word */
 681
 682
 683   if (!n)
 684     return;
 685
 686   if (n->filename)
 687     free(n->filename);
 688
 689   if (n->anchor)
 690     free(n->anchor);
 691
 692   if (n->section)
 693     free(n->section);
 694
 695   if (n->text)
 696     free(n->text);
 697
 698   for (w = (help_word_t *)cupsArrayFirst(n->words);
 699        w;
 700        w = (help_word_t *)cupsArrayNext(n->words))
 701     help_delete_word(w);
 702
 703   cupsArrayDelete(n->words);
 704
 705   free(n);
 706 }
 707
 708
 709 /*
 710  * 'help_delete_word()' - Free all memory used by a word.
 711  */
 712
 713 static void
 714 help_delete_word(help_word_t *w)        /* I - Word */
 715 {
 716   if (!w)
 717     return;
 718
 719   if (w->text)
 720     free(w->text);
 721
 722   free(w);
 723 }
 724
 725
 726 /*
 727  * 'help_load_directory()' - Load a directory of files into an index.
 728  */
 729
 730 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 731 help_load_directory(
 732     help_index_t *hi,                   /* I - Index */
 733     const char   *directory,            /* I - Directory */
 734     const char   *relative)             /* I - Relative path */
 735 {
 736   cups_dir_t    *dir;                   /* Directory file */
 737   cups_dentry_t *dent;                  /* Directory entry */
 738   char          *ext,                   /* Pointer to extension */
 739                 filename[1024],         /* Full filename */
 740                 relname[1024];          /* Relative filename */
 741   int           update;                 /* Updated? */
 742   help_node_t   *node;                  /* Current node */
 743
 744
 745  /*
 746   * Open the directory and scan it...
 747   */
 748
 749   if ((dir = cupsDirOpen(directory)) == NULL)
 750     return (0);
 751
 752   update = 0;
 753
 754   while ((dent = cupsDirRead(dir)) != NULL)
 755   {
 756    /*
 757     * Skip "." files...
 758     */
 759
 760     if (dent->filename[0] == '.')
 761       continue;
 762
 763    /*
 764     * Get absolute and relative filenames...
 765     */
 766
 767     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 768     if (relative)
 769       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 770     else
 771       strlcpy(relname, dent->filename, sizeof(relname));
 772
 773    /*
 774     * Check if we have a HTML file...
 775     */
 776
 777     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 778         (!ext[5] || !strcmp(ext + 5, ".gz")))
 779     {
 780      /*
 781       * HTML file, see if we have already indexed the file...
 782       */
 783
 784       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 785       {
 786        /*
 787         * File already indexed - check dates to confirm that the
 788         * index is up-to-date...
 789         */
 790
 791         if (node->mtime == dent->fileinfo.st_mtime)
 792         {
 793          /*
 794           * Same modification time, so mark all of the nodes
 795           * for this file as up-to-date...
 796           */
 797
 798           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 799             if (!strcmp(node->filename, relname))
 800               node->score = 0;
 801             else
 802               break;
 803
 804           continue;
 805         }
 806       }
 807
 808       update = 1;
 809
 810       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 811     }
 812     else if (S_ISDIR(dent->fileinfo.st_mode))
 813     {
 814      /*
 815       * Process sub-directory...
 816       */
 817
 818       if (help_load_directory(hi, filename, relname) == 1)
 819         update = 1;
 820     }
 821   }
 822
 823   cupsDirClose(dir);
 824
 825   return (update);
 826 }
 827
 828
 829 /*
 830  * 'help_load_file()' - Load a HTML files into an index.
 831  */
 832
 833 static int                              /* O - 0 = success, -1 = error */
 834 help_load_file(
 835     help_index_t *hi,                   /* I - Index */
 836     const char   *filename,             /* I - Filename */
 837     const char   *relative,             /* I - Relative path */
 838     time_t       mtime)                 /* I - Modification time */
 839 {
 840   cups_file_t   *fp;                    /* HTML file */
 841   help_node_t   *node;                  /* Current node */
 842   char          line[1024],             /* Line from file */
 843                 temp[1024],             /* Temporary word */
 844                 section[1024],          /* Section */
 845                 *ptr,                   /* Pointer into line */
 846                 *anchor,                /* Anchor name */
 847                 *text;                  /* Text for anchor */
 848   off_t         offset;                 /* File offset */
 849   char          quote;                  /* Quote character */
 850   help_word_t   *word;                  /* Current word */
 851   int           wordlen;                /* Length of word */
 852
 853
 854   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 855     return (-1);
 856
 857   node   = NULL;
 858   offset = 0;
 859
 860   strlcpy(section, "Other", sizeof(section));
 861
 862   while (cupsFileGets(fp, line, sizeof(line)))
 863   {
 864    /*
 865     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 866     */
 867
 868     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
 869     {
 870      /*
 871       * Got section line, copy it!
 872       */
 873
 874       for (ptr += 13; isspace(*ptr & 255); ptr ++);
 875
 876       strlcpy(section, ptr, sizeof(section));
 877       if ((ptr = strstr(section, "-->")) != NULL)
 878       {
 879        /*
 880         * Strip comment stuff from end of line...
 881         */
 882
 883         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 884
 885         if (isspace(*ptr & 255))
 886           *ptr = '\0';
 887       }
 888       continue;
 889     }
 890
 891     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 892     {
 893       ptr ++;
 894
 895       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 896       {
 897        /*
 898         * Found the title...
 899         */
 900
 901         anchor = NULL;
 902         ptr += 6;
 903       }
 904       else
 905       {
 906         char *idptr;                    /* Pointer to ID */
 907
 908         if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 909           ptr += 7;
 910         else if ((idptr = strstr(ptr, " ID=")) != NULL)
 911           ptr = idptr + 4;
 912         else if ((idptr = strstr(ptr, " id=")) != NULL)
 913           ptr = idptr + 4;
 914         else
 915           continue;
 916
 917        /*
 918         * Found an anchor...
 919         */
 920
 921         if (*ptr == '\"' || *ptr == '\'')
 922         {
 923          /*
 924           * Get quoted anchor...
 925           */
 926
 927           quote  = *ptr;
 928           anchor = ptr + 1;
 929           if ((ptr = strchr(anchor, quote)) != NULL)
 930             *ptr++ = '\0';
 931           else
 932             break;
 933         }
 934         else
 935         {
 936          /*
 937           * Get unquoted anchor...
 938           */
 939
 940           anchor = ptr + 1;
 941
 942           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 943
 944           if (*ptr != '>')
 945             *ptr++ = '\0';
 946           else
 947             break;
 948         }
 949
 950        /*
 951         * Got the anchor, now lets find the end...
 952         */
 953
 954         while (*ptr && *ptr != '>')
 955           ptr ++;
 956
 957         if (*ptr != '>')
 958           break;
 959
 960         *ptr++ = '\0';
 961       }
 962
 963      /*
 964       * Now collect text for the link...
 965       */
 966
 967       text = ptr;
 968       while ((ptr = strchr(text, '<')) == NULL)
 969       {
 970         ptr = text + strlen(text);
 971         if (ptr >= (line + sizeof(line) - 2))
 972           break;
 973
 974         *ptr++ = ' ';
 975
 976         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
 977           break;
 978       }
 979
 980       *ptr = '\0';
 981
 982       if (node)
 983         node->length = (size_t)(offset - node->offset);
 984
 985       if (!*text)
 986       {
 987         node = NULL;
 988         break;
 989       }
 990
 991       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
 992       {
 993        /*
 994         * Node already in the index, so replace the text and other
 995         * data...
 996         */
 997
 998         cupsArrayRemove(hi->nodes, node);
 999
1000         if (node->section)
1001           free(node->section);
1002
1003         if (node->text)
1004           free(node->text);
1005
1006         if (node->words)
1007         {
1008           for (word = (help_word_t *)cupsArrayFirst(node->words);
1009                word;
1010                word = (help_word_t *)cupsArrayNext(node->words))
1011             help_delete_word(word);
1012
1013           cupsArrayDelete(node->words);
1014           node->words = NULL;
1015         }
1016
1017         node->section = section[0] ? strdup(section) : NULL;
1018         node->text    = strdup(text);
1019         node->mtime   = mtime;
1020         node->offset  = offset;
1021         node->score   = 0;
1022       }
1023       else
1024       {
1025        /*
1026         * New node...
1027         */
1028
1029         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1030       }
1031
1032      /*
1033       * Go through the text value and replace tabs and newlines with
1034       * whitespace and eliminate extra whitespace...
1035       */
1036
1037       for (ptr = node->text, text = node->text; *ptr;)
1038         if (isspace(*ptr & 255))
1039         {
1040           while (isspace(*ptr & 255))
1041             ptr ++;
1042
1043           *text++ = ' ';
1044         }
1045         else if (text != ptr)
1046           *text++ = *ptr++;
1047         else
1048         {
1049           text ++;
1050           ptr ++;
1051         }
1052
1053       *text = '\0';
1054
1055      /*
1056       * (Re)add the node to the array...
1057       */
1058
1059       cupsArrayAdd(hi->nodes, node);
1060
1061       if (!anchor)
1062         node = NULL;
1063       break;
1064     }
1065
1066     if (node)
1067     {
1068      /*
1069       * Scan this line for words...
1070       */
1071
1072       for (ptr = line; *ptr; ptr ++)
1073       {
1074        /*
1075         * Skip HTML stuff...
1076         */
1077
1078         if (*ptr == '<')
1079         {
1080           if (!strncmp(ptr, "<!--", 4))
1081           {
1082            /*
1083             * Skip HTML comment...
1084             */
1085
1086             if ((text = strstr(ptr + 4, "-->")) == NULL)
1087               ptr += strlen(ptr) - 1;
1088             else
1089               ptr = text + 2;
1090           }
1091           else
1092           {
1093            /*
1094             * Skip HTML element...
1095             */
1096
1097             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1098             {
1099               if (*ptr == '\"' || *ptr == '\'')
1100               {
1101                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1102
1103                 if (!*ptr)
1104                   ptr --;
1105               }
1106             }
1107
1108             if (!*ptr)
1109               ptr --;
1110           }
1111
1112           continue;
1113         }
1114         else if (*ptr == '&')
1115         {
1116          /*
1117           * Skip HTML entity...
1118           */
1119
1120           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1121
1122           if (!*ptr)
1123             ptr --;
1124
1125           continue;
1126         }
1127         else if (!isalnum(*ptr & 255))
1128           continue;
1129
1130        /*
1131         * Found the start of a word, search until we find the end...
1132         */
1133
1134         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1135
1136         wordlen = (int)(ptr - text);
1137
1138         memcpy(temp, text, (size_t)wordlen);
1139         temp[wordlen] = '\0';
1140
1141         ptr --;
1142
1143         if (wordlen > 1 && !bsearch(temp, help_common_words,
1144                                     (sizeof(help_common_words) /
1145                                      sizeof(help_common_words[0])),
1146                                     sizeof(help_common_words[0]),
1147                                     (int (*)(const void *, const void *))
1148                                         _cups_strcasecmp))
1149           help_add_word(node, temp);
1150       }
1151     }
1152
1153    /*
1154     * Get the offset of the next line...
1155     */
1156
1157     offset = cupsFileTell(fp);
1158   }
1159
1160   cupsFileClose(fp);
1161
1162   if (node)
1163     node->length = (size_t)(offset - node->offset);
1164
1165   return (0);
1166 }
1167
1168
1169 /*
 * 'help_new_node()' - Allocate and initialize a new node.
1171  */
1172
1173 static help_node_t *                    /* O - Node pointer or NULL on error */
1174 help_new_node(const char   *filename,   /* I - Filename */
1175               const char   *anchor,     /* I - Anchor */
1176               const char   *section,    /* I - Section */
1177               const char   *text,       /* I - Text */
1178               time_t       mtime,       /* I - Modification time */
1179               off_t        offset,      /* I - Offset in file */
1180               size_t       length)      /* I - Length in bytes */
1181 {
1182   help_node_t   *n;                     /* Node */
1183
1184
1185   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1186   if (!n)
1187     return (NULL);
1188
1189   n->filename = strdup(filename);
1190   n->anchor   = anchor ? strdup(anchor) : NULL;
1191   n->section  = *section ? strdup(section) : NULL;
1192   n->text     = strdup(text);
1193   n->mtime    = mtime;
1194   n->offset   = offset;
1195   n->length   = length;
1196
1197   return (n);
1198 }
1199
1200
1201 /*
 * 'help_sort_by_name()' - Sort nodes by filename and anchor.
1203  */
1204
1205 static int                              /* O - Difference */
1206 help_sort_by_name(help_node_t *n1,      /* I - First node */
1207                   help_node_t *n2)      /* I - Second node */
1208 {
1209   int           diff;                   /* Difference */
1210
1211
1212   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1213     return (diff);
1214
1215   if (!n1->anchor && !n2->anchor)
1216     return (0);
1217   else if (!n1->anchor)
1218     return (-1);
1219   else if (!n2->anchor)
1220     return (1);
1221   else
1222     return (strcmp(n1->anchor, n2->anchor));
1223 }
1224
1225
1226 /*
 * 'help_sort_by_score()' - Sort nodes by score, section, and text.
1228  */
1229
1230 static int                              /* O - Difference */
1231 help_sort_by_score(help_node_t *n1,     /* I - First node */
1232                    help_node_t *n2)     /* I - Second node */
1233 {
1234   int           diff;                   /* Difference */
1235
1236
1237   if (n1->score != n2->score)
1238     return (n2->score - n1->score);
1239
1240   if (n1->section && !n2->section)
1241     return (1);
1242   else if (!n1->section && n2->section)
1243     return (-1);
1244   else if (n1->section && n2->section &&
1245            (diff = strcmp(n1->section, n2->section)) != 0)
1246     return (diff);
1247
1248   return (_cups_strcasecmp(n1->text, n2->text));
1249 }
1250
1251
1252 /*
1253  * 'help_sort_words()' - Sort words alphabetically.
1254  */
1255
1256 static int                              /* O - Difference */
1257 help_sort_words(help_word_t *w1,        /* I - Second word */
1258                 help_word_t *w2)        /* I - Second word */
1259 {
1260   return (_cups_strcasecmp(w1->text, w2->text));
1261 }