cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright 2007-2015 by Apple Inc.
   5  * Copyright 1997-2007 by Easy Software Products.
   6  *
   7  * These coded instructions, statements, and computer programs are the
   8  * property of Apple Inc. and are protected by Federal copyright
   9  * law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  10  * which should have been included with this file.  If this file is
  11  * missing or damaged, see the license at "http://www.cups.org/".
  12  */
  13
  14 /*
  15  * Include necessary headers...
  16  */
  17
  18 #include "cgi-private.h"
  19 #include <cups/dir.h>
  20
  21
  22 /*
  23  * List of common English words that should not be indexed...
  24  */
  25
  26 static char             help_common_words[][6] =
  27                         {
  28                           "about",
  29                           "all",
  30                           "an",
  31                           "and",
  32                           "are",
  33                           "as",
  34                           "at",
  35                           "be",
  36                           "been",
  37                           "but",
  38                           "by",
  39                           "call",
  40                           "can",
  41                           "come",
  42                           "could",
  43                           "day",
  44                           "did",
  45                           "do",
  46                           "down",
  47                           "each",
  48                           "find",
  49                           "first",
  50                           "for",
  51                           "from",
  52                           "go",
  53                           "had",
  54                           "has",
  55                           "have",
  56                           "he",
  57                           "her",
  58                           "him",
  59                           "his",
  60                           "hot",
  61                           "how",
  62                           "if",
  63                           "in",
  64                           "is",
  65                           "it",
  66                           "know",
  67                           "like",
  68                           "long",
  69                           "look",
  70                           "make",
  71                           "many",
  72                           "may",
  73                           "more",
  74                           "most",
  75                           "my",
  76                           "no",
  77                           "now",
  78                           "of",
  79                           "on",
  80                           "one",
  81                           "or",
  82                           "other",
  83                           "out",
  84                           "over",
  85                           "said",
  86                           "see",
  87                           "she",
  88                           "side",
  89                           "so",
  90                           "some",
  91                           "sound",
  92                           "than",
  93                           "that",
  94                           "the",
  95                           "their",
  96                           "them",
  97                           "then",
  98                           "there",
  99                           "these",
 100                           "they",
 101                           "thing",
 102                           "this",
 103                           "time",
 104                           "to",
 105                           "two",
 106                           "up",
 107                           "use",
 108                           "was",
 109                           "water",
 110                           "way",
 111                           "we",
 112                           "were",
 113                           "what",
 114                           "when",
 115                           "which",
 116                           "who",
 117                           "will",
 118                           "with",
 119                           "word",
 120                           "would",
 121                           "write",
 122                           "you",
 123                           "your"
 124                         };
 125
 126
 127 /*
 128  * Local functions...
 129  */
 130
 131 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 132 static void             help_delete_node(help_node_t *n);
 133 static void             help_delete_word(help_word_t *w);
 134 static int              help_load_directory(help_index_t *hi,
 135                                             const char *directory,
 136                                             const char *relative);
 137 static int              help_load_file(help_index_t *hi,
 138                                        const char *filename,
 139                                        const char *relative,
 140                                        time_t     mtime);
 141 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 142                                        const char *section, const char *text,
 143                                        time_t mtime, off_t offset,
 144                                        size_t length)
 145                                        __attribute__((nonnull(1,3,4)));
 146 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 147 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 148 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 149
 150
 151 /*
 152  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 153  */
 154
 155 void
 156 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 157 {
 158   help_node_t   *node;                  /* Current node */
 159
 160
 161   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 162
 163   if (!hi)
 164     return;
 165
 166   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 167        node;
 168        node = (help_node_t *)cupsArrayNext(hi->nodes))
 169   {
 170     if (!hi->search)
 171       help_delete_node(node);
 172   }
 173
 174   cupsArrayDelete(hi->nodes);
 175   cupsArrayDelete(hi->sorted);
 176
 177   free(hi);
 178 }
 179
 180
 181 /*
 182  * 'helpFindNode()' - Find a node in an index.
 183  */
 184
 185 help_node_t *                           /* O - Node pointer or NULL */
 186 helpFindNode(help_index_t *hi,          /* I - Index */
 187              const char   *filename,    /* I - Filename */
 188              const char   *anchor)      /* I - Anchor */
 189 {
 190   help_node_t   key;                    /* Search key */
 191
 192
 193   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 194                 hi, filename, anchor));
 195
 196  /*
 197   * Range check input...
 198   */
 199
 200   if (!hi || !filename)
 201     return (NULL);
 202
 203  /*
 204   * Initialize the search key...
 205   */
 206
 207   key.filename = (char *)filename;
 208   key.anchor   = (char *)anchor;
 209
 210  /*
 211   * Return any match...
 212   */
 213
 214   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 215 }
 216
 217
 218 /*
 219  * 'helpLoadIndex()' - Load a help index from disk.
 220  */
 221
 222 help_index_t *                          /* O - Index pointer or NULL */
 223 helpLoadIndex(const char *hifile,       /* I - Index filename */
 224               const char *directory)    /* I - Directory that is indexed */
 225 {
 226   help_index_t  *hi;                    /* Help index */
 227   cups_file_t   *fp;                    /* Current file */
 228   char          line[2048],             /* Line from file */
 229                 *ptr,                   /* Pointer into line */
 230                 *filename,              /* Filename in line */
 231                 *anchor,                /* Anchor in line */
 232                 *sectptr,               /* Section pointer in line */
 233                 section[1024],          /* Section name */
 234                 *text;                  /* Text in line */
 235   time_t        mtime;                  /* Modification time */
 236   off_t         offset;                 /* Offset into file */
 237   size_t        length;                 /* Length in bytes */
 238   int           update;                 /* Update? */
 239   help_node_t   *node;                  /* Current node */
 240   help_word_t   *word;                  /* Current word */
 241
 242
 243   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 244                 hifile, directory));
 245
 246  /*
 247   * Create a new, empty index.
 248   */
 249
 250   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 251     return (NULL);
 252
 253   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 254   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 255
 256   if (!hi->nodes || !hi->sorted)
 257   {
 258     cupsArrayDelete(hi->nodes);
 259     cupsArrayDelete(hi->sorted);
 260     free(hi);
 261     return (NULL);
 262   }
 263
 264  /*
 265   * Try loading the existing index file...
 266   */
 267
 268   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 269   {
 270    /*
 271     * Lock the file and then read the first line...
 272     */
 273
 274     cupsFileLock(fp, 1);
 275
 276     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 277     {
 278      /*
 279       * Got a valid header line, now read the data lines...
 280       */
 281
 282       node = NULL;
 283
 284       while (cupsFileGets(fp, line, sizeof(line)))
 285       {
 286        /*
 287         * Each line looks like one of the following:
 288         *
 289         *     filename mtime offset length "section" "text"
 290         *     filename#anchor offset length "text"
 291         *     SP count word
 292         */
 293
 294         if (line[0] == ' ')
 295         {
 296          /*
 297           * Read a word in the current node...
 298           */
 299
 300           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 301             continue;
 302
 303           if ((word = help_add_word(node, ptr + 1)) != NULL)
 304             word->count = atoi(line + 1);
 305         }
 306         else
 307         {
 308          /*
 309           * Add a node...
 310           */
 311
 312           filename = line;
 313
 314           if ((ptr = strchr(line, ' ')) == NULL)
 315             break;
 316
 317           while (isspace(*ptr & 255))
 318             *ptr++ = '\0';
 319
 320           if ((anchor = strrchr(filename, '#')) != NULL)
 321           {
 322             *anchor++ = '\0';
 323             mtime = 0;
 324           }
 325           else
 326             mtime = strtol(ptr, &ptr, 10);
 327
 328           offset = strtoll(ptr, &ptr, 10);
 329           length = (size_t)strtoll(ptr, &ptr, 10);
 330
 331           while (isspace(*ptr & 255))
 332             ptr ++;
 333
 334           if (!anchor)
 335           {
 336            /*
 337             * Get section...
 338             */
 339
 340             if (*ptr != '\"')
 341               break;
 342
 343             ptr ++;
 344             sectptr = ptr;
 345
 346             while (*ptr && *ptr != '\"')
 347               ptr ++;
 348
 349             if (*ptr != '\"')
 350               break;
 351
 352             *ptr++ = '\0';
 353
 354             strlcpy(section, sectptr, sizeof(section));
 355
 356             while (isspace(*ptr & 255))
 357               ptr ++;
 358           }
 359
 360           if (*ptr != '\"')
 361             break;
 362
 363           ptr ++;
 364           text = ptr;
 365
 366           while (*ptr && *ptr != '\"')
 367             ptr ++;
 368
 369           if (*ptr != '\"')
 370             break;
 371
 372           *ptr++ = '\0';
 373
 374           if ((node = help_new_node(filename, anchor, section, text,
 375                                     mtime, offset, length)) == NULL)
 376             break;
 377
 378           node->score = -1;
 379
 380           cupsArrayAdd(hi->nodes, node);
 381         }
 382       }
 383     }
 384
 385     cupsFileClose(fp);
 386   }
 387
 388  /*
 389   * Scan for new/updated files...
 390   */
 391
 392   update = help_load_directory(hi, directory, NULL);
 393
 394  /*
 395   * Remove any files that are no longer installed...
 396   */
 397
 398   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 399        node;
 400        node = (help_node_t *)cupsArrayNext(hi->nodes))
 401     if (node->score < 0)
 402     {
 403      /*
 404       * Delete this node...
 405       */
 406
 407       cupsArrayRemove(hi->nodes, node);
 408       help_delete_node(node);
 409     }
 410
 411  /*
 412   * Add nodes to the sorted array...
 413   */
 414
 415   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 416        node;
 417        node = (help_node_t *)cupsArrayNext(hi->nodes))
 418     cupsArrayAdd(hi->sorted, node);
 419
 420  /*
 421   * Save the index if we updated it...
 422   */
 423
 424   if (update)
 425     helpSaveIndex(hi, hifile);
 426
 427  /*
 428   * Return the index...
 429   */
 430
 431   return (hi);
 432 }
 433
 434
 435 /*
 436  * 'helpSaveIndex()' - Save a help index to disk.
 437  */
 438
 439 int                                     /* O - 0 on success, -1 on error */
 440 helpSaveIndex(help_index_t *hi,         /* I - Index */
 441               const char   *hifile)     /* I - Index filename */
 442 {
 443   cups_file_t   *fp;                    /* Index file */
 444   help_node_t   *node;                  /* Current node */
 445   help_word_t   *word;                  /* Current word */
 446
 447
 448   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 449
 450  /*
 451   * Try creating a new index file...
 452   */
 453
 454   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 455     return (-1);
 456
 457  /*
 458   * Lock the file while we write it...
 459   */
 460
 461   cupsFileLock(fp, 1);
 462
 463   cupsFilePuts(fp, "HELPV2\n");
 464
 465   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 466        node;
 467        node = (help_node_t *)cupsArrayNext(hi->nodes))
 468   {
 469    /*
 470     * Write the current node with/without the anchor...
 471     */
 472
 473     if (node->anchor)
 474     {
 475       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 476                          node->filename, node->anchor,
 477                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 478                          node->text) < 0)
 479         break;
 480     }
 481     else
 482     {
 483       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 484                          node->filename, (int)node->mtime,
 485                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 486                          node->section ? node->section : "", node->text) < 0)
 487         break;
 488     }
 489
 490    /*
 491     * Then write the words associated with the node...
 492     */
 493
 494     for (word = (help_word_t *)cupsArrayFirst(node->words);
 495          word;
 496          word = (help_word_t *)cupsArrayNext(node->words))
 497       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 498         break;
 499   }
 500
 501   cupsFileFlush(fp);
 502
 503   if (cupsFileClose(fp) < 0)
 504     return (-1);
 505   else if (node)
 506     return (-1);
 507   else
 508     return (0);
 509 }
 510
 511
 512 /*
 513  * 'helpSearchIndex()' - Search an index.
 514  */
 515
 516 help_index_t *                          /* O - Search index */
 517 helpSearchIndex(help_index_t *hi,       /* I - Index */
 518                 const char   *query,    /* I - Query string */
 519                 const char   *section,  /* I - Limit search to this section */
 520                 const char   *filename) /* I - Limit search to this file */
 521 {
 522   help_index_t  *search;                /* Search index */
 523   help_node_t   *node;                  /* Current node */
 524   help_word_t   *word;                  /* Current word */
 525   void          *sc;                    /* Search context */
 526   int           matches;                /* Number of matches */
 527
 528
 529   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 530                 hi, query, filename));
 531
 532  /*
 533   * Range check...
 534   */
 535
 536   if (!hi || !query)
 537     return (NULL);
 538
 539  /*
 540   * Reset the scores of all nodes to 0...
 541   */
 542
 543   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 544        node;
 545        node = (help_node_t *)cupsArrayNext(hi->nodes))
 546     node->score = 0;
 547
 548  /*
 549   * Find the first node to search in...
 550   */
 551
 552   if (filename)
 553   {
 554     node = helpFindNode(hi, filename, NULL);
 555     if (!node)
 556       return (NULL);
 557   }
 558   else
 559     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 560
 561  /*
 562   * Convert the query into a regular expression...
 563   */
 564
 565   sc = cgiCompileSearch(query);
 566   if (!sc)
 567     return (NULL);
 568
 569  /*
 570   * Allocate a search index...
 571   */
 572
 573   search = calloc(1, sizeof(help_index_t));
 574   if (!search)
 575   {
 576     cgiFreeSearch(sc);
 577     return (NULL);
 578   }
 579
 580   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 581   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 582
 583   if (!search->nodes || !search->sorted)
 584   {
 585     cupsArrayDelete(search->nodes);
 586     cupsArrayDelete(search->sorted);
 587     free(search);
 588     cgiFreeSearch(sc);
 589     return (NULL);
 590   }
 591
 592   search->search = 1;
 593
 594  /*
 595   * Check each node in the index, adding matching nodes to the
 596   * search index...
 597   */
 598
 599   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 600     if (section && strcmp(node->section, section))
 601       continue;
 602     else if (filename && strcmp(node->filename, filename))
 603       continue;
 604     else
 605     {
 606       matches = cgiDoSearch(sc, node->text);
 607
 608       for (word = (help_word_t *)cupsArrayFirst(node->words);
 609            word;
 610            word = (help_word_t *)cupsArrayNext(node->words))
 611         if (cgiDoSearch(sc, word->text) > 0)
 612           matches += word->count;
 613
 614       if (matches > 0)
 615       {
 616        /*
 617         * Found a match, add the node to the search index...
 618         */
 619
 620         node->score = matches;
 621
 622         cupsArrayAdd(search->nodes, node);
 623         cupsArrayAdd(search->sorted, node);
 624       }
 625     }
 626
 627  /*
 628   * Free the search context...
 629   */
 630
 631   cgiFreeSearch(sc);
 632
 633  /*
 634   * Return the results...
 635   */
 636
 637   return (search);
 638 }
 639
 640
 641 /*
 642  * 'help_add_word()' - Add a word to a node.
 643  */
 644
 645 static help_word_t *                    /* O - New word */
 646 help_add_word(help_node_t *n,           /* I - Node */
 647               const char  *text)        /* I - Word text */
 648 {
 649   help_word_t   *w,                     /* New word */
 650                 key;                    /* Search key */
 651
 652
 653   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 654
 655  /*
 656   * Create the words array as needed...
 657   */
 658
 659   if (!n->words)
 660     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 661
 662  /*
 663   * See if the word is already added...
 664   */
 665
 666   key.text = (char *)text;
 667
 668   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 669   {
 670    /*
 671     * Create a new word...
 672     */
 673
 674     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 675       return (NULL);
 676
 677     if ((w->text = strdup(text)) == NULL)
 678     {
 679       free(w);
 680       return (NULL);
 681     }
 682
 683     cupsArrayAdd(n->words, w);
 684   }
 685
 686  /*
 687   * Bump the counter for this word and return it...
 688   */
 689
 690   w->count ++;
 691
 692   return (w);
 693 }
 694
 695
 696 /*
 697  * 'help_delete_node()' - Free all memory used by a node.
 698  */
 699
 700 static void
 701 help_delete_node(help_node_t *n)        /* I - Node */
 702 {
 703   help_word_t   *w;                     /* Current word */
 704
 705
 706   DEBUG_printf(("2help_delete_node(n=%p)", n));
 707
 708   if (!n)
 709     return;
 710
 711   if (n->filename)
 712     free(n->filename);
 713
 714   if (n->anchor)
 715     free(n->anchor);
 716
 717   if (n->section)
 718     free(n->section);
 719
 720   if (n->text)
 721     free(n->text);
 722
 723   for (w = (help_word_t *)cupsArrayFirst(n->words);
 724        w;
 725        w = (help_word_t *)cupsArrayNext(n->words))
 726     help_delete_word(w);
 727
 728   cupsArrayDelete(n->words);
 729
 730   free(n);
 731 }
 732
 733
 734 /*
 735  * 'help_delete_word()' - Free all memory used by a word.
 736  */
 737
 738 static void
 739 help_delete_word(help_word_t *w)        /* I - Word */
 740 {
 741   DEBUG_printf(("2help_delete_word(w=%p)", w));
 742
 743   if (!w)
 744     return;
 745
 746   if (w->text)
 747     free(w->text);
 748
 749   free(w);
 750 }
 751
 752
 753 /*
 754  * 'help_load_directory()' - Load a directory of files into an index.
 755  */
 756
 757 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 758 help_load_directory(
 759     help_index_t *hi,                   /* I - Index */
 760     const char   *directory,            /* I - Directory */
 761     const char   *relative)             /* I - Relative path */
 762 {
 763   cups_dir_t    *dir;                   /* Directory file */
 764   cups_dentry_t *dent;                  /* Directory entry */
 765   char          *ext,                   /* Pointer to extension */
 766                 filename[1024],         /* Full filename */
 767                 relname[1024];          /* Relative filename */
 768   int           update;                 /* Updated? */
 769   help_node_t   *node;                  /* Current node */
 770
 771
 772   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 773                 hi, directory, relative));
 774
 775  /*
 776   * Open the directory and scan it...
 777   */
 778
 779   if ((dir = cupsDirOpen(directory)) == NULL)
 780     return (0);
 781
 782   update = 0;
 783
 784   while ((dent = cupsDirRead(dir)) != NULL)
 785   {
 786    /*
 787     * Skip "." files...
 788     */
 789
 790     if (dent->filename[0] == '.')
 791       continue;
 792
 793    /*
 794     * Get absolute and relative filenames...
 795     */
 796
 797     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 798     if (relative)
 799       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 800     else
 801       strlcpy(relname, dent->filename, sizeof(relname));
 802
 803    /*
 804     * Check if we have a HTML file...
 805     */
 806
 807     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 808         (!ext[5] || !strcmp(ext + 5, ".gz")))
 809     {
 810      /*
 811       * HTML file, see if we have already indexed the file...
 812       */
 813
 814       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 815       {
 816        /*
 817         * File already indexed - check dates to confirm that the
 818         * index is up-to-date...
 819         */
 820
 821         if (node->mtime == dent->fileinfo.st_mtime)
 822         {
 823          /*
 824           * Same modification time, so mark all of the nodes
 825           * for this file as up-to-date...
 826           */
 827
 828           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 829             if (!strcmp(node->filename, relname))
 830               node->score = 0;
 831             else
 832               break;
 833
 834           continue;
 835         }
 836       }
 837
 838       update = 1;
 839
 840       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 841     }
 842     else if (S_ISDIR(dent->fileinfo.st_mode))
 843     {
 844      /*
 845       * Process sub-directory...
 846       */
 847
 848       if (help_load_directory(hi, filename, relname) == 1)
 849         update = 1;
 850     }
 851   }
 852
 853   cupsDirClose(dir);
 854
 855   return (update);
 856 }
 857
 858
 859 /*
 * 'help_load_file()' - Load an HTML file into an index.
 861  */
 862
 863 static int                              /* O - 0 = success, -1 = error */
 864 help_load_file(
 865     help_index_t *hi,                   /* I - Index */
 866     const char   *filename,             /* I - Filename */
 867     const char   *relative,             /* I - Relative path */
 868     time_t       mtime)                 /* I - Modification time */
 869 {
 870   cups_file_t   *fp;                    /* HTML file */
 871   help_node_t   *node;                  /* Current node */
 872   char          line[1024],             /* Line from file */
 873                 temp[1024],             /* Temporary word */
 874                 section[1024],          /* Section */
 875                 *ptr,                   /* Pointer into line */
 876                 *anchor,                /* Anchor name */
 877                 *text;                  /* Text for anchor */
 878   off_t         offset;                 /* File offset */
 879   char          quote;                  /* Quote character */
 880   help_word_t   *word;                  /* Current word */
 881   int           wordlen;                /* Length of word */
 882
 883
 884   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 885                 "mtime=%ld)", hi, filename, relative, (long)mtime));
 886
 887   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 888     return (-1);
 889
 890   node   = NULL;
 891   offset = 0;
 892
 893   strlcpy(section, "Other", sizeof(section));
 894
 895   while (cupsFileGets(fp, line, sizeof(line)))
 896   {
 897    /*
 898     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 899     */
 900
 901     if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
 902     {
 903      /*
 904       * Got section line, copy it!
 905       */
 906
 907       for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
 908
 909       strlcpy(section, ptr, sizeof(section));
 910       if ((ptr = strstr(section, "-->")) != NULL)
 911       {
 912        /*
 913         * Strip comment stuff from end of line...
 914         */
 915
 916         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 917
 918         if (isspace(*ptr & 255))
 919           *ptr = '\0';
 920       }
 921       continue;
 922     }
 923
 924     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 925     {
 926       ptr ++;
 927
 928       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 929       {
 930        /*
 931         * Found the title...
 932         */
 933
 934         anchor = NULL;
 935         ptr += 6;
 936       }
 937       else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 938       {
 939        /*
 940         * Found an anchor...
 941         */
 942
 943         ptr += 7;
 944
 945         if (*ptr == '\"' || *ptr == '\'')
 946         {
 947          /*
 948           * Get quoted anchor...
 949           */
 950
 951           quote  = *ptr;
 952           anchor = ptr + 1;
 953           if ((ptr = strchr(anchor, quote)) != NULL)
 954             *ptr++ = '\0';
 955           else
 956             break;
 957         }
 958         else
 959         {
 960          /*
 961           * Get unquoted anchor...
 962           */
 963
 964           anchor = ptr + 1;
 965
 966           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 967
 968           if (*ptr)
 969             *ptr++ = '\0';
 970           else
 971             break;
 972         }
 973
 974        /*
 975         * Got the anchor, now lets find the end...
 976         */
 977
 978         while (*ptr && *ptr != '>')
 979           ptr ++;
 980
 981         if (*ptr != '>')
 982           break;
 983
 984         ptr ++;
 985       }
 986       else
 987         continue;
 988
 989      /*
 990       * Now collect text for the link...
 991       */
 992
 993       text = ptr;
 994       while ((ptr = strchr(text, '<')) == NULL)
 995       {
 996         ptr = text + strlen(text);
 997         if (ptr >= (line + sizeof(line) - 2))
 998           break;
 999
1000         *ptr++ = ' ';
1001
1002         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1003           break;
1004       }
1005
1006       *ptr = '\0';
1007
1008       if (node)
1009         node->length = (size_t)(offset - node->offset);
1010
1011       if (!*text)
1012       {
1013         node = NULL;
1014         break;
1015       }
1016
1017       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1018       {
1019        /*
1020         * Node already in the index, so replace the text and other
1021         * data...
1022         */
1023
1024         cupsArrayRemove(hi->nodes, node);
1025
1026         if (node->section)
1027           free(node->section);
1028
1029         if (node->text)
1030           free(node->text);
1031
1032         if (node->words)
1033         {
1034           for (word = (help_word_t *)cupsArrayFirst(node->words);
1035                word;
1036                word = (help_word_t *)cupsArrayNext(node->words))
1037             help_delete_word(word);
1038
1039           cupsArrayDelete(node->words);
1040           node->words = NULL;
1041         }
1042
1043         node->section = section[0] ? strdup(section) : NULL;
1044         node->text    = strdup(text);
1045         node->mtime   = mtime;
1046         node->offset  = offset;
1047         node->score   = 0;
1048       }
1049       else
1050       {
1051        /*
1052         * New node...
1053         */
1054
1055         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1056       }
1057
1058      /*
1059       * Go through the text value and replace tabs and newlines with
1060       * whitespace and eliminate extra whitespace...
1061       */
1062
1063       for (ptr = node->text, text = node->text; *ptr;)
1064         if (isspace(*ptr & 255))
1065         {
1066           while (isspace(*ptr & 255))
1067             ptr ++;
1068
1069           *text++ = ' ';
1070         }
1071         else if (text != ptr)
1072           *text++ = *ptr++;
1073         else
1074         {
1075           text ++;
1076           ptr ++;
1077         }
1078
1079       *text = '\0';
1080
1081      /*
1082       * (Re)add the node to the array...
1083       */
1084
1085       cupsArrayAdd(hi->nodes, node);
1086
1087       if (!anchor)
1088         node = NULL;
1089       break;
1090     }
1091
1092     if (node)
1093     {
1094      /*
1095       * Scan this line for words...
1096       */
1097
1098       for (ptr = line; *ptr; ptr ++)
1099       {
1100        /*
1101         * Skip HTML stuff...
1102         */
1103
1104         if (*ptr == '<')
1105         {
1106           if (!strncmp(ptr, "<!--", 4))
1107           {
1108            /*
1109             * Skip HTML comment...
1110             */
1111
1112             if ((text = strstr(ptr + 4, "-->")) == NULL)
1113               ptr += strlen(ptr) - 1;
1114             else
1115               ptr = text + 2;
1116           }
1117           else
1118           {
1119            /*
1120             * Skip HTML element...
1121             */
1122
1123             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1124             {
1125               if (*ptr == '\"' || *ptr == '\'')
1126               {
1127                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1128
1129                 if (!*ptr)
1130                   ptr --;
1131               }
1132             }
1133
1134             if (!*ptr)
1135               ptr --;
1136           }
1137
1138           continue;
1139         }
1140         else if (*ptr == '&')
1141         {
1142          /*
1143           * Skip HTML entity...
1144           */
1145
1146           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1147
1148           if (!*ptr)
1149             ptr --;
1150
1151           continue;
1152         }
1153         else if (!isalnum(*ptr & 255))
1154           continue;
1155
1156        /*
1157         * Found the start of a word, search until we find the end...
1158         */
1159
1160         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1161
1162         wordlen = (int)(ptr - text);
1163
1164         memcpy(temp, text, (size_t)wordlen);
1165         temp[wordlen] = '\0';
1166
1167         ptr --;
1168
1169         if (wordlen > 1 && !bsearch(temp, help_common_words,
1170                                     (sizeof(help_common_words) /
1171                                      sizeof(help_common_words[0])),
1172                                     sizeof(help_common_words[0]),
1173                                     (int (*)(const void *, const void *))
1174                                         _cups_strcasecmp))
1175           help_add_word(node, temp);
1176       }
1177     }
1178
1179    /*
1180     * Get the offset of the next line...
1181     */
1182
1183     offset = cupsFileTell(fp);
1184   }
1185
1186   cupsFileClose(fp);
1187
1188   if (node)
1189     node->length = (size_t)(offset - node->offset);
1190
1191   return (0);
1192 }
1193
1194
1195 /*
1196  * 'help_new_node()' - Create a new node and add it to an index.
1197  */
1198
1199 static help_node_t *                    /* O - Node pointer or NULL on error */
1200 help_new_node(const char   *filename,   /* I - Filename */
1201               const char   *anchor,     /* I - Anchor */
1202               const char   *section,    /* I - Section */
1203               const char   *text,       /* I - Text */
1204               time_t       mtime,       /* I - Modification time */
1205               off_t        offset,      /* I - Offset in file */
1206               size_t       length)      /* I - Length in bytes */
1207 {
1208   help_node_t   *n;                     /* Node */
1209
1210
1211   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1212                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1213                 (long)mtime, (long)offset, (long)length));
1214
1215   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1216   if (!n)
1217     return (NULL);
1218
1219   n->filename = strdup(filename);
1220   n->anchor   = anchor ? strdup(anchor) : NULL;
1221   n->section  = *section ? strdup(section) : NULL;
1222   n->text     = strdup(text);
1223   n->mtime    = mtime;
1224   n->offset   = offset;
1225   n->length   = length;
1226
1227   return (n);
1228 }
1229
1230
1231 /*
1232  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1233  */
1234
1235 static int                              /* O - Difference */
1236 help_sort_by_name(help_node_t *n1,      /* I - First node */
1237                   help_node_t *n2)      /* I - Second node */
1238 {
1239   int           diff;                   /* Difference */
1240
1241
1242   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1243                 n1, n1->filename, n1->anchor,
1244                 n2, n2->filename, n2->anchor));
1245
1246   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1247     return (diff);
1248
1249   if (!n1->anchor && !n2->anchor)
1250     return (0);
1251   else if (!n1->anchor)
1252     return (-1);
1253   else if (!n2->anchor)
1254     return (1);
1255   else
1256     return (strcmp(n1->anchor, n2->anchor));
1257 }
1258
1259
1260 /*
1261  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1262  */
1263
1264 static int                              /* O - Difference */
1265 help_sort_by_score(help_node_t *n1,     /* I - First node */
1266                    help_node_t *n2)     /* I - Second node */
1267 {
1268   int           diff;                   /* Difference */
1269
1270
1271   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1272                 "n2=%p(%d \"%s\" \"%s\")",
1273                 n1, n1->score, n1->section, n1->text,
1274                 n2, n2->score, n2->section, n2->text));
1275
1276   if (n1->score != n2->score)
1277     return (n2->score - n1->score);
1278
1279   if (n1->section && !n2->section)
1280     return (1);
1281   else if (!n1->section && n2->section)
1282     return (-1);
1283   else if (n1->section && n2->section &&
1284            (diff = strcmp(n1->section, n2->section)) != 0)
1285     return (diff);
1286
1287   return (_cups_strcasecmp(n1->text, n2->text));
1288 }
1289
1290
1291 /*
1292  * 'help_sort_words()' - Sort words alphabetically.
1293  */
1294
1295 static int                              /* O - Difference */
1296 help_sort_words(help_word_t *w1,        /* I - Second word */
1297                 help_word_t *w2)        /* I - Second word */
1298 {
1299   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1300                 w1, w1->text, w2, w2->text));
1301
1302   return (_cups_strcasecmp(w1->text, w2->text));
1303 }