cgi-bin/help-index.c

   1 /*
   2  * "$Id$"
   3  *
   4  * Online help index routines for CUPS.
   5  *
   6  * Copyright 2007-2014 by Apple Inc.
   7  * Copyright 1997-2007 by Easy Software Products.
   8  *
   9  * These coded instructions, statements, and computer programs are the
  10  * property of Apple Inc. and are protected by Federal copyright
  11  * law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  * which should have been included with this file.  If this file is
  13  * missing or damaged, see the license at "http://www.cups.org/".
  14  */
  15
  16 /*
  17  * Include necessary headers...
  18  */
  19
  20 #include "cgi-private.h"
  21 #include <cups/dir.h>
  22
  23
  24 /*
  25  * List of common English words that should not be indexed...
  26  */
  27
  28 static char             help_common_words[][6] =
  29                         {
  30                           "about",
  31                           "all",
  32                           "an",
  33                           "and",
  34                           "are",
  35                           "as",
  36                           "at",
  37                           "be",
  38                           "been",
  39                           "but",
  40                           "by",
  41                           "call",
  42                           "can",
  43                           "come",
  44                           "could",
  45                           "day",
  46                           "did",
  47                           "do",
  48                           "down",
  49                           "each",
  50                           "find",
  51                           "first",
  52                           "for",
  53                           "from",
  54                           "go",
  55                           "had",
  56                           "has",
  57                           "have",
  58                           "he",
  59                           "her",
  60                           "him",
  61                           "his",
  62                           "hot",
  63                           "how",
  64                           "if",
  65                           "in",
  66                           "is",
  67                           "it",
  68                           "know",
  69                           "like",
  70                           "long",
  71                           "look",
  72                           "make",
  73                           "many",
  74                           "may",
  75                           "more",
  76                           "most",
  77                           "my",
  78                           "no",
  79                           "now",
  80                           "of",
  81                           "on",
  82                           "one",
  83                           "or",
  84                           "other",
  85                           "out",
  86                           "over",
  87                           "said",
  88                           "see",
  89                           "she",
  90                           "side",
  91                           "so",
  92                           "some",
  93                           "sound",
  94                           "than",
  95                           "that",
  96                           "the",
  97                           "their",
  98                           "them",
  99                           "then",
 100                           "there",
 101                           "these",
 102                           "they",
 103                           "thing",
 104                           "this",
 105                           "time",
 106                           "to",
 107                           "two",
 108                           "up",
 109                           "use",
 110                           "was",
 111                           "water",
 112                           "way",
 113                           "we",
 114                           "were",
 115                           "what",
 116                           "when",
 117                           "which",
 118                           "who",
 119                           "will",
 120                           "with",
 121                           "word",
 122                           "would",
 123                           "write",
 124                           "you",
 125                           "your"
 126                         };
 127
 128
 129 /*
 130  * Local functions...
 131  */
 132
 133 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 134 static void             help_delete_node(help_node_t *n);
 135 static void             help_delete_word(help_word_t *w);
 136 static int              help_load_directory(help_index_t *hi,
 137                                             const char *directory,
 138                                             const char *relative);
 139 static int              help_load_file(help_index_t *hi,
 140                                        const char *filename,
 141                                        const char *relative,
 142                                        time_t     mtime);
 143 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 144                                        const char *section, const char *text,
 145                                        time_t mtime, off_t offset,
 146                                        size_t length)
 147                                        __attribute__((nonnull(1,3,4)));
 148 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 149 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 150 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 151
 152
 153 /*
 154  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 155  */
 156
 157 void
 158 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 159 {
 160   help_node_t   *node;                  /* Current node */
 161
 162
 163   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 164
 165   if (!hi)
 166     return;
 167
 168   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 169        node;
 170        node = (help_node_t *)cupsArrayNext(hi->nodes))
 171   {
 172     if (!hi->search)
 173       help_delete_node(node);
 174   }
 175
 176   cupsArrayDelete(hi->nodes);
 177   cupsArrayDelete(hi->sorted);
 178
 179   free(hi);
 180 }
 181
 182
 183 /*
 184  * 'helpFindNode()' - Find a node in an index.
 185  */
 186
 187 help_node_t *                           /* O - Node pointer or NULL */
 188 helpFindNode(help_index_t *hi,          /* I - Index */
 189              const char   *filename,    /* I - Filename */
 190              const char   *anchor)      /* I - Anchor */
 191 {
 192   help_node_t   key;                    /* Search key */
 193
 194
 195   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 196                 hi, filename, anchor));
 197
 198  /*
 199   * Range check input...
 200   */
 201
 202   if (!hi || !filename)
 203     return (NULL);
 204
 205  /*
 206   * Initialize the search key...
 207   */
 208
 209   key.filename = (char *)filename;
 210   key.anchor   = (char *)anchor;
 211
 212  /*
 213   * Return any match...
 214   */
 215
 216   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 217 }
 218
 219
 220 /*
 221  * 'helpLoadIndex()' - Load a help index from disk.
 222  */
 223
 224 help_index_t *                          /* O - Index pointer or NULL */
 225 helpLoadIndex(const char *hifile,       /* I - Index filename */
 226               const char *directory)    /* I - Directory that is indexed */
 227 {
 228   help_index_t  *hi;                    /* Help index */
 229   cups_file_t   *fp;                    /* Current file */
 230   char          line[2048],             /* Line from file */
 231                 *ptr,                   /* Pointer into line */
 232                 *filename,              /* Filename in line */
 233                 *anchor,                /* Anchor in line */
 234                 *sectptr,               /* Section pointer in line */
 235                 section[1024],          /* Section name */
 236                 *text;                  /* Text in line */
 237   time_t        mtime;                  /* Modification time */
 238   off_t         offset;                 /* Offset into file */
 239   size_t        length;                 /* Length in bytes */
 240   int           update;                 /* Update? */
 241   help_node_t   *node;                  /* Current node */
 242   help_word_t   *word;                  /* Current word */
 243
 244
 245   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 246                 hifile, directory));
 247
 248  /*
 249   * Create a new, empty index.
 250   */
 251
 252   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 253     return (NULL);
 254
 255   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 256   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 257
 258   if (!hi->nodes || !hi->sorted)
 259   {
 260     cupsArrayDelete(hi->nodes);
 261     cupsArrayDelete(hi->sorted);
 262     free(hi);
 263     return (NULL);
 264   }
 265
 266  /*
 267   * Try loading the existing index file...
 268   */
 269
 270   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 271   {
 272    /*
 273     * Lock the file and then read the first line...
 274     */
 275
 276     cupsFileLock(fp, 1);
 277
 278     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 279     {
 280      /*
 281       * Got a valid header line, now read the data lines...
 282       */
 283
 284       node = NULL;
 285
 286       while (cupsFileGets(fp, line, sizeof(line)))
 287       {
 288        /*
 289         * Each line looks like one of the following:
 290         *
 291         *     filename mtime offset length "section" "text"
 292         *     filename#anchor offset length "text"
 293         *     SP count word
 294         */
 295
 296         if (line[0] == ' ')
 297         {
 298          /*
 299           * Read a word in the current node...
 300           */
 301
 302           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 303             continue;
 304
 305           if ((word = help_add_word(node, ptr + 1)) != NULL)
 306             word->count = atoi(line + 1);
 307         }
 308         else
 309         {
 310          /*
 311           * Add a node...
 312           */
 313
 314           filename = line;
 315
 316           if ((ptr = strchr(line, ' ')) == NULL)
 317             break;
 318
 319           while (isspace(*ptr & 255))
 320             *ptr++ = '\0';
 321
 322           if ((anchor = strrchr(filename, '#')) != NULL)
 323           {
 324             *anchor++ = '\0';
 325             mtime = 0;
 326           }
 327           else
 328             mtime = strtol(ptr, &ptr, 10);
 329
 330           offset = strtoll(ptr, &ptr, 10);
 331           length = (size_t)strtoll(ptr, &ptr, 10);
 332
 333           while (isspace(*ptr & 255))
 334             ptr ++;
 335
 336           if (!anchor)
 337           {
 338            /*
 339             * Get section...
 340             */
 341
 342             if (*ptr != '\"')
 343               break;
 344
 345             ptr ++;
 346             sectptr = ptr;
 347
 348             while (*ptr && *ptr != '\"')
 349               ptr ++;
 350
 351             if (*ptr != '\"')
 352               break;
 353
 354             *ptr++ = '\0';
 355
 356             strlcpy(section, sectptr, sizeof(section));
 357
 358             while (isspace(*ptr & 255))
 359               ptr ++;
 360           }
 361
 362           if (*ptr != '\"')
 363             break;
 364
 365           ptr ++;
 366           text = ptr;
 367
 368           while (*ptr && *ptr != '\"')
 369             ptr ++;
 370
 371           if (*ptr != '\"')
 372             break;
 373
 374           *ptr++ = '\0';
 375
 376           if ((node = help_new_node(filename, anchor, section, text,
 377                                     mtime, offset, length)) == NULL)
 378             break;
 379
 380           node->score = -1;
 381
 382           cupsArrayAdd(hi->nodes, node);
 383         }
 384       }
 385     }
 386
 387     cupsFileClose(fp);
 388   }
 389
 390  /*
 391   * Scan for new/updated files...
 392   */
 393
 394   update = help_load_directory(hi, directory, NULL);
 395
 396  /*
 397   * Remove any files that are no longer installed...
 398   */
 399
 400   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 401        node;
 402        node = (help_node_t *)cupsArrayNext(hi->nodes))
 403     if (node->score < 0)
 404     {
 405      /*
 406       * Delete this node...
 407       */
 408
 409       cupsArrayRemove(hi->nodes, node);
 410       help_delete_node(node);
 411     }
 412
 413  /*
 414   * Add nodes to the sorted array...
 415   */
 416
 417   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 418        node;
 419        node = (help_node_t *)cupsArrayNext(hi->nodes))
 420     cupsArrayAdd(hi->sorted, node);
 421
 422  /*
 423   * Save the index if we updated it...
 424   */
 425
 426   if (update)
 427     helpSaveIndex(hi, hifile);
 428
 429  /*
 430   * Return the index...
 431   */
 432
 433   return (hi);
 434 }
 435
 436
 437 /*
 438  * 'helpSaveIndex()' - Save a help index to disk.
 439  */
 440
 441 int                                     /* O - 0 on success, -1 on error */
 442 helpSaveIndex(help_index_t *hi,         /* I - Index */
 443               const char   *hifile)     /* I - Index filename */
 444 {
 445   cups_file_t   *fp;                    /* Index file */
 446   help_node_t   *node;                  /* Current node */
 447   help_word_t   *word;                  /* Current word */
 448
 449
 450   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 451
 452  /*
 453   * Try creating a new index file...
 454   */
 455
 456   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 457     return (-1);
 458
 459  /*
 460   * Lock the file while we write it...
 461   */
 462
 463   cupsFileLock(fp, 1);
 464
 465   cupsFilePuts(fp, "HELPV2\n");
 466
 467   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 468        node;
 469        node = (help_node_t *)cupsArrayNext(hi->nodes))
 470   {
 471    /*
 472     * Write the current node with/without the anchor...
 473     */
 474
 475     if (node->anchor)
 476     {
 477       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 478                          node->filename, node->anchor,
 479                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 480                          node->text) < 0)
 481         break;
 482     }
 483     else
 484     {
 485       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 486                          node->filename, (int)node->mtime,
 487                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 488                          node->section ? node->section : "", node->text) < 0)
 489         break;
 490     }
 491
 492    /*
 493     * Then write the words associated with the node...
 494     */
 495
 496     for (word = (help_word_t *)cupsArrayFirst(node->words);
 497          word;
 498          word = (help_word_t *)cupsArrayNext(node->words))
 499       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 500         break;
 501   }
 502
 503   cupsFileFlush(fp);
 504
 505   if (cupsFileClose(fp) < 0)
 506     return (-1);
 507   else if (node)
 508     return (-1);
 509   else
 510     return (0);
 511 }
 512
 513
 514 /*
 515  * 'helpSearchIndex()' - Search an index.
 516  */
 517
 518 help_index_t *                          /* O - Search index */
 519 helpSearchIndex(help_index_t *hi,       /* I - Index */
 520                 const char   *query,    /* I - Query string */
 521                 const char   *section,  /* I - Limit search to this section */
 522                 const char   *filename) /* I - Limit search to this file */
 523 {
 524   help_index_t  *search;                /* Search index */
 525   help_node_t   *node;                  /* Current node */
 526   help_word_t   *word;                  /* Current word */
 527   void          *sc;                    /* Search context */
 528   int           matches;                /* Number of matches */
 529
 530
 531   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 532                 hi, query, filename));
 533
 534  /*
 535   * Range check...
 536   */
 537
 538   if (!hi || !query)
 539     return (NULL);
 540
 541  /*
 542   * Reset the scores of all nodes to 0...
 543   */
 544
 545   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 546        node;
 547        node = (help_node_t *)cupsArrayNext(hi->nodes))
 548     node->score = 0;
 549
 550  /*
 551   * Find the first node to search in...
 552   */
 553
 554   if (filename)
 555   {
 556     node = helpFindNode(hi, filename, NULL);
 557     if (!node)
 558       return (NULL);
 559   }
 560   else
 561     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 562
 563  /*
 564   * Convert the query into a regular expression...
 565   */
 566
 567   sc = cgiCompileSearch(query);
 568   if (!sc)
 569     return (NULL);
 570
 571  /*
 572   * Allocate a search index...
 573   */
 574
 575   search = calloc(1, sizeof(help_index_t));
 576   if (!search)
 577   {
 578     cgiFreeSearch(sc);
 579     return (NULL);
 580   }
 581
 582   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 583   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 584
 585   if (!search->nodes || !search->sorted)
 586   {
 587     cupsArrayDelete(search->nodes);
 588     cupsArrayDelete(search->sorted);
 589     free(search);
 590     cgiFreeSearch(sc);
 591     return (NULL);
 592   }
 593
 594   search->search = 1;
 595
 596  /*
 597   * Check each node in the index, adding matching nodes to the
 598   * search index...
 599   */
 600
 601   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 602     if (section && strcmp(node->section, section))
 603       continue;
 604     else if (filename && strcmp(node->filename, filename))
 605       continue;
 606     else
 607     {
 608       matches = cgiDoSearch(sc, node->text);
 609
 610       for (word = (help_word_t *)cupsArrayFirst(node->words);
 611            word;
 612            word = (help_word_t *)cupsArrayNext(node->words))
 613         if (cgiDoSearch(sc, word->text) > 0)
 614           matches += word->count;
 615
 616       if (matches > 0)
 617       {
 618        /*
 619         * Found a match, add the node to the search index...
 620         */
 621
 622         node->score = matches;
 623
 624         cupsArrayAdd(search->nodes, node);
 625         cupsArrayAdd(search->sorted, node);
 626       }
 627     }
 628
 629  /*
 630   * Free the search context...
 631   */
 632
 633   cgiFreeSearch(sc);
 634
 635  /*
 636   * Return the results...
 637   */
 638
 639   return (search);
 640 }
 641
 642
 643 /*
 644  * 'help_add_word()' - Add a word to a node.
 645  */
 646
 647 static help_word_t *                    /* O - New word */
 648 help_add_word(help_node_t *n,           /* I - Node */
 649               const char  *text)        /* I - Word text */
 650 {
 651   help_word_t   *w,                     /* New word */
 652                 key;                    /* Search key */
 653
 654
 655   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 656
 657  /*
 658   * Create the words array as needed...
 659   */
 660
 661   if (!n->words)
 662     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 663
 664  /*
 665   * See if the word is already added...
 666   */
 667
 668   key.text = (char *)text;
 669
 670   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 671   {
 672    /*
 673     * Create a new word...
 674     */
 675
 676     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 677       return (NULL);
 678
 679     if ((w->text = strdup(text)) == NULL)
 680     {
 681       free(w);
 682       return (NULL);
 683     }
 684
 685     cupsArrayAdd(n->words, w);
 686   }
 687
 688  /*
 689   * Bump the counter for this word and return it...
 690   */
 691
 692   w->count ++;
 693
 694   return (w);
 695 }
 696
 697
 698 /*
 699  * 'help_delete_node()' - Free all memory used by a node.
 700  */
 701
 702 static void
 703 help_delete_node(help_node_t *n)        /* I - Node */
 704 {
 705   help_word_t   *w;                     /* Current word */
 706
 707
 708   DEBUG_printf(("2help_delete_node(n=%p)", n));
 709
 710   if (!n)
 711     return;
 712
 713   if (n->filename)
 714     free(n->filename);
 715
 716   if (n->anchor)
 717     free(n->anchor);
 718
 719   if (n->section)
 720     free(n->section);
 721
 722   if (n->text)
 723     free(n->text);
 724
 725   for (w = (help_word_t *)cupsArrayFirst(n->words);
 726        w;
 727        w = (help_word_t *)cupsArrayNext(n->words))
 728     help_delete_word(w);
 729
 730   cupsArrayDelete(n->words);
 731
 732   free(n);
 733 }
 734
 735
 736 /*
 737  * 'help_delete_word()' - Free all memory used by a word.
 738  */
 739
 740 static void
 741 help_delete_word(help_word_t *w)        /* I - Word */
 742 {
 743   DEBUG_printf(("2help_delete_word(w=%p)", w));
 744
 745   if (!w)
 746     return;
 747
 748   if (w->text)
 749     free(w->text);
 750
 751   free(w);
 752 }
 753
 754
 755 /*
 756  * 'help_load_directory()' - Load a directory of files into an index.
 757  */
 758
 759 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 760 help_load_directory(
 761     help_index_t *hi,                   /* I - Index */
 762     const char   *directory,            /* I - Directory */
 763     const char   *relative)             /* I - Relative path */
 764 {
 765   cups_dir_t    *dir;                   /* Directory file */
 766   cups_dentry_t *dent;                  /* Directory entry */
 767   char          *ext,                   /* Pointer to extension */
 768                 filename[1024],         /* Full filename */
 769                 relname[1024];          /* Relative filename */
 770   int           update;                 /* Updated? */
 771   help_node_t   *node;                  /* Current node */
 772
 773
 774   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 775                 hi, directory, relative));
 776
 777  /*
 778   * Open the directory and scan it...
 779   */
 780
 781   if ((dir = cupsDirOpen(directory)) == NULL)
 782     return (0);
 783
 784   update = 0;
 785
 786   while ((dent = cupsDirRead(dir)) != NULL)
 787   {
 788    /*
 789     * Skip "." files...
 790     */
 791
 792     if (dent->filename[0] == '.')
 793       continue;
 794
 795    /*
 796     * Get absolute and relative filenames...
 797     */
 798
 799     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 800     if (relative)
 801       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 802     else
 803       strlcpy(relname, dent->filename, sizeof(relname));
 804
 805    /*
 806     * Check if we have a HTML file...
 807     */
 808
 809     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 810         (!ext[5] || !strcmp(ext + 5, ".gz")))
 811     {
 812      /*
 813       * HTML file, see if we have already indexed the file...
 814       */
 815
 816       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 817       {
 818        /*
 819         * File already indexed - check dates to confirm that the
 820         * index is up-to-date...
 821         */
 822
 823         if (node->mtime == dent->fileinfo.st_mtime)
 824         {
 825          /*
 826           * Same modification time, so mark all of the nodes
 827           * for this file as up-to-date...
 828           */
 829
 830           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 831             if (!strcmp(node->filename, relname))
 832               node->score = 0;
 833             else
 834               break;
 835
 836           continue;
 837         }
 838       }
 839
 840       update = 1;
 841
 842       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 843     }
 844     else if (S_ISDIR(dent->fileinfo.st_mode))
 845     {
 846      /*
 847       * Process sub-directory...
 848       */
 849
 850       if (help_load_directory(hi, filename, relname) == 1)
 851         update = 1;
 852     }
 853   }
 854
 855   cupsDirClose(dir);
 856
 857   return (update);
 858 }
 859
 860
 861 /*
  862  * 'help_load_file()' - Load an HTML file into an index.
 863  */
 864
 865 static int                              /* O - 0 = success, -1 = error */
 866 help_load_file(
 867     help_index_t *hi,                   /* I - Index */
 868     const char   *filename,             /* I - Filename */
 869     const char   *relative,             /* I - Relative path */
 870     time_t       mtime)                 /* I - Modification time */
 871 {
 872   cups_file_t   *fp;                    /* HTML file */
 873   help_node_t   *node;                  /* Current node */
 874   char          line[1024],             /* Line from file */
 875                 temp[1024],             /* Temporary word */
 876                 section[1024],          /* Section */
 877                 *ptr,                   /* Pointer into line */
 878                 *anchor,                /* Anchor name */
 879                 *text;                  /* Text for anchor */
 880   off_t         offset;                 /* File offset */
 881   char          quote;                  /* Quote character */
 882   help_word_t   *word;                  /* Current word */
 883   int           wordlen;                /* Length of word */
 884
 885
 886   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 887                 "mtime=%ld)", hi, filename, relative, mtime));
 888
 889   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 890     return (-1);
 891
 892   node   = NULL;
 893   offset = 0;
 894
 895   strlcpy(section, "Other", sizeof(section));
 896
 897   while (cupsFileGets(fp, line, sizeof(line)))
 898   {
 899    /*
 900     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 901     */
 902
 903     if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
 904     {
 905      /*
 906       * Got section line, copy it!
 907       */
 908
 909       for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
 910
 911       strlcpy(section, ptr, sizeof(section));
 912       if ((ptr = strstr(section, "-->")) != NULL)
 913       {
 914        /*
 915         * Strip comment stuff from end of line...
 916         */
 917
 918         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 919
 920         if (isspace(*ptr & 255))
 921           *ptr = '\0';
 922       }
 923       continue;
 924     }
 925
 926     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 927     {
 928       ptr ++;
 929
 930       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 931       {
 932        /*
 933         * Found the title...
 934         */
 935
 936         anchor = NULL;
 937         ptr += 6;
 938       }
 939       else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 940       {
 941        /*
 942         * Found an anchor...
 943         */
 944
 945         ptr += 7;
 946
 947         if (*ptr == '\"' || *ptr == '\'')
 948         {
 949          /*
 950           * Get quoted anchor...
 951           */
 952
 953           quote  = *ptr;
 954           anchor = ptr + 1;
 955           if ((ptr = strchr(anchor, quote)) != NULL)
 956             *ptr++ = '\0';
 957           else
 958             break;
 959         }
 960         else
 961         {
 962          /*
 963           * Get unquoted anchor...
 964           */
 965
 966           anchor = ptr + 1;
 967
 968           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 969
 970           if (*ptr)
 971             *ptr++ = '\0';
 972           else
 973             break;
 974         }
 975
 976        /*
 977         * Got the anchor, now lets find the end...
 978         */
 979
 980         while (*ptr && *ptr != '>')
 981           ptr ++;
 982
 983         if (*ptr != '>')
 984           break;
 985
 986         ptr ++;
 987       }
 988       else
 989         continue;
 990
 991      /*
 992       * Now collect text for the link...
 993       */
 994
 995       text = ptr;
 996       while ((ptr = strchr(text, '<')) == NULL)
 997       {
 998         ptr = text + strlen(text);
 999         if (ptr >= (line + sizeof(line) - 2))
1000           break;
1001
1002         *ptr++ = ' ';
1003
1004         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1005           break;
1006       }
1007
1008       *ptr = '\0';
1009
1010       if (node)
1011         node->length = (size_t)(offset - node->offset);
1012
1013       if (!*text)
1014       {
1015         node = NULL;
1016         break;
1017       }
1018
1019       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1020       {
1021        /*
1022         * Node already in the index, so replace the text and other
1023         * data...
1024         */
1025
1026         cupsArrayRemove(hi->nodes, node);
1027
1028         if (node->section)
1029           free(node->section);
1030
1031         if (node->text)
1032           free(node->text);
1033
1034         if (node->words)
1035         {
1036           for (word = (help_word_t *)cupsArrayFirst(node->words);
1037                word;
1038                word = (help_word_t *)cupsArrayNext(node->words))
1039             help_delete_word(word);
1040
1041           cupsArrayDelete(node->words);
1042           node->words = NULL;
1043         }
1044
1045         node->section = section[0] ? strdup(section) : NULL;
1046         node->text    = strdup(text);
1047         node->mtime   = mtime;
1048         node->offset  = offset;
1049         node->score   = 0;
1050       }
1051       else
1052       {
1053        /*
1054         * New node...
1055         */
1056
1057         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1058       }
1059
1060      /*
1061       * Go through the text value and replace tabs and newlines with
1062       * whitespace and eliminate extra whitespace...
1063       */
1064
1065       for (ptr = node->text, text = node->text; *ptr;)
1066         if (isspace(*ptr & 255))
1067         {
1068           while (isspace(*ptr & 255))
1069             ptr ++;
1070
1071           *text++ = ' ';
1072         }
1073         else if (text != ptr)
1074           *text++ = *ptr++;
1075         else
1076         {
1077           text ++;
1078           ptr ++;
1079         }
1080
1081       *text = '\0';
1082
1083      /*
1084       * (Re)add the node to the array...
1085       */
1086
1087       cupsArrayAdd(hi->nodes, node);
1088
1089       if (!anchor)
1090         node = NULL;
1091       break;
1092     }
1093
1094     if (node)
1095     {
1096      /*
1097       * Scan this line for words...
1098       */
1099
1100       for (ptr = line; *ptr; ptr ++)
1101       {
1102        /*
1103         * Skip HTML stuff...
1104         */
1105
1106         if (*ptr == '<')
1107         {
1108           if (!strncmp(ptr, "<!--", 4))
1109           {
1110            /*
1111             * Skip HTML comment...
1112             */
1113
1114             if ((text = strstr(ptr + 4, "-->")) == NULL)
1115               ptr += strlen(ptr) - 1;
1116             else
1117               ptr = text + 2;
1118           }
1119           else
1120           {
1121            /*
1122             * Skip HTML element...
1123             */
1124
1125             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1126             {
1127               if (*ptr == '\"' || *ptr == '\'')
1128               {
1129                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1130
1131                 if (!*ptr)
1132                   ptr --;
1133               }
1134             }
1135
1136             if (!*ptr)
1137               ptr --;
1138           }
1139
1140           continue;
1141         }
1142         else if (*ptr == '&')
1143         {
1144          /*
1145           * Skip HTML entity...
1146           */
1147
1148           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1149
1150           if (!*ptr)
1151             ptr --;
1152
1153           continue;
1154         }
1155         else if (!isalnum(*ptr & 255))
1156           continue;
1157
1158        /*
1159         * Found the start of a word, search until we find the end...
1160         */
1161
1162         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1163
1164         wordlen = (int)(ptr - text);
1165
1166         memcpy(temp, text, (size_t)wordlen);
1167         temp[wordlen] = '\0';
1168
1169         ptr --;
1170
1171         if (wordlen > 1 && !bsearch(temp, help_common_words,
1172                                     (sizeof(help_common_words) /
1173                                      sizeof(help_common_words[0])),
1174                                     sizeof(help_common_words[0]),
1175                                     (int (*)(const void *, const void *))
1176                                         _cups_strcasecmp))
1177           help_add_word(node, temp);
1178       }
1179     }
1180
1181    /*
1182     * Get the offset of the next line...
1183     */
1184
1185     offset = cupsFileTell(fp);
1186   }
1187
1188   cupsFileClose(fp);
1189
1190   if (node)
1191     node->length = (size_t)(offset - node->offset);
1192
1193   return (0);
1194 }
1195
1196
1197 /*
1198  * 'help_new_node()' - Create a new node and add it to an index.
1199  */
1200
1201 static help_node_t *                    /* O - Node pointer or NULL on error */
1202 help_new_node(const char   *filename,   /* I - Filename */
1203               const char   *anchor,     /* I - Anchor */
1204               const char   *section,    /* I - Section */
1205               const char   *text,       /* I - Text */
1206               time_t       mtime,       /* I - Modification time */
1207               off_t        offset,      /* I - Offset in file */
1208               size_t       length)      /* I - Length in bytes */
1209 {
1210   help_node_t   *n;                     /* Node */
1211
1212
1213   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1214                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1215                 (long)mtime, (long)offset, (long)length));
1216
1217   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1218   if (!n)
1219     return (NULL);
1220
1221   n->filename = strdup(filename);
1222   n->anchor   = anchor ? strdup(anchor) : NULL;
1223   n->section  = (section && *section) ? strdup(section) : NULL;
1224   n->text     = strdup(text);
1225   n->mtime    = mtime;
1226   n->offset   = offset;
1227   n->length   = length;
1228
1229   return (n);
1230 }
1231
1232
1233 /*
1234  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1235  */
1236
1237 static int                              /* O - Difference */
1238 help_sort_by_name(help_node_t *n1,      /* I - First node */
1239                   help_node_t *n2)      /* I - Second node */
1240 {
1241   int           diff;                   /* Difference */
1242
1243
1244   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1245                 n1, n1->filename, n1->anchor,
1246                 n2, n2->filename, n2->anchor));
1247
1248   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1249     return (diff);
1250
1251   if (!n1->anchor && !n2->anchor)
1252     return (0);
1253   else if (!n1->anchor)
1254     return (-1);
1255   else if (!n2->anchor)
1256     return (1);
1257   else
1258     return (strcmp(n1->anchor, n2->anchor));
1259 }
1260
1261
1262 /*
1263  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1264  */
1265
1266 static int                              /* O - Difference */
1267 help_sort_by_score(help_node_t *n1,     /* I - First node */
1268                    help_node_t *n2)     /* I - Second node */
1269 {
1270   int           diff;                   /* Difference */
1271
1272
1273   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1274                 "n2=%p(%d \"%s\" \"%s\")",
1275                 n1, n1->score, n1->section, n1->text,
1276                 n2, n2->score, n2->section, n2->text));
1277
1278   if (n1->score != n2->score)
1279     return (n2->score - n1->score);
1280
1281   if (n1->section && !n2->section)
1282     return (1);
1283   else if (!n1->section && n2->section)
1284     return (-1);
1285   else if (n1->section && n2->section &&
1286            (diff = strcmp(n1->section, n2->section)) != 0)
1287     return (diff);
1288
1289   return (_cups_strcasecmp(n1->text, n2->text));
1290 }
1291
1292
1293 /*
1294  * 'help_sort_words()' - Sort words alphabetically.
1295  */
1296
1297 static int                              /* O - Difference */
1298 help_sort_words(help_word_t *w1,        /* I - Second word */
1299                 help_word_t *w2)        /* I - Second word */
1300 {
1301   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1302                 w1, w1->text, w2, w2->text));
1303
1304   return (_cups_strcasecmp(w1->text, w2->text));
1305 }
1306
1307
1308 /*
1309  * End of "$Id$".
1310  */