cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright 2007-2015 by Apple Inc.
   5  * Copyright 1997-2007 by Easy Software Products.
   6  *
   7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
   8  */
   9
  10 /*
  11  * Include necessary headers...
  12  */
  13
  14 #include "cgi-private.h"
  15 #include <cups/dir.h>
  16
  17
  18 /*
  19  * List of common English words that should not be indexed...
  20  */
  21
  22 static char             help_common_words[][6] =
  23                         {
  24                           "about",
  25                           "all",
  26                           "an",
  27                           "and",
  28                           "are",
  29                           "as",
  30                           "at",
  31                           "be",
  32                           "been",
  33                           "but",
  34                           "by",
  35                           "call",
  36                           "can",
  37                           "come",
  38                           "could",
  39                           "day",
  40                           "did",
  41                           "do",
  42                           "down",
  43                           "each",
  44                           "find",
  45                           "first",
  46                           "for",
  47                           "from",
  48                           "go",
  49                           "had",
  50                           "has",
  51                           "have",
  52                           "he",
  53                           "her",
  54                           "him",
  55                           "his",
  56                           "hot",
  57                           "how",
  58                           "if",
  59                           "in",
  60                           "is",
  61                           "it",
  62                           "know",
  63                           "like",
  64                           "long",
  65                           "look",
  66                           "make",
  67                           "many",
  68                           "may",
  69                           "more",
  70                           "most",
  71                           "my",
  72                           "no",
  73                           "now",
  74                           "of",
  75                           "on",
  76                           "one",
  77                           "or",
  78                           "other",
  79                           "out",
  80                           "over",
  81                           "said",
  82                           "see",
  83                           "she",
  84                           "side",
  85                           "so",
  86                           "some",
  87                           "sound",
  88                           "than",
  89                           "that",
  90                           "the",
  91                           "their",
  92                           "them",
  93                           "then",
  94                           "there",
  95                           "these",
  96                           "they",
  97                           "thing",
  98                           "this",
  99                           "time",
 100                           "to",
 101                           "two",
 102                           "up",
 103                           "use",
 104                           "was",
 105                           "water",
 106                           "way",
 107                           "we",
 108                           "were",
 109                           "what",
 110                           "when",
 111                           "which",
 112                           "who",
 113                           "will",
 114                           "with",
 115                           "word",
 116                           "would",
 117                           "write",
 118                           "you",
 119                           "your"
 120                         };
 121
 122
 123 /*
 124  * Local functions...
 125  */
 126
 127 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 128 static void             help_delete_node(help_node_t *n);
 129 static void             help_delete_word(help_word_t *w);
 130 static int              help_load_directory(help_index_t *hi,
 131                                             const char *directory,
 132                                             const char *relative);
 133 static int              help_load_file(help_index_t *hi,
 134                                        const char *filename,
 135                                        const char *relative,
 136                                        time_t     mtime);
 137 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 138                                        const char *section, const char *text,
 139                                        time_t mtime, off_t offset,
 140                                        size_t length)
 141                                        __attribute__((nonnull(1,3,4)));
 142 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 143 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 144 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 145
 146
 147 /*
 148  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 149  */
 150
 151 void
 152 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 153 {
 154   help_node_t   *node;                  /* Current node */
 155
 156
 157   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 158
 159   if (!hi)
 160     return;
 161
 162   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 163        node;
 164        node = (help_node_t *)cupsArrayNext(hi->nodes))
 165   {
 166     if (!hi->search)
 167       help_delete_node(node);
 168   }
 169
 170   cupsArrayDelete(hi->nodes);
 171   cupsArrayDelete(hi->sorted);
 172
 173   free(hi);
 174 }
 175
 176
 177 /*
 178  * 'helpFindNode()' - Find a node in an index.
 179  */
 180
 181 help_node_t *                           /* O - Node pointer or NULL */
 182 helpFindNode(help_index_t *hi,          /* I - Index */
 183              const char   *filename,    /* I - Filename */
 184              const char   *anchor)      /* I - Anchor */
 185 {
 186   help_node_t   key;                    /* Search key */
 187
 188
 189   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 190                 hi, filename, anchor));
 191
 192  /*
 193   * Range check input...
 194   */
 195
 196   if (!hi || !filename)
 197     return (NULL);
 198
 199  /*
 200   * Initialize the search key...
 201   */
 202
 203   key.filename = (char *)filename;
 204   key.anchor   = (char *)anchor;
 205
 206  /*
 207   * Return any match...
 208   */
 209
 210   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 211 }
 212
 213
 214 /*
 215  * 'helpLoadIndex()' - Load a help index from disk.
 216  */
 217
 218 help_index_t *                          /* O - Index pointer or NULL */
 219 helpLoadIndex(const char *hifile,       /* I - Index filename */
 220               const char *directory)    /* I - Directory that is indexed */
 221 {
 222   help_index_t  *hi;                    /* Help index */
 223   cups_file_t   *fp;                    /* Current file */
 224   char          line[2048],             /* Line from file */
 225                 *ptr,                   /* Pointer into line */
 226                 *filename,              /* Filename in line */
 227                 *anchor,                /* Anchor in line */
 228                 *sectptr,               /* Section pointer in line */
 229                 section[1024],          /* Section name */
 230                 *text;                  /* Text in line */
 231   time_t        mtime;                  /* Modification time */
 232   off_t         offset;                 /* Offset into file */
 233   size_t        length;                 /* Length in bytes */
 234   int           update;                 /* Update? */
 235   help_node_t   *node;                  /* Current node */
 236   help_word_t   *word;                  /* Current word */
 237
 238
 239   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 240                 hifile, directory));
 241
 242  /*
 243   * Create a new, empty index.
 244   */
 245
 246   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 247     return (NULL);
 248
 249   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 250   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 251
 252   if (!hi->nodes || !hi->sorted)
 253   {
 254     cupsArrayDelete(hi->nodes);
 255     cupsArrayDelete(hi->sorted);
 256     free(hi);
 257     return (NULL);
 258   }
 259
 260  /*
 261   * Try loading the existing index file...
 262   */
 263
 264   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 265   {
 266    /*
 267     * Lock the file and then read the first line...
 268     */
 269
 270     cupsFileLock(fp, 1);
 271
 272     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 273     {
 274      /*
 275       * Got a valid header line, now read the data lines...
 276       */
 277
 278       node = NULL;
 279
 280       while (cupsFileGets(fp, line, sizeof(line)))
 281       {
 282        /*
 283         * Each line looks like one of the following:
 284         *
 285         *     filename mtime offset length "section" "text"
 286         *     filename#anchor offset length "text"
 287         *     SP count word
 288         */
 289
 290         if (line[0] == ' ')
 291         {
 292          /*
 293           * Read a word in the current node...
 294           */
 295
 296           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 297             continue;
 298
 299           if ((word = help_add_word(node, ptr + 1)) != NULL)
 300             word->count = atoi(line + 1);
 301         }
 302         else
 303         {
 304          /*
 305           * Add a node...
 306           */
 307
 308           filename = line;
 309
 310           if ((ptr = strchr(line, ' ')) == NULL)
 311             break;
 312
 313           while (isspace(*ptr & 255))
 314             *ptr++ = '\0';
 315
 316           if ((anchor = strrchr(filename, '#')) != NULL)
 317           {
 318             *anchor++ = '\0';
 319             mtime = 0;
 320           }
 321           else
 322             mtime = strtol(ptr, &ptr, 10);
 323
 324           offset = strtoll(ptr, &ptr, 10);
 325           length = (size_t)strtoll(ptr, &ptr, 10);
 326
 327           while (isspace(*ptr & 255))
 328             ptr ++;
 329
 330           if (!anchor)
 331           {
 332            /*
 333             * Get section...
 334             */
 335
 336             if (*ptr != '\"')
 337               break;
 338
 339             ptr ++;
 340             sectptr = ptr;
 341
 342             while (*ptr && *ptr != '\"')
 343               ptr ++;
 344
 345             if (*ptr != '\"')
 346               break;
 347
 348             *ptr++ = '\0';
 349
 350             strlcpy(section, sectptr, sizeof(section));
 351
 352             while (isspace(*ptr & 255))
 353               ptr ++;
 354           }
 355
 356           if (*ptr != '\"')
 357             break;
 358
 359           ptr ++;
 360           text = ptr;
 361
 362           while (*ptr && *ptr != '\"')
 363             ptr ++;
 364
 365           if (*ptr != '\"')
 366             break;
 367
 368           *ptr++ = '\0';
 369
 370           if ((node = help_new_node(filename, anchor, section, text,
 371                                     mtime, offset, length)) == NULL)
 372             break;
 373
 374           node->score = -1;
 375
 376           cupsArrayAdd(hi->nodes, node);
 377         }
 378       }
 379     }
 380
 381     cupsFileClose(fp);
 382   }
 383
 384  /*
 385   * Scan for new/updated files...
 386   */
 387
 388   update = help_load_directory(hi, directory, NULL);
 389
 390  /*
 391   * Remove any files that are no longer installed...
 392   */
 393
 394   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 395        node;
 396        node = (help_node_t *)cupsArrayNext(hi->nodes))
 397     if (node->score < 0)
 398     {
 399      /*
 400       * Delete this node...
 401       */
 402
 403       cupsArrayRemove(hi->nodes, node);
 404       help_delete_node(node);
 405     }
 406
 407  /*
 408   * Add nodes to the sorted array...
 409   */
 410
 411   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 412        node;
 413        node = (help_node_t *)cupsArrayNext(hi->nodes))
 414     cupsArrayAdd(hi->sorted, node);
 415
 416  /*
 417   * Save the index if we updated it...
 418   */
 419
 420   if (update)
 421     helpSaveIndex(hi, hifile);
 422
 423  /*
 424   * Return the index...
 425   */
 426
 427   return (hi);
 428 }
 429
 430
 431 /*
 432  * 'helpSaveIndex()' - Save a help index to disk.
 433  */
 434
 435 int                                     /* O - 0 on success, -1 on error */
 436 helpSaveIndex(help_index_t *hi,         /* I - Index */
 437               const char   *hifile)     /* I - Index filename */
 438 {
 439   cups_file_t   *fp;                    /* Index file */
 440   help_node_t   *node;                  /* Current node */
 441   help_word_t   *word;                  /* Current word */
 442
 443
 444   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 445
 446  /*
 447   * Try creating a new index file...
 448   */
 449
 450   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 451     return (-1);
 452
 453  /*
 454   * Lock the file while we write it...
 455   */
 456
 457   cupsFileLock(fp, 1);
 458
 459   cupsFilePuts(fp, "HELPV2\n");
 460
 461   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 462        node;
 463        node = (help_node_t *)cupsArrayNext(hi->nodes))
 464   {
 465    /*
 466     * Write the current node with/without the anchor...
 467     */
 468
 469     if (node->anchor)
 470     {
 471       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 472                          node->filename, node->anchor,
 473                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 474                          node->text) < 0)
 475         break;
 476     }
 477     else
 478     {
 479       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 480                          node->filename, (int)node->mtime,
 481                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 482                          node->section ? node->section : "", node->text) < 0)
 483         break;
 484     }
 485
 486    /*
 487     * Then write the words associated with the node...
 488     */
 489
 490     for (word = (help_word_t *)cupsArrayFirst(node->words);
 491          word;
 492          word = (help_word_t *)cupsArrayNext(node->words))
 493       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 494         break;
 495   }
 496
 497   cupsFileFlush(fp);
 498
 499   if (cupsFileClose(fp) < 0)
 500     return (-1);
 501   else if (node)
 502     return (-1);
 503   else
 504     return (0);
 505 }
 506
 507
 508 /*
 509  * 'helpSearchIndex()' - Search an index.
 510  */
 511
 512 help_index_t *                          /* O - Search index */
 513 helpSearchIndex(help_index_t *hi,       /* I - Index */
 514                 const char   *query,    /* I - Query string */
 515                 const char   *section,  /* I - Limit search to this section */
 516                 const char   *filename) /* I - Limit search to this file */
 517 {
 518   help_index_t  *search;                /* Search index */
 519   help_node_t   *node;                  /* Current node */
 520   help_word_t   *word;                  /* Current word */
 521   void          *sc;                    /* Search context */
 522   int           matches;                /* Number of matches */
 523
 524
 525   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 526                 hi, query, filename));
 527
 528  /*
 529   * Range check...
 530   */
 531
 532   if (!hi || !query)
 533     return (NULL);
 534
 535  /*
 536   * Reset the scores of all nodes to 0...
 537   */
 538
 539   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 540        node;
 541        node = (help_node_t *)cupsArrayNext(hi->nodes))
 542     node->score = 0;
 543
 544  /*
 545   * Find the first node to search in...
 546   */
 547
 548   if (filename)
 549   {
 550     node = helpFindNode(hi, filename, NULL);
 551     if (!node)
 552       return (NULL);
 553   }
 554   else
 555     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 556
 557  /*
 558   * Convert the query into a regular expression...
 559   */
 560
 561   sc = cgiCompileSearch(query);
 562   if (!sc)
 563     return (NULL);
 564
 565  /*
 566   * Allocate a search index...
 567   */
 568
 569   search = calloc(1, sizeof(help_index_t));
 570   if (!search)
 571   {
 572     cgiFreeSearch(sc);
 573     return (NULL);
 574   }
 575
 576   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 577   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 578
 579   if (!search->nodes || !search->sorted)
 580   {
 581     cupsArrayDelete(search->nodes);
 582     cupsArrayDelete(search->sorted);
 583     free(search);
 584     cgiFreeSearch(sc);
 585     return (NULL);
 586   }
 587
 588   search->search = 1;
 589
 590  /*
 591   * Check each node in the index, adding matching nodes to the
 592   * search index...
 593   */
 594
 595   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 596     if (section && strcmp(node->section, section))
 597       continue;
 598     else if (filename && strcmp(node->filename, filename))
 599       continue;
 600     else
 601     {
 602       matches = cgiDoSearch(sc, node->text);
 603
 604       for (word = (help_word_t *)cupsArrayFirst(node->words);
 605            word;
 606            word = (help_word_t *)cupsArrayNext(node->words))
 607         if (cgiDoSearch(sc, word->text) > 0)
 608           matches += word->count;
 609
 610       if (matches > 0)
 611       {
 612        /*
 613         * Found a match, add the node to the search index...
 614         */
 615
 616         node->score = matches;
 617
 618         cupsArrayAdd(search->nodes, node);
 619         cupsArrayAdd(search->sorted, node);
 620       }
 621     }
 622
 623  /*
 624   * Free the search context...
 625   */
 626
 627   cgiFreeSearch(sc);
 628
 629  /*
 630   * Return the results...
 631   */
 632
 633   return (search);
 634 }
 635
 636
 637 /*
 638  * 'help_add_word()' - Add a word to a node.
 639  */
 640
 641 static help_word_t *                    /* O - New word */
 642 help_add_word(help_node_t *n,           /* I - Node */
 643               const char  *text)        /* I - Word text */
 644 {
 645   help_word_t   *w,                     /* New word */
 646                 key;                    /* Search key */
 647
 648
 649   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 650
 651  /*
 652   * Create the words array as needed...
 653   */
 654
 655   if (!n->words)
 656     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 657
 658  /*
 659   * See if the word is already added...
 660   */
 661
 662   key.text = (char *)text;
 663
 664   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 665   {
 666    /*
 667     * Create a new word...
 668     */
 669
 670     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 671       return (NULL);
 672
 673     if ((w->text = strdup(text)) == NULL)
 674     {
 675       free(w);
 676       return (NULL);
 677     }
 678
 679     cupsArrayAdd(n->words, w);
 680   }
 681
 682  /*
 683   * Bump the counter for this word and return it...
 684   */
 685
 686   w->count ++;
 687
 688   return (w);
 689 }
 690
 691
 692 /*
 693  * 'help_delete_node()' - Free all memory used by a node.
 694  */
 695
 696 static void
 697 help_delete_node(help_node_t *n)        /* I - Node */
 698 {
 699   help_word_t   *w;                     /* Current word */
 700
 701
 702   DEBUG_printf(("2help_delete_node(n=%p)", n));
 703
 704   if (!n)
 705     return;
 706
 707   if (n->filename)
 708     free(n->filename);
 709
 710   if (n->anchor)
 711     free(n->anchor);
 712
 713   if (n->section)
 714     free(n->section);
 715
 716   if (n->text)
 717     free(n->text);
 718
 719   for (w = (help_word_t *)cupsArrayFirst(n->words);
 720        w;
 721        w = (help_word_t *)cupsArrayNext(n->words))
 722     help_delete_word(w);
 723
 724   cupsArrayDelete(n->words);
 725
 726   free(n);
 727 }
 728
 729
 730 /*
 731  * 'help_delete_word()' - Free all memory used by a word.
 732  */
 733
 734 static void
 735 help_delete_word(help_word_t *w)        /* I - Word */
 736 {
 737   DEBUG_printf(("2help_delete_word(w=%p)", w));
 738
 739   if (!w)
 740     return;
 741
 742   if (w->text)
 743     free(w->text);
 744
 745   free(w);
 746 }
 747
 748
 749 /*
 750  * 'help_load_directory()' - Load a directory of files into an index.
 751  */
 752
 753 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 754 help_load_directory(
 755     help_index_t *hi,                   /* I - Index */
 756     const char   *directory,            /* I - Directory */
 757     const char   *relative)             /* I - Relative path */
 758 {
 759   cups_dir_t    *dir;                   /* Directory file */
 760   cups_dentry_t *dent;                  /* Directory entry */
 761   char          *ext,                   /* Pointer to extension */
 762                 filename[1024],         /* Full filename */
 763                 relname[1024];          /* Relative filename */
 764   int           update;                 /* Updated? */
 765   help_node_t   *node;                  /* Current node */
 766
 767
 768   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 769                 hi, directory, relative));
 770
 771  /*
 772   * Open the directory and scan it...
 773   */
 774
 775   if ((dir = cupsDirOpen(directory)) == NULL)
 776     return (0);
 777
 778   update = 0;
 779
 780   while ((dent = cupsDirRead(dir)) != NULL)
 781   {
 782    /*
 783     * Skip "." files...
 784     */
 785
 786     if (dent->filename[0] == '.')
 787       continue;
 788
 789    /*
 790     * Get absolute and relative filenames...
 791     */
 792
 793     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 794     if (relative)
 795       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 796     else
 797       strlcpy(relname, dent->filename, sizeof(relname));
 798
 799    /*
 800     * Check if we have a HTML file...
 801     */
 802
 803     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 804         (!ext[5] || !strcmp(ext + 5, ".gz")))
 805     {
 806      /*
 807       * HTML file, see if we have already indexed the file...
 808       */
 809
 810       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 811       {
 812        /*
 813         * File already indexed - check dates to confirm that the
 814         * index is up-to-date...
 815         */
 816
 817         if (node->mtime == dent->fileinfo.st_mtime)
 818         {
 819          /*
 820           * Same modification time, so mark all of the nodes
 821           * for this file as up-to-date...
 822           */
 823
 824           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 825             if (!strcmp(node->filename, relname))
 826               node->score = 0;
 827             else
 828               break;
 829
 830           continue;
 831         }
 832       }
 833
 834       update = 1;
 835
 836       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 837     }
 838     else if (S_ISDIR(dent->fileinfo.st_mode))
 839     {
 840      /*
 841       * Process sub-directory...
 842       */
 843
 844       if (help_load_directory(hi, filename, relname) == 1)
 845         update = 1;
 846     }
 847   }
 848
 849   cupsDirClose(dir);
 850
 851   return (update);
 852 }
 853
 854
 855 /*
 856  * 'help_load_file()' - Load a HTML files into an index.
 857  */
 858
 859 static int                              /* O - 0 = success, -1 = error */
 860 help_load_file(
 861     help_index_t *hi,                   /* I - Index */
 862     const char   *filename,             /* I - Filename */
 863     const char   *relative,             /* I - Relative path */
 864     time_t       mtime)                 /* I - Modification time */
 865 {
 866   cups_file_t   *fp;                    /* HTML file */
 867   help_node_t   *node;                  /* Current node */
 868   char          line[1024],             /* Line from file */
 869                 temp[1024],             /* Temporary word */
 870                 section[1024],          /* Section */
 871                 *ptr,                   /* Pointer into line */
 872                 *anchor,                /* Anchor name */
 873                 *text;                  /* Text for anchor */
 874   off_t         offset;                 /* File offset */
 875   char          quote;                  /* Quote character */
 876   help_word_t   *word;                  /* Current word */
 877   int           wordlen;                /* Length of word */
 878
 879
 880   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 881                 "mtime=%ld)", hi, filename, relative, (long)mtime));
 882
 883   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 884     return (-1);
 885
 886   node   = NULL;
 887   offset = 0;
 888
 889   strlcpy(section, "Other", sizeof(section));
 890
 891   while (cupsFileGets(fp, line, sizeof(line)))
 892   {
 893    /*
 894     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 895     */
 896
 897     if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
 898     {
 899      /*
 900       * Got section line, copy it!
 901       */
 902
 903       for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
 904
 905       strlcpy(section, ptr, sizeof(section));
 906       if ((ptr = strstr(section, "-->")) != NULL)
 907       {
 908        /*
 909         * Strip comment stuff from end of line...
 910         */
 911
 912         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 913
 914         if (isspace(*ptr & 255))
 915           *ptr = '\0';
 916       }
 917       continue;
 918     }
 919
 920     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 921     {
 922       ptr ++;
 923
 924       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 925       {
 926        /*
 927         * Found the title...
 928         */
 929
 930         anchor = NULL;
 931         ptr += 6;
 932       }
 933       else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 934       {
 935        /*
 936         * Found an anchor...
 937         */
 938
 939         ptr += 7;
 940
 941         if (*ptr == '\"' || *ptr == '\'')
 942         {
 943          /*
 944           * Get quoted anchor...
 945           */
 946
 947           quote  = *ptr;
 948           anchor = ptr + 1;
 949           if ((ptr = strchr(anchor, quote)) != NULL)
 950             *ptr++ = '\0';
 951           else
 952             break;
 953         }
 954         else
 955         {
 956          /*
 957           * Get unquoted anchor...
 958           */
 959
 960           anchor = ptr + 1;
 961
 962           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 963
 964           if (*ptr)
 965             *ptr++ = '\0';
 966           else
 967             break;
 968         }
 969
 970        /*
 971         * Got the anchor, now lets find the end...
 972         */
 973
 974         while (*ptr && *ptr != '>')
 975           ptr ++;
 976
 977         if (*ptr != '>')
 978           break;
 979
 980         ptr ++;
 981       }
 982       else
 983         continue;
 984
 985      /*
 986       * Now collect text for the link...
 987       */
 988
 989       text = ptr;
 990       while ((ptr = strchr(text, '<')) == NULL)
 991       {
 992         ptr = text + strlen(text);
 993         if (ptr >= (line + sizeof(line) - 2))
 994           break;
 995
 996         *ptr++ = ' ';
 997
 998         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
 999           break;
1000       }
1001
1002       *ptr = '\0';
1003
1004       if (node)
1005         node->length = (size_t)(offset - node->offset);
1006
1007       if (!*text)
1008       {
1009         node = NULL;
1010         break;
1011       }
1012
1013       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1014       {
1015        /*
1016         * Node already in the index, so replace the text and other
1017         * data...
1018         */
1019
1020         cupsArrayRemove(hi->nodes, node);
1021
1022         if (node->section)
1023           free(node->section);
1024
1025         if (node->text)
1026           free(node->text);
1027
1028         if (node->words)
1029         {
1030           for (word = (help_word_t *)cupsArrayFirst(node->words);
1031                word;
1032                word = (help_word_t *)cupsArrayNext(node->words))
1033             help_delete_word(word);
1034
1035           cupsArrayDelete(node->words);
1036           node->words = NULL;
1037         }
1038
1039         node->section = section[0] ? strdup(section) : NULL;
1040         node->text    = strdup(text);
1041         node->mtime   = mtime;
1042         node->offset  = offset;
1043         node->score   = 0;
1044       }
1045       else
1046       {
1047        /*
1048         * New node...
1049         */
1050
1051         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1052       }
1053
1054      /*
1055       * Go through the text value and replace tabs and newlines with
1056       * whitespace and eliminate extra whitespace...
1057       */
1058
1059       for (ptr = node->text, text = node->text; *ptr;)
1060         if (isspace(*ptr & 255))
1061         {
1062           while (isspace(*ptr & 255))
1063             ptr ++;
1064
1065           *text++ = ' ';
1066         }
1067         else if (text != ptr)
1068           *text++ = *ptr++;
1069         else
1070         {
1071           text ++;
1072           ptr ++;
1073         }
1074
1075       *text = '\0';
1076
1077      /*
1078       * (Re)add the node to the array...
1079       */
1080
1081       cupsArrayAdd(hi->nodes, node);
1082
1083       if (!anchor)
1084         node = NULL;
1085       break;
1086     }
1087
1088     if (node)
1089     {
1090      /*
1091       * Scan this line for words...
1092       */
1093
1094       for (ptr = line; *ptr; ptr ++)
1095       {
1096        /*
1097         * Skip HTML stuff...
1098         */
1099
1100         if (*ptr == '<')
1101         {
1102           if (!strncmp(ptr, "<!--", 4))
1103           {
1104            /*
1105             * Skip HTML comment...
1106             */
1107
1108             if ((text = strstr(ptr + 4, "-->")) == NULL)
1109               ptr += strlen(ptr) - 1;
1110             else
1111               ptr = text + 2;
1112           }
1113           else
1114           {
1115            /*
1116             * Skip HTML element...
1117             */
1118
1119             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1120             {
1121               if (*ptr == '\"' || *ptr == '\'')
1122               {
1123                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1124
1125                 if (!*ptr)
1126                   ptr --;
1127               }
1128             }
1129
1130             if (!*ptr)
1131               ptr --;
1132           }
1133
1134           continue;
1135         }
1136         else if (*ptr == '&')
1137         {
1138          /*
1139           * Skip HTML entity...
1140           */
1141
1142           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1143
1144           if (!*ptr)
1145             ptr --;
1146
1147           continue;
1148         }
1149         else if (!isalnum(*ptr & 255))
1150           continue;
1151
1152        /*
1153         * Found the start of a word, search until we find the end...
1154         */
1155
1156         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1157
1158         wordlen = (int)(ptr - text);
1159
1160         memcpy(temp, text, (size_t)wordlen);
1161         temp[wordlen] = '\0';
1162
1163         ptr --;
1164
1165         if (wordlen > 1 && !bsearch(temp, help_common_words,
1166                                     (sizeof(help_common_words) /
1167                                      sizeof(help_common_words[0])),
1168                                     sizeof(help_common_words[0]),
1169                                     (int (*)(const void *, const void *))
1170                                         _cups_strcasecmp))
1171           help_add_word(node, temp);
1172       }
1173     }
1174
1175    /*
1176     * Get the offset of the next line...
1177     */
1178
1179     offset = cupsFileTell(fp);
1180   }
1181
1182   cupsFileClose(fp);
1183
1184   if (node)
1185     node->length = (size_t)(offset - node->offset);
1186
1187   return (0);
1188 }
1189
1190
1191 /*
1192  * 'help_new_node()' - Create a new node and add it to an index.
1193  */
1194
1195 static help_node_t *                    /* O - Node pointer or NULL on error */
1196 help_new_node(const char   *filename,   /* I - Filename */
1197               const char   *anchor,     /* I - Anchor */
1198               const char   *section,    /* I - Section */
1199               const char   *text,       /* I - Text */
1200               time_t       mtime,       /* I - Modification time */
1201               off_t        offset,      /* I - Offset in file */
1202               size_t       length)      /* I - Length in bytes */
1203 {
1204   help_node_t   *n;                     /* Node */
1205
1206
1207   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1208                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1209                 (long)mtime, (long)offset, (long)length));
1210
1211   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1212   if (!n)
1213     return (NULL);
1214
1215   n->filename = strdup(filename);
1216   n->anchor   = anchor ? strdup(anchor) : NULL;
1217   n->section  = *section ? strdup(section) : NULL;
1218   n->text     = strdup(text);
1219   n->mtime    = mtime;
1220   n->offset   = offset;
1221   n->length   = length;
1222
1223   return (n);
1224 }
1225
1226
1227 /*
1228  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1229  */
1230
1231 static int                              /* O - Difference */
1232 help_sort_by_name(help_node_t *n1,      /* I - First node */
1233                   help_node_t *n2)      /* I - Second node */
1234 {
1235   int           diff;                   /* Difference */
1236
1237
1238   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1239                 n1, n1->filename, n1->anchor,
1240                 n2, n2->filename, n2->anchor));
1241
1242   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1243     return (diff);
1244
1245   if (!n1->anchor && !n2->anchor)
1246     return (0);
1247   else if (!n1->anchor)
1248     return (-1);
1249   else if (!n2->anchor)
1250     return (1);
1251   else
1252     return (strcmp(n1->anchor, n2->anchor));
1253 }
1254
1255
1256 /*
1257  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1258  */
1259
1260 static int                              /* O - Difference */
1261 help_sort_by_score(help_node_t *n1,     /* I - First node */
1262                    help_node_t *n2)     /* I - Second node */
1263 {
1264   int           diff;                   /* Difference */
1265
1266
1267   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1268                 "n2=%p(%d \"%s\" \"%s\")",
1269                 n1, n1->score, n1->section, n1->text,
1270                 n2, n2->score, n2->section, n2->text));
1271
1272   if (n1->score != n2->score)
1273     return (n2->score - n1->score);
1274
1275   if (n1->section && !n2->section)
1276     return (1);
1277   else if (!n1->section && n2->section)
1278     return (-1);
1279   else if (n1->section && n2->section &&
1280            (diff = strcmp(n1->section, n2->section)) != 0)
1281     return (diff);
1282
1283   return (_cups_strcasecmp(n1->text, n2->text));
1284 }
1285
1286
1287 /*
1288  * 'help_sort_words()' - Sort words alphabetically.
1289  */
1290
1291 static int                              /* O - Difference */
1292 help_sort_words(help_word_t *w1,        /* I - Second word */
1293                 help_word_t *w2)        /* I - Second word */
1294 {
1295   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1296                 w1, w1->text, w2, w2->text));
1297
1298   return (_cups_strcasecmp(w1->text, w2->text));
1299 }