cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright 2007-2017 by Apple Inc.
   5  * Copyright 1997-2007 by Easy Software Products.
   6  *
   7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
   8  */
   9
  10 /*
  11  * Include necessary headers...
  12  */
  13
  14 #include "cgi-private.h"
  15 #include <cups/dir.h>
  16
  17
  18 /*
  19  * List of common English words that should not be indexed...
  20  */
  21
  22 static char             help_common_words[][6] =
  23                         {
  24                           "about",
  25                           "all",
  26                           "an",
  27                           "and",
  28                           "are",
  29                           "as",
  30                           "at",
  31                           "be",
  32                           "been",
  33                           "but",
  34                           "by",
  35                           "call",
  36                           "can",
  37                           "come",
  38                           "could",
  39                           "day",
  40                           "did",
  41                           "do",
  42                           "down",
  43                           "each",
  44                           "find",
  45                           "first",
  46                           "for",
  47                           "from",
  48                           "go",
  49                           "had",
  50                           "has",
  51                           "have",
  52                           "he",
  53                           "her",
  54                           "him",
  55                           "his",
  56                           "hot",
  57                           "how",
  58                           "if",
  59                           "in",
  60                           "is",
  61                           "it",
  62                           "know",
  63                           "like",
  64                           "long",
  65                           "look",
  66                           "make",
  67                           "many",
  68                           "may",
  69                           "more",
  70                           "most",
  71                           "my",
  72                           "no",
  73                           "now",
  74                           "of",
  75                           "on",
  76                           "one",
  77                           "or",
  78                           "other",
  79                           "out",
  80                           "over",
  81                           "said",
  82                           "see",
  83                           "she",
  84                           "side",
  85                           "so",
  86                           "some",
  87                           "sound",
  88                           "than",
  89                           "that",
  90                           "the",
  91                           "their",
  92                           "them",
  93                           "then",
  94                           "there",
  95                           "these",
  96                           "they",
  97                           "thing",
  98                           "this",
  99                           "time",
 100                           "to",
 101                           "two",
 102                           "up",
 103                           "use",
 104                           "was",
 105                           "water",
 106                           "way",
 107                           "we",
 108                           "were",
 109                           "what",
 110                           "when",
 111                           "which",
 112                           "who",
 113                           "will",
 114                           "with",
 115                           "word",
 116                           "would",
 117                           "write",
 118                           "you",
 119                           "your"
 120                         };
 121
 122
 123 /*
 124  * Local functions...
 125  */
 126
 127 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 128 static void             help_delete_node(help_node_t *n);
 129 static void             help_delete_word(help_word_t *w);
 130 static int              help_load_directory(help_index_t *hi,
 131                                             const char *directory,
 132                                             const char *relative);
 133 static int              help_load_file(help_index_t *hi,
 134                                        const char *filename,
 135                                        const char *relative,
 136                                        time_t     mtime);
 137 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 138                                        const char *section, const char *text,
 139                                        time_t mtime, off_t offset,
 140                                        size_t length)
 141                                        __attribute__((nonnull(1,3,4)));
 142 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 143 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 144 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 145
 146
 147 /*
 148  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 149  */
 150
 151 void
 152 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 153 {
 154   help_node_t   *node;                  /* Current node */
 155
 156
 157   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 158
 159   if (!hi)
 160     return;
 161
 162   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 163        node;
 164        node = (help_node_t *)cupsArrayNext(hi->nodes))
 165   {
 166     if (!hi->search)
 167       help_delete_node(node);
 168   }
 169
 170   cupsArrayDelete(hi->nodes);
 171   cupsArrayDelete(hi->sorted);
 172
 173   free(hi);
 174 }
 175
 176
 177 /*
 178  * 'helpFindNode()' - Find a node in an index.
 179  */
 180
 181 help_node_t *                           /* O - Node pointer or NULL */
 182 helpFindNode(help_index_t *hi,          /* I - Index */
 183              const char   *filename,    /* I - Filename */
 184              const char   *anchor)      /* I - Anchor */
 185 {
 186   help_node_t   key;                    /* Search key */
 187
 188
 189   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 190                 hi, filename, anchor));
 191
 192  /*
 193   * Range check input...
 194   */
 195
 196   if (!hi || !filename)
 197     return (NULL);
 198
 199  /*
 200   * Initialize the search key...
 201   */
 202
 203   key.filename = (char *)filename;
 204   key.anchor   = (char *)anchor;
 205
 206  /*
 207   * Return any match...
 208   */
 209
 210   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 211 }
 212
 213
 214 /*
 215  * 'helpLoadIndex()' - Load a help index from disk.
 216  */
 217
 218 help_index_t *                          /* O - Index pointer or NULL */
 219 helpLoadIndex(const char *hifile,       /* I - Index filename */
 220               const char *directory)    /* I - Directory that is indexed */
 221 {
 222   help_index_t  *hi;                    /* Help index */
 223   cups_file_t   *fp;                    /* Current file */
 224   char          line[2048],             /* Line from file */
 225                 *ptr,                   /* Pointer into line */
 226                 *filename,              /* Filename in line */
 227                 *anchor,                /* Anchor in line */
 228                 *sectptr,               /* Section pointer in line */
 229                 section[1024],          /* Section name */
 230                 *text;                  /* Text in line */
 231   time_t        mtime;                  /* Modification time */
 232   off_t         offset;                 /* Offset into file */
 233   size_t        length;                 /* Length in bytes */
 234   int           update;                 /* Update? */
 235   help_node_t   *node;                  /* Current node */
 236   help_word_t   *word;                  /* Current word */
 237
 238
 239   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 240                 hifile, directory));
 241
 242  /*
 243   * Create a new, empty index.
 244   */
 245
 246   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 247     return (NULL);
 248
 249   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 250   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 251
 252   if (!hi->nodes || !hi->sorted)
 253   {
 254     cupsArrayDelete(hi->nodes);
 255     cupsArrayDelete(hi->sorted);
 256     free(hi);
 257     return (NULL);
 258   }
 259
 260  /*
 261   * Try loading the existing index file...
 262   */
 263
 264   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 265   {
 266    /*
 267     * Lock the file and then read the first line...
 268     */
 269
 270     cupsFileLock(fp, 1);
 271
 272     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 273     {
 274      /*
 275       * Got a valid header line, now read the data lines...
 276       */
 277
 278       node = NULL;
 279
 280       while (cupsFileGets(fp, line, sizeof(line)))
 281       {
 282        /*
 283         * Each line looks like one of the following:
 284         *
 285         *     filename mtime offset length "section" "text"
 286         *     filename#anchor offset length "text"
 287         *     SP count word
 288         */
 289
 290         if (line[0] == ' ')
 291         {
 292          /*
 293           * Read a word in the current node...
 294           */
 295
 296           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 297             continue;
 298
 299           if ((word = help_add_word(node, ptr + 1)) != NULL)
 300             word->count = atoi(line + 1);
 301         }
 302         else
 303         {
 304          /*
 305           * Add a node...
 306           */
 307
 308           filename = line;
 309
 310           if ((ptr = strchr(line, ' ')) == NULL)
 311             break;
 312
 313           while (isspace(*ptr & 255))
 314             *ptr++ = '\0';
 315
 316           if ((anchor = strrchr(filename, '#')) != NULL)
 317           {
 318             *anchor++ = '\0';
 319             mtime = 0;
 320           }
 321           else
 322             mtime = strtol(ptr, &ptr, 10);
 323
 324           offset = strtoll(ptr, &ptr, 10);
 325           length = (size_t)strtoll(ptr, &ptr, 10);
 326
 327           while (isspace(*ptr & 255))
 328             ptr ++;
 329
 330           if (!anchor)
 331           {
 332            /*
 333             * Get section...
 334             */
 335
 336             if (*ptr != '\"')
 337               break;
 338
 339             ptr ++;
 340             sectptr = ptr;
 341
 342             while (*ptr && *ptr != '\"')
 343               ptr ++;
 344
 345             if (*ptr != '\"')
 346               break;
 347
 348             *ptr++ = '\0';
 349
 350             strlcpy(section, sectptr, sizeof(section));
 351
 352             while (isspace(*ptr & 255))
 353               ptr ++;
 354           }
 355
 356           if (*ptr != '\"')
 357             break;
 358
 359           ptr ++;
 360           text = ptr;
 361
 362           while (*ptr && *ptr != '\"')
 363             ptr ++;
 364
 365           if (*ptr != '\"')
 366             break;
 367
 368           *ptr++ = '\0';
 369
 370           if ((node = help_new_node(filename, anchor, section, text,
 371                                     mtime, offset, length)) == NULL)
 372             break;
 373
 374           node->score = -1;
 375
 376           cupsArrayAdd(hi->nodes, node);
 377         }
 378       }
 379     }
 380
 381     cupsFileClose(fp);
 382   }
 383
 384  /*
 385   * Scan for new/updated files...
 386   */
 387
 388   update = help_load_directory(hi, directory, NULL);
 389
 390  /*
 391   * Remove any files that are no longer installed...
 392   */
 393
 394   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 395        node;
 396        node = (help_node_t *)cupsArrayNext(hi->nodes))
 397     if (node->score < 0)
 398     {
 399      /*
 400       * Delete this node...
 401       */
 402
 403       cupsArrayRemove(hi->nodes, node);
 404       help_delete_node(node);
 405     }
 406
 407  /*
 408   * Add nodes to the sorted array...
 409   */
 410
 411   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 412        node;
 413        node = (help_node_t *)cupsArrayNext(hi->nodes))
 414     cupsArrayAdd(hi->sorted, node);
 415
 416  /*
 417   * Save the index if we updated it...
 418   */
 419
 420   if (update)
 421     helpSaveIndex(hi, hifile);
 422
 423  /*
 424   * Return the index...
 425   */
 426
 427   return (hi);
 428 }
 429
 430
 431 /*
 432  * 'helpSaveIndex()' - Save a help index to disk.
 433  */
 434
 435 int                                     /* O - 0 on success, -1 on error */
 436 helpSaveIndex(help_index_t *hi,         /* I - Index */
 437               const char   *hifile)     /* I - Index filename */
 438 {
 439   cups_file_t   *fp;                    /* Index file */
 440   help_node_t   *node;                  /* Current node */
 441   help_word_t   *word;                  /* Current word */
 442
 443
 444   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 445
 446  /*
 447   * Try creating a new index file...
 448   */
 449
 450   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 451     return (-1);
 452
 453  /*
 454   * Lock the file while we write it...
 455   */
 456
 457   cupsFileLock(fp, 1);
 458
 459   cupsFilePuts(fp, "HELPV2\n");
 460
 461   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 462        node;
 463        node = (help_node_t *)cupsArrayNext(hi->nodes))
 464   {
 465    /*
 466     * Write the current node with/without the anchor...
 467     */
 468
 469     if (node->anchor)
 470     {
 471       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 472                          node->filename, node->anchor,
 473                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 474                          node->text) < 0)
 475         break;
 476     }
 477     else
 478     {
 479       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 480                          node->filename, (int)node->mtime,
 481                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 482                          node->section ? node->section : "", node->text) < 0)
 483         break;
 484     }
 485
 486    /*
 487     * Then write the words associated with the node...
 488     */
 489
 490     for (word = (help_word_t *)cupsArrayFirst(node->words);
 491          word;
 492          word = (help_word_t *)cupsArrayNext(node->words))
 493       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 494         break;
 495   }
 496
 497   cupsFileFlush(fp);
 498
 499   if (cupsFileClose(fp) < 0)
 500     return (-1);
 501   else if (node)
 502     return (-1);
 503   else
 504     return (0);
 505 }
 506
 507
 508 /*
 509  * 'helpSearchIndex()' - Search an index.
 510  */
 511
 512 help_index_t *                          /* O - Search index */
 513 helpSearchIndex(help_index_t *hi,       /* I - Index */
 514                 const char   *query,    /* I - Query string */
 515                 const char   *section,  /* I - Limit search to this section */
 516                 const char   *filename) /* I - Limit search to this file */
 517 {
 518   help_index_t  *search;                /* Search index */
 519   help_node_t   *node;                  /* Current node */
 520   help_word_t   *word;                  /* Current word */
 521   void          *sc;                    /* Search context */
 522   int           matches;                /* Number of matches */
 523
 524
 525   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 526                 hi, query, filename));
 527
 528  /*
 529   * Range check...
 530   */
 531
 532   if (!hi || !query)
 533     return (NULL);
 534
 535  /*
 536   * Reset the scores of all nodes to 0...
 537   */
 538
 539   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 540        node;
 541        node = (help_node_t *)cupsArrayNext(hi->nodes))
 542     node->score = 0;
 543
 544  /*
 545   * Find the first node to search in...
 546   */
 547
 548   if (filename)
 549   {
 550     node = helpFindNode(hi, filename, NULL);
 551     if (!node)
 552       return (NULL);
 553   }
 554   else
 555     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 556
 557  /*
 558   * Convert the query into a regular expression...
 559   */
 560
 561   sc = cgiCompileSearch(query);
 562   if (!sc)
 563     return (NULL);
 564
 565  /*
 566   * Allocate a search index...
 567   */
 568
 569   search = calloc(1, sizeof(help_index_t));
 570   if (!search)
 571   {
 572     cgiFreeSearch(sc);
 573     return (NULL);
 574   }
 575
 576   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 577   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 578
 579   if (!search->nodes || !search->sorted)
 580   {
 581     cupsArrayDelete(search->nodes);
 582     cupsArrayDelete(search->sorted);
 583     free(search);
 584     cgiFreeSearch(sc);
 585     return (NULL);
 586   }
 587
 588   search->search = 1;
 589
 590  /*
 591   * Check each node in the index, adding matching nodes to the
 592   * search index...
 593   */
 594
 595   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 596     if (section && strcmp(node->section, section))
 597       continue;
 598     else if (filename && strcmp(node->filename, filename))
 599       continue;
 600     else
 601     {
 602       matches = cgiDoSearch(sc, node->text);
 603
 604       for (word = (help_word_t *)cupsArrayFirst(node->words);
 605            word;
 606            word = (help_word_t *)cupsArrayNext(node->words))
 607         if (cgiDoSearch(sc, word->text) > 0)
 608           matches += word->count;
 609
 610       if (matches > 0)
 611       {
 612        /*
 613         * Found a match, add the node to the search index...
 614         */
 615
 616         node->score = matches;
 617
 618         cupsArrayAdd(search->nodes, node);
 619         cupsArrayAdd(search->sorted, node);
 620       }
 621     }
 622
 623  /*
 624   * Free the search context...
 625   */
 626
 627   cgiFreeSearch(sc);
 628
 629  /*
 630   * Return the results...
 631   */
 632
 633   return (search);
 634 }
 635
 636
 637 /*
 638  * 'help_add_word()' - Add a word to a node.
 639  */
 640
 641 static help_word_t *                    /* O - New word */
 642 help_add_word(help_node_t *n,           /* I - Node */
 643               const char  *text)        /* I - Word text */
 644 {
 645   help_word_t   *w,                     /* New word */
 646                 key;                    /* Search key */
 647
 648
 649   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 650
 651  /*
 652   * Create the words array as needed...
 653   */
 654
 655   if (!n->words)
 656     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 657
 658  /*
 659   * See if the word is already added...
 660   */
 661
 662   key.text = (char *)text;
 663
 664   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 665   {
 666    /*
 667     * Create a new word...
 668     */
 669
 670     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 671       return (NULL);
 672
 673     if ((w->text = strdup(text)) == NULL)
 674     {
 675       free(w);
 676       return (NULL);
 677     }
 678
 679     cupsArrayAdd(n->words, w);
 680   }
 681
 682  /*
 683   * Bump the counter for this word and return it...
 684   */
 685
 686   w->count ++;
 687
 688   return (w);
 689 }
 690
 691
 692 /*
 693  * 'help_delete_node()' - Free all memory used by a node.
 694  */
 695
 696 static void
 697 help_delete_node(help_node_t *n)        /* I - Node */
 698 {
 699   help_word_t   *w;                     /* Current word */
 700
 701
 702   DEBUG_printf(("2help_delete_node(n=%p)", n));
 703
 704   if (!n)
 705     return;
 706
 707   if (n->filename)
 708     free(n->filename);
 709
 710   if (n->anchor)
 711     free(n->anchor);
 712
 713   if (n->section)
 714     free(n->section);
 715
 716   if (n->text)
 717     free(n->text);
 718
 719   for (w = (help_word_t *)cupsArrayFirst(n->words);
 720        w;
 721        w = (help_word_t *)cupsArrayNext(n->words))
 722     help_delete_word(w);
 723
 724   cupsArrayDelete(n->words);
 725
 726   free(n);
 727 }
 728
 729
 730 /*
 731  * 'help_delete_word()' - Free all memory used by a word.
 732  */
 733
 734 static void
 735 help_delete_word(help_word_t *w)        /* I - Word */
 736 {
 737   DEBUG_printf(("2help_delete_word(w=%p)", w));
 738
 739   if (!w)
 740     return;
 741
 742   if (w->text)
 743     free(w->text);
 744
 745   free(w);
 746 }
 747
 748
 749 /*
 750  * 'help_load_directory()' - Load a directory of files into an index.
 751  */
 752
 753 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 754 help_load_directory(
 755     help_index_t *hi,                   /* I - Index */
 756     const char   *directory,            /* I - Directory */
 757     const char   *relative)             /* I - Relative path */
 758 {
 759   cups_dir_t    *dir;                   /* Directory file */
 760   cups_dentry_t *dent;                  /* Directory entry */
 761   char          *ext,                   /* Pointer to extension */
 762                 filename[1024],         /* Full filename */
 763                 relname[1024];          /* Relative filename */
 764   int           update;                 /* Updated? */
 765   help_node_t   *node;                  /* Current node */
 766
 767
 768   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 769                 hi, directory, relative));
 770
 771  /*
 772   * Open the directory and scan it...
 773   */
 774
 775   if ((dir = cupsDirOpen(directory)) == NULL)
 776     return (0);
 777
 778   update = 0;
 779
 780   while ((dent = cupsDirRead(dir)) != NULL)
 781   {
 782    /*
 783     * Skip "." files...
 784     */
 785
 786     if (dent->filename[0] == '.')
 787       continue;
 788
 789    /*
 790     * Get absolute and relative filenames...
 791     */
 792
 793     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 794     if (relative)
 795       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 796     else
 797       strlcpy(relname, dent->filename, sizeof(relname));
 798
 799    /*
 800     * Check if we have a HTML file...
 801     */
 802
 803     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 804         (!ext[5] || !strcmp(ext + 5, ".gz")))
 805     {
 806      /*
 807       * HTML file, see if we have already indexed the file...
 808       */
 809
 810       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 811       {
 812        /*
 813         * File already indexed - check dates to confirm that the
 814         * index is up-to-date...
 815         */
 816
 817         if (node->mtime == dent->fileinfo.st_mtime)
 818         {
 819          /*
 820           * Same modification time, so mark all of the nodes
 821           * for this file as up-to-date...
 822           */
 823
 824           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 825             if (!strcmp(node->filename, relname))
 826               node->score = 0;
 827             else
 828               break;
 829
 830           continue;
 831         }
 832       }
 833
 834       update = 1;
 835
 836       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 837     }
 838     else if (S_ISDIR(dent->fileinfo.st_mode))
 839     {
 840      /*
 841       * Process sub-directory...
 842       */
 843
 844       if (help_load_directory(hi, filename, relname) == 1)
 845         update = 1;
 846     }
 847   }
 848
 849   cupsDirClose(dir);
 850
 851   return (update);
 852 }
 853
 854
 855 /*
 * 'help_load_file()' - Load an HTML file into an index.
 857  */
 858
 859 static int                              /* O - 0 = success, -1 = error */
 860 help_load_file(
 861     help_index_t *hi,                   /* I - Index */
 862     const char   *filename,             /* I - Filename */
 863     const char   *relative,             /* I - Relative path */
 864     time_t       mtime)                 /* I - Modification time */
 865 {
 866   cups_file_t   *fp;                    /* HTML file */
 867   help_node_t   *node;                  /* Current node */
 868   char          line[1024],             /* Line from file */
 869                 temp[1024],             /* Temporary word */
 870                 section[1024],          /* Section */
 871                 *ptr,                   /* Pointer into line */
 872                 *anchor,                /* Anchor name */
 873                 *text;                  /* Text for anchor */
 874   off_t         offset;                 /* File offset */
 875   char          quote;                  /* Quote character */
 876   help_word_t   *word;                  /* Current word */
 877   int           wordlen;                /* Length of word */
 878
 879
 880   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 881                 "mtime=%ld)", hi, filename, relative, (long)mtime));
 882
 883   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 884     return (-1);
 885
 886   node   = NULL;
 887   offset = 0;
 888
 889   strlcpy(section, "Other", sizeof(section));
 890
 891   while (cupsFileGets(fp, line, sizeof(line)))
 892   {
 893    /*
 894     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 895     */
 896
 897     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
 898     {
 899      /*
 900       * Got section line, copy it!
 901       */
 902
 903       for (ptr += 13; isspace(*ptr & 255); ptr ++);
 904
 905       strlcpy(section, ptr, sizeof(section));
 906       if ((ptr = strstr(section, "-->")) != NULL)
 907       {
 908        /*
 909         * Strip comment stuff from end of line...
 910         */
 911
 912         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 913
 914         if (isspace(*ptr & 255))
 915           *ptr = '\0';
 916       }
 917       continue;
 918     }
 919
 920     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 921     {
 922       ptr ++;
 923
 924       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 925       {
 926        /*
 927         * Found the title...
 928         */
 929
 930         anchor = NULL;
 931         ptr += 6;
 932       }
 933       else
 934       {
 935         char *idptr;                    /* Pointer to ID */
 936
 937         if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 938           ptr += 7;
 939         else if ((idptr = strstr(ptr, " ID=")) != NULL)
 940           ptr = idptr + 4;
 941         else if ((idptr = strstr(ptr, " id=")) != NULL)
 942           ptr = idptr + 4;
 943         else
 944           continue;
 945
 946        /*
 947         * Found an anchor...
 948         */
 949
 950         if (*ptr == '\"' || *ptr == '\'')
 951         {
 952          /*
 953           * Get quoted anchor...
 954           */
 955
 956           quote  = *ptr;
 957           anchor = ptr + 1;
 958           if ((ptr = strchr(anchor, quote)) != NULL)
 959             *ptr++ = '\0';
 960           else
 961             break;
 962         }
 963         else
 964         {
 965          /*
 966           * Get unquoted anchor...
 967           */
 968
 969           anchor = ptr + 1;
 970
 971           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 972
 973           if (*ptr != '>')
 974             *ptr++ = '\0';
 975           else
 976             break;
 977         }
 978
 979        /*
 980         * Got the anchor, now lets find the end...
 981         */
 982
 983         while (*ptr && *ptr != '>')
 984           ptr ++;
 985
 986         if (*ptr != '>')
 987           break;
 988
 989         *ptr++ = '\0';
 990       }
 991
 992      /*
 993       * Now collect text for the link...
 994       */
 995
 996       text = ptr;
 997       while ((ptr = strchr(text, '<')) == NULL)
 998       {
 999         ptr = text + strlen(text);
1000         if (ptr >= (line + sizeof(line) - 2))
1001           break;
1002
1003         *ptr++ = ' ';
1004
1005         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1006           break;
1007       }
1008
1009       *ptr = '\0';
1010
1011       if (node)
1012         node->length = (size_t)(offset - node->offset);
1013
1014       if (!*text)
1015       {
1016         node = NULL;
1017         break;
1018       }
1019
1020       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1021       {
1022        /*
1023         * Node already in the index, so replace the text and other
1024         * data...
1025         */
1026
1027         cupsArrayRemove(hi->nodes, node);
1028
1029         if (node->section)
1030           free(node->section);
1031
1032         if (node->text)
1033           free(node->text);
1034
1035         if (node->words)
1036         {
1037           for (word = (help_word_t *)cupsArrayFirst(node->words);
1038                word;
1039                word = (help_word_t *)cupsArrayNext(node->words))
1040             help_delete_word(word);
1041
1042           cupsArrayDelete(node->words);
1043           node->words = NULL;
1044         }
1045
1046         node->section = section[0] ? strdup(section) : NULL;
1047         node->text    = strdup(text);
1048         node->mtime   = mtime;
1049         node->offset  = offset;
1050         node->score   = 0;
1051       }
1052       else
1053       {
1054        /*
1055         * New node...
1056         */
1057
1058         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1059       }
1060
1061      /*
1062       * Go through the text value and replace tabs and newlines with
1063       * whitespace and eliminate extra whitespace...
1064       */
1065
1066       for (ptr = node->text, text = node->text; *ptr;)
1067         if (isspace(*ptr & 255))
1068         {
1069           while (isspace(*ptr & 255))
1070             ptr ++;
1071
1072           *text++ = ' ';
1073         }
1074         else if (text != ptr)
1075           *text++ = *ptr++;
1076         else
1077         {
1078           text ++;
1079           ptr ++;
1080         }
1081
1082       *text = '\0';
1083
1084      /*
1085       * (Re)add the node to the array...
1086       */
1087
1088       cupsArrayAdd(hi->nodes, node);
1089
1090       if (!anchor)
1091         node = NULL;
1092       break;
1093     }
1094
1095     if (node)
1096     {
1097      /*
1098       * Scan this line for words...
1099       */
1100
1101       for (ptr = line; *ptr; ptr ++)
1102       {
1103        /*
1104         * Skip HTML stuff...
1105         */
1106
1107         if (*ptr == '<')
1108         {
1109           if (!strncmp(ptr, "<!--", 4))
1110           {
1111            /*
1112             * Skip HTML comment...
1113             */
1114
1115             if ((text = strstr(ptr + 4, "-->")) == NULL)
1116               ptr += strlen(ptr) - 1;
1117             else
1118               ptr = text + 2;
1119           }
1120           else
1121           {
1122            /*
1123             * Skip HTML element...
1124             */
1125
1126             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1127             {
1128               if (*ptr == '\"' || *ptr == '\'')
1129               {
1130                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1131
1132                 if (!*ptr)
1133                   ptr --;
1134               }
1135             }
1136
1137             if (!*ptr)
1138               ptr --;
1139           }
1140
1141           continue;
1142         }
1143         else if (*ptr == '&')
1144         {
1145          /*
1146           * Skip HTML entity...
1147           */
1148
1149           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1150
1151           if (!*ptr)
1152             ptr --;
1153
1154           continue;
1155         }
1156         else if (!isalnum(*ptr & 255))
1157           continue;
1158
1159        /*
1160         * Found the start of a word, search until we find the end...
1161         */
1162
1163         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1164
1165         wordlen = (int)(ptr - text);
1166
1167         memcpy(temp, text, (size_t)wordlen);
1168         temp[wordlen] = '\0';
1169
1170         ptr --;
1171
1172         if (wordlen > 1 && !bsearch(temp, help_common_words,
1173                                     (sizeof(help_common_words) /
1174                                      sizeof(help_common_words[0])),
1175                                     sizeof(help_common_words[0]),
1176                                     (int (*)(const void *, const void *))
1177                                         _cups_strcasecmp))
1178           help_add_word(node, temp);
1179       }
1180     }
1181
1182    /*
1183     * Get the offset of the next line...
1184     */
1185
1186     offset = cupsFileTell(fp);
1187   }
1188
1189   cupsFileClose(fp);
1190
1191   if (node)
1192     node->length = (size_t)(offset - node->offset);
1193
1194   return (0);
1195 }
1196
1197
1198 /*
1199  * 'help_new_node()' - Create a new node and add it to an index.
1200  */
1201
1202 static help_node_t *                    /* O - Node pointer or NULL on error */
1203 help_new_node(const char   *filename,   /* I - Filename */
1204               const char   *anchor,     /* I - Anchor */
1205               const char   *section,    /* I - Section */
1206               const char   *text,       /* I - Text */
1207               time_t       mtime,       /* I - Modification time */
1208               off_t        offset,      /* I - Offset in file */
1209               size_t       length)      /* I - Length in bytes */
1210 {
1211   help_node_t   *n;                     /* Node */
1212
1213
1214   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1215                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1216                 (long)mtime, (long)offset, (long)length));
1217
1218   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1219   if (!n)
1220     return (NULL);
1221
1222   n->filename = strdup(filename);
1223   n->anchor   = anchor ? strdup(anchor) : NULL;
1224   n->section  = *section ? strdup(section) : NULL;
1225   n->text     = strdup(text);
1226   n->mtime    = mtime;
1227   n->offset   = offset;
1228   n->length   = length;
1229
1230   return (n);
1231 }
1232
1233
1234 /*
1235  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1236  */
1237
1238 static int                              /* O - Difference */
1239 help_sort_by_name(help_node_t *n1,      /* I - First node */
1240                   help_node_t *n2)      /* I - Second node */
1241 {
1242   int           diff;                   /* Difference */
1243
1244
1245   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1246                 n1, n1->filename, n1->anchor,
1247                 n2, n2->filename, n2->anchor));
1248
1249   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1250     return (diff);
1251
1252   if (!n1->anchor && !n2->anchor)
1253     return (0);
1254   else if (!n1->anchor)
1255     return (-1);
1256   else if (!n2->anchor)
1257     return (1);
1258   else
1259     return (strcmp(n1->anchor, n2->anchor));
1260 }
1261
1262
1263 /*
1264  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1265  */
1266
1267 static int                              /* O - Difference */
1268 help_sort_by_score(help_node_t *n1,     /* I - First node */
1269                    help_node_t *n2)     /* I - Second node */
1270 {
1271   int           diff;                   /* Difference */
1272
1273
1274   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1275                 "n2=%p(%d \"%s\" \"%s\")",
1276                 n1, n1->score, n1->section, n1->text,
1277                 n2, n2->score, n2->section, n2->text));
1278
1279   if (n1->score != n2->score)
1280     return (n2->score - n1->score);
1281
1282   if (n1->section && !n2->section)
1283     return (1);
1284   else if (!n1->section && n2->section)
1285     return (-1);
1286   else if (n1->section && n2->section &&
1287            (diff = strcmp(n1->section, n2->section)) != 0)
1288     return (diff);
1289
1290   return (_cups_strcasecmp(n1->text, n2->text));
1291 }
1292
1293
1294 /*
1295  * 'help_sort_words()' - Sort words alphabetically.
1296  */
1297
1298 static int                              /* O - Difference */
1299 help_sort_words(help_word_t *w1,        /* I - Second word */
1300                 help_word_t *w2)        /* I - Second word */
1301 {
1302   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1303                 w1, w1->text, w2, w2->text));
1304
1305   return (_cups_strcasecmp(w1->text, w2->text));
1306 }