cgi-bin/help-index.c

   1 /*
   2  * Online help index routines for CUPS.
   3  *
   4  * Copyright © 2020-2024 by OpenPrinting.
   5  * Copyright © 2007-2019 by Apple Inc.
   6  * Copyright © 1997-2007 by Easy Software Products.
   7  *
   8  * Licensed under Apache License v2.0.  See the file "LICENSE" for more
   9  * information.
  10  */
  11
  12 /*
  13  * Include necessary headers...
  14  */
  15
  16 #include "cgi-private.h"
  17 #include <cups/dir.h>
  18
  19
  20 /*
  21  * List of common English words that should not be indexed...
  22  */
  23
  24 static char             help_common_words[][6] =
  25                         {
  26                           "about",
  27                           "all",
  28                           "an",
  29                           "and",
  30                           "are",
  31                           "as",
  32                           "at",
  33                           "be",
  34                           "been",
  35                           "but",
  36                           "by",
  37                           "call",
  38                           "can",
  39                           "come",
  40                           "could",
  41                           "day",
  42                           "did",
  43                           "do",
  44                           "down",
  45                           "each",
  46                           "find",
  47                           "first",
  48                           "for",
  49                           "from",
  50                           "go",
  51                           "had",
  52                           "has",
  53                           "have",
  54                           "he",
  55                           "her",
  56                           "him",
  57                           "his",
  58                           "hot",
  59                           "how",
  60                           "if",
  61                           "in",
  62                           "is",
  63                           "it",
  64                           "know",
  65                           "like",
  66                           "long",
  67                           "look",
  68                           "make",
  69                           "many",
  70                           "may",
  71                           "more",
  72                           "most",
  73                           "my",
  74                           "no",
  75                           "now",
  76                           "of",
  77                           "on",
  78                           "one",
  79                           "or",
  80                           "other",
  81                           "out",
  82                           "over",
  83                           "said",
  84                           "see",
  85                           "she",
  86                           "side",
  87                           "so",
  88                           "some",
  89                           "sound",
  90                           "than",
  91                           "that",
  92                           "the",
  93                           "their",
  94                           "them",
  95                           "then",
  96                           "there",
  97                           "these",
  98                           "they",
  99                           "thing",
 100                           "this",
 101                           "time",
 102                           "to",
 103                           "two",
 104                           "up",
 105                           "use",
 106                           "was",
 107                           "water",
 108                           "way",
 109                           "we",
 110                           "were",
 111                           "what",
 112                           "when",
 113                           "which",
 114                           "who",
 115                           "will",
 116                           "with",
 117                           "word",
 118                           "would",
 119                           "write",
 120                           "you",
 121                           "your"
 122                         };
 123
 124
 125 /*
 126  * Local functions...
 127  */
 128
 129 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 130 static void             help_delete_node(help_node_t *n);
 131 static void             help_delete_word(help_word_t *w);
 132 static int              help_load_directory(help_index_t *hi,
 133                                             const char *directory,
 134                                             const char *relative);
 135 static int              help_load_file(help_index_t *hi,
 136                                        const char *filename,
 137                                        const char *relative,
 138                                        time_t     mtime);
 139 static help_node_t      *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
 140 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2, void *data);
 141 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2, void *data);
 142 static int              help_sort_words(help_word_t *w1, help_word_t *w2, void *data);
 143
 144
 145 /*
 146  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 147  */
 148
 149 void
 150 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 151 {
 152   help_node_t   *node;                  /* Current node */
 153
 154
 155   if (!hi)
 156     return;
 157
 158   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 159        node;
 160        node = (help_node_t *)cupsArrayNext(hi->nodes))
 161   {
 162     if (!hi->search)
 163       help_delete_node(node);
 164   }
 165
 166   cupsArrayDelete(hi->nodes);
 167   cupsArrayDelete(hi->sorted);
 168
 169   free(hi);
 170 }
 171
 172
 173 /*
 174  * 'helpFindNode()' - Find a node in an index.
 175  */
 176
 177 help_node_t *                           /* O - Node pointer or NULL */
 178 helpFindNode(help_index_t *hi,          /* I - Index */
 179              const char   *filename,    /* I - Filename */
 180              const char   *anchor)      /* I - Anchor */
 181 {
 182   help_node_t   key;                    /* Search key */
 183
 184
 185  /*
 186   * Range check input...
 187   */
 188
 189   if (!hi || !filename)
 190     return (NULL);
 191
 192  /*
 193   * Initialize the search key...
 194   */
 195
 196   key.filename = (char *)filename;
 197   key.anchor   = (char *)anchor;
 198
 199  /*
 200   * Return any match...
 201   */
 202
 203   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 204 }
 205
 206
 207 /*
 208  * 'helpLoadIndex()' - Load a help index from disk.
 209  */
 210
 211 help_index_t *                          /* O - Index pointer or NULL */
 212 helpLoadIndex(const char *hifile,       /* I - Index filename */
 213               const char *directory)    /* I - Directory that is indexed */
 214 {
 215   help_index_t  *hi;                    /* Help index */
 216   cups_file_t   *fp;                    /* Current file */
 217   char          line[2048],             /* Line from file */
 218                 *ptr,                   /* Pointer into line */
 219                 *filename,              /* Filename in line */
 220                 *anchor,                /* Anchor in line */
 221                 *sectptr,               /* Section pointer in line */
 222                 section[1024],          /* Section name */
 223                 *text;                  /* Text in line */
 224   time_t        mtime;                  /* Modification time */
 225   off_t         offset;                 /* Offset into file */
 226   size_t        length;                 /* Length in bytes */
 227   int           update;                 /* Update? */
 228   help_node_t   *node;                  /* Current node */
 229   help_word_t   *word;                  /* Current word */
 230
 231
 232  /*
 233   * Create a new, empty index.
 234   */
 235
 236   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 237     return (NULL);
 238
 239   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 240   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 241
 242   if (!hi->nodes || !hi->sorted)
 243   {
 244     cupsArrayDelete(hi->nodes);
 245     cupsArrayDelete(hi->sorted);
 246     free(hi);
 247     return (NULL);
 248   }
 249
 250  /*
 251   * Try loading the existing index file...
 252   */
 253
 254   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 255   {
 256    /*
 257     * Lock the file and then read the first line...
 258     */
 259
 260     cupsFileLock(fp, 1);
 261
 262     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 263     {
 264      /*
 265       * Got a valid header line, now read the data lines...
 266       */
 267
 268       node = NULL;
 269
 270       while (cupsFileGets(fp, line, sizeof(line)))
 271       {
 272        /*
 273         * Each line looks like one of the following:
 274         *
 275         *     filename mtime offset length "section" "text"
 276         *     filename#anchor offset length "text"
 277         *     SP count word
 278         */
 279
 280         if (line[0] == ' ')
 281         {
 282          /*
 283           * Read a word in the current node...
 284           */
 285
 286           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 287             continue;
 288
 289           if ((word = help_add_word(node, ptr + 1)) != NULL)
 290             word->count = atoi(line + 1);
 291         }
 292         else
 293         {
 294          /*
 295           * Add a node...
 296           */
 297
 298           filename = line;
 299
 300           if ((ptr = strchr(line, ' ')) == NULL)
 301             break;
 302
 303           while (isspace(*ptr & 255))
 304             *ptr++ = '\0';
 305
 306           if ((anchor = strrchr(filename, '#')) != NULL)
 307           {
 308             *anchor++ = '\0';
 309             mtime = 0;
 310           }
 311           else
 312             mtime = strtol(ptr, &ptr, 10);
 313
 314           offset = strtoll(ptr, &ptr, 10);
 315           length = (size_t)strtoll(ptr, &ptr, 10);
 316
 317           while (isspace(*ptr & 255))
 318             ptr ++;
 319
 320           if (!anchor)
 321           {
 322            /*
 323             * Get section...
 324             */
 325
 326             if (*ptr != '\"')
 327               break;
 328
 329             ptr ++;
 330             sectptr = ptr;
 331
 332             while (*ptr && *ptr != '\"')
 333               ptr ++;
 334
 335             if (*ptr != '\"')
 336               break;
 337
 338             *ptr++ = '\0';
 339
 340             cupsCopyString(section, sectptr, sizeof(section));
 341
 342             while (isspace(*ptr & 255))
 343               ptr ++;
 344           }
 345           else
 346             section[0] = '\0';
 347
 348           if (*ptr != '\"')
 349             break;
 350
 351           ptr ++;
 352           text = ptr;
 353
 354           while (*ptr && *ptr != '\"')
 355             ptr ++;
 356
 357           if (*ptr != '\"')
 358             break;
 359
 360           *ptr++ = '\0';
 361
 362           if ((node = help_new_node(filename, anchor, section, text,
 363                                     mtime, offset, length)) == NULL)
 364             break;
 365
 366           node->score = -1;
 367
 368           cupsArrayAdd(hi->nodes, node);
 369         }
 370       }
 371     }
 372
 373     cupsFileClose(fp);
 374   }
 375
 376  /*
 377   * Scan for new/updated files...
 378   */
 379
 380   update = help_load_directory(hi, directory, NULL);
 381
 382  /*
 383   * Remove any files that are no longer installed...
 384   */
 385
 386   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 387        node;
 388        node = (help_node_t *)cupsArrayNext(hi->nodes))
 389     if (node->score < 0)
 390     {
 391      /*
 392       * Delete this node...
 393       */
 394
 395       cupsArrayRemove(hi->nodes, node);
 396       help_delete_node(node);
 397     }
 398
 399  /*
 400   * Add nodes to the sorted array...
 401   */
 402
 403   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 404        node;
 405        node = (help_node_t *)cupsArrayNext(hi->nodes))
 406     cupsArrayAdd(hi->sorted, node);
 407
 408  /*
 409   * Save the index if we updated it...
 410   */
 411
 412   if (update)
 413     helpSaveIndex(hi, hifile);
 414
 415  /*
 416   * Return the index...
 417   */
 418
 419   return (hi);
 420 }
 421
 422
 423 /*
 424  * 'helpSaveIndex()' - Save a help index to disk.
 425  */
 426
 427 int                                     /* O - 0 on success, -1 on error */
 428 helpSaveIndex(help_index_t *hi,         /* I - Index */
 429               const char   *hifile)     /* I - Index filename */
 430 {
 431   cups_file_t   *fp;                    /* Index file */
 432   help_node_t   *node;                  /* Current node */
 433   help_word_t   *word;                  /* Current word */
 434
 435
 436  /*
 437   * Try creating a new index file...
 438   */
 439
 440   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 441     return (-1);
 442
 443  /*
 444   * Lock the file while we write it...
 445   */
 446
 447   cupsFileLock(fp, 1);
 448
 449   cupsFilePuts(fp, "HELPV2\n");
 450
 451   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 452        node;
 453        node = (help_node_t *)cupsArrayNext(hi->nodes))
 454   {
 455    /*
 456     * Write the current node with/without the anchor...
 457     */
 458
 459     if (node->anchor)
 460     {
 461       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 462                          node->filename, node->anchor,
 463                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 464                          node->text) < 0)
 465         break;
 466     }
 467     else
 468     {
 469       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 470                          node->filename, (int)node->mtime,
 471                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 472                          node->section ? node->section : "", node->text) < 0)
 473         break;
 474     }
 475
 476    /*
 477     * Then write the words associated with the node...
 478     */
 479
 480     for (word = (help_word_t *)cupsArrayFirst(node->words);
 481          word;
 482          word = (help_word_t *)cupsArrayNext(node->words))
 483       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 484         break;
 485   }
 486
 487   cupsFileFlush(fp);
 488
 489   if (cupsFileClose(fp) < 0)
 490     return (-1);
 491   else if (node)
 492     return (-1);
 493   else
 494     return (0);
 495 }
 496
 497
 498 /*
 499  * 'helpSearchIndex()' - Search an index.
 500  */
 501
 502 help_index_t *                          /* O - Search index */
 503 helpSearchIndex(help_index_t *hi,       /* I - Index */
 504                 const char   *query,    /* I - Query string */
 505                 const char   *section,  /* I - Limit search to this section */
 506                 const char   *filename) /* I - Limit search to this file */
 507 {
 508   help_index_t  *search;                /* Search index */
 509   help_node_t   *node;                  /* Current node */
 510   help_word_t   *word;                  /* Current word */
 511   void          *sc;                    /* Search context */
 512   int           matches;                /* Number of matches */
 513
 514
 515  /*
 516   * Range check...
 517   */
 518
 519   if (!hi || !query)
 520     return (NULL);
 521
 522  /*
 523   * Reset the scores of all nodes to 0...
 524   */
 525
 526   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 527        node;
 528        node = (help_node_t *)cupsArrayNext(hi->nodes))
 529     node->score = 0;
 530
 531  /*
 532   * Find the first node to search in...
 533   */
 534
 535   if (filename)
 536   {
 537     node = helpFindNode(hi, filename, NULL);
 538     if (!node)
 539       return (NULL);
 540   }
 541   else
 542     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 543
 544  /*
 545   * Convert the query into a regular expression...
 546   */
 547
 548   sc = cgiCompileSearch(query);
 549   if (!sc)
 550     return (NULL);
 551
 552  /*
 553   * Allocate a search index...
 554   */
 555
 556   search = calloc(1, sizeof(help_index_t));
 557   if (!search)
 558   {
 559     cgiFreeSearch(sc);
 560     return (NULL);
 561   }
 562
 563   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 564   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 565
 566   if (!search->nodes || !search->sorted)
 567   {
 568     cupsArrayDelete(search->nodes);
 569     cupsArrayDelete(search->sorted);
 570     free(search);
 571     cgiFreeSearch(sc);
 572     return (NULL);
 573   }
 574
 575   search->search = 1;
 576
 577  /*
 578   * Check each node in the index, adding matching nodes to the
 579   * search index...
 580   */
 581
 582   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 583     if (node->section && section && strcmp(node->section, section))
 584       continue;
 585     else if (filename && strcmp(node->filename, filename))
 586       continue;
 587     else
 588     {
 589       matches = cgiDoSearch(sc, node->text);
 590
 591       for (word = (help_word_t *)cupsArrayFirst(node->words);
 592            word;
 593            word = (help_word_t *)cupsArrayNext(node->words))
 594         if (cgiDoSearch(sc, word->text) > 0)
 595           matches += word->count;
 596
 597       if (matches > 0)
 598       {
 599        /*
 600         * Found a match, add the node to the search index...
 601         */
 602
 603         node->score = matches;
 604
 605         cupsArrayAdd(search->nodes, node);
 606         cupsArrayAdd(search->sorted, node);
 607       }
 608     }
 609
 610  /*
 611   * Free the search context...
 612   */
 613
 614   cgiFreeSearch(sc);
 615
 616  /*
 617   * Return the results...
 618   */
 619
 620   return (search);
 621 }
 622
 623
 624 /*
 625  * 'help_add_word()' - Add a word to a node.
 626  */
 627
 628 static help_word_t *                    /* O - New word */
 629 help_add_word(help_node_t *n,           /* I - Node */
 630               const char  *text)        /* I - Word text */
 631 {
 632   help_word_t   *w,                     /* New word */
 633                 key;                    /* Search key */
 634
 635
 636  /*
 637   * Create the words array as needed...
 638   */
 639
 640   if (!n->words)
 641     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 642
 643  /*
 644   * See if the word is already added...
 645   */
 646
 647   key.text = (char *)text;
 648
 649   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 650   {
 651    /*
 652     * Create a new word...
 653     */
 654
 655     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 656       return (NULL);
 657
 658     if ((w->text = strdup(text)) == NULL)
 659     {
 660       free(w);
 661       return (NULL);
 662     }
 663
 664     cupsArrayAdd(n->words, w);
 665   }
 666
 667  /*
 668   * Bump the counter for this word and return it...
 669   */
 670
 671   w->count ++;
 672
 673   return (w);
 674 }
 675
 676
 677 /*
 678  * 'help_delete_node()' - Free all memory used by a node.
 679  */
 680
 681 static void
 682 help_delete_node(help_node_t *n)        /* I - Node */
 683 {
 684   help_word_t   *w;                     /* Current word */
 685
 686
 687   if (!n)
 688     return;
 689
 690   if (n->filename)
 691     free(n->filename);
 692
 693   if (n->anchor)
 694     free(n->anchor);
 695
 696   if (n->section)
 697     free(n->section);
 698
 699   if (n->text)
 700     free(n->text);
 701
 702   for (w = (help_word_t *)cupsArrayFirst(n->words);
 703        w;
 704        w = (help_word_t *)cupsArrayNext(n->words))
 705     help_delete_word(w);
 706
 707   cupsArrayDelete(n->words);
 708
 709   free(n);
 710 }
 711
 712
 713 /*
 714  * 'help_delete_word()' - Free all memory used by a word.
 715  */
 716
 717 static void
 718 help_delete_word(help_word_t *w)        /* I - Word */
 719 {
 720   if (!w)
 721     return;
 722
 723   if (w->text)
 724     free(w->text);
 725
 726   free(w);
 727 }
 728
 729
 730 /*
 731  * 'help_load_directory()' - Load a directory of files into an index.
 732  */
 733
 734 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 735 help_load_directory(
 736     help_index_t *hi,                   /* I - Index */
 737     const char   *directory,            /* I - Directory */
 738     const char   *relative)             /* I - Relative path */
 739 {
 740   cups_dir_t    *dir;                   /* Directory file */
 741   cups_dentry_t *dent;                  /* Directory entry */
 742   char          *ext,                   /* Pointer to extension */
 743                 filename[1024],         /* Full filename */
 744                 relname[1024];          /* Relative filename */
 745   int           update;                 /* Updated? */
 746   help_node_t   *node;                  /* Current node */
 747
 748
 749  /*
 750   * Open the directory and scan it...
 751   */
 752
 753   if ((dir = cupsDirOpen(directory)) == NULL)
 754     return (0);
 755
 756   update = 0;
 757
 758   while ((dent = cupsDirRead(dir)) != NULL)
 759   {
 760    /*
 761     * Skip "." files...
 762     */
 763
 764     if (dent->filename[0] == '.')
 765       continue;
 766
 767    /*
 768     * Get absolute and relative filenames...
 769     */
 770
 771     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 772     if (relative)
 773       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 774     else
 775       cupsCopyString(relname, dent->filename, sizeof(relname));
 776
 777    /*
 778     * Check if we have a HTML file...
 779     */
 780
 781     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 782         (!ext[5] || !strcmp(ext + 5, ".gz")))
 783     {
 784      /*
 785       * HTML file, see if we have already indexed the file...
 786       */
 787
 788       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 789       {
 790        /*
 791         * File already indexed - check dates to confirm that the
 792         * index is up-to-date...
 793         */
 794
 795         if (node->mtime == dent->fileinfo.st_mtime)
 796         {
 797          /*
 798           * Same modification time, so mark all of the nodes
 799           * for this file as up-to-date...
 800           */
 801
 802           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 803             if (!strcmp(node->filename, relname))
 804               node->score = 0;
 805             else
 806               break;
 807
 808           continue;
 809         }
 810       }
 811
 812       update = 1;
 813
 814       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 815     }
 816     else if (S_ISDIR(dent->fileinfo.st_mode))
 817     {
 818      /*
 819       * Process sub-directory...
 820       */
 821
 822       if (help_load_directory(hi, filename, relname) == 1)
 823         update = 1;
 824     }
 825   }
 826
 827   cupsDirClose(dir);
 828
 829   return (update);
 830 }
 831
 832
 833 /*
 834  * 'help_load_file()' - Load a HTML files into an index.
 835  */
 836
 837 static int                              /* O - 0 = success, -1 = error */
 838 help_load_file(
 839     help_index_t *hi,                   /* I - Index */
 840     const char   *filename,             /* I - Filename */
 841     const char   *relative,             /* I - Relative path */
 842     time_t       mtime)                 /* I - Modification time */
 843 {
 844   cups_file_t   *fp;                    /* HTML file */
 845   help_node_t   *node;                  /* Current node */
 846   char          line[1024],             /* Line from file */
 847                 temp[1024],             /* Temporary word */
 848                 section[1024],          /* Section */
 849                 *ptr,                   /* Pointer into line */
 850                 *anchor,                /* Anchor name */
 851                 *text;                  /* Text for anchor */
 852   off_t         offset;                 /* File offset */
 853   char          quote;                  /* Quote character */
 854   help_word_t   *word;                  /* Current word */
 855   size_t                wordlen;                /* Length of word */
 856
 857
 858   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 859     return (-1);
 860
 861   node   = NULL;
 862   offset = 0;
 863
 864   if (strstr(filename, "/man-") != NULL)
 865     cupsCopyString(section, "Man Pages", sizeof(section));
 866   else
 867     cupsCopyString(section, "Other", sizeof(section));
 868
 869   while (cupsFileGets(fp, line, sizeof(line)))
 870   {
 871    /*
 872     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 873     */
 874
 875     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
 876     {
 877      /*
 878       * Got section line, copy it!
 879       */
 880
 881       for (ptr += 13; isspace(*ptr & 255); ptr ++);
 882
 883       cupsCopyString(section, ptr, sizeof(section));
 884       if ((ptr = strstr(section, "-->")) != NULL)
 885       {
 886        /*
 887         * Strip comment stuff from end of line...
 888         */
 889
 890         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 891
 892         if (isspace(*ptr & 255))
 893           *ptr = '\0';
 894       }
 895       continue;
 896     }
 897
 898     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 899     {
 900       ptr ++;
 901
 902       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 903       {
 904        /*
 905         * Found the title...
 906         */
 907
 908         anchor = NULL;
 909         ptr += 6;
 910       }
 911       else
 912       {
 913         char *idptr;                    /* Pointer to ID */
 914
 915         if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 916           ptr += 7;
 917         else if ((idptr = strstr(ptr, " ID=")) != NULL)
 918           ptr = idptr + 4;
 919         else if ((idptr = strstr(ptr, " id=")) != NULL)
 920           ptr = idptr + 4;
 921         else
 922           continue;
 923
 924        /*
 925         * Found an anchor...
 926         */
 927
 928         if (*ptr == '\"' || *ptr == '\'')
 929         {
 930          /*
 931           * Get quoted anchor...
 932           */
 933
 934           quote  = *ptr;
 935           anchor = ptr + 1;
 936           if ((ptr = strchr(anchor, quote)) != NULL)
 937             *ptr++ = '\0';
 938           else
 939             break;
 940         }
 941         else
 942         {
 943          /*
 944           * Get unquoted anchor...
 945           */
 946
 947           anchor = ptr + 1;
 948
 949           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 950
 951           if (*ptr != '>')
 952             *ptr++ = '\0';
 953           else
 954             break;
 955         }
 956
 957        /*
 958         * Got the anchor, now lets find the end...
 959         */
 960
 961         while (*ptr && *ptr != '>')
 962           ptr ++;
 963
 964         if (*ptr != '>')
 965           break;
 966
 967         *ptr++ = '\0';
 968       }
 969
 970      /*
 971       * Now collect text for the link...
 972       */
 973
 974       text = ptr;
 975       while ((ptr = strchr(text, '<')) == NULL)
 976       {
 977         ptr = text + strlen(text);
 978         if (ptr >= (line + sizeof(line) - 2))
 979           break;
 980
 981         *ptr++ = ' ';
 982
 983         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
 984           break;
 985       }
 986
 987       *ptr = '\0';
 988
 989       if (node)
 990         node->length = (size_t)(offset - node->offset);
 991
 992       if (!*text)
 993       {
 994         node = NULL;
 995         break;
 996       }
 997
 998       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
 999       {
1000        /*
1001         * Node already in the index, so replace the text and other
1002         * data...
1003         */
1004
1005         cupsArrayRemove(hi->nodes, node);
1006
1007         if (node->section)
1008           free(node->section);
1009
1010         if (node->text)
1011           free(node->text);
1012
1013         if (node->words)
1014         {
1015           for (word = (help_word_t *)cupsArrayFirst(node->words);
1016                word;
1017                word = (help_word_t *)cupsArrayNext(node->words))
1018             help_delete_word(word);
1019
1020           cupsArrayDelete(node->words);
1021           node->words = NULL;
1022         }
1023
1024         node->section = section[0] ? strdup(section) : NULL;
1025         node->text    = strdup(text);
1026         node->mtime   = mtime;
1027         node->offset  = offset;
1028         node->score   = 0;
1029       }
1030       else
1031       {
1032        /*
1033         * New node...
1034         */
1035
1036         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1037       }
1038
1039      /*
1040       * Go through the text value and replace tabs and newlines with
1041       * whitespace and eliminate extra whitespace...
1042       */
1043
1044       for (ptr = node->text, text = node->text; *ptr;)
1045         if (isspace(*ptr & 255))
1046         {
1047           while (isspace(*ptr & 255))
1048             ptr ++;
1049
1050           *text++ = ' ';
1051         }
1052         else if (text != ptr)
1053           *text++ = *ptr++;
1054         else
1055         {
1056           text ++;
1057           ptr ++;
1058         }
1059
1060       *text = '\0';
1061
1062      /*
1063       * (Re)add the node to the array...
1064       */
1065
1066       cupsArrayAdd(hi->nodes, node);
1067
1068       if (!anchor)
1069         node = NULL;
1070       break;
1071     }
1072
1073     if (node)
1074     {
1075      /*
1076       * Scan this line for words...
1077       */
1078
1079       for (ptr = line; *ptr; ptr ++)
1080       {
1081        /*
1082         * Skip HTML stuff...
1083         */
1084
1085         if (*ptr == '<')
1086         {
1087           if (!strncmp(ptr, "<!--", 4))
1088           {
1089            /*
1090             * Skip HTML comment...
1091             */
1092
1093             if ((text = strstr(ptr + 4, "-->")) == NULL)
1094               ptr += strlen(ptr) - 1;
1095             else
1096               ptr = text + 2;
1097           }
1098           else
1099           {
1100            /*
1101             * Skip HTML element...
1102             */
1103
1104             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1105             {
1106               if (*ptr == '\"' || *ptr == '\'')
1107               {
1108                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1109
1110                 if (!*ptr)
1111                   ptr --;
1112               }
1113             }
1114
1115             if (!*ptr)
1116               ptr --;
1117           }
1118
1119           continue;
1120         }
1121         else if (*ptr == '&')
1122         {
1123          /*
1124           * Skip HTML entity...
1125           */
1126
1127           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1128
1129           if (!*ptr)
1130             ptr --;
1131
1132           continue;
1133         }
1134         else if (!isalnum(*ptr & 255))
1135           continue;
1136
1137        /*
1138         * Found the start of a word, search until we find the end...
1139         */
1140
1141         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1142
1143         wordlen = (size_t)(ptr - text);
1144
1145         memcpy(temp, text, wordlen);
1146         temp[wordlen] = '\0';
1147
1148         ptr --;
1149
1150         if (wordlen > 1 && !bsearch(temp, help_common_words,
1151                                     (sizeof(help_common_words) /
1152                                      sizeof(help_common_words[0])),
1153                                     sizeof(help_common_words[0]),
1154                                     (int (*)(const void *, const void *))
1155                                         _cups_strcasecmp))
1156           help_add_word(node, temp);
1157       }
1158     }
1159
1160    /*
1161     * Get the offset of the next line...
1162     */
1163
1164     offset = cupsFileTell(fp);
1165   }
1166
1167   cupsFileClose(fp);
1168
1169   if (node)
1170     node->length = (size_t)(offset - node->offset);
1171
1172   return (0);
1173 }
1174
1175
1176 /*
1177  * 'help_new_node()' - Create a new node and add it to an index.
1178  */
1179
1180 static help_node_t *                    /* O - Node pointer or NULL on error */
1181 help_new_node(const char   *filename,   /* I - Filename */
1182               const char   *anchor,     /* I - Anchor */
1183               const char   *section,    /* I - Section */
1184               const char   *text,       /* I - Text */
1185               time_t       mtime,       /* I - Modification time */
1186               off_t        offset,      /* I - Offset in file */
1187               size_t       length)      /* I - Length in bytes */
1188 {
1189   help_node_t   *n;                     /* Node */
1190
1191
1192   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1193   if (!n)
1194     return (NULL);
1195
1196   n->filename = strdup(filename);
1197   n->anchor   = anchor ? strdup(anchor) : NULL;
1198   n->section  = (section && *section) ? strdup(section) : NULL;
1199   n->text     = strdup(text);
1200   n->mtime    = mtime;
1201   n->offset   = offset;
1202   n->length   = length;
1203
1204   return (n);
1205 }
1206
1207
1208 /*
1209  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1210  */
1211
1212 static int                              /* O - Difference */
1213 help_sort_by_name(
1214     help_node_t *n1,                    /* I - First node */
1215     help_node_t *n2,                    /* I - Second node */
1216     void        *data)                  /* Unused */
1217 {
1218   int   diff;                           /* Difference */
1219
1220
1221   (void)data;
1222
1223   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1224     return (diff);
1225
1226   if (!n1->anchor && !n2->anchor)
1227     return (0);
1228   else if (!n1->anchor)
1229     return (-1);
1230   else if (!n2->anchor)
1231     return (1);
1232   else
1233     return (strcmp(n1->anchor, n2->anchor));
1234 }
1235
1236
1237 /*
1238  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1239  */
1240
1241 static int                          /* O - Difference */
1242 help_sort_by_score(help_node_t *n1, /* I - First node */
1243                    help_node_t *n2, /* I - Second node */
1244                    void *data)      /* I - Unused */
1245 {
1246   int           diff;                   /* Difference */
1247
1248
1249   (void)data;
1250
1251   if (n1->score != n2->score)
1252     return (n2->score - n1->score);
1253
1254   if (n1->section && !n2->section)
1255     return (1);
1256   else if (!n1->section && n2->section)
1257     return (-1);
1258   else if (n1->section && n2->section &&
1259            (diff = strcmp(n1->section, n2->section)) != 0)
1260     return (diff);
1261
1262   return (_cups_strcasecmp(n1->text, n2->text));
1263 }
1264
1265
1266 /*
1267  * 'help_sort_words()' - Sort words alphabetically.
1268  */
1269
1270 static int                       /* O - Difference */
1271 help_sort_words(help_word_t *w1, /* I - Second word */
1272                 help_word_t *w2, /* I - Second word */
1273                 void *data)      /* Unused */
1274 {
1275   (void)data;
1276   return (_cups_strcasecmp(w1->text, w2->text));
1277 }