cgi-bin/help-index.c

   1 /*
   2  * "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $"
   3  *
   4  *   Online help index routines for CUPS.
   5  *
   6  *   Copyright 2007-2011 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
  13  *   missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  * Contents:
  16  *
  17  *   helpDeleteIndex()          - Delete an index, freeing all memory used.
  18  *   helpFindNode()             - Find a node in an index.
  19  *   helpLoadIndex()            - Load a help index from disk.
  20  *   helpSaveIndex()            - Save a help index to disk.
  21  *   helpSearchIndex()          - Search an index.
  22  *   help_add_word()            - Add a word to a node.
  23  *   help_compile_search()      - Convert a search string into a regular expression.
  24  *   help_delete_node()         - Free all memory used by a node.
  25  *   help_delete_word()         - Free all memory used by a word.
  26  *   help_load_directory()      - Load a directory of files into an index.
  27  *   help_load_file()           - Load a HTML files into an index.
  28  *   help_new_node()            - Create a new node and add it to an index.
  29  *   help_sort_by_name()        - Sort nodes by section, filename, and anchor.
  30  *   help_sort_by_score()       - Sort nodes by score and text.
  31  *   help_sort_words()          - Sort words alphabetically.
  32  */
  33
  34 /*
  35  * Include necessary headers...
  36  */
  37
  38 #include "cgi-private.h"
  39 #include <cups/dir.h>
  40
  41
  42 /*
  43  * List of common English words that should not be indexed...
  44  */
  45
  46 static char             help_common_words[][6] =
  47                         {
  48                           "about",
  49                           "all",
  50                           "an",
  51                           "and",
  52                           "are",
  53                           "as",
  54                           "at",
  55                           "be",
  56                           "been",
  57                           "but",
  58                           "by",
  59                           "call",
  60                           "can",
  61                           "come",
  62                           "could",
  63                           "day",
  64                           "did",
  65                           "do",
  66                           "down",
  67                           "each",
  68                           "find",
  69                           "first",
  70                           "for",
  71                           "from",
  72                           "go",
  73                           "had",
  74                           "has",
  75                           "have",
  76                           "he",
  77                           "her",
  78                           "him",
  79                           "his",
  80                           "hot",
  81                           "how",
  82                           "if",
  83                           "in",
  84                           "is",
  85                           "it",
  86                           "know",
  87                           "like",
  88                           "long",
  89                           "look",
  90                           "make",
  91                           "many",
  92                           "may",
  93                           "more",
  94                           "most",
  95                           "my",
  96                           "no",
  97                           "now",
  98                           "of",
  99                           "on",
 100                           "one",
 101                           "or",
 102                           "other",
 103                           "out",
 104                           "over",
 105                           "said",
 106                           "see",
 107                           "she",
 108                           "side",
 109                           "so",
 110                           "some",
 111                           "sound",
 112                           "than",
 113                           "that",
 114                           "the",
 115                           "their",
 116                           "them",
 117                           "then",
 118                           "there",
 119                           "these",
 120                           "they",
 121                           "thing",
 122                           "this",
 123                           "time",
 124                           "to",
 125                           "two",
 126                           "up",
 127                           "use",
 128                           "was",
 129                           "water",
 130                           "way",
 131                           "we",
 132                           "were",
 133                           "what",
 134                           "when",
 135                           "which",
 136                           "who",
 137                           "will",
 138                           "with",
 139                           "word",
 140                           "would",
 141                           "write",
 142                           "you",
 143                           "your"
 144                         };
 145
 146
 147 /*
 148  * Local functions...
 149  */
 150
 151 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 152 static void             help_delete_node(help_node_t *n);
 153 static void             help_delete_word(help_word_t *w);
 154 static int              help_load_directory(help_index_t *hi,
 155                                             const char *directory,
 156                                             const char *relative);
 157 static int              help_load_file(help_index_t *hi,
 158                                        const char *filename,
 159                                        const char *relative,
 160                                        time_t     mtime);
 161 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 162                                        const char *section, const char *text,
 163                                        time_t mtime, off_t offset,
 164                                        size_t length)
 165                                        __attribute__((nonnull(1,3,4)));
 166 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 167 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 168 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 169
 170
 171 /*
 172  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 173  */
 174
 175 void
 176 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 177 {
 178   help_node_t   *node;                  /* Current node */
 179
 180
 181   DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
 182
 183   if (!hi)
 184     return;
 185
 186   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 187        node;
 188        node = (help_node_t *)cupsArrayNext(hi->nodes))
 189   {
 190     if (!hi->search)
 191       help_delete_node(node);
 192   }
 193
 194   cupsArrayDelete(hi->nodes);
 195   cupsArrayDelete(hi->sorted);
 196
 197   free(hi);
 198 }
 199
 200
 201 /*
 202  * 'helpFindNode()' - Find a node in an index.
 203  */
 204
 205 help_node_t *                           /* O - Node pointer or NULL */
 206 helpFindNode(help_index_t *hi,          /* I - Index */
 207              const char   *filename,    /* I - Filename */
 208              const char   *anchor)      /* I - Anchor */
 209 {
 210   help_node_t   key;                    /* Search key */
 211
 212
 213   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
 214                 hi, filename, anchor));
 215
 216  /*
 217   * Range check input...
 218   */
 219
 220   if (!hi || !filename)
 221     return (NULL);
 222
 223  /*
 224   * Initialize the search key...
 225   */
 226
 227   key.filename = (char *)filename;
 228   key.anchor   = (char *)anchor;
 229
 230  /*
 231   * Return any match...
 232   */
 233
 234   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 235 }
 236
 237
 238 /*
 239  * 'helpLoadIndex()' - Load a help index from disk.
 240  */
 241
 242 help_index_t *                          /* O - Index pointer or NULL */
 243 helpLoadIndex(const char *hifile,       /* I - Index filename */
 244               const char *directory)    /* I - Directory that is indexed */
 245 {
 246   help_index_t  *hi;                    /* Help index */
 247   cups_file_t   *fp;                    /* Current file */
 248   char          line[2048],             /* Line from file */
 249                 *ptr,                   /* Pointer into line */
 250                 *filename,              /* Filename in line */
 251                 *anchor,                /* Anchor in line */
 252                 *sectptr,               /* Section pointer in line */
 253                 section[1024],          /* Section name */
 254                 *text;                  /* Text in line */
 255   time_t        mtime;                  /* Modification time */
 256   off_t         offset;                 /* Offset into file */
 257   size_t        length;                 /* Length in bytes */
 258   int           update;                 /* Update? */
 259   help_node_t   *node;                  /* Current node */
 260   help_word_t   *word;                  /* Current word */
 261
 262
 263   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
 264                 hifile, directory));
 265
 266  /*
 267   * Create a new, empty index.
 268   */
 269
 270   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 271     return (NULL);
 272
 273   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 274   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 275
 276   if (!hi->nodes || !hi->sorted)
 277   {
 278     cupsArrayDelete(hi->nodes);
 279     cupsArrayDelete(hi->sorted);
 280     free(hi);
 281     return (NULL);
 282   }
 283
 284  /*
 285   * Try loading the existing index file...
 286   */
 287
 288   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 289   {
 290    /*
 291     * Lock the file and then read the first line...
 292     */
 293
 294     cupsFileLock(fp, 1);
 295
 296     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 297     {
 298      /*
 299       * Got a valid header line, now read the data lines...
 300       */
 301
 302       node = NULL;
 303
 304       while (cupsFileGets(fp, line, sizeof(line)))
 305       {
 306        /*
 307         * Each line looks like one of the following:
 308         *
 309         *     filename mtime offset length "section" "text"
 310         *     filename#anchor offset length "text"
 311         *     SP count word
 312         */
 313
 314         if (line[0] == ' ')
 315         {
 316          /*
 317           * Read a word in the current node...
 318           */
 319
 320           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 321             continue;
 322
 323           if ((word = help_add_word(node, ptr + 1)) != NULL)
 324             word->count = atoi(line + 1);
 325         }
 326         else
 327         {
 328          /*
 329           * Add a node...
 330           */
 331
 332           filename = line;
 333
 334           if ((ptr = strchr(line, ' ')) == NULL)
 335             break;
 336
 337           while (isspace(*ptr & 255))
 338             *ptr++ = '\0';
 339
 340           if ((anchor = strrchr(filename, '#')) != NULL)
 341           {
 342             *anchor++ = '\0';
 343             mtime = 0;
 344           }
 345           else
 346             mtime = strtol(ptr, &ptr, 10);
 347
 348           offset = strtoll(ptr, &ptr, 10);
 349           length = strtoll(ptr, &ptr, 10);
 350
 351           while (isspace(*ptr & 255))
 352             ptr ++;
 353
 354           if (!anchor)
 355           {
 356            /*
 357             * Get section...
 358             */
 359
 360             if (*ptr != '\"')
 361               break;
 362
 363             ptr ++;
 364             sectptr = ptr;
 365
 366             while (*ptr && *ptr != '\"')
 367               ptr ++;
 368
 369             if (*ptr != '\"')
 370               break;
 371
 372             *ptr++ = '\0';
 373
 374             strlcpy(section, sectptr, sizeof(section));
 375
 376             while (isspace(*ptr & 255))
 377               ptr ++;
 378           }
 379
 380           if (*ptr != '\"')
 381             break;
 382
 383           ptr ++;
 384           text = ptr;
 385
 386           while (*ptr && *ptr != '\"')
 387             ptr ++;
 388
 389           if (*ptr != '\"')
 390             break;
 391
 392           *ptr++ = '\0';
 393
 394           if ((node = help_new_node(filename, anchor, section, text,
 395                                     mtime, offset, length)) == NULL)
 396             break;
 397
 398           node->score = -1;
 399
 400           cupsArrayAdd(hi->nodes, node);
 401         }
 402       }
 403     }
 404
 405     cupsFileClose(fp);
 406   }
 407
 408  /*
 409   * Scan for new/updated files...
 410   */
 411
 412   update = help_load_directory(hi, directory, NULL);
 413
 414  /*
 415   * Remove any files that are no longer installed...
 416   */
 417
 418   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 419        node;
 420        node = (help_node_t *)cupsArrayNext(hi->nodes))
 421     if (node->score < 0)
 422     {
 423      /*
 424       * Delete this node...
 425       */
 426
 427       cupsArrayRemove(hi->nodes, node);
 428       help_delete_node(node);
 429     }
 430
 431  /*
 432   * Add nodes to the sorted array...
 433   */
 434
 435   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 436        node;
 437        node = (help_node_t *)cupsArrayNext(hi->nodes))
 438     cupsArrayAdd(hi->sorted, node);
 439
 440  /*
 441   * Save the index if we updated it...
 442   */
 443
 444   if (update)
 445     helpSaveIndex(hi, hifile);
 446
 447  /*
 448   * Return the index...
 449   */
 450
 451   return (hi);
 452 }
 453
 454
 455 /*
 456  * 'helpSaveIndex()' - Save a help index to disk.
 457  */
 458
 459 int                                     /* O - 0 on success, -1 on error */
 460 helpSaveIndex(help_index_t *hi,         /* I - Index */
 461               const char   *hifile)     /* I - Index filename */
 462 {
 463   cups_file_t   *fp;                    /* Index file */
 464   help_node_t   *node;                  /* Current node */
 465   help_word_t   *word;                  /* Current word */
 466
 467
 468   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
 469
 470  /*
 471   * Try creating a new index file...
 472   */
 473
 474   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 475     return (-1);
 476
 477  /*
 478   * Lock the file while we write it...
 479   */
 480
 481   cupsFileLock(fp, 1);
 482
 483   cupsFilePuts(fp, "HELPV2\n");
 484
 485   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 486        node;
 487        node = (help_node_t *)cupsArrayNext(hi->nodes))
 488   {
 489    /*
 490     * Write the current node with/without the anchor...
 491     */
 492
 493     if (node->anchor)
 494     {
 495       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 496                          node->filename, node->anchor,
 497                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 498                          node->text) < 0)
 499         break;
 500     }
 501     else
 502     {
 503       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 504                          node->filename, (int)node->mtime,
 505                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 506                          node->section ? node->section : "", node->text) < 0)
 507         break;
 508     }
 509
 510    /*
 511     * Then write the words associated with the node...
 512     */
 513
 514     for (word = (help_word_t *)cupsArrayFirst(node->words);
 515          word;
 516          word = (help_word_t *)cupsArrayNext(node->words))
 517       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 518         break;
 519   }
 520
 521   cupsFileFlush(fp);
 522
 523   if (cupsFileClose(fp) < 0)
 524     return (-1);
 525   else if (node)
 526     return (-1);
 527   else
 528     return (0);
 529 }
 530
 531
 532 /*
 533  * 'helpSearchIndex()' - Search an index.
 534  */
 535
 536 help_index_t *                          /* O - Search index */
 537 helpSearchIndex(help_index_t *hi,       /* I - Index */
 538                 const char   *query,    /* I - Query string */
 539                 const char   *section,  /* I - Limit search to this section */
 540                 const char   *filename) /* I - Limit search to this file */
 541 {
 542   help_index_t  *search;                /* Search index */
 543   help_node_t   *node;                  /* Current node */
 544   help_word_t   *word;                  /* Current word */
 545   void          *sc;                    /* Search context */
 546   int           matches;                /* Number of matches */
 547
 548
 549   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
 550                 hi, query, filename));
 551
 552  /*
 553   * Range check...
 554   */
 555
 556   if (!hi || !query)
 557     return (NULL);
 558
 559  /*
 560   * Reset the scores of all nodes to 0...
 561   */
 562
 563   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 564        node;
 565        node = (help_node_t *)cupsArrayNext(hi->nodes))
 566     node->score = 0;
 567
 568  /*
 569   * Find the first node to search in...
 570   */
 571
 572   if (filename)
 573   {
 574     node = helpFindNode(hi, filename, NULL);
 575     if (!node)
 576       return (NULL);
 577   }
 578   else
 579     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 580
 581  /*
 582   * Convert the query into a regular expression...
 583   */
 584
 585   sc = cgiCompileSearch(query);
 586   if (!sc)
 587     return (NULL);
 588
 589  /*
 590   * Allocate a search index...
 591   */
 592
 593   search = calloc(1, sizeof(help_index_t));
 594   if (!search)
 595   {
 596     cgiFreeSearch(sc);
 597     return (NULL);
 598   }
 599
 600   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 601   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 602
 603   if (!search->nodes || !search->sorted)
 604   {
 605     cupsArrayDelete(search->nodes);
 606     cupsArrayDelete(search->sorted);
 607     free(search);
 608     cgiFreeSearch(sc);
 609     return (NULL);
 610   }
 611
 612   search->search = 1;
 613
 614  /*
 615   * Check each node in the index, adding matching nodes to the
 616   * search index...
 617   */
 618
 619   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 620     if (section && strcmp(node->section, section))
 621       continue;
 622     else if (filename && strcmp(node->filename, filename))
 623       continue;
 624     else
 625     {
 626       matches = cgiDoSearch(sc, node->text);
 627
 628       for (word = (help_word_t *)cupsArrayFirst(node->words);
 629            word;
 630            word = (help_word_t *)cupsArrayNext(node->words))
 631         if (cgiDoSearch(sc, word->text) > 0)
 632           matches += word->count;
 633
 634       if (matches > 0)
 635       {
 636        /*
 637         * Found a match, add the node to the search index...
 638         */
 639
 640         node->score = matches;
 641
 642         cupsArrayAdd(search->nodes, node);
 643         cupsArrayAdd(search->sorted, node);
 644       }
 645     }
 646
 647  /*
 648   * Free the search context...
 649   */
 650
 651   cgiFreeSearch(sc);
 652
 653  /*
 654   * Return the results...
 655   */
 656
 657   return (search);
 658 }
 659
 660
 661 /*
 662  * 'help_add_word()' - Add a word to a node.
 663  */
 664
 665 static help_word_t *                    /* O - New word */
 666 help_add_word(help_node_t *n,           /* I - Node */
 667               const char  *text)        /* I - Word text */
 668 {
 669   help_word_t   *w,                     /* New word */
 670                 key;                    /* Search key */
 671
 672
 673   DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
 674
 675  /*
 676   * Create the words array as needed...
 677   */
 678
 679   if (!n->words)
 680     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 681
 682  /*
 683   * See if the word is already added...
 684   */
 685
 686   key.text = (char *)text;
 687
 688   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 689   {
 690    /*
 691     * Create a new word...
 692     */
 693
 694     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 695       return (NULL);
 696
 697     if ((w->text = strdup(text)) == NULL)
 698     {
 699       free(w);
 700       return (NULL);
 701     }
 702
 703     cupsArrayAdd(n->words, w);
 704   }
 705
 706  /*
 707   * Bump the counter for this word and return it...
 708   */
 709
 710   w->count ++;
 711
 712   return (w);
 713 }
 714
 715
 716 /*
 717  * 'help_delete_node()' - Free all memory used by a node.
 718  */
 719
 720 static void
 721 help_delete_node(help_node_t *n)        /* I - Node */
 722 {
 723   help_word_t   *w;                     /* Current word */
 724
 725
 726   DEBUG_printf(("2help_delete_node(n=%p)", n));
 727
 728   if (!n)
 729     return;
 730
 731   if (n->filename)
 732     free(n->filename);
 733
 734   if (n->anchor)
 735     free(n->anchor);
 736
 737   if (n->section)
 738     free(n->section);
 739
 740   if (n->text)
 741     free(n->text);
 742
 743   for (w = (help_word_t *)cupsArrayFirst(n->words);
 744        w;
 745        w = (help_word_t *)cupsArrayNext(n->words))
 746     help_delete_word(w);
 747
 748   cupsArrayDelete(n->words);
 749
 750   free(n);
 751 }
 752
 753
 754 /*
 755  * 'help_delete_word()' - Free all memory used by a word.
 756  */
 757
 758 static void
 759 help_delete_word(help_word_t *w)        /* I - Word */
 760 {
 761   DEBUG_printf(("2help_delete_word(w=%p)", w));
 762
 763   if (!w)
 764     return;
 765
 766   if (w->text)
 767     free(w->text);
 768
 769   free(w);
 770 }
 771
 772
 773 /*
 774  * 'help_load_directory()' - Load a directory of files into an index.
 775  */
 776
 777 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 778 help_load_directory(
 779     help_index_t *hi,                   /* I - Index */
 780     const char   *directory,            /* I - Directory */
 781     const char   *relative)             /* I - Relative path */
 782 {
 783   cups_dir_t    *dir;                   /* Directory file */
 784   cups_dentry_t *dent;                  /* Directory entry */
 785   char          *ext,                   /* Pointer to extension */
 786                 filename[1024],         /* Full filename */
 787                 relname[1024];          /* Relative filename */
 788   int           update;                 /* Updated? */
 789   help_node_t   *node;                  /* Current node */
 790
 791
 792   DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
 793                 hi, directory, relative));
 794
 795  /*
 796   * Open the directory and scan it...
 797   */
 798
 799   if ((dir = cupsDirOpen(directory)) == NULL)
 800     return (0);
 801
 802   update = 0;
 803
 804   while ((dent = cupsDirRead(dir)) != NULL)
 805   {
 806    /*
 807     * Skip "." files...
 808     */
 809
 810     if (dent->filename[0] == '.')
 811       continue;
 812
 813    /*
 814     * Get absolute and relative filenames...
 815     */
 816
 817     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 818     if (relative)
 819       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 820     else
 821       strlcpy(relname, dent->filename, sizeof(relname));
 822
 823    /*
 824     * Check if we have a HTML file...
 825     */
 826
 827     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 828         (!ext[5] || !strcmp(ext + 5, ".gz")))
 829     {
 830      /*
 831       * HTML file, see if we have already indexed the file...
 832       */
 833
 834       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 835       {
 836        /*
 837         * File already indexed - check dates to confirm that the
 838         * index is up-to-date...
 839         */
 840
 841         if (node->mtime == dent->fileinfo.st_mtime)
 842         {
 843          /*
 844           * Same modification time, so mark all of the nodes
 845           * for this file as up-to-date...
 846           */
 847
 848           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 849             if (!strcmp(node->filename, relname))
 850               node->score = 0;
 851             else
 852               break;
 853
 854           continue;
 855         }
 856       }
 857
 858       update = 1;
 859
 860       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 861     }
 862     else if (S_ISDIR(dent->fileinfo.st_mode))
 863     {
 864      /*
 865       * Process sub-directory...
 866       */
 867
 868       if (help_load_directory(hi, filename, relname) == 1)
 869         update = 1;
 870     }
 871   }
 872
 873   cupsDirClose(dir);
 874
 875   return (update);
 876 }
 877
 878
 879 /*
 880  * 'help_load_file()' - Load a HTML files into an index.
 881  */
 882
 883 static int                              /* O - 0 = success, -1 = error */
 884 help_load_file(
 885     help_index_t *hi,                   /* I - Index */
 886     const char   *filename,             /* I - Filename */
 887     const char   *relative,             /* I - Relative path */
 888     time_t       mtime)                 /* I - Modification time */
 889 {
 890   cups_file_t   *fp;                    /* HTML file */
 891   help_node_t   *node;                  /* Current node */
 892   char          line[1024],             /* Line from file */
 893                 temp[1024],             /* Temporary word */
 894                 section[1024],          /* Section */
 895                 *ptr,                   /* Pointer into line */
 896                 *anchor,                /* Anchor name */
 897                 *text;                  /* Text for anchor */
 898   off_t         offset;                 /* File offset */
 899   char          quote;                  /* Quote character */
 900   help_word_t   *word;                  /* Current word */
 901   int           wordlen;                /* Length of word */
 902
 903
 904   DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
 905                 "mtime=%ld)", hi, filename, relative, mtime));
 906
 907   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 908     return (-1);
 909
 910   node   = NULL;
 911   offset = 0;
 912
 913   strcpy(section, "Other");
 914
 915   while (cupsFileGets(fp, line, sizeof(line)))
 916   {
 917    /*
 918     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 919     */
 920
 921     if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
 922     {
 923      /*
 924       * Got section line, copy it!
 925       */
 926
 927       for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
 928
 929       strlcpy(section, ptr, sizeof(section));
 930       if ((ptr = strstr(section, "-->")) != NULL)
 931       {
 932        /*
 933         * Strip comment stuff from end of line...
 934         */
 935
 936         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 937
 938         if (isspace(*ptr & 255))
 939           *ptr = '\0';
 940       }
 941       continue;
 942     }
 943
 944     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 945     {
 946       ptr ++;
 947
 948       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
 949       {
 950        /*
 951         * Found the title...
 952         */
 953
 954         anchor = NULL;
 955         ptr += 6;
 956       }
 957       else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
 958       {
 959        /*
 960         * Found an anchor...
 961         */
 962
 963         ptr += 7;
 964
 965         if (*ptr == '\"' || *ptr == '\'')
 966         {
 967          /*
 968           * Get quoted anchor...
 969           */
 970
 971           quote  = *ptr;
 972           anchor = ptr + 1;
 973           if ((ptr = strchr(anchor, quote)) != NULL)
 974             *ptr++ = '\0';
 975           else
 976             break;
 977         }
 978         else
 979         {
 980          /*
 981           * Get unquoted anchor...
 982           */
 983
 984           anchor = ptr + 1;
 985
 986           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 987
 988           if (*ptr)
 989             *ptr++ = '\0';
 990           else
 991             break;
 992         }
 993
 994        /*
 995         * Got the anchor, now lets find the end...
 996         */
 997
 998         while (*ptr && *ptr != '>')
 999           ptr ++;
1000
1001         if (*ptr != '>')
1002           break;
1003
1004         ptr ++;
1005       }
1006       else
1007         continue;
1008
1009      /*
1010       * Now collect text for the link...
1011       */
1012
1013       text = ptr;
1014       while ((ptr = strchr(text, '<')) == NULL)
1015       {
1016         ptr = text + strlen(text);
1017         if (ptr >= (line + sizeof(line) - 2))
1018           break;
1019
1020         *ptr++ = ' ';
1021
1022         if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1023           break;
1024       }
1025
1026       *ptr = '\0';
1027
1028       if (node)
1029         node->length = offset - node->offset;
1030
1031       if (!*text)
1032       {
1033         node = NULL;
1034         break;
1035       }
1036
1037       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1038       {
1039        /*
1040         * Node already in the index, so replace the text and other
1041         * data...
1042         */
1043
1044         cupsArrayRemove(hi->nodes, node);
1045
1046         if (node->section)
1047           free(node->section);
1048
1049         if (node->text)
1050           free(node->text);
1051
1052         if (node->words)
1053         {
1054           for (word = (help_word_t *)cupsArrayFirst(node->words);
1055                word;
1056                word = (help_word_t *)cupsArrayNext(node->words))
1057             help_delete_word(word);
1058
1059           cupsArrayDelete(node->words);
1060           node->words = NULL;
1061         }
1062
1063         node->section = section[0] ? strdup(section) : NULL;
1064         node->text    = strdup(text);
1065         node->mtime   = mtime;
1066         node->offset  = offset;
1067         node->score   = 0;
1068       }
1069       else
1070       {
1071        /*
1072         * New node...
1073         */
1074
1075         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1076       }
1077
1078      /*
1079       * Go through the text value and replace tabs and newlines with
1080       * whitespace and eliminate extra whitespace...
1081       */
1082
1083       for (ptr = node->text, text = node->text; *ptr;)
1084         if (isspace(*ptr & 255))
1085         {
1086           while (isspace(*ptr & 255))
1087             ptr ++;
1088
1089           *text++ = ' ';
1090         }
1091         else if (text != ptr)
1092           *text++ = *ptr++;
1093         else
1094         {
1095           text ++;
1096           ptr ++;
1097         }
1098
1099       *text = '\0';
1100
1101      /*
1102       * (Re)add the node to the array...
1103       */
1104
1105       cupsArrayAdd(hi->nodes, node);
1106
1107       if (!anchor)
1108         node = NULL;
1109       break;
1110     }
1111
1112     if (node)
1113     {
1114      /*
1115       * Scan this line for words...
1116       */
1117
1118       for (ptr = line; *ptr; ptr ++)
1119       {
1120        /*
1121         * Skip HTML stuff...
1122         */
1123
1124         if (*ptr == '<')
1125         {
1126           if (!strncmp(ptr, "<!--", 4))
1127           {
1128            /*
1129             * Skip HTML comment...
1130             */
1131
1132             if ((text = strstr(ptr + 4, "-->")) == NULL)
1133               ptr += strlen(ptr) - 1;
1134             else
1135               ptr = text + 2;
1136           }
1137           else
1138           {
1139            /*
1140             * Skip HTML element...
1141             */
1142
1143             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1144             {
1145               if (*ptr == '\"' || *ptr == '\'')
1146               {
1147                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1148
1149                 if (!*ptr)
1150                   ptr --;
1151               }
1152             }
1153
1154             if (!*ptr)
1155               ptr --;
1156           }
1157
1158           continue;
1159         }
1160         else if (*ptr == '&')
1161         {
1162          /*
1163           * Skip HTML entity...
1164           */
1165
1166           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1167
1168           if (!*ptr)
1169             ptr --;
1170
1171           continue;
1172         }
1173         else if (!isalnum(*ptr & 255))
1174           continue;
1175
1176        /*
1177         * Found the start of a word, search until we find the end...
1178         */
1179
1180         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1181
1182         wordlen = ptr - text;
1183
1184         memcpy(temp, text, wordlen);
1185         temp[wordlen] = '\0';
1186
1187         ptr --;
1188
1189         if (wordlen > 1 && !bsearch(temp, help_common_words,
1190                                     (sizeof(help_common_words) /
1191                                      sizeof(help_common_words[0])),
1192                                     sizeof(help_common_words[0]),
1193                                     (int (*)(const void *, const void *))
1194                                         _cups_strcasecmp))
1195           help_add_word(node, temp);
1196       }
1197     }
1198
1199    /*
1200     * Get the offset of the next line...
1201     */
1202
1203     offset = cupsFileTell(fp);
1204   }
1205
1206   cupsFileClose(fp);
1207
1208   if (node)
1209     node->length = offset - node->offset;
1210
1211   return (0);
1212 }
1213
1214
1215 /*
1216  * 'help_new_node()' - Create a new node and add it to an index.
1217  */
1218
1219 static help_node_t *                    /* O - Node pointer or NULL on error */
1220 help_new_node(const char   *filename,   /* I - Filename */
1221               const char   *anchor,     /* I - Anchor */
1222               const char   *section,    /* I - Section */
1223               const char   *text,       /* I - Text */
1224               time_t       mtime,       /* I - Modification time */
1225               off_t        offset,      /* I - Offset in file */
1226               size_t       length)      /* I - Length in bytes */
1227 {
1228   help_node_t   *n;                     /* Node */
1229
1230
1231   DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1232                 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1233                 (long)mtime, (long)offset, (long)length));
1234
1235   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1236   if (!n)
1237     return (NULL);
1238
1239   n->filename = strdup(filename);
1240   n->anchor   = anchor ? strdup(anchor) : NULL;
1241   n->section  = (section && *section) ? strdup(section) : NULL;
1242   n->text     = strdup(text);
1243   n->mtime    = mtime;
1244   n->offset   = offset;
1245   n->length   = length;
1246
1247   return (n);
1248 }
1249
1250
1251 /*
1252  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1253  */
1254
1255 static int                              /* O - Difference */
1256 help_sort_by_name(help_node_t *n1,      /* I - First node */
1257                   help_node_t *n2)      /* I - Second node */
1258 {
1259   int           diff;                   /* Difference */
1260
1261
1262   DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1263                 n1, n1->filename, n1->anchor,
1264                 n2, n2->filename, n2->anchor));
1265
1266   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1267     return (diff);
1268
1269   if (!n1->anchor && !n2->anchor)
1270     return (0);
1271   else if (!n1->anchor)
1272     return (-1);
1273   else if (!n2->anchor)
1274     return (1);
1275   else
1276     return (strcmp(n1->anchor, n2->anchor));
1277 }
1278
1279
1280 /*
1281  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1282  */
1283
1284 static int                              /* O - Difference */
1285 help_sort_by_score(help_node_t *n1,     /* I - First node */
1286                    help_node_t *n2)     /* I - Second node */
1287 {
1288   int           diff;                   /* Difference */
1289
1290
1291   DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1292                 "n2=%p(%d \"%s\" \"%s\")",
1293                 n1, n1->score, n1->section, n1->text,
1294                 n2, n2->score, n2->section, n2->text));
1295
1296   if (n1->score != n2->score)
1297     return (n2->score - n1->score);
1298
1299   if (n1->section && !n2->section)
1300     return (1);
1301   else if (!n1->section && n2->section)
1302     return (-1);
1303   else if (n1->section && n2->section &&
1304            (diff = strcmp(n1->section, n2->section)) != 0)
1305     return (diff);
1306
1307   return (_cups_strcasecmp(n1->text, n2->text));
1308 }
1309
1310
1311 /*
1312  * 'help_sort_words()' - Sort words alphabetically.
1313  */
1314
1315 static int                              /* O - Difference */
1316 help_sort_words(help_word_t *w1,        /* I - Second word */
1317                 help_word_t *w2)        /* I - Second word */
1318 {
1319   DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1320                 w1, w1->text, w2, w2->text));
1321
1322   return (_cups_strcasecmp(w1->text, w2->text));
1323 }
1324
1325
1326 /*
1327  * End of "$Id: help-index.c 7717 2008-07-04 02:35:33Z mike $".
1328  */