cgi-bin/help-index.c

   1 /*
   2  * "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $"
   3  *
   4  *   On-line help index routines for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2007 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are the
   9  *   property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the file
  11  *   "LICENSE.txt" which should have been included with this file.  If this
  12  *   file is missing or damaged please contact Easy Software Products
  13  *   at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   helpDeleteIndex()          - Delete an index, freeing all memory used.
  27  *   helpFindNode()             - Find a node in an index.
  28  *   helpLoadIndex()            - Load a help index from disk.
  29  *   helpSaveIndex()            - Save a help index to disk.
  30  *   helpSearchIndex()          - Search an index.
  31  *   help_add_word()            - Add a word to a node.
  32  *   help_compile_search()      - Convert a search string into a regular expression.
  33  *   help_delete_node()         - Free all memory used by a node.
  34  *   help_delete_word()         - Free all memory used by a word.
  35  *   help_load_directory()      - Load a directory of files into an index.
  36  *   help_load_file()           - Load an HTML file into an index.
  37  *   help_new_node()            - Create a new node and add it to an index.
  38  *   help_sort_by_name()        - Sort nodes by section, filename, and anchor.
  39  *   help_sort_by_score()       - Sort nodes by score and text.
  40  *   help_sort_words()          - Sort words alphabetically.
  41  */
  42
  43 /*
  44  * Include necessary headers...
  45  */
  46
  47 #include "cgi-private.h"
  48 #include <cups/dir.h>
  49
  50
  51 /*
  52  * List of common English words that should not be indexed...
  53  */
  54
  55 static char             help_common_words[][6] =
  56                         {
  57                           "about",
  58                           "all",
  59                           "an",
  60                           "and",
  61                           "are",
  62                           "as",
  63                           "at",
  64                           "be",
  65                           "been",
  66                           "but",
  67                           "by",
  68                           "call",
  69                           "can",
  70                           "come",
  71                           "could",
  72                           "day",
  73                           "did",
  74                           "do",
  75                           "down",
  76                           "each",
  77                           "find",
  78                           "first",
  79                           "for",
  80                           "from",
  81                           "go",
  82                           "had",
  83                           "has",
  84                           "have",
  85                           "he",
  86                           "her",
  87                           "him",
  88                           "his",
  89                           "hot",
  90                           "how",
  91                           "if",
  92                           "in",
  93                           "is",
  94                           "it",
  95                           "know",
  96                           "like",
  97                           "long",
  98                           "look",
  99                           "make",
 100                           "many",
 101                           "may",
 102                           "more",
 103                           "most",
 104                           "my",
 105                           "no",
 106                           "now",
 107                           "of",
 108                           "on",
 109                           "one",
 110                           "or",
 111                           "other",
 112                           "out",
 113                           "over",
 114                           "said",
 115                           "see",
 116                           "she",
 117                           "side",
 118                           "so",
 119                           "some",
 120                           "sound",
 121                           "than",
 122                           "that",
 123                           "the",
 124                           "their",
 125                           "them",
 126                           "then",
 127                           "there",
 128                           "these",
 129                           "they",
 130                           "thing",
 131                           "this",
 132                           "time",
 133                           "to",
 134                           "two",
 135                           "up",
 136                           "use",
 137                           "was",
 138                           "water",
 139                           "way",
 140                           "we",
 141                           "were",
 142                           "what",
 143                           "when",
 144                           "which",
 145                           "who",
 146                           "will",
 147                           "with",
 148                           "word",
 149                           "would",
 150                           "write",
 151                           "you",
 152                           "your"
 153                         };
 154
 155
 156 /*
 157  * Local functions...
 158  */
 159
 160 static help_word_t      *help_add_word(help_node_t *n, const char *text);
 161 static void             help_delete_node(help_node_t *n);
 162 static void             help_delete_word(help_word_t *w);
 163 static int              help_load_directory(help_index_t *hi,
 164                                             const char *directory,
 165                                             const char *relative);
 166 static int              help_load_file(help_index_t *hi,
 167                                        const char *filename,
 168                                        const char *relative,
 169                                        time_t     mtime);
 170 static help_node_t      *help_new_node(const char *filename, const char *anchor,
 171                                        const char *section, const char *text,
 172                                        time_t mtime, off_t offset,
 173                                        size_t length);
 174 static int              help_sort_by_name(help_node_t *p1, help_node_t *p2);
 175 static int              help_sort_by_score(help_node_t *p1, help_node_t *p2);
 176 static int              help_sort_words(help_word_t *w1, help_word_t *w2);
 177
 178
 179 /*
 180  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
 181  */
 182
 183 void
 184 helpDeleteIndex(help_index_t *hi)       /* I - Help index */
 185 {
 186   help_node_t   *node;                  /* Current node */
 187
 188
 189   DEBUG_printf(("helpDeleteIndex(hi=%p)\n", hi));
 190
 191   if (!hi)
 192     return;
 193
 194   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 195        node;
 196        node = (help_node_t *)cupsArrayNext(hi->nodes))
 197   {
 198     if (!hi->search)
 199       help_delete_node(node);
 200   }
 201
 202   cupsArrayDelete(hi->nodes);
 203   cupsArrayDelete(hi->sorted);
 204
 205   free(hi);
 206 }
 207
 208
 209 /*
 210  * 'helpFindNode()' - Find a node in an index.
 211  */
 212
 213 help_node_t *                           /* O - Node pointer or NULL */
 214 helpFindNode(help_index_t *hi,          /* I - Index */
 215              const char   *filename,    /* I - Filename */
 216              const char   *anchor)      /* I - Anchor */
 217 {
 218   help_node_t   key;                    /* Search key */
 219
 220
 221   DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")\n",
 222                 hi, filename ? filename : "(nil)", anchor ? anchor : "(nil)"));
 223
 224  /*
 225   * Range check input...
 226   */
 227
 228   if (!hi || !filename)
 229     return (NULL);
 230
 231  /*
 232   * Initialize the search key...
 233   */
 234
 235   key.filename = (char *)filename;
 236   key.anchor   = (char *)anchor;
 237
 238  /*
 239   * Return any match...
 240   */
 241
 242   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
 243 }
 244
 245
 246 /*
 247  * 'helpLoadIndex()' - Load a help index from disk.
 248  */
 249
 250 help_index_t *                          /* O - Index pointer or NULL */
 251 helpLoadIndex(const char *hifile,       /* I - Index filename */
 252               const char *directory)    /* I - Directory that is indexed */
 253 {
 254   help_index_t  *hi;                    /* Help index */
 255   cups_file_t   *fp;                    /* Current file */
 256   char          line[2048],             /* Line from file */
 257                 *ptr,                   /* Pointer into line */
 258                 *filename,              /* Filename in line */
 259                 *anchor,                /* Anchor in line */
 260                 *sectptr,               /* Section pointer in line */
 261                 section[1024],          /* Section name */
 262                 *text;                  /* Text in line */
 263   time_t        mtime;                  /* Modification time */
 264   off_t         offset;                 /* Offset into file */
 265   size_t        length;                 /* Length in bytes */
 266   int           update;                 /* Update? */
 267   help_node_t   *node;                  /* Current node */
 268   help_word_t   *word;                  /* Current word */
 269
 270
 271   DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")\n",
 272                 hifile, directory));
 273
 274  /*
 275   * Create a new, empty index.
 276   */
 277
 278   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
 279     return (NULL);
 280
 281   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 282   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 283
 284   if (!hi->nodes || !hi->sorted)
 285   {
 286     cupsArrayDelete(hi->nodes);
 287     cupsArrayDelete(hi->sorted);
 288     free(hi);
 289     return (NULL);
 290   }
 291
 292  /*
 293   * Try loading the existing index file...
 294   */
 295
 296   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
 297   {
 298    /*
 299     * Lock the file and then read the first line...
 300     */
 301
 302     cupsFileLock(fp, 1);
 303
 304     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
 305     {
 306      /*
 307       * Got a valid header line, now read the data lines...
 308       */
 309
 310       node = NULL;
 311
 312       while (cupsFileGets(fp, line, sizeof(line)))
 313       {
 314        /*
 315         * Each line looks like one of the following:
 316         *
 317         *     filename mtime offset length "section" "text"
 318         *     filename#anchor offset length "text"
 319         *     SP count word
 320         */
 321
 322         if (line[0] == ' ')
 323         {
 324          /*
 325           * Read a word in the current node...
 326           */
 327
 328           if (!node || (ptr = strrchr(line, ' ')) == NULL)
 329             continue;
 330
 331           if ((word = help_add_word(node, ptr + 1)) != NULL)
 332             word->count = atoi(line + 1);
 333         }
 334         else
 335         {
 336          /*
 337           * Add a node...
 338           */
 339
 340           filename = line;
 341
 342           if ((ptr = strchr(line, ' ')) == NULL)
 343             break;
 344
 345           while (isspace(*ptr & 255))
 346             *ptr++ = '\0';
 347
 348           if ((anchor = strrchr(filename, '#')) != NULL)
 349           {
 350             *anchor++ = '\0';
 351             mtime = 0;
 352           }
 353           else
 354             mtime = strtol(ptr, &ptr, 10);
 355
 356           offset = strtoll(ptr, &ptr, 10);
 357           length = strtoll(ptr, &ptr, 10);
 358
 359           while (isspace(*ptr & 255))
 360             ptr ++;
 361
 362           if (!anchor)
 363           {
 364            /*
 365             * Get section...
 366             */
 367
 368             if (*ptr != '\"')
 369               break;
 370
 371             ptr ++;
 372             sectptr = ptr;
 373
 374             while (*ptr && *ptr != '\"')
 375               ptr ++;
 376
 377             if (*ptr != '\"')
 378               break;
 379
 380             *ptr++ = '\0';
 381
 382             strlcpy(section, sectptr, sizeof(section));
 383
 384             while (isspace(*ptr & 255))
 385               ptr ++;
 386           }
 387
 388           if (*ptr != '\"')
 389             break;
 390
 391           ptr ++;
 392           text = ptr;
 393
 394           while (*ptr && *ptr != '\"')
 395             ptr ++;
 396
 397           if (*ptr != '\"')
 398             break;
 399
 400           *ptr++ = '\0';
 401
 402           if ((node = help_new_node(filename, anchor, section, text,
 403                                     mtime, offset, length)) == NULL)
 404             break;
 405
 406           node->score = -1;
 407
 408           cupsArrayAdd(hi->nodes, node);
 409         }
 410       }
 411     }
 412
 413     cupsFileClose(fp);
 414   }
 415
 416  /*
 417   * Scan for new/updated files...
 418   */
 419
 420   update = help_load_directory(hi, directory, NULL);
 421
 422  /*
 423   * Remove any files that are no longer installed...
 424   */
 425
 426   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 427        node;
 428        node = (help_node_t *)cupsArrayNext(hi->nodes))
 429     if (node->score < 0)
 430     {
 431      /*
 432       * Delete this node...
 433       */
 434
 435       cupsArrayRemove(hi->nodes, node);
 436       help_delete_node(node);
 437     }
 438
 439  /*
 440   * Add nodes to the sorted array...
 441   */
 442
 443   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 444        node;
 445        node = (help_node_t *)cupsArrayNext(hi->nodes))
 446     cupsArrayAdd(hi->sorted, node);
 447
 448  /*
 449   * Save the index if we updated it...
 450   */
 451
 452   if (update)
 453     helpSaveIndex(hi, hifile);
 454
 455  /*
 456   * Return the index...
 457   */
 458
 459   return (hi);
 460 }
 461
 462
 463 /*
 464  * 'helpSaveIndex()' - Save a help index to disk.
 465  */
 466
 467 int                                     /* O - 0 on success, -1 on error */
 468 helpSaveIndex(help_index_t *hi,         /* I - Index */
 469               const char   *hifile)     /* I - Index filename */
 470 {
 471   cups_file_t   *fp;                    /* Index file */
 472   help_node_t   *node;                  /* Current node */
 473   help_word_t   *word;                  /* Current word */
 474
 475
 476   DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")\n", hi, hifile));
 477
 478  /*
 479   * Try creating a new index file...
 480   */
 481
 482   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
 483     return (-1);
 484
 485  /*
 486   * Lock the file while we write it...
 487   */
 488
 489   cupsFileLock(fp, 1);
 490
 491   cupsFilePuts(fp, "HELPV2\n");
 492
 493   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 494        node;
 495        node = (help_node_t *)cupsArrayNext(hi->nodes))
 496   {
 497    /*
 498     * Write the current node with/without the anchor...
 499     */
 500
 501     if (node->anchor)
 502     {
 503       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
 504                          node->filename, node->anchor,
 505                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 506                          node->text) < 0)
 507         break;
 508     }
 509     else
 510     {
 511       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
 512                          node->filename, node->mtime,
 513                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
 514                          node->section ? node->section : "", node->text) < 0)
 515         break;
 516     }
 517
 518    /*
 519     * Then write the words associated with the node...
 520     */
 521
 522     for (word = (help_word_t *)cupsArrayFirst(node->words);
 523          word;
 524          word = (help_word_t *)cupsArrayNext(node->words))
 525       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
 526         break;
 527   }
 528
 529   cupsFileFlush(fp);
 530
 531   if (cupsFileClose(fp) < 0)
 532     return (-1);
 533   else if (node)
 534     return (-1);
 535   else
 536     return (0);
 537 }
 538
 539
 540 /*
 541  * 'helpSearchIndex()' - Search an index.
 542  */
 543
 544 help_index_t *                          /* O - Search index */
 545 helpSearchIndex(help_index_t *hi,       /* I - Index */
 546                 const char   *query,    /* I - Query string */
 547                 const char   *section,  /* I - Limit search to this section */
 548                 const char   *filename) /* I - Limit search to this file */
 549 {
 550   help_index_t  *search;                /* Search index */
 551   help_node_t   *node;                  /* Current node */
 552   help_word_t   *word;                  /* Current word */
 553   void          *sc;                    /* Search context */
 554   int           matches;                /* Number of matches */
 555
 556
 557   DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")\n",
 558                 hi, query ? query : "(nil)",
 559                 filename ? filename : "(nil)"));
 560
 561  /*
 562   * Range check...
 563   */
 564
 565   if (!hi || !query)
 566     return (NULL);
 567
 568  /*
 569   * Reset the scores of all nodes to 0...
 570   */
 571
 572   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
 573        node;
 574        node = (help_node_t *)cupsArrayNext(hi->nodes))
 575     node->score = 0;
 576
 577  /*
 578   * Find the first node to search in...
 579   */
 580
 581   if (filename)
 582   {
 583     node = helpFindNode(hi, filename, NULL);
 584     if (!node)
 585       return (NULL);
 586   }
 587   else
 588     node = (help_node_t *)cupsArrayFirst(hi->nodes);
 589
 590  /*
 591   * Convert the query into a regular expression...
 592   */
 593
 594   sc = cgiCompileSearch(query);
 595   if (!sc)
 596     return (NULL);
 597
 598  /*
 599   * Allocate a search index...
 600   */
 601
 602   search = calloc(1, sizeof(help_index_t));
 603   if (!search)
 604   {
 605     cgiFreeSearch(sc);
 606     return (NULL);
 607   }
 608
 609   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
 610   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
 611
 612   if (!search->nodes || !search->sorted)
 613   {
 614     cupsArrayDelete(search->nodes);
 615     cupsArrayDelete(search->sorted);
 616     free(search);
 617     cgiFreeSearch(sc);
 618     return (NULL);
 619   }
 620
 621   search->search = 1;
 622
 623  /*
 624   * Check each node in the index, adding matching nodes to the
 625   * search index...
 626   */
 627
 628   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 629     if (section && strcmp(node->section, section))
 630       continue;
 631     else if (filename && strcmp(node->filename, filename))
 632       continue;
 633     else
 634     {
 635       matches = cgiDoSearch(sc, node->text);
 636
 637       for (word = (help_word_t *)cupsArrayFirst(node->words);
 638            word;
 639            word = (help_word_t *)cupsArrayNext(node->words))
 640         if (cgiDoSearch(sc, word->text) > 0)
 641           matches += word->count;
 642
 643       if (matches > 0)
 644       {
 645        /*
 646         * Found a match, add the node to the search index...
 647         */
 648
 649         node->score = matches;
 650
 651         cupsArrayAdd(search->nodes, node);
 652         cupsArrayAdd(search->sorted, node);
 653       }
 654     }
 655
 656  /*
 657   * Free the search context...
 658   */
 659
 660   cgiFreeSearch(sc);
 661
 662  /*
 663   * Return the results...
 664   */
 665
 666   return (search);
 667 }
 668
 669
 670 /*
 671  * 'help_add_word()' - Add a word to a node.
 672  */
 673
 674 static help_word_t *                    /* O - New word */
 675 help_add_word(help_node_t *n,           /* I - Node */
 676               const char  *text)        /* I - Word text */
 677 {
 678   help_word_t   *w,                     /* New word */
 679                 key;                    /* Search key */
 680
 681
 682   DEBUG_printf(("help_add_word(n=%p, text=\"%s\")\n", n, text));
 683
 684  /*
 685   * Create the words array as needed...
 686   */
 687
 688   if (!n->words)
 689     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
 690
 691  /*
 692   * See if the word is already added...
 693   */
 694
 695   key.text = (char *)text;
 696
 697   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
 698   {
 699    /*
 700     * Create a new word...
 701     */
 702
 703     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
 704       return (NULL);
 705
 706     if ((w->text = strdup(text)) == NULL)
 707     {
 708       free(w);
 709       return (NULL);
 710     }
 711
 712     cupsArrayAdd(n->words, w);
 713   }
 714
 715  /*
 716   * Bump the counter for this word and return it...
 717   */
 718
 719   w->count ++;
 720
 721   return (w);
 722 }
 723
 724
 725 /*
 726  * 'help_delete_node()' - Free all memory used by a node.
 727  */
 728
 729 static void
 730 help_delete_node(help_node_t *n)        /* I - Node */
 731 {
 732   help_word_t   *w;                     /* Current word */
 733
 734
 735   DEBUG_printf(("help_delete_node(n=%p)\n", n));
 736
 737   if (!n)
 738     return;
 739
 740   if (n->filename)
 741     free(n->filename);
 742
 743   if (n->anchor)
 744     free(n->anchor);
 745
 746   if (n->section)
 747     free(n->section);
 748
 749   if (n->text)
 750     free(n->text);
 751
 752   for (w = (help_word_t *)cupsArrayFirst(n->words);
 753        w;
 754        w = (help_word_t *)cupsArrayNext(n->words))
 755     help_delete_word(w);
 756
 757   cupsArrayDelete(n->words);
 758
 759   free(n);
 760 }
 761
 762
 763 /*
 764  * 'help_delete_word()' - Free all memory used by a word.
 765  */
 766
 767 static void
 768 help_delete_word(help_word_t *w)        /* I - Word */
 769 {
 770   DEBUG_printf(("help_delete_word(w=%p)\n", w));
 771
 772   if (!w)
 773     return;
 774
 775   if (w->text)
 776     free(w->text);
 777
 778   free(w);
 779 }
 780
 781
 782 /*
 783  * 'help_load_directory()' - Load a directory of files into an index.
 784  */
 785
 786 static int                              /* O - 0 = success, -1 = error, 1 = updated */
 787 help_load_directory(
 788     help_index_t *hi,                   /* I - Index */
 789     const char   *directory,            /* I - Directory */
 790     const char   *relative)             /* I - Relative path */
 791 {
 792   cups_dir_t    *dir;                   /* Directory file */
 793   cups_dentry_t *dent;                  /* Directory entry */
 794   char          *ext,                   /* Pointer to extension */
 795                 filename[1024],         /* Full filename */
 796                 relname[1024];          /* Relative filename */
 797   int           update;                 /* Updated? */
 798   help_node_t   *node;                  /* Current node */
 799
 800
 801   DEBUG_printf(("help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")\n",
 802                 hi, directory ? directory : "(nil)", relative ? relative : "(nil)"));
 803
 804  /*
 805   * Open the directory and scan it...
 806   */
 807
 808   if ((dir = cupsDirOpen(directory)) == NULL)
 809     return (0);
 810
 811   update = 0;
 812
 813   while ((dent = cupsDirRead(dir)) != NULL)
 814   {
 815    /*
 816     * Skip "." files...
 817     */
 818
 819     if (dent->filename[0] == '.')
 820       continue;
 821
 822    /*
 823     * Get absolute and relative filenames...
 824     */
 825
 826     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
 827     if (relative)
 828       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
 829     else
 830       strlcpy(relname, dent->filename, sizeof(relname));
 831
 832    /*
 833     * Check if we have a HTML file...
 834     */
 835
 836     if ((ext = strstr(dent->filename, ".html")) != NULL &&
 837         (!ext[5] || !strcmp(ext + 5, ".gz")))
 838     {
 839      /*
 840       * HTML file, see if we have already indexed the file...
 841       */
 842
 843       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
 844       {
 845        /*
 846         * File already indexed - check dates to confirm that the
 847         * index is up-to-date...
 848         */
 849
 850         if (node->mtime == dent->fileinfo.st_mtime)
 851         {
 852          /*
 853           * Same modification time, so mark all of the nodes
 854           * for this file as up-to-date...
 855           */
 856
 857           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
 858             if (!strcmp(node->filename, relname))
 859               node->score = 0;
 860             else
 861               break;
 862
 863           continue;
 864         }
 865       }
 866
 867       update = 1;
 868
 869       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
 870     }
 871     else if (S_ISDIR(dent->fileinfo.st_mode))
 872     {
 873      /*
 874       * Process sub-directory...
 875       */
 876
 877       if (help_load_directory(hi, filename, relname) == 1)
 878         update = 1;
 879     }
 880   }
 881
 882   cupsDirClose(dir);
 883
 884   return (update);
 885 }
 886
 887
 888 /*
 889  * 'help_load_file()' - Load an HTML file into an index.
 890  */
 891
 892 static int                              /* O - 0 = success, -1 = error */
 893 help_load_file(
 894     help_index_t *hi,                   /* I - Index */
 895     const char   *filename,             /* I - Filename */
 896     const char   *relative,             /* I - Relative path */
 897     time_t       mtime)                 /* I - Modification time */
 898 {
 899   cups_file_t   *fp;                    /* HTML file */
 900   help_node_t   *node;                  /* Current node */
 901   char          line[1024],             /* Line from file */
 902                 section[1024],          /* Section */
 903                 *ptr,                   /* Pointer into line */
 904                 *anchor,                /* Anchor name */
 905                 *text;                  /* Text for anchor */
 906   off_t         offset;                 /* File offset */
 907   char          quote;                  /* Quote character */
 908   help_word_t   *word;                  /* Current word */
 909   int           wordlen;                /* Length of word */
 910
 911
 912   DEBUG_printf(("help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", mtime=%ld)\n",
 913                 hi, filename ? filename : "(nil)",
 914                 relative ? relative : "(nil)", mtime));
 915
 916   if ((fp = cupsFileOpen(filename, "r")) == NULL)
 917     return (-1);
 918
 919   node   = NULL;
 920   offset = 0;
 921
 922   strcpy(section, "Other");
 923
 924   while (cupsFileGets(fp, line, sizeof(line)))
 925   {
 926    /*
 927     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
 928     */
 929
 930     if (!strncasecmp(line, "<!-- SECTION:", 13))
 931     {
 932      /*
 933       * Got section line, copy it!
 934       */
 935
 936       for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
 937
 938       strlcpy(section, ptr, sizeof(section));
 939       if ((ptr = strstr(section, "-->")) != NULL)
 940       {
 941        /*
 942         * Strip comment stuff from end of line...
 943         */
 944
 945         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
 946
 947         if (isspace(*ptr & 255))
 948           *ptr = '\0';
 949       }
 950       continue;
 951     }
 952
 953     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
 954     {
 955       ptr ++;
 956
 957       if (!strncasecmp(ptr, "TITLE>", 6))
 958       {
 959        /*
 960         * Found the title...
 961         */
 962
 963         anchor = NULL;
 964         ptr += 6;
 965       }
 966       else if (!strncasecmp(ptr, "A NAME=", 7))
 967       {
 968        /*
 969         * Found an anchor...
 970         */
 971
 972         ptr += 7;
 973
 974         if (*ptr == '\"' || *ptr == '\'')
 975         {
 976          /*
 977           * Get quoted anchor...
 978           */
 979
 980           quote  = *ptr;
 981           anchor = ptr + 1;
 982           if ((ptr = strchr(anchor, quote)) != NULL)
 983             *ptr++ = '\0';
 984           else
 985             break;
 986         }
 987         else
 988         {
 989          /*
 990           * Get unquoted anchor...
 991           */
 992
 993           anchor = ptr + 1;
 994
 995           for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
 996
 997           if (*ptr)
 998             *ptr++ = '\0';
 999           else
1000             break;
1001         }
1002
1003        /*
1004         * Got the anchor, now lets find the end...
1005         */
1006
1007         while (*ptr && *ptr != '>')
1008           ptr ++;
1009
1010         if (*ptr != '>')
1011           break;
1012
1013         ptr ++;
1014       }
1015       else
1016         continue;
1017
1018      /*
1019       * Now collect text for the link...
1020       */
1021
1022       text = ptr;
1023       while ((ptr = strchr(text, '<')) == NULL)
1024       {
1025         ptr = text + strlen(text);
1026         if (ptr >= (line + sizeof(line) - 2))
1027           break;
1028
1029         *ptr++ = ' ';
1030
1031         if (!cupsFileGets(fp, ptr, sizeof(line) - (ptr - line) - 1))
1032           break;
1033       }
1034
1035       *ptr = '\0';
1036
1037       if (node)
1038         node->length = offset - node->offset;
1039
1040       if (!*text)
1041       {
1042         node = NULL;
1043         break;
1044       }
1045
1046       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1047       {
1048        /*
1049         * Node already in the index, so replace the text and other
1050         * data...
1051         */
1052
1053         cupsArrayRemove(hi->nodes, node);
1054
1055         if (node->section)
1056           free(node->section);
1057
1058         if (node->text)
1059           free(node->text);
1060
1061         if (node->words)
1062         {
1063           for (word = (help_word_t *)cupsArrayFirst(node->words);
1064                word;
1065                word = (help_word_t *)cupsArrayNext(node->words))
1066             help_delete_word(word);
1067
1068           cupsArrayDelete(node->words);
1069           node->words = NULL;
1070         }
1071
1072         node->section = section[0] ? strdup(section) : NULL;
1073         node->text    = strdup(text);
1074         node->mtime   = mtime;
1075         node->offset  = offset;
1076         node->score   = 0;
1077       }
1078       else
1079       {
1080        /*
1081         * New node...
1082         */
1083
1084         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1085       }
1086
1087      /*
1088       * Go through the text value and replace tabs and newlines with
1089       * whitespace and eliminate extra whitespace...
1090       */
1091
1092       for (ptr = node->text, text = node->text; *ptr;)
1093         if (isspace(*ptr & 255))
1094         {
1095           while (isspace(*ptr & 255))
1096             ptr ++;
1097
1098           *text++ = ' ';
1099         }
1100         else if (text != ptr)
1101           *text++ = *ptr++;
1102         else
1103         {
1104           text ++;
1105           ptr ++;
1106         }
1107
1108       *text = '\0';
1109
1110      /*
1111       * (Re)add the node to the array...
1112       */
1113
1114       cupsArrayAdd(hi->nodes, node);
1115
1116       if (!anchor)
1117         node = NULL;
1118       break;
1119     }
1120
1121     if (node)
1122     {
1123      /*
1124       * Scan this line for words...
1125       */
1126
1127       for (ptr = line; *ptr; ptr ++)
1128       {
1129        /*
1130         * Skip HTML stuff...
1131         */
1132
1133         if (*ptr == '<')
1134         {
1135           if (!strncmp(ptr, "<!--", 4))
1136           {
1137            /*
1138             * Skip HTML comment...
1139             */
1140
1141             if ((text = strstr(ptr + 4, "-->")) == NULL)
1142               ptr += strlen(ptr) - 1;
1143             else
1144               ptr = text + 2;
1145           }
1146           else
1147           {
1148            /*
1149             * Skip HTML element...
1150             */
1151
1152             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1153               if (*ptr == '\"' || *ptr == '\'')
1154               {
1155                 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1156
1157                 if (!*ptr)
1158                   ptr --;
1159               }
1160
1161             if (!*ptr)
1162               ptr --;
1163           }
1164
1165           continue;
1166         }
1167         else if (*ptr == '&')
1168         {
1169          /*
1170           * Skip HTML entity...
1171           */
1172
1173           for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1174
1175           if (!*ptr)
1176             ptr --;
1177
1178           continue;
1179         }
1180         else if (!isalnum(*ptr & 255))
1181           continue;
1182
1183        /*
1184         * Found the start of a word, search until we find the end...
1185         */
1186
1187         for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1188
1189         wordlen = ptr - text;
1190
1191         if (*ptr)
1192           *ptr = '\0';
1193         else
1194           ptr --;
1195
1196         if (wordlen > 1 && !bsearch(text, help_common_words,
1197                                     (sizeof(help_common_words) /
1198                                      sizeof(help_common_words[0])),
1199                                     sizeof(help_common_words[0]),
1200                                     (int (*)(const void *, const void *))
1201                                         strcasecmp))
1202           help_add_word(node, text);
1203       }
1204     }
1205
1206    /*
1207     * Get the offset of the next line...
1208     */
1209
1210     offset = cupsFileTell(fp);
1211   }
1212
1213   cupsFileClose(fp);
1214
1215   if (node)
1216     node->length = offset - node->offset;
1217
1218   return (0);
1219 }
1220
1221
1222 /*
1223  * 'help_new_node()' - Create a new node and add it to an index.
1224  */
1225
1226 static help_node_t *                    /* O - Node pointer or NULL on error */
1227 help_new_node(const char   *filename,   /* I - Filename */
1228               const char   *anchor,     /* I - Anchor */
1229               const char   *section,    /* I - Section */
1230               const char   *text,       /* I - Text */
1231               time_t       mtime,       /* I - Modification time */
1232               off_t        offset,      /* I - Offset in file */
1233               size_t       length)      /* I - Length in bytes */
1234 {
1235   help_node_t   *n;                     /* Node */
1236
1237
1238   DEBUG_printf(("help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1239                 "mtime=%ld, offset=%ld, length=%ld)\n",
1240                 filename ? filename : "(nil)", anchor ? anchor : "(nil)",
1241                 text ? text : "(nil)", (long)mtime, (long)offset,
1242                 (long)length));
1243
1244   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1245   if (!n)
1246     return (NULL);
1247
1248   n->filename = strdup(filename);
1249   n->anchor   = anchor ? strdup(anchor) : NULL;
1250   n->section  = (section && *section) ? strdup(section) : NULL;
1251   n->text     = strdup(text);
1252   n->mtime    = mtime;
1253   n->offset   = offset;
1254   n->length   = length;
1255
1256   return (n);
1257 }
1258
1259
1260 /*
1261  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1262  */
1263
1264 static int                              /* O - Difference */
1265 help_sort_by_name(help_node_t *n1,      /* I - First node */
1266                   help_node_t *n2)      /* I - Second node */
1267 {
1268   int           diff;                   /* Difference */
1269
1270
1271   DEBUG_printf(("help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)\n",
1272                 n1, n1->filename, n1->anchor ? n1->anchor : "",
1273                 n2, n2->filename, n2->anchor ? n2->anchor : ""));
1274
1275   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1276     return (diff);
1277
1278   if (!n1->anchor && !n2->anchor)
1279     return (0);
1280   else if (!n1->anchor)
1281     return (-1);
1282   else if (!n2->anchor)
1283     return (1);
1284   else
1285     return (strcmp(n1->anchor, n2->anchor));
1286 }
1287
1288
1289 /*
1290  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1291  */
1292
1293 static int                              /* O - Difference */
1294 help_sort_by_score(help_node_t *n1,     /* I - First node */
1295                    help_node_t *n2)     /* I - Second node */
1296 {
1297   int           diff;                   /* Difference */
1298
1299
1300   DEBUG_printf(("help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1301                 "n2=%p(%d \"%s\" \"%s\")\n",
1302                 n1, n1->score, n1->section ? n1->section : "", n1->text,
1303                 n2, n2->score, n2->section ? n2->section : "", n2->text));
1304
1305   if (n1->score != n2->score)
1306     return (n1->score - n2->score);
1307
1308   if (n1->section && !n2->section)
1309     return (1);
1310   else if (!n1->section && n2->section)
1311     return (-1);
1312   else if (n1->section && n2->section &&
1313            (diff = strcmp(n1->section, n2->section)) != 0)
1314     return (diff);
1315
1316   return (strcasecmp(n1->text, n2->text));
1317 }
1318
1319
1320 /*
1321  * 'help_sort_words()' - Sort words alphabetically.
1322  */
1323
1324 static int                              /* O - Difference */
1325 help_sort_words(help_word_t *w1,        /* I - Second word */
1326                 help_word_t *w2)        /* I - Second word */
1327 {
1328   DEBUG_printf(("help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))\n",
1329                 w1, w1->text, w2, w2->text));
1330
1331   return (strcasecmp(w1->text, w2->text));
1332 }
1333
1334
1335 /*
1336  * End of "$Id: help-index.c 6258 2007-02-11 01:16:31Z mike $".
1337  */