]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
f2d18633 | 2 | * "$Id$" |
ef416fc2 | 3 | * |
7e86f2f6 | 4 | * Online help index routines for CUPS. |
ef416fc2 | 5 | * |
7e86f2f6 MS |
6 | * Copyright 2007-2014 by Apple Inc. |
7 | * Copyright 1997-2007 by Easy Software Products. | |
ef416fc2 | 8 | * |
7e86f2f6 MS |
9 | * These coded instructions, statements, and computer programs are the |
10 | * property of Apple Inc. and are protected by Federal copyright | |
11 | * law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | * which should have been included with this file. If this file is | |
13 | * missing or damaged, see the license at "http://www.cups.org/". | |
ef416fc2 | 14 | */ |
15 | ||
16 | /* | |
17 | * Include necessary headers... | |
18 | */ | |
19 | ||
20 | #include "cgi-private.h" | |
21 | #include <cups/dir.h> | |
22 | ||
23 | ||
/*
 * List of common English words that should not be indexed...
 *
 * The table is looked up with bsearch(), so the entries MUST stay in
 * ascending alphabetical order.  Each entry is at most 5 characters plus
 * the terminating nul; widen the second dimension before adding a longer
 * word.  The table is read-only, hence "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
127 | ||
128 | ||
ef416fc2 | 129 | /* |
130 | * Local functions... | |
131 | */ | |
132 | ||
f7deaa1a | 133 | static help_word_t *help_add_word(help_node_t *n, const char *text); |
ef416fc2 | 134 | static void help_delete_node(help_node_t *n); |
f7deaa1a | 135 | static void help_delete_word(help_word_t *w); |
ef416fc2 | 136 | static int help_load_directory(help_index_t *hi, |
137 | const char *directory, | |
138 | const char *relative); | |
139 | static int help_load_file(help_index_t *hi, | |
140 | const char *filename, | |
141 | const char *relative, | |
142 | time_t mtime); | |
143 | static help_node_t *help_new_node(const char *filename, const char *anchor, | |
144 | const char *section, const char *text, | |
145 | time_t mtime, off_t offset, | |
85dda01c MS |
146 | size_t length) |
147 | __attribute__((nonnull(1,3,4))); | |
ecdc0628 | 148 | static int help_sort_by_name(help_node_t *p1, help_node_t *p2); |
149 | static int help_sort_by_score(help_node_t *p1, help_node_t *p2); | |
f7deaa1a | 150 | static int help_sort_words(help_word_t *w1, help_word_t *w2); |
ef416fc2 | 151 | |
152 | ||
153 | /* | |
154 | * 'helpDeleteIndex()' - Delete an index, freeing all memory used. | |
155 | */ | |
156 | ||
157 | void | |
ecdc0628 | 158 | helpDeleteIndex(help_index_t *hi) /* I - Help index */ |
ef416fc2 | 159 | { |
ecdc0628 | 160 | help_node_t *node; /* Current node */ |
ef416fc2 | 161 | |
162 | ||
85dda01c | 163 | DEBUG_printf(("helpDeleteIndex(hi=%p)", hi)); |
ef416fc2 | 164 | |
165 | if (!hi) | |
166 | return; | |
167 | ||
ecdc0628 | 168 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
169 | node; | |
170 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 171 | { |
ecdc0628 | 172 | if (!hi->search) |
173 | help_delete_node(node); | |
174 | } | |
ef416fc2 | 175 | |
ecdc0628 | 176 | cupsArrayDelete(hi->nodes); |
177 | cupsArrayDelete(hi->sorted); | |
ef416fc2 | 178 | |
179 | free(hi); | |
180 | } | |
181 | ||
182 | ||
183 | /* | |
184 | * 'helpFindNode()' - Find a node in an index. | |
185 | */ | |
186 | ||
ecdc0628 | 187 | help_node_t * /* O - Node pointer or NULL */ |
ef416fc2 | 188 | helpFindNode(help_index_t *hi, /* I - Index */ |
189 | const char *filename, /* I - Filename */ | |
190 | const char *anchor) /* I - Anchor */ | |
191 | { | |
ecdc0628 | 192 | help_node_t key; /* Search key */ |
ef416fc2 | 193 | |
194 | ||
85dda01c MS |
195 | DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")", |
196 | hi, filename, anchor)); | |
ef416fc2 | 197 | |
198 | /* | |
199 | * Range check input... | |
200 | */ | |
201 | ||
202 | if (!hi || !filename) | |
203 | return (NULL); | |
204 | ||
205 | /* | |
206 | * Initialize the search key... | |
207 | */ | |
208 | ||
209 | key.filename = (char *)filename; | |
210 | key.anchor = (char *)anchor; | |
ef416fc2 | 211 | |
212 | /* | |
213 | * Return any match... | |
214 | */ | |
215 | ||
ecdc0628 | 216 | return ((help_node_t *)cupsArrayFind(hi->nodes, &key)); |
ef416fc2 | 217 | } |
218 | ||
219 | ||
220 | /* | |
221 | * 'helpLoadIndex()' - Load a help index from disk. | |
222 | */ | |
223 | ||
224 | help_index_t * /* O - Index pointer or NULL */ | |
225 | helpLoadIndex(const char *hifile, /* I - Index filename */ | |
226 | const char *directory) /* I - Directory that is indexed */ | |
227 | { | |
228 | help_index_t *hi; /* Help index */ | |
229 | cups_file_t *fp; /* Current file */ | |
230 | char line[2048], /* Line from file */ | |
231 | *ptr, /* Pointer into line */ | |
232 | *filename, /* Filename in line */ | |
233 | *anchor, /* Anchor in line */ | |
234 | *sectptr, /* Section pointer in line */ | |
235 | section[1024], /* Section name */ | |
236 | *text; /* Text in line */ | |
237 | time_t mtime; /* Modification time */ | |
238 | off_t offset; /* Offset into file */ | |
239 | size_t length; /* Length in bytes */ | |
240 | int update; /* Update? */ | |
ef416fc2 | 241 | help_node_t *node; /* Current node */ |
f7deaa1a | 242 | help_word_t *word; /* Current word */ |
ef416fc2 | 243 | |
244 | ||
85dda01c | 245 | DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")", |
ef416fc2 | 246 | hifile, directory)); |
247 | ||
248 | /* | |
249 | * Create a new, empty index. | |
250 | */ | |
251 | ||
ecdc0628 | 252 | if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL) |
253 | return (NULL); | |
254 | ||
255 | hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); | |
256 | hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
257 | ||
258 | if (!hi->nodes || !hi->sorted) | |
259 | { | |
260 | cupsArrayDelete(hi->nodes); | |
261 | cupsArrayDelete(hi->sorted); | |
262 | free(hi); | |
263 | return (NULL); | |
264 | } | |
ef416fc2 | 265 | |
266 | /* | |
267 | * Try loading the existing index file... | |
268 | */ | |
269 | ||
270 | if ((fp = cupsFileOpen(hifile, "r")) != NULL) | |
271 | { | |
272 | /* | |
273 | * Lock the file and then read the first line... | |
274 | */ | |
275 | ||
276 | cupsFileLock(fp, 1); | |
277 | ||
f7deaa1a | 278 | if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2")) |
ef416fc2 | 279 | { |
280 | /* | |
281 | * Got a valid header line, now read the data lines... | |
282 | */ | |
283 | ||
f7deaa1a | 284 | node = NULL; |
285 | ||
ef416fc2 | 286 | while (cupsFileGets(fp, line, sizeof(line))) |
287 | { | |
288 | /* | |
289 | * Each line looks like one of the following: | |
290 | * | |
291 | * filename mtime offset length "section" "text" | |
292 | * filename#anchor offset length "text" | |
f7deaa1a | 293 | * SP count word |
ef416fc2 | 294 | */ |
295 | ||
f7deaa1a | 296 | if (line[0] == ' ') |
ef416fc2 | 297 | { |
f7deaa1a | 298 | /* |
299 | * Read a word in the current node... | |
300 | */ | |
ef416fc2 | 301 | |
f7deaa1a | 302 | if (!node || (ptr = strrchr(line, ' ')) == NULL) |
303 | continue; | |
ef416fc2 | 304 | |
f7deaa1a | 305 | if ((word = help_add_word(node, ptr + 1)) != NULL) |
306 | word->count = atoi(line + 1); | |
307 | } | |
308 | else | |
ef416fc2 | 309 | { |
310 | /* | |
f7deaa1a | 311 | * Add a node... |
ef416fc2 | 312 | */ |
313 | ||
f7deaa1a | 314 | filename = line; |
ef416fc2 | 315 | |
f7deaa1a | 316 | if ((ptr = strchr(line, ' ')) == NULL) |
317 | break; | |
ef416fc2 | 318 | |
f7deaa1a | 319 | while (isspace(*ptr & 255)) |
320 | *ptr++ = '\0'; | |
ef416fc2 | 321 | |
f7deaa1a | 322 | if ((anchor = strrchr(filename, '#')) != NULL) |
323 | { | |
324 | *anchor++ = '\0'; | |
325 | mtime = 0; | |
326 | } | |
327 | else | |
328 | mtime = strtol(ptr, &ptr, 10); | |
ef416fc2 | 329 | |
f7deaa1a | 330 | offset = strtoll(ptr, &ptr, 10); |
7e86f2f6 | 331 | length = (size_t)strtoll(ptr, &ptr, 10); |
ef416fc2 | 332 | |
333 | while (isspace(*ptr & 255)) | |
334 | ptr ++; | |
ef416fc2 | 335 | |
f7deaa1a | 336 | if (!anchor) |
337 | { | |
338 | /* | |
339 | * Get section... | |
340 | */ | |
ef416fc2 | 341 | |
f7deaa1a | 342 | if (*ptr != '\"') |
343 | break; | |
ef416fc2 | 344 | |
f7deaa1a | 345 | ptr ++; |
346 | sectptr = ptr; | |
ef416fc2 | 347 | |
f7deaa1a | 348 | while (*ptr && *ptr != '\"') |
349 | ptr ++; | |
350 | ||
351 | if (*ptr != '\"') | |
352 | break; | |
ef416fc2 | 353 | |
f7deaa1a | 354 | *ptr++ = '\0'; |
ef416fc2 | 355 | |
f7deaa1a | 356 | strlcpy(section, sectptr, sizeof(section)); |
ef416fc2 | 357 | |
f7deaa1a | 358 | while (isspace(*ptr & 255)) |
359 | ptr ++; | |
360 | } | |
ecdc0628 | 361 | |
f7deaa1a | 362 | if (*ptr != '\"') |
363 | break; | |
364 | ||
365 | ptr ++; | |
366 | text = ptr; | |
367 | ||
368 | while (*ptr && *ptr != '\"') | |
369 | ptr ++; | |
370 | ||
371 | if (*ptr != '\"') | |
372 | break; | |
373 | ||
374 | *ptr++ = '\0'; | |
375 | ||
376 | if ((node = help_new_node(filename, anchor, section, text, | |
377 | mtime, offset, length)) == NULL) | |
378 | break; | |
379 | ||
380 | node->score = -1; | |
381 | ||
382 | cupsArrayAdd(hi->nodes, node); | |
383 | } | |
ef416fc2 | 384 | } |
385 | } | |
386 | ||
387 | cupsFileClose(fp); | |
388 | } | |
389 | ||
390 | /* | |
391 | * Scan for new/updated files... | |
392 | */ | |
393 | ||
394 | update = help_load_directory(hi, directory, NULL); | |
395 | ||
396 | /* | |
397 | * Remove any files that are no longer installed... | |
398 | */ | |
399 | ||
ecdc0628 | 400 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
401 | node; | |
402 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
403 | if (node->score < 0) | |
ef416fc2 | 404 | { |
405 | /* | |
406 | * Delete this node... | |
407 | */ | |
408 | ||
ecdc0628 | 409 | cupsArrayRemove(hi->nodes, node); |
410 | help_delete_node(node); | |
ef416fc2 | 411 | } |
ef416fc2 | 412 | |
413 | /* | |
ecdc0628 | 414 | * Add nodes to the sorted array... |
ef416fc2 | 415 | */ |
416 | ||
ecdc0628 | 417 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
418 | node; | |
419 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
420 | cupsArrayAdd(hi->sorted, node); | |
ef416fc2 | 421 | |
422 | /* | |
ecdc0628 | 423 | * Save the index if we updated it... |
ef416fc2 | 424 | */ |
425 | ||
ecdc0628 | 426 | if (update) |
427 | helpSaveIndex(hi, hifile); | |
ef416fc2 | 428 | |
429 | /* | |
430 | * Return the index... | |
431 | */ | |
432 | ||
433 | return (hi); | |
434 | } | |
435 | ||
436 | ||
437 | /* | |
438 | * 'helpSaveIndex()' - Save a help index to disk. | |
439 | */ | |
440 | ||
441 | int /* O - 0 on success, -1 on error */ | |
442 | helpSaveIndex(help_index_t *hi, /* I - Index */ | |
443 | const char *hifile) /* I - Index filename */ | |
444 | { | |
445 | cups_file_t *fp; /* Index file */ | |
ef416fc2 | 446 | help_node_t *node; /* Current node */ |
f7deaa1a | 447 | help_word_t *word; /* Current word */ |
ef416fc2 | 448 | |
449 | ||
85dda01c | 450 | DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile)); |
ef416fc2 | 451 | |
452 | /* | |
453 | * Try creating a new index file... | |
454 | */ | |
455 | ||
456 | if ((fp = cupsFileOpen(hifile, "w9")) == NULL) | |
457 | return (-1); | |
458 | ||
459 | /* | |
460 | * Lock the file while we write it... | |
461 | */ | |
462 | ||
463 | cupsFileLock(fp, 1); | |
464 | ||
f7deaa1a | 465 | cupsFilePuts(fp, "HELPV2\n"); |
ef416fc2 | 466 | |
ecdc0628 | 467 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
468 | node; | |
469 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 470 | { |
471 | /* | |
472 | * Write the current node with/without the anchor... | |
473 | */ | |
474 | ||
ef416fc2 | 475 | if (node->anchor) |
476 | { | |
477 | if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n", | |
478 | node->filename, node->anchor, | |
479 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, | |
480 | node->text) < 0) | |
481 | break; | |
482 | } | |
483 | else | |
484 | { | |
485 | if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n", | |
ae71f5de | 486 | node->filename, (int)node->mtime, |
ef416fc2 | 487 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, |
488 | node->section ? node->section : "", node->text) < 0) | |
489 | break; | |
490 | } | |
f7deaa1a | 491 | |
492 | /* | |
493 | * Then write the words associated with the node... | |
494 | */ | |
495 | ||
496 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
497 | word; | |
498 | word = (help_word_t *)cupsArrayNext(node->words)) | |
499 | if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0) | |
500 | break; | |
ef416fc2 | 501 | } |
502 | ||
ecdc0628 | 503 | cupsFileFlush(fp); |
504 | ||
ef416fc2 | 505 | if (cupsFileClose(fp) < 0) |
506 | return (-1); | |
ecdc0628 | 507 | else if (node) |
ef416fc2 | 508 | return (-1); |
509 | else | |
510 | return (0); | |
511 | } | |
512 | ||
513 | ||
514 | /* | |
515 | * 'helpSearchIndex()' - Search an index. | |
516 | */ | |
517 | ||
518 | help_index_t * /* O - Search index */ | |
519 | helpSearchIndex(help_index_t *hi, /* I - Index */ | |
520 | const char *query, /* I - Query string */ | |
521 | const char *section, /* I - Limit search to this section */ | |
522 | const char *filename) /* I - Limit search to this file */ | |
523 | { | |
ef416fc2 | 524 | help_index_t *search; /* Search index */ |
ecdc0628 | 525 | help_node_t *node; /* Current node */ |
f7deaa1a | 526 | help_word_t *word; /* Current word */ |
ef416fc2 | 527 | void *sc; /* Search context */ |
528 | int matches; /* Number of matches */ | |
529 | ||
530 | ||
85dda01c MS |
531 | DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")", |
532 | hi, query, filename)); | |
ef416fc2 | 533 | |
534 | /* | |
535 | * Range check... | |
536 | */ | |
537 | ||
538 | if (!hi || !query) | |
539 | return (NULL); | |
540 | ||
ecdc0628 | 541 | /* |
542 | * Reset the scores of all nodes to 0... | |
543 | */ | |
544 | ||
545 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); | |
546 | node; | |
547 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
548 | node->score = 0; | |
549 | ||
550 | /* | |
551 | * Find the first node to search in... | |
552 | */ | |
ef416fc2 | 553 | |
554 | if (filename) | |
555 | { | |
ecdc0628 | 556 | node = helpFindNode(hi, filename, NULL); |
557 | if (!node) | |
ef416fc2 | 558 | return (NULL); |
559 | } | |
560 | else | |
ecdc0628 | 561 | node = (help_node_t *)cupsArrayFirst(hi->nodes); |
ef416fc2 | 562 | |
563 | /* | |
564 | * Convert the query into a regular expression... | |
565 | */ | |
566 | ||
567 | sc = cgiCompileSearch(query); | |
568 | if (!sc) | |
569 | return (NULL); | |
570 | ||
571 | /* | |
572 | * Allocate a search index... | |
573 | */ | |
574 | ||
575 | search = calloc(1, sizeof(help_index_t)); | |
576 | if (!search) | |
577 | { | |
578 | cgiFreeSearch(sc); | |
579 | return (NULL); | |
580 | } | |
581 | ||
ecdc0628 | 582 | search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); |
583 | search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
321d8d57 | 584 | |
ecdc0628 | 585 | if (!search->nodes || !search->sorted) |
586 | { | |
587 | cupsArrayDelete(search->nodes); | |
588 | cupsArrayDelete(search->sorted); | |
589 | free(search); | |
590 | cgiFreeSearch(sc); | |
591 | return (NULL); | |
592 | } | |
593 | ||
ef416fc2 | 594 | search->search = 1; |
595 | ||
596 | /* | |
597 | * Check each node in the index, adding matching nodes to the | |
598 | * search index... | |
599 | */ | |
600 | ||
ecdc0628 | 601 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
602 | if (section && strcmp(node->section, section)) | |
ef416fc2 | 603 | continue; |
ecdc0628 | 604 | else if (filename && strcmp(node->filename, filename)) |
ef416fc2 | 605 | continue; |
f7deaa1a | 606 | else |
ef416fc2 | 607 | { |
f7deaa1a | 608 | matches = cgiDoSearch(sc, node->text); |
609 | ||
610 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
611 | word; | |
612 | word = (help_word_t *)cupsArrayNext(node->words)) | |
613 | if (cgiDoSearch(sc, word->text) > 0) | |
614 | matches += word->count; | |
ef416fc2 | 615 | |
f7deaa1a | 616 | if (matches > 0) |
617 | { | |
618 | /* | |
619 | * Found a match, add the node to the search index... | |
620 | */ | |
ef416fc2 | 621 | |
f7deaa1a | 622 | node->score = matches; |
623 | ||
321d8d57 MS |
624 | cupsArrayAdd(search->nodes, node); |
625 | cupsArrayAdd(search->sorted, node); | |
f7deaa1a | 626 | } |
ef416fc2 | 627 | } |
628 | ||
629 | /* | |
630 | * Free the search context... | |
631 | */ | |
632 | ||
633 | cgiFreeSearch(sc); | |
634 | ||
ef416fc2 | 635 | /* |
636 | * Return the results... | |
637 | */ | |
638 | ||
639 | return (search); | |
640 | } | |
641 | ||
642 | ||
f7deaa1a | 643 | /* |
644 | * 'help_add_word()' - Add a word to a node. | |
645 | */ | |
646 | ||
647 | static help_word_t * /* O - New word */ | |
648 | help_add_word(help_node_t *n, /* I - Node */ | |
649 | const char *text) /* I - Word text */ | |
650 | { | |
651 | help_word_t *w, /* New word */ | |
652 | key; /* Search key */ | |
653 | ||
654 | ||
85dda01c | 655 | DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text)); |
f7deaa1a | 656 | |
657 | /* | |
658 | * Create the words array as needed... | |
659 | */ | |
660 | ||
661 | if (!n->words) | |
662 | n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL); | |
663 | ||
664 | /* | |
665 | * See if the word is already added... | |
666 | */ | |
667 | ||
668 | key.text = (char *)text; | |
669 | ||
670 | if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL) | |
671 | { | |
672 | /* | |
673 | * Create a new word... | |
674 | */ | |
675 | ||
676 | if ((w = calloc(1, sizeof(help_word_t))) == NULL) | |
677 | return (NULL); | |
678 | ||
679 | if ((w->text = strdup(text)) == NULL) | |
680 | { | |
681 | free(w); | |
682 | return (NULL); | |
683 | } | |
684 | ||
685 | cupsArrayAdd(n->words, w); | |
686 | } | |
687 | ||
688 | /* | |
689 | * Bump the counter for this word and return it... | |
690 | */ | |
691 | ||
692 | w->count ++; | |
693 | ||
694 | return (w); | |
695 | } | |
696 | ||
697 | ||
ef416fc2 | 698 | /* |
699 | * 'help_delete_node()' - Free all memory used by a node. | |
700 | */ | |
701 | ||
702 | static void | |
703 | help_delete_node(help_node_t *n) /* I - Node */ | |
704 | { | |
f7deaa1a | 705 | help_word_t *w; /* Current word */ |
706 | ||
707 | ||
85dda01c | 708 | DEBUG_printf(("2help_delete_node(n=%p)", n)); |
ef416fc2 | 709 | |
710 | if (!n) | |
711 | return; | |
712 | ||
713 | if (n->filename) | |
714 | free(n->filename); | |
715 | ||
716 | if (n->anchor) | |
717 | free(n->anchor); | |
718 | ||
719 | if (n->section) | |
720 | free(n->section); | |
721 | ||
722 | if (n->text) | |
723 | free(n->text); | |
724 | ||
f7deaa1a | 725 | for (w = (help_word_t *)cupsArrayFirst(n->words); |
726 | w; | |
727 | w = (help_word_t *)cupsArrayNext(n->words)) | |
728 | help_delete_word(w); | |
729 | ||
730 | cupsArrayDelete(n->words); | |
731 | ||
ef416fc2 | 732 | free(n); |
733 | } | |
734 | ||
735 | ||
f7deaa1a | 736 | /* |
737 | * 'help_delete_word()' - Free all memory used by a word. | |
738 | */ | |
739 | ||
740 | static void | |
741 | help_delete_word(help_word_t *w) /* I - Word */ | |
742 | { | |
85dda01c | 743 | DEBUG_printf(("2help_delete_word(w=%p)", w)); |
f7deaa1a | 744 | |
745 | if (!w) | |
746 | return; | |
747 | ||
748 | if (w->text) | |
749 | free(w->text); | |
750 | ||
751 | free(w); | |
752 | } | |
753 | ||
754 | ||
ef416fc2 | 755 | /* |
756 | * 'help_load_directory()' - Load a directory of files into an index. | |
757 | */ | |
758 | ||
759 | static int /* O - 0 = success, -1 = error, 1 = updated */ | |
760 | help_load_directory( | |
761 | help_index_t *hi, /* I - Index */ | |
762 | const char *directory, /* I - Directory */ | |
763 | const char *relative) /* I - Relative path */ | |
764 | { | |
ef416fc2 | 765 | cups_dir_t *dir; /* Directory file */ |
766 | cups_dentry_t *dent; /* Directory entry */ | |
767 | char *ext, /* Pointer to extension */ | |
768 | filename[1024], /* Full filename */ | |
769 | relname[1024]; /* Relative filename */ | |
770 | int update; /* Updated? */ | |
ecdc0628 | 771 | help_node_t *node; /* Current node */ |
ef416fc2 | 772 | |
773 | ||
85dda01c MS |
774 | DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")", |
775 | hi, directory, relative)); | |
ef416fc2 | 776 | |
777 | /* | |
778 | * Open the directory and scan it... | |
779 | */ | |
780 | ||
781 | if ((dir = cupsDirOpen(directory)) == NULL) | |
782 | return (0); | |
783 | ||
784 | update = 0; | |
785 | ||
786 | while ((dent = cupsDirRead(dir)) != NULL) | |
787 | { | |
ecdc0628 | 788 | /* |
789 | * Skip "." files... | |
790 | */ | |
791 | ||
792 | if (dent->filename[0] == '.') | |
793 | continue; | |
794 | ||
ef416fc2 | 795 | /* |
796 | * Get absolute and relative filenames... | |
797 | */ | |
798 | ||
799 | snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename); | |
800 | if (relative) | |
801 | snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename); | |
802 | else | |
803 | strlcpy(relname, dent->filename, sizeof(relname)); | |
804 | ||
805 | /* | |
806 | * Check if we have a HTML file... | |
807 | */ | |
808 | ||
809 | if ((ext = strstr(dent->filename, ".html")) != NULL && | |
810 | (!ext[5] || !strcmp(ext + 5, ".gz"))) | |
811 | { | |
812 | /* | |
813 | * HTML file, see if we have already indexed the file... | |
814 | */ | |
815 | ||
816 | if ((node = helpFindNode(hi, relname, NULL)) != NULL) | |
817 | { | |
818 | /* | |
819 | * File already indexed - check dates to confirm that the | |
820 | * index is up-to-date... | |
821 | */ | |
822 | ||
ecdc0628 | 823 | if (node->mtime == dent->fileinfo.st_mtime) |
ef416fc2 | 824 | { |
825 | /* | |
826 | * Same modification time, so mark all of the nodes | |
827 | * for this file as up-to-date... | |
828 | */ | |
829 | ||
ecdc0628 | 830 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
831 | if (!strcmp(node->filename, relname)) | |
832 | node->score = 0; | |
ef416fc2 | 833 | else |
834 | break; | |
835 | ||
836 | continue; | |
837 | } | |
838 | } | |
839 | ||
840 | update = 1; | |
841 | ||
842 | help_load_file(hi, filename, relname, dent->fileinfo.st_mtime); | |
843 | } | |
844 | else if (S_ISDIR(dent->fileinfo.st_mode)) | |
845 | { | |
846 | /* | |
847 | * Process sub-directory... | |
848 | */ | |
849 | ||
850 | if (help_load_directory(hi, filename, relname) == 1) | |
851 | update = 1; | |
852 | } | |
853 | } | |
854 | ||
855 | cupsDirClose(dir); | |
856 | ||
857 | return (update); | |
858 | } | |
859 | ||
860 | ||
861 | /* | |
862 | * 'help_load_file()' - Load a HTML files into an index. | |
863 | */ | |
864 | ||
865 | static int /* O - 0 = success, -1 = error */ | |
866 | help_load_file( | |
867 | help_index_t *hi, /* I - Index */ | |
868 | const char *filename, /* I - Filename */ | |
869 | const char *relative, /* I - Relative path */ | |
870 | time_t mtime) /* I - Modification time */ | |
871 | { | |
872 | cups_file_t *fp; /* HTML file */ | |
ecdc0628 | 873 | help_node_t *node; /* Current node */ |
ef416fc2 | 874 | char line[1024], /* Line from file */ |
f42414bf | 875 | temp[1024], /* Temporary word */ |
ef416fc2 | 876 | section[1024], /* Section */ |
877 | *ptr, /* Pointer into line */ | |
878 | *anchor, /* Anchor name */ | |
879 | *text; /* Text for anchor */ | |
880 | off_t offset; /* File offset */ | |
881 | char quote; /* Quote character */ | |
f7deaa1a | 882 | help_word_t *word; /* Current word */ |
883 | int wordlen; /* Length of word */ | |
ef416fc2 | 884 | |
885 | ||
85dda01c MS |
886 | DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", " |
887 | "mtime=%ld)", hi, filename, relative, mtime)); | |
ef416fc2 | 888 | |
889 | if ((fp = cupsFileOpen(filename, "r")) == NULL) | |
890 | return (-1); | |
891 | ||
892 | node = NULL; | |
893 | offset = 0; | |
894 | ||
5a9febac | 895 | strlcpy(section, "Other", sizeof(section)); |
ef416fc2 | 896 | |
897 | while (cupsFileGets(fp, line, sizeof(line))) | |
898 | { | |
899 | /* | |
900 | * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix... | |
901 | */ | |
902 | ||
88f9aafc | 903 | if (!_cups_strncasecmp(line, "<!-- SECTION:", 13)) |
ef416fc2 | 904 | { |
905 | /* | |
906 | * Got section line, copy it! | |
907 | */ | |
908 | ||
909 | for (ptr = line + 13; isspace(*ptr & 255); ptr ++); | |
910 | ||
911 | strlcpy(section, ptr, sizeof(section)); | |
912 | if ((ptr = strstr(section, "-->")) != NULL) | |
913 | { | |
914 | /* | |
915 | * Strip comment stuff from end of line... | |
916 | */ | |
917 | ||
918 | for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0'); | |
919 | ||
920 | if (isspace(*ptr & 255)) | |
921 | *ptr = '\0'; | |
922 | } | |
923 | continue; | |
924 | } | |
925 | ||
926 | for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;) | |
927 | { | |
928 | ptr ++; | |
929 | ||
88f9aafc | 930 | if (!_cups_strncasecmp(ptr, "TITLE>", 6)) |
ef416fc2 | 931 | { |
932 | /* | |
933 | * Found the title... | |
934 | */ | |
935 | ||
936 | anchor = NULL; | |
937 | ptr += 6; | |
938 | } | |
88f9aafc | 939 | else if (!_cups_strncasecmp(ptr, "A NAME=", 7)) |
ef416fc2 | 940 | { |
941 | /* | |
942 | * Found an anchor... | |
943 | */ | |
944 | ||
945 | ptr += 7; | |
946 | ||
947 | if (*ptr == '\"' || *ptr == '\'') | |
948 | { | |
949 | /* | |
950 | * Get quoted anchor... | |
951 | */ | |
952 | ||
953 | quote = *ptr; | |
954 | anchor = ptr + 1; | |
955 | if ((ptr = strchr(anchor, quote)) != NULL) | |
956 | *ptr++ = '\0'; | |
957 | else | |
958 | break; | |
959 | } | |
960 | else | |
961 | { | |
962 | /* | |
963 | * Get unquoted anchor... | |
964 | */ | |
965 | ||
966 | anchor = ptr + 1; | |
967 | ||
968 | for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++); | |
969 | ||
970 | if (*ptr) | |
971 | *ptr++ = '\0'; | |
972 | else | |
973 | break; | |
974 | } | |
975 | ||
976 | /* | |
977 | * Got the anchor, now lets find the end... | |
978 | */ | |
979 | ||
980 | while (*ptr && *ptr != '>') | |
981 | ptr ++; | |
982 | ||
983 | if (*ptr != '>') | |
984 | break; | |
985 | ||
986 | ptr ++; | |
987 | } | |
988 | else | |
989 | continue; | |
990 | ||
991 | /* | |
992 | * Now collect text for the link... | |
993 | */ | |
994 | ||
995 | text = ptr; | |
996 | while ((ptr = strchr(text, '<')) == NULL) | |
997 | { | |
998 | ptr = text + strlen(text); | |
999 | if (ptr >= (line + sizeof(line) - 2)) | |
1000 | break; | |
1001 | ||
1002 | *ptr++ = ' '; | |
1003 | ||
7e86f2f6 | 1004 | if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1)) |
ef416fc2 | 1005 | break; |
1006 | } | |
1007 | ||
1008 | *ptr = '\0'; | |
1009 | ||
1010 | if (node) | |
7e86f2f6 | 1011 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 1012 | |
1013 | if (!*text) | |
1014 | { | |
1015 | node = NULL; | |
1016 | break; | |
1017 | } | |
1018 | ||
ecdc0628 | 1019 | if ((node = helpFindNode(hi, relative, anchor)) != NULL) |
ef416fc2 | 1020 | { |
1021 | /* | |
1022 | * Node already in the index, so replace the text and other | |
1023 | * data... | |
1024 | */ | |
1025 | ||
ecdc0628 | 1026 | cupsArrayRemove(hi->nodes, node); |
ef416fc2 | 1027 | |
1028 | if (node->section) | |
1029 | free(node->section); | |
1030 | ||
1031 | if (node->text) | |
1032 | free(node->text); | |
1033 | ||
f7deaa1a | 1034 | if (node->words) |
1035 | { | |
1036 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
1037 | word; | |
1038 | word = (help_word_t *)cupsArrayNext(node->words)) | |
1039 | help_delete_word(word); | |
1040 | ||
1041 | cupsArrayDelete(node->words); | |
1042 | node->words = NULL; | |
1043 | } | |
1044 | ||
ef416fc2 | 1045 | node->section = section[0] ? strdup(section) : NULL; |
1046 | node->text = strdup(text); | |
1047 | node->mtime = mtime; | |
1048 | node->offset = offset; | |
1049 | node->score = 0; | |
1050 | } | |
1051 | else | |
1052 | { | |
1053 | /* | |
1054 | * New node... | |
1055 | */ | |
1056 | ||
1057 | node = help_new_node(relative, anchor, section, text, mtime, offset, 0); | |
ef416fc2 | 1058 | } |
1059 | ||
1060 | /* | |
1061 | * Go through the text value and replace tabs and newlines with | |
1062 | * whitespace and eliminate extra whitespace... | |
1063 | */ | |
1064 | ||
1065 | for (ptr = node->text, text = node->text; *ptr;) | |
1066 | if (isspace(*ptr & 255)) | |
1067 | { | |
1068 | while (isspace(*ptr & 255)) | |
ed486911 | 1069 | ptr ++; |
ef416fc2 | 1070 | |
1071 | *text++ = ' '; | |
1072 | } | |
1073 | else if (text != ptr) | |
1074 | *text++ = *ptr++; | |
1075 | else | |
1076 | { | |
1077 | text ++; | |
1078 | ptr ++; | |
1079 | } | |
1080 | ||
1081 | *text = '\0'; | |
1082 | ||
ecdc0628 | 1083 | /* |
1084 | * (Re)add the node to the array... | |
1085 | */ | |
1086 | ||
1087 | cupsArrayAdd(hi->nodes, node); | |
f7deaa1a | 1088 | |
1089 | if (!anchor) | |
1090 | node = NULL; | |
ef416fc2 | 1091 | break; |
1092 | } | |
1093 | ||
f7deaa1a | 1094 | if (node) |
1095 | { | |
1096 | /* | |
1097 | * Scan this line for words... | |
1098 | */ | |
1099 | ||
1100 | for (ptr = line; *ptr; ptr ++) | |
1101 | { | |
1102 | /* | |
1103 | * Skip HTML stuff... | |
1104 | */ | |
1105 | ||
1106 | if (*ptr == '<') | |
1107 | { | |
1108 | if (!strncmp(ptr, "<!--", 4)) | |
1109 | { | |
1110 | /* | |
1111 | * Skip HTML comment... | |
1112 | */ | |
1113 | ||
1114 | if ((text = strstr(ptr + 4, "-->")) == NULL) | |
1115 | ptr += strlen(ptr) - 1; | |
1116 | else | |
1117 | ptr = text + 2; | |
1118 | } | |
1119 | else | |
1120 | { | |
1121 | /* | |
1122 | * Skip HTML element... | |
1123 | */ | |
1124 | ||
1125 | for (ptr ++; *ptr && *ptr != '>'; ptr ++) | |
f42414bf | 1126 | { |
f7deaa1a | 1127 | if (*ptr == '\"' || *ptr == '\'') |
1128 | { | |
1129 | for (quote = *ptr++; *ptr && *ptr != quote; ptr ++); | |
1130 | ||
1131 | if (!*ptr) | |
1132 | ptr --; | |
1133 | } | |
f42414bf | 1134 | } |
f7deaa1a | 1135 | |
1136 | if (!*ptr) | |
1137 | ptr --; | |
1138 | } | |
1139 | ||
1140 | continue; | |
1141 | } | |
1142 | else if (*ptr == '&') | |
1143 | { | |
1144 | /* | |
1145 | * Skip HTML entity... | |
1146 | */ | |
1147 | ||
1148 | for (ptr ++; *ptr && *ptr != ';'; ptr ++); | |
1149 | ||
1150 | if (!*ptr) | |
1151 | ptr --; | |
1152 | ||
1153 | continue; | |
1154 | } | |
1155 | else if (!isalnum(*ptr & 255)) | |
1156 | continue; | |
1157 | ||
1158 | /* | |
1159 | * Found the start of a word, search until we find the end... | |
1160 | */ | |
1161 | ||
1162 | for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++); | |
1163 | ||
7e86f2f6 | 1164 | wordlen = (int)(ptr - text); |
f7deaa1a | 1165 | |
07623986 | 1166 | memcpy(temp, text, (size_t)wordlen); |
f42414bf | 1167 | temp[wordlen] = '\0'; |
1168 | ||
1169 | ptr --; | |
f7deaa1a | 1170 | |
f42414bf | 1171 | if (wordlen > 1 && !bsearch(temp, help_common_words, |
f7deaa1a | 1172 | (sizeof(help_common_words) / |
1173 | sizeof(help_common_words[0])), | |
1174 | sizeof(help_common_words[0]), | |
1175 | (int (*)(const void *, const void *)) | |
88f9aafc | 1176 | _cups_strcasecmp)) |
f42414bf | 1177 | help_add_word(node, temp); |
f7deaa1a | 1178 | } |
1179 | } | |
1180 | ||
ef416fc2 | 1181 | /* |
1182 | * Get the offset of the next line... | |
1183 | */ | |
1184 | ||
1185 | offset = cupsFileTell(fp); | |
1186 | } | |
1187 | ||
1188 | cupsFileClose(fp); | |
1189 | ||
1190 | if (node) | |
7e86f2f6 | 1191 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 1192 | |
1193 | return (0); | |
1194 | } | |
1195 | ||
1196 | ||
1197 | /* | |
1198 | * 'help_new_node()' - Create a new node and add it to an index. | |
1199 | */ | |
1200 | ||
1201 | static help_node_t * /* O - Node pointer or NULL on error */ | |
1202 | help_new_node(const char *filename, /* I - Filename */ | |
1203 | const char *anchor, /* I - Anchor */ | |
1204 | const char *section, /* I - Section */ | |
1205 | const char *text, /* I - Text */ | |
1206 | time_t mtime, /* I - Modification time */ | |
1207 | off_t offset, /* I - Offset in file */ | |
1208 | size_t length) /* I - Length in bytes */ | |
1209 | { | |
1210 | help_node_t *n; /* Node */ | |
1211 | ||
1212 | ||
85dda01c MS |
1213 | DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", " |
1214 | "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text, | |
1215 | (long)mtime, (long)offset, (long)length)); | |
ef416fc2 | 1216 | |
1217 | n = (help_node_t *)calloc(1, sizeof(help_node_t)); | |
1218 | if (!n) | |
1219 | return (NULL); | |
1220 | ||
1221 | n->filename = strdup(filename); | |
1222 | n->anchor = anchor ? strdup(anchor) : NULL; | |
1223 | n->section = (section && *section) ? strdup(section) : NULL; | |
1224 | n->text = strdup(text); | |
1225 | n->mtime = mtime; | |
1226 | n->offset = offset; | |
1227 | n->length = length; | |
1228 | ||
1229 | return (n); | |
1230 | } | |
1231 | ||
1232 | ||
1233 | /* | |
1234 | * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor. | |
1235 | */ | |
1236 | ||
1237 | static int /* O - Difference */ | |
ecdc0628 | 1238 | help_sort_by_name(help_node_t *n1, /* I - First node */ |
1239 | help_node_t *n2) /* I - Second node */ | |
ef416fc2 | 1240 | { |
ef416fc2 | 1241 | int diff; /* Difference */ |
1242 | ||
1243 | ||
85dda01c MS |
1244 | DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)", |
1245 | n1, n1->filename, n1->anchor, | |
1246 | n2, n2->filename, n2->anchor)); | |
ef416fc2 | 1247 | |
ecdc0628 | 1248 | if ((diff = strcmp(n1->filename, n2->filename)) != 0) |
ef416fc2 | 1249 | return (diff); |
1250 | ||
ecdc0628 | 1251 | if (!n1->anchor && !n2->anchor) |
ef416fc2 | 1252 | return (0); |
ecdc0628 | 1253 | else if (!n1->anchor) |
ef416fc2 | 1254 | return (-1); |
ecdc0628 | 1255 | else if (!n2->anchor) |
ef416fc2 | 1256 | return (1); |
1257 | else | |
ecdc0628 | 1258 | return (strcmp(n1->anchor, n2->anchor)); |
ef416fc2 | 1259 | } |
1260 | ||
1261 | ||
1262 | /* | |
1263 | * 'help_sort_nodes_by_score()' - Sort nodes by score and text. | |
1264 | */ | |
1265 | ||
1266 | static int /* O - Difference */ | |
ecdc0628 | 1267 | help_sort_by_score(help_node_t *n1, /* I - First node */ |
1268 | help_node_t *n2) /* I - Second node */ | |
ef416fc2 | 1269 | { |
ef416fc2 | 1270 | int diff; /* Difference */ |
1271 | ||
1272 | ||
85dda01c MS |
1273 | DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), " |
1274 | "n2=%p(%d \"%s\" \"%s\")", | |
1275 | n1, n1->score, n1->section, n1->text, | |
1276 | n2, n2->score, n2->section, n2->text)); | |
ef416fc2 | 1277 | |
ecdc0628 | 1278 | if (n1->score != n2->score) |
1f0275e3 | 1279 | return (n2->score - n1->score); |
ef416fc2 | 1280 | |
ecdc0628 | 1281 | if (n1->section && !n2->section) |
ef416fc2 | 1282 | return (1); |
ecdc0628 | 1283 | else if (!n1->section && n2->section) |
ef416fc2 | 1284 | return (-1); |
ecdc0628 | 1285 | else if (n1->section && n2->section && |
1286 | (diff = strcmp(n1->section, n2->section)) != 0) | |
ef416fc2 | 1287 | return (diff); |
1288 | ||
88f9aafc | 1289 | return (_cups_strcasecmp(n1->text, n2->text)); |
ef416fc2 | 1290 | } |
1291 | ||
1292 | ||
1293 | /* | |
f7deaa1a | 1294 | * 'help_sort_words()' - Sort words alphabetically. |
1295 | */ | |
1296 | ||
1297 | static int /* O - Difference */ | |
1298 | help_sort_words(help_word_t *w1, /* I - Second word */ | |
1299 | help_word_t *w2) /* I - Second word */ | |
1300 | { | |
85dda01c | 1301 | DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))", |
f7deaa1a | 1302 | w1, w1->text, w2, w2->text)); |
1303 | ||
88f9aafc | 1304 | return (_cups_strcasecmp(w1->text, w2->text)); |
f7deaa1a | 1305 | } |
1306 | ||
1307 | ||
1308 | /* | |
f2d18633 | 1309 | * End of "$Id$". |
ef416fc2 | 1310 | */ |