]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
7e86f2f6 | 2 | * Online help index routines for CUPS. |
ef416fc2 | 3 | * |
76b6aade | 4 | * Copyright © 2020-2024 by OpenPrinting. |
507c4adc MS |
5 | * Copyright © 2007-2019 by Apple Inc. |
6 | * Copyright © 1997-2007 by Easy Software Products. | |
ef416fc2 | 7 | * |
507c4adc MS |
8 | * Licensed under Apache License v2.0. See the file "LICENSE" for more |
9 | * information. | |
ef416fc2 | 10 | */ |
11 | ||
12 | /* | |
13 | * Include necessary headers... | |
14 | */ | |
15 | ||
16 | #include "cgi-private.h" | |
17 | #include <cups/dir.h> | |
18 | ||
19 | ||
/*
 * List of common English words that should not be indexed...
 *
 * Every entry is at most 5 characters long (plus the terminating nul).  The
 * table is searched with bsearch(), so it MUST be kept in ascending
 * alphabetical order.  It is never modified at run-time and is therefore
 * declared const.
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
123 | ||
124 | ||
ef416fc2 | 125 | /* |
126 | * Local functions... | |
127 | */ | |
128 | ||
f7deaa1a | 129 | static help_word_t *help_add_word(help_node_t *n, const char *text); |
ef416fc2 | 130 | static void help_delete_node(help_node_t *n); |
f7deaa1a | 131 | static void help_delete_word(help_word_t *w); |
ef416fc2 | 132 | static int help_load_directory(help_index_t *hi, |
133 | const char *directory, | |
134 | const char *relative); | |
135 | static int help_load_file(help_index_t *hi, | |
136 | const char *filename, | |
137 | const char *relative, | |
138 | time_t mtime); | |
a32af27c | 139 | static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4); |
cfe4c0c3 R |
140 | static int help_sort_by_name(help_node_t *p1, help_node_t *p2, void *data); |
141 | static int help_sort_by_score(help_node_t *p1, help_node_t *p2, void *data); | |
142 | static int help_sort_words(help_word_t *w1, help_word_t *w2, void *data); | |
ef416fc2 | 143 | |
144 | ||
145 | /* | |
146 | * 'helpDeleteIndex()' - Delete an index, freeing all memory used. | |
147 | */ | |
148 | ||
149 | void | |
ecdc0628 | 150 | helpDeleteIndex(help_index_t *hi) /* I - Help index */ |
ef416fc2 | 151 | { |
ecdc0628 | 152 | help_node_t *node; /* Current node */ |
ef416fc2 | 153 | |
154 | ||
ef416fc2 | 155 | if (!hi) |
156 | return; | |
157 | ||
ecdc0628 | 158 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
159 | node; | |
160 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 161 | { |
ecdc0628 | 162 | if (!hi->search) |
163 | help_delete_node(node); | |
164 | } | |
ef416fc2 | 165 | |
ecdc0628 | 166 | cupsArrayDelete(hi->nodes); |
167 | cupsArrayDelete(hi->sorted); | |
ef416fc2 | 168 | |
169 | free(hi); | |
170 | } | |
171 | ||
172 | ||
173 | /* | |
174 | * 'helpFindNode()' - Find a node in an index. | |
175 | */ | |
176 | ||
ecdc0628 | 177 | help_node_t * /* O - Node pointer or NULL */ |
ef416fc2 | 178 | helpFindNode(help_index_t *hi, /* I - Index */ |
179 | const char *filename, /* I - Filename */ | |
180 | const char *anchor) /* I - Anchor */ | |
181 | { | |
ecdc0628 | 182 | help_node_t key; /* Search key */ |
ef416fc2 | 183 | |
184 | ||
ef416fc2 | 185 | /* |
186 | * Range check input... | |
187 | */ | |
188 | ||
189 | if (!hi || !filename) | |
190 | return (NULL); | |
191 | ||
192 | /* | |
193 | * Initialize the search key... | |
194 | */ | |
195 | ||
196 | key.filename = (char *)filename; | |
197 | key.anchor = (char *)anchor; | |
ef416fc2 | 198 | |
199 | /* | |
200 | * Return any match... | |
201 | */ | |
202 | ||
ecdc0628 | 203 | return ((help_node_t *)cupsArrayFind(hi->nodes, &key)); |
ef416fc2 | 204 | } |
205 | ||
206 | ||
207 | /* | |
208 | * 'helpLoadIndex()' - Load a help index from disk. | |
209 | */ | |
210 | ||
211 | help_index_t * /* O - Index pointer or NULL */ | |
212 | helpLoadIndex(const char *hifile, /* I - Index filename */ | |
213 | const char *directory) /* I - Directory that is indexed */ | |
214 | { | |
215 | help_index_t *hi; /* Help index */ | |
216 | cups_file_t *fp; /* Current file */ | |
217 | char line[2048], /* Line from file */ | |
218 | *ptr, /* Pointer into line */ | |
219 | *filename, /* Filename in line */ | |
220 | *anchor, /* Anchor in line */ | |
221 | *sectptr, /* Section pointer in line */ | |
222 | section[1024], /* Section name */ | |
223 | *text; /* Text in line */ | |
224 | time_t mtime; /* Modification time */ | |
225 | off_t offset; /* Offset into file */ | |
226 | size_t length; /* Length in bytes */ | |
227 | int update; /* Update? */ | |
ef416fc2 | 228 | help_node_t *node; /* Current node */ |
f7deaa1a | 229 | help_word_t *word; /* Current word */ |
ef416fc2 | 230 | |
231 | ||
ef416fc2 | 232 | /* |
233 | * Create a new, empty index. | |
234 | */ | |
235 | ||
ecdc0628 | 236 | if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL) |
237 | return (NULL); | |
238 | ||
239 | hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); | |
240 | hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
241 | ||
242 | if (!hi->nodes || !hi->sorted) | |
243 | { | |
244 | cupsArrayDelete(hi->nodes); | |
245 | cupsArrayDelete(hi->sorted); | |
246 | free(hi); | |
247 | return (NULL); | |
248 | } | |
ef416fc2 | 249 | |
250 | /* | |
251 | * Try loading the existing index file... | |
252 | */ | |
253 | ||
254 | if ((fp = cupsFileOpen(hifile, "r")) != NULL) | |
255 | { | |
256 | /* | |
257 | * Lock the file and then read the first line... | |
258 | */ | |
259 | ||
260 | cupsFileLock(fp, 1); | |
261 | ||
f7deaa1a | 262 | if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2")) |
ef416fc2 | 263 | { |
264 | /* | |
265 | * Got a valid header line, now read the data lines... | |
266 | */ | |
267 | ||
f7deaa1a | 268 | node = NULL; |
269 | ||
ef416fc2 | 270 | while (cupsFileGets(fp, line, sizeof(line))) |
271 | { | |
272 | /* | |
273 | * Each line looks like one of the following: | |
274 | * | |
275 | * filename mtime offset length "section" "text" | |
276 | * filename#anchor offset length "text" | |
f7deaa1a | 277 | * SP count word |
ef416fc2 | 278 | */ |
279 | ||
f7deaa1a | 280 | if (line[0] == ' ') |
ef416fc2 | 281 | { |
f7deaa1a | 282 | /* |
283 | * Read a word in the current node... | |
284 | */ | |
ef416fc2 | 285 | |
f7deaa1a | 286 | if (!node || (ptr = strrchr(line, ' ')) == NULL) |
287 | continue; | |
ef416fc2 | 288 | |
f7deaa1a | 289 | if ((word = help_add_word(node, ptr + 1)) != NULL) |
290 | word->count = atoi(line + 1); | |
291 | } | |
292 | else | |
ef416fc2 | 293 | { |
294 | /* | |
f7deaa1a | 295 | * Add a node... |
ef416fc2 | 296 | */ |
297 | ||
f7deaa1a | 298 | filename = line; |
ef416fc2 | 299 | |
f7deaa1a | 300 | if ((ptr = strchr(line, ' ')) == NULL) |
301 | break; | |
ef416fc2 | 302 | |
f7deaa1a | 303 | while (isspace(*ptr & 255)) |
304 | *ptr++ = '\0'; | |
ef416fc2 | 305 | |
f7deaa1a | 306 | if ((anchor = strrchr(filename, '#')) != NULL) |
307 | { | |
308 | *anchor++ = '\0'; | |
309 | mtime = 0; | |
310 | } | |
311 | else | |
312 | mtime = strtol(ptr, &ptr, 10); | |
ef416fc2 | 313 | |
f7deaa1a | 314 | offset = strtoll(ptr, &ptr, 10); |
7e86f2f6 | 315 | length = (size_t)strtoll(ptr, &ptr, 10); |
ef416fc2 | 316 | |
317 | while (isspace(*ptr & 255)) | |
318 | ptr ++; | |
ef416fc2 | 319 | |
f7deaa1a | 320 | if (!anchor) |
321 | { | |
322 | /* | |
323 | * Get section... | |
324 | */ | |
ef416fc2 | 325 | |
f7deaa1a | 326 | if (*ptr != '\"') |
327 | break; | |
ef416fc2 | 328 | |
f7deaa1a | 329 | ptr ++; |
330 | sectptr = ptr; | |
ef416fc2 | 331 | |
f7deaa1a | 332 | while (*ptr && *ptr != '\"') |
333 | ptr ++; | |
334 | ||
335 | if (*ptr != '\"') | |
336 | break; | |
ef416fc2 | 337 | |
f7deaa1a | 338 | *ptr++ = '\0'; |
ef416fc2 | 339 | |
6ac4da6b | 340 | cupsCopyString(section, sectptr, sizeof(section)); |
ef416fc2 | 341 | |
f7deaa1a | 342 | while (isspace(*ptr & 255)) |
343 | ptr ++; | |
344 | } | |
507c4adc MS |
345 | else |
346 | section[0] = '\0'; | |
ecdc0628 | 347 | |
f7deaa1a | 348 | if (*ptr != '\"') |
349 | break; | |
350 | ||
351 | ptr ++; | |
352 | text = ptr; | |
353 | ||
354 | while (*ptr && *ptr != '\"') | |
355 | ptr ++; | |
356 | ||
357 | if (*ptr != '\"') | |
358 | break; | |
359 | ||
360 | *ptr++ = '\0'; | |
361 | ||
362 | if ((node = help_new_node(filename, anchor, section, text, | |
363 | mtime, offset, length)) == NULL) | |
364 | break; | |
365 | ||
366 | node->score = -1; | |
367 | ||
368 | cupsArrayAdd(hi->nodes, node); | |
369 | } | |
ef416fc2 | 370 | } |
371 | } | |
372 | ||
373 | cupsFileClose(fp); | |
374 | } | |
375 | ||
376 | /* | |
377 | * Scan for new/updated files... | |
378 | */ | |
379 | ||
380 | update = help_load_directory(hi, directory, NULL); | |
381 | ||
382 | /* | |
383 | * Remove any files that are no longer installed... | |
384 | */ | |
385 | ||
ecdc0628 | 386 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
387 | node; | |
388 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
389 | if (node->score < 0) | |
ef416fc2 | 390 | { |
391 | /* | |
392 | * Delete this node... | |
393 | */ | |
394 | ||
ecdc0628 | 395 | cupsArrayRemove(hi->nodes, node); |
396 | help_delete_node(node); | |
ef416fc2 | 397 | } |
ef416fc2 | 398 | |
399 | /* | |
ecdc0628 | 400 | * Add nodes to the sorted array... |
ef416fc2 | 401 | */ |
402 | ||
ecdc0628 | 403 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
404 | node; | |
405 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
406 | cupsArrayAdd(hi->sorted, node); | |
ef416fc2 | 407 | |
408 | /* | |
ecdc0628 | 409 | * Save the index if we updated it... |
ef416fc2 | 410 | */ |
411 | ||
ecdc0628 | 412 | if (update) |
413 | helpSaveIndex(hi, hifile); | |
ef416fc2 | 414 | |
415 | /* | |
416 | * Return the index... | |
417 | */ | |
418 | ||
419 | return (hi); | |
420 | } | |
421 | ||
422 | ||
423 | /* | |
424 | * 'helpSaveIndex()' - Save a help index to disk. | |
425 | */ | |
426 | ||
427 | int /* O - 0 on success, -1 on error */ | |
428 | helpSaveIndex(help_index_t *hi, /* I - Index */ | |
429 | const char *hifile) /* I - Index filename */ | |
430 | { | |
431 | cups_file_t *fp; /* Index file */ | |
ef416fc2 | 432 | help_node_t *node; /* Current node */ |
f7deaa1a | 433 | help_word_t *word; /* Current word */ |
ef416fc2 | 434 | |
435 | ||
ef416fc2 | 436 | /* |
437 | * Try creating a new index file... | |
438 | */ | |
439 | ||
440 | if ((fp = cupsFileOpen(hifile, "w9")) == NULL) | |
441 | return (-1); | |
442 | ||
443 | /* | |
444 | * Lock the file while we write it... | |
445 | */ | |
446 | ||
447 | cupsFileLock(fp, 1); | |
448 | ||
f7deaa1a | 449 | cupsFilePuts(fp, "HELPV2\n"); |
ef416fc2 | 450 | |
ecdc0628 | 451 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
452 | node; | |
453 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 454 | { |
455 | /* | |
456 | * Write the current node with/without the anchor... | |
457 | */ | |
458 | ||
ef416fc2 | 459 | if (node->anchor) |
460 | { | |
461 | if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n", | |
462 | node->filename, node->anchor, | |
463 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, | |
464 | node->text) < 0) | |
465 | break; | |
466 | } | |
467 | else | |
468 | { | |
469 | if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n", | |
ae71f5de | 470 | node->filename, (int)node->mtime, |
ef416fc2 | 471 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, |
472 | node->section ? node->section : "", node->text) < 0) | |
473 | break; | |
474 | } | |
f7deaa1a | 475 | |
476 | /* | |
477 | * Then write the words associated with the node... | |
478 | */ | |
479 | ||
480 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
481 | word; | |
482 | word = (help_word_t *)cupsArrayNext(node->words)) | |
483 | if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0) | |
484 | break; | |
ef416fc2 | 485 | } |
486 | ||
ecdc0628 | 487 | cupsFileFlush(fp); |
488 | ||
ef416fc2 | 489 | if (cupsFileClose(fp) < 0) |
490 | return (-1); | |
ecdc0628 | 491 | else if (node) |
ef416fc2 | 492 | return (-1); |
493 | else | |
494 | return (0); | |
495 | } | |
496 | ||
497 | ||
498 | /* | |
499 | * 'helpSearchIndex()' - Search an index. | |
500 | */ | |
501 | ||
502 | help_index_t * /* O - Search index */ | |
503 | helpSearchIndex(help_index_t *hi, /* I - Index */ | |
504 | const char *query, /* I - Query string */ | |
505 | const char *section, /* I - Limit search to this section */ | |
506 | const char *filename) /* I - Limit search to this file */ | |
507 | { | |
ef416fc2 | 508 | help_index_t *search; /* Search index */ |
ecdc0628 | 509 | help_node_t *node; /* Current node */ |
f7deaa1a | 510 | help_word_t *word; /* Current word */ |
ef416fc2 | 511 | void *sc; /* Search context */ |
512 | int matches; /* Number of matches */ | |
513 | ||
514 | ||
ef416fc2 | 515 | /* |
516 | * Range check... | |
517 | */ | |
518 | ||
519 | if (!hi || !query) | |
520 | return (NULL); | |
521 | ||
ecdc0628 | 522 | /* |
523 | * Reset the scores of all nodes to 0... | |
524 | */ | |
525 | ||
526 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); | |
527 | node; | |
528 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
529 | node->score = 0; | |
530 | ||
531 | /* | |
532 | * Find the first node to search in... | |
533 | */ | |
ef416fc2 | 534 | |
535 | if (filename) | |
536 | { | |
ecdc0628 | 537 | node = helpFindNode(hi, filename, NULL); |
538 | if (!node) | |
ef416fc2 | 539 | return (NULL); |
540 | } | |
541 | else | |
ecdc0628 | 542 | node = (help_node_t *)cupsArrayFirst(hi->nodes); |
ef416fc2 | 543 | |
544 | /* | |
545 | * Convert the query into a regular expression... | |
546 | */ | |
547 | ||
548 | sc = cgiCompileSearch(query); | |
549 | if (!sc) | |
550 | return (NULL); | |
551 | ||
552 | /* | |
553 | * Allocate a search index... | |
554 | */ | |
555 | ||
556 | search = calloc(1, sizeof(help_index_t)); | |
557 | if (!search) | |
558 | { | |
559 | cgiFreeSearch(sc); | |
560 | return (NULL); | |
561 | } | |
562 | ||
ecdc0628 | 563 | search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); |
564 | search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
321d8d57 | 565 | |
ecdc0628 | 566 | if (!search->nodes || !search->sorted) |
567 | { | |
568 | cupsArrayDelete(search->nodes); | |
569 | cupsArrayDelete(search->sorted); | |
570 | free(search); | |
571 | cgiFreeSearch(sc); | |
572 | return (NULL); | |
573 | } | |
574 | ||
ef416fc2 | 575 | search->search = 1; |
576 | ||
577 | /* | |
578 | * Check each node in the index, adding matching nodes to the | |
579 | * search index... | |
580 | */ | |
581 | ||
ecdc0628 | 582 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
edeb8af8 | 583 | if (node->section && section && strcmp(node->section, section)) |
ef416fc2 | 584 | continue; |
ecdc0628 | 585 | else if (filename && strcmp(node->filename, filename)) |
ef416fc2 | 586 | continue; |
f7deaa1a | 587 | else |
ef416fc2 | 588 | { |
f7deaa1a | 589 | matches = cgiDoSearch(sc, node->text); |
590 | ||
591 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
592 | word; | |
593 | word = (help_word_t *)cupsArrayNext(node->words)) | |
594 | if (cgiDoSearch(sc, word->text) > 0) | |
595 | matches += word->count; | |
ef416fc2 | 596 | |
f7deaa1a | 597 | if (matches > 0) |
598 | { | |
599 | /* | |
600 | * Found a match, add the node to the search index... | |
601 | */ | |
ef416fc2 | 602 | |
f7deaa1a | 603 | node->score = matches; |
604 | ||
321d8d57 MS |
605 | cupsArrayAdd(search->nodes, node); |
606 | cupsArrayAdd(search->sorted, node); | |
f7deaa1a | 607 | } |
ef416fc2 | 608 | } |
609 | ||
610 | /* | |
611 | * Free the search context... | |
612 | */ | |
613 | ||
614 | cgiFreeSearch(sc); | |
615 | ||
ef416fc2 | 616 | /* |
617 | * Return the results... | |
618 | */ | |
619 | ||
620 | return (search); | |
621 | } | |
622 | ||
623 | ||
f7deaa1a | 624 | /* |
625 | * 'help_add_word()' - Add a word to a node. | |
626 | */ | |
627 | ||
628 | static help_word_t * /* O - New word */ | |
629 | help_add_word(help_node_t *n, /* I - Node */ | |
630 | const char *text) /* I - Word text */ | |
631 | { | |
632 | help_word_t *w, /* New word */ | |
633 | key; /* Search key */ | |
634 | ||
635 | ||
f7deaa1a | 636 | /* |
637 | * Create the words array as needed... | |
638 | */ | |
639 | ||
640 | if (!n->words) | |
641 | n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL); | |
642 | ||
643 | /* | |
644 | * See if the word is already added... | |
645 | */ | |
646 | ||
647 | key.text = (char *)text; | |
648 | ||
649 | if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL) | |
650 | { | |
651 | /* | |
652 | * Create a new word... | |
653 | */ | |
654 | ||
655 | if ((w = calloc(1, sizeof(help_word_t))) == NULL) | |
656 | return (NULL); | |
657 | ||
658 | if ((w->text = strdup(text)) == NULL) | |
659 | { | |
660 | free(w); | |
661 | return (NULL); | |
662 | } | |
663 | ||
664 | cupsArrayAdd(n->words, w); | |
665 | } | |
666 | ||
667 | /* | |
668 | * Bump the counter for this word and return it... | |
669 | */ | |
670 | ||
671 | w->count ++; | |
672 | ||
673 | return (w); | |
674 | } | |
675 | ||
676 | ||
ef416fc2 | 677 | /* |
678 | * 'help_delete_node()' - Free all memory used by a node. | |
679 | */ | |
680 | ||
681 | static void | |
682 | help_delete_node(help_node_t *n) /* I - Node */ | |
683 | { | |
f7deaa1a | 684 | help_word_t *w; /* Current word */ |
685 | ||
686 | ||
ef416fc2 | 687 | if (!n) |
688 | return; | |
689 | ||
690 | if (n->filename) | |
691 | free(n->filename); | |
692 | ||
693 | if (n->anchor) | |
694 | free(n->anchor); | |
695 | ||
696 | if (n->section) | |
697 | free(n->section); | |
698 | ||
699 | if (n->text) | |
700 | free(n->text); | |
701 | ||
f7deaa1a | 702 | for (w = (help_word_t *)cupsArrayFirst(n->words); |
703 | w; | |
704 | w = (help_word_t *)cupsArrayNext(n->words)) | |
705 | help_delete_word(w); | |
706 | ||
707 | cupsArrayDelete(n->words); | |
708 | ||
ef416fc2 | 709 | free(n); |
710 | } | |
711 | ||
712 | ||
f7deaa1a | 713 | /* |
714 | * 'help_delete_word()' - Free all memory used by a word. | |
715 | */ | |
716 | ||
717 | static void | |
718 | help_delete_word(help_word_t *w) /* I - Word */ | |
719 | { | |
f7deaa1a | 720 | if (!w) |
721 | return; | |
722 | ||
723 | if (w->text) | |
724 | free(w->text); | |
725 | ||
726 | free(w); | |
727 | } | |
728 | ||
729 | ||
ef416fc2 | 730 | /* |
731 | * 'help_load_directory()' - Load a directory of files into an index. | |
732 | */ | |
733 | ||
734 | static int /* O - 0 = success, -1 = error, 1 = updated */ | |
735 | help_load_directory( | |
736 | help_index_t *hi, /* I - Index */ | |
737 | const char *directory, /* I - Directory */ | |
738 | const char *relative) /* I - Relative path */ | |
739 | { | |
ef416fc2 | 740 | cups_dir_t *dir; /* Directory file */ |
741 | cups_dentry_t *dent; /* Directory entry */ | |
742 | char *ext, /* Pointer to extension */ | |
743 | filename[1024], /* Full filename */ | |
744 | relname[1024]; /* Relative filename */ | |
745 | int update; /* Updated? */ | |
ecdc0628 | 746 | help_node_t *node; /* Current node */ |
ef416fc2 | 747 | |
748 | ||
ef416fc2 | 749 | /* |
750 | * Open the directory and scan it... | |
751 | */ | |
752 | ||
753 | if ((dir = cupsDirOpen(directory)) == NULL) | |
754 | return (0); | |
755 | ||
756 | update = 0; | |
757 | ||
758 | while ((dent = cupsDirRead(dir)) != NULL) | |
759 | { | |
ecdc0628 | 760 | /* |
761 | * Skip "." files... | |
762 | */ | |
763 | ||
764 | if (dent->filename[0] == '.') | |
765 | continue; | |
766 | ||
ef416fc2 | 767 | /* |
768 | * Get absolute and relative filenames... | |
769 | */ | |
770 | ||
771 | snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename); | |
772 | if (relative) | |
773 | snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename); | |
774 | else | |
6ac4da6b | 775 | cupsCopyString(relname, dent->filename, sizeof(relname)); |
ef416fc2 | 776 | |
777 | /* | |
778 | * Check if we have a HTML file... | |
779 | */ | |
780 | ||
781 | if ((ext = strstr(dent->filename, ".html")) != NULL && | |
782 | (!ext[5] || !strcmp(ext + 5, ".gz"))) | |
783 | { | |
784 | /* | |
785 | * HTML file, see if we have already indexed the file... | |
786 | */ | |
787 | ||
788 | if ((node = helpFindNode(hi, relname, NULL)) != NULL) | |
789 | { | |
790 | /* | |
791 | * File already indexed - check dates to confirm that the | |
792 | * index is up-to-date... | |
793 | */ | |
794 | ||
ecdc0628 | 795 | if (node->mtime == dent->fileinfo.st_mtime) |
ef416fc2 | 796 | { |
797 | /* | |
798 | * Same modification time, so mark all of the nodes | |
799 | * for this file as up-to-date... | |
800 | */ | |
801 | ||
ecdc0628 | 802 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
803 | if (!strcmp(node->filename, relname)) | |
804 | node->score = 0; | |
ef416fc2 | 805 | else |
806 | break; | |
807 | ||
808 | continue; | |
809 | } | |
810 | } | |
811 | ||
812 | update = 1; | |
813 | ||
814 | help_load_file(hi, filename, relname, dent->fileinfo.st_mtime); | |
815 | } | |
816 | else if (S_ISDIR(dent->fileinfo.st_mode)) | |
817 | { | |
818 | /* | |
819 | * Process sub-directory... | |
820 | */ | |
821 | ||
822 | if (help_load_directory(hi, filename, relname) == 1) | |
823 | update = 1; | |
824 | } | |
825 | } | |
826 | ||
827 | cupsDirClose(dir); | |
828 | ||
829 | return (update); | |
830 | } | |
831 | ||
832 | ||
833 | /* | |
834 | * 'help_load_file()' - Load a HTML files into an index. | |
835 | */ | |
836 | ||
837 | static int /* O - 0 = success, -1 = error */ | |
838 | help_load_file( | |
839 | help_index_t *hi, /* I - Index */ | |
840 | const char *filename, /* I - Filename */ | |
841 | const char *relative, /* I - Relative path */ | |
842 | time_t mtime) /* I - Modification time */ | |
843 | { | |
844 | cups_file_t *fp; /* HTML file */ | |
ecdc0628 | 845 | help_node_t *node; /* Current node */ |
ef416fc2 | 846 | char line[1024], /* Line from file */ |
f42414bf | 847 | temp[1024], /* Temporary word */ |
ef416fc2 | 848 | section[1024], /* Section */ |
849 | *ptr, /* Pointer into line */ | |
850 | *anchor, /* Anchor name */ | |
851 | *text; /* Text for anchor */ | |
852 | off_t offset; /* File offset */ | |
853 | char quote; /* Quote character */ | |
f7deaa1a | 854 | help_word_t *word; /* Current word */ |
1d3d3807 | 855 | size_t wordlen; /* Length of word */ |
ef416fc2 | 856 | |
857 | ||
ef416fc2 | 858 | if ((fp = cupsFileOpen(filename, "r")) == NULL) |
859 | return (-1); | |
860 | ||
861 | node = NULL; | |
862 | offset = 0; | |
863 | ||
cc7359ae MS |
864 | if (strstr(filename, "/man-") != NULL) |
865 | cupsCopyString(section, "Man Pages", sizeof(section)); | |
866 | else | |
867 | cupsCopyString(section, "Other", sizeof(section)); | |
ef416fc2 | 868 | |
869 | while (cupsFileGets(fp, line, sizeof(line))) | |
870 | { | |
871 | /* | |
872 | * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix... | |
873 | */ | |
874 | ||
cfd375ad | 875 | if ((ptr = strstr(line, "<!-- SECTION:")) != NULL) |
ef416fc2 | 876 | { |
877 | /* | |
878 | * Got section line, copy it! | |
879 | */ | |
880 | ||
cfd375ad | 881 | for (ptr += 13; isspace(*ptr & 255); ptr ++); |
ef416fc2 | 882 | |
6ac4da6b | 883 | cupsCopyString(section, ptr, sizeof(section)); |
ef416fc2 | 884 | if ((ptr = strstr(section, "-->")) != NULL) |
885 | { | |
886 | /* | |
887 | * Strip comment stuff from end of line... | |
888 | */ | |
889 | ||
890 | for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0'); | |
891 | ||
892 | if (isspace(*ptr & 255)) | |
893 | *ptr = '\0'; | |
894 | } | |
895 | continue; | |
896 | } | |
897 | ||
898 | for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;) | |
899 | { | |
900 | ptr ++; | |
901 | ||
88f9aafc | 902 | if (!_cups_strncasecmp(ptr, "TITLE>", 6)) |
ef416fc2 | 903 | { |
904 | /* | |
905 | * Found the title... | |
906 | */ | |
907 | ||
908 | anchor = NULL; | |
909 | ptr += 6; | |
910 | } | |
cfd375ad | 911 | else |
ef416fc2 | 912 | { |
cfd375ad MS |
913 | char *idptr; /* Pointer to ID */ |
914 | ||
915 | if (!_cups_strncasecmp(ptr, "A NAME=", 7)) | |
916 | ptr += 7; | |
917 | else if ((idptr = strstr(ptr, " ID=")) != NULL) | |
918 | ptr = idptr + 4; | |
919 | else if ((idptr = strstr(ptr, " id=")) != NULL) | |
920 | ptr = idptr + 4; | |
921 | else | |
922 | continue; | |
923 | ||
ef416fc2 | 924 | /* |
925 | * Found an anchor... | |
926 | */ | |
927 | ||
ef416fc2 | 928 | if (*ptr == '\"' || *ptr == '\'') |
929 | { | |
930 | /* | |
931 | * Get quoted anchor... | |
932 | */ | |
933 | ||
934 | quote = *ptr; | |
935 | anchor = ptr + 1; | |
936 | if ((ptr = strchr(anchor, quote)) != NULL) | |
937 | *ptr++ = '\0'; | |
938 | else | |
939 | break; | |
940 | } | |
941 | else | |
942 | { | |
943 | /* | |
944 | * Get unquoted anchor... | |
945 | */ | |
946 | ||
947 | anchor = ptr + 1; | |
948 | ||
949 | for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++); | |
950 | ||
cfd375ad | 951 | if (*ptr != '>') |
ef416fc2 | 952 | *ptr++ = '\0'; |
953 | else | |
954 | break; | |
955 | } | |
956 | ||
957 | /* | |
958 | * Got the anchor, now lets find the end... | |
959 | */ | |
960 | ||
961 | while (*ptr && *ptr != '>') | |
962 | ptr ++; | |
963 | ||
964 | if (*ptr != '>') | |
965 | break; | |
966 | ||
cfd375ad | 967 | *ptr++ = '\0'; |
ef416fc2 | 968 | } |
ef416fc2 | 969 | |
970 | /* | |
971 | * Now collect text for the link... | |
972 | */ | |
973 | ||
974 | text = ptr; | |
975 | while ((ptr = strchr(text, '<')) == NULL) | |
976 | { | |
977 | ptr = text + strlen(text); | |
978 | if (ptr >= (line + sizeof(line) - 2)) | |
979 | break; | |
980 | ||
981 | *ptr++ = ' '; | |
982 | ||
7e86f2f6 | 983 | if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1)) |
ef416fc2 | 984 | break; |
985 | } | |
986 | ||
987 | *ptr = '\0'; | |
988 | ||
989 | if (node) | |
7e86f2f6 | 990 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 991 | |
992 | if (!*text) | |
993 | { | |
994 | node = NULL; | |
995 | break; | |
996 | } | |
997 | ||
ecdc0628 | 998 | if ((node = helpFindNode(hi, relative, anchor)) != NULL) |
ef416fc2 | 999 | { |
1000 | /* | |
1001 | * Node already in the index, so replace the text and other | |
1002 | * data... | |
1003 | */ | |
1004 | ||
ecdc0628 | 1005 | cupsArrayRemove(hi->nodes, node); |
ef416fc2 | 1006 | |
1007 | if (node->section) | |
1008 | free(node->section); | |
1009 | ||
1010 | if (node->text) | |
1011 | free(node->text); | |
1012 | ||
f7deaa1a | 1013 | if (node->words) |
1014 | { | |
1015 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
1016 | word; | |
1017 | word = (help_word_t *)cupsArrayNext(node->words)) | |
1018 | help_delete_word(word); | |
1019 | ||
1020 | cupsArrayDelete(node->words); | |
1021 | node->words = NULL; | |
1022 | } | |
1023 | ||
ef416fc2 | 1024 | node->section = section[0] ? strdup(section) : NULL; |
1025 | node->text = strdup(text); | |
1026 | node->mtime = mtime; | |
1027 | node->offset = offset; | |
1028 | node->score = 0; | |
1029 | } | |
1030 | else | |
1031 | { | |
1032 | /* | |
1033 | * New node... | |
1034 | */ | |
1035 | ||
1036 | node = help_new_node(relative, anchor, section, text, mtime, offset, 0); | |
ef416fc2 | 1037 | } |
1038 | ||
1039 | /* | |
1040 | * Go through the text value and replace tabs and newlines with | |
1041 | * whitespace and eliminate extra whitespace... | |
1042 | */ | |
1043 | ||
1044 | for (ptr = node->text, text = node->text; *ptr;) | |
1045 | if (isspace(*ptr & 255)) | |
1046 | { | |
1047 | while (isspace(*ptr & 255)) | |
ed486911 | 1048 | ptr ++; |
ef416fc2 | 1049 | |
1050 | *text++ = ' '; | |
1051 | } | |
1052 | else if (text != ptr) | |
1053 | *text++ = *ptr++; | |
1054 | else | |
1055 | { | |
1056 | text ++; | |
1057 | ptr ++; | |
1058 | } | |
1059 | ||
1060 | *text = '\0'; | |
1061 | ||
ecdc0628 | 1062 | /* |
1063 | * (Re)add the node to the array... | |
1064 | */ | |
1065 | ||
1066 | cupsArrayAdd(hi->nodes, node); | |
f7deaa1a | 1067 | |
1068 | if (!anchor) | |
1069 | node = NULL; | |
ef416fc2 | 1070 | break; |
1071 | } | |
1072 | ||
f7deaa1a | 1073 | if (node) |
1074 | { | |
1075 | /* | |
1076 | * Scan this line for words... | |
1077 | */ | |
1078 | ||
1079 | for (ptr = line; *ptr; ptr ++) | |
1080 | { | |
1081 | /* | |
1082 | * Skip HTML stuff... | |
1083 | */ | |
1084 | ||
1085 | if (*ptr == '<') | |
1086 | { | |
1087 | if (!strncmp(ptr, "<!--", 4)) | |
1088 | { | |
1089 | /* | |
1090 | * Skip HTML comment... | |
1091 | */ | |
1092 | ||
1093 | if ((text = strstr(ptr + 4, "-->")) == NULL) | |
1094 | ptr += strlen(ptr) - 1; | |
1095 | else | |
1096 | ptr = text + 2; | |
1097 | } | |
1098 | else | |
1099 | { | |
1100 | /* | |
1101 | * Skip HTML element... | |
1102 | */ | |
1103 | ||
1104 | for (ptr ++; *ptr && *ptr != '>'; ptr ++) | |
f42414bf | 1105 | { |
f7deaa1a | 1106 | if (*ptr == '\"' || *ptr == '\'') |
1107 | { | |
1108 | for (quote = *ptr++; *ptr && *ptr != quote; ptr ++); | |
1109 | ||
1110 | if (!*ptr) | |
1111 | ptr --; | |
1112 | } | |
f42414bf | 1113 | } |
f7deaa1a | 1114 | |
1115 | if (!*ptr) | |
1116 | ptr --; | |
1117 | } | |
1118 | ||
1119 | continue; | |
1120 | } | |
1121 | else if (*ptr == '&') | |
1122 | { | |
1123 | /* | |
1124 | * Skip HTML entity... | |
1125 | */ | |
1126 | ||
1127 | for (ptr ++; *ptr && *ptr != ';'; ptr ++); | |
1128 | ||
1129 | if (!*ptr) | |
1130 | ptr --; | |
1131 | ||
1132 | continue; | |
1133 | } | |
1134 | else if (!isalnum(*ptr & 255)) | |
1135 | continue; | |
1136 | ||
1137 | /* | |
1138 | * Found the start of a word, search until we find the end... | |
1139 | */ | |
1140 | ||
1141 | for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++); | |
1142 | ||
1d3d3807 | 1143 | wordlen = (size_t)(ptr - text); |
f7deaa1a | 1144 | |
1d3d3807 | 1145 | memcpy(temp, text, wordlen); |
f42414bf | 1146 | temp[wordlen] = '\0'; |
1147 | ||
1148 | ptr --; | |
f7deaa1a | 1149 | |
f42414bf | 1150 | if (wordlen > 1 && !bsearch(temp, help_common_words, |
f7deaa1a | 1151 | (sizeof(help_common_words) / |
1152 | sizeof(help_common_words[0])), | |
1153 | sizeof(help_common_words[0]), | |
1154 | (int (*)(const void *, const void *)) | |
88f9aafc | 1155 | _cups_strcasecmp)) |
f42414bf | 1156 | help_add_word(node, temp); |
f7deaa1a | 1157 | } |
1158 | } | |
1159 | ||
ef416fc2 | 1160 | /* |
1161 | * Get the offset of the next line... | |
1162 | */ | |
1163 | ||
1164 | offset = cupsFileTell(fp); | |
1165 | } | |
1166 | ||
1167 | cupsFileClose(fp); | |
1168 | ||
1169 | if (node) | |
7e86f2f6 | 1170 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 1171 | |
1172 | return (0); | |
1173 | } | |
1174 | ||
1175 | ||
1176 | /* | |
1177 | * 'help_new_node()' - Create a new node and add it to an index. | |
1178 | */ | |
1179 | ||
1180 | static help_node_t * /* O - Node pointer or NULL on error */ | |
1181 | help_new_node(const char *filename, /* I - Filename */ | |
1182 | const char *anchor, /* I - Anchor */ | |
1183 | const char *section, /* I - Section */ | |
1184 | const char *text, /* I - Text */ | |
1185 | time_t mtime, /* I - Modification time */ | |
1186 | off_t offset, /* I - Offset in file */ | |
1187 | size_t length) /* I - Length in bytes */ | |
1188 | { | |
1189 | help_node_t *n; /* Node */ | |
1190 | ||
1191 | ||
ef416fc2 | 1192 | n = (help_node_t *)calloc(1, sizeof(help_node_t)); |
1193 | if (!n) | |
1194 | return (NULL); | |
1195 | ||
1196 | n->filename = strdup(filename); | |
1197 | n->anchor = anchor ? strdup(anchor) : NULL; | |
507c4adc | 1198 | n->section = (section && *section) ? strdup(section) : NULL; |
ef416fc2 | 1199 | n->text = strdup(text); |
1200 | n->mtime = mtime; | |
1201 | n->offset = offset; | |
1202 | n->length = length; | |
1203 | ||
1204 | return (n); | |
1205 | } | |
1206 | ||
1207 | ||
1208 | /* | |
1209 | * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor. | |
1210 | */ | |
1211 | ||
d73371a0 MS |
1212 | static int /* O - Difference */ |
1213 | help_sort_by_name( | |
1214 | help_node_t *n1, /* I - First node */ | |
1215 | help_node_t *n2, /* I - Second node */ | |
1216 | void *data) /* Unused */ | |
ef416fc2 | 1217 | { |
d73371a0 | 1218 | int diff; /* Difference */ |
ef416fc2 | 1219 | |
1220 | ||
d73371a0 MS |
1221 | (void)data; |
1222 | ||
ecdc0628 | 1223 | if ((diff = strcmp(n1->filename, n2->filename)) != 0) |
ef416fc2 | 1224 | return (diff); |
1225 | ||
ecdc0628 | 1226 | if (!n1->anchor && !n2->anchor) |
ef416fc2 | 1227 | return (0); |
ecdc0628 | 1228 | else if (!n1->anchor) |
ef416fc2 | 1229 | return (-1); |
ecdc0628 | 1230 | else if (!n2->anchor) |
ef416fc2 | 1231 | return (1); |
1232 | else | |
ecdc0628 | 1233 | return (strcmp(n1->anchor, n2->anchor)); |
ef416fc2 | 1234 | } |
1235 | ||
1236 | ||
1237 | /* | |
1238 | * 'help_sort_nodes_by_score()' - Sort nodes by score and text. | |
1239 | */ | |
1240 | ||
cfe4c0c3 R |
1241 | static int /* O - Difference */ |
1242 | help_sort_by_score(help_node_t *n1, /* I - First node */ | |
1243 | help_node_t *n2, /* I - Second node */ | |
1244 | void *data) /* I - Unused */ | |
ef416fc2 | 1245 | { |
ef416fc2 | 1246 | int diff; /* Difference */ |
1247 | ||
1248 | ||
cfe4c0c3 R |
1249 | (void)data; |
1250 | ||
ecdc0628 | 1251 | if (n1->score != n2->score) |
1f0275e3 | 1252 | return (n2->score - n1->score); |
ef416fc2 | 1253 | |
ecdc0628 | 1254 | if (n1->section && !n2->section) |
ef416fc2 | 1255 | return (1); |
ecdc0628 | 1256 | else if (!n1->section && n2->section) |
ef416fc2 | 1257 | return (-1); |
ecdc0628 | 1258 | else if (n1->section && n2->section && |
1259 | (diff = strcmp(n1->section, n2->section)) != 0) | |
ef416fc2 | 1260 | return (diff); |
1261 | ||
88f9aafc | 1262 | return (_cups_strcasecmp(n1->text, n2->text)); |
ef416fc2 | 1263 | } |
1264 | ||
1265 | ||
1266 | /* | |
f7deaa1a | 1267 | * 'help_sort_words()' - Sort words alphabetically. |
1268 | */ | |
1269 | ||
cfe4c0c3 R |
1270 | static int /* O - Difference */ |
1271 | help_sort_words(help_word_t *w1, /* I - Second word */ | |
1272 | help_word_t *w2, /* I - Second word */ | |
1273 | void *data) /* Unused */ | |
f7deaa1a | 1274 | { |
cfe4c0c3 | 1275 | (void)data; |
88f9aafc | 1276 | return (_cups_strcasecmp(w1->text, w2->text)); |
f7deaa1a | 1277 | } |