]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
7e86f2f6 | 2 | * Online help index routines for CUPS. |
ef416fc2 | 3 | * |
cfd375ad | 4 | * Copyright 2007-2017 by Apple Inc. |
7e86f2f6 | 5 | * Copyright 1997-2007 by Easy Software Products. |
ef416fc2 | 6 | * |
e3101897 | 7 | * Licensed under Apache License v2.0. See the file "LICENSE" for more information. |
ef416fc2 | 8 | */ |
9 | ||
10 | /* | |
11 | * Include necessary headers... | |
12 | */ | |
13 | ||
14 | #include "cgi-private.h" | |
15 | #include <cups/dir.h> | |
16 | ||
17 | ||
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive compare,
 * so the entries MUST remain in ascending alphabetical order.  Each entry
 * occupies 6 bytes, so words are limited to 5 characters plus the nul
 * terminator.  The table is never modified and is therefore "const".
 */

static const char help_common_words[][6] =
{
  "about",
  "all",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "call",
  "can",
  "come",
  "could",
  "day",
  "did",
  "do",
  "down",
  "each",
  "find",
  "first",
  "for",
  "from",
  "go",
  "had",
  "has",
  "have",
  "he",
  "her",
  "him",
  "his",
  "hot",
  "how",
  "if",
  "in",
  "is",
  "it",
  "know",
  "like",
  "long",
  "look",
  "make",
  "many",
  "may",
  "more",
  "most",
  "my",
  "no",
  "now",
  "of",
  "on",
  "one",
  "or",
  "other",
  "out",
  "over",
  "said",
  "see",
  "she",
  "side",
  "so",
  "some",
  "sound",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "thing",
  "this",
  "time",
  "to",
  "two",
  "up",
  "use",
  "was",
  "water",
  "way",
  "we",
  "were",
  "what",
  "when",
  "which",
  "who",
  "will",
  "with",
  "word",
  "would",
  "write",
  "you",
  "your"
};
121 | ||
122 | ||
ef416fc2 | 123 | /* |
124 | * Local functions... | |
125 | */ | |
126 | ||
f7deaa1a | 127 | static help_word_t *help_add_word(help_node_t *n, const char *text); |
ef416fc2 | 128 | static void help_delete_node(help_node_t *n); |
f7deaa1a | 129 | static void help_delete_word(help_word_t *w); |
ef416fc2 | 130 | static int help_load_directory(help_index_t *hi, |
131 | const char *directory, | |
132 | const char *relative); | |
133 | static int help_load_file(help_index_t *hi, | |
134 | const char *filename, | |
135 | const char *relative, | |
136 | time_t mtime); | |
137 | static help_node_t *help_new_node(const char *filename, const char *anchor, | |
138 | const char *section, const char *text, | |
139 | time_t mtime, off_t offset, | |
85dda01c MS |
140 | size_t length) |
141 | __attribute__((nonnull(1,3,4))); | |
ecdc0628 | 142 | static int help_sort_by_name(help_node_t *p1, help_node_t *p2); |
143 | static int help_sort_by_score(help_node_t *p1, help_node_t *p2); | |
f7deaa1a | 144 | static int help_sort_words(help_word_t *w1, help_word_t *w2); |
ef416fc2 | 145 | |
146 | ||
147 | /* | |
148 | * 'helpDeleteIndex()' - Delete an index, freeing all memory used. | |
149 | */ | |
150 | ||
151 | void | |
ecdc0628 | 152 | helpDeleteIndex(help_index_t *hi) /* I - Help index */ |
ef416fc2 | 153 | { |
ecdc0628 | 154 | help_node_t *node; /* Current node */ |
ef416fc2 | 155 | |
156 | ||
85dda01c | 157 | DEBUG_printf(("helpDeleteIndex(hi=%p)", hi)); |
ef416fc2 | 158 | |
159 | if (!hi) | |
160 | return; | |
161 | ||
ecdc0628 | 162 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
163 | node; | |
164 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 165 | { |
ecdc0628 | 166 | if (!hi->search) |
167 | help_delete_node(node); | |
168 | } | |
ef416fc2 | 169 | |
ecdc0628 | 170 | cupsArrayDelete(hi->nodes); |
171 | cupsArrayDelete(hi->sorted); | |
ef416fc2 | 172 | |
173 | free(hi); | |
174 | } | |
175 | ||
176 | ||
177 | /* | |
178 | * 'helpFindNode()' - Find a node in an index. | |
179 | */ | |
180 | ||
ecdc0628 | 181 | help_node_t * /* O - Node pointer or NULL */ |
ef416fc2 | 182 | helpFindNode(help_index_t *hi, /* I - Index */ |
183 | const char *filename, /* I - Filename */ | |
184 | const char *anchor) /* I - Anchor */ | |
185 | { | |
ecdc0628 | 186 | help_node_t key; /* Search key */ |
ef416fc2 | 187 | |
188 | ||
85dda01c MS |
189 | DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")", |
190 | hi, filename, anchor)); | |
ef416fc2 | 191 | |
192 | /* | |
193 | * Range check input... | |
194 | */ | |
195 | ||
196 | if (!hi || !filename) | |
197 | return (NULL); | |
198 | ||
199 | /* | |
200 | * Initialize the search key... | |
201 | */ | |
202 | ||
203 | key.filename = (char *)filename; | |
204 | key.anchor = (char *)anchor; | |
ef416fc2 | 205 | |
206 | /* | |
207 | * Return any match... | |
208 | */ | |
209 | ||
ecdc0628 | 210 | return ((help_node_t *)cupsArrayFind(hi->nodes, &key)); |
ef416fc2 | 211 | } |
212 | ||
213 | ||
214 | /* | |
215 | * 'helpLoadIndex()' - Load a help index from disk. | |
216 | */ | |
217 | ||
218 | help_index_t * /* O - Index pointer or NULL */ | |
219 | helpLoadIndex(const char *hifile, /* I - Index filename */ | |
220 | const char *directory) /* I - Directory that is indexed */ | |
221 | { | |
222 | help_index_t *hi; /* Help index */ | |
223 | cups_file_t *fp; /* Current file */ | |
224 | char line[2048], /* Line from file */ | |
225 | *ptr, /* Pointer into line */ | |
226 | *filename, /* Filename in line */ | |
227 | *anchor, /* Anchor in line */ | |
228 | *sectptr, /* Section pointer in line */ | |
229 | section[1024], /* Section name */ | |
230 | *text; /* Text in line */ | |
231 | time_t mtime; /* Modification time */ | |
232 | off_t offset; /* Offset into file */ | |
233 | size_t length; /* Length in bytes */ | |
234 | int update; /* Update? */ | |
ef416fc2 | 235 | help_node_t *node; /* Current node */ |
f7deaa1a | 236 | help_word_t *word; /* Current word */ |
ef416fc2 | 237 | |
238 | ||
85dda01c | 239 | DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")", |
ef416fc2 | 240 | hifile, directory)); |
241 | ||
242 | /* | |
243 | * Create a new, empty index. | |
244 | */ | |
245 | ||
ecdc0628 | 246 | if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL) |
247 | return (NULL); | |
248 | ||
249 | hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); | |
250 | hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
251 | ||
252 | if (!hi->nodes || !hi->sorted) | |
253 | { | |
254 | cupsArrayDelete(hi->nodes); | |
255 | cupsArrayDelete(hi->sorted); | |
256 | free(hi); | |
257 | return (NULL); | |
258 | } | |
ef416fc2 | 259 | |
260 | /* | |
261 | * Try loading the existing index file... | |
262 | */ | |
263 | ||
264 | if ((fp = cupsFileOpen(hifile, "r")) != NULL) | |
265 | { | |
266 | /* | |
267 | * Lock the file and then read the first line... | |
268 | */ | |
269 | ||
270 | cupsFileLock(fp, 1); | |
271 | ||
f7deaa1a | 272 | if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2")) |
ef416fc2 | 273 | { |
274 | /* | |
275 | * Got a valid header line, now read the data lines... | |
276 | */ | |
277 | ||
f7deaa1a | 278 | node = NULL; |
279 | ||
ef416fc2 | 280 | while (cupsFileGets(fp, line, sizeof(line))) |
281 | { | |
282 | /* | |
283 | * Each line looks like one of the following: | |
284 | * | |
285 | * filename mtime offset length "section" "text" | |
286 | * filename#anchor offset length "text" | |
f7deaa1a | 287 | * SP count word |
ef416fc2 | 288 | */ |
289 | ||
f7deaa1a | 290 | if (line[0] == ' ') |
ef416fc2 | 291 | { |
f7deaa1a | 292 | /* |
293 | * Read a word in the current node... | |
294 | */ | |
ef416fc2 | 295 | |
f7deaa1a | 296 | if (!node || (ptr = strrchr(line, ' ')) == NULL) |
297 | continue; | |
ef416fc2 | 298 | |
f7deaa1a | 299 | if ((word = help_add_word(node, ptr + 1)) != NULL) |
300 | word->count = atoi(line + 1); | |
301 | } | |
302 | else | |
ef416fc2 | 303 | { |
304 | /* | |
f7deaa1a | 305 | * Add a node... |
ef416fc2 | 306 | */ |
307 | ||
f7deaa1a | 308 | filename = line; |
ef416fc2 | 309 | |
f7deaa1a | 310 | if ((ptr = strchr(line, ' ')) == NULL) |
311 | break; | |
ef416fc2 | 312 | |
f7deaa1a | 313 | while (isspace(*ptr & 255)) |
314 | *ptr++ = '\0'; | |
ef416fc2 | 315 | |
f7deaa1a | 316 | if ((anchor = strrchr(filename, '#')) != NULL) |
317 | { | |
318 | *anchor++ = '\0'; | |
319 | mtime = 0; | |
320 | } | |
321 | else | |
322 | mtime = strtol(ptr, &ptr, 10); | |
ef416fc2 | 323 | |
f7deaa1a | 324 | offset = strtoll(ptr, &ptr, 10); |
7e86f2f6 | 325 | length = (size_t)strtoll(ptr, &ptr, 10); |
ef416fc2 | 326 | |
327 | while (isspace(*ptr & 255)) | |
328 | ptr ++; | |
ef416fc2 | 329 | |
f7deaa1a | 330 | if (!anchor) |
331 | { | |
332 | /* | |
333 | * Get section... | |
334 | */ | |
ef416fc2 | 335 | |
f7deaa1a | 336 | if (*ptr != '\"') |
337 | break; | |
ef416fc2 | 338 | |
f7deaa1a | 339 | ptr ++; |
340 | sectptr = ptr; | |
ef416fc2 | 341 | |
f7deaa1a | 342 | while (*ptr && *ptr != '\"') |
343 | ptr ++; | |
344 | ||
345 | if (*ptr != '\"') | |
346 | break; | |
ef416fc2 | 347 | |
f7deaa1a | 348 | *ptr++ = '\0'; |
ef416fc2 | 349 | |
f7deaa1a | 350 | strlcpy(section, sectptr, sizeof(section)); |
ef416fc2 | 351 | |
f7deaa1a | 352 | while (isspace(*ptr & 255)) |
353 | ptr ++; | |
354 | } | |
ecdc0628 | 355 | |
f7deaa1a | 356 | if (*ptr != '\"') |
357 | break; | |
358 | ||
359 | ptr ++; | |
360 | text = ptr; | |
361 | ||
362 | while (*ptr && *ptr != '\"') | |
363 | ptr ++; | |
364 | ||
365 | if (*ptr != '\"') | |
366 | break; | |
367 | ||
368 | *ptr++ = '\0'; | |
369 | ||
370 | if ((node = help_new_node(filename, anchor, section, text, | |
371 | mtime, offset, length)) == NULL) | |
372 | break; | |
373 | ||
374 | node->score = -1; | |
375 | ||
376 | cupsArrayAdd(hi->nodes, node); | |
377 | } | |
ef416fc2 | 378 | } |
379 | } | |
380 | ||
381 | cupsFileClose(fp); | |
382 | } | |
383 | ||
384 | /* | |
385 | * Scan for new/updated files... | |
386 | */ | |
387 | ||
388 | update = help_load_directory(hi, directory, NULL); | |
389 | ||
390 | /* | |
391 | * Remove any files that are no longer installed... | |
392 | */ | |
393 | ||
ecdc0628 | 394 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
395 | node; | |
396 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
397 | if (node->score < 0) | |
ef416fc2 | 398 | { |
399 | /* | |
400 | * Delete this node... | |
401 | */ | |
402 | ||
ecdc0628 | 403 | cupsArrayRemove(hi->nodes, node); |
404 | help_delete_node(node); | |
ef416fc2 | 405 | } |
ef416fc2 | 406 | |
407 | /* | |
ecdc0628 | 408 | * Add nodes to the sorted array... |
ef416fc2 | 409 | */ |
410 | ||
ecdc0628 | 411 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
412 | node; | |
413 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
414 | cupsArrayAdd(hi->sorted, node); | |
ef416fc2 | 415 | |
416 | /* | |
ecdc0628 | 417 | * Save the index if we updated it... |
ef416fc2 | 418 | */ |
419 | ||
ecdc0628 | 420 | if (update) |
421 | helpSaveIndex(hi, hifile); | |
ef416fc2 | 422 | |
423 | /* | |
424 | * Return the index... | |
425 | */ | |
426 | ||
427 | return (hi); | |
428 | } | |
429 | ||
430 | ||
431 | /* | |
432 | * 'helpSaveIndex()' - Save a help index to disk. | |
433 | */ | |
434 | ||
435 | int /* O - 0 on success, -1 on error */ | |
436 | helpSaveIndex(help_index_t *hi, /* I - Index */ | |
437 | const char *hifile) /* I - Index filename */ | |
438 | { | |
439 | cups_file_t *fp; /* Index file */ | |
ef416fc2 | 440 | help_node_t *node; /* Current node */ |
f7deaa1a | 441 | help_word_t *word; /* Current word */ |
ef416fc2 | 442 | |
443 | ||
85dda01c | 444 | DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile)); |
ef416fc2 | 445 | |
446 | /* | |
447 | * Try creating a new index file... | |
448 | */ | |
449 | ||
450 | if ((fp = cupsFileOpen(hifile, "w9")) == NULL) | |
451 | return (-1); | |
452 | ||
453 | /* | |
454 | * Lock the file while we write it... | |
455 | */ | |
456 | ||
457 | cupsFileLock(fp, 1); | |
458 | ||
f7deaa1a | 459 | cupsFilePuts(fp, "HELPV2\n"); |
ef416fc2 | 460 | |
ecdc0628 | 461 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); |
462 | node; | |
463 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
ef416fc2 | 464 | { |
465 | /* | |
466 | * Write the current node with/without the anchor... | |
467 | */ | |
468 | ||
ef416fc2 | 469 | if (node->anchor) |
470 | { | |
471 | if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n", | |
472 | node->filename, node->anchor, | |
473 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, | |
474 | node->text) < 0) | |
475 | break; | |
476 | } | |
477 | else | |
478 | { | |
479 | if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n", | |
ae71f5de | 480 | node->filename, (int)node->mtime, |
ef416fc2 | 481 | CUPS_LLCAST node->offset, CUPS_LLCAST node->length, |
482 | node->section ? node->section : "", node->text) < 0) | |
483 | break; | |
484 | } | |
f7deaa1a | 485 | |
486 | /* | |
487 | * Then write the words associated with the node... | |
488 | */ | |
489 | ||
490 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
491 | word; | |
492 | word = (help_word_t *)cupsArrayNext(node->words)) | |
493 | if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0) | |
494 | break; | |
ef416fc2 | 495 | } |
496 | ||
ecdc0628 | 497 | cupsFileFlush(fp); |
498 | ||
ef416fc2 | 499 | if (cupsFileClose(fp) < 0) |
500 | return (-1); | |
ecdc0628 | 501 | else if (node) |
ef416fc2 | 502 | return (-1); |
503 | else | |
504 | return (0); | |
505 | } | |
506 | ||
507 | ||
508 | /* | |
509 | * 'helpSearchIndex()' - Search an index. | |
510 | */ | |
511 | ||
512 | help_index_t * /* O - Search index */ | |
513 | helpSearchIndex(help_index_t *hi, /* I - Index */ | |
514 | const char *query, /* I - Query string */ | |
515 | const char *section, /* I - Limit search to this section */ | |
516 | const char *filename) /* I - Limit search to this file */ | |
517 | { | |
ef416fc2 | 518 | help_index_t *search; /* Search index */ |
ecdc0628 | 519 | help_node_t *node; /* Current node */ |
f7deaa1a | 520 | help_word_t *word; /* Current word */ |
ef416fc2 | 521 | void *sc; /* Search context */ |
522 | int matches; /* Number of matches */ | |
523 | ||
524 | ||
85dda01c MS |
525 | DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")", |
526 | hi, query, filename)); | |
ef416fc2 | 527 | |
528 | /* | |
529 | * Range check... | |
530 | */ | |
531 | ||
532 | if (!hi || !query) | |
533 | return (NULL); | |
534 | ||
ecdc0628 | 535 | /* |
536 | * Reset the scores of all nodes to 0... | |
537 | */ | |
538 | ||
539 | for (node = (help_node_t *)cupsArrayFirst(hi->nodes); | |
540 | node; | |
541 | node = (help_node_t *)cupsArrayNext(hi->nodes)) | |
542 | node->score = 0; | |
543 | ||
544 | /* | |
545 | * Find the first node to search in... | |
546 | */ | |
ef416fc2 | 547 | |
548 | if (filename) | |
549 | { | |
ecdc0628 | 550 | node = helpFindNode(hi, filename, NULL); |
551 | if (!node) | |
ef416fc2 | 552 | return (NULL); |
553 | } | |
554 | else | |
ecdc0628 | 555 | node = (help_node_t *)cupsArrayFirst(hi->nodes); |
ef416fc2 | 556 | |
557 | /* | |
558 | * Convert the query into a regular expression... | |
559 | */ | |
560 | ||
561 | sc = cgiCompileSearch(query); | |
562 | if (!sc) | |
563 | return (NULL); | |
564 | ||
565 | /* | |
566 | * Allocate a search index... | |
567 | */ | |
568 | ||
569 | search = calloc(1, sizeof(help_index_t)); | |
570 | if (!search) | |
571 | { | |
572 | cgiFreeSearch(sc); | |
573 | return (NULL); | |
574 | } | |
575 | ||
ecdc0628 | 576 | search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); |
577 | search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); | |
321d8d57 | 578 | |
ecdc0628 | 579 | if (!search->nodes || !search->sorted) |
580 | { | |
581 | cupsArrayDelete(search->nodes); | |
582 | cupsArrayDelete(search->sorted); | |
583 | free(search); | |
584 | cgiFreeSearch(sc); | |
585 | return (NULL); | |
586 | } | |
587 | ||
ef416fc2 | 588 | search->search = 1; |
589 | ||
590 | /* | |
591 | * Check each node in the index, adding matching nodes to the | |
592 | * search index... | |
593 | */ | |
594 | ||
ecdc0628 | 595 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
596 | if (section && strcmp(node->section, section)) | |
ef416fc2 | 597 | continue; |
ecdc0628 | 598 | else if (filename && strcmp(node->filename, filename)) |
ef416fc2 | 599 | continue; |
f7deaa1a | 600 | else |
ef416fc2 | 601 | { |
f7deaa1a | 602 | matches = cgiDoSearch(sc, node->text); |
603 | ||
604 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
605 | word; | |
606 | word = (help_word_t *)cupsArrayNext(node->words)) | |
607 | if (cgiDoSearch(sc, word->text) > 0) | |
608 | matches += word->count; | |
ef416fc2 | 609 | |
f7deaa1a | 610 | if (matches > 0) |
611 | { | |
612 | /* | |
613 | * Found a match, add the node to the search index... | |
614 | */ | |
ef416fc2 | 615 | |
f7deaa1a | 616 | node->score = matches; |
617 | ||
321d8d57 MS |
618 | cupsArrayAdd(search->nodes, node); |
619 | cupsArrayAdd(search->sorted, node); | |
f7deaa1a | 620 | } |
ef416fc2 | 621 | } |
622 | ||
623 | /* | |
624 | * Free the search context... | |
625 | */ | |
626 | ||
627 | cgiFreeSearch(sc); | |
628 | ||
ef416fc2 | 629 | /* |
630 | * Return the results... | |
631 | */ | |
632 | ||
633 | return (search); | |
634 | } | |
635 | ||
636 | ||
f7deaa1a | 637 | /* |
638 | * 'help_add_word()' - Add a word to a node. | |
639 | */ | |
640 | ||
641 | static help_word_t * /* O - New word */ | |
642 | help_add_word(help_node_t *n, /* I - Node */ | |
643 | const char *text) /* I - Word text */ | |
644 | { | |
645 | help_word_t *w, /* New word */ | |
646 | key; /* Search key */ | |
647 | ||
648 | ||
85dda01c | 649 | DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text)); |
f7deaa1a | 650 | |
651 | /* | |
652 | * Create the words array as needed... | |
653 | */ | |
654 | ||
655 | if (!n->words) | |
656 | n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL); | |
657 | ||
658 | /* | |
659 | * See if the word is already added... | |
660 | */ | |
661 | ||
662 | key.text = (char *)text; | |
663 | ||
664 | if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL) | |
665 | { | |
666 | /* | |
667 | * Create a new word... | |
668 | */ | |
669 | ||
670 | if ((w = calloc(1, sizeof(help_word_t))) == NULL) | |
671 | return (NULL); | |
672 | ||
673 | if ((w->text = strdup(text)) == NULL) | |
674 | { | |
675 | free(w); | |
676 | return (NULL); | |
677 | } | |
678 | ||
679 | cupsArrayAdd(n->words, w); | |
680 | } | |
681 | ||
682 | /* | |
683 | * Bump the counter for this word and return it... | |
684 | */ | |
685 | ||
686 | w->count ++; | |
687 | ||
688 | return (w); | |
689 | } | |
690 | ||
691 | ||
ef416fc2 | 692 | /* |
693 | * 'help_delete_node()' - Free all memory used by a node. | |
694 | */ | |
695 | ||
696 | static void | |
697 | help_delete_node(help_node_t *n) /* I - Node */ | |
698 | { | |
f7deaa1a | 699 | help_word_t *w; /* Current word */ |
700 | ||
701 | ||
85dda01c | 702 | DEBUG_printf(("2help_delete_node(n=%p)", n)); |
ef416fc2 | 703 | |
704 | if (!n) | |
705 | return; | |
706 | ||
707 | if (n->filename) | |
708 | free(n->filename); | |
709 | ||
710 | if (n->anchor) | |
711 | free(n->anchor); | |
712 | ||
713 | if (n->section) | |
714 | free(n->section); | |
715 | ||
716 | if (n->text) | |
717 | free(n->text); | |
718 | ||
f7deaa1a | 719 | for (w = (help_word_t *)cupsArrayFirst(n->words); |
720 | w; | |
721 | w = (help_word_t *)cupsArrayNext(n->words)) | |
722 | help_delete_word(w); | |
723 | ||
724 | cupsArrayDelete(n->words); | |
725 | ||
ef416fc2 | 726 | free(n); |
727 | } | |
728 | ||
729 | ||
f7deaa1a | 730 | /* |
731 | * 'help_delete_word()' - Free all memory used by a word. | |
732 | */ | |
733 | ||
734 | static void | |
735 | help_delete_word(help_word_t *w) /* I - Word */ | |
736 | { | |
85dda01c | 737 | DEBUG_printf(("2help_delete_word(w=%p)", w)); |
f7deaa1a | 738 | |
739 | if (!w) | |
740 | return; | |
741 | ||
742 | if (w->text) | |
743 | free(w->text); | |
744 | ||
745 | free(w); | |
746 | } | |
747 | ||
748 | ||
ef416fc2 | 749 | /* |
750 | * 'help_load_directory()' - Load a directory of files into an index. | |
751 | */ | |
752 | ||
753 | static int /* O - 0 = success, -1 = error, 1 = updated */ | |
754 | help_load_directory( | |
755 | help_index_t *hi, /* I - Index */ | |
756 | const char *directory, /* I - Directory */ | |
757 | const char *relative) /* I - Relative path */ | |
758 | { | |
ef416fc2 | 759 | cups_dir_t *dir; /* Directory file */ |
760 | cups_dentry_t *dent; /* Directory entry */ | |
761 | char *ext, /* Pointer to extension */ | |
762 | filename[1024], /* Full filename */ | |
763 | relname[1024]; /* Relative filename */ | |
764 | int update; /* Updated? */ | |
ecdc0628 | 765 | help_node_t *node; /* Current node */ |
ef416fc2 | 766 | |
767 | ||
85dda01c MS |
768 | DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")", |
769 | hi, directory, relative)); | |
ef416fc2 | 770 | |
771 | /* | |
772 | * Open the directory and scan it... | |
773 | */ | |
774 | ||
775 | if ((dir = cupsDirOpen(directory)) == NULL) | |
776 | return (0); | |
777 | ||
778 | update = 0; | |
779 | ||
780 | while ((dent = cupsDirRead(dir)) != NULL) | |
781 | { | |
ecdc0628 | 782 | /* |
783 | * Skip "." files... | |
784 | */ | |
785 | ||
786 | if (dent->filename[0] == '.') | |
787 | continue; | |
788 | ||
ef416fc2 | 789 | /* |
790 | * Get absolute and relative filenames... | |
791 | */ | |
792 | ||
793 | snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename); | |
794 | if (relative) | |
795 | snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename); | |
796 | else | |
797 | strlcpy(relname, dent->filename, sizeof(relname)); | |
798 | ||
799 | /* | |
800 | * Check if we have a HTML file... | |
801 | */ | |
802 | ||
803 | if ((ext = strstr(dent->filename, ".html")) != NULL && | |
804 | (!ext[5] || !strcmp(ext + 5, ".gz"))) | |
805 | { | |
806 | /* | |
807 | * HTML file, see if we have already indexed the file... | |
808 | */ | |
809 | ||
810 | if ((node = helpFindNode(hi, relname, NULL)) != NULL) | |
811 | { | |
812 | /* | |
813 | * File already indexed - check dates to confirm that the | |
814 | * index is up-to-date... | |
815 | */ | |
816 | ||
ecdc0628 | 817 | if (node->mtime == dent->fileinfo.st_mtime) |
ef416fc2 | 818 | { |
819 | /* | |
820 | * Same modification time, so mark all of the nodes | |
821 | * for this file as up-to-date... | |
822 | */ | |
823 | ||
ecdc0628 | 824 | for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) |
825 | if (!strcmp(node->filename, relname)) | |
826 | node->score = 0; | |
ef416fc2 | 827 | else |
828 | break; | |
829 | ||
830 | continue; | |
831 | } | |
832 | } | |
833 | ||
834 | update = 1; | |
835 | ||
836 | help_load_file(hi, filename, relname, dent->fileinfo.st_mtime); | |
837 | } | |
838 | else if (S_ISDIR(dent->fileinfo.st_mode)) | |
839 | { | |
840 | /* | |
841 | * Process sub-directory... | |
842 | */ | |
843 | ||
844 | if (help_load_directory(hi, filename, relname) == 1) | |
845 | update = 1; | |
846 | } | |
847 | } | |
848 | ||
849 | cupsDirClose(dir); | |
850 | ||
851 | return (update); | |
852 | } | |
853 | ||
854 | ||
855 | /* | |
856 | * 'help_load_file()' - Load a HTML files into an index. | |
857 | */ | |
858 | ||
859 | static int /* O - 0 = success, -1 = error */ | |
860 | help_load_file( | |
861 | help_index_t *hi, /* I - Index */ | |
862 | const char *filename, /* I - Filename */ | |
863 | const char *relative, /* I - Relative path */ | |
864 | time_t mtime) /* I - Modification time */ | |
865 | { | |
866 | cups_file_t *fp; /* HTML file */ | |
ecdc0628 | 867 | help_node_t *node; /* Current node */ |
ef416fc2 | 868 | char line[1024], /* Line from file */ |
f42414bf | 869 | temp[1024], /* Temporary word */ |
ef416fc2 | 870 | section[1024], /* Section */ |
871 | *ptr, /* Pointer into line */ | |
872 | *anchor, /* Anchor name */ | |
873 | *text; /* Text for anchor */ | |
874 | off_t offset; /* File offset */ | |
875 | char quote; /* Quote character */ | |
f7deaa1a | 876 | help_word_t *word; /* Current word */ |
877 | int wordlen; /* Length of word */ | |
ef416fc2 | 878 | |
879 | ||
85dda01c | 880 | DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", " |
2274af38 | 881 | "mtime=%ld)", hi, filename, relative, (long)mtime)); |
ef416fc2 | 882 | |
883 | if ((fp = cupsFileOpen(filename, "r")) == NULL) | |
884 | return (-1); | |
885 | ||
886 | node = NULL; | |
887 | offset = 0; | |
888 | ||
5a9febac | 889 | strlcpy(section, "Other", sizeof(section)); |
ef416fc2 | 890 | |
891 | while (cupsFileGets(fp, line, sizeof(line))) | |
892 | { | |
893 | /* | |
894 | * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix... | |
895 | */ | |
896 | ||
cfd375ad | 897 | if ((ptr = strstr(line, "<!-- SECTION:")) != NULL) |
ef416fc2 | 898 | { |
899 | /* | |
900 | * Got section line, copy it! | |
901 | */ | |
902 | ||
cfd375ad | 903 | for (ptr += 13; isspace(*ptr & 255); ptr ++); |
ef416fc2 | 904 | |
905 | strlcpy(section, ptr, sizeof(section)); | |
906 | if ((ptr = strstr(section, "-->")) != NULL) | |
907 | { | |
908 | /* | |
909 | * Strip comment stuff from end of line... | |
910 | */ | |
911 | ||
912 | for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0'); | |
913 | ||
914 | if (isspace(*ptr & 255)) | |
915 | *ptr = '\0'; | |
916 | } | |
917 | continue; | |
918 | } | |
919 | ||
920 | for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;) | |
921 | { | |
922 | ptr ++; | |
923 | ||
88f9aafc | 924 | if (!_cups_strncasecmp(ptr, "TITLE>", 6)) |
ef416fc2 | 925 | { |
926 | /* | |
927 | * Found the title... | |
928 | */ | |
929 | ||
930 | anchor = NULL; | |
931 | ptr += 6; | |
932 | } | |
cfd375ad | 933 | else |
ef416fc2 | 934 | { |
cfd375ad MS |
935 | char *idptr; /* Pointer to ID */ |
936 | ||
937 | if (!_cups_strncasecmp(ptr, "A NAME=", 7)) | |
938 | ptr += 7; | |
939 | else if ((idptr = strstr(ptr, " ID=")) != NULL) | |
940 | ptr = idptr + 4; | |
941 | else if ((idptr = strstr(ptr, " id=")) != NULL) | |
942 | ptr = idptr + 4; | |
943 | else | |
944 | continue; | |
945 | ||
ef416fc2 | 946 | /* |
947 | * Found an anchor... | |
948 | */ | |
949 | ||
ef416fc2 | 950 | if (*ptr == '\"' || *ptr == '\'') |
951 | { | |
952 | /* | |
953 | * Get quoted anchor... | |
954 | */ | |
955 | ||
956 | quote = *ptr; | |
957 | anchor = ptr + 1; | |
958 | if ((ptr = strchr(anchor, quote)) != NULL) | |
959 | *ptr++ = '\0'; | |
960 | else | |
961 | break; | |
962 | } | |
963 | else | |
964 | { | |
965 | /* | |
966 | * Get unquoted anchor... | |
967 | */ | |
968 | ||
969 | anchor = ptr + 1; | |
970 | ||
971 | for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++); | |
972 | ||
cfd375ad | 973 | if (*ptr != '>') |
ef416fc2 | 974 | *ptr++ = '\0'; |
975 | else | |
976 | break; | |
977 | } | |
978 | ||
979 | /* | |
980 | * Got the anchor, now lets find the end... | |
981 | */ | |
982 | ||
983 | while (*ptr && *ptr != '>') | |
984 | ptr ++; | |
985 | ||
986 | if (*ptr != '>') | |
987 | break; | |
988 | ||
cfd375ad | 989 | *ptr++ = '\0'; |
ef416fc2 | 990 | } |
ef416fc2 | 991 | |
992 | /* | |
993 | * Now collect text for the link... | |
994 | */ | |
995 | ||
996 | text = ptr; | |
997 | while ((ptr = strchr(text, '<')) == NULL) | |
998 | { | |
999 | ptr = text + strlen(text); | |
1000 | if (ptr >= (line + sizeof(line) - 2)) | |
1001 | break; | |
1002 | ||
1003 | *ptr++ = ' '; | |
1004 | ||
7e86f2f6 | 1005 | if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1)) |
ef416fc2 | 1006 | break; |
1007 | } | |
1008 | ||
1009 | *ptr = '\0'; | |
1010 | ||
1011 | if (node) | |
7e86f2f6 | 1012 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 1013 | |
1014 | if (!*text) | |
1015 | { | |
1016 | node = NULL; | |
1017 | break; | |
1018 | } | |
1019 | ||
ecdc0628 | 1020 | if ((node = helpFindNode(hi, relative, anchor)) != NULL) |
ef416fc2 | 1021 | { |
1022 | /* | |
1023 | * Node already in the index, so replace the text and other | |
1024 | * data... | |
1025 | */ | |
1026 | ||
ecdc0628 | 1027 | cupsArrayRemove(hi->nodes, node); |
ef416fc2 | 1028 | |
1029 | if (node->section) | |
1030 | free(node->section); | |
1031 | ||
1032 | if (node->text) | |
1033 | free(node->text); | |
1034 | ||
f7deaa1a | 1035 | if (node->words) |
1036 | { | |
1037 | for (word = (help_word_t *)cupsArrayFirst(node->words); | |
1038 | word; | |
1039 | word = (help_word_t *)cupsArrayNext(node->words)) | |
1040 | help_delete_word(word); | |
1041 | ||
1042 | cupsArrayDelete(node->words); | |
1043 | node->words = NULL; | |
1044 | } | |
1045 | ||
ef416fc2 | 1046 | node->section = section[0] ? strdup(section) : NULL; |
1047 | node->text = strdup(text); | |
1048 | node->mtime = mtime; | |
1049 | node->offset = offset; | |
1050 | node->score = 0; | |
1051 | } | |
1052 | else | |
1053 | { | |
1054 | /* | |
1055 | * New node... | |
1056 | */ | |
1057 | ||
1058 | node = help_new_node(relative, anchor, section, text, mtime, offset, 0); | |
ef416fc2 | 1059 | } |
1060 | ||
1061 | /* | |
1062 | * Go through the text value and replace tabs and newlines with | |
1063 | * whitespace and eliminate extra whitespace... | |
1064 | */ | |
1065 | ||
1066 | for (ptr = node->text, text = node->text; *ptr;) | |
1067 | if (isspace(*ptr & 255)) | |
1068 | { | |
1069 | while (isspace(*ptr & 255)) | |
ed486911 | 1070 | ptr ++; |
ef416fc2 | 1071 | |
1072 | *text++ = ' '; | |
1073 | } | |
1074 | else if (text != ptr) | |
1075 | *text++ = *ptr++; | |
1076 | else | |
1077 | { | |
1078 | text ++; | |
1079 | ptr ++; | |
1080 | } | |
1081 | ||
1082 | *text = '\0'; | |
1083 | ||
ecdc0628 | 1084 | /* |
1085 | * (Re)add the node to the array... | |
1086 | */ | |
1087 | ||
1088 | cupsArrayAdd(hi->nodes, node); | |
f7deaa1a | 1089 | |
1090 | if (!anchor) | |
1091 | node = NULL; | |
ef416fc2 | 1092 | break; |
1093 | } | |
1094 | ||
f7deaa1a | 1095 | if (node) |
1096 | { | |
1097 | /* | |
1098 | * Scan this line for words... | |
1099 | */ | |
1100 | ||
1101 | for (ptr = line; *ptr; ptr ++) | |
1102 | { | |
1103 | /* | |
1104 | * Skip HTML stuff... | |
1105 | */ | |
1106 | ||
1107 | if (*ptr == '<') | |
1108 | { | |
1109 | if (!strncmp(ptr, "<!--", 4)) | |
1110 | { | |
1111 | /* | |
1112 | * Skip HTML comment... | |
1113 | */ | |
1114 | ||
1115 | if ((text = strstr(ptr + 4, "-->")) == NULL) | |
1116 | ptr += strlen(ptr) - 1; | |
1117 | else | |
1118 | ptr = text + 2; | |
1119 | } | |
1120 | else | |
1121 | { | |
1122 | /* | |
1123 | * Skip HTML element... | |
1124 | */ | |
1125 | ||
1126 | for (ptr ++; *ptr && *ptr != '>'; ptr ++) | |
f42414bf | 1127 | { |
f7deaa1a | 1128 | if (*ptr == '\"' || *ptr == '\'') |
1129 | { | |
1130 | for (quote = *ptr++; *ptr && *ptr != quote; ptr ++); | |
1131 | ||
1132 | if (!*ptr) | |
1133 | ptr --; | |
1134 | } | |
f42414bf | 1135 | } |
f7deaa1a | 1136 | |
1137 | if (!*ptr) | |
1138 | ptr --; | |
1139 | } | |
1140 | ||
1141 | continue; | |
1142 | } | |
1143 | else if (*ptr == '&') | |
1144 | { | |
1145 | /* | |
1146 | * Skip HTML entity... | |
1147 | */ | |
1148 | ||
1149 | for (ptr ++; *ptr && *ptr != ';'; ptr ++); | |
1150 | ||
1151 | if (!*ptr) | |
1152 | ptr --; | |
1153 | ||
1154 | continue; | |
1155 | } | |
1156 | else if (!isalnum(*ptr & 255)) | |
1157 | continue; | |
1158 | ||
1159 | /* | |
1160 | * Found the start of a word, search until we find the end... | |
1161 | */ | |
1162 | ||
1163 | for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++); | |
1164 | ||
7e86f2f6 | 1165 | wordlen = (int)(ptr - text); |
f7deaa1a | 1166 | |
07623986 | 1167 | memcpy(temp, text, (size_t)wordlen); |
f42414bf | 1168 | temp[wordlen] = '\0'; |
1169 | ||
1170 | ptr --; | |
f7deaa1a | 1171 | |
f42414bf | 1172 | if (wordlen > 1 && !bsearch(temp, help_common_words, |
f7deaa1a | 1173 | (sizeof(help_common_words) / |
1174 | sizeof(help_common_words[0])), | |
1175 | sizeof(help_common_words[0]), | |
1176 | (int (*)(const void *, const void *)) | |
88f9aafc | 1177 | _cups_strcasecmp)) |
f42414bf | 1178 | help_add_word(node, temp); |
f7deaa1a | 1179 | } |
1180 | } | |
1181 | ||
ef416fc2 | 1182 | /* |
1183 | * Get the offset of the next line... | |
1184 | */ | |
1185 | ||
1186 | offset = cupsFileTell(fp); | |
1187 | } | |
1188 | ||
1189 | cupsFileClose(fp); | |
1190 | ||
1191 | if (node) | |
7e86f2f6 | 1192 | node->length = (size_t)(offset - node->offset); |
ef416fc2 | 1193 | |
1194 | return (0); | |
1195 | } | |
1196 | ||
1197 | ||
1198 | /* | |
1199 | * 'help_new_node()' - Create a new node and add it to an index. | |
1200 | */ | |
1201 | ||
1202 | static help_node_t * /* O - Node pointer or NULL on error */ | |
1203 | help_new_node(const char *filename, /* I - Filename */ | |
1204 | const char *anchor, /* I - Anchor */ | |
1205 | const char *section, /* I - Section */ | |
1206 | const char *text, /* I - Text */ | |
1207 | time_t mtime, /* I - Modification time */ | |
1208 | off_t offset, /* I - Offset in file */ | |
1209 | size_t length) /* I - Length in bytes */ | |
1210 | { | |
1211 | help_node_t *n; /* Node */ | |
1212 | ||
1213 | ||
85dda01c MS |
1214 | DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", " |
1215 | "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text, | |
1216 | (long)mtime, (long)offset, (long)length)); | |
ef416fc2 | 1217 | |
1218 | n = (help_node_t *)calloc(1, sizeof(help_node_t)); | |
1219 | if (!n) | |
1220 | return (NULL); | |
1221 | ||
1222 | n->filename = strdup(filename); | |
1223 | n->anchor = anchor ? strdup(anchor) : NULL; | |
3bb59731 | 1224 | n->section = *section ? strdup(section) : NULL; |
ef416fc2 | 1225 | n->text = strdup(text); |
1226 | n->mtime = mtime; | |
1227 | n->offset = offset; | |
1228 | n->length = length; | |
1229 | ||
1230 | return (n); | |
1231 | } | |
1232 | ||
1233 | ||
1234 | /* | |
1235 | * 'help_sort_by_name()' - Sort nodes by filename and anchor. | |
1236 | */ | |
1237 | ||
1238 | static int /* O - Difference */ | |
ecdc0628 | 1239 | help_sort_by_name(help_node_t *n1, /* I - First node */ |
1240 | help_node_t *n2) /* I - Second node */ | |
ef416fc2 | 1241 | { |
ef416fc2 | 1242 | int diff; /* Difference */ |
1243 | ||
1244 | ||
85dda01c MS |
1245 | DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)", |
1246 | n1, n1->filename, n1->anchor, | |
1247 | n2, n2->filename, n2->anchor)); | |
ef416fc2 | 1248 | |
ecdc0628 | 1249 | if ((diff = strcmp(n1->filename, n2->filename)) != 0) |
ef416fc2 | 1250 | return (diff); |
1251 | ||
ecdc0628 | 1252 | if (!n1->anchor && !n2->anchor) |
ef416fc2 | 1253 | return (0); |
ecdc0628 | 1254 | else if (!n1->anchor) |
ef416fc2 | 1255 | return (-1); |
ecdc0628 | 1256 | else if (!n2->anchor) |
ef416fc2 | 1257 | return (1); |
1258 | else | |
ecdc0628 | 1259 | return (strcmp(n1->anchor, n2->anchor)); |
ef416fc2 | 1260 | } |
1261 | ||
1262 | ||
1263 | /* | |
1264 | * 'help_sort_by_score()' - Sort nodes by score, section, and text. | |
1265 | */ | |
1266 | ||
1267 | static int /* O - Difference */ | |
ecdc0628 | 1268 | help_sort_by_score(help_node_t *n1, /* I - First node */ |
1269 | help_node_t *n2) /* I - Second node */ | |
ef416fc2 | 1270 | { |
ef416fc2 | 1271 | int diff; /* Difference */ |
1272 | ||
1273 | ||
85dda01c MS |
1274 | DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), " |
1275 | "n2=%p(%d \"%s\" \"%s\")", | |
1276 | n1, n1->score, n1->section, n1->text, | |
1277 | n2, n2->score, n2->section, n2->text)); | |
ef416fc2 | 1278 | |
ecdc0628 | 1279 | if (n1->score != n2->score) |
1f0275e3 | 1280 | return (n2->score - n1->score); |
ef416fc2 | 1281 | |
ecdc0628 | 1282 | if (n1->section && !n2->section) |
ef416fc2 | 1283 | return (1); |
ecdc0628 | 1284 | else if (!n1->section && n2->section) |
ef416fc2 | 1285 | return (-1); |
ecdc0628 | 1286 | else if (n1->section && n2->section && |
1287 | (diff = strcmp(n1->section, n2->section)) != 0) | |
ef416fc2 | 1288 | return (diff); |
1289 | ||
88f9aafc | 1290 | return (_cups_strcasecmp(n1->text, n2->text)); |
ef416fc2 | 1291 | } |
1292 | ||
1293 | ||
1294 | /* | |
f7deaa1a | 1295 | * 'help_sort_words()' - Sort words alphabetically. |
1296 | */ | |
1297 | ||
1298 | static int /* O - Difference */ | |
1299 | help_sort_words(help_word_t *w1, /* I - Second word */ | |
1300 | help_word_t *w2) /* I - Second word */ | |
1301 | { | |
85dda01c | 1302 | DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))", |
f7deaa1a | 1303 | w1, w1->text, w2, w2->text)); |
1304 | ||
88f9aafc | 1305 | return (_cups_strcasecmp(w1->text, w2->text)); |
f7deaa1a | 1306 | } |