]>
Commit | Line | Data |
---|---|---|
df4ef2ab UD |
1 | /* Copyright (C) 1996, 1997 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. | |
3 | Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. | |
a641835a | 4 | |
df4ef2ab UD |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Library General Public License as | |
7 | published by the Free Software Foundation; either version 2 of the | |
8 | License, or (at your option) any later version. | |
a641835a | 9 | |
df4ef2ab UD |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Library General Public License for more details. | |
a641835a | 14 | |
df4ef2ab UD |
15 | You should have received a copy of the GNU Library General Public |
16 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
17 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
18 | Boston, MA 02111-1307, USA. */ | |
a641835a RM |
19 | |
20 | #ifdef HAVE_CONFIG_H | |
21 | # include <config.h> | |
22 | #endif | |
23 | ||
5a97622d | 24 | #include <argp.h> |
a641835a RM |
25 | #include <ctype.h> |
26 | #include <endian.h> | |
27 | #include <errno.h> | |
28 | #include <error.h> | |
29 | #include <fcntl.h> | |
e75154a6 | 30 | #include <locale.h> |
a641835a RM |
31 | #include <libintl.h> |
32 | #include <limits.h> | |
33 | #include <nl_types.h> | |
34 | #include <obstack.h> | |
35 | #include <stdio.h> | |
36 | #include <stdlib.h> | |
37 | #include <string.h> | |
38 | #include <unistd.h> | |
39 | ||
40 | #include "version.h" | |
41 | ||
42 | #include "catgetsinfo.h" | |
43 | ||
44 | ||
/* Reverse the byte order of the 32-bit unsigned value W.  W is
   evaluated several times, so it must be free of side effects.  */
#define SWAPU32(w) \
  ((((w) & 0xff00) << 8) | ((w) << 24) | ((w) >> 24) | (((w) >> 8) & 0xff00))
47 | ||
/* One message of a message set.  The messages of a set form a singly
   linked list which is kept sorted by ascending NUMBER.  */
struct message_list
{
  /* Message number inside its set.  */
  int number;
  /* Text of the message.  */
  const char *message;

  /* Input file and line where the message was defined.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it has none.  */
  const char *symbol;

  /* Next message of the same set.  */
  struct message_list *next;
};
59 | ||
60 | ||
/* One message set of the catalog.  All sets are chained through NEXT.  */
struct set_list
{
  /* Set number as stored by find_set, i.e. the number used in the
     input file plus one, so that zero never denotes a real set.  */
  int number;
  /* Non-zero once a `$delset' directive named this set.  */
  int deleted;
  /* Messages of this set, sorted by message number.  */
  struct message_list *messages;
  /* Highest message number handed out so far; symbolic message names
     are assigned the numbers following it.  */
  int last_message;

  /* Input file and line of the `$set' directive.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL if it has none.  */
  const char *symbol;

  /* Next set in the catalog.  */
  struct set_list *next;
};
74 | ||
75 | ||
/* Complete state collected while reading the input files.  */
struct catalog
{
  /* List of every set seen so far.  */
  struct set_list *all_sets;
  /* Set which receives the messages read next.  */
  struct set_list *current_set;
  /* Counter of message lines processed; used as the starting point
     when searching for the hash table size.  */
  size_t total_messages;
  /* Character selected by `$quote'; '\0' means no quote character.  */
  char quote_char;
  /* Highest set number used so far; symbolic set names are assigned
     the numbers following it.  */
  int last_set;

  /* Storage for the text of all input lines which are kept.  */
  struct obstack mem_pool;
};
86 | ||
87 | ||
88 | /* If non-zero force creation of new file, not using existing one. */ | |
89 | static int force_new; | |
90 | ||
5a97622d UD |
91 | /* Name of output file. */ |
92 | static const char *output_name; | |
93 | ||
94 | /* Name of generated C header file. */ | |
95 | static const char *header_name; | |
96 | ||
97 | /* Name and version of program. */ | |
98 | static void print_version (FILE *stream, struct argp_state *state); | |
99 | void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; | |
100 | ||
101 | #define OPT_NEW 1 | |
102 | ||
103 | /* Definitions of arguments for argp functions. */ | |
104 | static const struct argp_option options[] = | |
105 | { | |
106 | { "header", 'H', N_("NAME"), 0, | |
107 | N_("Create C header file NAME containing symbol definitions") }, | |
108 | { "new", OPT_NEW, NULL, 0, | |
109 | N_("Do not use existing catalog, force new output file") }, | |
110 | { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") }, | |
111 | { NULL, 0, NULL, 0, NULL } | |
112 | }; | |
113 | ||
114 | /* Short description of program. */ | |
115 | static const char doc[] = N_("Generate message catalog.\ | |
116 | \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\ | |
117 | is -, output is written to standard output.\n"); | |
118 | ||
119 | /* Strings for arguments in help texts. */ | |
120 | static const char args_doc[] = N_("\ | |
121 | -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]"); | |
122 | ||
123 | /* Prototype for option handler. */ | |
124 | static error_t parse_opt (int key, char *arg, struct argp_state *state); | |
125 | ||
126 | /* Function to print some extra text in the help message. */ | |
127 | static char *more_help (int key, const char *text, void *input); | |
128 | ||
129 | /* Data structure to communicate with argp functions. */ | |
130 | static struct argp argp = | |
a641835a | 131 | { |
5a97622d | 132 | options, parse_opt, args_doc, doc, NULL, more_help |
a641835a RM |
133 | }; |
134 | ||
5a97622d | 135 | |
a641835a RM |
/* Allocation wrapper defined elsewhere; it does the error checking so
   callers here use the result without testing it.  */
extern void *xmalloc (size_t n);

/* Hook installed as `error_print_progname'.  */
static void error_print (void);

/* Reading of message source files and of an existing catalog.  */
static struct catalog *read_input_file (struct catalog *current,
                                        const char *fname);
static void read_old (struct catalog *catalog, const char *file_name);

/* Helpers used while reading.  */
static struct set_list *find_set (struct catalog *current, int number);
static void normalize_line (const char *fname, size_t line, char *string,
                            char quote_char);

/* Generation of the catalog and the optional header file.  */
static void write_out (struct catalog *result, const char *output_name,
                       const char *header_name);
149 | ||
150 | ||
151 | int | |
152 | main (int argc, char *argv[]) | |
153 | { | |
154 | struct catalog *result; | |
2f6d1f1b | 155 | int remaining; |
a641835a RM |
156 | |
157 | /* Set program name for messages. */ | |
158 | error_print_progname = error_print; | |
159 | ||
160 | /* Set locale via LC_ALL. */ | |
161 | setlocale (LC_ALL, ""); | |
162 | ||
163 | /* Set the text message domain. */ | |
164 | textdomain (PACKAGE); | |
165 | ||
166 | /* Initialize local variables. */ | |
a641835a RM |
167 | result = NULL; |
168 | ||
5a97622d | 169 | /* Parse and process arguments. */ |
2f6d1f1b | 170 | argp_parse (&argp, argc, argv, 0, &remaining, NULL); |
a641835a RM |
171 | |
172 | /* Determine output file. */ | |
173 | if (output_name == NULL) | |
2f6d1f1b | 174 | output_name = remaining < argc ? argv[remaining++] : "-"; |
a641835a RM |
175 | |
176 | /* Process all input files. */ | |
177 | setlocale (LC_CTYPE, "C"); | |
2f6d1f1b | 178 | if (remaining < argc) |
a641835a | 179 | do |
2f6d1f1b UD |
180 | result = read_input_file (result, argv[remaining]); |
181 | while (++remaining < argc); | |
a641835a RM |
182 | else |
183 | result = read_input_file (NULL, "-"); | |
184 | ||
185 | /* Write out the result. */ | |
186 | if (result != NULL) | |
187 | write_out (result, output_name, header_name); | |
188 | ||
189 | exit (EXIT_SUCCESS); | |
190 | } | |
191 | ||
192 | ||
5a97622d UD |
193 | /* Handle program arguments. */ |
194 | static error_t | |
195 | parse_opt (int key, char *arg, struct argp_state *state) | |
a641835a | 196 | { |
5a97622d | 197 | switch (key) |
fafaa44e | 198 | { |
5a97622d UD |
199 | case 'H': |
200 | header_name = arg; | |
201 | break; | |
202 | case OPT_NEW: | |
203 | force_new = 1; | |
204 | break; | |
205 | case 'o': | |
206 | output_name = arg; | |
207 | break; | |
208 | default: | |
209 | return ARGP_ERR_UNKNOWN; | |
fafaa44e | 210 | } |
5a97622d UD |
211 | return 0; |
212 | } | |
a641835a | 213 | |
5a97622d UD |
214 | |
/* argp help filter.  For ARGP_KEY_HELP_EXTRA a newly allocated copy
   of the bug reporting hint is returned; for every other key TEXT is
   passed through unchanged.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.ai.mit.edu>.\n"));
}
229 | ||
230 | /* Print the version information. */ | |
231 | static void | |
232 | print_version (FILE *stream, struct argp_state *state) | |
233 | { | |
234 | fprintf (stream, "gencat (GNU %s) %s\n", PACKAGE, VERSION); | |
235 | fprintf (stream, gettext ("\ | |
236 | Copyright (C) %s Free Software Foundation, Inc.\n\ | |
237 | This is free software; see the source for copying conditions. There is NO\n\ | |
238 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ | |
239 | "), "1996, 1997"); | |
240 | fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); | |
a641835a RM |
241 | } |
242 | ||
243 | ||
/* Installed as `error_print_progname' by main.  It deliberately
   prints nothing: the program name must not precede the messages
   because Emacs' compile.el does not like it.  */
static void
error_print (void)
{
}
252 | ||
253 | ||
254 | static struct catalog * | |
255 | read_input_file (struct catalog *current, const char *fname) | |
256 | { | |
257 | FILE *fp; | |
258 | char *buf; | |
259 | size_t len; | |
260 | size_t line_number; | |
261 | ||
262 | if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0) | |
263 | { | |
264 | fp = stdin; | |
265 | fname = gettext ("*standard input*"); | |
266 | } | |
267 | else | |
268 | fp = fopen (fname, "r"); | |
269 | if (fp == NULL) | |
270 | { | |
271 | error (0, errno, gettext ("cannot open input file `%s'"), fname); | |
272 | return current; | |
273 | } | |
274 | ||
275 | /* If we haven't seen anything yet, allocate result structure. */ | |
276 | if (current == NULL) | |
277 | { | |
278 | current = (struct catalog *) xmalloc (sizeof (*current)); | |
279 | ||
280 | current->all_sets = NULL; | |
281 | current->total_messages = 0; | |
282 | current->last_set = 0; | |
283 | current->current_set = find_set (current, NL_SETD); | |
284 | ||
df4ef2ab | 285 | #define obstack_chunk_alloc malloc |
a641835a RM |
286 | #define obstack_chunk_free free |
287 | obstack_init (¤t->mem_pool); | |
288 | } | |
289 | ||
290 | buf = NULL; | |
291 | len = 0; | |
292 | line_number = 0; | |
293 | while (!feof (fp)) | |
294 | { | |
295 | int continued; | |
296 | int used; | |
297 | size_t start_line = line_number + 1; | |
298 | char *this_line; | |
299 | ||
300 | do | |
301 | { | |
302 | int act_len; | |
303 | ||
304 | act_len = getline (&buf, &len, fp); | |
305 | if (act_len <= 0) | |
306 | break; | |
307 | ++line_number; | |
308 | ||
309 | /* It the line continued? */ | |
310 | if (buf[act_len - 1] == '\n') | |
311 | { | |
312 | --act_len; | |
313 | continued = buf[act_len - 1] == '\\'; | |
314 | if (continued) | |
315 | --act_len; | |
316 | } | |
317 | else | |
318 | continued = 0; | |
319 | ||
320 | /* Append to currently selected line. */ | |
321 | obstack_grow (¤t->mem_pool, buf, act_len); | |
322 | } | |
323 | while (continued); | |
324 | ||
325 | obstack_1grow (¤t->mem_pool, '\0'); | |
326 | this_line = (char *) obstack_finish (¤t->mem_pool); | |
327 | ||
328 | used = 0; | |
329 | if (this_line[0] == '$') | |
330 | { | |
331 | if (isspace (this_line[1])) | |
332 | /* This is a comment line. Do nothing. */; | |
333 | else if (strncmp (&this_line[1], "set", 3) == 0) | |
334 | { | |
40a55d20 | 335 | int cnt = sizeof ("set"); |
6dbe2837 | 336 | int set_number; |
a641835a RM |
337 | const char *symbol = NULL; |
338 | while (isspace (this_line[cnt])) | |
339 | ++cnt; | |
340 | ||
341 | if (isdigit (this_line[cnt])) | |
342 | { | |
343 | set_number = atol (&this_line[cnt]); | |
344 | ||
345 | /* If the given number for the character set is | |
346 | higher than any we used for symbolic set names | |
347 | avoid clashing by using only higher numbers for | |
348 | the following symbolic definitions. */ | |
349 | if (set_number > current->last_set) | |
350 | current->last_set = set_number; | |
351 | } | |
352 | else | |
353 | { | |
354 | /* See whether it is a reasonable identifier. */ | |
355 | int start = cnt; | |
356 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') | |
357 | ++cnt; | |
358 | ||
359 | if (cnt == start) | |
360 | { | |
361 | /* No correct character found. */ | |
362 | error_at_line (0, 0, fname, start_line, | |
363 | gettext ("illegal set number")); | |
364 | set_number = 0; | |
365 | } | |
366 | else | |
367 | { | |
6d52618b | 368 | /* We have found seomthing that looks like a |
a641835a RM |
369 | correct identifier. */ |
370 | struct set_list *runp; | |
371 | ||
372 | this_line[cnt] = '\0'; | |
373 | used = 1; | |
374 | symbol = &this_line[start]; | |
375 | ||
376 | /* Test whether the identifier was already used. */ | |
377 | runp = current->all_sets; | |
378 | while (runp != 0) | |
379 | if (runp->symbol != NULL | |
380 | && strcmp (runp->symbol, symbol) == 0) | |
381 | break; | |
382 | else | |
383 | runp = runp->next; | |
384 | ||
385 | if (runp != NULL) | |
386 | { | |
387 | /* We cannot allow duplicate identifiers for | |
388 | message sets. */ | |
389 | error_at_line (0, 0, fname, start_line, | |
390 | gettext ("duplicate set definition")); | |
391 | error_at_line (0, 0, runp->fname, runp->line, | |
392 | gettext ("\ | |
393 | this is the first definition")); | |
394 | set_number = 0; | |
395 | } | |
396 | else | |
397 | /* Allocate next free message set for identifier. */ | |
398 | set_number = ++current->last_set; | |
399 | } | |
400 | } | |
401 | ||
402 | if (set_number != 0) | |
403 | { | |
404 | /* We found a legal set number. */ | |
405 | current->current_set = find_set (current, set_number); | |
406 | if (symbol != NULL) | |
407 | used = 1; | |
408 | current->current_set->symbol = symbol; | |
409 | current->current_set->fname = fname; | |
410 | current->current_set->line = start_line; | |
411 | } | |
412 | } | |
413 | else if (strncmp (&this_line[1], "delset", 6) == 0) | |
414 | { | |
415 | int cnt = sizeof ("delset"); | |
416 | size_t set_number; | |
417 | while (isspace (this_line[cnt])) | |
418 | ++cnt; | |
419 | ||
420 | if (isdigit (this_line[cnt])) | |
421 | { | |
422 | size_t set_number = atol (&this_line[cnt]); | |
423 | struct set_list *set; | |
424 | ||
425 | /* Mark the message set with the given number as | |
426 | deleted. */ | |
427 | set = find_set (current, set_number); | |
428 | set->deleted = 1; | |
429 | } | |
430 | else | |
431 | { | |
432 | /* See whether it is a reasonable identifier. */ | |
433 | int start = cnt; | |
434 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') | |
435 | ++cnt; | |
436 | ||
437 | if (cnt == start) | |
438 | { | |
439 | error_at_line (0, 0, fname, start_line, | |
440 | gettext ("illegal set number")); | |
441 | set_number = 0; | |
442 | } | |
443 | else | |
444 | { | |
445 | const char *symbol; | |
446 | struct set_list *runp; | |
447 | ||
448 | this_line[cnt] = '\0'; | |
449 | used = 1; | |
450 | symbol = &this_line[start]; | |
451 | ||
452 | /* We have a symbolic set name. This name must | |
453 | appear somewhere else in the catalogs read so | |
454 | far. */ | |
455 | set_number = 0; | |
456 | for (runp = current->all_sets; runp != NULL; | |
457 | runp = runp->next) | |
458 | { | |
459 | if (strcmp (runp->symbol, symbol) == 0) | |
460 | { | |
461 | runp->deleted = 1; | |
462 | break; | |
463 | } | |
464 | } | |
465 | if (runp == NULL) | |
466 | /* Name does not exist before. */ | |
467 | error_at_line (0, 0, fname, start_line, | |
468 | gettext ("unknown set `%s'"), symbol); | |
469 | } | |
470 | } | |
471 | } | |
472 | else if (strncmp (&this_line[1], "quote", 5) == 0) | |
473 | { | |
474 | int cnt = sizeof ("quote"); | |
475 | while (isspace (this_line[cnt])) | |
476 | ++cnt; | |
477 | /* Yes, the quote char can be '\0'; this means no quote | |
478 | char. */ | |
479 | current->quote_char = this_line[cnt]; | |
480 | } | |
481 | else | |
482 | { | |
483 | int cnt; | |
484 | cnt = 2; | |
485 | while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) | |
486 | ++cnt; | |
487 | this_line[cnt] = '\0'; | |
488 | error_at_line (0, 0, fname, start_line, | |
489 | gettext ("unknown directive `%s': line ignored"), | |
490 | &this_line[1]); | |
491 | } | |
492 | } | |
493 | else if (isalnum (this_line[0]) || this_line[0] == '_') | |
494 | { | |
495 | const char *ident = this_line; | |
496 | int message_number; | |
497 | ||
498 | do | |
499 | ++this_line; | |
500 | while (this_line[0] != '\0' && !isspace (this_line[0]));; | |
501 | this_line[0] = '\0'; /* Terminate the identifier. */ | |
502 | ||
503 | do | |
504 | ++this_line; | |
505 | while (isspace (this_line[0])); | |
506 | /* Now we found the beginning of the message itself. */ | |
507 | ||
508 | if (isdigit (ident[0])) | |
509 | { | |
510 | struct message_list *runp; | |
511 | ||
512 | message_number = atoi (ident); | |
513 | ||
514 | /* Find location to insert the new message. */ | |
515 | runp = current->current_set->messages; | |
516 | while (runp != NULL) | |
517 | if (runp->number == message_number) | |
518 | break; | |
519 | else | |
520 | runp = runp->next; | |
521 | if (runp != NULL) | |
522 | { | |
523 | /* Oh, oh. There is already a message with this | |
524 | number is the message set. */ | |
525 | error_at_line (0, 0, fname, start_line, | |
526 | gettext ("duplicated message number")); | |
527 | error_at_line (0, 0, runp->fname, runp->line, | |
528 | gettext ("this is the first definition")); | |
529 | message_number = 0; | |
530 | } | |
531 | ident = NULL; /* We don't have a symbol. */ | |
532 | ||
533 | if (message_number != 0 | |
534 | && message_number > current->current_set->last_message) | |
535 | current->current_set->last_message = message_number; | |
536 | } | |
537 | else if (ident[0] != '\0') | |
538 | { | |
539 | struct message_list *runp; | |
540 | runp = current->current_set->messages; | |
541 | ||
542 | /* Test whether the symbolic name was not used for | |
543 | another message in this message set. */ | |
544 | while (runp != NULL) | |
545 | if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0) | |
546 | break; | |
547 | else | |
548 | runp = runp->next; | |
549 | if (runp != NULL) | |
550 | { | |
551 | /* The name is already used. */ | |
552 | error_at_line (0, 0, fname, start_line, | |
553 | gettext ("duplicated message identifier")); | |
554 | error_at_line (0, 0, runp->fname, runp->line, | |
555 | gettext ("this is the first definition")); | |
556 | message_number = 0; | |
557 | } | |
558 | else | |
559 | /* Give the message the next unused number. */ | |
560 | message_number = ++current->current_set->last_message; | |
561 | } | |
562 | else | |
563 | message_number = 0; | |
564 | ||
565 | if (message_number != 0) | |
566 | { | |
567 | struct message_list *newp; | |
568 | ||
569 | used = 1; /* Yes, we use the line. */ | |
570 | ||
571 | /* Strip quote characters, change escape sequences into | |
572 | correct characters etc. */ | |
573 | normalize_line (fname, start_line, this_line, | |
574 | current->quote_char); | |
575 | ||
576 | newp = (struct message_list *) xmalloc (sizeof (*newp)); | |
577 | newp->number = message_number; | |
578 | newp->message = this_line; | |
579 | /* Remember symbolic name; is NULL if no is given. */ | |
580 | newp->symbol = ident; | |
581 | /* Remember where we found the character. */ | |
582 | newp->fname = fname; | |
583 | newp->line = start_line; | |
584 | ||
585 | /* Find place to insert to message. We keep them in a | |
586 | sorted single linked list. */ | |
587 | if (current->current_set->messages == NULL | |
588 | || current->current_set->messages->number > message_number) | |
589 | { | |
590 | newp->next = current->current_set->messages; | |
591 | current->current_set->messages = newp; | |
592 | } | |
593 | else | |
594 | { | |
595 | struct message_list *runp; | |
596 | runp = current->current_set->messages; | |
597 | while (runp->next != NULL) | |
598 | if (runp->next->number > message_number) | |
599 | break; | |
600 | else | |
601 | runp = runp->next; | |
602 | newp->next = runp->next; | |
603 | runp->next = newp; | |
604 | } | |
605 | } | |
606 | ++current->total_messages; | |
607 | } | |
608 | else | |
609 | { | |
610 | size_t cnt; | |
611 | ||
612 | cnt = 0; | |
613 | /* See whether we have any non-white space character in this | |
614 | line. */ | |
615 | while (this_line[cnt] != '\0' && isspace (this_line[cnt])) | |
616 | ++cnt; | |
617 | ||
618 | if (this_line[cnt] != '\0') | |
619 | /* Yes, some unknown characters found. */ | |
620 | error_at_line (0, 0, fname, start_line, | |
621 | gettext ("malformed line ignored")); | |
622 | } | |
623 | ||
624 | /* We can save the memory for the line if it was not used. */ | |
625 | if (!used) | |
626 | obstack_free (¤t->mem_pool, this_line); | |
627 | } | |
628 | ||
629 | if (fp != stdin) | |
630 | fclose (fp); | |
631 | return current; | |
632 | } | |
633 | ||
634 | ||
635 | static void | |
636 | write_out (struct catalog *catalog, const char *output_name, | |
637 | const char *header_name) | |
638 | { | |
639 | /* Computing the "optimal" size. */ | |
640 | struct set_list *set_run; | |
641 | size_t best_total, best_size, best_depth; | |
642 | size_t act_size, act_depth; | |
643 | struct catalog_obj obj; | |
644 | struct obstack string_pool; | |
645 | const char *strings; | |
646 | size_t strings_size; | |
647 | u_int32_t *array1, *array2; | |
648 | size_t cnt; | |
649 | int fd; | |
650 | ||
651 | /* If not otherwise told try to read file with existing | |
652 | translations. */ | |
653 | if (!force_new) | |
654 | read_old (catalog, output_name); | |
655 | ||
656 | /* Initialize best_size with a very high value. */ | |
657 | best_total = best_size = best_depth = UINT_MAX; | |
658 | ||
659 | /* We need some start size for testing. Let's start with | |
660 | TOTAL_MESSAGES / 5, which theoretically provides a mean depth of | |
661 | 5. */ | |
662 | act_size = 1 + catalog->total_messages / 5; | |
663 | ||
664 | /* We determine the size of a hash table here. Because the message | |
665 | numbers can be chosen arbitrary by the programmer we cannot use | |
666 | the simple method of accessing the array using the message | |
667 | number. The algorithm is based on the trivial hash function | |
668 | NUMBER % TABLE_SIZE, where collisions are stored in a second | |
669 | dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that | |
670 | the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */ | |
671 | while (act_size <= best_total) | |
672 | { | |
673 | size_t deep[act_size]; | |
674 | ||
675 | act_depth = 1; | |
676 | memset (deep, '\0', act_size * sizeof (size_t)); | |
677 | set_run = catalog->all_sets; | |
678 | while (set_run != NULL) | |
679 | { | |
680 | struct message_list *message_run; | |
681 | ||
682 | message_run = set_run->messages; | |
683 | while (message_run != NULL) | |
684 | { | |
685 | size_t idx = (message_run->number * set_run->number) % act_size; | |
686 | ||
687 | ++deep[idx]; | |
688 | if (deep[idx] > act_depth) | |
689 | { | |
690 | act_depth = deep[idx]; | |
691 | if (act_depth * act_size > best_total) | |
692 | break; | |
693 | } | |
694 | message_run = message_run->next; | |
695 | } | |
a641835a RM |
696 | set_run = set_run->next; |
697 | } | |
698 | ||
adc6ff7f RM |
699 | if (act_depth * act_size <= best_total) |
700 | { | |
701 | /* We have found a better solution. */ | |
702 | best_total = act_depth * act_size; | |
703 | best_size = act_size; | |
704 | best_depth = act_depth; | |
705 | } | |
706 | ||
a641835a RM |
707 | ++act_size; |
708 | } | |
709 | ||
710 | /* let's be prepared for an empty message file. */ | |
711 | if (best_size == UINT_MAX) | |
712 | { | |
713 | best_size = 1; | |
714 | best_depth = 1; | |
715 | } | |
716 | ||
717 | /* OK, now we have the size we will use. Fill in the header, build | |
718 | the table and the second one with swapped byte order. */ | |
719 | obj.magic = CATGETS_MAGIC; | |
720 | obj.plane_size = best_size; | |
721 | obj.plane_depth = best_depth; | |
722 | ||
723 | /* Allocate room for all needed arrays. */ | |
724 | array1 = | |
725 | (u_int32_t *) alloca (best_size * best_depth * sizeof (u_int32_t) * 3); | |
726 | memset (array1, '\0', best_size * best_depth * sizeof (u_int32_t) * 3); | |
727 | array2 | |
728 | = (u_int32_t *) alloca (best_size * best_depth * sizeof (u_int32_t) * 3); | |
729 | obstack_init (&string_pool); | |
730 | ||
731 | set_run = catalog->all_sets; | |
732 | while (set_run != NULL) | |
733 | { | |
734 | struct message_list *message_run; | |
735 | ||
736 | message_run = set_run->messages; | |
737 | while (message_run != NULL) | |
738 | { | |
739 | size_t idx = (((message_run->number * set_run->number) % best_size) | |
740 | * 3); | |
741 | /* Determine collision depth. */ | |
742 | while (array1[idx] != 0) | |
743 | idx += best_size * 3; | |
744 | ||
745 | /* Store set number, message number and pointer into string | |
746 | space, relative to the first string. */ | |
747 | array1[idx + 0] = set_run->number; | |
748 | array1[idx + 1] = message_run->number; | |
749 | array1[idx + 2] = obstack_object_size (&string_pool); | |
750 | ||
751 | /* Add current string to the continuous space containing all | |
752 | strings. */ | |
753 | obstack_grow0 (&string_pool, message_run->message, | |
754 | strlen (message_run->message)); | |
755 | ||
756 | message_run = message_run->next; | |
757 | } | |
758 | ||
759 | set_run = set_run->next; | |
760 | } | |
761 | strings_size = obstack_object_size (&string_pool); | |
762 | strings = obstack_finish (&string_pool); | |
763 | ||
764 | /* Compute ARRAY2 by changing the byte order. */ | |
765 | for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) | |
766 | array2[cnt] = SWAPU32 (array1[cnt]); | |
767 | ||
768 | /* Now we can write out the whole data. */ | |
769 | if (strcmp (output_name, "-") == 0 | |
770 | || strcmp (output_name, "/dev/stdout") == 0) | |
771 | fd = STDOUT_FILENO; | |
772 | else | |
773 | { | |
774 | fd = creat (output_name, 0666); | |
775 | if (fd < 0) | |
776 | error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"), | |
777 | output_name); | |
778 | } | |
779 | ||
780 | /* Write out header. */ | |
781 | write (fd, &obj, sizeof (obj)); | |
782 | ||
783 | /* We always write out the little endian version of the index | |
784 | arrays. */ | |
785 | #if __BYTE_ORDER == __LITTLE_ENDIAN | |
786 | write (fd, array1, best_size * best_depth * sizeof (u_int32_t) * 3); | |
787 | write (fd, array2, best_size * best_depth * sizeof (u_int32_t) * 3); | |
788 | #elif __BYTE_ORDER == __BIG_ENDIAN | |
789 | write (fd, array2, best_size * best_depth * sizeof (u_int32_t) * 3); | |
790 | write (fd, array1, best_size * best_depth * sizeof (u_int32_t) * 3); | |
791 | #else | |
792 | # error Cannot handle __BYTE_ORDER byte order | |
793 | #endif | |
794 | ||
795 | /* Finally write the strings. */ | |
796 | write (fd, strings, strings_size); | |
797 | ||
798 | if (fd != STDOUT_FILENO) | |
799 | close (fd); | |
800 | ||
801 | /* If requested now write out the header file. */ | |
802 | if (header_name != NULL) | |
803 | { | |
804 | int first = 1; | |
805 | FILE *fp; | |
806 | ||
807 | /* Open output file. "-" or "/dev/stdout" means write to | |
808 | standard output. */ | |
809 | if (strcmp (header_name, "-") == 0 | |
810 | || strcmp (header_name, "/dev/stdout") == 0) | |
811 | fp = stdout; | |
812 | else | |
813 | { | |
814 | fp = fopen (header_name, "w"); | |
815 | if (fp == NULL) | |
816 | error (EXIT_FAILURE, errno, | |
817 | gettext ("cannot open output file `%s'"), header_name); | |
818 | } | |
819 | ||
820 | /* Iterate over all sets and all messages. */ | |
821 | set_run = catalog->all_sets; | |
822 | while (set_run != NULL) | |
823 | { | |
824 | struct message_list *message_run; | |
825 | ||
826 | /* If the current message set has a symbolic name write this | |
827 | out first. */ | |
828 | if (set_run->symbol != NULL) | |
a4242e25 | 829 | fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n", |
a641835a RM |
830 | first ? "" : "\n", set_run->symbol, set_run->number - 1, |
831 | set_run->fname, set_run->line); | |
832 | first = 0; | |
833 | ||
834 | message_run = set_run->messages; | |
835 | while (message_run != NULL) | |
836 | { | |
837 | /* If the current message has a symbolic name write | |
838 | #define out. But we have to take care for the set | |
839 | not having a symbolic name. */ | |
840 | if (message_run->symbol != NULL) | |
841 | if (set_run->symbol == NULL) | |
a4242e25 | 842 | fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n", |
a641835a RM |
843 | set_run->number, message_run->symbol, |
844 | message_run->number, message_run->fname, | |
845 | message_run->line); | |
846 | else | |
a4242e25 | 847 | fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n", |
a641835a RM |
848 | set_run->symbol, message_run->symbol, |
849 | message_run->number, message_run->fname, | |
850 | message_run->line); | |
851 | ||
852 | message_run = message_run->next; | |
853 | } | |
854 | ||
855 | set_run = set_run->next; | |
856 | } | |
857 | ||
858 | if (fp != stdout) | |
859 | fclose (fp); | |
860 | } | |
861 | } | |
862 | ||
863 | ||
864 | static struct set_list * | |
865 | find_set (struct catalog *current, int number) | |
866 | { | |
867 | struct set_list *result = current->all_sets; | |
868 | ||
869 | /* We must avoid set number 0 because a set of this number signals | |
870 | in the tables that the entry is not occupied. */ | |
871 | ++number; | |
872 | ||
873 | while (result != NULL) | |
874 | if (result->number == number) | |
875 | return result; | |
876 | else | |
877 | result = result->next; | |
878 | ||
879 | /* Prepare new message set. */ | |
880 | result = (struct set_list *) xmalloc (sizeof (*result)); | |
881 | result->number = number; | |
882 | result->deleted = 0; | |
883 | result->messages = NULL; | |
884 | result->next = current->all_sets; | |
885 | current->all_sets = result; | |
886 | ||
887 | return result; | |
888 | } | |
889 | ||
890 | ||
/* Normalize given string *in*place* by processing escape sequences
   and quote characters.

   STRING is the message text; QUOTE_CHAR is the current quote
   character or '\0' if there is none.  A leading quote character is
   dropped and the text ends at the first quote character which is not
   escaped.  The sequences \n \t \v \b \r \f \\ and octal \ooo are
   replaced by the characters they denote, a backslash followed by the
   quote character yields the quote character, and a backslash in
   front of any other character is dropped.

   FNAME and LINE are only used for the diagnostic printed when a
   message starts with the quote character but no closing one is
   found.  */
static void
normalize_line (const char *fname, size_t line, char *string, char quote_char)
{
  int is_quoted;
  char *rp = string;
  char *wp = string;

  if (quote_char != '\0' && *rp == quote_char)
    {
      is_quoted = 1;
      ++rp;
    }
  else
    is_quoted = 0;

  while (*rp != '\0')
    if (*rp == quote_char)
      /* We simply end the string when we find the first time a
         not-escaped quote character.  */
      break;
    else if (*rp == '\\')
      {
        ++rp;
        if (quote_char != '\0' && *rp == quote_char)
          /* This is an extension to XPG.  */
          *wp++ = *rp++;
        else
          /* Recognize escape sequences.  */
          switch (*rp)
            {
            case 'n':
              *wp++ = '\n';
              ++rp;
              break;
            case 't':
              *wp++ = '\t';
              ++rp;
              break;
            case 'v':
              *wp++ = '\v';
              ++rp;
              break;
            case 'b':
              *wp++ = '\b';
              ++rp;
              break;
            case 'r':
              *wp++ = '\r';
              ++rp;
              break;
            case 'f':
              *wp++ = '\f';
              ++rp;
              break;
            case '\\':
              *wp++ = '\\';
              ++rp;
              break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
              {
                /* Octal character code; stop taking digits as soon as
                   another one would make the value exceed 255.  */
                int number = *rp++ - '0';
                while (number <= (255 / 8) && *rp >= '0' && *rp <= '7')
                  {
                    number *= 8;
                    number += *rp++ - '0';
                  }
                *wp++ = (char) number;
              }
              break;
            default:
              /* Simply ignore the backslash character.  The character
                 following it is copied by the next iteration.  */
              break;
            }
      }
    else
      *wp++ = *rp++;

  /* If we saw a quote character at the beginning we expect another
     one at the end.  The location has to be passed to error_at_line;
     plain `error' would take FNAME as the format string.  */
  if (is_quoted && *rp != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *wp = '\0';
}
979 | ||
980 | ||
981 | static void | |
982 | read_old (struct catalog *catalog, const char *file_name) | |
983 | { | |
984 | struct catalog_info old_cat_obj; | |
985 | struct set_list *set = NULL; | |
986 | int last_set = -1; | |
987 | size_t cnt; | |
988 | ||
989 | old_cat_obj.status = closed; | |
990 | old_cat_obj.cat_name = file_name; | |
991 | ||
992 | /* Try to open catalog, but don't look through the NLSPATH. */ | |
993 | __open_catalog (&old_cat_obj, 0); | |
994 | ||
6d52618b | 995 | if (old_cat_obj.status != mmapped && old_cat_obj.status != malloced) |
a641835a RM |
996 | if (errno == ENOENT) |
997 | /* No problem, the catalog simply does not exist. */ | |
998 | return; | |
999 | else | |
1000 | error (EXIT_FAILURE, errno, gettext ("while opening old catalog file")); | |
1001 | ||
1002 | /* OK, we have the catalog loaded. Now read all messages and merge | |
1003 | them. When set and message number clash for any message the new | |
1004 | one is used. */ | |
1005 | for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt) | |
1006 | { | |
1007 | struct message_list *message, *last; | |
1008 | ||
1009 | if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0) | |
1010 | /* No message in this slot. */ | |
1011 | continue; | |
1012 | ||
6dbe2837 | 1013 | if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (u_int32_t) last_set) |
a641835a RM |
1014 | { |
1015 | last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1; | |
1016 | set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1); | |
1017 | } | |
1018 | ||
1019 | last = NULL; | |
1020 | message = set->messages; | |
1021 | while (message != NULL) | |
1022 | { | |
6dbe2837 | 1023 | if ((u_int32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1]) |
a641835a RM |
1024 | break; |
1025 | last = message; | |
1026 | message = message->next; | |
1027 | } | |
1028 | ||
1029 | if (message == NULL | |
6dbe2837 | 1030 | || (u_int32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1]) |
a641835a RM |
1031 | { |
1032 | /* We have found a message which is not yet in the catalog. | |
1033 | Insert it at the right position. */ | |
1034 | struct message_list *newp; | |
1035 | ||
1036 | newp = (struct message_list *) xmalloc (sizeof(*newp)); | |
1037 | newp->number = old_cat_obj.name_ptr[cnt * 3 + 1]; | |
1038 | newp->message = | |
1039 | &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]]; | |
1040 | newp->fname = NULL; | |
1041 | newp->line = 0; | |
1042 | newp->symbol = NULL; | |
1043 | newp->next = message; | |
1044 | ||
1045 | if (last == NULL) | |
1046 | set->messages = newp; | |
1047 | else | |
1048 | last->next = newp; | |
1049 | ||
1050 | ++catalog->total_messages; | |
1051 | } | |
1052 | } | |
1053 | } |