]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/iconv_prog.c
Update.
[thirdparty/glibc.git] / iconv / iconv_prog.c
1 /* Convert text in given files from the specified from-set to the to-set.
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
#include <argp.h>
#include <ctype.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <iconv.h>
#include <locale.h>
#include <search.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libintl.h>
#include <sys/stat.h>
#ifdef _POSIX_MAPPED_FILES
# include <sys/mman.h>
#endif
#include <gconv_int.h>
38
39 /* Get libc version number. */
40 #include "../version.h"
41
/* The package name printed by print_version is the libc message
   domain name.  */
#define PACKAGE _libc_intl_domainname


/* Hook picked up by argp to produce the `--version' output; it is
   pointed at our own print_version function.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;

/* Keys of the options which are dispatched on in parse_opt.  */
#define OPT_VERBOSE	1000
#define OPT_LIST	'l'
51
52 /* Definitions of arguments for argp functions. */
53 static const struct argp_option options[] =
54 {
55 { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
56 { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
57 { "to-code", 't', "NAME", 0, N_("encoding for output") },
58 { NULL, 0, NULL, 0, N_("Information:") },
59 { "list", 'l', NULL, 0, N_("list all known coded character sets") },
60 { NULL, 0, NULL, 0, N_("Output control:") },
61 { NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
62 { "output", 'o', "FILE", 0, N_("output file") },
63 { "silent", 's', NULL, 0, N_("supress warnings") },
64 { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
65 { NULL, 0, NULL, 0, NULL }
66 };
67
68 /* Short description of program. */
69 static const char doc[] = N_("\
70 Convert encoding of given files from one encoding to another.");
71
72 /* Strings for arguments in help texts. */
73 static const char args_doc[] = N_("[FILE...]");
74
75 /* Prototype for option handler. */
76 static error_t parse_opt (int key, char *arg, struct argp_state *state);
77
78 /* Function to print some extra text in the help message. */
79 static char *more_help (int key, const char *text, void *input);
80
81 /* Data structure to communicate with argp functions. */
82 static struct argp argp =
83 {
84 options, parse_opt, args_doc, doc, NULL, more_help
85 };
86
87 /* Code sets to convert from and to respectively. */
88 static const char *from_code;
89 static const char *to_code;
90
91 /* File to write output to. If NULL write to stdout. */
92 static const char *output_file;
93
94 /* Nonzero if verbose ouput is wanted. */
95 static int verbose;
96
97 /* Nonzero if list of all coded character sets is wanted. */
98 static int list;
99
100 /* Prototypes for the functions doing the actual work. */
101 static int process_block (iconv_t cd, const char *addr, size_t len,
102 FILE *output);
103 static int process_fd (iconv_t cd, int fd, FILE *output);
104 static int process_file (iconv_t cd, FILE *input, FILE *output);
105 static void print_known_names (void) internal_function;
106
107
108 int
109 main (int argc, char *argv[])
110 {
111 int status = EXIT_SUCCESS;
112 int remaining;
113 FILE *output;
114 iconv_t cd;
115
116 /* Set locale via LC_ALL. */
117 setlocale (LC_ALL, "");
118
119 /* Set the text message domain. */
120 textdomain (_libc_intl_domainname);
121
122 /* Parse and process arguments. */
123 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
124
125 /* List all coded character sets if wanted. */
126 if (list)
127 {
128 print_known_names ();
129 exit (EXIT_SUCCESS);
130 }
131
132 /* If either the from- or to-code is not specified this is an error
133 since we do not know what to do. */
134 if (from_code == NULL && to_code == NULL)
135 error (EXIT_FAILURE, 0,
136 _("neither original nor target encoding specified"));
137 if (from_code == NULL)
138 error (EXIT_FAILURE, 0, _("original encoding not specified using `-f'"));
139 if (to_code == NULL)
140 error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'"));
141
142 /* Let's see whether we have these coded character sets. */
143 cd = iconv_open (to_code, from_code);
144 if (cd == (iconv_t) -1)
145 {
146 if (errno == EINVAL)
147 error (EXIT_FAILURE, 0, _("conversion from `%s' to `%s' not supported"),
148 from_code, to_code);
149 else
150 error (EXIT_FAILURE, errno, _("failed to start conversion processing"));
151 }
152
153 /* Determine output file. */
154 if (output_file != NULL)
155 {
156 output = fopen (output_file, "w");
157 if (output == NULL)
158 error (EXIT_FAILURE, errno, _("cannot open output file"));
159 }
160 else
161 output = stdout;
162
163 /* Now process the remaining files. Write them to stdout or the file
164 specified with the `-o' parameter. If we have no file given as
165 the parameter process all from stdin. */
166 if (remaining == argc)
167 {
168 if (process_file (cd, stdin, output) != 0)
169 status = EXIT_FAILURE;
170 }
171 else
172 do
173 {
174 struct stat st;
175 const char *addr;
176 int fd;
177
178
179 if (verbose)
180 printf ("%s:\n", argv[remaining]);
181 if (strcmp (argv[remaining], "-") == 0)
182 fd = 0;
183 else
184 {
185 fd = open (argv[remaining], O_RDONLY);
186
187 if (fd == -1)
188 {
189 error (0, errno, _("cannot open input file `%s'"),
190 argv[remaining]);
191 status = EXIT_FAILURE;
192 continue;
193 }
194 }
195
196 #ifdef _POSIX_MAPPED_FILES
197 /* We have possibilities for reading the input file. First try
198 to mmap() it since this will provide the fastest solution. */
199 if (fstat (fd, &st) == 0
200 && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
201 != MAP_FAILED))
202 {
203 /* Yes, we can use mmap(). The descriptor is not needed
204 anymore. */
205 if (close (fd) != 0)
206 error (EXIT_FAILURE, errno, _("error while closing input `%s'"),
207 argv[remaining]);
208
209 if (process_block (cd, addr, st.st_size, output) < 0)
210 {
211 /* Something went wrong. */
212 status = EXIT_FAILURE;
213
214 /* We don't need the input data anymore. */
215 munmap ((void *) addr, st.st_size);
216
217 /* We cannot go on with producing output since it might
218 lead to problem because the last output might leave
219 the output stream in an undefined state. */
220 break;
221 }
222
223 /* We don't need the input data anymore. */
224 munmap ((void *) addr, st.st_size);
225 }
226 else
227 #endif /* _POSIX_MAPPED_FILES */
228 {
229 /* Read the file in pieces. */
230 if (process_fd (cd, fd, output) != 0)
231 {
232 /* Something went wrong. */
233 status = EXIT_FAILURE;
234
235 /* We don't need the input file anymore. */
236 close (fd);
237
238 /* We cannot go on with producing output since it might
239 lead to problem because the last output might leave
240 the output stream in an undefined state. */
241 break;
242 }
243
244 /* Now close the file. */
245 close (fd);
246 }
247 }
248 while (++remaining < argc);
249
250 /* Close the output file now. */
251 if (fclose (output))
252 error (EXIT_FAILURE, errno, _("error while closing output file"));
253
254 return status;
255 }
256
257
258 /* Handle program arguments. */
259 static error_t
260 parse_opt (int key, char *arg, struct argp_state *state)
261 {
262 switch (key)
263 {
264 case 'f':
265 from_code = arg;
266 break;
267 case 't':
268 to_code = arg;
269 break;
270 case 'o':
271 output_file = arg;
272 break;
273 case 's':
274 /* Nothing, for now at least. We are not giving out any information
275 about missing character or so. */
276 break;
277 case 'c':
278 /* Omit invalid characters from output.
279 XXX This option will become a meaning once we have different
280 modes of operation for the conversion functions. */
281 break;
282 case OPT_VERBOSE:
283 verbose = 1;
284 break;
285 case OPT_LIST:
286 list = 1;
287 break;
288 default:
289 return ARGP_ERR_UNKNOWN;
290 }
291 return 0;
292 }
293
294
/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA a freshly allocated
   copy of the translated bug reporting note is returned (NULL if the
   allocation fails).  For every other KEY the TEXT passed in is
   returned unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  const char *note;
  size_t size;
  char *copy;

  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  The caller gets its own copy.  */
  note = gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n");
  size = strlen (note) + 1;
  copy = malloc (size);
  if (copy != NULL)
    memcpy (copy, note, size);

  return copy;
}
309
310
311 /* Print the version information. */
312 static void
313 print_version (FILE *stream, struct argp_state *state)
314 {
315 fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION);
316 fprintf (stream, gettext ("\
317 Copyright (C) %s Free Software Foundation, Inc.\n\
318 This is free software; see the source for copying conditions. There is NO\n\
319 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
320 "), "2000");
321 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
322 }
323
324
/* Size of the buffer which receives the converted text.  */
#define OUTBUF_SIZE	32768

/* Write the COUNT bytes starting at BUF to OUTPUT.  Returns 0 on
   success and -1, after printing a diagnostic, if the write failed.
   On success errno is left as it was on entry so that the caller can
   still examine the error of a preceding iconv() call.  */
static int
write_converted (const char *buf, size_t count, FILE *output)
{
  int errno_save = errno;

  if (fwrite (buf, 1, count, output) < count || ferror (output))
    {
      /* Error occurred while printing the result.  */
      error (0, 0, _("\
conversion stopped due to problem in writing the output"));
      return -1;
    }

  errno = errno_save;
  return 0;
}

/* Print the diagnostic for the iconv() failure described by the errno
   value ERR.  POSITION is the offset in the input where the conversion
   stopped.  */
static void
report_iconv_error (int err, long position)
{
  switch (err)
    {
    case EILSEQ:
      error (0, 0, _("illegal input sequence at position %ld"), position);
      break;
    case EINVAL:
      error (0, 0, _("\
incomplete character or shift sequence at end of buffer"));
      break;
    case EBADF:
      error (0, 0, _("internal error (illegal descriptor)"));
      break;
    default:
      error (0, 0, _("unknown iconv() error %d"), err);
      break;
    }
}

/* Convert the LEN bytes starting at ADDR using the descriptor CD and
   write the result to OUTPUT.  After all input is consumed the
   conversion state is flushed.  Returns 0 on success and -1, after
   printing a diagnostic, if the conversion or the writing failed.  */
static int
process_block (iconv_t cd, const char *addr, size_t len, FILE *output)
{
  const char *start = addr;
  char outbuf[OUTBUF_SIZE];

  while (len > 0)
    {
      char *outptr = outbuf;
      size_t outlen = OUTBUF_SIZE;
      size_t n = iconv (cd, &addr, &len, &outptr, &outlen);

      /* Even a failed call can have produced output.  Write it out
	 before looking at the error.  */
      if (outptr != outbuf
	  && write_converted (outbuf, outptr - outbuf, output) != 0)
	return -1;

      if (n != (size_t) -1)
	{
	  /* All the input text is processed.  For state-dependent
	     character sets we have to flush the state now.  */
	  outptr = outbuf;
	  outlen = OUTBUF_SIZE;
	  n = iconv (cd, NULL, NULL, &outptr, &outlen);

	  if (outptr != outbuf
	      && write_converted (outbuf, outptr - outbuf, output) != 0)
	    return -1;

	  if (n == (size_t) -1)
	    {
	      /* The flush itself failed.  This must not be reported
		 as success.  */
	      report_iconv_error (errno, (long) (addr - start));
	      return -1;
	    }

	  break;
	}

      /* E2BIG only means the output buffer was full; go around once
	 more.  Everything else is a real problem.  */
      if (errno != E2BIG)
	{
	  report_iconv_error (errno, (long) (addr - start));
	  return -1;
	}
    }

  return 0;
}
413
414
/* Read everything which is available from the descriptor FD and
   convert it with CD, writing the result to OUTPUT.  Returns the
   result of process_block, or -1, after printing a diagnostic, if
   reading the input or allocating the buffer failed.

   The input buffer is kept in static variables and reused by later
   calls.  */
static int
process_fd (iconv_t cd, int fd, FILE *output)
{
  /* We have a problem with reading from a descriptor since we must not
     provide the iconv() function an incomplete character or shift
     sequence at the end of the buffer.  Since we have to deal with
     arbitrary encodings we must read the whole text in a buffer and
     process it in one step.  */
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  while (1)
    {
      ssize_t n;

      if (actlen == maxlen)
	{
	  /* Increase the buffer.  The old buffer and its recorded size
	     are left alone if this fails so that they remain usable.  */
	  size_t newlen = maxlen + 32768;
	  char *newbuf = realloc (inbuf, newlen);

	  if (newbuf == NULL)
	    {
	      error (0, errno, _("unable to allocate buffer for input"));
	      return -1;
	    }

	  inbuf = newbuf;
	  maxlen = newlen;
	}

      /* Always append to the data read so far.  This is also correct
	 when the buffer is left over from an earlier call.  */
      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
	/* No more text to read.  */
	break;

      if (n == -1)
	{
	  /* Error while reading.  */
	  error (0, errno, _("error while reading the input"));
	  return -1;
	}

      actlen += n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output);
}
487
488
/* Convert the content of the stream INPUT using CD and write the
   result to OUTPUT.  The work is done on the descriptor underlying
   the stream; the value of process_fd is returned.  */
static int
process_file (iconv_t cd, FILE *input, FILE *output)
{
  /* Going behind the back of the stream should be safe since we use
     this function only for `stdin' and we haven't read anything so
     far.  */
  int fd = fileno (input);

  return process_fd (cd, fd, output);
}
496
497
498 /* Print all known character sets/encodings. */
499 static void *printlist;
500 static size_t column;
501 static int not_first;
502
503 static void
504 insert_print_list (const void *nodep, VISIT value, int level)
505 {
506 if (value == leaf || value == postorder)
507 {
508 const struct gconv_alias *s = *(const struct gconv_alias **) nodep;
509 tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp);
510 }
511 }
512
513 static void
514 do_print (const void *nodep, VISIT value, int level)
515 {
516 if (value == leaf || value == postorder)
517 {
518 const char *s = *(const char **) nodep;
519 size_t len = strlen (s);
520 size_t cnt;
521
522 while (len > 0 && s[len - 1] == '/')
523 --len;
524
525 for (cnt = 0; cnt < len; ++cnt)
526 if (isalnum (s[cnt]))
527 break;
528 if (cnt == len)
529 return;
530
531 if (not_first)
532 {
533 putchar (',');
534 ++column;
535
536 if (column > 2 && column + len > 77)
537 {
538 fputs ("\n ", stdout);
539 column = 2;
540 }
541 else
542 {
543 putchar (' ');
544 ++column;
545 }
546 }
547 else
548 not_first = 1;
549
550 fwrite (s, len, 1, stdout);
551 column += len;
552 }
553 }
554
555 static void
556 internal_function
557 add_known_names (struct gconv_module *node)
558 {
559 if (node->left != NULL)
560 add_known_names (node->left);
561 if (node->right != NULL)
562 add_known_names (node->right);
563 if (node->same != NULL)
564 add_known_names (node->same);
565 do
566 {
567 if (node->from_pattern == NULL)
568 {
569 if (strcmp (node->from_constpfx, "INTERNAL"))
570 tsearch (node->from_constpfx, &printlist,
571 (__compar_fn_t) strverscmp);
572 if (strcmp (node->to_string, "INTERNAL"))
573 tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
574 }
575 else
576 if (strcmp (node->from_pattern, "INTERNAL"))
577 tsearch (node->from_pattern, &printlist, (__compar_fn_t) strverscmp);
578
579 node = node->matching;
580 }
581 while (node != NULL);
582 }
583
584 static void
585 internal_function
586 print_known_names (void)
587 {
588 iconv_t h;
589
590 /* We must initialize the internal databases first. */
591 h = iconv_open ("L1", "L1");
592 iconv_close (h);
593
594 /* First add the aliases. */
595 twalk (__gconv_alias_db, insert_print_list);
596
597 /* Add the from- and to-names from the known modules. */
598 add_known_names (__gconv_modules_db);
599
600 fputs (_("\
601 The following list contain all the coded character sets known. This does\n\
602 not necessarily mean that all combinations of these names can be used for\n\
603 the FROM and TO command line parameters. One coded character set can be\n\
604 listed with several different names (aliases).\n\
605 Some of the names are no plain strings but instead regular expressions and\n\
606 they match a variety of names which can be given as parameters to the\n\
607 program.\n\n "), stdout);
608
609 /* Now print the collected names. */
610 column = 2;
611 twalk (printlist, do_print);
612
613 if (column != 0)
614 puts ("");
615 }