1 /* Convert text in given files from the specified from-set to the to-set.
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
34 #ifdef _POSIX_MAPPED_FILES
35 # include <sys/mman.h>
37 #include <gconv_int.h>
39 /* Get libc version number. */
40 #include "../version.h"
42 #define PACKAGE _libc_intl_domainname
45 /* Name and version of program. */
46 static void print_version (FILE *stream
, struct argp_state
*state
);
47 void (*argp_program_version_hook
) (FILE *, struct argp_state
*) = print_version
;
49 #define OPT_VERBOSE 1000
52 /* Definitions of arguments for argp functions. */
53 static const struct argp_option options
[] =
55 { NULL
, 0, NULL
, 0, N_("Input/Output format specification:") },
56 { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
57 { "to-code", 't', "NAME", 0, N_("encoding for output") },
58 { NULL
, 0, NULL
, 0, N_("Information:") },
59 { "list", 'l', NULL
, 0, N_("list all known coded character sets") },
60 { NULL
, 0, NULL
, 0, N_("Output control:") },
61 { NULL
, 'c', NULL
, 0, N_("omit invalid characters from output") },
62 { "output", 'o', "FILE", 0, N_("output file") },
63 { "silent", 's', NULL
, 0, N_("supress warnings") },
64 { "verbose", OPT_VERBOSE
, NULL
, 0, N_("print progress information") },
65 { NULL
, 0, NULL
, 0, NULL
}
68 /* Short description of program. */
69 static const char doc
[] = N_("\
70 Convert encoding of given files from one encoding to another.");
72 /* Strings for arguments in help texts. */
73 static const char args_doc
[] = N_("[FILE...]");
75 /* Prototype for option handler. */
76 static error_t
parse_opt (int key
, char *arg
, struct argp_state
*state
);
78 /* Function to print some extra text in the help message. */
79 static char *more_help (int key
, const char *text
, void *input
);
81 /* Data structure to communicate with argp functions. */
82 static struct argp argp
=
84 options
, parse_opt
, args_doc
, doc
, NULL
, more_help
87 /* Code sets to convert from and to respectively. */
88 static const char *from_code
;
89 static const char *to_code
;
91 /* File to write output to. If NULL write to stdout. */
92 static const char *output_file
;
94 /* Nonzero if verbose output is wanted. */
97 /* Nonzero if list of all coded character sets is wanted. */
100 /* Prototypes for the functions doing the actual work. */
101 static int process_block (iconv_t cd
, const char *addr
, size_t len
,
103 static int process_fd (iconv_t cd
, int fd
, FILE *output
);
104 static int process_file (iconv_t cd
, FILE *input
, FILE *output
);
105 static void print_known_names (void) internal_function
;
109 main (int argc
, char *argv
[])
111 int status
= EXIT_SUCCESS
;
116 /* Set locale via LC_ALL. */
117 setlocale (LC_ALL
, "");
119 /* Set the text message domain. */
120 textdomain (_libc_intl_domainname
);
122 /* Parse and process arguments. */
123 argp_parse (&argp
, argc
, argv
, 0, &remaining
, NULL
);
125 /* List all coded character sets if wanted. */
128 print_known_names ();
132 /* If either the from- or to-code is not specified this is an error
133 since we do not know what to do. */
134 if (from_code
== NULL
&& to_code
== NULL
)
135 error (EXIT_FAILURE
, 0,
136 _("neither original nor target encoding specified"));
137 if (from_code
== NULL
)
138 error (EXIT_FAILURE
, 0, _("original encoding not specified using `-f'"));
140 error (EXIT_FAILURE
, 0, _("target encoding not specified using `-t'"));
142 /* Let's see whether we have these coded character sets. */
143 cd
= iconv_open (to_code
, from_code
);
144 if (cd
== (iconv_t
) -1)
147 error (EXIT_FAILURE
, 0, _("conversion from `%s' to `%s' not supported"),
150 error (EXIT_FAILURE
, errno
, _("failed to start conversion processing"));
153 /* Determine output file. */
154 if (output_file
!= NULL
)
156 output
= fopen (output_file
, "w");
158 error (EXIT_FAILURE
, errno
, _("cannot open output file"));
163 /* Now process the remaining files. Write them to stdout or the file
164 specified with the `-o' parameter. If we have no file given as
165 the parameter process all from stdin. */
166 if (remaining
== argc
)
168 if (process_file (cd
, stdin
, output
) != 0)
169 status
= EXIT_FAILURE
;
180 printf ("%s:\n", argv
[remaining
]);
181 if (strcmp (argv
[remaining
], "-") == 0)
185 fd
= open (argv
[remaining
], O_RDONLY
);
189 error (0, errno
, _("cannot open input file `%s'"),
191 status
= EXIT_FAILURE
;
196 #ifdef _POSIX_MAPPED_FILES
197 /* We have several possibilities for reading the input file. First try
198 to mmap() it since this will provide the fastest solution. */
199 if (fstat (fd
, &st
) == 0
200 && ((addr
= mmap (NULL
, st
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0))
203 /* Yes, we can use mmap(). The descriptor is not needed anymore. */
206 error (EXIT_FAILURE
, errno
, _("error while closing input `%s'"),
209 if (process_block (cd
, addr
, st
.st_size
, output
) < 0)
211 /* Something went wrong. */
212 status
= EXIT_FAILURE
;
214 /* We don't need the input data anymore. */
215 munmap ((void *) addr
, st
.st_size
);
217 /* We cannot go on with producing output since it might
218 lead to problems because the last output might leave
219 the output stream in an undefined state. */
223 /* We don't need the input data anymore. */
224 munmap ((void *) addr
, st
.st_size
);
227 #endif /* _POSIX_MAPPED_FILES */
229 /* Read the file in pieces. */
230 if (process_fd (cd
, fd
, output
) != 0)
232 /* Something went wrong. */
233 status
= EXIT_FAILURE
;
235 /* We don't need the input file anymore. */
238 /* We cannot go on with producing output since it might
239 lead to problems because the last output might leave
240 the output stream in an undefined state. */
244 /* Now close the file. */
248 while (++remaining
< argc
);
250 /* Close the output file now. */
252 error (EXIT_FAILURE
, errno
, _("error while closing output file"));
258 /* Handle program arguments. */
260 parse_opt (int key
, char *arg
, struct argp_state
*state
)
274 /* Nothing, for now at least. We are not giving out any information
275 about missing characters or the like. */
278 /* Omit invalid characters from output.
279 XXX This option will become meaningful once we have different
280 modes of operation for the conversion functions. */
289 return ARGP_ERR_UNKNOWN
;
296 more_help (int key
, const char *text
, void *input
)
300 case ARGP_KEY_HELP_EXTRA
:
301 /* We print some extra information. */
302 return strdup (gettext ("\
303 Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
307 return (char *) text
;
311 /* Print the version information. */
313 print_version (FILE *stream
, struct argp_state
*state
)
315 fprintf (stream
, "iconv (GNU %s) %s\n", PACKAGE
, VERSION
);
316 fprintf (stream
, gettext ("\
317 Copyright (C) %s Free Software Foundation, Inc.\n\
318 This is free software; see the source for copying conditions. There is NO\n\
319 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
321 fprintf (stream
, gettext ("Written by %s.\n"), "Ulrich Drepper");
326 process_block (iconv_t cd
, const char *addr
, size_t len
, FILE *output
)
328 #define OUTBUF_SIZE 32768
329 const char *start
= addr
;
330 char outbuf
[OUTBUF_SIZE
];
338 outlen
= OUTBUF_SIZE
;
339 n
= iconv (cd
, &addr
, &len
, &outptr
, &outlen
);
341 if (outptr
!= outbuf
)
343 /* We have something to write out. */
344 int errno_save
= errno
;
346 if (fwrite (outbuf
, 1, outptr
- outbuf
, output
) < outptr
- outbuf
349 /* Error occurred while printing the result. */
351 conversion stopped due to problem in writing the output"));
358 if (n
!= (size_t) -1)
360 /* All the input text is processed. For state-dependent
361 character sets we have to flush the state now. */
363 outlen
= OUTBUF_SIZE
;
364 n
= iconv (cd
, NULL
, NULL
, &outptr
, &outlen
);
366 if (outptr
!= outbuf
)
368 /* We have something to write out. */
369 int errno_save
= errno
;
371 if (fwrite (outbuf
, 1, outptr
- outbuf
, output
) < outptr
- outbuf
374 /* Error occurred while printing the result. */
376 conversion stopped due to problem in writing the output"));
388 /* iconv() ran into a problem. */
392 error (0, 0, _("illegal input sequence at position %ld"),
393 (long) (addr
- start
));
397 incomplete character or shift sequence at end of buffer"));
400 error (0, 0, _("internal error (illegal descriptor)"));
403 error (0, 0, _("unknown iconv() error %d"), errno
);
416 process_fd (iconv_t cd
, int fd
, FILE *output
)
418 /* We have a problem with reading from a descriptor since we must not
419 provide the iconv() function an incomplete character or shift
420 sequence at the end of the buffer. Since we have to deal with
421 arbitrary encodings we must read the whole text into a buffer and
422 process it in one step. */
423 static char *inbuf
= NULL
;
424 static size_t maxlen
= 0;
428 while (actlen
< maxlen
)
430 size_t n
= read (fd
, inptr
, maxlen
- actlen
);
433 /* No more text to read. */
438 /* Error while reading. */
439 error (0, errno
, _("error while reading the input"));
447 if (actlen
== maxlen
)
452 /* Increase the buffer. */
454 inbuf
= realloc (inbuf
, maxlen
);
456 error (0, errno
, _("unable to allocate buffer for input"));
457 inptr
= inbuf
+ actlen
;
461 n
= read (fd
, inptr
, maxlen
- actlen
);
464 /* No more text to read. */
469 /* Error while reading. */
470 error (0, errno
, _("error while reading the input"));
477 while (actlen
< maxlen
);
480 /* Break again so we leave both loops. */
484 /* Now we have all the input in the buffer. Process it in one run. */
485 return process_block (cd
, inbuf
, actlen
, output
);
490 process_file (iconv_t cd
, FILE *input
, FILE *output
)
492 /* This should be safe since we use this function only for `stdin' and
493 we haven't read anything so far. */
494 return process_fd (cd
, fileno (input
), output
);
498 /* Print all known character sets/encodings. */
499 static void *printlist
;
500 static size_t column
;
501 static int not_first
;
504 insert_print_list (const void *nodep
, VISIT value
, int level
)
506 if (value
== leaf
|| value
== postorder
)
508 const struct gconv_alias
*s
= *(const struct gconv_alias
**) nodep
;
509 tsearch (s
->fromname
, &printlist
, (__compar_fn_t
) strverscmp
);
514 do_print (const void *nodep
, VISIT value
, int level
)
516 if (value
== leaf
|| value
== postorder
)
518 const char *s
= *(const char **) nodep
;
519 size_t len
= strlen (s
);
522 while (len
> 0 && s
[len
- 1] == '/')
525 for (cnt
= 0; cnt
< len
; ++cnt
)
526 if (isalnum (s
[cnt
]))
536 if (column
> 2 && column
+ len
> 77)
538 fputs ("\n ", stdout
);
550 fwrite (s
, len
, 1, stdout
);
557 add_known_names (struct gconv_module
*node
)
559 if (node
->left
!= NULL
)
560 add_known_names (node
->left
);
561 if (node
->right
!= NULL
)
562 add_known_names (node
->right
);
563 if (node
->same
!= NULL
)
564 add_known_names (node
->same
);
567 if (node
->from_pattern
== NULL
)
569 if (strcmp (node
->from_constpfx
, "INTERNAL"))
570 tsearch (node
->from_constpfx
, &printlist
,
571 (__compar_fn_t
) strverscmp
);
572 if (strcmp (node
->to_string
, "INTERNAL"))
573 tsearch (node
->to_string
, &printlist
, (__compar_fn_t
) strverscmp
);
576 if (strcmp (node
->from_pattern
, "INTERNAL"))
577 tsearch (node
->from_pattern
, &printlist
, (__compar_fn_t
) strverscmp
);
579 node
= node
->matching
;
581 while (node
!= NULL
);
586 print_known_names (void)
590 /* We must initialize the internal databases first. */
591 h
= iconv_open ("L1", "L1");
594 /* First add the aliases. */
595 twalk (__gconv_alias_db
, insert_print_list
);
597 /* Add the from- and to-names from the known modules. */
598 add_known_names (__gconv_modules_db
);
601 The following list contain all the coded character sets known. This does\n\
602 not necessarily mean that all combinations of these names can be used for\n\
603 the FROM and TO command line parameters. One coded character set can be\n\
604 listed with several different names (aliases).\n\
605 Some of the names are no plain strings but instead regular expressions and\n\
606 they match a variety of names which can be given as parameters to the\n\
607 program.\n\n "), stdout
);
609 /* Now print the collected names. */
611 twalk (printlist
, do_print
);