]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/iconv_prog.c
Update.
[thirdparty/glibc.git] / iconv / iconv_prog.c
CommitLineData
fb5663ca
UD
1/* Convert text in given files from the specified from-set to the to-set.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
#include <argp.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <iconv.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
32
33/* Get libc version number. */
34#include "../version.h"
35
/* Package name printed by print_version in the version banner.  */
#define PACKAGE _libc_intl_domainname


/* Name and version of program.  Instead of a fixed version string the
   program installs print_version as the argp version hook, so the
   banner is produced by that function.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
42
/* Key passed to parse_opt for the `--verbose' option.  */
#define OPT_VERBOSE 1000

/* Definitions of arguments for argp functions.  Entries with a NULL
   name and a zero key only carry a group header text; the table is
   terminated by an all-zero entry.  */
static const struct argp_option options[] =
{
  { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
  { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
  { "to-code", 't', "NAME", 0, N_("encoding for output") },
  { NULL, 0, NULL, 0, N_("Output control:") },
  { "output", 'o', "FILE", 0, N_("output file") },
  { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
  { NULL, 0, NULL, 0, NULL }
};
56
/* Short description of program, handed to argp through the `argp'
   structure below.  */
static const char doc[] = N_("\
Convert encoding of given files from one encoding to another.");

/* Strings for arguments in help texts: the program takes an optional
   list of input files.  */
static const char args_doc[] = N_("[FILE...]");

/* Prototype for option handler.  It stores the option values in the
   file-scope variables defined further down.  */
static error_t parse_opt __P ((int key, char *arg, struct argp_state *state));

/* Function to print some extra text in the help message.  */
static char *more_help __P ((int key, const char *text, void *input));

/* Data structure to communicate with argp functions.  It ties together
   the option table, the option handler, the two help strings and the
   help filter more_help; the fifth member is left NULL.  */
static struct argp argp =
{
  options, parse_opt, args_doc, doc, NULL, more_help
};
75
/* Code sets to convert from and to respectively.  Set by parse_opt for
   `-f' and `-t'; main refuses to run while either is still NULL.  */
static const char *from_code;
static const char *to_code;

/* File to write output to.  If NULL write to stdout.  Set by parse_opt
   for `-o'.  */
static const char *output_file;

/* Nonzero if verbose output is wanted.  Set by parse_opt for
   `--verbose'.  */
static int verbose;
85
/* Prototypes for the functions doing the actual work.  process_block
   converts LEN bytes at ADDR; process_fd collects the data readable
   from FD and passes it to process_block; process_file does the same
   for the descriptor underlying a stdio stream.  */
static int process_block (iconv_t cd, const char *addr, size_t len,
			  FILE *output);
static int process_fd (iconv_t cd, int fd, FILE *output);
static int process_file (iconv_t cd, FILE *input, FILE *output);
91
92
93int
94main (int argc, char *argv[])
95{
96 int status = EXIT_SUCCESS;
97 int remaining;
98 FILE *output;
99 iconv_t cd;
100
101 /* Set locale via LC_ALL. */
102 setlocale (LC_ALL, "");
103
104 /* Set the text message domain. */
105 textdomain (_libc_intl_domainname);
106
107 /* Parse and process arguments. */
108 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
109
110 /* If either the from- or to-code is not specified this is an error
111 since we do not know what to do. */
112 if (from_code == NULL && to_code == NULL)
113 error (EXIT_FAILURE, 0,
114 _("neither original not target encoding specified"));
115 if (from_code == NULL)
116 error (EXIT_FAILURE, 0, _("original encoding not specified using `-f'"));
117 if (to_code == NULL)
118 error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'"));
119
120 /* Let's see whether we have these coded character sets. */
121 cd = iconv_open (to_code, from_code);
122 if (cd == (iconv_t) -1)
123 if (errno == EINVAL)
124 error (EXIT_FAILURE, 0, _("conversion from `%s' to `%s' not supported"),
125 from_code, to_code);
126 else
127 error (EXIT_FAILURE, errno, _("failed to start conversion processing"));
128
129 /* Determine output file. */
130 if (output_file != NULL)
131 {
132 output = fopen (output_file, "w");
133 if (output == NULL)
134 error (EXIT_FAILURE, errno, _("cannot open output file"));
135 }
136 else
137 output = stdout;
138
139 /* Now process the remaining files. Write them to stdout or the file
140 specified with the `-o' parameter. If we have no file given as
141 the parameter process all from stdin. */
142 if (remaining == argc)
143 process_file (cd, stdin, output);
144 else
145 do
146 {
147 struct stat st;
148 const char *addr;
149 int fd = open (argv[remaining], O_RDONLY);
150
151 if (verbose)
152 printf ("%s:\n", argv[remaining]);
153
154 if (fd == -1)
155 {
156 error (0, errno, _("cannot open input file `%s'"),
157 argv[remaining]);
158 status = EXIT_FAILURE;
159 continue;
160 }
161
162 /* We have possibilities for reading the input file. First try
163 to mmap() it since this will provide the fastest solution. */
164 if (fstat (fd, &st) == 0
165 && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
166 != MAP_FAILED))
167 {
168 /* Yes, we can use mmap(). The descriptor is not needed
169 anymore. */
170 if (close (fd) != 0)
171 error (EXIT_FAILURE, errno, _("error while closing input `%s'"),
172 argv[remaining]);
173
174 if (process_block (cd, addr, st.st_size, stdout) < 0)
175 {
176 /* Something went wrong. */
177 status = EXIT_FAILURE;
178
179 /* We don't need the input data anymore. */
180 munmap ((void *) addr, st.st_size);
181
182 /* We cannot go on with producing output since it might
183 lead to problem because the last output might leave
184 the output stream in an undefined state. */
185 break;
186 }
187
188 /* We don't need the input data anymore. */
189 munmap ((void *) addr, st.st_size);
190 }
191 else
192 {
193 /* Read the file in pieces. */
194 if (process_fd (cd, fd, output) != 0)
195 {
196 /* Something went wrong. */
197 status = EXIT_FAILURE;
198
199 /* We don't need the input file anymore. */
200 close (fd);
201
202 /* We cannot go on with producing output since it might
203 lead to problem because the last output might leave
204 the output stream in an undefined state. */
205 break;
206 }
207
208 /* Now close the file. */
209 close (fd);
210 }
211 }
212 while (++remaining < argc);
213
214 /* Close the output file now. */
215 if (fclose (output))
216 error (EXIT_FAILURE, errno, _("error while closing output file"));
217
218 return status;
219}
220
221
222/* Handle program arguments. */
223static error_t
224parse_opt (int key, char *arg, struct argp_state *state)
225{
226 switch (key)
227 {
228 case 'f':
229 from_code = arg;
230 break;
231 case 't':
232 to_code = arg;
233 break;
234 case 'o':
235 output_file = arg;
236 break;
237 case OPT_VERBOSE:
238 verbose = 1;
239 break;
240 default:
241 return ARGP_ERR_UNKNOWN;
242 }
243 return 0;
244}
245
246
/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA a freshly allocated
   copy of the (translated) bug reporting note is returned; for every
   other key TEXT is handed back unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
}
261
262
263/* Print the version information. */
264static void
265print_version (FILE *stream, struct argp_state *state)
266{
267 fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION);
268 fprintf (stream, gettext ("\
269Copyright (C) %s Free Software Foundation, Inc.\n\
270This is free software; see the source for copying conditions. There is NO\n\
271warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
272"), "1998");
273 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
274}
275
276
/* Convert the LEN bytes starting at ADDR with the conversion
   descriptor CD and write the result to OUTPUT.

   The text is converted in pieces of at most OUTBUF_SIZE output bytes;
   each piece is written out before the conversion continues.

   Returns 0 on success and -1 after printing a diagnostic when writing
   to OUTPUT fails or iconv() reports anything other than a full output
   buffer (E2BIG).  */
static int
process_block (iconv_t cd, const char *addr, size_t len, FILE *output)
{
#define OUTBUF_SIZE 32768
  char outbuf[OUTBUF_SIZE];

  while (len > 0)
    {
      /* Start every round with an empty output buffer.  Without this
	 reset a round that ended with E2BIG would be followed by calls
	 with no room left, which can never make progress.  */
      char *outptr = outbuf;
      size_t outlen = OUTBUF_SIZE;
      size_t produced;
      int saved_errno;

      /* Going through `void *' lets the call compile both with the
	 historical `const char **' and the POSIX `char **' prototype
	 of iconv().  */
      size_t n = iconv (cd, (void *) &addr, &len, &outptr, &outlen);

      /* Remember the reason now; fwrite() below may change errno.  */
      saved_errno = errno;

      produced = (size_t) (outptr - outbuf);
      if (produced > 0)
	{
	  /* We have something to write out.  fwrite() returns the
	     number of items written, so a short count is the error
	     indication.  */
	  if (fwrite (outbuf, 1, produced, output) < produced
	      || ferror (output))
	    {
	      /* Error occurred while printing the result.  */
	      error (0, 0, _("\
conversion stopped due to problem in writing the output"));
	      return -1;
	    }
	}

      if (n != (size_t) -1)
	/* Everything is processed.  */
	break;

      if (saved_errno != E2BIG)
	{
	  /* iconv() ran into a problem.  */
	  switch (saved_errno)
	    {
	    case EILSEQ:
	      error (0, 0, _("illegal input sequence"));
	      break;
	    case EINVAL:
	      error (0, 0, _("\
incomplete character or shift sequence at end of buffer"));
	      break;
	    case EBADF:
	      error (0, 0, _("internal error (illegal descriptor)"));
	      break;
	    default:
	      error (0, 0, _("unknown iconv() error %d"), saved_errno);
	      break;
	    }

	  return -1;
	}

      /* E2BIG: the output buffer was full.  Its contents have been
	 written, so go on with the rest of the input.  */
    }

  return 0;
}
332
333
/* Read everything available from the descriptor FD and convert it with
   CD, writing the result to OUTPUT.

   We have a problem with reading from a descriptor since we must not
   provide the iconv() function an incomplete character or shift
   sequence at the end of the buffer.  Since we have to deal with
   arbitrary encodings we must read the whole text in a buffer and
   process it in one step.

   The buffer is static: it is kept (and never freed) between calls so
   that later files reuse the memory allocated for earlier ones.

   Returns the result of process_block, or -1 after printing a
   diagnostic when reading fails or the buffer cannot be enlarged.  */
static int
process_fd (iconv_t cd, int fd, FILE *output)
{
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  while (1)
    {
      ssize_t n;

      if (actlen == maxlen)
	{
	  /* Increase the buffer.  Keep the old pointer until we know the
	     allocation succeeded so that a failure neither loses the
	     buffer nor leaves MAXLEN out of step with it.  */
	  char *newbuf = realloc (inbuf, maxlen + 32768);

	  if (newbuf == NULL)
	    {
	      error (0, errno, _("unable to allocate buffer for input"));
	      return -1;
	    }

	  inbuf = newbuf;
	  maxlen += 32768;
	}

      /* Always append behind the data read so far.  INBUF may be a
	 buffer left over from a previous call.  */
      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
	/* No more text to read.  */
	break;

      if (n < 0)
	{
	  /* Error while reading.  */
	  error (0, errno, _("error while reading the input"));
	  return -1;
	}

      actlen += (size_t) n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output);
}
406
407
/* Convert the contents of the stdio stream INPUT with CD and write the
   result to OUTPUT.  The work is delegated to process_fd using the
   descriptor underlying INPUT.  */
static int
process_file (iconv_t cd, FILE *input, FILE *output)
{
  /* Bypassing the stream buffer should be safe since we use this
     function only for `stdin' and we haven't read anything so far.  */
  int fd = fileno (input);

  return process_fd (cd, fd, output);
}