]>
Commit | Line | Data |
---|---|---|
fb5663ca UD |
1 | /* Convert text in given files from the specified from-set to the to-set. |
2 | Copyright (C) 1998 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Library General Public License as | |
8 | published by the Free Software Foundation; either version 2 of the | |
9 | License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Library General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Library General Public | |
17 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
18 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #include <argp.h> | |
22 | #include <errno.h> | |
23 | #include <error.h> | |
24 | #include <fcntl.h> | |
25 | #include <iconv.h> | |
26 | #include <locale.h> | |
27 | #include <stdio.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <unistd.h> | |
31 | #include <sys/mman.h> | |
32 | ||
33 | /* Get libc version number. */ | |
34 | #include "../version.h" | |
35 | ||
36 | #define PACKAGE _libc_intl_domainname /* Package name printed by print_version. */ | |
37 | ||
38 | ||
39 | /* Name and version of program. The argp version hook is pointed at print_version (defined later in this file) so that the version text is produced by that function. */ | |
40 | static void print_version (FILE *stream, struct argp_state *state); | |
41 | void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; | |
42 | ||
43 | #define OPT_VERBOSE 1000 /* Option key used for the `verbose' entry in the table below and matched in parse_opt. */ | |
44 | ||
45 | /* Definitions of arguments for argp functions. Each entry is { long name, key, argument name, flags, help text }; entries whose name is NULL carry only a heading for the help output, and the all-zero entry ends the table. */ | |
46 | static const struct argp_option options[] = | |
47 | { | |
48 | { NULL, 0, NULL, 0, N_("Input/Output format specification:") }, | |
49 | { "from-code", 'f', "NAME", 0, N_("encoding of original text") }, | |
50 | { "to-code", 't', "NAME", 0, N_("encoding for output") }, | |
51 | { NULL, 0, NULL, 0, N_("Output control:") }, | |
52 | { "output", 'o', "FILE", 0, N_("output file") }, | |
53 | { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, | |
54 | { NULL, 0, NULL, 0, NULL } | |
55 | }; | |
56 | ||
57 | /* Short description of program; handed to argp through the `argp' structure below. */ | |
58 | static const char doc[] = N_("\ | |
59 | Convert encoding of given files from one encoding to another."); | |
60 | ||
61 | /* Strings for arguments in help texts: the program takes any number of input files, possibly none. */ | |
62 | static const char args_doc[] = N_("[FILE...]"); | |
63 | ||
64 | /* Prototype for option handler. It stores the option values in the file-level variables declared further down. */ | |
65 | static error_t parse_opt __P ((int key, char *arg, struct argp_state *state)); | |
66 | ||
67 | /* Function to print some extra text in the help message. */ | |
68 | static char *more_help __P ((int key, const char *text, void *input)); | |
69 | ||
70 | /* Data structure to communicate with argp functions. It bundles the option table, the option handler, the two help strings and the help filter; the NULL member is left unused here. main passes it to argp_parse. */ | |
71 | static struct argp argp = | |
72 | { | |
73 | options, parse_opt, args_doc, doc, NULL, more_help | |
74 | }; | |
75 | ||
76 | /* Code sets to convert from and to respectively. Set by the `-f' and `-t' options in parse_opt; main terminates with an error while either is still NULL. */ | |
77 | static const char *from_code; | |
78 | static const char *to_code; | |
79 | ||
80 | /* File to write output to. If NULL write to stdout. Set by the `-o' option. */ | |
81 | static const char *output_file; | |
82 | ||
83 | /* Nonzero if verbose output is wanted; main then prints the name of each input file before processing it. */ | |
84 | static int verbose; | |
85 | ||
86 | /* Prototypes for the functions doing the actual work. process_block converts a buffer held in memory, process_fd reads a descriptor into memory and calls process_block, process_file forwards a stdio stream to process_fd. */ | |
87 | static int process_block (iconv_t cd, const char *addr, size_t len, | |
88 | FILE *output); | |
89 | static int process_fd (iconv_t cd, int fd, FILE *output); | |
90 | static int process_file (iconv_t cd, FILE *input, FILE *output); | |
91 | ||
92 | ||
93 | int | |
94 | main (int argc, char *argv[]) | |
95 | { | |
96 | int status = EXIT_SUCCESS; | |
97 | int remaining; | |
98 | FILE *output; | |
99 | iconv_t cd; | |
100 | ||
101 | /* Set locale via LC_ALL. */ | |
102 | setlocale (LC_ALL, ""); | |
103 | ||
104 | /* Set the text message domain. */ | |
105 | textdomain (_libc_intl_domainname); | |
106 | ||
107 | /* Parse and process arguments. */ | |
108 | argp_parse (&argp, argc, argv, 0, &remaining, NULL); | |
109 | ||
110 | /* If either the from- or to-code is not specified this is an error | |
111 | since we do not know what to do. */ | |
112 | if (from_code == NULL && to_code == NULL) | |
113 | error (EXIT_FAILURE, 0, | |
114 | _("neither original not target encoding specified")); | |
115 | if (from_code == NULL) | |
116 | error (EXIT_FAILURE, 0, _("original encoding not specified using `-f'")); | |
117 | if (to_code == NULL) | |
118 | error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'")); | |
119 | ||
120 | /* Let's see whether we have these coded character sets. */ | |
121 | cd = iconv_open (to_code, from_code); | |
122 | if (cd == (iconv_t) -1) | |
123 | if (errno == EINVAL) | |
124 | error (EXIT_FAILURE, 0, _("conversion from `%s' to `%s' not supported"), | |
125 | from_code, to_code); | |
126 | else | |
127 | error (EXIT_FAILURE, errno, _("failed to start conversion processing")); | |
128 | ||
129 | /* Determine output file. */ | |
130 | if (output_file != NULL) | |
131 | { | |
132 | output = fopen (output_file, "w"); | |
133 | if (output == NULL) | |
134 | error (EXIT_FAILURE, errno, _("cannot open output file")); | |
135 | } | |
136 | else | |
137 | output = stdout; | |
138 | ||
139 | /* Now process the remaining files. Write them to stdout or the file | |
140 | specified with the `-o' parameter. If we have no file given as | |
141 | the parameter process all from stdin. */ | |
142 | if (remaining == argc) | |
143 | process_file (cd, stdin, output); | |
144 | else | |
145 | do | |
146 | { | |
147 | struct stat st; | |
148 | const char *addr; | |
149 | int fd = open (argv[remaining], O_RDONLY); | |
150 | ||
151 | if (verbose) | |
152 | printf ("%s:\n", argv[remaining]); | |
153 | ||
154 | if (fd == -1) | |
155 | { | |
156 | error (0, errno, _("cannot open input file `%s'"), | |
157 | argv[remaining]); | |
158 | status = EXIT_FAILURE; | |
159 | continue; | |
160 | } | |
161 | ||
162 | /* We have possibilities for reading the input file. First try | |
163 | to mmap() it since this will provide the fastest solution. */ | |
164 | if (fstat (fd, &st) == 0 | |
165 | && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) | |
166 | != MAP_FAILED)) | |
167 | { | |
168 | /* Yes, we can use mmap(). The descriptor is not needed | |
169 | anymore. */ | |
170 | if (close (fd) != 0) | |
171 | error (EXIT_FAILURE, errno, _("error while closing input `%s'"), | |
172 | argv[remaining]); | |
173 | ||
174 | if (process_block (cd, addr, st.st_size, stdout) < 0) | |
175 | { | |
176 | /* Something went wrong. */ | |
177 | status = EXIT_FAILURE; | |
178 | ||
179 | /* We don't need the input data anymore. */ | |
180 | munmap ((void *) addr, st.st_size); | |
181 | ||
182 | /* We cannot go on with producing output since it might | |
183 | lead to problem because the last output might leave | |
184 | the output stream in an undefined state. */ | |
185 | break; | |
186 | } | |
187 | ||
188 | /* We don't need the input data anymore. */ | |
189 | munmap ((void *) addr, st.st_size); | |
190 | } | |
191 | else | |
192 | { | |
193 | /* Read the file in pieces. */ | |
194 | if (process_fd (cd, fd, output) != 0) | |
195 | { | |
196 | /* Something went wrong. */ | |
197 | status = EXIT_FAILURE; | |
198 | ||
199 | /* We don't need the input file anymore. */ | |
200 | close (fd); | |
201 | ||
202 | /* We cannot go on with producing output since it might | |
203 | lead to problem because the last output might leave | |
204 | the output stream in an undefined state. */ | |
205 | break; | |
206 | } | |
207 | ||
208 | /* Now close the file. */ | |
209 | close (fd); | |
210 | } | |
211 | } | |
212 | while (++remaining < argc); | |
213 | ||
214 | /* Close the output file now. */ | |
215 | if (fclose (output)) | |
216 | error (EXIT_FAILURE, errno, _("error while closing output file")); | |
217 | ||
218 | return status; | |
219 | } | |
220 | ||
221 | ||
222 | /* Handle program arguments. */ | |
223 | static error_t | |
224 | parse_opt (int key, char *arg, struct argp_state *state) | |
225 | { | |
226 | switch (key) | |
227 | { | |
228 | case 'f': | |
229 | from_code = arg; | |
230 | break; | |
231 | case 't': | |
232 | to_code = arg; | |
233 | break; | |
234 | case 'o': | |
235 | output_file = arg; | |
236 | break; | |
237 | case OPT_VERBOSE: | |
238 | verbose = 1; | |
239 | break; | |
240 | default: | |
241 | return ARGP_ERR_UNKNOWN; | |
242 | } | |
243 | return 0; | |
244 | } | |
245 | ||
246 | ||
/* Help filter handed to argp.

   For ARGP_KEY_HELP_EXTRA a freshly allocated copy of the translated
   bug-reporting note is returned; for every other KEY the TEXT passed
   in is returned unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  /* Everything but the "extra" section is passed through untouched.  */
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
}
261 | ||
262 | ||
263 | /* Print the version information. */ | |
264 | static void | |
265 | print_version (FILE *stream, struct argp_state *state) | |
266 | { | |
267 | fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION); | |
268 | fprintf (stream, gettext ("\ | |
269 | Copyright (C) %s Free Software Foundation, Inc.\n\ | |
270 | This is free software; see the source for copying conditions. There is NO\n\ | |
271 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ | |
272 | "), "1998"); | |
273 | fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); | |
274 | } | |
275 | ||
276 | ||
/* Convert the LEN bytes starting at ADDR with the conversion descriptor
   CD and write the converted text to OUTPUT.

   The conversion goes through a fixed-size output buffer.  Whenever
   iconv () stops with E2BIG the buffer is written out, emptied and the
   conversion resumes where it stopped.

   Returns 0 when the whole block was converted and written, -1 after
   printing a diagnostic when the conversion or the write failed.  */
static int
process_block (iconv_t cd, const char *addr, size_t len, FILE *output)
{
#define OUTBUF_SIZE 32768
  char outbuf[OUTBUF_SIZE];
  /* iconv () wants a `char **' for the input even though it only reads
     from it; use a local cursor instead of passing a `const char **'.  */
  char *inptr = (char *) addr;

  while (len > 0)
    {
      /* Start every round with an empty output buffer, otherwise a
         round following E2BIG would have no room to make progress.  */
      char *outptr = outbuf;
      size_t outlen = OUTBUF_SIZE;
      size_t n = iconv (cd, &inptr, &len, &outptr, &outlen);
      /* Remember the reason for a failure now; fwrite () below is
         allowed to change errno.  */
      int iconv_errno = errno;
      size_t produced = (size_t) (outptr - outbuf);

      /* Write whatever this round produced.  fwrite () returns the
         number of items written, so anything short of PRODUCED is an
         error.  */
      if (produced > 0
          && (fwrite (outbuf, 1, produced, output) != produced
              || ferror (output)))
        {
          /* Error occurred while printing the result.  */
          error (0, 0,
                 _("conversion stopped due to problem in writing the output"));
          return -1;
        }

      if (n != (size_t) -1)
        /* Everything is processed.  */
        break;

      if (iconv_errno != E2BIG)
        {
          /* iconv() ran into a problem.  */
          switch (iconv_errno)
            {
            case EILSEQ:
              error (0, 0, _("illegal input sequence"));
              break;
            case EINVAL:
              error (0, 0,
                     _("incomplete character or shift sequence at end of buffer"));
              break;
            case EBADF:
              error (0, 0, _("internal error (illegal descriptor)"));
              break;
            default:
              error (0, 0, _("unknown iconv() error %d"), iconv_errno);
              break;
            }

          return -1;
        }

      /* E2BIG: the output buffer was full; go around again.  */
    }

  return 0;
}
332 | ||
333 | ||
/* Read everything available from the descriptor FD and convert it with
   CD, writing the result to OUTPUT.

   We have a problem with reading from a descriptor since we must not
   provide the iconv() function an incomplete character or shift
   sequence at the end of the buffer.  Since we have to deal with
   arbitrary encodings we must read the whole text in a buffer and
   process it in one step.

   The buffer is static so that it is reused (and only ever grown) by
   later calls.  Returns the result of process_block, or -1 after
   printing a diagnostic when reading or growing the buffer failed.  */
static int
process_fd (iconv_t cd, int fd, FILE *output)
{
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  while (1)
    {
      ssize_t n;

      if (actlen == maxlen)
        {
          /* Increase the buffer.  Keep the old pointer until realloc ()
             succeeded so the buffer stays valid for later calls.  */
          char *newbuf = realloc (inbuf, maxlen + 32768);

          if (newbuf == NULL)
            {
              error (0, errno, _("unable to allocate buffer for input"));
              return -1;
            }

          inbuf = newbuf;
          maxlen += 32768;
        }

      /* Always append behind the data read so far; this is also correct
         when the buffer was allocated by an earlier call.  */
      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
        /* No more text to read.  */
        break;

      if (n < 0)
        {
          /* Error while reading.  */
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      actlen += (size_t) n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output);
}
406 | ||
407 | ||
/* Convert the contents of the stdio stream INPUT with CD and write the
   result to OUTPUT by handing the stream's descriptor to process_fd.
   Returns whatever process_fd returns.  */
static int
process_file (iconv_t cd, FILE *input, FILE *output)
{
  /* Bypassing the stdio buffer should be safe since we use this
     function only for `stdin' and we haven't read anything so far.  */
  int input_fd = fileno (input);

  return process_fd (cd, input_fd, output);
}