]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gdb/charset.c
Add casts to memory allocation related calls
[thirdparty/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
32d0add0 3 Copyright (C) 2001-2015 Free Software Foundation, Inc.
234b45d4
KB
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
a9762ec7 9 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
a9762ec7 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
19
20#include "defs.h"
21#include "charset.h"
22#include "gdbcmd.h"
6c7a06a3 23#include "gdb_obstack.h"
732f6a93 24#include "gdb_wait.h"
6c7a06a3
TT
25#include "charset-list.h"
26#include "vec.h"
40b5c9fb 27#include "environ.h"
f870a310 28#include "arch-utils.h"
fa864999 29#include "gdb_vecs.h"
234b45d4
KB
30#include <ctype.h>
31
43484f03
DJ
32#ifdef USE_WIN32API
33#include <windows.h>
34#endif
234b45d4
KB
35\f
36/* How GDB's character set support works
37
6c7a06a3 38 GDB has three global settings:
234b45d4
KB
39
40 - The `current host character set' is the character set GDB should
41 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
42 terminal knows how to display properly. Most users should not
43 change this.
234b45d4
KB
44
45 - The `current target character set' is the character set the
46 program being debugged uses.
47
6c7a06a3
TT
48 - The `current target wide character set' is the wide character set
49 the program being debugged uses, that is, the encoding used for
50 wchar_t.
51
234b45d4
KB
52 There are commands to set each of these, and mechanisms for
53 choosing reasonable default values. GDB has a global list of
54 character sets that it can use as its host or target character
55 sets.
56
57 The header file `charset.h' declares various functions that
58 different pieces of GDB need to perform tasks like:
59
60 - printing target strings and characters to the user's terminal
61 (mostly target->host conversions),
62
63 - building target-appropriate representations of strings and
64 characters the user enters in expressions (mostly host->target
65 conversions),
66
6c7a06a3
TT
67 and so on.
68
69 To avoid excessive code duplication and maintenance efforts,
70 GDB simply requires a capable iconv function. Users on platforms
71 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
72
73\f
6c7a06a3 74#ifdef PHONY_ICONV
234b45d4 75
6c7a06a3
TT
76/* Provide a phony iconv that does as little as possible. Also,
77 arrange for there to be a single available character set. */
234b45d4 78
6c7a06a3
TT
79#undef GDB_DEFAULT_HOST_CHARSET
80#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
81#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
82#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
83#undef DEFAULT_CHARSET_NAMES
84#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
85
86#undef iconv_t
87#define iconv_t int
88#undef iconv_open
62234ccc 89#define iconv_open phony_iconv_open
6c7a06a3 90#undef iconv
62234ccc 91#define iconv phony_iconv
6c7a06a3 92#undef iconv_close
62234ccc 93#define iconv_close phony_iconv_close
6c7a06a3 94
0dd7fb99
TT
95#undef ICONV_CONST
96#define ICONV_CONST const
97
a95babbf 98static iconv_t
62234ccc 99phony_iconv_open (const char *to, const char *from)
6c7a06a3 100{
b8899f2b 101 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 102 We allow conversions to wchar_t and the host charset. */
b8899f2b 103 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
104 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
105 return -1;
106 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
107 return -1;
234b45d4 108
b8899f2b 109 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 110 used as a flag in calls to iconv. */
b8899f2b 111 return !strcmp (from, "UTF-32BE");
6c7a06a3 112}
234b45d4 113
/* Close a phony descriptor.  There is nothing to release, so this
   always reports success.  */

static int
phony_iconv_close (iconv_t arg)
{
  return 0;
}
234b45d4 119
/* Phony replacement for iconv.  UTF_FLAG is the value returned by
   phony_iconv_open: non-zero means the input is UTF-32BE, zero means
   the input bytes are copied to the output unchanged.

   *INBUF / *INBYTESLEFT and *OUTBUF / *OUTBYTESLEFT are advanced past
   whatever was consumed and produced.  Returns 0 on success.  On
   failure returns (size_t) -1 and sets errno:
     EILSEQ - a UTF-32BE character does not fit in a single byte;
     EINVAL - the UTF-32BE input ends with a partial character;
     E2BIG  - the output buffer is too small.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One input character is four bytes wide.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Anything left over is a truncated character.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return (size_t) -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 180
83030110
PA
181#else /* PHONY_ICONV */
182
183/* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
184 to ENOENT, while gnulib defines it to a different value. Always
185 map ENOENT to gnulib's EILSEQ, leaving callers agnostic. */
186
187static size_t
188gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
189 char **outbuf, size_t *outbytesleft)
190{
191 size_t ret;
192
193 ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
194 if (errno == ENOENT)
195 errno = EILSEQ;
196 return ret;
197}
198
199#undef iconv
200#define iconv gdb_iconv
234b45d4 201
83030110 202#endif /* PHONY_ICONV */
234b45d4
KB
203
204\f
205/* The global lists of character sets and translations. */
206
207
e33d66ec
EZ
208#ifndef GDB_DEFAULT_TARGET_CHARSET
209#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
210#endif
211
6c7a06a3 212#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 213#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
214#endif
215
216static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
217static const char *host_charset_name = "auto";
920d2a44
AC
218static void
219show_host_charset_name (struct ui_file *file, int from_tty,
220 struct cmd_list_element *c,
221 const char *value)
222{
6c7a06a3
TT
223 if (!strcmp (value, "auto"))
224 fprintf_filtered (file,
225 _("The host character set is \"auto; currently %s\".\n"),
226 auto_host_charset_name);
227 else
228 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
229}
230
f870a310 231static const char *target_charset_name = "auto";
920d2a44
AC
/* Print the target character set setting VALUE to FILE.  For "auto",
   also print the charset the current architecture selects.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
245
f870a310 246static const char *target_wide_charset_name = "auto";
/* Print the target wide character set setting VALUE to FILE.  For
   "auto", also print the charset the current architecture selects.  */

static void
show_target_wide_charset_name (struct ui_file *file,
                               int from_tty,
                               struct cmd_list_element *c,
                               const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 262
6c7a06a3 263static const char *default_charset_names[] =
e33d66ec 264{
6c7a06a3 265 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
266 0
267};
234b45d4 268
6c7a06a3 269static const char **charset_enum;
234b45d4 270
6c7a06a3
TT
271\f
272/* If the target wide character set has big- or little-endian
273 variants, these are the corresponding names. */
274static const char *target_wide_charset_be_name;
275static const char *target_wide_charset_le_name;
234b45d4 276
f870a310
TT
277/* The architecture for which the BE- and LE-names are valid. */
278static struct gdbarch *be_le_arch;
279
280/* A helper function which sets the target wide big- and little-endian
281 character set names, if possible. */
234b45d4 282
6c7a06a3 283static void
f870a310 284set_be_le_names (struct gdbarch *gdbarch)
234b45d4 285{
6c7a06a3 286 int i, len;
f870a310
TT
287 const char *target_wide;
288
289 if (be_le_arch == gdbarch)
290 return;
291 be_le_arch = gdbarch;
234b45d4 292
6c7a06a3
TT
293 target_wide_charset_le_name = NULL;
294 target_wide_charset_be_name = NULL;
234b45d4 295
f870a310
TT
296 target_wide = target_wide_charset_name;
297 if (!strcmp (target_wide, "auto"))
298 target_wide = gdbarch_auto_wide_charset (gdbarch);
299
300 len = strlen (target_wide);
6c7a06a3
TT
301 for (i = 0; charset_enum[i]; ++i)
302 {
f870a310 303 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
304 continue;
305 if ((charset_enum[i][len] == 'B'
306 || charset_enum[i][len] == 'L')
307 && charset_enum[i][len + 1] == 'E'
308 && charset_enum[i][len + 2] == '\0')
309 {
310 if (charset_enum[i][len] == 'B')
311 target_wide_charset_be_name = charset_enum[i];
312 else
313 target_wide_charset_le_name = charset_enum[i];
314 }
315 }
234b45d4
KB
316}
317
6c7a06a3
TT
/* Validation helper and sfuncs for the 'set charset', 'set host-charset',
   'set target-charset', and 'set target-wide-charset' commands.  */
234b45d4
KB
320
321static void
f870a310 322validate (struct gdbarch *gdbarch)
234b45d4 323{
6c7a06a3
TT
324 iconv_t desc;
325 const char *host_cset = host_charset ();
f870a310
TT
326 const char *target_cset = target_charset (gdbarch);
327 const char *target_wide_cset = target_wide_charset_name;
c5504eaf 328
f870a310
TT
329 if (!strcmp (target_wide_cset, "auto"))
330 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 331
f870a310 332 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3 333 if (desc == (iconv_t) -1)
a73c6dcd 334 error (_("Cannot convert between character sets `%s' and `%s'"),
f870a310 335 target_wide_cset, host_cset);
6c7a06a3 336 iconv_close (desc);
234b45d4 337
f870a310 338 desc = iconv_open (target_cset, host_cset);
6c7a06a3 339 if (desc == (iconv_t) -1)
a73c6dcd 340 error (_("Cannot convert between character sets `%s' and `%s'"),
f870a310 341 target_cset, host_cset);
6c7a06a3 342 iconv_close (desc);
234b45d4 343
f870a310
TT
344 /* Clear the cache. */
345 be_le_arch = NULL;
234b45d4
KB
346}
347
6c7a06a3
TT
348/* This is the sfunc for the 'set charset' command. */
349static void
aff410f1
MS
350set_charset_sfunc (char *charset, int from_tty,
351 struct cmd_list_element *c)
234b45d4 352{
aff410f1 353 /* CAREFUL: set the target charset here as well. */
6c7a06a3 354 target_charset_name = host_charset_name;
f870a310 355 validate (get_current_arch ());
234b45d4
KB
356}
357
6c7a06a3
TT
/* sfunc for 'set host-charset'.  This only exists to adapt validate
   to the signature that set commands require.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
366
6c7a06a3
TT
/* sfunc for 'set target-charset'; validates the new setting.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
374
6c7a06a3
TT
/* sfunc for 'set target-wide-charset'; validates the new setting.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
382
6c7a06a3
TT
383/* sfunc for the 'show charset' command. */
384static void
aff410f1
MS
385show_charset (struct ui_file *file, int from_tty,
386 struct cmd_list_element *c,
6c7a06a3 387 const char *name)
234b45d4 388{
6c7a06a3
TT
389 show_host_charset_name (file, from_tty, c, host_charset_name);
390 show_target_charset_name (file, from_tty, c, target_charset_name);
aff410f1
MS
391 show_target_wide_charset_name (file, from_tty, c,
392 target_wide_charset_name);
234b45d4
KB
393}
394
234b45d4 395\f
6c7a06a3 396/* Accessor functions. */
234b45d4 397
6c7a06a3
TT
398const char *
399host_charset (void)
234b45d4 400{
6c7a06a3
TT
401 if (!strcmp (host_charset_name, "auto"))
402 return auto_host_charset_name;
403 return host_charset_name;
234b45d4
KB
404}
405
6c7a06a3 406const char *
f870a310 407target_charset (struct gdbarch *gdbarch)
234b45d4 408{
f870a310
TT
409 if (!strcmp (target_charset_name, "auto"))
410 return gdbarch_auto_charset (gdbarch);
6c7a06a3 411 return target_charset_name;
234b45d4 412}
234b45d4 413
6c7a06a3 414const char *
f870a310 415target_wide_charset (struct gdbarch *gdbarch)
234b45d4 416{
f870a310
TT
417 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
418
419 set_be_le_names (gdbarch);
e17a4113 420 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 421 {
6c7a06a3
TT
422 if (target_wide_charset_be_name)
423 return target_wide_charset_be_name;
234b45d4 424 }
6c7a06a3 425 else
234b45d4 426 {
6c7a06a3
TT
427 if (target_wide_charset_le_name)
428 return target_wide_charset_le_name;
234b45d4
KB
429 }
430
f870a310
TT
431 if (!strcmp (target_wide_charset_name, "auto"))
432 return gdbarch_auto_wide_charset (gdbarch);
433
6c7a06a3 434 return target_wide_charset_name;
234b45d4
KB
435}
436
234b45d4 437\f
6c7a06a3
TT
438/* Host character set management. For the time being, we assume that
439 the host character set is some superset of ASCII. */
234b45d4 440
6c7a06a3
TT
/* Map the host character C to the corresponding control character:
   '?' yields 0177; any other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
448
6c7a06a3
TT
/* Convert a host character, C, to its hex value (0-15).  C must
   already have been validated using isxdigit; any other character
   trips the assertion below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions require an argument representable as
     unsigned char (or EOF); plain char may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
462
234b45d4 463\f
6c7a06a3 464/* Public character management functions. */
234b45d4 465
/* Cleanup callback that closes an iconv descriptor.  P must point to
   the iconv_t to close.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
474
6c7a06a3
TT
475void
476convert_between_encodings (const char *from, const char *to,
477 const gdb_byte *bytes, unsigned int num_bytes,
478 int width, struct obstack *output,
479 enum transliterations translit)
480{
481 iconv_t desc;
482 struct cleanup *cleanups;
483 size_t inleft;
39086a0e 484 ICONV_CONST char *inp;
6c7a06a3
TT
485 unsigned int space_request;
486
487 /* Often, the host and target charsets will be the same. */
488 if (!strcmp (from, to))
489 {
490 obstack_grow (output, bytes, num_bytes);
491 return;
492 }
234b45d4 493
6c7a06a3
TT
494 desc = iconv_open (to, from);
495 if (desc == (iconv_t) -1)
9b20d036 496 perror_with_name (_("Converting character sets"));
6c7a06a3 497 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 498
6c7a06a3 499 inleft = num_bytes;
39086a0e 500 inp = (ICONV_CONST char *) bytes;
234b45d4 501
6c7a06a3 502 space_request = num_bytes;
234b45d4 503
6c7a06a3 504 while (inleft > 0)
234b45d4 505 {
6c7a06a3
TT
506 char *outp;
507 size_t outleft, r;
508 int old_size;
509
510 old_size = obstack_object_size (output);
511 obstack_blank (output, space_request);
512
241fd515 513 outp = (char *) obstack_base (output) + old_size;
6c7a06a3
TT
514 outleft = space_request;
515
39086a0e 516 r = iconv (desc, &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
517
518 /* Now make sure that the object on the obstack only includes
519 bytes we have converted. */
ee11262d 520 obstack_blank_fast (output, -outleft);
6c7a06a3
TT
521
522 if (r == (size_t) -1)
523 {
524 switch (errno)
525 {
526 case EILSEQ:
527 {
528 int i;
529
530 /* Invalid input sequence. */
531 if (translit == translit_none)
3e43a32a
MS
532 error (_("Could not convert character "
533 "to `%s' character set"), to);
6c7a06a3
TT
534
535 /* We emit escape sequence for the bytes, skip them,
536 and try again. */
537 for (i = 0; i < width; ++i)
538 {
539 char octal[5];
540
08850b56 541 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
6c7a06a3
TT
542 obstack_grow_str (output, octal);
543
544 ++inp;
545 --inleft;
546 }
547 }
548 break;
549
550 case E2BIG:
551 /* We ran out of space in the output buffer. Make it
552 bigger next time around. */
553 space_request *= 2;
554 break;
555
556 case EINVAL:
557 /* Incomplete input sequence. FIXME: ought to report this
558 to the caller somehow. */
559 inleft = 0;
560 break;
561
562 default:
9b20d036
MS
563 perror_with_name (_("Internal error while "
564 "converting character sets"));
6c7a06a3
TT
565 }
566 }
234b45d4 567 }
234b45d4 568
6c7a06a3 569 do_cleanups (cleanups);
234b45d4
KB
570}
571
e33d66ec 572\f
e33d66ec 573
6c7a06a3
TT
574/* An iterator that returns host wchar_t's from a target string. */
575struct wchar_iterator
e33d66ec 576{
6c7a06a3
TT
577 /* The underlying iconv descriptor. */
578 iconv_t desc;
e33d66ec 579
6c7a06a3 580 /* The input string. This is updated as convert characters. */
2898e560 581 const gdb_byte *input;
6c7a06a3
TT
582 /* The number of bytes remaining in the input. */
583 size_t bytes;
e33d66ec 584
6c7a06a3
TT
585 /* The width of an input character. */
586 size_t width;
e33d66ec 587
6c7a06a3
TT
588 /* The output buffer and its size. */
589 gdb_wchar_t *out;
590 size_t out_size;
591};
234b45d4 592
6c7a06a3
TT
593/* Create a new iterator. */
594struct wchar_iterator *
aff410f1
MS
595make_wchar_iterator (const gdb_byte *input, size_t bytes,
596 const char *charset, size_t width)
234b45d4 597{
6c7a06a3
TT
598 struct wchar_iterator *result;
599 iconv_t desc;
234b45d4 600
732f6a93 601 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3 602 if (desc == (iconv_t) -1)
9b20d036 603 perror_with_name (_("Converting character sets"));
234b45d4 604
6c7a06a3
TT
605 result = XNEW (struct wchar_iterator);
606 result->desc = desc;
2898e560 607 result->input = input;
6c7a06a3
TT
608 result->bytes = bytes;
609 result->width = width;
234b45d4 610
6c7a06a3
TT
611 result->out = XNEW (gdb_wchar_t);
612 result->out_size = 1;
234b45d4 613
6c7a06a3 614 return result;
e33d66ec 615}
234b45d4 616
e33d66ec 617static void
6c7a06a3 618do_cleanup_iterator (void *p)
e33d66ec 619{
6c7a06a3 620 struct wchar_iterator *iter = p;
234b45d4 621
6c7a06a3
TT
622 iconv_close (iter->desc);
623 xfree (iter->out);
624 xfree (iter);
234b45d4
KB
625}
626
6c7a06a3
TT
627struct cleanup *
628make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 629{
6c7a06a3 630 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 631}
234b45d4 632
6c7a06a3
TT
633int
634wchar_iterate (struct wchar_iterator *iter,
635 enum wchar_iterate_result *out_result,
636 gdb_wchar_t **out_chars,
637 const gdb_byte **ptr,
638 size_t *len)
639{
640 size_t out_request;
641
642 /* Try to convert some characters. At first we try to convert just
643 a single character. The reason for this is that iconv does not
644 necessarily update its outgoing arguments when it encounters an
645 invalid input sequence -- but we want to reliably report this to
646 our caller so it can emit an escape sequence. */
647 out_request = 1;
648 while (iter->bytes > 0)
e33d66ec 649 {
39086a0e 650 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
6c7a06a3 651 char *outptr = (char *) &iter->out[0];
2898e560 652 const gdb_byte *orig_inptr = iter->input;
6c7a06a3
TT
653 size_t orig_in = iter->bytes;
654 size_t out_avail = out_request * sizeof (gdb_wchar_t);
655 size_t num;
39086a0e
PA
656 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
657
658 iter->input = (gdb_byte *) inptr;
c5504eaf 659
6c7a06a3
TT
660 if (r == (size_t) -1)
661 {
662 switch (errno)
663 {
664 case EILSEQ:
aff410f1
MS
665 /* Invalid input sequence. We still might have
666 converted a character; if so, return it. */
1558ab4c
JK
667 if (out_avail < out_request * sizeof (gdb_wchar_t))
668 break;
669
aff410f1
MS
670 /* Otherwise skip the first invalid character, and let
671 the caller know about it. */
6c7a06a3
TT
672 *out_result = wchar_iterate_invalid;
673 *ptr = iter->input;
674 *len = iter->width;
675 iter->input += iter->width;
676 iter->bytes -= iter->width;
677 return 0;
678
679 case E2BIG:
680 /* We ran out of space. We still might have converted a
681 character; if so, return it. Otherwise, grow the
682 buffer and try again. */
683 if (out_avail < out_request * sizeof (gdb_wchar_t))
684 break;
685
686 ++out_request;
687 if (out_request > iter->out_size)
688 {
689 iter->out_size = out_request;
224c3ddb 690 iter->out = XRESIZEVEC (gdb_wchar_t, iter->out, out_request);
6c7a06a3
TT
691 }
692 continue;
693
694 case EINVAL:
695 /* Incomplete input sequence. Let the caller know, and
696 arrange for future calls to see EOF. */
697 *out_result = wchar_iterate_incomplete;
698 *ptr = iter->input;
699 *len = iter->bytes;
700 iter->bytes = 0;
701 return 0;
702
703 default:
9b20d036
MS
704 perror_with_name (_("Internal error while "
705 "converting character sets"));
6c7a06a3
TT
706 }
707 }
708
709 /* We converted something. */
710 num = out_request - out_avail / sizeof (gdb_wchar_t);
711 *out_result = wchar_iterate_ok;
712 *out_chars = iter->out;
713 *ptr = orig_inptr;
714 *len = orig_in - iter->bytes;
715 return num;
e33d66ec 716 }
6c7a06a3
TT
717
718 /* Really done. */
719 *out_result = wchar_iterate_eof;
720 return -1;
234b45d4
KB
721}
722
e33d66ec 723\f
6c7a06a3 724/* The charset.c module initialization function. */
234b45d4 725
6c7a06a3 726extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 727
6c7a06a3 728static VEC (char_ptr) *charsets;
234b45d4 729
6c7a06a3 730#ifdef PHONY_ICONV
234b45d4 731
6c7a06a3
TT
732static void
733find_charset_names (void)
234b45d4 734{
6c7a06a3
TT
735 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
736 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
737}
738
6c7a06a3 739#else /* PHONY_ICONV */
fc3b640d
TT
740
741/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
742 provides different symbols in the static and dynamic libraries.
743 So, configure may see libiconvlist but not iconvlist. But, calling
744 iconvlist is the right thing to do and will work. Hence we do a
745 check here but unconditionally call iconvlist below. */
746#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 747
6c7a06a3
TT
748/* A helper function that adds some character sets to the vector of
749 all character sets. This is a callback function for iconvlist. */
750
751static int
752add_one (unsigned int count, const char *const *names, void *data)
234b45d4 753{
6c7a06a3 754 unsigned int i;
234b45d4 755
6c7a06a3
TT
756 for (i = 0; i < count; ++i)
757 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 758
6c7a06a3 759 return 0;
234b45d4
KB
760}
761
6c7a06a3
TT
762static void
763find_charset_names (void)
234b45d4 764{
6c7a06a3
TT
765 iconvlist (add_one, NULL);
766 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
767}
768
6c7a06a3 769#else
234b45d4 770
40b5c9fb
DE
/* Return 1 if LINE (a line of output from the iconv program) is part
   of the human-readable introduction and should be skipped, 0
   otherwise.  Older iconv programs (e.g. 2.2.2) print the
   introduction even when stdout is not a tty; newer versions omit
   it.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line containing any of
     them, at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = ignore_lines; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
799
6c7a06a3
TT
800static void
801find_charset_names (void)
234b45d4 802{
732f6a93
TT
803 struct pex_obj *child;
804 char *args[3];
805 int err, status;
806 int fail = 1;
478aac75 807 int flags;
40b5c9fb 808 struct gdb_environ *iconv_env;
478aac75 809 char *iconv_program;
40b5c9fb 810
aff410f1
MS
811 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
812 not a tty. We need to recognize it and ignore it. This text is
813 subject to translation, so force LANGUAGE=C. */
40b5c9fb
DE
814 iconv_env = make_environ ();
815 init_environ (iconv_env);
816 set_in_environ (iconv_env, "LANGUAGE", "C");
817 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93 818
40618926 819 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
732f6a93 820
478aac75
DE
821#ifdef ICONV_BIN
822 {
823 char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
824 ICONV_BIN_RELOCATABLE);
825 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
826 xfree (iconv_dir);
827 }
828#else
829 iconv_program = xstrdup ("iconv");
830#endif
831 args[0] = iconv_program;
732f6a93
TT
832 args[1] = "-l";
833 args[2] = NULL;
478aac75
DE
834 flags = PEX_STDERR_TO_STDOUT;
835#ifndef ICONV_BIN
836 flags |= PEX_SEARCH;
837#endif
732f6a93 838 /* Note that we simply ignore errors here. */
478aac75
DE
839 if (!pex_run_in_environment (child, flags,
840 args[0], args, environ_vector (iconv_env),
40b5c9fb 841 NULL, NULL, &err))
732f6a93
TT
842 {
843 FILE *in = pex_read_output (child, 0);
844
845 /* POSIX says that iconv -l uses an unspecified format. We
846 parse the glibc and libiconv formats; feel free to add others
847 as needed. */
40b5c9fb 848
1d6b2d2b 849 while (in != NULL && !feof (in))
732f6a93
TT
850 {
851 /* The size of buf is chosen arbitrarily. */
852 char buf[1024];
853 char *start, *r;
8ea13695 854 int len;
732f6a93
TT
855
856 r = fgets (buf, sizeof (buf), in);
857 if (!r)
858 break;
859 len = strlen (r);
860 if (len <= 3)
861 continue;
40b5c9fb
DE
862 if (ignore_line_p (r))
863 continue;
864
732f6a93
TT
865 /* Strip off the newline. */
866 --len;
867 /* Strip off one or two '/'s. glibc will print lines like
868 "8859_7//", but also "10646-1:1993/UCS4/". */
869 if (buf[len - 1] == '/')
870 --len;
871 if (buf[len - 1] == '/')
872 --len;
873 buf[len] = '\0';
874
875 /* libiconv will print multiple entries per line, separated
aff410f1
MS
876 by spaces. Older iconvs will print multiple entries per
877 line, indented by two spaces, and separated by ", "
40b5c9fb 878 (i.e. the human readable form). */
732f6a93
TT
879 start = buf;
880 while (1)
881 {
882 int keep_going;
883 char *p;
884
40b5c9fb
DE
885 /* Skip leading blanks. */
886 for (p = start; *p && *p == ' '; ++p)
887 ;
888 start = p;
889 /* Find the next space, comma, or end-of-line. */
890 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
891 ;
892 /* Ignore an empty result. */
893 if (p == start)
894 break;
895 keep_going = *p;
896 *p = '\0';
897 VEC_safe_push (char_ptr, charsets, xstrdup (start));
898 if (!keep_going)
899 break;
900 /* Skip any extra spaces. */
901 for (start = p + 1; *start && *start == ' '; ++start)
902 ;
903 }
904 }
234b45d4 905
732f6a93
TT
906 if (pex_get_status (child, 1, &status)
907 && WIFEXITED (status) && !WEXITSTATUS (status))
908 fail = 0;
234b45d4 909
6c7a06a3 910 }
234b45d4 911
478aac75 912 xfree (iconv_program);
732f6a93 913 pex_free (child);
40b5c9fb 914 free_environ (iconv_env);
234b45d4 915
732f6a93
TT
916 if (fail)
917 {
918 /* Some error occurred, so drop the vector. */
e4ab2fad
JK
919 free_char_ptr_vec (charsets);
920 charsets = NULL;
732f6a93
TT
921 }
922 else
923 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 924}
234b45d4 925
fc3b640d 926#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 927#endif /* PHONY_ICONV */
234b45d4 928
f870a310
TT
929/* The "auto" target charset used by default_auto_charset. */
930static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
931
932const char *
933default_auto_charset (void)
934{
935 return auto_target_charset_name;
936}
937
938const char *
939default_auto_wide_charset (void)
940{
941 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
942}
943
bcb28afc
PM
944
945#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
946/* Macro used for UTF or UCS endianness suffix. */
947#if WORDS_BIGENDIAN
948#define ENDIAN_SUFFIX "BE"
949#else
950#define ENDIAN_SUFFIX "LE"
951#endif
952
953/* The code below serves to generate a compile time error if
954 gdb_wchar_t type is not of size 2 nor 4, despite the fact that
955 macro __STDC_ISO_10646__ is defined.
956 This is better than a gdb_assert call, because GDB cannot handle
957 strings correctly if this size is different. */
958
959extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
960 || sizeof (gdb_wchar_t) == 4)
961 ? 1 : -1];
962
ee34b3f9 963/* intermediate_encoding returns the charset used internally by
bcb28afc
PM
964 GDB to convert between target and host encodings. As the test above
965 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
966 UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
967 otherwise an error is generated. */
968
969const char *
970intermediate_encoding (void)
971{
972 iconv_t desc;
973 static const char *stored_result = NULL;
974 char *result;
bcb28afc
PM
975
976 if (stored_result)
977 return stored_result;
978 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
979 ENDIAN_SUFFIX);
980 /* Check that the name is supported by iconv_open. */
981 desc = iconv_open (result, host_charset ());
982 if (desc != (iconv_t) -1)
983 {
984 iconv_close (desc);
985 stored_result = result;
986 return result;
987 }
988 /* Not valid, free the allocated memory. */
989 xfree (result);
990 /* Second try, with UCS-2 type. */
991 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
992 ENDIAN_SUFFIX);
993 /* Check that the name is supported by iconv_open. */
994 desc = iconv_open (result, host_charset ());
995 if (desc != (iconv_t) -1)
996 {
997 iconv_close (desc);
998 stored_result = result;
999 return result;
1000 }
1001 /* Not valid, free the allocated memory. */
1002 xfree (result);
1003 /* No valid charset found, generate error here. */
1004 error (_("Unable to find a vaild charset for string conversions"));
1005}
1006
1007#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
1008
234b45d4
KB
1009void
1010_initialize_charset (void)
1011{
f870a310 1012 /* The first element is always "auto". */
732f6a93 1013 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
1014 find_charset_names ();
1015
1016 if (VEC_length (char_ptr, charsets) > 1)
1017 charset_enum = (const char **) VEC_address (char_ptr, charsets);
1018 else
1019 charset_enum = default_charset_names;
1020
1021#ifndef PHONY_ICONV
1022#ifdef HAVE_LANGINFO_CODESET
f870a310
TT
1023 /* The result of nl_langinfo may be overwritten later. This may
1024 leak a little memory, if the user later changes the host charset,
1025 but that doesn't matter much. */
1026 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
aff410f1
MS
1027 /* Solaris will return `646' here -- but the Solaris iconv then does
1028 not accept this. Darwin (and maybe FreeBSD) may return "" here,
06be6983
TG
1029 which GNU libiconv doesn't like (infinite loop). */
1030 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 1031 auto_host_charset_name = "ASCII";
f870a310
TT
1032 auto_target_charset_name = auto_host_charset_name;
1033#elif defined (USE_WIN32API)
1034 {
3e43a32a
MS
1035 /* "CP" + x<=5 digits + paranoia. */
1036 static char w32_host_default_charset[16];
f870a310
TT
1037
1038 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1039 "CP%d", GetACP());
1040 auto_host_charset_name = w32_host_default_charset;
1041 auto_target_charset_name = auto_host_charset_name;
1042 }
6c7a06a3
TT
1043#endif
1044#endif
e33d66ec 1045
7ab04401 1046 add_setshow_enum_cmd ("charset", class_support,
f870a310 1047 charset_enum, &host_charset_name, _("\
7ab04401
AC
1048Set the host and target character sets."), _("\
1049Show the host and target character sets."), _("\
3d263c1d
BI
1050The `host character set' is the one used by the system GDB is running on.\n\
1051The `target character set' is the one used by the program being debugged.\n\
1052You may only use supersets of ASCII for your host character set; GDB does\n\
1053not support any others.\n\
1054To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
1055 /* Note that the sfunc below needs to set
1056 target_charset_name, because the 'set
1057 charset' command sets two variables. */
1058 set_charset_sfunc,
1059 show_charset,
1060 &setlist, &showlist);
1061
1062 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 1063 charset_enum, &host_charset_name, _("\
7ab04401
AC
1064Set the host character set."), _("\
1065Show the host character set."), _("\
3d263c1d
BI
1066The `host character set' is the one used by the system GDB is running on.\n\
1067You may only use supersets of ASCII for your host character set; GDB does\n\
ac74f770
MS
1068not support any others.\n\
1069To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 1070 set_host_charset_sfunc,
920d2a44 1071 show_host_charset_name,
7ab04401
AC
1072 &setlist, &showlist);
1073
1074 add_setshow_enum_cmd ("target-charset", class_support,
f870a310 1075 charset_enum, &target_charset_name, _("\
7ab04401
AC
1076Set the target character set."), _("\
1077Show the target character set."), _("\
3d263c1d
BI
1078The `target character set' is the one used by the program being debugged.\n\
1079GDB translates characters and strings between the host and target\n\
b670013c 1080character sets as needed.\n\
ac74f770 1081To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 1082 set_target_charset_sfunc,
920d2a44 1083 show_target_charset_name,
7ab04401 1084 &setlist, &showlist);
6c7a06a3
TT
1085
1086 add_setshow_enum_cmd ("target-wide-charset", class_support,
f870a310 1087 charset_enum, &target_wide_charset_name,
6c7a06a3
TT
1088 _("\
1089Set the target wide character set."), _("\
1090Show the target wide character set."), _("\
3e43a32a
MS
1091The `target wide character set' is the one used by the program being debugged.\
1092\nIn particular it is the encoding used by `wchar_t'.\n\
6c7a06a3
TT
1093GDB translates characters and strings between the host and target\n\
1094character sets as needed.\n\
1095To see a list of the character sets GDB supports, type\n\
1096`set target-wide-charset'<TAB>"),
1097 set_target_wide_charset_sfunc,
1098 show_target_wide_charset_name,
1099 &setlist, &showlist);
234b45d4 1100}