/* Various declarations for language-independent pretty-print subroutines.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
28 #include <new> // For placement-new.
34 /* Overwrite the given location/range within this text_info's rich_location.
35 For use e.g. when implementing "+" in client format decoders. */
38 text_info::set_location (unsigned int idx
, location_t loc
, bool show_caret_p
)
40 gcc_checking_assert (m_richloc
);
41 m_richloc
->set_range (line_table
, idx
, loc
, show_caret_p
);
45 text_info::get_location (unsigned int index_of_location
) const
47 gcc_checking_assert (m_richloc
);
49 if (index_of_location
== 0)
50 return m_richloc
->get_loc ();
52 return UNKNOWN_LOCATION
;
// The member initializers shown here value-initialize formatted_obstack
// and point the current output obstack at it; the two obstack_init calls
// then set up formatted_obstack and chunk_obstack for use.
// NOTE(review): other member initializers are not visible here.
55 // Default construct an output buffer.
57 output_buffer::output_buffer ()
58 : formatted_obstack (),
60 obstack (&formatted_obstack
),
67 obstack_init (&formatted_obstack
);
68 obstack_init (&chunk_obstack
);
71 // Release resources owned by an output buffer at the end of lifetime.
73 output_buffer::~output_buffer ()
75 obstack_free (&chunk_obstack
, NULL
);
76 obstack_free (&formatted_obstack
, NULL
);
// The three pp_scalar calls shown read the argument as T, long T and
// long long T respectively, prefixing F with no modifier, with l, or
// with HOST_LONG_LONG_FORMAT.
// NOTE(review): the code that selects among them by PREC is not visible
// here; confirm against the full macro body.
80 /* Format an integer given by va_arg (ARG, type-specifier T) where
81 type-specifier is a precision modifier as indicated by PREC. F is
82 a string used to construct the appropriate format-specifier. */
83 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
88 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
92 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
96 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
105 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
106 internal maximum characters per line. */
108 pp_set_real_maximum_length (pretty_printer
*pp
)
110 /* If we're told not to wrap lines then do the obvious thing. In case
111 we'll emit prefix only once per message, it is appropriate
112 not to increase unnecessarily the line-length cut-off. */
113 if (!pp_is_wrapping_line (pp
)
114 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
115 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
116 pp
->maximum_length
= pp_line_cutoff (pp
);
119 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
120 /* If the prefix is ridiculously too long, output at least
122 if (pp_line_cutoff (pp
) - prefix_length
< 32)
123 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
125 pp
->maximum_length
= pp_line_cutoff (pp
);
129 /* Clear PRETTY-PRINTER's output state. */
131 pp_clear_state (pretty_printer
*pp
)
133 pp
->emitted_prefix
= false;
134 pp_indentation (pp
) = 0;
137 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
139 pp_write_text_to_stream (pretty_printer
*pp
)
141 const char *text
= pp_formatted_text (pp
);
142 fputs (text
, pp_buffer (pp
)->stream
);
143 pp_clear_output_area (pp
);
/* As pp_write_text_to_stream, but for GraphViz label output.

   Flush the formatted text of pretty-printer PP onto the attached stream.
   Replace characters in the formatted text that have special meaning in
   a GraphViz .dot file.  Characters that are special only for
   record-shape nodes are escaped only if FOR_RECORD.

   This routine is not very fast, but it doesn't have to be, as it is only
   used by routines dumping intermediate representations in graph form.  */
// Reads the formatted text of PP, walks it through the cursor p, and
// writes to the stream attached to the printer buffer (fp).  The output
// area is cleared at the end.
// NOTE(review): the per-character loop and its switch are not visible
// here; only the comments describing each case group remain.
156 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
158 const char *text
= pp_formatted_text (pp
);
159 const char *p
= text
;
160 FILE *fp
= pp_buffer (pp
)->stream
;
167 /* Print newlines as a left-aligned newline. */
173 /* The following characters are only special for record-shape nodes. */
// For this group, escaping is requested only when FOR_RECORD is true.
180 escape_char
= for_record
;
183 /* The following characters always have to be escaped
184 for use in labels. */
186 /* There is a bug in some (f.i. 2.36.0) versions of graphviz
187 ( http://www.graphviz.org/mantisbt/view.php?id=2524 ) related to
188 backslash as last char in label. Let's avoid triggering it. */
// Asserts that the character after the current one is not the terminator.
189 gcc_assert (*(p
+ 1) != '\0');
206 pp_clear_output_area (pp
);
209 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
211 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
// Cache whether the printer is currently in line-wrapping mode.
213 bool wrapping_line
= pp_is_wrapping_line (pp
);
217 /* Dump anything bordered by whitespaces. */
// Scan forward from START to the next blank, newline or END; the span
// between start and p is one unbroken word.
219 const char *p
= start
;
220 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
// The word length is compared with the room left on the current line.
// NOTE(review): the action taken when it does not fit is not visible here.
223 && p
- start
>= pp_remaining_character_count_for_line (pp
))
225 pp_append_text (pp
, start
, p
);
// A blank or a newline that follows the word is handled separately below.
229 if (start
!= end
&& ISBLANK (*start
))
234 if (start
!= end
&& *start
== '\n')
242 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
244 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
246 if (pp_is_wrapping_line (pp
))
247 pp_wrap_text (pp
, start
, end
);
249 pp_append_text (pp
, start
, end
);
252 /* Append to the output area of PRETTY-PRINTER a string specified by its
253 STARTing character and LENGTH. */
255 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
257 output_buffer_append_r (pp_buffer (pp
), start
, length
);
260 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
261 the column position to the current indentation level, assuming that a
262 newline has just been written to the buffer. */
264 pp_indent (pretty_printer
*pp
)
// n is the current indentation level of the printer.
266 int n
= pp_indentation (pp
);
// One iteration per indentation column.
// NOTE(review): the loop body is not visible here; per the header comment
// it presumably emits one space per iteration.
269 for (i
= 0; i
< n
; ++i
)
/* The following format specifiers are recognized as being client independent:
   %d, %i: (signed) integer in base ten.
   %u: unsigned integer in base ten.
   %o: unsigned integer in base eight.
   %x: unsigned integer in base sixteen.
   %ld, %li, %lo, %lu, %lx: long versions of the above.
   %lld, %lli, %llo, %llu, %llx: long long versions.
   %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
   %c: character.
   %s: string.
   %p: pointer.
   %r: if pp_show_color(pp), switch to color identified by const char *.
   %R: if pp_show_color(pp), reset color.
   %m: strerror(text->err_no) - does not consume a value from args_ptr.
   %%: '%'.
   %<: opening quote.
   %>: closing quote.
   %': apostrophe (should only be used in untranslated messages;
       translations should use appropriate punctuation directly).
   %.*s: a substring the length of which is specified by an argument
         integer.
   %Ns: likewise, but length specified as constant in the format string.
   Flag 'q': quote formatted text (must come immediately after '%').

   Arguments can be used sequentially, or through %N$ resp. *N$
   notation Nth argument after the format string.  If %N$ / *N$
   notation is used, it must be used for all arguments, except %m, %%,
   %<, %> and %', which may not have a number, as they do not consume
   an argument.  When %M$.*N$s is used, M must be N + 1.  (This may
   also be written %M$.*s, provided N is not otherwise used.)  The
   format string must have conversion specifiers with argument numbers
   1 up to highest argument; each argument may only be used once.
   A format string can have at most 30 arguments.  */
307 /* Formatting phases 1 and 2: render TEXT->format_spec plus
308 TEXT->args_ptr into a series of chunks in pp_buffer (PP)->args[].
309 Phase 3 is in pp_output_formatted_text. */
312 pp_format (pretty_printer
*pp
, text_info
*text
)
314 output_buffer
*buffer
= pp_buffer (pp
);
317 struct chunk_info
*new_chunk_array
;
// curarg: next sequential argument index; chunk: next free slot in args.
319 unsigned int curarg
= 0, chunk
= 0, argno
;
320 pp_wrapping_mode_t old_wrapping_mode
;
// Numbered and unnumbered argument styles are mutually exclusive; these
// two flags record which style has been seen so far.
321 bool any_unnumbered
= false, any_numbered
= false;
// formatters[N] points at the args slot holding the specifier for
// argument N, so phase 2 can overwrite that slot in place.
322 const char **formatters
[PP_NL_ARGMAX
];
324 /* Allocate a new chunk structure. */
325 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
// Push the new chunk array onto the chain headed by cur_chunk_array.
326 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
327 buffer
->cur_chunk_array
= new_chunk_array
;
328 args
= new_chunk_array
->args
;
330 /* Formatting phase 1: split up TEXT->format_spec into chunks in
331 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
332 verbatim, odd-numbered chunks are format specifiers.
333 %m, %%, %<, %>, and %' are replaced with the appropriate text at
336 memset (formatters
, 0, sizeof formatters
);
338 for (p
= text
->format_spec
; *p
; )
340 while (*p
!= '\0' && *p
!= '%')
342 obstack_1grow (&buffer
->chunk_obstack
, *p
);
355 obstack_1grow (&buffer
->chunk_obstack
, '%');
361 obstack_grow (&buffer
->chunk_obstack
,
362 open_quote
, strlen (open_quote
));
364 = colorize_start (pp_show_color (pp
), "quote");
365 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
372 const char *colorstr
= colorize_stop (pp_show_color (pp
));
373 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
377 obstack_grow (&buffer
->chunk_obstack
,
378 close_quote
, strlen (close_quote
));
384 const char *colorstr
= colorize_stop (pp_show_color (pp
));
385 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
393 const char *errstr
= xstrerror (text
->err_no
);
394 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
400 /* Handled in phase 2. Terminate the plain chunk here. */
401 obstack_1grow (&buffer
->chunk_obstack
, '\0');
// args holds at most two chunks per argument.
402 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
403 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// The number parsed from the format is decremented by one to give a
// zero-based index into formatters.
410 argno
= strtoul (p
, &end
, 10) - 1;
412 gcc_assert (*p
== '$');
416 gcc_assert (!any_unnumbered
);
421 any_unnumbered
= true;
422 gcc_assert (!any_numbered
);
// Each argument index must be in range and used at most once.
424 gcc_assert (argno
< PP_NL_ARGMAX
);
425 gcc_assert (!formatters
[argno
]);
426 formatters
[argno
] = &args
[chunk
];
429 obstack_1grow (&buffer
->chunk_obstack
, *p
);
// Keep copying while the character just consumed is a flag or modifier.
432 while (strchr ("qwl+#", p
[-1]));
436 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
437 (where M == N + 1). */
442 obstack_1grow (&buffer
->chunk_obstack
, *p
);
445 while (ISDIGIT (p
[-1]));
446 gcc_assert (p
[-1] == 's');
450 gcc_assert (*p
== '*');
451 obstack_1grow (&buffer
->chunk_obstack
, '*');
// The precision argument index must be exactly one below the string
// argument index.
457 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
459 gcc_assert (argno2
== argno
- 1);
460 gcc_assert (!any_unnumbered
);
461 gcc_assert (*p
== '$');
// Both argument slots share one formatter entry.
464 formatters
[argno2
] = formatters
[argno
];
468 gcc_assert (!any_numbered
);
469 formatters
[argno
+1] = formatters
[argno
];
472 gcc_assert (*p
== 's');
473 obstack_1grow (&buffer
->chunk_obstack
, 's');
480 obstack_1grow (&buffer
->chunk_obstack
, '\0');
481 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
482 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
485 obstack_1grow (&buffer
->chunk_obstack
, '\0');
486 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
487 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
490 /* Set output to the argument obstack, and switch line-wrapping and
492 buffer
->obstack
= &buffer
->chunk_obstack
;
493 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
495 /* Second phase. Replace each formatter with the formatted text it
// Walk the formatters in argument order, stopping at the first unused slot.
498 for (argno
= 0; formatters
[argno
]; argno
++)
506 /* We do not attempt to enforce any ordering on the modifier
509 for (p
= *formatters
[argno
];; p
++)
534 /* We don't support precision beyond that of "long long". */
535 gcc_assert (precision
< 2);
542 gcc_assert (!wide
|| precision
== 0);
546 pp_string (pp
, open_quote
);
547 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
553 pp_string (pp
, colorize_start (pp_show_color (pp
),
554 va_arg (*text
->args_ptr
,
559 pp_character (pp
, va_arg (*text
->args_ptr
, int));
565 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
567 pp_integer_with_precision
568 (pp
, *text
->args_ptr
, precision
, int, "d");
573 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
574 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
576 pp_integer_with_precision
577 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
581 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
585 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
590 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
591 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
593 pp_integer_with_precision
594 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
599 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
600 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
602 pp_integer_with_precision
603 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
611 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
612 (where M == N + 1). The format string should be verified
613 already from the first phase. */
// Length given as a constant in the format string.
618 n
= strtoul (p
, &end
, 10);
620 gcc_assert (*p
== 's');
624 gcc_assert (*p
== '*');
626 gcc_assert (*p
== 's');
// Length given as an int argument.
627 n
= va_arg (*text
->args_ptr
, int);
629 /* This consumes a second entry in the formatters array. */
630 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
// Emit exactly n bytes starting at s.
634 s
= va_arg (*text
->args_ptr
, const char *);
635 pp_append_text (pp
, s
, s
+ n
);
// Client-specific format decoder; one must be installed when it is needed.
643 gcc_assert (pp_format_decoder (pp
));
644 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
645 precision
, wide
, plus
, hash
);
652 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
653 pp_string (pp
, close_quote
);
// Terminate the formatted text and store it over the specifier chunk.
656 obstack_1grow (&buffer
->chunk_obstack
, '\0');
657 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// Every slot after the first unused one must be unused as well.
661 for (; argno
< PP_NL_ARGMAX
; argno
++)
662 gcc_assert (!formatters
[argno
]);
664 /* Revert to normal obstack and wrapping mode. */
665 buffer
->obstack
= &buffer
->formatted_obstack
;
666 buffer
->line_length
= 0;
667 pp_wrapping_mode (pp
) = old_wrapping_mode
;
671 /* Format of a message pointed to by TEXT. */
673 pp_output_formatted_text (pretty_printer
*pp
)
676 output_buffer
*buffer
= pp_buffer (pp
);
677 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
678 const char **args
= chunk_array
->args
;
680 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
681 gcc_assert (buffer
->line_length
== 0);
683 /* This is a third phase, first 2 phases done in pp_format_args.
684 Now we actually print it. */
685 for (chunk
= 0; args
[chunk
]; chunk
++)
686 pp_string (pp
, args
[chunk
]);
688 /* Deallocate the chunk structure and everything after it (i.e. the
689 associated series of formatted strings). */
690 buffer
->cur_chunk_array
= chunk_array
->prev
;
691 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
694 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
695 settings needed by BUFFER for a verbatim formatting. */
697 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
699 /* Set verbatim mode. */
700 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
702 /* Do the actual formatting. */
703 pp_format (pp
, text
);
704 pp_output_formatted_text (pp
);
706 /* Restore previous settings. */
707 pp_wrapping_mode (pp
) = oldmode
;
710 /* Flush the content of BUFFER onto the attached stream. This
711 function does nothing unless pp->buffer->flush_p. */
713 pp_flush (pretty_printer
*pp
)
// Guard on the flush_p flag of the buffer.
// NOTE(review): the statement controlled by this test is not visible here.
716 if (!pp
->buffer
->flush_p
)
// Write out the pending text, then flush the stdio stream itself.
718 pp_write_text_to_stream (pp
);
719 fflush (pp_buffer (pp
)->stream
);
722 /* Flush the content of BUFFER onto the attached stream independently
723 of the value of pp->buffer->flush_p. */
725 pp_really_flush (pretty_printer
*pp
)
// Write out the pending text, then flush the stdio stream itself.
728 pp_write_text_to_stream (pp
);
729 fflush (pp_buffer (pp
)->stream
);
732 /* Sets the number of maximum characters per line PRETTY-PRINTER can
733 output in line-wrapping mode. A LENGTH value 0 suppresses
736 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
738 pp_line_cutoff (pp
) = length
;
739 pp_set_real_maximum_length (pp
);
742 /* Clear PRETTY-PRINTER output area text info. */
744 pp_clear_output_area (pretty_printer
*pp
)
746 obstack_free (pp_buffer (pp
)->obstack
,
747 obstack_base (pp_buffer (pp
)->obstack
));
748 pp_buffer (pp
)->line_length
= 0;
751 /* Set PREFIX for PRETTY-PRINTER. */
753 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
// NOTE(review): the store of PREFIX into the printer is not visible here.
// The per-line maximum depends on the prefix, so it is recomputed.
756 pp_set_real_maximum_length (pp
);
// A new prefix has not been emitted yet, and indentation starts over.
757 pp
->emitted_prefix
= false;
758 pp_indentation (pp
) = 0;
761 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
763 pp_destroy_prefix (pretty_printer
*pp
)
// Nothing to free when no prefix is set.
765 if (pp
->prefix
!= NULL
)
// The prefix is stored as a pointer to const, so the qualifier is cast
// away for free.
767 free (CONST_CAST (char *, pp
->prefix
));
772 /* Write out PRETTY-PRINTER's prefix. */
774 pp_emit_prefix (pretty_printer
*pp
)
// Nothing is done when no prefix is set.
776 if (pp
->prefix
!= NULL
)
// Dispatch on the prefixing rule of the printer.
778 switch (pp_prefixing_rule (pp
))
781 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
784 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
// NOTE(review): the statements guarded by this test are not visible here,
// so the exact path on which the indentation grows by 3 should be
// confirmed against the full source.
785 if (pp
->emitted_prefix
)
790 pp_indentation (pp
) += 3;
793 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
// Append the whole prefix and record that it has been emitted.
795 int prefix_length
= strlen (pp
->prefix
);
796 pp_append_r (pp
, pp
->prefix
, prefix_length
);
797 pp
->emitted_prefix
= true;
804 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
805 characters per line. */
// P is the prefix and L the line cut-off.
807 pretty_printer::pretty_printer (const char *p
, int l
)
// The output buffer is constructed with placement new in zeroed storage
// obtained from XCNEW.
808 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
// NOTE(review): other member initializers are not visible here.
817 translate_identifiers (true),
820 pp_line_cutoff (this) = l
;
821 /* By default, we emit prefixes once per message. */
822 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
823 pp_set_prefix (this, p
);
// The buffer was built with placement new, so its destructor is invoked
// explicitly here.
// NOTE(review): release of the underlying storage is not visible here.
826 pretty_printer::~pretty_printer ()
828 buffer
->~output_buffer ();
/* Append a string delimited by START and END to the output area of
   PRETTY-PRINTER.  No line wrapping is done.  However, if beginning a
   new line then emit PRETTY-PRINTER's prefix and skip any leading
   whitespace if appropriate.  The caller must ensure that it is
   safe to do so.  */
838 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
840 /* Emit prefix and skip whitespace if we're starting a new line. */
// A line length of zero means nothing has been written on this line yet.
841 if (pp_buffer (pp
)->line_length
== 0)
// Leading spaces are skipped only in line-wrapping mode.
844 if (pp_is_wrapping_line (pp
))
845 while (start
!= end
&& *start
== ' ')
// Append whatever remains between START and END.
848 pp_append_r (pp
, start
, end
- start
);
851 /* Finishes constructing a NULL-terminated character string representing
852 the PRETTY-PRINTED text. */
854 pp_formatted_text (pretty_printer
*pp
)
856 return output_buffer_formatted_text (pp_buffer (pp
));
859 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
860 output area. A NULL pointer means no character available. */
862 pp_last_position_in_text (const pretty_printer
*pp
)
864 return output_buffer_last_position_in_text (pp_buffer (pp
));
867 /* Return the amount of characters PRETTY-PRINTER can accept to
868 make a full line. Meaningful only in line-wrapping mode. */
870 pp_remaining_character_count_for_line (pretty_printer
*pp
)
872 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
876 /* Format a message into BUFFER a la printf. */
878 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
// MSG becomes the format string of a local text_info, which is then run
// through phases 1 and 2 (pp_format) and phase 3
// (pp_output_formatted_text).
// NOTE(review): the variadic argument setup is not visible here.
886 text
.format_spec
= msg
;
887 pp_format (pp
, &text
);
888 pp_output_formatted_text (pp
);
893 /* Output MESSAGE verbatim into BUFFER. */
895 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
// MSG becomes the format string of a local text_info, which is formatted
// through pp_format_verbatim.
// NOTE(review): the variadic argument setup is not visible here.
903 text
.format_spec
= msg
;
904 pp_format_verbatim (pp
, &text
);
910 /* Have PRETTY-PRINTER start a new line. */
912 pp_newline (pretty_printer
*pp
)
914 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
915 pp_needs_newline (pp
) = false;
916 pp_buffer (pp
)->line_length
= 0;
919 /* Have PRETTY-PRINTER add a CHARACTER. */
921 pp_character (pretty_printer
*pp
, int c
)
// In line-wrapping mode, check whether the current line is already full.
// NOTE(review): the statements guarded by this test are not visible here.
923 if (pp_is_wrapping_line (pp
)
924 && pp_remaining_character_count_for_line (pp
) <= 0)
// Append the character and count it towards the current line length.
930 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
931 ++pp_buffer (pp
)->line_length
;
934 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
935 be line-wrapped if in appropriate mode. */
937 pp_string (pretty_printer
*pp
, const char *str
)
939 gcc_checking_assert (str
);
940 pp_maybe_wrap_text (pp
, str
, str
+ strlen (str
));
943 /* Maybe print out a whitespace if needed. */
946 pp_maybe_space (pretty_printer
*pp
)
// Act only when some padding is pending, then clear the pending padding.
// NOTE(review): the statement that emits the whitespace is not visible
// here.
948 if (pp
->padding
!= pp_none
)
951 pp
->padding
= pp_none
;
955 // Add a newline to the pretty printer PP and flush formatted text.
958 pp_newline_and_flush (pretty_printer
*pp
)
// NOTE(review): the newline and flush calls are not visible here; only
// the final reset of the pending-newline flag remains.
962 pp_needs_newline (pp
) = false;
965 // Add a newline to the pretty printer PP, followed by indentation.
968 pp_newline_and_indent (pretty_printer
*pp
, int n
)
// N is added to the current indentation level; it is an adjustment, not
// an absolute column.
970 pp_indentation (pp
) += n
;
// NOTE(review): the newline and indent calls are not visible here.
973 pp_needs_newline (pp
) = false;
976 // Add separator C, followed by a single whitespace.
979 pp_separate_with (pretty_printer
*pp
, char c
)
// Emit the separator character itself.
// NOTE(review): the statement that emits the following whitespace is not
// visible here.
981 pp_character (pp
, c
);
986 /* The string starting at P has LEN (at least 1) bytes left; if they
987 start with a valid UTF-8 sequence, return the length of that
988 sequence and set *VALUE to the value of that sequence, and
989 otherwise return 0 and set *VALUE to (unsigned int) -1. */
992 decode_utf8_char (const unsigned char *p
, size_t len
, unsigned int *value
)
1000 size_t utf8_len
= 0;
// Shift the lead byte left for as long as its top bit is set.
// NOTE(review): the loop body is not visible here; it presumably counts
// those bits into utf8_len.
1003 for (t
= *p
; t
& 0x80; t
<<= 1)
// Reject sequences longer than the input, and lengths outside 2 to 6.
1006 if (utf8_len
> len
|| utf8_len
< 2 || utf8_len
> 6)
1008 *value
= (unsigned int) -1;
// Keep only the payload bits of the lead byte.
1011 ch
= *p
& ((1 << (7 - utf8_len
)) - 1);
// Each following byte must have the form 10xxxxxx and contributes its low
// six bits.
1012 for (i
= 1; i
< utf8_len
; i
++)
1014 unsigned int u
= p
[i
];
1015 if ((u
& 0xC0) != 0x80)
1017 *value
= (unsigned int) -1;
1020 ch
= (ch
<< 6) | (u
& 0x3F);
// Reject overlong encodings (a value that fits in a shorter sequence) and
// the surrogate range D800 to DFFF.
1022 if ( (ch
<= 0x7F && utf8_len
> 1)
1023 || (ch
<= 0x7FF && utf8_len
> 2)
1024 || (ch
<= 0xFFFF && utf8_len
> 3)
1025 || (ch
<= 0x1FFFFF && utf8_len
> 4)
1026 || (ch
<= 0x3FFFFFF && utf8_len
> 5)
1027 || (ch
>= 0xD800 && ch
<= 0xDFFF))
1029 *value
= (unsigned int) -1;
1042 /* Allocator for identifier_to_locale and corresponding function to
1045 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1046 void (*identifier_to_locale_free
) (void *) = free
;
1048 /* Given IDENT, an identifier in the internal encoding, return a
1049 version of IDENT suitable for diagnostics in the locale character
1050 set: either IDENT itself, or a string, allocated using
1051 identifier_to_locale_alloc, converted to the locale character set
1052 and using escape sequences if not representable in the locale
1053 character set or containing control characters or invalid byte
1054 sequences. Existing backslashes in IDENT are not doubled, so the
1055 result may not uniquely specify the contents of an arbitrary byte
1056 sequence identifier. */
1059 identifier_to_locale (const char *ident
)
// View the identifier as unsigned bytes so comparisons against byte
// values behave as intended.
1061 const unsigned char *uid
= (const unsigned char *) ident
;
1062 size_t idlen
= strlen (ident
);
1063 bool valid_printable_utf8
= true;
1064 bool all_ascii
= true;
// First pass: classify the identifier.
1067 for (i
= 0; i
< idlen
;)
1070 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
// An undecodable sequence or a control character (up to 1F, or 7F to 9F)
// makes the identifier not printable UTF-8.
1071 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1073 valid_printable_utf8
= false;
1081 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1082 attributes putting arbitrary byte sequences in identifiers), or
1083 control characters, we use octal escape sequences for all bytes
1084 outside printable ASCII. */
1085 if (!valid_printable_utf8
)
// Worst case is four output bytes per input byte (backslash plus three
// octal digits), plus the terminator.
1087 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1089 for (i
= 0; i
< idlen
; i
++)
// Printable ASCII bytes are tested for here; other bytes are written as a
// three-digit octal escape.
1091 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1095 sprintf (p
, "\\%03o", uid
[i
]);
1103 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1104 with the locale character set being UTF-8, IDENT is used. */
1105 if (all_ascii
|| locale_utf8
)
1108 /* Otherwise IDENT is converted to the locale character set if
1110 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1111 if (locale_encoding
!= NULL
)
// Convert from UTF-8 to the locale encoding.
1113 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1114 bool conversion_ok
= true;
1116 if (cd
!= (iconv_t
) -1)
1118 size_t ret_alloc
= 4 * idlen
+ 1;
1121 /* Repeat the whole conversion process as needed with
1122 larger buffers so non-reversible transformations can
1123 always be detected. */
1124 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1126 size_t inbytesleft
= idlen
;
// One byte of the allocation is held back from the converter.
1127 size_t outbytesleft
= ret_alloc
- 1;
1130 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
// Reset the conversion state before converting.
1133 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1135 conversion_ok
= false;
1139 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1140 &outbuf
, &outbytesleft
);
// A failed call or unconsumed input means the conversion did not
// complete; the partial buffer is released.
1141 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1146 identifier_to_locale_free (ret
);
1152 conversion_ok
= false;
// A nonzero return is treated as a failed conversion.
1156 else if (iconv_ret
!= 0)
1158 conversion_ok
= false;
1161 /* Return to initial shift state. */
1162 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1167 identifier_to_locale_free (ret
);
1173 conversion_ok
= false;
1187 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
// Worst case is ten output bytes per input byte (backslash, U and eight
// hex digits), plus the terminator.
1189 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1191 for (i
= 0; i
< idlen
;)
1194 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1199 sprintf (p
, "\\U%08x", c
);