From 6424c7c7f7abf1c69b45aa22f02619bc04e35da6 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 20 Apr 2008 05:23:52 +0000 Subject: [PATCH] Improve error message. --- gettext-tools/src/ChangeLog | 34 +++++++++++++++++++ gettext-tools/src/x-csharp.c | 24 ++++++++++---- gettext-tools/src/x-java.c | 14 +++++--- gettext-tools/src/x-perl.c | 10 +++--- gettext-tools/src/x-python.c | 31 ++++++++++++------ gettext-tools/src/xgettext.c | 63 +++++++++++++++++++++++++++--------- gettext-tools/src/xgettext.h | 21 ++++++++++-- 7 files changed, 153 insertions(+), 44 deletions(-) diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 7053937ed..bc89c7870 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,37 @@ +2008-04-20 Bruno Haible + + Improve error message. + * xgettext.h (lexical_context_ty): New type. + (non_ascii_error_message): New declaration. + (from_current_source_encoding): Add lcontext argument. + * xgettext.c (non_ascii_error_message): New function. + (from_current_source_encoding): Add lcontext argument. Use + non_ascii_error_message. + (CONVERT_STRING): Add lcontext argument. + (remember_a_message, remember_a_message_plural): Update. + * x-csharp.c (lexical_context): New variable. + (phase2_getc): Use non_ascii_error_message. + (comment_start, comment_line_end, phase6_get): Set lexical_context. + (extract_csharp): Initialize lexical_context. + * x-java.c (struct string_buffer): Add lcontext field. + (init_string_buffer): Add lcontext argument. + (string_buffer_flush_curr_buffer): Update from_current_source_encoding + call. + (comment_start): Set lcontext. + (phase5_get): Pass lcontext argument. + * x-perl.c (get_here_document, phase2_getc, + extract_quotelike_pass1_utf8): Pass lcontext argument. + * x-python.c (lexical_context): New variable. + (phase2_getc): Use non_ascii_error_message. + (comment_start, comment_line_end, phase6_get): Set lexical_context. + (struct mixed_string_buffer): Add lcontext field. + (init_mixed_string_buffer): Add lcontext argument. + (mixed_string_buffer_flush_curr_buffer): Update + from_current_source_encoding call. + (phase5_get): Set lexical_context. Pass lcontext argument. + (extract_python): Initialize lexical_context. + Reported by Mark Weyer. + 2008-04-20 Bruno Haible * xgettext.c (usage): Clarify single-letter options that take an diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c index 362c84eed..9c73fc8ca 100644 --- a/gettext-tools/src/x-csharp.c +++ b/gettext-tools/src/x-csharp.c @@ -1,5 +1,5 @@ /* xgettext C# backend. - Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc. Written by Bruno Haible , 2003. This program is free software: you can redistribute it and/or modify @@ -198,6 +198,8 @@ phase1_ungetc (int c) /* Newline Unicode character. */ #define UNL 0x000a +static lexical_context_ty lexical_context; + static int phase2_pushback[1]; static int phase2_pushback_length; @@ -215,13 +217,13 @@ phase2_getc () return UEOF; if (!c_isascii (c)) { - char buffer[21]; - sprintf (buffer, ":%ld", (long) line_number); multiline_error (xstrdup (""), - xasprintf (_("\ -Non-ASCII string at %s%s.\n\ -Please specify the source encoding through --from-code.\n"), - real_file_name, buffer)); + xasprintf ("%s\n%s\n", + non_ascii_error_message (lexical_context, + real_file_name, + line_number), + _("\ +Please specify the source encoding through --from-code."))); exit (EXIT_FAILURE); } return c; @@ -595,6 +597,7 @@ static struct string_buffer comment_buffer; static inline void comment_start () { + lexical_context = lc_comment; comment_buffer.utf8_buflen = 0; } @@ -622,6 +625,7 @@ comment_line_end (size_t chars_to_remove) --buflen; buffer[buflen] = '\0'; savable_comment_add (buffer); + lexical_context = lc_outside; } @@ -1636,11 +1640,13 @@ phase6_get (token_ty *tp) { struct string_buffer literal; + lexical_context = lc_string; init_string_buffer (&literal); accumulate_escaped (&literal, '"'); tp->string = xstrdup (string_buffer_result (&literal)); free_string_buffer (&literal); tp->comment = add_reference (savable_comment); + lexical_context = lc_outside; tp->type = token_type_string_literal; return; } @@ -1680,6 +1686,7 @@ phase6_get (token_ty *tp) /* Verbatim string literal. */ struct string_buffer literal; + lexical_context = lc_string; init_string_buffer (&literal); for (;;) { @@ -1703,6 +1710,7 @@ phase6_get (token_ty *tp) tp->string = xstrdup (string_buffer_result (&literal)); free_string_buffer (&literal); tp->comment = add_reference (savable_comment); + lexical_context = lc_outside; tp->type = token_type_string_literal; return; } @@ -2118,6 +2126,8 @@ extract_csharp (FILE *f, logical_file_name = xstrdup (logical_filename); line_number = 1; + lexical_context = lc_outside; + logical_line_number = 1; last_comment_line = -1; last_non_comment_line = -1; diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c index 75481ba42..ae32e64b4 100644 --- a/gettext-tools/src/x-java.c +++ b/gettext-tools/src/x-java.c @@ -1,5 +1,5 @@ /* xgettext Java backend. - Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc. Written by Bruno Haible , 2003. This program is free software: you can redistribute it and/or modify @@ -401,11 +401,13 @@ struct string_buffer char *curr_buffer; size_t curr_buflen; size_t curr_allocated; + /* The lexical context. Used only for error message purposes. */ + lexical_context_ty lcontext; }; /* Initialize a 'struct string_buffer' to empty. */ static inline void -init_string_buffer (struct string_buffer *bp) +init_string_buffer (struct string_buffer *bp, lexical_context_ty lcontext) { bp->utf8_buffer = NULL; bp->utf8_buflen = 0; @@ -414,6 +416,7 @@ init_string_buffer (struct string_buffer *bp) bp->curr_buffer = NULL; bp->curr_buflen = 0; bp->curr_allocated = 0; + bp->lcontext = lcontext; } /* Auxiliary function: Append a byte to bp->curr. */ @@ -518,7 +521,7 @@ string_buffer_flush_curr_buffer (struct string_buffer *bp, int lineno) string_buffer_append_byte (bp, '\0'); /* Convert from the source encoding to UTF-8. */ - curr = from_current_source_encoding (bp->curr_buffer, + curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext, logical_file_name, lineno); /* Append it to bp->utf8_buffer. */ @@ -625,6 +628,7 @@ comment_start () comment_buffer.utf8_buflen = 0; comment_buffer.utf16_surr = 0; comment_buffer.curr_buflen = 0; + comment_buffer.lcontext = lc_comment; } static inline bool @@ -1042,7 +1046,7 @@ phase5_get (token_ty *tp) { struct string_buffer literal; - init_string_buffer (&literal); + init_string_buffer (&literal, lc_string); accumulate_escaped (&literal, '"'); tp->string = xstrdup (string_buffer_result (&literal)); free_string_buffer (&literal); @@ -1056,7 +1060,7 @@ phase5_get (token_ty *tp) { struct string_buffer literal; - init_string_buffer (&literal); + init_string_buffer (&literal, lc_outside); accumulate_escaped (&literal, '\''); free_string_buffer (&literal); tp->type = token_type_other; diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c index 26ff5124c..d468e0d2b 100644 --- a/gettext-tools/src/x-perl.c +++ b/gettext-tools/src/x-perl.c @@ -1,5 +1,5 @@ /* xgettext Perl backend. - Copyright (C) 2002-2007 Free Software Foundation, Inc. + Copyright (C) 2002-2008 Free Software Foundation, Inc. This file was written by Guido Flohr , 2002-2003. @@ -317,7 +317,7 @@ get_here_document (const char *delimiter) /* Convert to UTF-8. */ my_line_utf8 = - from_current_source_encoding (my_linebuf, logical_file_name, + from_current_source_encoding (my_linebuf, lc_string, logical_file_name, line_number + here_eaten); if (my_line_utf8 != my_linebuf) { @@ -466,7 +466,8 @@ phase2_getc () buffer[buflen] = '\0'; /* Convert it to UTF-8. */ utf8_string = - from_current_source_encoding (buffer, logical_file_name, lineno); + from_current_source_encoding (buffer, lc_comment, logical_file_name, + lineno); /* Save it until we encounter the corresponding string. */ savable_comment_add (utf8_string); last_comment_line = lineno; @@ -735,7 +736,8 @@ extract_quotelike_pass1_utf8 (int delim) { char *string = extract_quotelike_pass1 (delim); char *utf8_string = - from_current_source_encoding (string, logical_file_name, line_number); + from_current_source_encoding (string, lc_string, logical_file_name, + line_number); if (utf8_string != string) free (string); return utf8_string; diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c index 8597a4135..e6dcc3190 100644 --- a/gettext-tools/src/x-python.c +++ b/gettext-tools/src/x-python.c @@ -1,5 +1,5 @@ /* xgettext Python backend. - Copyright (C) 2002-2003, 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2002-2003, 2005-2008 Free Software Foundation, Inc. This file was written by Bruno Haible , 2002. @@ -210,6 +210,8 @@ phase1_ungetc (int c) /* End-of-file indicator for functions returning an UCS-4 character. */ #define UEOF -1 +static lexical_context_ty lexical_context; + static int phase2_pushback[max (9, UNINAME_MAX + 3)]; static int phase2_pushback_length; @@ -227,14 +229,14 @@ phase2_getc () return UEOF; if (!c_isascii (c)) { - char buffer[21]; - sprintf (buffer, ":%ld", (long) line_number); multiline_error (xstrdup (""), - xasprintf (_("\ -Non-ASCII string at %s%s.\n\ + xasprintf ("%s\n%s\n", + non_ascii_error_message (lexical_context, + real_file_name, + line_number), + _("\ Please specify the source encoding through --from-code or through a comment\n\ -as specified in http://www.python.org/peps/pep-0263.html.\n"), - real_file_name, buffer)); +as specified in http://www.python.org/peps/pep-0263.html.\n"))); exit (EXIT_FAILURE); } return c; @@ -539,6 +541,7 @@ static struct unicode_string_buffer comment_buffer; static inline void comment_start () { + lexical_context = lc_comment; comment_buffer.utf8_buflen = 0; } @@ -565,6 +568,7 @@ comment_line_end () --buflen; buffer[buflen] = '\0'; savable_comment_add (buffer); + lexical_context = lc_outside; return buffer; } @@ -793,11 +797,13 @@ struct mixed_string_buffer char *curr_buffer; size_t curr_buflen; size_t curr_allocated; + /* The lexical context. Used only for error message purposes. */ + lexical_context_ty lcontext; }; /* Initialize a 'struct mixed_string_buffer' to empty. */ static inline void -init_mixed_string_buffer (struct mixed_string_buffer *bp) +init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext) { bp->utf8_buffer = NULL; bp->utf8_buflen = 0; @@ -806,6 +812,7 @@ init_mixed_string_buffer (struct mixed_string_buffer *bp) bp->curr_buffer = NULL; bp->curr_buflen = 0; bp->curr_allocated = 0; + bp->lcontext = lcontext; } /* Auxiliary function: Append a byte to bp->curr. */ @@ -875,7 +882,7 @@ mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int linen mixed_string_buffer_append_byte (bp, '\0'); /* Convert from the source encoding to UTF-8. */ - curr = from_current_source_encoding (bp->curr_buffer, + curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext, logical_file_name, lineno); /* Append it to bp->utf8_buffer. */ @@ -1510,6 +1517,7 @@ phase5_get (token_ty *tp) interpret_unicode = false; string: triple = false; + lexical_context = lc_string; { int c1 = phase2_getc (); if (c1 == quote_char) @@ -1528,7 +1536,7 @@ phase5_get (token_ty *tp) } backslash_counter = 0; /* Start accumulating the string. */ - init_mixed_string_buffer (&literal); + init_mixed_string_buffer (&literal, lc_string); for (;;) { int uc = phase7_getuc (quote_char, triple, interpret_ansic, @@ -1546,6 +1554,7 @@ phase5_get (token_ty *tp) tp->string = xstrdup (mixed_string_buffer_result (&literal)); free_mixed_string_buffer (&literal); tp->comment = add_reference (savable_comment); + lexical_context = lc_outside; tp->type = token_type_string; return; } @@ -1788,6 +1797,8 @@ extract_python (FILE *f, logical_file_name = xstrdup (logical_filename); line_number = 1; + lexical_context = lc_outside; + last_comment_line = -1; last_non_comment_line = -1; diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index dc7af2878..c9d4c2ae1 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -1871,29 +1871,60 @@ extract_from_file (const char *file_name, extractor_ty extractor, +/* Error message about non-ASCII character in a specific lexical context. */ +char * +non_ascii_error_message (lexical_context_ty lcontext, + const char *file_name, size_t line_number) +{ + char buffer[21]; + char *errmsg; + + if (line_number == (size_t)(-1)) + buffer[0] = '\0'; + else + sprintf (buffer, ":%ld", (long) line_number); + + switch (lcontext) + { + case lc_outside: + errmsg = + xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer); + break; + case lc_comment: + errmsg = + xasprintf (_("Non-ASCII comment at or before %s%s."), + file_name, buffer); + break; + case lc_string: + errmsg = + xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer); + break; + default: + abort (); + } + return errmsg; +} + /* Convert the given string from xgettext_current_source_encoding to the output file encoding (i.e. ASCII or UTF-8). The resulting string is either the argument string, or freshly allocated. The file_name and line_number are only used for error message purposes. */ char * from_current_source_encoding (const char *string, + lexical_context_ty lcontext, const char *file_name, size_t line_number) { if (xgettext_current_source_encoding == po_charset_ascii) { if (!is_ascii_string (string)) { - char buffer[21]; - - if (line_number == (size_t)(-1)) - buffer[0] = '\0'; - else - sprintf (buffer, ":%ld", (long) line_number); multiline_error (xstrdup (""), - xasprintf (_("\ -Non-ASCII string at %s%s.\n\ -Please specify the source encoding through --from-code.\n"), - file_name, buffer)); + xasprintf ("%s\n%s\n", + non_ascii_error_message (lcontext, + file_name, + line_number), + _("\ +Please specify the source encoding through --from-code."))); exit (EXIT_FAILURE); } } @@ -1920,8 +1951,8 @@ Please specify the source encoding through --from-code.\n"), return (char *) string; } -#define CONVERT_STRING(string) \ - string = from_current_source_encoding (string, pos->file_name, \ +#define CONVERT_STRING(string, lcontext) \ + string = from_current_source_encoding (string, lcontext, pos->file_name, \ pos->line_number); @@ -2045,8 +2076,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid, do_wrap = undecided; if (msgctxt != NULL) - CONVERT_STRING (msgctxt); - CONVERT_STRING (msgid); + CONVERT_STRING (msgctxt, lc_string); + CONVERT_STRING (msgid, lc_string); if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header) { @@ -2114,7 +2145,7 @@ meta information, not the empty string.\n"))); if (s == NULL) break; - CONVERT_STRING (s); + CONVERT_STRING (s, lc_comment); /* To reduce the possibility of unwanted matches we do a two step match: the line must contain `xgettext:' and one of @@ -2271,7 +2302,7 @@ remember_a_message_plural (message_ty *mp, char *string, savable_comment_to_xgettext_comment (comment); - CONVERT_STRING (msgid_plural); + CONVERT_STRING (msgid_plural, lc_string); /* See if the message is already a plural message. */ if (mp->msgid_plural == NULL) diff --git a/gettext-tools/src/xgettext.h b/gettext-tools/src/xgettext.h index 570dacd05..0d84c4563 100644 --- a/gettext-tools/src/xgettext.h +++ b/gettext-tools/src/xgettext.h @@ -1,5 +1,5 @@ /* xgettext common functions. - Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2005-2006, 2008 Free Software Foundation, Inc. Written by Peter Miller and Bruno Haible , 2001. @@ -136,6 +136,21 @@ extern flag_context_list_ty * extern void xgettext_record_flag (const char *optionstring); +/* Context while building up lexical tokens. */ +typedef enum + { + lc_outside, /* Initial context: outside of comments and strings. */ + lc_comment, /* Inside a comment. */ + lc_string /* Inside a string literal. */ + } + lexical_context_ty; + +/* Error message about non-ASCII character in a specific lexical context. */ +extern char *non_ascii_error_message (lexical_context_ty lcontext, + const char *file_name, + size_t line_number); + + /* Canonicalized encoding name for all input files. */ extern const char *xgettext_global_source_encoding; @@ -157,8 +172,10 @@ extern iconv_t xgettext_current_source_iconv; /* Convert the given string from xgettext_current_source_encoding to the output file encoding (i.e. ASCII or UTF-8). The resulting string is either the argument string, or freshly allocated. - The file_name and line_number are only used for error message purposes. */ + The lcontext, file_name and line_number are only used for error message + purposes. */ extern char *from_current_source_encoding (const char *string, + lexical_context_ty lcontext, const char *file_name, size_t line_number); -- 2.47.2