+2008-04-20 Bruno Haible <bruno@clisp.org>
+
+ Improve error message.
+ * xgettext.h (lexical_context_ty): New type.
+ (non_ascii_error_message): New declaration.
+ (from_current_source_encoding): Add lcontext argument.
+ * xgettext.c (non_ascii_error_message): New function.
+ (from_current_source_encoding): Add lcontext argument. Use
+ non_ascii_error_message.
+ (CONVERT_STRING): Add lcontext argument.
+ (remember_a_message, remember_a_message_plural): Update.
+ * x-csharp.c (lexical_context): New variable.
+ (phase2_getc): Use non_ascii_error_message.
+ (comment_start, comment_line_end, phase6_get): Set lexical_context.
+ (extract_csharp): Initialize lexical_context.
+ * x-java.c (struct string_buffer): Add lcontext field.
+ (init_string_buffer): Add lcontext argument.
+ (string_buffer_flush_curr_buffer): Update from_current_source_encoding
+ call.
+ (comment_start): Set lcontext.
+ (phase5_get): Pass lcontext argument.
+ * x-perl.c (get_here_document, phase2_getc,
+ extract_quotelike_pass1_utf8): Pass lcontext argument.
+ * x-python.c (lexical_context): New variable.
+ (phase2_getc): Use non_ascii_error_message.
+ (comment_start, comment_line_end, phase6_get): Set lexical_context.
+ (struct mixed_string_buffer): Add lcontext field.
+ (init_mixed_string_buffer): Add lcontext argument.
+ (mixed_string_buffer_flush_curr_buffer): Update
+ from_current_source_encoding call.
+ (phase5_get): Set lexical_context. Pass lcontext argument.
+ (extract_python): Initialize lexical_context.
+ Reported by Mark Weyer.
+
2008-04-20 Bruno Haible <bruno@clisp.org>
* xgettext.c (usage): Clarify single-letter options that take an
/* xgettext C# backend.
- Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2003.
This program is free software: you can redistribute it and/or modify
/* Newline Unicode character. */
#define UNL 0x000a
+static lexical_context_ty lexical_context;
+
static int phase2_pushback[1];
static int phase2_pushback_length;
return UEOF;
if (!c_isascii (c))
{
- char buffer[21];
- sprintf (buffer, ":%ld", (long) line_number);
multiline_error (xstrdup (""),
- xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
- real_file_name, buffer));
+ xasprintf ("%s\n%s\n",
+ non_ascii_error_message (lexical_context,
+ real_file_name,
+ line_number),
+ _("\
+Please specify the source encoding through --from-code.")));
exit (EXIT_FAILURE);
}
return c;
static inline void
comment_start ()
{
+ lexical_context = lc_comment;
comment_buffer.utf8_buflen = 0;
}
--buflen;
buffer[buflen] = '\0';
savable_comment_add (buffer);
+ lexical_context = lc_outside;
}
{
struct string_buffer literal;
+ lexical_context = lc_string;
init_string_buffer (&literal);
accumulate_escaped (&literal, '"');
tp->string = xstrdup (string_buffer_result (&literal));
free_string_buffer (&literal);
tp->comment = add_reference (savable_comment);
+ lexical_context = lc_outside;
tp->type = token_type_string_literal;
return;
}
/* Verbatim string literal. */
struct string_buffer literal;
+ lexical_context = lc_string;
init_string_buffer (&literal);
for (;;)
{
tp->string = xstrdup (string_buffer_result (&literal));
free_string_buffer (&literal);
tp->comment = add_reference (savable_comment);
+ lexical_context = lc_outside;
tp->type = token_type_string_literal;
return;
}
logical_file_name = xstrdup (logical_filename);
line_number = 1;
+ lexical_context = lc_outside;
+
logical_line_number = 1;
last_comment_line = -1;
last_non_comment_line = -1;
/* xgettext Java backend.
- Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2003.
This program is free software: you can redistribute it and/or modify
char *curr_buffer;
size_t curr_buflen;
size_t curr_allocated;
+ /* The lexical context. Used only for error message purposes. */
+ lexical_context_ty lcontext;
};
/* Initialize a 'struct string_buffer' to empty. */
static inline void
-init_string_buffer (struct string_buffer *bp)
+init_string_buffer (struct string_buffer *bp, lexical_context_ty lcontext)
{
bp->utf8_buffer = NULL;
bp->utf8_buflen = 0;
bp->curr_buffer = NULL;
bp->curr_buflen = 0;
bp->curr_allocated = 0;
+ bp->lcontext = lcontext;
}
/* Auxiliary function: Append a byte to bp->curr. */
string_buffer_append_byte (bp, '\0');
/* Convert from the source encoding to UTF-8. */
- curr = from_current_source_encoding (bp->curr_buffer,
+ curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
logical_file_name, lineno);
/* Append it to bp->utf8_buffer. */
comment_buffer.utf8_buflen = 0;
comment_buffer.utf16_surr = 0;
comment_buffer.curr_buflen = 0;
+ comment_buffer.lcontext = lc_comment;
}
static inline bool
{
struct string_buffer literal;
- init_string_buffer (&literal);
+ init_string_buffer (&literal, lc_string);
accumulate_escaped (&literal, '"');
tp->string = xstrdup (string_buffer_result (&literal));
free_string_buffer (&literal);
{
struct string_buffer literal;
- init_string_buffer (&literal);
+ init_string_buffer (&literal, lc_outside);
accumulate_escaped (&literal, '\'');
free_string_buffer (&literal);
tp->type = token_type_other;
/* xgettext Perl backend.
- Copyright (C) 2002-2007 Free Software Foundation, Inc.
+ Copyright (C) 2002-2008 Free Software Foundation, Inc.
This file was written by Guido Flohr <guido@imperia.net>, 2002-2003.
/* Convert to UTF-8. */
my_line_utf8 =
- from_current_source_encoding (my_linebuf, logical_file_name,
+ from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
line_number + here_eaten);
if (my_line_utf8 != my_linebuf)
{
buffer[buflen] = '\0';
/* Convert it to UTF-8. */
utf8_string =
- from_current_source_encoding (buffer, logical_file_name, lineno);
+ from_current_source_encoding (buffer, lc_comment, logical_file_name,
+ lineno);
/* Save it until we encounter the corresponding string. */
savable_comment_add (utf8_string);
last_comment_line = lineno;
{
char *string = extract_quotelike_pass1 (delim);
char *utf8_string =
- from_current_source_encoding (string, logical_file_name, line_number);
+ from_current_source_encoding (string, lc_string, logical_file_name,
+ line_number);
if (utf8_string != string)
free (string);
return utf8_string;
/* xgettext Python backend.
- Copyright (C) 2002-2003, 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2002-2003, 2005-2008 Free Software Foundation, Inc.
This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
/* End-of-file indicator for functions returning an UCS-4 character. */
#define UEOF -1
+static lexical_context_ty lexical_context;
+
static int phase2_pushback[max (9, UNINAME_MAX + 3)];
static int phase2_pushback_length;
return UEOF;
if (!c_isascii (c))
{
- char buffer[21];
- sprintf (buffer, ":%ld", (long) line_number);
+ /* The "%s\n%s\n" format below supplies the final newline, so the
+    hint text itself must not end in one (as in the C# backend).  */
multiline_error (xstrdup (""),
- xasprintf (_("\
-Non-ASCII string at %s%s.\n\
+ xasprintf ("%s\n%s\n",
+ non_ascii_error_message (lexical_context,
+ real_file_name,
+ line_number),
+ _("\
Please specify the source encoding through --from-code or through a comment\n\
-as specified in http://www.python.org/peps/pep-0263.html.\n"),
- real_file_name, buffer));
+as specified in http://www.python.org/peps/pep-0263.html.")));
exit (EXIT_FAILURE);
}
return c;
static inline void
comment_start ()
{
+ lexical_context = lc_comment;
comment_buffer.utf8_buflen = 0;
}
--buflen;
buffer[buflen] = '\0';
savable_comment_add (buffer);
+ lexical_context = lc_outside;
return buffer;
}
char *curr_buffer;
size_t curr_buflen;
size_t curr_allocated;
+ /* The lexical context. Used only for error message purposes. */
+ lexical_context_ty lcontext;
};
/* Initialize a 'struct mixed_string_buffer' to empty. */
static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp)
+init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
{
bp->utf8_buffer = NULL;
bp->utf8_buflen = 0;
bp->curr_buffer = NULL;
bp->curr_buflen = 0;
bp->curr_allocated = 0;
+ bp->lcontext = lcontext;
}
/* Auxiliary function: Append a byte to bp->curr. */
mixed_string_buffer_append_byte (bp, '\0');
/* Convert from the source encoding to UTF-8. */
- curr = from_current_source_encoding (bp->curr_buffer,
+ curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
logical_file_name, lineno);
/* Append it to bp->utf8_buffer. */
interpret_unicode = false;
string:
triple = false;
+ lexical_context = lc_string;
{
int c1 = phase2_getc ();
if (c1 == quote_char)
}
backslash_counter = 0;
/* Start accumulating the string. */
- init_mixed_string_buffer (&literal);
+ init_mixed_string_buffer (&literal, lc_string);
for (;;)
{
int uc = phase7_getuc (quote_char, triple, interpret_ansic,
tp->string = xstrdup (mixed_string_buffer_result (&literal));
free_mixed_string_buffer (&literal);
tp->comment = add_reference (savable_comment);
+ lexical_context = lc_outside;
tp->type = token_type_string;
return;
}
logical_file_name = xstrdup (logical_filename);
line_number = 1;
+ lexical_context = lc_outside;
+
last_comment_line = -1;
last_non_comment_line = -1;
+/* Error message about non-ASCII character in a specific lexical context.
+   LCONTEXT selects the wording: a bare character (lc_outside), a comment
+   (lc_comment) or a string (lc_string).  FILE_NAME and LINE_NUMBER give the
+   position to report; when LINE_NUMBER is (size_t)(-1), only FILE_NAME is
+   shown.  Returns the message as produced by xasprintf, without a trailing
+   newline.  Aborts on an unknown LCONTEXT.  */
+char *
+non_ascii_error_message (lexical_context_ty lcontext,
+                         const char *file_name, size_t line_number)
+{
+  /* Holds ':' + decimal digits + NUL.  Each 8-bit byte adds fewer than
+     three decimal digits, so 3 * sizeof is a safe bound on the digits.  */
+  char buffer[1 + 3 * sizeof (unsigned long) + 1];
+  char *errmsg;
+
+  if (line_number == (size_t)(-1))
+    buffer[0] = '\0';
+  else
+    /* Format as unsigned: converting a huge size_t to 'long' could yield a
+       negative value whose sign would not fit a 21-byte buffer.  */
+    sprintf (buffer, ":%lu", (unsigned long) line_number);
+
+  switch (lcontext)
+    {
+    case lc_outside:
+      errmsg =
+        xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
+      break;
+    case lc_comment:
+      errmsg =
+        xasprintf (_("Non-ASCII comment at or before %s%s."),
+                   file_name, buffer);
+      break;
+    case lc_string:
+      errmsg =
+        xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
+      break;
+    default:
+      /* Every lexical_context_ty value is handled above.  */
+      abort ();
+    }
+  return errmsg;
+}
+
/* Convert the given string from xgettext_current_source_encoding to
the output file encoding (i.e. ASCII or UTF-8).
The resulting string is either the argument string, or freshly allocated.
The file_name and line_number are only used for error message purposes. */
char *
from_current_source_encoding (const char *string,
+ lexical_context_ty lcontext,
const char *file_name, size_t line_number)
{
if (xgettext_current_source_encoding == po_charset_ascii)
{
if (!is_ascii_string (string))
{
- char buffer[21];
-
- if (line_number == (size_t)(-1))
- buffer[0] = '\0';
- else
- sprintf (buffer, ":%ld", (long) line_number);
multiline_error (xstrdup (""),
- xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
- file_name, buffer));
+ xasprintf ("%s\n%s\n",
+ non_ascii_error_message (lcontext,
+ file_name,
+ line_number),
+ _("\
+Please specify the source encoding through --from-code.")));
exit (EXIT_FAILURE);
}
}
return (char *) string;
}
-#define CONVERT_STRING(string) \
- string = from_current_source_encoding (string, pos->file_name, \
+#define CONVERT_STRING(string, lcontext) \
+ string = from_current_source_encoding (string, lcontext, pos->file_name, \
pos->line_number);
do_wrap = undecided;
if (msgctxt != NULL)
- CONVERT_STRING (msgctxt);
- CONVERT_STRING (msgid);
+ CONVERT_STRING (msgctxt, lc_string);
+ CONVERT_STRING (msgid, lc_string);
if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
{
if (s == NULL)
break;
- CONVERT_STRING (s);
+ CONVERT_STRING (s, lc_comment);
/* To reduce the possibility of unwanted matches we do a two
step match: the line must contain `xgettext:' and one of
savable_comment_to_xgettext_comment (comment);
- CONVERT_STRING (msgid_plural);
+ CONVERT_STRING (msgid_plural, lc_string);
/* See if the message is already a plural message. */
if (mp->msgid_plural == NULL)
/* xgettext common functions.
- Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2005-2006, 2008 Free Software Foundation, Inc.
Written by Peter Miller <millerp@canb.auug.org.au>
and Bruno Haible <haible@clisp.cons.org>, 2001.
extern void xgettext_record_flag (const char *optionstring);
+/* Context while building up lexical tokens.
+   It records whether the scanner is currently inside a comment, inside a
+   string literal, or in neither.  non_ascii_error_message uses it to choose
+   the wording of the "Non-ASCII ..." diagnostic.  */
+typedef enum
+ {
+ lc_outside, /* Initial context: outside of comments and strings. */
+ lc_comment, /* Inside a comment. */
+ lc_string /* Inside a string literal. */
+ }
+ lexical_context_ty;
+
+/* Error message about non-ASCII character in a specific lexical context.
+   LCONTEXT selects the wording (character, comment or string); FILE_NAME
+   and LINE_NUMBER give the position to report.  A LINE_NUMBER of
+   (size_t)(-1) means that no line number is appended to FILE_NAME.
+   The message is built with xasprintf and has no trailing newline.  */
+extern char *non_ascii_error_message (lexical_context_ty lcontext,
+ const char *file_name,
+ size_t line_number);
+
+
/* Canonicalized encoding name for all input files. */
extern const char *xgettext_global_source_encoding;
/* Convert the given string from xgettext_current_source_encoding to
the output file encoding (i.e. ASCII or UTF-8).
The resulting string is either the argument string, or freshly allocated.
- The file_name and line_number are only used for error message purposes. */
+ The lcontext, file_name and line_number are only used for error message
+ purposes. */
extern char *from_current_source_encoding (const char *string,
+ lexical_context_ty lcontext,
const char *file_name,
size_t line_number);