From 6424c7c7f7abf1c69b45aa22f02619bc04e35da6 Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Sun, 20 Apr 2008 05:23:52 +0000
Subject: [PATCH] xgettext: Name the lexical context in non-ASCII error messages.

---
 gettext-tools/src/ChangeLog  | 34 +++++++++++++++++++
 gettext-tools/src/x-csharp.c | 24 ++++++++++----
 gettext-tools/src/x-java.c   | 14 +++++---
 gettext-tools/src/x-perl.c   | 10 +++---
 gettext-tools/src/x-python.c | 31 ++++++++++++------
 gettext-tools/src/xgettext.c | 63 +++++++++++++++++++++++++++---------
 gettext-tools/src/xgettext.h | 21 ++++++++++--
 7 files changed, 153 insertions(+), 44 deletions(-)

diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 7053937ed..bc89c7870 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,37 @@
+2008-04-20  Bruno Haible  <bruno@clisp.org>
+
+	Improve error message.
+	* xgettext.h (lexical_context_ty): New type.
+	(non_ascii_error_message): New declaration.
+	(from_current_source_encoding): Add lcontext argument.
+	* xgettext.c (non_ascii_error_message): New function.
+	(from_current_source_encoding): Add lcontext argument. Use
+	non_ascii_error_message.
+	(CONVERT_STRING): Add lcontext argument.
+	(remember_a_message, remember_a_message_plural): Update.
+	* x-csharp.c (lexical_context): New variable.
+	(phase2_getc): Use non_ascii_error_message.
+	(comment_start, comment_line_end, phase6_get): Set lexical_context.
+	(extract_csharp): Initialize lexical_context.
+	* x-java.c (struct string_buffer): Add lcontext field.
+	(init_string_buffer): Add lcontext argument.
+	(string_buffer_flush_curr_buffer): Update from_current_source_encoding
+	call.
+	(comment_start): Set lcontext.
+	(phase5_get): Pass lcontext argument.
+	* x-perl.c (get_here_document, phase2_getc,
+	extract_quotelike_pass1_utf8): Pass lcontext argument.
+	* x-python.c (lexical_context): New variable.
+	(phase2_getc): Use non_ascii_error_message.
+	(comment_start, comment_line_end, phase6_get): Set lexical_context.
+	(struct mixed_string_buffer): Add lcontext field.
+	(init_mixed_string_buffer): Add lcontext argument.
+	(mixed_string_buffer_flush_curr_buffer): Update
+	from_current_source_encoding call.
+	(phase5_get): Set lexical_context. Pass lcontext argument.
+	(extract_python): Initialize lexical_context.
+	Reported by Mark Weyer.
+
 2008-04-20  Bruno Haible  <bruno@clisp.org>
 
 	* xgettext.c (usage): Clarify single-letter options that take an
diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c
index 362c84eed..9c73fc8ca 100644
--- a/gettext-tools/src/x-csharp.c
+++ b/gettext-tools/src/x-csharp.c
@@ -1,5 +1,5 @@
 /* xgettext C# backend.
-   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2003.
 
    This program is free software: you can redistribute it and/or modify
@@ -198,6 +198,8 @@ phase1_ungetc (int c)
 /* Newline Unicode character.  */
 #define UNL 0x000a
 
+static lexical_context_ty lexical_context;
+
 static int phase2_pushback[1];
 static int phase2_pushback_length;
 
@@ -215,13 +217,13 @@ phase2_getc ()
 	return UEOF;
       if (!c_isascii (c))
 	{
-	  char buffer[21];
-	  sprintf (buffer, ":%ld", (long) line_number);
 	  multiline_error (xstrdup (""),
-			   xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
-			   real_file_name, buffer));
+			   xasprintf ("%s\n%s\n",
+				      non_ascii_error_message (lexical_context,
+							       real_file_name,
+							       line_number),
+				      _("\
+Please specify the source encoding through --from-code.")));
 	  exit (EXIT_FAILURE);
 	}
       return c;
@@ -595,6 +597,7 @@ static struct string_buffer comment_buffer;
 static inline void
 comment_start ()
 {
+  lexical_context = lc_comment;
   comment_buffer.utf8_buflen = 0;
 }
 
@@ -622,6 +625,7 @@ comment_line_end (size_t chars_to_remove)
     --buflen;
   buffer[buflen] = '\0';
   savable_comment_add (buffer);
+  lexical_context = lc_outside;
 }
 
 
@@ -1636,11 +1640,13 @@ phase6_get (token_ty *tp)
 	  {
 	    struct string_buffer literal;
 
+	    lexical_context = lc_string;
 	    init_string_buffer (&literal);
 	    accumulate_escaped (&literal, '"');
 	    tp->string = xstrdup (string_buffer_result (&literal));
 	    free_string_buffer (&literal);
 	    tp->comment = add_reference (savable_comment);
+	    lexical_context = lc_outside;
 	    tp->type = token_type_string_literal;
 	    return;
 	  }
@@ -1680,6 +1686,7 @@ phase6_get (token_ty *tp)
 	      /* Verbatim string literal.  */
 	      struct string_buffer literal;
 
+	      lexical_context = lc_string;
 	      init_string_buffer (&literal);
 	      for (;;)
 		{
@@ -1703,6 +1710,7 @@ phase6_get (token_ty *tp)
 	      tp->string = xstrdup (string_buffer_result (&literal));
 	      free_string_buffer (&literal);
 	      tp->comment = add_reference (savable_comment);
+	      lexical_context = lc_outside;
 	      tp->type = token_type_string_literal;
 	      return;
 	    }
@@ -2118,6 +2126,8 @@ extract_csharp (FILE *f,
   logical_file_name = xstrdup (logical_filename);
   line_number = 1;
 
+  lexical_context = lc_outside;
+
   logical_line_number = 1;
   last_comment_line = -1;
   last_non_comment_line = -1;
diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c
index 75481ba42..ae32e64b4 100644
--- a/gettext-tools/src/x-java.c
+++ b/gettext-tools/src/x-java.c
@@ -1,5 +1,5 @@
 /* xgettext Java backend.
-   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2003.
 
    This program is free software: you can redistribute it and/or modify
@@ -401,11 +401,13 @@ struct string_buffer
   char *curr_buffer;
   size_t curr_buflen;
   size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
 };
 
 /* Initialize a 'struct string_buffer' to empty.  */
 static inline void
-init_string_buffer (struct string_buffer *bp)
+init_string_buffer (struct string_buffer *bp, lexical_context_ty lcontext)
 {
   bp->utf8_buffer = NULL;
   bp->utf8_buflen = 0;
@@ -414,6 +416,7 @@ init_string_buffer (struct string_buffer *bp)
   bp->curr_buffer = NULL;
   bp->curr_buflen = 0;
   bp->curr_allocated = 0;
+  bp->lcontext = lcontext;
 }
 
 /* Auxiliary function: Append a byte to bp->curr.  */
@@ -518,7 +521,7 @@ string_buffer_flush_curr_buffer (struct string_buffer *bp, int lineno)
       string_buffer_append_byte (bp, '\0');
 
       /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer,
+      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
 					   logical_file_name, lineno);
 
       /* Append it to bp->utf8_buffer.  */
@@ -625,6 +628,7 @@ comment_start ()
   comment_buffer.utf8_buflen = 0;
   comment_buffer.utf16_surr = 0;
   comment_buffer.curr_buflen = 0;
+  comment_buffer.lcontext = lc_comment;
 }
 
 static inline bool
@@ -1042,7 +1046,7 @@ phase5_get (token_ty *tp)
 	  {
 	    struct string_buffer literal;
 
-	    init_string_buffer (&literal);
+	    init_string_buffer (&literal, lc_string);
 	    accumulate_escaped (&literal, '"');
 	    tp->string = xstrdup (string_buffer_result (&literal));
 	    free_string_buffer (&literal);
@@ -1056,7 +1060,7 @@ phase5_get (token_ty *tp)
 	  {
 	    struct string_buffer literal;
 
-	    init_string_buffer (&literal);
+	    init_string_buffer (&literal, lc_outside);
 	    accumulate_escaped (&literal, '\'');
 	    free_string_buffer (&literal);
 	    tp->type = token_type_other;
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c
index 26ff5124c..d468e0d2b 100644
--- a/gettext-tools/src/x-perl.c
+++ b/gettext-tools/src/x-perl.c
@@ -1,5 +1,5 @@
 /* xgettext Perl backend.
-   Copyright (C) 2002-2007 Free Software Foundation, Inc.
+   Copyright (C) 2002-2008 Free Software Foundation, Inc.
 
    This file was written by Guido Flohr <guido@imperia.net>, 2002-2003.
 
@@ -317,7 +317,7 @@ get_here_document (const char *delimiter)
 
       /* Convert to UTF-8.  */
       my_line_utf8 =
-	from_current_source_encoding (my_linebuf, logical_file_name,
+	from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
 				      line_number + here_eaten);
       if (my_line_utf8 != my_linebuf)
 	{
@@ -466,7 +466,8 @@ phase2_getc ()
       buffer[buflen] = '\0';
       /* Convert it to UTF-8.  */
       utf8_string =
-	from_current_source_encoding (buffer, logical_file_name, lineno);
+	from_current_source_encoding (buffer, lc_comment, logical_file_name,
+				      lineno);
       /* Save it until we encounter the corresponding string.  */
       savable_comment_add (utf8_string);
       last_comment_line = lineno;
@@ -735,7 +736,8 @@ extract_quotelike_pass1_utf8 (int delim)
 {
   char *string = extract_quotelike_pass1 (delim);
   char *utf8_string =
-    from_current_source_encoding (string, logical_file_name, line_number);
+    from_current_source_encoding (string, lc_string, logical_file_name,
+				  line_number);
   if (utf8_string != string)
     free (string);
   return utf8_string;
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index 8597a4135..e6dcc3190 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
 /* xgettext Python backend.
-   Copyright (C) 2002-2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2002-2003, 2005-2008 Free Software Foundation, Inc.
 
    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
 
@@ -210,6 +210,8 @@ phase1_ungetc (int c)
 /* End-of-file indicator for functions returning an UCS-4 character.  */
 #define UEOF -1
 
+static lexical_context_ty lexical_context;
+
 static int phase2_pushback[max (9, UNINAME_MAX + 3)];
 static int phase2_pushback_length;
 
@@ -227,14 +229,14 @@ phase2_getc ()
 	return UEOF;
       if (!c_isascii (c))
 	{
-	  char buffer[21];
-	  sprintf (buffer, ":%ld", (long) line_number);
 	  multiline_error (xstrdup (""),
-			   xasprintf (_("\
-Non-ASCII string at %s%s.\n\
+			   xasprintf ("%s\n%s\n",
+				      non_ascii_error_message (lexical_context,
+							       real_file_name,
+							       line_number),
+				      _("\
 Please specify the source encoding through --from-code or through a comment\n\
-as specified in http://www.python.org/peps/pep-0263.html.\n"),
-			   real_file_name, buffer));
+as specified in http://www.python.org/peps/pep-0263.html.")));
 	  exit (EXIT_FAILURE);
 	}
       return c;
@@ -539,6 +541,7 @@ static struct unicode_string_buffer comment_buffer;
 static inline void
 comment_start ()
 {
+  lexical_context = lc_comment;
   comment_buffer.utf8_buflen = 0;
 }
 
@@ -565,6 +568,7 @@ comment_line_end ()
     --buflen;
   buffer[buflen] = '\0';
   savable_comment_add (buffer);
+  lexical_context = lc_outside;
   return buffer;
 }
 
@@ -793,11 +797,13 @@ struct mixed_string_buffer
   char *curr_buffer;
   size_t curr_buflen;
   size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
 };
 
 /* Initialize a 'struct mixed_string_buffer' to empty.  */
 static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp)
+init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
 {
   bp->utf8_buffer = NULL;
   bp->utf8_buflen = 0;
@@ -806,6 +812,7 @@ init_mixed_string_buffer (struct mixed_string_buffer *bp)
   bp->curr_buffer = NULL;
   bp->curr_buflen = 0;
   bp->curr_allocated = 0;
+  bp->lcontext = lcontext;
 }
 
 /* Auxiliary function: Append a byte to bp->curr.  */
@@ -875,7 +882,7 @@ mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int linen
       mixed_string_buffer_append_byte (bp, '\0');
 
       /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer,
+      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
 					   logical_file_name, lineno);
 
       /* Append it to bp->utf8_buffer.  */
@@ -1510,6 +1517,7 @@ phase5_get (token_ty *tp)
 	      interpret_unicode = false;
 	    string:
 	      triple = false;
+	      lexical_context = lc_string;
 	      {
 		int c1 = phase2_getc ();
 		if (c1 == quote_char)
@@ -1528,7 +1536,7 @@ phase5_get (token_ty *tp)
 	      }
 	      backslash_counter = 0;
 	      /* Start accumulating the string.  */
-	      init_mixed_string_buffer (&literal);
+	      init_mixed_string_buffer (&literal, lc_string);
 	      for (;;)
 		{
 		  int uc = phase7_getuc (quote_char, triple, interpret_ansic,
@@ -1546,6 +1554,7 @@ phase5_get (token_ty *tp)
 	      tp->string = xstrdup (mixed_string_buffer_result (&literal));
 	      free_mixed_string_buffer (&literal);
 	      tp->comment = add_reference (savable_comment);
+	      lexical_context = lc_outside;
 	      tp->type = token_type_string;
 	      return;
 	  }
@@ -1788,6 +1797,8 @@ extract_python (FILE *f,
   logical_file_name = xstrdup (logical_filename);
   line_number = 1;
 
+  lexical_context = lc_outside;
+
   last_comment_line = -1;
   last_non_comment_line = -1;
 
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index dc7af2878..c9d4c2ae1 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -1871,29 +1871,60 @@ extract_from_file (const char *file_name, extractor_ty extractor,
 
 
 
+/* Return the error message, built by xasprintf and localized through _(),
+   about a non-ASCII character found in lexical context LCONTEXT at
+   FILE_NAME:LINE_NUMBER.  If LINE_NUMBER is (size_t)(-1), only FILE_NAME
+   is mentioned.  Aborts if LCONTEXT is not a lexical_context_ty value.  */
+char *
+non_ascii_error_message (lexical_context_ty lcontext,
+			 const char *file_name, size_t line_number)
+{
+  /* Holds ':', a sign, the decimal digits of a 'long' (at most 3 per byte)
+     and the terminating NUL.  A fixed size of 21 would be one byte too
+     small for ":-9223372036854775808".  */
+  char buffer[3 * sizeof (long) + 3];
+
+  /* (size_t)(-1) is treated as "no line number": omit the ":line" suffix.  */
+  if (line_number == (size_t)(-1))
+    buffer[0] = '\0';
+  else
+    sprintf (buffer, ":%ld", (long) line_number);
+
+  switch (lcontext)
+    {
+    case lc_outside:
+      return xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
+    case lc_comment:
+      return xasprintf (_("Non-ASCII comment at or before %s%s."),
+			file_name, buffer);
+    case lc_string:
+      return xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
+    default:
+      /* Not one of the enumerators handled above.  */
+      abort ();
+    }
+}
+
 /* Convert the given string from xgettext_current_source_encoding to
    the output file encoding (i.e. ASCII or UTF-8).
    The resulting string is either the argument string, or freshly allocated.
    The file_name and line_number are only used for error message purposes.  */
 char *
 from_current_source_encoding (const char *string,
+			      lexical_context_ty lcontext,
 			      const char *file_name, size_t line_number)
 {
   if (xgettext_current_source_encoding == po_charset_ascii)
     {
       if (!is_ascii_string (string))
 	{
-	  char buffer[21];
-
-	  if (line_number == (size_t)(-1))
-	    buffer[0] = '\0';
-	  else
-	    sprintf (buffer, ":%ld", (long) line_number);
 	  multiline_error (xstrdup (""),
-			   xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
-				      file_name, buffer));
+			   xasprintf ("%s\n%s\n",
+				      non_ascii_error_message (lcontext,
+							       file_name,
+							       line_number),
+				      _("\
+Please specify the source encoding through --from-code.")));
 	  exit (EXIT_FAILURE);
 	}
     }
@@ -1920,8 +1951,8 @@ Please specify the source encoding through --from-code.\n"),
   return (char *) string;
 }
 
-#define CONVERT_STRING(string) \
-  string = from_current_source_encoding (string, pos->file_name, \
+#define CONVERT_STRING(string, lcontext) \
+  string = from_current_source_encoding (string, lcontext, pos->file_name, \
 					 pos->line_number);
 
 
@@ -2045,8 +2076,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
   do_wrap = undecided;
 
   if (msgctxt != NULL)
-    CONVERT_STRING (msgctxt);
-  CONVERT_STRING (msgid);
+    CONVERT_STRING (msgctxt, lc_string);
+  CONVERT_STRING (msgid, lc_string);
 
   if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
     {
@@ -2114,7 +2145,7 @@ meta information, not the empty string.\n")));
 	if (s == NULL)
 	  break;
 
-	CONVERT_STRING (s);
+	CONVERT_STRING (s, lc_comment);
 
 	/* To reduce the possibility of unwanted matches we do a two
 	   step match: the line must contain `xgettext:' and one of
@@ -2271,7 +2302,7 @@ remember_a_message_plural (message_ty *mp, char *string,
 
   savable_comment_to_xgettext_comment (comment);
 
-  CONVERT_STRING (msgid_plural);
+  CONVERT_STRING (msgid_plural, lc_string);
 
   /* See if the message is already a plural message.  */
   if (mp->msgid_plural == NULL)
diff --git a/gettext-tools/src/xgettext.h b/gettext-tools/src/xgettext.h
index 570dacd05..0d84c4563 100644
--- a/gettext-tools/src/xgettext.h
+++ b/gettext-tools/src/xgettext.h
@@ -1,5 +1,5 @@
 /* xgettext common functions.
-   Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2005-2006, 2008 Free Software Foundation, Inc.
    Written by Peter Miller <millerp@canb.auug.org.au>
    and Bruno Haible <haible@clisp.cons.org>, 2001.
 
@@ -136,6 +136,21 @@ extern flag_context_list_ty *
 extern void xgettext_record_flag (const char *optionstring);
 
 
+/* Lexical context of the scanner; picks the non-ASCII error message text.  */
+typedef enum
+  {
+    lc_outside, /* Initial context: outside of comments and strings.  */
+    lc_comment, /* Inside a comment.  */
+    lc_string   /* Inside a string literal.  */
+  }
+  lexical_context_ty;
+
+/* Error message about non-ASCII character in a specific lexical context.  */
+extern char *non_ascii_error_message (lexical_context_ty lcontext,
+				      const char *file_name,
+				      size_t line_number);
+
+
 /* Canonicalized encoding name for all input files.  */
 extern const char *xgettext_global_source_encoding;
 
@@ -157,8 +172,10 @@ extern iconv_t xgettext_current_source_iconv;
 /* Convert the given string from xgettext_current_source_encoding to
    the output file encoding (i.e. ASCII or UTF-8).
    The resulting string is either the argument string, or freshly allocated.
-   The file_name and line_number are only used for error message purposes.  */
+   The lcontext, file_name and line_number are only used for error message
+   purposes.  */
 extern char *from_current_source_encoding (const char *string,
+					   lexical_context_ty lcontext,
 					   const char *file_name,
 					   size_t line_number);
 
-- 
2.47.2