Improve error message.

author Bruno Haible <bruno@clisp.org>

Sun, 20 Apr 2008 05:23:52 +0000 (05:23 +0000)

committer Bruno Haible <bruno@clisp.org>

Tue, 23 Jun 2009 10:15:39 +0000 (12:15 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 20 Apr 2008 05:23:52 +0000 (05:23 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:15:39 +0000 (12:15 +0200)
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index 7053937ed45a68d047849eeeb9ec5d2f313f903c..bc89c78708064993a02497775b5989c5a58e8e1d 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,37 @@
+2008-04-20  Bruno Haible  <bruno@clisp.org>
+
+       Improve error message.
+       * xgettext.h (lexical_context_ty): New type.
+       (non_ascii_error_message): New declaration.
+       (from_current_source_encoding): Add lcontext argument.
+       * xgettext.c (non_ascii_error_message): New function.
+       (from_current_source_encoding): Add lcontext argument. Use
+       non_ascii_error_message.
+       (CONVERT_STRING): Add lcontext argument.
+       (remember_a_message, remember_a_message_plural): Update.
+       * x-csharp.c (lexical_context): New variable.
+       (phase2_getc): Use non_ascii_error_message.
+       (comment_start, comment_line_end, phase6_get): Set lexical_context.
+       (extract_csharp): Initialize lexical_context.
+       * x-java.c (struct string_buffer): Add lcontext field.
+       (init_string_buffer): Add lcontext argument.
+       (string_buffer_flush_curr_buffer): Update from_current_source_encoding
+       call.
+       (comment_start): Set lcontext.
+       (phase5_get): Pass lcontext argument.
+       * x-perl.c (get_here_document, phase2_getc,
+       extract_quotelike_pass1_utf8): Pass lcontext argument.
+       * x-python.c (lexical_context): New variable.
+       (phase2_getc): Use non_ascii_error_message.
+       (comment_start, comment_line_end, phase6_get): Set lexical_context.
+       (struct mixed_string_buffer): Add lcontext field.
+       (init_mixed_string_buffer): Add lcontext argument.
+       (mixed_string_buffer_flush_curr_buffer): Update
+       from_current_source_encoding call.
+       (phase5_get): Set lexical_context. Pass lcontext argument.
+       (extract_python): Initialize lexical_context.
+       Reported by Mark Weyer.
+
  2008-04-20  Bruno Haible  <bruno@clisp.org>
  
         * xgettext.c (usage): Clarify single-letter options that take an
diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c

index 362c84eeda6687d180026b9613e4148923d88216..9c73fc8ca7f94ff8ed6a7fed562e6ac8802866ae 100644 (file)
--- a/gettext-tools/src/x-csharp.c
+++ b/gettext-tools/src/x-csharp.c
@@ -1,5 +1,5 @@
  /* xgettext C# backend.
-   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2003.
  
     This program is free software: you can redistribute it and/or modify
@@ -198,6 +198,8 @@ phase1_ungetc (int c)
  /* Newline Unicode character.  */
  #define UNL 0x000a
  
+static lexical_context_ty lexical_context;
+
  static int phase2_pushback[1];
  static int phase2_pushback_length;
  
@@ -215,13 +217,13 @@ phase2_getc ()
         return UEOF;
        if (!c_isascii (c))
         {
-         char buffer[21];
-         sprintf (buffer, ":%ld", (long) line_number);
           multiline_error (xstrdup (""),
-                          xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
-                          real_file_name, buffer));
+                          xasprintf ("%s\n%s\n",
+                                     non_ascii_error_message (lexical_context,
+                                                              real_file_name,
+                                                              line_number),
+                                     _("\
+Please specify the source encoding through --from-code.")));
           exit (EXIT_FAILURE);
         }
        return c;
@@ -595,6 +597,7 @@ static struct string_buffer comment_buffer;
  static inline void
  comment_start ()
  {
+  lexical_context = lc_comment;
    comment_buffer.utf8_buflen = 0;
  }
  
@@ -622,6 +625,7 @@ comment_line_end (size_t chars_to_remove)
      --buflen;
    buffer[buflen] = '\0';
    savable_comment_add (buffer);
+  lexical_context = lc_outside;
  }
  
  
@@ -1636,11 +1640,13 @@ phase6_get (token_ty *tp)
           {
             struct string_buffer literal;
  
+           lexical_context = lc_string;
             init_string_buffer (&literal);
             accumulate_escaped (&literal, '"');
             tp->string = xstrdup (string_buffer_result (&literal));
             free_string_buffer (&literal);
             tp->comment = add_reference (savable_comment);
+           lexical_context = lc_outside;
             tp->type = token_type_string_literal;
             return;
           }
@@ -1680,6 +1686,7 @@ phase6_get (token_ty *tp)
               /* Verbatim string literal.  */
               struct string_buffer literal;
  
+             lexical_context = lc_string;
               init_string_buffer (&literal);
               for (;;)
                 {
@@ -1703,6 +1710,7 @@ phase6_get (token_ty *tp)
               tp->string = xstrdup (string_buffer_result (&literal));
               free_string_buffer (&literal);
               tp->comment = add_reference (savable_comment);
+             lexical_context = lc_outside;
               tp->type = token_type_string_literal;
               return;
             }
@@ -2118,6 +2126,8 @@ extract_csharp (FILE *f,
    logical_file_name = xstrdup (logical_filename);
    line_number = 1;
  
+  lexical_context = lc_outside;
+
    logical_line_number = 1;
    last_comment_line = -1;
    last_non_comment_line = -1;
diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c

index 75481ba422cbfbd7f4d8022f13fff3258cea28aa..ae32e64b4afe48445e3c0c5f278b39c64304b74f 100644 (file)
--- a/gettext-tools/src/x-java.c
+++ b/gettext-tools/src/x-java.c
@@ -1,5 +1,5 @@
  /* xgettext Java backend.
-   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2005-2008 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2003.
  
     This program is free software: you can redistribute it and/or modify
@@ -401,11 +401,13 @@ struct string_buffer
    char *curr_buffer;
    size_t curr_buflen;
    size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
  };
  
  /* Initialize a 'struct string_buffer' to empty.  */
  static inline void
-init_string_buffer (struct string_buffer *bp)
+init_string_buffer (struct string_buffer *bp, lexical_context_ty lcontext)
  {
    bp->utf8_buffer = NULL;
    bp->utf8_buflen = 0;
@@ -414,6 +416,7 @@ init_string_buffer (struct string_buffer *bp)
    bp->curr_buffer = NULL;
    bp->curr_buflen = 0;
    bp->curr_allocated = 0;
+  bp->lcontext = lcontext;
  }
  
  /* Auxiliary function: Append a byte to bp->curr.  */
@@ -518,7 +521,7 @@ string_buffer_flush_curr_buffer (struct string_buffer *bp, int lineno)
        string_buffer_append_byte (bp, '\0');
  
        /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer,
+      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
                                            logical_file_name, lineno);
  
        /* Append it to bp->utf8_buffer.  */
@@ -625,6 +628,7 @@ comment_start ()
    comment_buffer.utf8_buflen = 0;
    comment_buffer.utf16_surr = 0;
    comment_buffer.curr_buflen = 0;
+  comment_buffer.lcontext = lc_comment;
  }
  
  static inline bool
@@ -1042,7 +1046,7 @@ phase5_get (token_ty *tp)
           {
             struct string_buffer literal;
  
-           init_string_buffer (&literal);
+           init_string_buffer (&literal, lc_string);
             accumulate_escaped (&literal, '"');
             tp->string = xstrdup (string_buffer_result (&literal));
             free_string_buffer (&literal);
@@ -1056,7 +1060,7 @@ phase5_get (token_ty *tp)
           {
             struct string_buffer literal;
  
-           init_string_buffer (&literal);
+           init_string_buffer (&literal, lc_outside);
             accumulate_escaped (&literal, '\'');
             free_string_buffer (&literal);
             tp->type = token_type_other;
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c

index 26ff5124c6f6868777c64f99943b538fe5c2bc99..d468e0d2b6524c2820abd874831f351ccb89f4d5 100644 (file)
--- a/gettext-tools/src/x-perl.c
+++ b/gettext-tools/src/x-perl.c
@@ -1,5 +1,5 @@
  /* xgettext Perl backend.
-   Copyright (C) 2002-2007 Free Software Foundation, Inc.
+   Copyright (C) 2002-2008 Free Software Foundation, Inc.
  
     This file was written by Guido Flohr <guido@imperia.net>, 2002-2003.
  
@@ -317,7 +317,7 @@ get_here_document (const char *delimiter)
  
        /* Convert to UTF-8.  */
        my_line_utf8 =
-       from_current_source_encoding (my_linebuf, logical_file_name,
+       from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
                                       line_number + here_eaten);
        if (my_line_utf8 != my_linebuf)
         {
@@ -466,7 +466,8 @@ phase2_getc ()
        buffer[buflen] = '\0';
        /* Convert it to UTF-8.  */
        utf8_string =
-       from_current_source_encoding (buffer, logical_file_name, lineno);
+       from_current_source_encoding (buffer, lc_comment, logical_file_name,
+                                     lineno);
        /* Save it until we encounter the corresponding string.  */
        savable_comment_add (utf8_string);
        last_comment_line = lineno;
@@ -735,7 +736,8 @@ extract_quotelike_pass1_utf8 (int delim)
  {
    char *string = extract_quotelike_pass1 (delim);
    char *utf8_string =
-    from_current_source_encoding (string, logical_file_name, line_number);
+    from_current_source_encoding (string, lc_string, logical_file_name,
+                                 line_number);
    if (utf8_string != string)
      free (string);
    return utf8_string;
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c

index 8597a41353f211e2c0fa57d229de6c42347a5d7d..e6dcc3190ce307535377c9cf7611d661956a838f 100644 (file)
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
  /* xgettext Python backend.
-   Copyright (C) 2002-2003, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2002-2003, 2005-2008 Free Software Foundation, Inc.
  
     This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
  
@@ -210,6 +210,8 @@ phase1_ungetc (int c)
  /* End-of-file indicator for functions returning an UCS-4 character.  */
  #define UEOF -1
  
+static lexical_context_ty lexical_context;
+
  static int phase2_pushback[max (9, UNINAME_MAX + 3)];
  static int phase2_pushback_length;
  
@@ -227,14 +229,14 @@ phase2_getc ()
         return UEOF;
        if (!c_isascii (c))
         {
-         char buffer[21];
-         sprintf (buffer, ":%ld", (long) line_number);
           multiline_error (xstrdup (""),
-                          xasprintf (_("\
-Non-ASCII string at %s%s.\n\
+                          xasprintf ("%s\n%s\n",
+                                     non_ascii_error_message (lexical_context,
+                                                              real_file_name,
+                                                              line_number),
+                                     _("\
  Please specify the source encoding through --from-code or through a comment\n\
-as specified in http://www.python.org/peps/pep-0263.html.\n"),
-                          real_file_name, buffer));
+as specified in http://www.python.org/peps/pep-0263.html.\n")));
           exit (EXIT_FAILURE);
         }
        return c;
@@ -539,6 +541,7 @@ static struct unicode_string_buffer comment_buffer;
  static inline void
  comment_start ()
  {
+  lexical_context = lc_comment;
    comment_buffer.utf8_buflen = 0;
  }
  
@@ -565,6 +568,7 @@ comment_line_end ()
      --buflen;
    buffer[buflen] = '\0';
    savable_comment_add (buffer);
+  lexical_context = lc_outside;
    return buffer;
  }
  
@@ -793,11 +797,13 @@ struct mixed_string_buffer
    char *curr_buffer;
    size_t curr_buflen;
    size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
  };
  
  /* Initialize a 'struct mixed_string_buffer' to empty.  */
  static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp)
+init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
  {
    bp->utf8_buffer = NULL;
    bp->utf8_buflen = 0;
@@ -806,6 +812,7 @@ init_mixed_string_buffer (struct mixed_string_buffer *bp)
    bp->curr_buffer = NULL;
    bp->curr_buflen = 0;
    bp->curr_allocated = 0;
+  bp->lcontext = lcontext;
  }
  
  /* Auxiliary function: Append a byte to bp->curr.  */
@@ -875,7 +882,7 @@ mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int linen
        mixed_string_buffer_append_byte (bp, '\0');
  
        /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer,
+      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
                                            logical_file_name, lineno);
  
        /* Append it to bp->utf8_buffer.  */
@@ -1510,6 +1517,7 @@ phase5_get (token_ty *tp)
               interpret_unicode = false;
             string:
               triple = false;
+             lexical_context = lc_string;
               {
                 int c1 = phase2_getc ();
                 if (c1 == quote_char)
@@ -1528,7 +1536,7 @@ phase5_get (token_ty *tp)
               }
               backslash_counter = 0;
               /* Start accumulating the string.  */
-             init_mixed_string_buffer (&literal);
+             init_mixed_string_buffer (&literal, lc_string);
               for (;;)
                 {
                   int uc = phase7_getuc (quote_char, triple, interpret_ansic,
@@ -1546,6 +1554,7 @@ phase5_get (token_ty *tp)
               tp->string = xstrdup (mixed_string_buffer_result (&literal));
               free_mixed_string_buffer (&literal);
               tp->comment = add_reference (savable_comment);
+             lexical_context = lc_outside;
               tp->type = token_type_string;
               return;
           }
@@ -1788,6 +1797,8 @@ extract_python (FILE *f,
    logical_file_name = xstrdup (logical_filename);
    line_number = 1;
  
+  lexical_context = lc_outside;
+
    last_comment_line = -1;
    last_non_comment_line = -1;
  
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index dc7af28789b0882bf52086ea8f4c7beac7052210..c9d4c2ae163b131ee259fd37ed6cd769f1312c9d 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -1871,29 +1871,60 @@ extract_from_file (const char *file_name, extractor_ty extractor,
  
  
  
+/* Return an error message about a non-ASCII character found in the lexical
+   context LCONTEXT: outside of comments and strings, in a comment, or in a
+   string literal.
+   FILE_NAME and LINE_NUMBER are the position to report.  A LINE_NUMBER of
+   (size_t)(-1) suppresses the ":<line>" suffix, so that only FILE_NAME is
+   mentioned.
+   The return value is the string built by xasprintf.
+   Aborts if LCONTEXT is none of the lexical_context_ty values.  */
+char *
+non_ascii_error_message (lexical_context_ty lcontext,
+                        const char *file_name, size_t line_number)
+{
+  /* Holds ":" followed by the line number, or the empty string.
+     3 decimal digits per byte are an upper bound for 8-bit bytes, so the
+     buffer cannot overflow regardless of the width of 'unsigned long'.  */
+  char buffer[1 + 3 * sizeof (unsigned long) + 1];
+  char *errmsg;
+
+  if (line_number == (size_t)(-1))
+    buffer[0] = '\0';
+  else
+    /* Print as unsigned: a size_t above LONG_MAX must not show up as a
+       negative line number.  */
+    sprintf (buffer, ":%lu", (unsigned long) line_number);
+
+  switch (lcontext)
+    {
+    case lc_outside:
+      errmsg =
+       xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
+      break;
+    case lc_comment:
+      errmsg =
+       xasprintf (_("Non-ASCII comment at or before %s%s."),
+                  file_name, buffer);
+      break;
+    case lc_string:
+      errmsg =
+       xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
+      break;
+    default:
+      /* Not a valid lexical_context_ty value.  */
+      abort ();
+    }
+  return errmsg;
+}
+
  /* Convert the given string from xgettext_current_source_encoding to
     the output file encoding (i.e. ASCII or UTF-8).
     The resulting string is either the argument string, or freshly allocated.
-   The file_name and line_number are only used for error message purposes.  */
+   The lcontext, file_name and line_number are only used for error message
+   purposes.  */
  char *
  from_current_source_encoding (const char *string,
+                             lexical_context_ty lcontext,
                               const char *file_name, size_t line_number)
  {
    if (xgettext_current_source_encoding == po_charset_ascii)
      {
        if (!is_ascii_string (string))
         {
-         char buffer[21];
-
-         if (line_number == (size_t)(-1))
-           buffer[0] = '\0';
-         else
-           sprintf (buffer, ":%ld", (long) line_number);
           multiline_error (xstrdup (""),
-                          xasprintf (_("\
-Non-ASCII string at %s%s.\n\
-Please specify the source encoding through --from-code.\n"),
-                                     file_name, buffer));
+                          xasprintf ("%s\n%s\n",
+                                     non_ascii_error_message (lcontext,
+                                                              file_name,
+                                                              line_number),
+                                     _("\
+Please specify the source encoding through --from-code.")));
           exit (EXIT_FAILURE);
         }
      }
@@ -1920,8 +1951,8 @@ Please specify the source encoding through --from-code.\n"),
    return (char *) string;
  }
  
-#define CONVERT_STRING(string) \
-  string = from_current_source_encoding (string, pos->file_name, \
+#define CONVERT_STRING(string, lcontext) \
+  string = from_current_source_encoding (string, lcontext, pos->file_name, \
                                          pos->line_number);
  
  
@@ -2045,8 +2076,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
    do_wrap = undecided;
  
    if (msgctxt != NULL)
-    CONVERT_STRING (msgctxt);
-  CONVERT_STRING (msgid);
+    CONVERT_STRING (msgctxt, lc_string);
+  CONVERT_STRING (msgid, lc_string);
  
    if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
      {
@@ -2114,7 +2145,7 @@ meta information, not the empty string.\n")));
         if (s == NULL)
           break;
  
-       CONVERT_STRING (s);
+       CONVERT_STRING (s, lc_comment);
  
         /* To reduce the possibility of unwanted matches we do a two
            step match: the line must contain `xgettext:' and one of
@@ -2271,7 +2302,7 @@ remember_a_message_plural (message_ty *mp, char *string,
  
    savable_comment_to_xgettext_comment (comment);
  
-  CONVERT_STRING (msgid_plural);
+  CONVERT_STRING (msgid_plural, lc_string);
  
    /* See if the message is already a plural message.  */
    if (mp->msgid_plural == NULL)
diff --git a/gettext-tools/src/xgettext.h b/gettext-tools/src/xgettext.h

index 570dacd05c5f6a1252f8d7162453e24858d33f17..0d84c45631b80c709137fcf3c096d6c832d2ad38 100644 (file)
--- a/gettext-tools/src/xgettext.h
+++ b/gettext-tools/src/xgettext.h
@@ -1,5 +1,5 @@
  /* xgettext common functions.
-   Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2005-2006, 2008 Free Software Foundation, Inc.
     Written by Peter Miller <millerp@canb.auug.org.au>
     and Bruno Haible <haible@clisp.cons.org>, 2001.
  
@@ -136,6 +136,21 @@ extern flag_context_list_ty *
  extern void xgettext_record_flag (const char *optionstring);
  
  
+/* Context while building up lexical tokens.
+   It tells whether the text being scanned lies inside a comment, inside a
+   string literal, or in neither of them.  non_ascii_error_message uses it
+   to choose the wording of the error message.  */
+typedef enum
+  {
+    lc_outside, /* Initial context: outside of comments and strings.  */
+    lc_comment, /* Inside a comment.  */
+    lc_string   /* Inside a string literal.  */
+  }
+  lexical_context_ty;
+
+/* Error message about non-ASCII character in a specific lexical context.  */
+extern char *non_ascii_error_message (lexical_context_ty lcontext,
+                                     const char *file_name,
+                                     size_t line_number);
+
+
  /* Canonicalized encoding name for all input files.  */
  extern const char *xgettext_global_source_encoding;
  
@@ -157,8 +172,10 @@ extern iconv_t xgettext_current_source_iconv;
  /* Convert the given string from xgettext_current_source_encoding to
     the output file encoding (i.e. ASCII or UTF-8).
     The resulting string is either the argument string, or freshly allocated.
-   The file_name and line_number are only used for error message purposes.  */
+   The lcontext, file_name and line_number are only used for error message
+   purposes.  */
  extern char *from_current_source_encoding (const char *string,
+                                          lexical_context_ty lcontext,
                                            const char *file_name,
                                            size_t line_number);
author	Bruno Haible <bruno@clisp.org>
	Sun, 20 Apr 2008 05:23:52 +0000 (05:23 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 23 Jun 2009 10:15:39 +0000 (12:15 +0200)
gettext-tools/src/ChangeLog		patch \| blob \| blame \| history
gettext-tools/src/x-csharp.c		patch \| blob \| blame \| history
gettext-tools/src/x-java.c		patch \| blob \| blame \| history
gettext-tools/src/x-perl.c		patch \| blob \| blame \| history
gettext-tools/src/x-python.c		patch \| blob \| blame \| history
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/src/xgettext.h		patch \| blob \| blame \| history