From: Daiki Ueno <ueno@gnu.org>
Date: Wed, 7 May 2014 09:51:14 +0000 (+0900)
Subject: c: Interpret string literals lazily
X-Git-Tag: v0.19~44
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8e319a8bc7e535c5e2b0475d46aecdec0bf89dbe;p=thirdparty%2Fgettext.git

c: Interpret string literals lazily

* x-c.c (P7_EOF, P7_STRING_END, P7_QUOTES, P7_QUOTE, P7_NEWLINE)
(UNICODE, IS_UNICODE, UNICODE_VALUE): Remove.
(phase7_getc): Remove.
(phase7_ungetc): Remove.
(phase5_get): Use 'phase3_getc' directly to extract string
literals; use 'arglist_parser_remember_literal' instead of
'arglist_parser_remember'.
(literalstring_parse): New function.
(literalstring_c): New variable.
(extract_parenthesized): Remove the
'xgettext_current_source_encoding' setting to prevent encoding
conversion around 'arglist_parser_done'.
* x-c.h (SCANNERS_C): Register 'literalstring_c' as a
literalstring_parser.
(literalstring_c): New variable declaration.
---

diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 26a4b151a..510a38d16 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,22 @@
+2014-05-09  Daiki Ueno  <ueno@gnu.org>
+
+	c: Interpret string literals lazily
+	* x-c.c (P7_EOF, P7_STRING_END, P7_QUOTES, P7_QUOTE, P7_NEWLINE)
+	(UNICODE, IS_UNICODE, UNICODE_VALUE): Remove.
+	(phase7_getc): Remove.
+	(phase7_ungetc): Remove.
+	(phase5_get): Use 'phase3_getc' directly to extract string
+	literals; use 'arglist_parser_remember_literal' instead of
+	'arglist_parser_remember'.
+	(literalstring_parse): New function.
+	(literalstring_c): New variable.
+	(extract_parenthesized): Remove the
+	'xgettext_current_source_encoding' setting to prevent encoding
+	conversion around 'arglist_parser_done'.
+	* x-c.h (SCANNERS_C): Register 'literalstring_c' as a
+	literalstring_parser.
+	(literalstring_c): New variable declaration.
+
 2014-05-09  Daiki Ueno  <ueno@gnu.org>
 
 	xgettext: Provide a way to interpret string literals lazily
diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c
index 9050433b5..2ae2f3c9e 100644
--- a/gettext-tools/src/x-c.c
+++ b/gettext-tools/src/x-c.c
@@ -860,228 +860,226 @@ struct token_ty
 };
 
 
-/* 7. Replace escape sequences within character strings with their
-   single character equivalents.  This is called from phase 5, because
-   we don't have to worry about the #include argument.  There are
-   pathological cases which could bite us (like the DOS directory
-   separator), but just pretend it can't happen.  */
-
-/* Return value of phase7_getc when EOF is reached.  */
-#define P7_EOF (-1)
-#define P7_STRING_END (-2)
-
-/* Replace escape sequences within character strings with their single
-   character equivalents.  */
-#define P7_QUOTES (-3)
-#define P7_QUOTE (-4)
-#define P7_NEWLINE (-5)
-
-/* Convert an UTF-16 or UTF-32 code point to a return value that can be
-   distinguished from a single-byte return value.  */
-#define UNICODE(code) (0x100 + (code))
-
-/* Test a return value of phase7_getuc whether it designates an UTF-16 or
-   UTF-32 code point.  */
-#define IS_UNICODE(p7_result) ((p7_result) >= 0x100)
-
-/* Extract the UTF-16 or UTF-32 code of a return value that satisfies
-   IS_UNICODE.  */
-#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_name || tp->type == token_type_string_literal)
+    free (tp->string);
+  if (tp->type == token_type_string_literal
+      || tp->type == token_type_objc_special)
+    drop_reference (tp->comment);
+}
 
 
-static int
-phase7_getc ()
+static char *
+literalstring_parse (const char *string, lex_pos_ty *pos,
+                     enum literalstring_escape_type type)
 {
-  int c, n, j;
+  struct mixed_string_buffer *bp;
+  const char *p;
 
-  /* Use phase 3, because phase 4 elides comments.  */
-  c = phase3_getc ();
+  /* Start accumulating the string.  */
+  bp = mixed_string_buffer_alloc (lc_string,
+                                  logical_file_name,
+                                  line_number);
 
-  /* Return a magic newline indicator, so that we can distinguish
-     between the user requesting a newline in the string (e.g. using
-     "\n" or "\012") from the user failing to terminate the string or
-     character constant.  The ANSI C standard says: 3.1.3.4 Character
-     Constants contain "any character except single quote, backslash or
-     newline; or an escape sequence" and 3.1.4 String Literals contain
-     "any character except double quote, backslash or newline; or an
-     escape sequence".
-
-     Most compilers give a fatal error in this case, however gcc is
-     stupidly silent, even though this is a very common typo.  OK, so
-     "gcc --pedantic" will tell me, but that gripes about too much other
-     stuff.  Could I have a "gcc -Wnewline-in-string" option, or
-     better yet a "gcc -fno-newline-in-string" option, please?  Gcc is
-     also inconsistent between string literals and character constants:
-     you may not embed newlines in character constants; try it, you get
-     a useful diagnostic.  --PMiller  */
-  if (c == '\n')
-    return P7_NEWLINE;
-
-  if (c == '"')
-    return P7_QUOTES;
-  if (c == '\'')
-    return P7_QUOTE;
-  if (c != '\\')
-    return c;
-  c = phase3_getc ();
-  switch (c)
+  for (p = string; *p != '\0'; p++)
     {
-    default:
-      /* Unknown escape sequences really should be an error, but just
-         ignore them, and let the real compiler complain.  */
-      phase3_ungetc (c);
-      return '\\';
-
-    case '"':
-    case '\'':
-    case '?':
-    case '\\':
-      return c;
+      int c;
 
-    case 'a':
-      return '\a';
-    case 'b':
-      return '\b';
-
-      /* The \e escape is preculiar to gcc, and assumes an ASCII
-         character set (or superset).  We don't provide support for it
-         here.  */
-
-    case 'f':
-      return '\f';
-    case 'n':
-      return '\n';
-    case 'r':
-      return '\r';
-    case 't':
-      return '\t';
-    case 'v':
-      return '\v';
-
-    case 'x':
-      c = phase3_getc ();
-      switch (c)
+      if (*p != '\\')
         {
-        default:
-          phase3_ungetc (c);
-          phase3_ungetc ('x');
-          return '\\';
-
-        case '0': case '1': case '2': case '3': case '4':
-        case '5': case '6': case '7': case '8': case '9':
-        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-          break;
-        }
-      n = 0;
-      for (;;)
-        {
-          switch (c)
-            {
-            default:
-              phase3_ungetc (c);
-              return n;
-
-            case '0': case '1': case '2': case '3': case '4':
-            case '5': case '6': case '7': case '8': case '9':
-              n = n * 16 + c - '0';
-              break;
-
-            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-              n = n * 16 + 10 + c - 'A';
-              break;
-
-            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-              n = n * 16 + 10 + c - 'a';
-              break;
-            }
-          c = phase3_getc ();
+          mixed_string_buffer_append_char (bp, *p);
+          continue;
         }
-      return n;
 
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      n = 0;
-      for (j = 0; j < 3; ++j)
+      if (!(type & LET_ANSI_C) && !(type & LET_UNICODE))
         {
-          n = n * 8 + c - '0';
-          c = phase3_getc ();
-          switch (c)
-            {
-            default:
-              break;
-
-            case '0': case '1': case '2': case '3':
-            case '4': case '5': case '6': case '7':
-              continue;
-            }
-          break;
+          mixed_string_buffer_append_char (bp, '\\');
+          continue;
         }
-      phase3_ungetc (c);
-      return n;
 
-    case 'U': case 'u':
-      {
-        unsigned char buf[8];
+      c = *++p;
 
-        n = 0;
-        for (j = 0; j < (c == 'u' ? 4 : 8); j++)
+      if (type & LET_ANSI_C)
+        switch (c)
           {
-            int c1 = phase3_getc ();
-
-            if (c1 >= '0' && c1 <= '9')
-              n = (n << 4) + (c1 - '0');
-            else if (c1 >= 'A' && c1 <= 'F')
-              n = (n << 4) + (c1 - 'A' + 10);
-            else if (c1 >= 'a' && c1 <= 'f')
-              n = (n << 4) + (c1 - 'a' + 10);
-            else
+          case '"':
+          case '\'':
+          case '?':
+          case '\\':
+            mixed_string_buffer_append_char (bp, c);
+            continue;
+
+          case 'a':
+            mixed_string_buffer_append_char (bp, '\a');
+            continue;
+          case 'b':
+            mixed_string_buffer_append_char (bp, '\b');
+            continue;
+
+            /* The \e escape is peculiar to gcc, and assumes an ASCII
+               character set (or superset).  We don't provide support for it
+               here.  */
+
+          case 'f':
+            mixed_string_buffer_append_char (bp, '\f');
+            continue;
+          case 'n':
+            mixed_string_buffer_append_char (bp, '\n');
+            continue;
+          case 'r':
+            mixed_string_buffer_append_char (bp, '\r');
+            continue;
+          case 't':
+            mixed_string_buffer_append_char (bp, '\t');
+            continue;
+          case 'v':
+            mixed_string_buffer_append_char (bp, '\v');
+            continue;
+
+          case 'x':
+            c = *++p;
+            switch (c)
               {
-                phase3_ungetc (c1);
-                while (--j >= 0)
-                  phase3_ungetc (buf[j]);
-                phase3_ungetc (c);
-                return '\\';
+              default:
+                mixed_string_buffer_append_char (bp, '\\');
+                mixed_string_buffer_append_char (bp, 'x');
+                --p; /* Rescan C: it may be the NUL or a backslash.  */
+                break;
+
+              case '0': case '1': case '2': case '3': case '4':
+              case '5': case '6': case '7': case '8': case '9':
+              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                {
+                  int n;
+
+                  for (n = 0; ; ++p)
+                    {
+                      switch (*p)
+                        {
+                        default:
+                          break;
+
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                          n = n * 16 + *p - '0';
+                          continue;
+
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                          n = n * 16 + 10 + *p - 'A';
+                          continue;
+
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                          n = n * 16 + 10 + *p - 'a';
+                          continue;
+                        }
+                      break;
+                    }
+
+                  mixed_string_buffer_append_char (bp, n);
+                  --p;
+                }
+                break;
               }
+            continue;
+
+          case '0': case '1': case '2': case '3':
+          case '4': case '5': case '6': case '7':
+            {
+              int n, j;
+
+              /* Gather at most three octal digits.  C must follow P
+                 from digit to digit: if it stayed on the first
+                 digit, that digit would be folded in on every round
+                 and "\012" would come out as 0 rather than 10.
+                 When the loop ends, P is on the first character
+                 past the escape; the --p below lets the outer
+                 loop's p++ land there.  */
+              for (n = 0, j = 0; j < 3; ++j)
+                {
+                  n = n * 8 + c - '0';
+                  c = *++p;
+                  if (c < '0' || c > '7')
+                    break;
+                }
 
-            buf[j] = c1;
+              mixed_string_buffer_append_char (bp, n);
+              --p;
+            }
+            continue;
           }
 
-        if (n < 0x110000)
-          return UNICODE (n);
+      if (type & LET_UNICODE)
+        switch (c)
+          {
+          case 'U': case 'u':
+            {
+              unsigned char buf[8];
+              int length = c == 'u' ? 4 : 8;
+              int n, j;
 
-        error_with_progname = false;
-        error (0, 0, _("%s:%d: warning: invalid Unicode character"),
-               logical_file_name, line_number);
-        error_with_progname = true;
+              for (n = 0, j = 0; j < length; j++)
+                {
+                  int c1 = *++p;
+
+                  if (c1 >= '0' && c1 <= '9')
+                    n = (n << 4) + (c1 - '0');
+                  else if (c1 >= 'A' && c1 <= 'F')
+                    n = (n << 4) + (c1 - 'A' + 10);
+                  else if (c1 >= 'a' && c1 <= 'f')
+                    n = (n << 4) + (c1 - 'a' + 10);
+                  else
+                    break;
+
+                  buf[j] = c1;
+                }
 
-        while (--j >= 0)
-          phase3_ungetc (buf[j]);
-        phase3_ungetc (c);
-        return '\\';
-      }
-    }
-}
+              if (j == length)
+                {
+                  if (n < 0x110000)
+                    mixed_string_buffer_append_unicode (bp, n);
+                  else
+                    {
+                      error_with_progname = false;
+                      error_at_line (0, 0,
+                                     pos->file_name, pos->line_number,
+                                     _("\
+warning: invalid Unicode character"));
+                      error_with_progname = true;
+                    }
+                }
+              else
+                {
+                  int i;
 
+                  mixed_string_buffer_append_char (bp, '\\');
+                  mixed_string_buffer_append_char (bp, c);
 
-static void
-phase7_ungetc (int c)
-{
-  phase3_ungetc (c);
-}
+                  for (i = 0; i < j; i++)
+                    mixed_string_buffer_append_char (bp, buf[i]);
 
+                  --p;
+                }
+            }
+            continue;
+          }
 
-/* Free the memory pointed to by a 'struct token_ty'.  */
-static inline void
-free_token (token_ty *tp)
-{
-  if (tp->type == token_type_name || tp->type == token_type_string_literal)
-    free (tp->string);
-  if (tp->type == token_type_string_literal
-      || tp->type == token_type_objc_special)
-    drop_reference (tp->comment);
+      mixed_string_buffer_append_char (bp, c);
+    }
+
+  return mixed_string_buffer_done (bp);
 }
 
+struct literalstring_parser literalstring_c =
+  {
+    literalstring_parse
+  };
+
 
 /* 5. Parse each resulting logical line as preprocessing tokens and
    white space.  Preprocessing tokens and C tokens don't always match.  */
@@ -1097,6 +1095,7 @@ phase5_get (token_ty *tp)
   static int bufmax;
   int bufpos;
   int c;
+  int last_was_backslash;
 
   if (phase5_pushback_length)
     {
@@ -1276,19 +1275,30 @@ phase5_get (token_ty *tp)
          but ignoring it has no effect unless one of the keywords is
          "L".  Just pretend it won't happen.  Also, we don't need to
          remember the character constant.  */
+      last_was_backslash = false;
       for (;;)
         {
-          c = phase7_getc ();
-          if (c == P7_NEWLINE)
+          c = phase3_getc ();
+          if (last_was_backslash)
             {
+              last_was_backslash = false;
+              continue;
+            }
+          switch (c)
+            {
+            case '\\':
+              last_was_backslash = true;
+              continue;
+            case '\n':
               error_with_progname = false;
               error (0, 0, _("%s:%d: warning: unterminated character constant"),
                      logical_file_name, line_number - 1);
               error_with_progname = true;
-              phase7_ungetc ('\n');
+              phase3_ungetc ('\n');
+              break;
+            case EOF: case '\'':
               break;
             }
-          if (c == EOF || c == P7_QUOTE)
             break;
         }
       tp->type = token_type_character_constant;
@@ -1296,49 +1306,62 @@ phase5_get (token_ty *tp)
 
     case '"':
       {
-        struct mixed_string_buffer *bp;
-
-        /* Start accumulating the string.  */
-        bp = mixed_string_buffer_alloc (lc_string,
-                                        logical_file_name,
-                                        line_number);
-
         /* We could worry about the 'L' before wide string constants,
            but since gettext's argument is not a wide character string,
            let the compiler complain about the argument not matching the
            prototype.  Just pretend it won't happen.  */
+        last_was_backslash = false;
+        bufpos = 0;
         for (;;)
           {
-            c = phase7_getc ();
-
-            /* Keep line_number in sync.  */
-            bp->line_number = line_number;
-
-            if (c == P7_NEWLINE)
+            c = phase3_getc ();
+            if (last_was_backslash)
+              {
+                last_was_backslash = false;
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+                continue;
+              }
+            switch (c)
               {
+              case '\\':
+                last_was_backslash = true;
+                /* FALLTHROUGH */
+              default:
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+                continue;
+
+              case '\n':
                 error_with_progname = false;
                 error (0, 0, _("%s:%d: warning: unterminated string literal"),
                        logical_file_name, line_number - 1);
                 error_with_progname = true;
-                phase7_ungetc ('\n');
+                phase3_ungetc ('\n');
+                break;
+              case EOF: case '"':
                 break;
               }
-            if (c == EOF || c == P7_QUOTES)
-              break;
-            if (c == P7_QUOTE)
-              c = '\'';
-            if (IS_UNICODE (c))
-              {
-                assert (UNICODE_VALUE (c) >= 0
-                        && UNICODE_VALUE (c) < 0x110000);
-                mixed_string_buffer_append_unicode (bp,
-                                                    UNICODE_VALUE (c));
-              }
-            else
-              mixed_string_buffer_append_char (bp, c);
+            break;
           }
+        /* Make room for the terminating NUL; the loop above only
+           guarantees space for the characters it actually stored.  */
+        if (bufpos >= bufmax)
+          {
+            bufmax = 2 * bufmax + 10;
+            buffer = xrealloc (buffer, bufmax);
+          }
+        buffer[bufpos] = 0;
         tp->type = token_type_string_literal;
-        tp->string = mixed_string_buffer_done (bp);
+        tp->string = xstrdup (buffer);
         tp->comment = add_reference (savable_comment);
         return;
       }
@@ -1914,10 +1930,7 @@ extract_parenthesized (message_list_ty *mlp,
                                      arglist_parser_alloc (mlp,
                                                            state ? next_shapes : NULL)))
             {
-              xgettext_current_source_encoding = po_charset_utf8;
               arglist_parser_done (argparser, arg);
-              xgettext_current_source_encoding =
-                xgettext_global_source_encoding;
               return true;
             }
           next_context_iter = null_context_list_iterator;
@@ -1926,9 +1939,7 @@ extract_parenthesized (message_list_ty *mlp,
           continue;
 
         case xgettext_token_type_rparen:
-          xgettext_current_source_encoding = po_charset_utf8;
           arglist_parser_done (argparser, arg);
-          xgettext_current_source_encoding = xgettext_global_source_encoding;
           return false;
 
         case xgettext_token_type_comma:
@@ -1962,16 +1973,41 @@ extract_parenthesized (message_list_ty *mlp,
           continue;
 
         case xgettext_token_type_string_literal:
-          xgettext_current_source_encoding = po_charset_utf8;
           if (extract_all)
-            remember_a_message (mlp, NULL, token.string, inner_context,
-                                &token.pos, NULL, token.comment);
+            {
+              char *string;
+              refcounted_string_list_ty *comment;
+              const char *encoding;
+
+              string = literalstring_parse (token.string, &token.pos,
+                                            LET_ANSI_C | LET_UNICODE);
+              free (token.string);
+              token.string = string;
+
+              if (token.comment != NULL)
+                {
+                  comment = savable_comment_convert_encoding (token.comment,
+                                                              &token.pos);
+                  drop_reference (token.comment);
+                  token.comment = comment;
+                }
+
+              /* token.string and token.comment are already converted
+                 to UTF-8.  Prevent further conversion in
+                 remember_a_message.  */
+              encoding = xgettext_current_source_encoding;
+              xgettext_current_source_encoding = po_charset_utf8;
+              remember_a_message (mlp, NULL, token.string, inner_context,
+                                  &token.pos, NULL, token.comment);
+              xgettext_current_source_encoding = encoding;
+            }
           else
-            arglist_parser_remember (argparser, arg, token.string,
-                                     inner_context,
-                                     token.pos.file_name, token.pos.line_number,
-                                     token.comment);
-          xgettext_current_source_encoding = xgettext_global_source_encoding;
+            arglist_parser_remember_literal (argparser, arg, token.string,
+                                             inner_context,
+                                             token.pos.file_name,
+                                             token.pos.line_number,
+                                             token.comment,
+                                             LET_ANSI_C | LET_UNICODE);
           drop_reference (token.comment);
           next_context_iter = null_context_list_iterator;
           selectorcall_context_iter = null_context_list_iterator;
@@ -1985,9 +2021,7 @@ extract_parenthesized (message_list_ty *mlp,
           continue;
 
         case xgettext_token_type_eof:
-          xgettext_current_source_encoding = po_charset_utf8;
           arglist_parser_done (argparser, arg);
-          xgettext_current_source_encoding = xgettext_global_source_encoding;
           return true;
 
         default:
diff --git a/gettext-tools/src/x-c.h b/gettext-tools/src/x-c.h
index 28c5b9282..85a31414f 100644
--- a/gettext-tools/src/x-c.h
+++ b/gettext-tools/src/x-c.h
@@ -43,16 +43,20 @@ extern "C" {
 #define SCANNERS_C \
   { "C",                extract_c,                                      \
                         &flag_table_c,                                  \
-                        &formatstring_c, NULL, NULL },                        \
+                        &formatstring_c, NULL,                          \
+                        &literalstring_c },                             \
   { "C++",              extract_c,                                      \
                         &flag_table_c,                                  \
-                        &formatstring_c, NULL, NULL },                        \
+                        &formatstring_c, NULL,                          \
+                        &literalstring_c },                             \
   { "ObjectiveC",       extract_objc,                                   \
                         &flag_table_objc,                               \
-                        &formatstring_c, &formatstring_objc, NULL },          \
+                        &formatstring_c, &formatstring_objc,            \
+                        &literalstring_c },                             \
   { "GCC-source",       extract_c,                                      \
                         &flag_table_gcc_internal,                       \
-                        &formatstring_gcc_internal, &formatstring_gfc_internal, NULL }, \
+                        &formatstring_gcc_internal, &formatstring_gfc_internal, \
+                        &literalstring_c },                             \
 
 /* Scan a C/C++ file and add its translatable strings to mdlp.  */
 extern void extract_c (FILE *fp, const char *real_filename,
@@ -80,6 +84,9 @@ extern void init_flag_table_objc (void);
 extern void init_flag_table_gcc_internal (void);
 
 
+extern DLL_VARIABLE struct literalstring_parser literalstring_c;
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog
index 756871c79..80d19260d 100644
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,9 @@
+2014-05-09  Daiki Ueno  <ueno@gnu.org>
+
+	c: Interpret string literals lazily
+	* xgettext-c-19: New file.
+	* Makefile.am (TESTS): Add new test.
+
 2014-05-03  Daiki Ueno  <ueno@gnu.org>
 
 	tests: Add test for prefixed comment tag
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am
index ba26362a0..077f32157 100644
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -75,7 +75,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
 	xgettext-c-2 xgettext-c-3 xgettext-c-4 xgettext-c-5 \
 	xgettext-c-6 xgettext-c-7 xgettext-c-8 xgettext-c-9 xgettext-c-10 \
 	xgettext-c-11 xgettext-c-12 xgettext-c-13 xgettext-c-14 xgettext-c-15 \
-	xgettext-c-16 xgettext-c-17 xgettext-c-18 \
+	xgettext-c-16 xgettext-c-17 xgettext-c-18 xgettext-c-19 \
 	xgettext-csharp-1 xgettext-csharp-2 xgettext-csharp-3 \
 	xgettext-csharp-4 xgettext-csharp-5 xgettext-csharp-6 \
 	xgettext-csharp-7 \
diff --git a/gettext-tools/tests/xgettext-c-19 b/gettext-tools/tests/xgettext-c-19
new file mode 100755
index 000000000..581a20092
--- /dev/null
+++ b/gettext-tools/tests/xgettext-c-19
@@ -0,0 +1,61 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test C support: mixing ANSI escapes, Unicode escapes, and bare
+# multibyte characters.
+
+cat <<\EOF > xg-c-19.in.c
+/* 最初のコメント */
+"最初の文字列";
+
+/* 二番目のコメント */
+gettext ("二番目の文字列");
+
+/* 三番目のコメント */
+pgettext ("\u30B3\u30F3\u30C6\u30AF\u30B9\u30C8", "\xBB\xB0\xC8\xD6\xCC\xDC\xA4\xCE\xCA\xB8\xBB\xFA\xCE\xF3");
+EOF
+
+: ${ICONV=iconv}
+${ICONV} -f UTF-8 -t EUC-JP < xg-c-19.in.c > xg-c-19.c \
+  || { echo "Skipping test: iconv does not work for EUC-JP"; exit 77; }
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --from-code=EUC-JP --add-comments --no-location \
+	    -o - xg-c-19.c | grep -v 'POT-Creation-Date' > xg-c-19.tmp.po \
+	    || exit 1
+LC_ALL=C tr -d '\r' < xg-c-19.tmp.po > xg-c-19.po || exit 1
+
+cat <<EOF > xg-c-19.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. 二番目のコメント
+msgid "二番目の文字列"
+msgstr ""
+
+#. 三番目のコメント
+msgctxt "コンテクスト"
+msgid "三番目の文字列"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-c-19.ok xg-c-19.po
+result=$?
+
+exit $result