xgettext: Delay calls to from_current_source_encoding, part 1.

author Bruno Haible <bruno@clisp.org>

Sun, 4 Nov 2018 19:23:31 +0000 (20:23 +0100)

committer Bruno Haible <bruno@clisp.org>

Sun, 4 Nov 2018 23:25:34 +0000 (00:25 +0100)
author Bruno Haible <bruno@clisp.org>
Sun, 4 Nov 2018 19:23:31 +0000 (20:23 +0100)
committer Bruno Haible <bruno@clisp.org>
Sun, 4 Nov 2018 23:25:34 +0000 (00:25 +0100)
diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c

index fe1c6ebe6cfc5d1a13641a64a0fa8b17b6553368..a8a0b84fa1119d302db905e8f5ee0b15c1c5fa54 100644 (file)
--- a/gettext-tools/src/x-c.c
+++ b/gettext-tools/src/x-c.c
@@ -952,7 +952,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_name, token_type_string_literal */
+  char *string;                         /* for token_type_name */
+  mixed_string_ty *mixed_string;        /* for token_type_string_literal */
    refcounted_string_list_ty *comment;   /* for token_type_string_literal,
                                             token_type_objc_special */
    long number;
@@ -1175,8 +1176,10 @@ phase7_ungetc (int c)
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_name || tp->type == token_type_string_literal)
+  if (tp->type == token_type_name)
      free (tp->string);
+  if (tp->type == token_type_string_literal)
+    mixed_string_free (tp->mixed_string);
    if (tp->type == token_type_string_literal
        || tp->type == token_type_objc_special)
      drop_reference (tp->comment);
@@ -1396,7 +1399,7 @@ phase5_get (token_ty *tp)
                                    if (relevant)
                                      {
                                        tp->type = token_type_string_literal;
-                                      tp->string = mixed_string_buffer_result (&msb);
+                                      tp->mixed_string = mixed_string_buffer_result (&msb);
                                        tp->comment = add_reference (savable_comment);
                                      }
                                    else
@@ -1671,7 +1674,7 @@ phase5_get (token_ty *tp)
                mixed_string_buffer_append_char (&msb, c);
            }
          tp->type = token_type_string_literal;
-        tp->string = mixed_string_buffer_result (&msb);
+        tp->mixed_string = mixed_string_buffer_result (&msb);
          tp->comment = add_reference (savable_comment);
          return;
        }
@@ -1838,13 +1841,13 @@ phase6_get (token_ty *tp)
            && buf[1].type == token_type_number
            && buf[2].type == token_type_string_literal)
          {
-          logical_file_name = xstrdup (buf[2].string);
+          logical_file_name = mixed_string_contents (buf[2].mixed_string);
            line_number = buf[1].number;
          }
        if (bufpos >= 2 && buf[0].type == token_type_number
            && buf[1].type == token_type_string_literal)
          {
-          logical_file_name = xstrdup (buf[1].string);
+          logical_file_name = mixed_string_contents (buf[1].mixed_string);
            line_number = buf[0].number;
          }
  
@@ -1922,7 +1925,9 @@ phase8a_get (token_ty *tp)
        /* Turn PRIdXXX into "<PRIdXXX>".  */
        char *new_string = xasprintf ("<%s>", tp->string);
        free (tp->string);
-      tp->string = new_string;
+      tp->mixed_string =
+        mixed_string_alloc_utf8 (new_string, lc_string,
+                                 logical_file_name, line_number);
        tp->comment = add_reference (savable_comment);
        tp->type = token_type_string_literal;
      }
@@ -2016,7 +2021,6 @@ phase8_get (token_ty *tp)
    for (;;)
      {
        token_ty tmp;
-      size_t len;
  
        phase8c_get (&tmp);
        if (tmp.type != token_type_string_literal)
@@ -2024,9 +2028,8 @@ phase8_get (token_ty *tp)
            phase8c_unget (&tmp);
            return;
          }
-      len = strlen (tp->string);
-      tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
-      strcpy (tp->string + len, tmp.string);
+      tp->mixed_string =
+        mixed_string_concat_free1 (tp->mixed_string, tmp.mixed_string);
        free_token (&tmp);
      }
  }
@@ -2057,16 +2060,18 @@ struct xgettext_token_ty
    /* This field is used only for xgettext_token_type_keyword.  */
    const struct callshapes *shapes;
  
-  /* This field is used only for xgettext_token_type_string_literal,
-     xgettext_token_type_keyword, xgettext_token_type_symbol.  */
+  /* This field is used only for xgettext_token_type_keyword,
+     xgettext_token_type_symbol.  */
    char *string;
  
+  /* This field is used only for xgettext_token_type_string_literal.  */
+  mixed_string_ty *mixed_string;
+
    /* This field is used only for xgettext_token_type_string_literal.  */
    refcounted_string_list_ty *comment;
  
-  /* These fields are only for
-       xgettext_token_type_keyword,
-       xgettext_token_type_string_literal.  */
+  /* This field is used only for xgettext_token_type_keyword,
+     xgettext_token_type_string_literal.  */
    lex_pos_ty pos;
  };
  
@@ -2135,7 +2140,7 @@ x_c_lex (xgettext_token_ty *tp)
            last_non_comment_line = newline_count;
  
            tp->type = xgettext_token_type_string_literal;
-          tp->string = token.string;
+          tp->mixed_string = token.mixed_string;
            tp->comment = token.comment;
            tp->pos.file_name = logical_file_name;
            tp->pos.line_number = token.line_number;
@@ -2295,17 +2300,22 @@ extract_parenthesized (message_list_ty *mlp,
            continue;
  
          case xgettext_token_type_string_literal:
-          xgettext_current_source_encoding = po_charset_utf8;
-          if (extract_all)
-            remember_a_message (mlp, NULL, token.string, inner_context,
-                                &token.pos, NULL, token.comment);
-          else
-            arglist_parser_remember (argparser, arg, token.string,
-                                     inner_context,
-                                     token.pos.file_name, token.pos.line_number,
-                                     token.comment);
-          xgettext_current_source_encoding = xgettext_global_source_encoding;
-          drop_reference (token.comment);
+          {
+            char *string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+            xgettext_current_source_encoding = po_charset_utf8;
+            if (extract_all)
+              remember_a_message (mlp, NULL, string, inner_context,
+                                  &token.pos, NULL, token.comment);
+            else
+              arglist_parser_remember (argparser, arg, string,
+                                       inner_context,
+                                       token.pos.file_name,
+                                       token.pos.line_number,
+                                       token.comment);
+            xgettext_current_source_encoding = xgettext_global_source_encoding;
+            drop_reference (token.comment);
+          }
            next_context_iter = null_context_list_iterator;
            selectorcall_context_iter = null_context_list_iterator;
            state = 0;
diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c

index 61c101140a65125bb1549f9dab06894352f3370f..c5a8e5ff6021b27f5d1cb16a0d2c0a52ac8fcc94 100644 (file)
--- a/gettext-tools/src/x-csharp.c
+++ b/gettext-tools/src/x-csharp.c
@@ -550,7 +550,8 @@ comment_add (int c)
  static inline void
  comment_line_end (size_t chars_to_remove)
  {
-  char *buffer = mixed_string_buffer_result (&comment_buffer);
+  char *buffer =
+    mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
    size_t buflen = strlen (buffer);
  
    buflen -= chars_to_remove;
@@ -1258,7 +1259,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_string_literal, token_type_symbol */
+  char *string;                         /* for token_type_symbol */
+  mixed_string_ty *mixed_string;        /* for token_type_string_literal */
    refcounted_string_list_ty *comment;   /* for token_type_string_literal */
    int line_number;
    int logical_line_number;
@@ -1269,10 +1271,13 @@ struct token_ty
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+  if (tp->type == token_type_symbol)
      free (tp->string);
    if (tp->type == token_type_string_literal)
-    drop_reference (tp->comment);
+    {
+      mixed_string_free (tp->mixed_string);
+      drop_reference (tp->comment);
+    }
  }
  
  
@@ -1581,7 +1586,7 @@ phase6_get (token_ty *tp)
                                        logical_file_name,
                                        logical_line_number);
              accumulate_escaped (&literal, '"');
-            tp->string = mixed_string_buffer_result (&literal);
+            tp->mixed_string = mixed_string_buffer_result (&literal);
              tp->comment = add_reference (savable_comment);
              lexical_context = lc_outside;
              tp->type = token_type_string_literal;
@@ -1641,7 +1646,7 @@ phase6_get (token_ty *tp)
                    /* No special treatment of newline and backslash here.  */
                    mixed_string_buffer_append_unicode (&literal, c);
                  }
-              tp->string = mixed_string_buffer_result (&literal);
+              tp->mixed_string = mixed_string_buffer_result (&literal);
                tp->comment = add_reference (savable_comment);
                lexical_context = lc_outside;
                tp->type = token_type_string_literal;
@@ -1655,6 +1660,8 @@ phase6_get (token_ty *tp)
            if (is_identifier_start (c))
              {
                struct mixed_string_buffer buffer;
+              mixed_string_ty *mixed_string;
+
                mixed_string_buffer_init (&buffer, lexical_context,
                                          logical_file_name, logical_line_number);
                for (;;)
@@ -1667,7 +1674,9 @@ phase6_get (token_ty *tp)
                      break;
                  }
                phase4_ungetc (c);
-              tp->string = mixed_string_buffer_result (&buffer);
+              mixed_string = mixed_string_buffer_result (&buffer);
+              tp->string = mixed_string_contents (mixed_string);
+              mixed_string_free (mixed_string);
                tp->type = token_type_symbol;
                return;
              }
@@ -1714,8 +1723,7 @@ phase7_get (token_ty *tp)
    phase6_get (tp);
    if (tp->type == token_type_string_literal)
      {
-      char *sum = tp->string;
-      size_t sum_len = strlen (sum);
+      mixed_string_ty *sum = tp->mixed_string;
  
        for (;;)
          {
@@ -1734,12 +1742,7 @@ phase7_get (token_ty *tp)
                    phase6_get (&token_after);
                    if (token_after.type != token_type_dot)
                      {
-                      char *addend = token3.string;
-                      size_t addend_len = strlen (addend);
-
-                      sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
-                      memcpy (sum + sum_len, addend, addend_len + 1);
-                      sum_len += addend_len;
+                      sum = mixed_string_concat_free1 (sum, token3.mixed_string);
  
                        phase6_unget (&token_after);
                        free_token (&token3);
@@ -1753,7 +1756,7 @@ phase7_get (token_ty *tp)
            phase6_unget (&token2);
            break;
          }
-      tp->string = sum;
+      tp->mixed_string = sum;
      }
  }
  
@@ -2006,16 +2009,21 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
  
          case token_type_string_literal:
            {
+            char *string;
              lex_pos_ty pos;
+
+            string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+
              pos.file_name = logical_file_name;
              pos.line_number = token.line_number;
  
              xgettext_current_source_encoding = po_charset_utf8;
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, inner_context,
+              remember_a_message (mlp, NULL, string, inner_context,
                                    &pos, NULL, token.comment);
              else
-              arglist_parser_remember (argparser, arg, token.string,
+              arglist_parser_remember (argparser, arg, string,
                                         inner_context,
                                         pos.file_name, pos.line_number,
                                         token.comment);
diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c

index 868c11d10f65adfb13b28076288d6428892eccb0..8e8477d5073984c3c0d0be45954994061c2e9b2e 100644 (file)
--- a/gettext-tools/src/x-java.c
+++ b/gettext-tools/src/x-java.c
@@ -434,7 +434,8 @@ comment_add (int c)
  static inline void
  comment_line_end (size_t chars_to_remove)
  {
-  char *buffer = mixed_string_buffer_result (&comment_buffer);
+  char *buffer =
+    mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
    size_t buflen = strlen (buffer);
  
    buflen -= chars_to_remove;
@@ -564,7 +565,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_string_literal, token_type_symbol */
+  char *string;                         /* for token_type_symbol */
+  mixed_string_ty *mixed_string;        /* for token_type_string_literal */
    refcounted_string_list_ty *comment;   /* for token_type_string_literal */
    int line_number;
  };
@@ -574,10 +576,13 @@ struct token_ty
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+  if (tp->type == token_type_symbol)
      free (tp->string);
    if (tp->type == token_type_string_literal)
-    drop_reference (tp->comment);
+    {
+      free (tp->mixed_string);
+      drop_reference (tp->comment);
+    }
  }
  
  
@@ -836,7 +841,7 @@ phase5_get (token_ty *tp)
              mixed_string_buffer_init (&literal, lc_string,
                                        logical_file_name, line_number);
              accumulate_escaped (&literal, '"');
-            tp->string = mixed_string_buffer_result (&literal);
+            tp->mixed_string = mixed_string_buffer_result (&literal);
              tp->comment = add_reference (savable_comment);
              tp->type = token_type_string_literal;
              return;
@@ -916,8 +921,7 @@ phase6_get (token_ty *tp)
    phase5_get (tp);
    if (tp->type == token_type_string_literal && phase6_last != token_type_rparen)
      {
-      char *sum = tp->string;
-      size_t sum_len = strlen (sum);
+      mixed_string_ty *sum = tp->mixed_string;
  
        for (;;)
          {
@@ -936,12 +940,7 @@ phase6_get (token_ty *tp)
                    phase5_get (&token_after);
                    if (token_after.type != token_type_dot)
                      {
-                      char *addend = token3.string;
-                      size_t addend_len = strlen (addend);
-
-                      sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
-                      memcpy (sum + sum_len, addend, addend_len + 1);
-                      sum_len += addend_len;
+                      sum = mixed_string_concat_free1 (sum, token3.mixed_string);
  
                        phase5_unget (&token_after);
                        free_token (&token3);
@@ -955,7 +954,7 @@ phase6_get (token_ty *tp)
            phase5_unget (&token2);
            break;
          }
-      tp->string = sum;
+      tp->mixed_string = sum;
      }
    phase6_last = tp->type;
  }
@@ -1209,16 +1208,21 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
  
          case token_type_string_literal:
            {
+            char *string;
              lex_pos_ty pos;
+
+            string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+
              pos.file_name = logical_file_name;
              pos.line_number = token.line_number;
  
              xgettext_current_source_encoding = po_charset_utf8;
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, inner_context,
+              remember_a_message (mlp, NULL, string, inner_context,
                                    &pos, NULL, token.comment);
              else
-              arglist_parser_remember (argparser, arg, token.string,
+              arglist_parser_remember (argparser, arg, string,
                                         inner_context,
                                         pos.file_name, pos.line_number,
                                         token.comment);
diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c

index 2879837c0f4a4f1df4fa815261da3264d280cbd4..c2599dafe8d3c1250f42c3d8de336130ae51719f 100644 (file)
--- a/gettext-tools/src/x-javascript.c
+++ b/gettext-tools/src/x-javascript.c
@@ -479,7 +479,8 @@ comment_add (int c)
  static inline const char *
  comment_line_end (size_t chars_to_remove)
  {
-  char *buffer = mixed_string_buffer_result (&comment_buffer);
+  char *buffer =
+    mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
    size_t buflen = strlen (buffer) - chars_to_remove;
  
    while (buflen >= 1
@@ -673,8 +674,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_string, token_type_symbol,
-                           token_type_keyword */
+  char *string;                  /* for token_type_symbol, token_type_keyword */
+  mixed_string_ty *mixed_string;        /* for token_type_string */
    refcounted_string_list_ty *comment;   /* for token_type_string */
    int line_number;
  };
@@ -684,11 +685,13 @@ struct token_ty
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_string || tp->type == token_type_symbol
-      || tp->type == token_type_keyword)
+  if (tp->type == token_type_symbol || tp->type == token_type_keyword)
      free (tp->string);
    if (tp->type == token_type_string)
-    drop_reference (tp->comment);
+    {
+      mixed_string_free (tp->mixed_string);
+      drop_reference (tp->comment);
+    }
  }
  
  
@@ -1152,7 +1155,7 @@ phase5_get (token_ty *tp)
                  else
                    mixed_string_buffer_append_char (&msb, uc);
                }
-            tp->string = mixed_string_buffer_result (&msb);
+            tp->mixed_string = mixed_string_buffer_result (&msb);
              tp->comment = add_reference (savable_comment);
              lexical_context = lc_outside;
              tp->type = last_token_type = token_type_string;
@@ -1331,8 +1334,7 @@ x_javascript_lex (token_ty *tp)
    phase5_get (tp);
    if (tp->type == token_type_string)
      {
-      char *sum = tp->string;
-      size_t sum_len = strlen (sum);
+      mixed_string_ty *sum = tp->mixed_string;
  
        for (;;)
          {
@@ -1346,12 +1348,7 @@ x_javascript_lex (token_ty *tp)
                phase5_get (&token3);
                if (token3.type == token_type_string)
                  {
-                  char *addend = token3.string;
-                  size_t addend_len = strlen (addend);
-
-                  sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
-                  memcpy (sum + sum_len, addend, addend_len + 1);
-                  sum_len += addend_len;
+                  sum = mixed_string_concat_free1 (sum, token3.mixed_string);
  
                    free_token (&token3);
                    free_token (&token2);
@@ -1362,7 +1359,7 @@ x_javascript_lex (token_ty *tp)
            phase5_unget (&token2);
            break;
          }
-      tp->string = sum;
+      tp->mixed_string = sum;
      }
  }
  
@@ -1512,16 +1509,21 @@ extract_balanced (message_list_ty *mlp,
  
          case token_type_string:
            {
+            char *string;
              lex_pos_ty pos;
+
+            string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+
              pos.file_name = logical_file_name;
              pos.line_number = token.line_number;
  
              xgettext_current_source_encoding = po_charset_utf8;
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, inner_context,
+              remember_a_message (mlp, NULL, string, inner_context,
                                    &pos, NULL, token.comment);
              else
-              arglist_parser_remember (argparser, arg, token.string,
+              arglist_parser_remember (argparser, arg, string,
                                         inner_context,
                                         pos.file_name, pos.line_number,
                                         token.comment);
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c

index 5109d569a3bb397332b69f65884eff5de1b8fab5..50aa587c1de71b49bc2c0ffe37520ea2cd895df4 100644 (file)
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -533,7 +533,8 @@ comment_add (int c)
  static inline const char *
  comment_line_end ()
  {
-  char *buffer = mixed_string_buffer_result (&comment_buffer);
+  char *buffer =
+    mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
    size_t buflen = strlen (buffer);
  
    while (buflen >= 1
@@ -780,7 +781,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_string, token_type_symbol */
+  char *string;                         /* for token_type_symbol */
+  mixed_string_ty *mixed_string;        /* for token_type_string */
    refcounted_string_list_ty *comment;   /* for token_type_string */
    int line_number;
  };
@@ -789,10 +791,13 @@ struct token_ty
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_string || tp->type == token_type_symbol)
+  if (tp->type == token_type_symbol)
      free (tp->string);
    if (tp->type == token_type_string)
-    drop_reference (tp->comment);
+    {
+      mixed_string_free (tp->mixed_string);
+      drop_reference (tp->comment);
+    }
  }
  
  
@@ -1358,7 +1363,7 @@ phase5_get (token_ty *tp)
                      else
                        mixed_string_buffer_append_char (&msb, uc);
                    }
-                tp->string = mixed_string_buffer_result (&msb);
+                tp->mixed_string = mixed_string_buffer_result (&msb);
                  tp->comment = add_reference (savable_comment);
                  lexical_context = lc_outside;
                  tp->type = token_type_string;
@@ -1429,13 +1434,13 @@ x_python_lex (token_ty *tp)
    phase5_get (tp);
    if (tp->type == token_type_string)
      {
-      char *sum = tp->string;
-      size_t sum_len = strlen (sum);
+      mixed_string_ty *sum = tp->mixed_string;
  
        for (;;)
          {
-          token_ty token2, *tp2 = NULL;
+          token_ty token2;
            token_ty token3;
+          token_ty *tp2 = NULL;
  
            phase5_get (&token2);
            switch (token2.type)
@@ -1461,12 +1466,7 @@ x_python_lex (token_ty *tp)
  
            if (tp2)
              {
-              char *addend = tp2->string;
-              size_t addend_len = strlen (addend);
-
-              sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
-              memcpy (sum + sum_len, addend, addend_len + 1);
-              sum_len += addend_len;
+              sum = mixed_string_concat_free1 (sum, tp2->mixed_string);
  
                free_token (tp2);
                continue;
@@ -1474,7 +1474,7 @@ x_python_lex (token_ty *tp)
            phase5_unget (&token2);
            break;
          }
-      tp->string = sum;
+      tp->mixed_string = sum;
      }
  }
  
@@ -1624,16 +1624,21 @@ extract_balanced (message_list_ty *mlp,
  
          case token_type_string:
            {
+            char *string;
              lex_pos_ty pos;
+
+            string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+
              pos.file_name = logical_file_name;
              pos.line_number = token.line_number;
  
              xgettext_current_source_encoding = po_charset_utf8;
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, inner_context,
+              remember_a_message (mlp, NULL, string, inner_context,
                                    &pos, NULL, token.comment);
              else
-              arglist_parser_remember (argparser, arg, token.string,
+              arglist_parser_remember (argparser, arg, string,
                                         inner_context,
                                         pos.file_name, pos.line_number,
                                         token.comment);
diff --git a/gettext-tools/src/x-rst.c b/gettext-tools/src/x-rst.c

index 847d60bd84ad3133d84979ac5cb1dcda926bc4ea..860b43befe379b31d42859e69a84d40029b5aae9 100644 (file)
--- a/gettext-tools/src/x-rst.c
+++ b/gettext-tools/src/x-rst.c
@@ -502,7 +502,8 @@ extract_rsj (FILE *f,
            char *s1;
            if (parse_string () != pr_parsed)
              goto invalid_json;
-          s1 = mixed_string_buffer_result (&stringbuf);
+          s1 = mixed_string_contents_free1 (
+                 mixed_string_buffer_result (&stringbuf));
  
            /* Parse a colon.  */
            c = phase2_getc ();
@@ -549,7 +550,8 @@ extract_rsj (FILE *f,
                                char *s2;
                                if (parse_string () != pr_parsed)
                                  goto invalid_json;
-                              s2 = mixed_string_buffer_result (&stringbuf);
+                              s2 = mixed_string_contents_free1 (
+                                     mixed_string_buffer_result (&stringbuf));
  
                                /* Parse a colon.  */
                                c = phase2_getc ();
@@ -570,7 +572,9 @@ extract_rsj (FILE *f,
                                      goto invalid_rsj;
                                    if (r == pr_syntax || location != NULL)
                                      goto invalid_json;
-                                  location = mixed_string_buffer_result (&stringbuf);
+                                  location =
+                                    mixed_string_contents_free1 (
+                                      mixed_string_buffer_result (&stringbuf));
                                  }
                                else if (strcmp (s2, "sourcebytes") == 0)
                                  {
@@ -606,7 +610,9 @@ extract_rsj (FILE *f,
                                      goto invalid_rsj;
                                    if (r == pr_syntax || msgid != NULL)
                                      goto invalid_json;
-                                  msgid = mixed_string_buffer_result (&stringbuf);
+                                  msgid =
+                                    mixed_string_contents_free1 (
+                                      mixed_string_buffer_result (&stringbuf));
                                  }
                                else
                                  goto invalid_rsj;
diff --git a/gettext-tools/src/x-vala.c b/gettext-tools/src/x-vala.c

index 618f87a83ebc60ef349d804bf5a62a2ac8532885..f765f522256c832939fcb885e5d542ac3dd5cfa4 100644 (file)
--- a/gettext-tools/src/x-vala.c
+++ b/gettext-tools/src/x-vala.c
@@ -365,7 +365,8 @@ typedef struct token_ty token_ty;
  struct token_ty
  {
    token_type_ty type;
-  char *string;         /* for token_type_symbol, token_type_string_literal */
+  char *string;                         /* for token_type_symbol */
+  mixed_string_ty *mixed_string;        /* for token_type_string_literal */
    refcounted_string_list_ty *comment;   /* for token_type_string_literal */
    int line_number;
  };
@@ -374,10 +375,13 @@ struct token_ty
  static inline void
  free_token (token_ty *tp)
  {
-  if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+  if (tp->type == token_type_symbol)
      free (tp->string);
    if (tp->type == token_type_string_literal)
-    drop_reference (tp->comment);
+    {
+      mixed_string_free (tp->mixed_string);
+      drop_reference (tp->comment);
+    }
  }
  
  
@@ -923,10 +927,18 @@ phase3_get (token_ty *tp)
                      mixed_string_buffer_append_char (&msb, c);
                  }
              /* Done accumulating the string.  */
-            tp->type = last_token_type =
-              template ? token_type_string_template : token_type_string_literal;
-            tp->string = mixed_string_buffer_result (&msb);
-            tp->comment = add_reference (savable_comment);
+            if (template)
+              {
+                tp->type = token_type_string_template;
+                mixed_string_buffer_destroy (&msb);
+              }
+            else
+              {
+                tp->type = token_type_string_literal;
+                tp->mixed_string = mixed_string_buffer_result (&msb);
+                tp->comment = add_reference (savable_comment);
+              }
+            last_token_type = tp->type;
              return;
            }
  
@@ -1153,8 +1165,7 @@ x_vala_lex (token_ty *tp)
    phase3_get (tp);
    if (tp->type == token_type_string_literal)
      {
-      char *sum = tp->string;
-      size_t sum_len = strlen (sum);
+      mixed_string_ty *sum = tp->mixed_string;
  
        for (;;)
          {
@@ -1168,12 +1179,7 @@ x_vala_lex (token_ty *tp)
                phase3_get (&token3);
                if (token3.type == token_type_string_literal)
                  {
-                  char *addend = token3.string;
-                  size_t addend_len = strlen (addend);
-
-                  sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
-                  memcpy (sum + sum_len, addend, addend_len + 1);
-                  sum_len += addend_len;
+                  sum = mixed_string_concat_free1 (sum, token3.mixed_string);
  
                    free_token (&token3);
                    free_token (&token2);
@@ -1184,7 +1190,7 @@ x_vala_lex (token_ty *tp)
            phase3_unget (&token2);
            break;
          }
-      tp->string = sum;
+      tp->mixed_string = sum;
      }
  }
  
@@ -1315,13 +1321,18 @@ extract_balanced (message_list_ty *mlp, token_type_ty delim,
  
          case token_type_string_literal:
            {
+            char *string;
              lex_pos_ty pos;
+
+            string = mixed_string_contents (token.mixed_string);
+            mixed_string_free (token.mixed_string);
+
              pos.file_name = logical_file_name;
              pos.line_number = token.line_number;
  
              xgettext_current_source_encoding = po_charset_utf8;
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, inner_context,
+              remember_a_message (mlp, NULL, string, inner_context,
                                    &pos, NULL, token.comment);
              else
                {
@@ -1331,13 +1342,13 @@ extract_balanced (message_list_ty *mlp, token_type_ty delim,
                      struct arglist_parser *tmp_argparser;
                      tmp_argparser = arglist_parser_alloc (mlp, next_shapes);
  
-                    arglist_parser_remember (tmp_argparser, 1, token.string,
+                    arglist_parser_remember (tmp_argparser, 1, string,
                                               inner_context, pos.file_name,
                                               pos.line_number, token.comment);
                      arglist_parser_done (tmp_argparser, 1);
                    }
                  else
-                  arglist_parser_remember (argparser, arg, token.string,
+                  arglist_parser_remember (argparser, arg, string,
                                             inner_context, pos.file_name,
                                             pos.line_number, token.comment);
                }
diff --git a/gettext-tools/src/xg-mixed-string.c b/gettext-tools/src/xg-mixed-string.c

index 0023bc77f3fb5cab001178f2aa54694385860627..1892267afbbbee48b9a83533e63db74e5e1bf502 100644 (file)
--- a/gettext-tools/src/xg-mixed-string.c
+++ b/gettext-tools/src/xg-mixed-string.c
@@ -22,11 +22,15 @@
  /* Specification.  */
  #include "xg-mixed-string.h"
  
+#include <assert.h>
  #include <stdlib.h>
  #include <string.h>
  
  #include "error.h"
  #include "error-progname.h"
+#include "flexmember.h"
+#include "msgl-ascii.h"
+#include "po-charset.h"
  #include "unistr.h"
  #include "xalloc.h"
  
@@ -36,19 +40,375 @@
  #define _(str) gettext (str)
  
  
+/* Allocates a single segment holding a copy of STRING[0..LENGTH-1].  */
+static inline struct mixed_string_segment *
+segment_alloc (enum segment_type type, const char *string, size_t length)
+{
+  struct mixed_string_segment *segment =
+    (struct mixed_string_segment *)
+    xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents, length));
+  segment->type = type;
+  segment->length = length;
+  memcpy (segment->contents, string, length);  /* no NUL appended */
+  return segment;
+}
+
+/* Clones a single segment.  */
+static inline struct mixed_string_segment *
+segment_clone (const struct mixed_string_segment *segment)
+{
+  return segment_alloc (segment->type, segment->contents, segment->length);
+}
+
+mixed_string_ty *
+mixed_string_alloc_simple (const char *string,
+                           lexical_context_ty lcontext,
+                           const char *logical_file_name,
+                           int line_number)
+{
+  struct mixed_string *ms = XMALLOC (struct mixed_string);
+
+  if (*string == '\0')
+    {
+      /* An empty string: represented by zero segments.  */
+      ms->segments = NULL;
+      ms->nsegments = 0;
+    }
+  else
+    {
+      ms->segments = XNMALLOC (1, struct mixed_string_segment *);
+      if ((xgettext_current_source_encoding == po_charset_ascii
+           || xgettext_current_source_encoding == po_charset_utf8)
+          && is_ascii_string (string))
+        /* An optimization: such a string is already valid UTF-8.  */
+        ms->segments[0] =
+          segment_alloc (utf8_encoded, string, strlen (string));
+      else
+        /* The general case: converted later, in mixed_string_contents.  */
+        ms->segments[0] =
+          segment_alloc (source_encoded, string, strlen (string));
+      ms->nsegments = 1;
+    }
+  ms->lcontext = lcontext;
+  ms->logical_file_name = logical_file_name;
+  ms->line_number = line_number;
+
+  return ms;
+}
+
+mixed_string_ty *
+mixed_string_alloc_utf8 (const char *string,
+                         lexical_context_ty lcontext,
+                         const char *logical_file_name,
+                         int line_number)
+{
+  struct mixed_string *ms = XMALLOC (struct mixed_string);
+
+  if (*string == '\0')
+    {
+      /* An empty string.  */
+      ms->segments = NULL;
+      ms->nsegments = 0;
+    }
+  else
+    {
+      ms->segments = XNMALLOC (1, struct mixed_string_segment *);
+      ms->segments[0] = segment_alloc (utf8_encoded, string, strlen (string));
+      ms->nsegments = 1;
+    }
+  ms->lcontext = lcontext;
+  ms->logical_file_name = logical_file_name;
+  ms->line_number = line_number;
+
+  return ms;
+}
+
+mixed_string_ty *
+mixed_string_clone (const mixed_string_ty *ms1)
+{
+  struct mixed_string *ms = XMALLOC (struct mixed_string);
+  size_t nsegments = ms1->nsegments;
+
+  if (nsegments == 0)
+    {
+      ms->segments = NULL;
+      ms->nsegments = 0;
+    }
+  else
+    {
+      size_t i;
+
+      ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+      for (i = 0; i < nsegments; i++)
+        ms->segments[i] = segment_clone (ms1->segments[i]);
+      ms->nsegments = nsegments;
+    }
+  ms->lcontext = ms1->lcontext;
+  ms->logical_file_name = ms1->logical_file_name;
+  ms->line_number = ms1->line_number;
+
+  return ms;
+}
+
+char *
+mixed_string_contents (const mixed_string_ty *ms)
+{
+  size_t nsegments = ms->nsegments;
+  /* Trivial cases: nothing to convert.  */
+  if (nsegments == 0)
+    return xstrdup ("");
+  if (nsegments == 1 && ms->segments[0]->type == utf8_encoded)
+    {
+      /* Return the segment, with a NUL at the end.  */
+      size_t len = ms->segments[0]->length;
+      char *string = XNMALLOC (len + 1, char);
+      memcpy (string, ms->segments[0]->contents, len);
+      string[len] = '\0';
+      return string;
+    }
+  /* General case.  First, find where a NUL would truncate the result.  */
+  {
+    size_t i;
+
+    for (i = 0; i < nsegments - 1; i++)
+      if (memchr (ms->segments[i]->contents, '\0', ms->segments[i]->length)
+          != NULL)
+        {
+          /* Segment i contains a NUL character.  Ignore the remaining
+             segments.  */
+          nsegments = i + 1;
+          break;
+        }
+  }
+  {
+    char **converted_segments = XNMALLOC (nsegments, char *);
+    size_t length;
+
+    length = 0;
+    {
+      size_t i;
+
+      for (i = 0; i < nsegments; i++)
+        if (ms->segments[i]->type == source_encoded)
+          {
+            char *source_encoded_string;
+            char *utf8_encoded_string;
+
+            /* Copy the segment's contents, with a NUL at the end.  */
+            {
+              size_t len = ms->segments[i]->length;
+              source_encoded_string = XNMALLOC (len + 1, char);
+              memcpy (source_encoded_string, ms->segments[i]->contents, len);
+              source_encoded_string[len] = '\0';
+            }
+            /* Convert it to UTF-8 encoding (may return its argument).  */
+            utf8_encoded_string =
+              from_current_source_encoding (source_encoded_string,
+                                            ms->lcontext,
+                                            ms->logical_file_name,
+                                            ms->line_number);
+            if (utf8_encoded_string != source_encoded_string)
+              free (source_encoded_string);
+            converted_segments[i] = utf8_encoded_string;
+            length += strlen (utf8_encoded_string);
+          }
+        else
+          length += ms->segments[i]->length;  /* already UTF-8 */
+    }
+
+    {
+      char *string = XNMALLOC (length + 1, char);
+      {
+        char *p;
+        size_t i;
+
+        p = string;
+        for (i = 0; i < nsegments; i++)
+          if (ms->segments[i]->type == source_encoded)
+            {
+              p = stpcpy (p, converted_segments[i]);
+              free (converted_segments[i]);
+            }
+          else
+            {
+              memcpy (p, ms->segments[i]->contents, ms->segments[i]->length);
+              p += ms->segments[i]->length;
+            }
+        assert (p == string + length);
+        *p = '\0';
+      }
+
+      free (converted_segments);
+      return string;
+    }
+  }
+}
+
+void
+mixed_string_free (mixed_string_ty *ms)
+{
+  struct mixed_string_segment **segments = ms->segments;
+  size_t nsegments = ms->nsegments;
+  if (nsegments > 0)
+    {
+      size_t i;
+      for (i = 0; i < nsegments; i++)
+        free (segments[i]);
+    }
+  free (segments);
+  free (ms);
+}
+
+char *
+mixed_string_contents_free1 (mixed_string_ty *ms)
+{
+  char *contents = mixed_string_contents (ms);
+  mixed_string_free (ms);
+  return contents;
+}
+
+mixed_string_ty *
+mixed_string_concat (const mixed_string_ty *ms1,
+                     const mixed_string_ty *ms2)
+{
+  /* Trivial cases.  */
+  if (ms2->nsegments == 0)
+    return mixed_string_clone (ms1);
+  if (ms1->nsegments == 0)
+    return mixed_string_clone (ms2);
+  /* General case.  */
+  {
+    struct mixed_string *ms = XMALLOC (struct mixed_string);
+    size_t nsegments = ms1->nsegments + ms2->nsegments;
+    size_t j;
+    if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type)
+      {
+        /* Combine the last segment of ms1 with the first segment of ms2.  */
+        size_t i;
+
+        nsegments -= 1;
+        ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+        j = 0;
+        for (i = 0; i < ms1->nsegments - 1; i++)
+          ms->segments[j++] = segment_clone (ms1->segments[i]);
+        {
+          size_t len1 = ms1->segments[i]->length;
+          size_t len2 = ms2->segments[0]->length;
+          struct mixed_string_segment *newseg =
+            (struct mixed_string_segment *)
+            xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
+                                 len1 + len2));
+          newseg->type = ms2->segments[0]->type;
+          newseg->length = len1 + len2;
+          memcpy (newseg->contents, ms1->segments[i]->contents, len1);
+          memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2);
+          ms->segments[j++] = newseg;
+        }
+        for (i = 1; i < ms2->nsegments; i++)
+          ms->segments[j++] = segment_clone (ms2->segments[i]);
+      }
+    else
+      {
+        size_t i;
+
+        ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+        j = 0;
+        for (i = 0; i < ms1->nsegments; i++)
+          ms->segments[j++] = segment_clone (ms1->segments[i]);
+        for (i = 0; i < ms2->nsegments; i++)
+          ms->segments[j++] = segment_clone (ms2->segments[i]);
+      }
+    assert (j == nsegments);
+    ms->nsegments = nsegments;
+    ms->lcontext = ms1->lcontext;
+    ms->logical_file_name = ms1->logical_file_name;
+    ms->line_number = ms1->line_number;
+
+    return ms;
+  }
+}
+
+mixed_string_ty *
+mixed_string_concat_free1 (mixed_string_ty *ms1, const mixed_string_ty *ms2)
+{
+  /* Trivial cases.  */
+  if (ms2->nsegments == 0)
+    return ms1;  /* ms1 itself is the result; it is not freed */
+  if (ms1->nsegments == 0)
+    {
+      mixed_string_free (ms1);
+      return mixed_string_clone (ms2);
+    }
+  /* General case.  */
+  {
+    struct mixed_string *ms = XMALLOC (struct mixed_string);
+    size_t nsegments = ms1->nsegments + ms2->nsegments;
+    size_t j;
+    if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type)
+      {
+        /* Combine the last segment of ms1 with the first segment of ms2.  */
+        size_t i;
+
+        nsegments -= 1;
+        ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+        j = 0;
+        for (i = 0; i < ms1->nsegments - 1; i++)
+          ms->segments[j++] = ms1->segments[i];  /* moved, not cloned */
+        {
+          size_t len1 = ms1->segments[i]->length;
+          size_t len2 = ms2->segments[0]->length;
+          struct mixed_string_segment *newseg =
+            (struct mixed_string_segment *)
+            xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
+                                 len1 + len2));
+          newseg->type = ms2->segments[0]->type;
+          newseg->length = len1 + len2;
+          memcpy (newseg->contents, ms1->segments[i]->contents, len1);
+          memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2);
+          ms->segments[j++] = newseg;
+        }
+        free (ms1->segments[i]);  /* replaced by newseg */
+        for (i = 1; i < ms2->nsegments; i++)
+          ms->segments[j++] = segment_clone (ms2->segments[i]);
+      }
+    else
+      {
+        size_t i;
+
+        ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+        j = 0;
+        for (i = 0; i < ms1->nsegments; i++)
+          ms->segments[j++] = ms1->segments[i];  /* moved, not cloned */
+        for (i = 0; i < ms2->nsegments; i++)
+          ms->segments[j++] = segment_clone (ms2->segments[i]);
+      }
+    assert (j == nsegments);
+    free (ms1->segments);  /* the pointer array only */
+    ms->nsegments = nsegments;
+    ms->lcontext = ms1->lcontext;
+    ms->logical_file_name = ms1->logical_file_name;
+    ms->line_number = ms1->line_number;
+    free (ms1);
+
+    return ms;
+  }
+}
+
+
  void
  mixed_string_buffer_init (struct mixed_string_buffer *bp,
                            lexical_context_ty lcontext,
                            const char *logical_file_name,
                            int line_number)
  {
-  bp->utf8_buffer = NULL;
-  bp->utf8_buflen = 0;
-  bp->utf8_allocated = 0;
-  bp->utf16_surr = 0;
+  bp->segments = NULL;
+  bp->nsegments = 0;
+  bp->nsegments_allocated = 0;
+  bp->curr_type = -1;
    bp->curr_buffer = NULL;
    bp->curr_buflen = 0;
    bp->curr_allocated = 0;
+  bp->utf16_surr = 0;
    bp->lcontext = lcontext;
    bp->logical_file_name = logical_file_name;
    bp->line_number = line_number;
@@ -57,7 +417,23 @@ mixed_string_buffer_init (struct mixed_string_buffer *bp,
  bool
  mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp)
  {
-  return (bp->utf8_buflen == 0 && bp->utf16_surr == 0 && bp->curr_buflen == 0);
+  return (bp->nsegments == 0 && bp->curr_buflen == 0);
+}
+
+/* Auxiliary function: Ensure count more bytes are available in
+   bp->curr_buffer.  */
+static inline void
+mixed_string_buffer_grow_curr_buffer (struct mixed_string_buffer *bp,
+                                      size_t count)
+{
+  if (bp->curr_buflen + count > bp->curr_allocated)
+    {
+      size_t new_allocated = 2 * bp->curr_allocated + 10;
+      if (new_allocated < bp->curr_buflen + count)
+        new_allocated = bp->curr_buflen + count;
+      bp->curr_allocated = new_allocated;
+      bp->curr_buffer = xrealloc (bp->curr_buffer, new_allocated);
+    }
  }
  
  /* Auxiliary function: Append a byte to bp->curr.  */
@@ -73,23 +449,8 @@ mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
    bp->curr_buffer[bp->curr_buflen++] = c;
  }
  
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
-static inline void
-mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp,
-                                         size_t count)
-{
-  if (bp->utf8_buflen + count > bp->utf8_allocated)
-    {
-      size_t new_allocated = 2 * bp->utf8_allocated + 10;
-      if (new_allocated < bp->utf8_buflen + count)
-        new_allocated = bp->utf8_buflen + count;
-      bp->utf8_allocated = new_allocated;
-      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
-    }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
-   uc must be < 0x110000.  */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, append a
+   Unicode character to bp->curr_buffer.  uc must be < 0x110000.  */
  static inline void
  mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
                                             ucs4_t uc)
@@ -101,13 +462,13 @@ mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
      /* The caller should have ensured that uc is not out-of-range.  */
      abort ();
  
-  mixed_string_buffer_grow_utf8_buffer (bp, count);
-  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
-  bp->utf8_buflen += count;
+  mixed_string_buffer_grow_curr_buffer (bp, count);
+  memcpy (bp->curr_buffer + bp->curr_buflen, utf8buf, count);
+  bp->curr_buflen += count;
  }
  
-/* Auxiliary function: Handle the attempt to append a lone surrogate to
-   bp->utf8.  */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, handle the
+   attempt to append a lone surrogate to bp->curr_buffer.  */
  static void
  mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp,
                                             ucs4_t uc)
@@ -138,7 +499,8 @@ mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp,
    mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
  }
  
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, flush
+   bp->utf16_surr into bp->curr_buffer.  */
  static inline void
  mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
  {
@@ -149,31 +511,38 @@ mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
      }
  }
  
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
+/* Auxiliary function: Append a segment to bp->segments.  */
  static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
-                                       int line_number)
+mixed_string_buffer_add_segment (struct mixed_string_buffer *bp,
+                                 struct mixed_string_segment *newseg)
  {
-  if (bp->curr_buflen > 0)
+  if (bp->nsegments == bp->nsegments_allocated)
      {
-      char *curr;
-      size_t count;
-
-      mixed_string_buffer_append_to_curr_buffer (bp, '\0');
-
-      /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
-                                           bp->logical_file_name,
-                                           line_number);
-
-      /* Append it to bp->utf8_buffer.  */
-      count = strlen (curr);
-      mixed_string_buffer_grow_utf8_buffer (bp, count);
-      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
-      bp->utf8_buflen += count;
+      size_t new_allocated =
+        bp->nsegments_allocated = 2 * bp->nsegments_allocated + 1;
+      bp->segments =
+        (struct mixed_string_segment **)
+        xrealloc (bp->segments,
+                  new_allocated * sizeof (struct mixed_string_segment *));
+    }
+  bp->segments[bp->nsegments++] = newseg;
+}
  
-      if (curr != bp->curr_buffer)
-        free (curr);
+/* Auxiliary function: Flush bp->curr_buffer and bp->utf16_surr into
+   bp->segments.  */
+static void
+mixed_string_buffer_flush_curr (struct mixed_string_buffer *bp)
+{
+  if (bp->curr_type == utf8_encoded)
+    mixed_string_buffer_flush_utf16_surr (bp);
+  if (bp->curr_type != -1)
+    {
+      if (bp->curr_buflen > 0)
+        {
+          struct mixed_string_segment *segment =
+            segment_alloc (bp->curr_type, bp->curr_buffer, bp->curr_buflen);
+          mixed_string_buffer_add_segment (bp, segment);
+        }
        bp->curr_buflen = 0;
      }
  }
@@ -181,23 +550,26 @@ mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
  void
  mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
  {
-  /* Switch from Unicode character mode to multibyte character mode.  */
-  mixed_string_buffer_flush_utf16_surr (bp);
-
-  /* When a newline is seen, convert the accumulated multibyte sequence.
-     This ensures a correct line number in the error message in case of
-     a conversion error.  The "- 1" is to account for the newline.  */
-  if (c == '\n')
-    mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1);
+  /* Switch to multibyte character mode.  */
+  if (bp->curr_type != source_encoded)
+    {
+      mixed_string_buffer_flush_curr (bp);
+      bp->curr_type = source_encoded;
+    }
  
-  mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
+    mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
  }
  
  void
  mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
  {
-  /* Switch from multibyte character mode to Unicode character mode.  */
-  mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
+  /* Switch to Unicode character mode.  */
+  if (bp->curr_type != utf8_encoded)
+    {
+      mixed_string_buffer_flush_curr (bp);
+      bp->curr_type = utf8_encoded;
+      assert (bp->utf16_surr == 0);
+    }
  
    /* Test whether this character and the previous one form a Unicode
       surrogate character pair.  */
@@ -230,26 +602,44 @@ mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
  void
  mixed_string_buffer_destroy (struct mixed_string_buffer *bp)
  {
-  free (bp->utf8_buffer);
+  struct mixed_string_segment **segments = bp->segments;
+  size_t nsegments = bp->nsegments;
+  if (nsegments > 0)
+    {
+      size_t i;
+      for (i = 0; i < nsegments; i++)
+        free (segments[i]);
+    }
+  free (segments);
    free (bp->curr_buffer);
  }
  
-char *
+mixed_string_ty *
  mixed_string_buffer_result (struct mixed_string_buffer *bp)
  {
-  char *utf8_buffer;
-
-  /* Flush all into bp->utf8_buffer.  */
-  mixed_string_buffer_flush_utf16_surr (bp);
-  mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
-  /* NUL-terminate it.  */
-  mixed_string_buffer_grow_utf8_buffer (bp, 1);
-  bp->utf8_buffer[bp->utf8_buflen] = '\0';
-
-  /* Free curr_buffer.  */
-  utf8_buffer = bp->utf8_buffer;
-  free (bp->curr_buffer);
-
-  /* Return it.  */
-  return utf8_buffer;
+  mixed_string_buffer_flush_curr (bp);
+
+  {
+    struct mixed_string *ms = XMALLOC (struct mixed_string);
+    size_t nsegments = bp->nsegments;
+
+    if (nsegments > 0)
+      ms->segments =
+        (struct mixed_string_segment **)
+        xrealloc (bp->segments,
+                  nsegments * sizeof (struct mixed_string_segment *));
+    else
+      {
+        assert (bp->segments == NULL);
+        ms->segments = NULL;
+      }
+    ms->nsegments = nsegments;
+    ms->lcontext = bp->lcontext;
+    ms->logical_file_name = bp->logical_file_name;
+    ms->line_number = bp->line_number;
+
+    free (bp->curr_buffer);
+
+    return ms;
+  }
  }
diff --git a/gettext-tools/src/xg-mixed-string.h b/gettext-tools/src/xg-mixed-string.h

index b456e1af9c41a6e7bd3fe53f76733e786b70502a..8674c75459921b88de690a168090c3a5e8ff67fd 100644 (file)
--- a/gettext-tools/src/xg-mixed-string.h
+++ b/gettext-tools/src/xg-mixed-string.h
@@ -28,22 +28,95 @@ extern "C" {
  #endif
  
  
+/* A string that contains segments in the xgettext_current_source_encoding
+   and segments in UTF-8, in an alternating way.  */
+
+enum segment_type
+{
+  source_encoded,       /* bytes in the xgettext_current_source_encoding */
+  utf8_encoded          /* bytes in UTF-8 */
+};
+
+struct mixed_string_segment
+{
+  /*enum segment_type*/ unsigned char type;
+  size_t length;                        /* number of bytes in contents */
+  char contents[FLEXIBLE_ARRAY_MEMBER]; /* not NUL-terminated */
+};
+
+typedef struct mixed_string mixed_string_ty;
+struct mixed_string
+{
+  /* The alternating segments.  */
+  struct mixed_string_segment **segments;
+  size_t nsegments;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
+  const char *logical_file_name;
+  int line_number;
+};
+
+/* Creates a mixed_string that contains just a string in the
+   xgettext_current_source_encoding.  */
+extern mixed_string_ty *
+       mixed_string_alloc_simple (const char *string,
+                                  lexical_context_ty lcontext,
+                                  const char *logical_file_name,
+                                  int line_number);
+
+/* Creates a mixed_string that contains just a UTF-8 string.  */
+extern mixed_string_ty *
+       mixed_string_alloc_utf8 (const char *string,
+                                lexical_context_ty lcontext,
+                                const char *logical_file_name,
+                                int line_number);
+
+/* Creates a copy of a mixed_string.  */
+extern mixed_string_ty *
+       mixed_string_clone (const mixed_string_ty *ms1);
+
+/* Returns the contents of a mixed_string as a UTF-8 encoded string.
+   This may provoke an error if no source encoding has been specified
+   through --from-code.  The result is freshly allocated.  */
+extern char *
+       mixed_string_contents (const mixed_string_ty *ms);
+
+/* Frees a mixed_string.  */
+extern void
+       mixed_string_free (mixed_string_ty *ms);
+
+/* Returns the contents of a mixed_string as a UTF-8 encoded string,
+   and frees the argument.  */
+extern char *
+       mixed_string_contents_free1 (mixed_string_ty *ms);
+
+/* Concatenates two mixed_strings.  The result is freshly allocated.  */
+extern mixed_string_ty *
+       mixed_string_concat (const mixed_string_ty *ms1,
+                            const mixed_string_ty *ms2);
+/* Concatenates two mixed_strings, and frees the first argument.  */
+extern mixed_string_ty *
+       mixed_string_concat_free1 (mixed_string_ty *ms1,
+                                  const mixed_string_ty *ms2);
+
+
  /* A string buffer type that allows appending bytes (in the
     xgettext_current_source_encoding) or Unicode characters.
-   Returns the entire string in UTF-8 encoding.  */
+   When done, it returns the entire string as a mixed_string.  */
  
  struct mixed_string_buffer
  {
-  /* The part of the string that has already been converted to UTF-8.  */
-  char *utf8_buffer;
-  size_t utf8_buflen;
-  size_t utf8_allocated;
-  /* The first half of an UTF-16 surrogate character.  */
-  unsigned short utf16_surr;
-  /* The part of the string that is still in the source encoding.  */
+  /* The alternating segments that are already finished.  */
+  struct mixed_string_segment **segments;
+  size_t nsegments;
+  size_t nsegments_allocated;
+  /* The segment that is being accumulated.  */
+  int curr_type; /* An enum segment_type, or -1. */
    char *curr_buffer;
    size_t curr_buflen;
    size_t curr_allocated;
+  /* The first half of a UTF-16 surrogate character.  */
+  unsigned short utf16_surr;
    /* The lexical context.  Used only for error message purposes.  */
    lexical_context_ty lcontext;
    const char *logical_file_name;
@@ -58,22 +131,27 @@ extern void
                                   int line_number);
  
  /* Determines whether a mixed_string_buffer is still empty.  */
-extern bool mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp);
+extern bool
+       mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp);
  
  /* Appends a character to a mixed_string_buffer.  */
-extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
-                                             int c);
+extern void
+       mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c);
  
  /* Appends a Unicode character to a mixed_string_buffer.  */
-extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
-                                                int c);
+extern void
+       mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
+                                           int c);
  
-/* Frees the memory pointed to by a 'struct mixed_string_buffer'.  */
-extern void mixed_string_buffer_destroy (struct mixed_string_buffer *bp);
+/* Frees the memory pointed to by a 'struct mixed_string_buffer' and
+   discards the accumulated string.  */
+extern void
+       mixed_string_buffer_destroy (struct mixed_string_buffer *bp);
  
  /* Frees the memory pointed to by a 'struct mixed_string_buffer'
-   and returns the accumulated string in UTF-8.  */
-extern char * mixed_string_buffer_result (struct mixed_string_buffer *bp);
+   and returns the accumulated string.  */
+extern mixed_string_ty *
+       mixed_string_buffer_result (struct mixed_string_buffer *bp);
  
  
  #ifdef __cplusplus
author	Bruno Haible <bruno@clisp.org>
	Sun, 4 Nov 2018 19:23:31 +0000 (20:23 +0100)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 4 Nov 2018 23:25:34 +0000 (00:25 +0100)
gettext-tools/src/x-c.c		patch \| blob \| blame \| history
gettext-tools/src/x-csharp.c		patch \| blob \| blame \| history
gettext-tools/src/x-java.c		patch \| blob \| blame \| history
gettext-tools/src/x-javascript.c		patch \| blob \| blame \| history
gettext-tools/src/x-python.c		patch \| blob \| blame \| history
gettext-tools/src/x-rst.c		patch \| blob \| blame \| history
gettext-tools/src/x-vala.c		patch \| blob \| blame \| history
gettext-tools/src/xg-mixed-string.c		patch \| blob \| blame \| history
gettext-tools/src/xg-mixed-string.h		patch \| blob \| blame \| history