xgettext: Factor out commonly used mixed_string_buffer

author Daiki Ueno <ueno@gnu.org>

Fri, 2 May 2014 06:58:04 +0000 (15:58 +0900)

committer Daiki Ueno <ueno@gnu.org>

Fri, 2 May 2014 08:57:58 +0000 (17:57 +0900)
author Daiki Ueno <ueno@gnu.org>
Fri, 2 May 2014 06:58:04 +0000 (15:58 +0900)
committer Daiki Ueno <ueno@gnu.org>
Fri, 2 May 2014 08:57:58 +0000 (17:57 +0900)
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index c7c953fff89cc73baf8df10f33b3592a5549fb85..f4ead255a835a4953f802c55bbc3e84f2348c410 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,43 @@
+2014-05-02  Daiki Ueno  <ueno@gnu.org>
+
+       xgettext: Factor out commonly used mixed_string_buffer
+       * x-python.c (init_mixed_string_buffer)
+       (mixed_string_buffer_append_byte)
+       (mixed_string_buffer_append_unicode_grow)
+       (mixed_string_buffer_append_unicode)
+       (mixed_string_buffer_flush_utf16_surr)
+       (mixed_string_buffer_flush_curr_buffer)
+       (mixed_string_buffer_append, mixed_string_buffer_result)
+       (free_mixed_string_buffer): Move to...
+       * xgettext.c: ...here.
+       (mixed_string_buffer_alloc): Rename from init_mixed_string_buffer.
+       (mixed_string_buffer_append_to_curr_buffer): Rename from
+       mixed_string_buffer_append_byte.
+       (mixed_string_buffer_append_to_utf8_buffer): Rename from
+       mixed_string_buffer_append_unicode.
+       (mixed_string_buffer_grow_utf8_buffer): Rename from
+       mixed_string_buffer_append_unicode_grow.
+       (mixed_string_buffer_append_char): Split from
+       mixed_string_buffer_append.
+       (mixed_string_buffer_append_unicode): Split from
+       mixed_string_buffer_append.
+       (mixed_string_buffer_done): New function merging
+       mixed_string_buffer_result and free_mixed_string_buffer.
+       * xgettext.h (mixed_string_buffer): New struct moved from
+       x-python.c; add logical_file_name and line_number fields.
+       (mixed_string_buffer_alloc): New function declaration.
+       (mixed_string_buffer_append_char): New function declaration.
+       (mixed_string_buffer_append_unicode): New function declaration.
+       (mixed_string_buffer_done): New function declaration.
+       * x-javascript.c (init_mixed_string_buffer)
+       (mixed_string_buffer_append_byte)
+       (mixed_string_buffer_append_unicode_grow)
+       (mixed_string_buffer_append_unicode)
+       (mixed_string_buffer_flush_utf16_surr)
+       (mixed_string_buffer_flush_curr_buffer)
+       (mixed_string_buffer_append, mixed_string_buffer_result)
+       (free_mixed_string_buffer): Remove.
+
  2014-04-30  Daiki Ueno  <ueno@gnu.org>
  
         scheme: Recognize GIMP script-fu extension _"abc"
diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c

index 59bbcbe2eada1c86515b9c5643dc972e62a0a85e..45f67e22dac55239d9bf483f00be2778d868f00e 100644 (file)
--- a/gettext-tools/src/x-javascript.c
+++ b/gettext-tools/src/x-javascript.c
@@ -714,203 +714,6 @@ phase3_ungetc (int c)
     IS_UNICODE.  */
  #define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
  
-/* A string buffer type that allows appending bytes (in the
-   xgettext_current_source_encoding) or Unicode characters.
-   Returns the entire string in UTF-8 encoding.  */
-
-struct mixed_string_buffer
-{
-  /* The part of the string that has already been converted to UTF-8.  */
-  char *utf8_buffer;
-  size_t utf8_buflen;
-  size_t utf8_allocated;
-  /* The first half of an UTF-16 surrogate character.  */
-  unsigned short utf16_surr;
-  /* The part of the string that is still in the source encoding.  */
-  char *curr_buffer;
-  size_t curr_buflen;
-  size_t curr_allocated;
-  /* The lexical context.  Used only for error message purposes.  */
-  lexical_context_ty lcontext;
-};
-
-/* Initialize a 'struct mixed_string_buffer' to empty.  */
-static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
-{
-  bp->utf8_buffer = NULL;
-  bp->utf8_buflen = 0;
-  bp->utf8_allocated = 0;
-  bp->utf16_surr = 0;
-  bp->curr_buffer = NULL;
-  bp->curr_buflen = 0;
-  bp->curr_allocated = 0;
-  bp->lcontext = lcontext;
-}
-
-/* Auxiliary function: Append a byte to bp->curr.  */
-static inline void
-mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
-{
-  if (bp->curr_buflen == bp->curr_allocated)
-    {
-      bp->curr_allocated = 2 * bp->curr_allocated + 10;
-      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
-    }
-  bp->curr_buffer[bp->curr_buflen++] = c;
-}
-
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
-static inline void
-mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
-{
-  if (bp->utf8_buflen + count > bp->utf8_allocated)
-    {
-      size_t new_allocated = 2 * bp->utf8_allocated + 10;
-      if (new_allocated < bp->utf8_buflen + count)
-        new_allocated = bp->utf8_buflen + count;
-      bp->utf8_allocated = new_allocated;
-      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
-    }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
-   uc must be < 0x110000.  */
-static inline void
-mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
-{
-  unsigned char utf8buf[6];
-  int count = u8_uctomb (utf8buf, uc, 6);
-
-  if (count < 0)
-    /* The caller should have ensured that uc is not out-of-range.  */
-    abort ();
-
-  mixed_string_buffer_append_unicode_grow (bp, count);
-  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
-  bp->utf8_buflen += count;
-}
-
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
-static inline void
-mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
-{
-  if (bp->utf16_surr != 0)
-    {
-      /* A half surrogate is invalid, therefore use U+FFFD instead.  */
-      mixed_string_buffer_append_unicode (bp, 0xfffd);
-      bp->utf16_surr = 0;
-    }
-}
-
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
-static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
-{
-  if (bp->curr_buflen > 0)
-    {
-      char *curr;
-      size_t count;
-
-      mixed_string_buffer_append_byte (bp, '\0');
-
-      /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
-                                           logical_file_name, lineno);
-
-      /* Append it to bp->utf8_buffer.  */
-      count = strlen (curr);
-      mixed_string_buffer_append_unicode_grow (bp, count);
-      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
-      bp->utf8_buflen += count;
-
-      if (curr != bp->curr_buffer)
-        free (curr);
-      bp->curr_buflen = 0;
-    }
-}
-
-/* Append a character or Unicode character to a 'struct mixed_string_buffer'.  */
-static void
-mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
-{
-  if (IS_UNICODE (c))
-    {
-      /* Append a Unicode character.  */
-
-      /* Switch from multibyte character mode to Unicode character mode.  */
-      mixed_string_buffer_flush_curr_buffer (bp, line_number);
-
-      /* Test whether this character and the previous one form a Unicode
-         surrogate character pair.  */
-      if (bp->utf16_surr != 0
-          && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
-        {
-          unsigned short utf16buf[2];
-          ucs4_t uc;
-
-          utf16buf[0] = bp->utf16_surr;
-          utf16buf[1] = UNICODE_VALUE (c);
-          if (u16_mbtouc (&uc, utf16buf, 2) != 2)
-            abort ();
-
-          mixed_string_buffer_append_unicode (bp, uc);
-          bp->utf16_surr = 0;
-        }
-      else
-        {
-          mixed_string_buffer_flush_utf16_surr (bp);
-
-          if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
-            bp->utf16_surr = UNICODE_VALUE (c);
-          else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
-            {
-              /* A half surrogate is invalid, therefore use U+FFFD instead.  */
-              mixed_string_buffer_append_unicode (bp, 0xfffd);
-            }
-          else
-            mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
-        }
-    }
-  else
-    {
-      /* Append a single byte.  */
-
-      /* Switch from Unicode character mode to multibyte character mode.  */
-      mixed_string_buffer_flush_utf16_surr (bp);
-
-      /* When a newline is seen, convert the accumulated multibyte sequence.
-         This ensures a correct line number in the error message in case of
-         a conversion error.  The "- 1" is to account for the newline.  */
-      if (c == '\n')
-        mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
-
-      mixed_string_buffer_append_byte (bp, (unsigned char) c);
-    }
-}
-
-/* Return the string buffer's contents.  */
-static char *
-mixed_string_buffer_result (struct mixed_string_buffer *bp)
-{
-  /* Flush all into bp->utf8_buffer.  */
-  mixed_string_buffer_flush_utf16_surr (bp);
-  mixed_string_buffer_flush_curr_buffer (bp, line_number);
-  /* NUL-terminate it.  */
-  mixed_string_buffer_append_unicode_grow (bp, 1);
-  bp->utf8_buffer[bp->utf8_buflen] = '\0';
-  /* Return it.  */
-  return bp->utf8_buffer;
-}
-
-/* Free the memory pointed to by a 'struct mixed_string_buffer'.  */
-static inline void
-free_mixed_string_buffer (struct mixed_string_buffer *bp)
-{
-  free (bp->utf8_buffer);
-  free (bp->curr_buffer);
-}
-
  
  /* ========================== Reading of tokens.  ========================== */
  
@@ -1387,29 +1190,36 @@ phase5_get (token_ty *tp)
  
          /* Strings.  */
            {
-            struct mixed_string_buffer literal;
+            struct mixed_string_buffer *bp;
              int quote_char;
  
              case '"': case '\'':
                quote_char = c;
                lexical_context = lc_string;
                /* Start accumulating the string.  */
-              init_mixed_string_buffer (&literal, lc_string);
+              bp = mixed_string_buffer_alloc (lexical_context,
+                                              logical_file_name,
+                                              line_number);
                for (;;)
                  {
                    int uc = phase7_getuc (quote_char);
  
+                  bp->line_number = line_number;
+
                    if (uc == P7_EOF || uc == P7_STRING_END)
                      break;
  
                    if (IS_UNICODE (uc))
-                    assert (UNICODE_VALUE (uc) >= 0
-                            && UNICODE_VALUE (uc) < 0x110000);
-
-                  mixed_string_buffer_append (&literal, uc);
+                    {
+                      assert (UNICODE_VALUE (uc) >= 0
+                              && UNICODE_VALUE (uc) < 0x110000);
+                      mixed_string_buffer_append_unicode (bp,
+                                                          UNICODE_VALUE (uc));
+                    }
+                  else
+                    mixed_string_buffer_append_char (bp, uc);
                  }
-              tp->string = xstrdup (mixed_string_buffer_result (&literal));
-              free_mixed_string_buffer (&literal);
+              tp->string = mixed_string_buffer_done (bp);  /* owned by tp */
                tp->comment = add_reference (savable_comment);
                lexical_context = lc_outside;
                tp->type = last_token_type = token_type_string;
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c

index 4ebe0d71472dfb133992ace98b9ad68bef8da39c..da7468328b94ea792c301ff2657fa4b76ba3bcc5 100644 (file)
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -825,203 +825,6 @@ phase3_ungetc (int c)
     IS_UNICODE.  */
  #define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
  
-/* A string buffer type that allows appending bytes (in the
-   xgettext_current_source_encoding) or Unicode characters.
-   Returns the entire string in UTF-8 encoding.  */
-
-struct mixed_string_buffer
-{
-  /* The part of the string that has already been converted to UTF-8.  */
-  char *utf8_buffer;
-  size_t utf8_buflen;
-  size_t utf8_allocated;
-  /* The first half of an UTF-16 surrogate character.  */
-  unsigned short utf16_surr;
-  /* The part of the string that is still in the source encoding.  */
-  char *curr_buffer;
-  size_t curr_buflen;
-  size_t curr_allocated;
-  /* The lexical context.  Used only for error message purposes.  */
-  lexical_context_ty lcontext;
-};
-
-/* Initialize a 'struct mixed_string_buffer' to empty.  */
-static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
-{
-  bp->utf8_buffer = NULL;
-  bp->utf8_buflen = 0;
-  bp->utf8_allocated = 0;
-  bp->utf16_surr = 0;
-  bp->curr_buffer = NULL;
-  bp->curr_buflen = 0;
-  bp->curr_allocated = 0;
-  bp->lcontext = lcontext;
-}
-
-/* Auxiliary function: Append a byte to bp->curr.  */
-static inline void
-mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
-{
-  if (bp->curr_buflen == bp->curr_allocated)
-    {
-      bp->curr_allocated = 2 * bp->curr_allocated + 10;
-      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
-    }
-  bp->curr_buffer[bp->curr_buflen++] = c;
-}
-
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
-static inline void
-mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
-{
-  if (bp->utf8_buflen + count > bp->utf8_allocated)
-    {
-      size_t new_allocated = 2 * bp->utf8_allocated + 10;
-      if (new_allocated < bp->utf8_buflen + count)
-        new_allocated = bp->utf8_buflen + count;
-      bp->utf8_allocated = new_allocated;
-      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
-    }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
-   uc must be < 0x110000.  */
-static inline void
-mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
-{
-  unsigned char utf8buf[6];
-  int count = u8_uctomb (utf8buf, uc, 6);
-
-  if (count < 0)
-    /* The caller should have ensured that uc is not out-of-range.  */
-    abort ();
-
-  mixed_string_buffer_append_unicode_grow (bp, count);
-  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
-  bp->utf8_buflen += count;
-}
-
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
-static inline void
-mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
-{
-  if (bp->utf16_surr != 0)
-    {
-      /* A half surrogate is invalid, therefore use U+FFFD instead.  */
-      mixed_string_buffer_append_unicode (bp, 0xfffd);
-      bp->utf16_surr = 0;
-    }
-}
-
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
-static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
-{
-  if (bp->curr_buflen > 0)
-    {
-      char *curr;
-      size_t count;
-
-      mixed_string_buffer_append_byte (bp, '\0');
-
-      /* Convert from the source encoding to UTF-8.  */
-      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
-                                           logical_file_name, lineno);
-
-      /* Append it to bp->utf8_buffer.  */
-      count = strlen (curr);
-      mixed_string_buffer_append_unicode_grow (bp, count);
-      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
-      bp->utf8_buflen += count;
-
-      if (curr != bp->curr_buffer)
-        free (curr);
-      bp->curr_buflen = 0;
-    }
-}
-
-/* Append a character or Unicode character to a 'struct mixed_string_buffer'.  */
-static void
-mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
-{
-  if (IS_UNICODE (c))
-    {
-      /* Append a Unicode character.  */
-
-      /* Switch from multibyte character mode to Unicode character mode.  */
-      mixed_string_buffer_flush_curr_buffer (bp, line_number);
-
-      /* Test whether this character and the previous one form a Unicode
-         surrogate character pair.  */
-      if (bp->utf16_surr != 0
-          && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
-        {
-          unsigned short utf16buf[2];
-          ucs4_t uc;
-
-          utf16buf[0] = bp->utf16_surr;
-          utf16buf[1] = UNICODE_VALUE (c);
-          if (u16_mbtouc (&uc, utf16buf, 2) != 2)
-            abort ();
-
-          mixed_string_buffer_append_unicode (bp, uc);
-          bp->utf16_surr = 0;
-        }
-      else
-        {
-          mixed_string_buffer_flush_utf16_surr (bp);
-
-          if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
-            bp->utf16_surr = UNICODE_VALUE (c);
-          else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
-            {
-              /* A half surrogate is invalid, therefore use U+FFFD instead.  */
-              mixed_string_buffer_append_unicode (bp, 0xfffd);
-            }
-          else
-            mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
-        }
-    }
-  else
-    {
-      /* Append a single byte.  */
-
-      /* Switch from Unicode character mode to multibyte character mode.  */
-      mixed_string_buffer_flush_utf16_surr (bp);
-
-      /* When a newline is seen, convert the accumulated multibyte sequence.
-         This ensures a correct line number in the error message in case of
-         a conversion error.  The "- 1" is to account for the newline.  */
-      if (c == '\n')
-        mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
-
-      mixed_string_buffer_append_byte (bp, (unsigned char) c);
-    }
-}
-
-/* Return the string buffer's contents.  */
-static char *
-mixed_string_buffer_result (struct mixed_string_buffer *bp)
-{
-  /* Flush all into bp->utf8_buffer.  */
-  mixed_string_buffer_flush_utf16_surr (bp);
-  mixed_string_buffer_flush_curr_buffer (bp, line_number);
-  /* NUL-terminate it.  */
-  mixed_string_buffer_append_unicode_grow (bp, 1);
-  bp->utf8_buffer[bp->utf8_buflen] = '\0';
-  /* Return it.  */
-  return bp->utf8_buffer;
-}
-
-/* Free the memory pointed to by a 'struct mixed_string_buffer'.  */
-static inline void
-free_mixed_string_buffer (struct mixed_string_buffer *bp)
-{
-  free (bp->utf8_buffer);
-  free (bp->curr_buffer);
-}
-
  
  /* ========================== Reading of tokens.  ========================== */
  
@@ -1526,7 +1329,7 @@ phase5_get (token_ty *tp)
  
          /* Strings.  */
            {
-            struct mixed_string_buffer literal;
+            struct mixed_string_buffer *bp;
              int quote_char;
              bool interpret_ansic;
              bool interpret_unicode;
@@ -1598,23 +1401,30 @@ phase5_get (token_ty *tp)
                }
                backslash_counter = 0;
                /* Start accumulating the string.  */
-              init_mixed_string_buffer (&literal, lc_string);
+              bp = mixed_string_buffer_alloc (lexical_context,
+                                              logical_file_name,
+                                              line_number);
                for (;;)
                  {
                    int uc = phase7_getuc (quote_char, triple, interpret_ansic,
                                           interpret_unicode, &backslash_counter);
  
+                  bp->line_number = line_number;
+
                    if (uc == P7_EOF || uc == P7_STRING_END)
                      break;
  
                    if (IS_UNICODE (uc))
-                    assert (UNICODE_VALUE (uc) >= 0
-                            && UNICODE_VALUE (uc) < 0x110000);
-
-                  mixed_string_buffer_append (&literal, uc);
+                    {
+                      assert (UNICODE_VALUE (uc) >= 0
+                              && UNICODE_VALUE (uc) < 0x110000);
+                      mixed_string_buffer_append_unicode (bp,
+                                                          UNICODE_VALUE (uc));
+                    }
+                  else
+                    mixed_string_buffer_append_char (bp, uc);
                  }
-              tp->string = xstrdup (mixed_string_buffer_result (&literal));
-              free_mixed_string_buffer (&literal);
+              tp->string = mixed_string_buffer_done (bp);  /* owned by tp */
                tp->comment = add_reference (savable_comment);
                lexical_context = lc_outside;
                tp->type = token_type_string;
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index 42fdc316e5594688df80b20910bee0d1bafa9014..c1697c2315c66ddac449526c53cd5b4dc81a4266 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -66,6 +66,7 @@
  #include "color.h"
  #include "format.h"
  #include "propername.h"
+#include "unistr.h"
  #include "gettext.h"
  
  /* A convenience macro.  I don't like writing gettext() every time.  */
@@ -3068,6 +3069,186 @@ arglist_parser_done (struct arglist_parser *ap, int argnum)
  }
  
  
+struct mixed_string_buffer *
+mixed_string_buffer_alloc (lexical_context_ty lcontext,
+                           const char *logical_file_name,
+                           int line_number)
+{
+  struct mixed_string_buffer *bp = XMALLOC (struct mixed_string_buffer);
+  bp->utf8_buffer = NULL;       /* both buffers are grown lazily by xrealloc */
+  bp->utf8_buflen = 0;
+  bp->utf8_allocated = 0;
+  bp->utf16_surr = 0;           /* 0 means: no surrogate half is pending */
+  bp->curr_buffer = NULL;
+  bp->curr_buflen = 0;
+  bp->curr_allocated = 0;
+  bp->lcontext = lcontext;
+  bp->logical_file_name = logical_file_name;    /* pointer stored, not copied */
+  bp->line_number = line_number;
+  return bp;
+}
+
+/* Auxiliary function: Append byte c to bp->curr_buffer, growing if full.  */
+static inline void
+mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
+                                           unsigned char c)
+{
+  if (bp->curr_buflen == bp->curr_allocated)
+    {
+      bp->curr_allocated = 2 * bp->curr_allocated + 10;
+      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
+    }
+  bp->curr_buffer[bp->curr_buflen++] = c;
+}
+
+/* Auxiliary function: Ensure count more bytes fit into bp->utf8_buffer.  */
+static inline void
+mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp,
+                                         size_t count)
+{
+  if (bp->utf8_buflen + count > bp->utf8_allocated)
+    {
+      size_t new_allocated = 2 * bp->utf8_allocated + 10;
+      if (new_allocated < bp->utf8_buflen + count)  /* doubling not enough */
+        new_allocated = bp->utf8_buflen + count;
+      bp->utf8_allocated = new_allocated;
+      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
+    }
+}
+
+/* Auxiliary function: Append Unicode character uc, encoded as UTF-8, to
+   bp->utf8_buffer.  uc must be < 0x110000; aborts if u8_uctomb rejects it.  */
+static inline void
+mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
+                                           ucs4_t uc)
+{
+  unsigned char utf8buf[6];
+  int count = u8_uctomb (utf8buf, uc, 6);
+
+  if (count < 0)
+    /* The caller should have ensured that uc is not out-of-range.  */
+    abort ();
+
+  mixed_string_buffer_grow_utf8_buffer (bp, count);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
+  bp->utf8_buflen += count;
+}
+
+/* Auxiliary function: Emit U+FFFD for a pending, unpaired surrogate half.  */
+static inline void
+mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
+{
+  if (bp->utf16_surr != 0)
+    {
+      /* A half surrogate is invalid, therefore use U+FFFD instead.  */
+      mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
+      bp->utf16_surr = 0;
+    }
+}
+
+/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
+static inline void
+mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
+                                       int line_number)
+{
+  if (bp->curr_buflen > 0)
+    {
+      char *curr;
+      size_t count;
+
+      /* NUL-terminate the pending bytes; the strlen below relies on it.  */
+      mixed_string_buffer_append_to_curr_buffer (bp, '\0');
+      /* Convert from the source encoding to UTF-8.  */
+      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
+                                           bp->logical_file_name,
+                                           line_number);
+
+      /* Append it to bp->utf8_buffer.  */
+      count = strlen (curr);
+      mixed_string_buffer_grow_utf8_buffer (bp, count);
+      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
+      bp->utf8_buflen += count;
+      /* Free curr only if the conversion returned a new string.  */
+      if (curr != bp->curr_buffer)
+        free (curr);
+      bp->curr_buflen = 0;
+    }
+}
+
+void
+mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
+{
+  /* Switch from Unicode character mode to multibyte character mode.  */
+  mixed_string_buffer_flush_utf16_surr (bp);
+
+  /* When a newline is seen, convert the accumulated multibyte sequence.
+     This ensures a correct line number in the error message in case of
+     a conversion error.  The "- 1" is to account for the newline.  */
+  if (c == '\n')
+    mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1);
+  /* Queue the byte in bp->curr_buffer; it is converted to UTF-8 on flush.  */
+  mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
+}
+
+void
+mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
+{
+  /* Switch from multibyte character mode to Unicode character mode.  */
+  mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
+
+  /* Test whether this character and the previous one form a Unicode
+     surrogate character pair.  */
+  if (bp->utf16_surr != 0 && (c >= 0xdc00 && c < 0xe000))
+    {
+      unsigned short utf16buf[2];
+      ucs4_t uc;
+
+      utf16buf[0] = bp->utf16_surr;     /* the half remembered earlier */
+      utf16buf[1] = c;                  /* the half just received */
+      if (u16_mbtouc (&uc, utf16buf, 2) != 2)
+        abort ();
+
+      mixed_string_buffer_append_to_utf8_buffer (bp, uc);
+      bp->utf16_surr = 0;
+    }
+  else
+    {
+      mixed_string_buffer_flush_utf16_surr (bp);
+
+      if (c >= 0xd800 && c < 0xdc00)
+        bp->utf16_surr = c;     /* remember it; maybe completed by next call */
+      else if (c >= 0xdc00 && c < 0xe000)
+        {
+          /* A half surrogate is invalid, therefore use U+FFFD instead.  */
+          mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
+        }
+      else
+        mixed_string_buffer_append_to_utf8_buffer (bp, c);
+    }
+}
+
+char *
+mixed_string_buffer_done (struct mixed_string_buffer *bp)
+{
+  char *utf8_buffer;
+
+  /* Flush a pending surrogate half and pending bytes into utf8_buffer.  */
+  mixed_string_buffer_flush_utf16_surr (bp);
+  mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
+  /* NUL-terminate it; the terminator is not counted in utf8_buflen.  */
+  mixed_string_buffer_grow_utf8_buffer (bp, 1);
+  bp->utf8_buffer[bp->utf8_buflen] = '\0';
+
+  /* Free curr_buffer and bp itself, but keep utf8_buffer alive.  */
+  utf8_buffer = bp->utf8_buffer;
+  free (bp->curr_buffer);
+  free (bp);
+
+  /* Return it.  The caller now owns utf8_buffer and has to free it.  */
+  return utf8_buffer;
+}
+
+
  static message_ty *
  construct_header ()
  {
diff --git a/gettext-tools/src/xgettext.h b/gettext-tools/src/xgettext.h

index 2f8a084869f5998695fa831cba36ea9f03580d3d..81e82667f32b50f97352782d6ca08072c0bef2da 100644 (file)
--- a/gettext-tools/src/xgettext.h
+++ b/gettext-tools/src/xgettext.h
@@ -334,6 +334,47 @@ extern bool arglist_parser_decidedp (struct arglist_parser *ap, int argnum);
  extern void arglist_parser_done (struct arglist_parser *ap, int argnum);
  
  
+/* A string buffer type that allows appending bytes (in the
+   xgettext_current_source_encoding) or Unicode characters.
+   Returns the entire string in UTF-8 encoding.  */
+
+struct mixed_string_buffer
+{
+  /* The part of the string that has already been converted to UTF-8.  */
+  char *utf8_buffer;
+  size_t utf8_buflen;
+  size_t utf8_allocated;
+  /* The first half of an UTF-16 surrogate character.  */
+  unsigned short utf16_surr;
+  /* The part of the string that is still in the source encoding.  */
+  char *curr_buffer;
+  size_t curr_buflen;
+  size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
+  const char *logical_file_name;  /* passed to the encoding converter */
+  int line_number;                /* likewise; callers keep it up to date */
+};
+
+/* Creates a fresh, empty mixed_string_buffer.  */
+extern struct mixed_string_buffer *
+       mixed_string_buffer_alloc (lexical_context_ty lcontext,
+                                  const char *logical_file_name,
+                                  int line_number);
+
+/* Appends a single byte C (cast to unsigned char) to a mixed_string_buffer.  */
+extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
+                                             int c);
+
+/* Appends a Unicode character; UTF-16 surrogate halves are paired up.  */
+extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
+                                                int c);
+
+/* Frees BP and returns the accumulated string as a NUL-terminated UTF-8
+   string.  The caller owns the returned buffer and has to free it.  */
+extern char * mixed_string_buffer_done (struct mixed_string_buffer *bp);
+
+
  #ifdef __cplusplus
  }
  #endif
author	Daiki Ueno <ueno@gnu.org>
	Fri, 2 May 2014 06:58:04 +0000 (15:58 +0900)
committer	Daiki Ueno <ueno@gnu.org>
	Fri, 2 May 2014 08:57:58 +0000 (17:57 +0900)
gettext-tools/src/ChangeLog		patch \| blob \| blame \| history
gettext-tools/src/x-javascript.c		patch \| blob \| blame \| history
gettext-tools/src/x-python.c		patch \| blob \| blame \| history
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/src/xgettext.h		patch \| blob \| blame \| history