struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_name, token_type_string_literal */
+ char *string; /* for token_type_name */
+ mixed_string_ty *mixed_string; /* for token_type_string_literal */
refcounted_string_list_ty *comment; /* for token_type_string_literal,
token_type_objc_special */
long number;
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_name || tp->type == token_type_string_literal)
+ if (tp->type == token_type_name)
free (tp->string);
+ if (tp->type == token_type_string_literal)
+ mixed_string_free (tp->mixed_string);
if (tp->type == token_type_string_literal
|| tp->type == token_type_objc_special)
drop_reference (tp->comment);
if (relevant)
{
tp->type = token_type_string_literal;
- tp->string = mixed_string_buffer_result (&msb);
+ tp->mixed_string = mixed_string_buffer_result (&msb);
tp->comment = add_reference (savable_comment);
}
else
mixed_string_buffer_append_char (&msb, c);
}
tp->type = token_type_string_literal;
- tp->string = mixed_string_buffer_result (&msb);
+ tp->mixed_string = mixed_string_buffer_result (&msb);
tp->comment = add_reference (savable_comment);
return;
}
&& buf[1].type == token_type_number
&& buf[2].type == token_type_string_literal)
{
- logical_file_name = xstrdup (buf[2].string);
+ logical_file_name = mixed_string_contents (buf[2].mixed_string);
line_number = buf[1].number;
}
if (bufpos >= 2 && buf[0].type == token_type_number
&& buf[1].type == token_type_string_literal)
{
- logical_file_name = xstrdup (buf[1].string);
+ logical_file_name = mixed_string_contents (buf[1].mixed_string);
line_number = buf[0].number;
}
/* Turn PRIdXXX into "<PRIdXXX>". */
char *new_string = xasprintf ("<%s>", tp->string);
free (tp->string);
- tp->string = new_string;
+ tp->mixed_string =
+ mixed_string_alloc_utf8 (new_string, lc_string,
+ logical_file_name, line_number);
+ /* mixed_string_alloc_utf8 copies the bytes into a segment of its own,
+    so the temporary buffer must be released here or it is leaked.  */
+ free (new_string);
tp->comment = add_reference (savable_comment);
tp->type = token_type_string_literal;
}
for (;;)
{
token_ty tmp;
- size_t len;
phase8c_get (&tmp);
if (tmp.type != token_type_string_literal)
phase8c_unget (&tmp);
return;
}
- len = strlen (tp->string);
- tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
- strcpy (tp->string + len, tmp.string);
+ tp->mixed_string =
+ mixed_string_concat_free1 (tp->mixed_string, tmp.mixed_string);
free_token (&tmp);
}
}
/* This field is used only for xgettext_token_type_keyword. */
const struct callshapes *shapes;
- /* This field is used only for xgettext_token_type_string_literal,
- xgettext_token_type_keyword, xgettext_token_type_symbol. */
+ /* This field is used only for xgettext_token_type_keyword,
+ xgettext_token_type_symbol. */
char *string;
+ /* This field is used only for xgettext_token_type_string_literal. */
+ mixed_string_ty *mixed_string;
+
/* This field is used only for xgettext_token_type_string_literal. */
refcounted_string_list_ty *comment;
- /* These fields are only for
- xgettext_token_type_keyword,
- xgettext_token_type_string_literal. */
+ /* This field is used only for xgettext_token_type_keyword,
+ xgettext_token_type_string_literal. */
lex_pos_ty pos;
};
last_non_comment_line = newline_count;
tp->type = xgettext_token_type_string_literal;
- tp->string = token.string;
+ tp->mixed_string = token.mixed_string;
tp->comment = token.comment;
tp->pos.file_name = logical_file_name;
tp->pos.line_number = token.line_number;
continue;
case xgettext_token_type_string_literal:
- xgettext_current_source_encoding = po_charset_utf8;
- if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
- &token.pos, NULL, token.comment);
- else
- arglist_parser_remember (argparser, arg, token.string,
- inner_context,
- token.pos.file_name, token.pos.line_number,
- token.comment);
- xgettext_current_source_encoding = xgettext_global_source_encoding;
- drop_reference (token.comment);
+ {
+ char *string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+ xgettext_current_source_encoding = po_charset_utf8;
+ if (extract_all)
+ remember_a_message (mlp, NULL, string, inner_context,
+ &token.pos, NULL, token.comment);
+ else
+ arglist_parser_remember (argparser, arg, string,
+ inner_context,
+ token.pos.file_name,
+ token.pos.line_number,
+ token.comment);
+ xgettext_current_source_encoding = xgettext_global_source_encoding;
+ drop_reference (token.comment);
+ }
next_context_iter = null_context_list_iterator;
selectorcall_context_iter = null_context_list_iterator;
state = 0;
static inline void
comment_line_end (size_t chars_to_remove)
{
- char *buffer = mixed_string_buffer_result (&comment_buffer);
+ char *buffer =
+ mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
size_t buflen = strlen (buffer);
buflen -= chars_to_remove;
struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_string_literal, token_type_symbol */
+ char *string; /* for token_type_symbol */
+ mixed_string_ty *mixed_string; /* for token_type_string_literal */
refcounted_string_list_ty *comment; /* for token_type_string_literal */
int line_number;
int logical_line_number;
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+ if (tp->type == token_type_symbol)
free (tp->string);
if (tp->type == token_type_string_literal)
- drop_reference (tp->comment);
+ { /* a string literal token carries both a mixed_string and a comment
+      reference; release both */
+ mixed_string_free (tp->mixed_string);
+ drop_reference (tp->comment);
+ }
}
logical_file_name,
logical_line_number);
accumulate_escaped (&literal, '"');
- tp->string = mixed_string_buffer_result (&literal);
+ tp->mixed_string = mixed_string_buffer_result (&literal);
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string_literal;
/* No special treatment of newline and backslash here. */
mixed_string_buffer_append_unicode (&literal, c);
}
- tp->string = mixed_string_buffer_result (&literal);
+ tp->mixed_string = mixed_string_buffer_result (&literal);
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string_literal;
if (is_identifier_start (c))
{
struct mixed_string_buffer buffer;
+ mixed_string_ty *mixed_string;
+
mixed_string_buffer_init (&buffer, lexical_context,
logical_file_name, logical_line_number);
for (;;)
break;
}
phase4_ungetc (c);
- tp->string = mixed_string_buffer_result (&buffer);
+ mixed_string = mixed_string_buffer_result (&buffer);
+ tp->string = mixed_string_contents (mixed_string);
+ mixed_string_free (mixed_string);
tp->type = token_type_symbol;
return;
}
phase6_get (tp);
if (tp->type == token_type_string_literal)
{
- char *sum = tp->string;
- size_t sum_len = strlen (sum);
+ mixed_string_ty *sum = tp->mixed_string;
for (;;)
{
phase6_get (&token_after);
if (token_after.type != token_type_dot)
{
- char *addend = token3.string;
- size_t addend_len = strlen (addend);
-
- sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
- memcpy (sum + sum_len, addend, addend_len + 1);
- sum_len += addend_len;
+ sum = mixed_string_concat_free1 (sum, token3.mixed_string);
phase6_unget (&token_after);
free_token (&token3);
phase6_unget (&token2);
break;
}
- tp->string = sum;
+ tp->mixed_string = sum;
}
}
case token_type_string_literal:
{
+ char *string;
lex_pos_ty pos;
+
+ string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
xgettext_current_source_encoding = po_charset_utf8;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
+ remember_a_message (mlp, NULL, string, inner_context,
&pos, NULL, token.comment);
else
- arglist_parser_remember (argparser, arg, token.string,
+ arglist_parser_remember (argparser, arg, string,
inner_context,
pos.file_name, pos.line_number,
token.comment);
static inline void
comment_line_end (size_t chars_to_remove)
{
- char *buffer = mixed_string_buffer_result (&comment_buffer);
+ char *buffer =
+ mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
size_t buflen = strlen (buffer);
buflen -= chars_to_remove;
struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_string_literal, token_type_symbol */
+ char *string; /* for token_type_symbol; released in free_token */
+ mixed_string_ty *mixed_string; /* for token_type_string_literal; released
+                                    in free_token */
refcounted_string_list_ty *comment; /* for token_type_string_literal */
int line_number;
};
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+ if (tp->type == token_type_symbol)
free (tp->string);
if (tp->type == token_type_string_literal)
- drop_reference (tp->comment);
+ {
+ /* A mixed_string owns a segment array and the segments in it.  Plain
+    free() would release only the container and leak the rest, so use
+    mixed_string_free, which releases all three.  */
+ mixed_string_free (tp->mixed_string);
+ drop_reference (tp->comment);
+ }
}
mixed_string_buffer_init (&literal, lc_string,
logical_file_name, line_number);
accumulate_escaped (&literal, '"');
- tp->string = mixed_string_buffer_result (&literal);
+ tp->mixed_string = mixed_string_buffer_result (&literal);
tp->comment = add_reference (savable_comment);
tp->type = token_type_string_literal;
return;
phase5_get (tp);
if (tp->type == token_type_string_literal && phase6_last != token_type_rparen)
{
- char *sum = tp->string;
- size_t sum_len = strlen (sum);
+ mixed_string_ty *sum = tp->mixed_string;
for (;;)
{
phase5_get (&token_after);
if (token_after.type != token_type_dot)
{
- char *addend = token3.string;
- size_t addend_len = strlen (addend);
-
- sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
- memcpy (sum + sum_len, addend, addend_len + 1);
- sum_len += addend_len;
+ sum = mixed_string_concat_free1 (sum, token3.mixed_string);
phase5_unget (&token_after);
free_token (&token3);
phase5_unget (&token2);
break;
}
- tp->string = sum;
+ tp->mixed_string = sum;
}
phase6_last = tp->type;
}
case token_type_string_literal:
{
+ char *string;
lex_pos_ty pos;
+
+ string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
xgettext_current_source_encoding = po_charset_utf8;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
+ remember_a_message (mlp, NULL, string, inner_context,
&pos, NULL, token.comment);
else
- arglist_parser_remember (argparser, arg, token.string,
+ arglist_parser_remember (argparser, arg, string,
inner_context,
pos.file_name, pos.line_number,
token.comment);
static inline const char *
comment_line_end (size_t chars_to_remove)
{
- char *buffer = mixed_string_buffer_result (&comment_buffer);
+ char *buffer =
+ mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
size_t buflen = strlen (buffer) - chars_to_remove;
while (buflen >= 1
struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_string, token_type_symbol,
- token_type_keyword */
+ char *string; /* for token_type_symbol, token_type_keyword; released in
+                   free_token */
+ mixed_string_ty *mixed_string; /* for token_type_string; released in
+                                    free_token */
refcounted_string_list_ty *comment; /* for token_type_string */
int line_number;
};
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_string || tp->type == token_type_symbol
- || tp->type == token_type_keyword)
+ if (tp->type == token_type_symbol || tp->type == token_type_keyword)
free (tp->string);
if (tp->type == token_type_string)
- drop_reference (tp->comment);
+ { /* a string token carries both a mixed_string and a comment reference;
+      release both */
+ mixed_string_free (tp->mixed_string);
+ drop_reference (tp->comment);
+ }
}
else
mixed_string_buffer_append_char (&msb, uc);
}
- tp->string = mixed_string_buffer_result (&msb);
+ tp->mixed_string = mixed_string_buffer_result (&msb);
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = last_token_type = token_type_string;
phase5_get (tp);
if (tp->type == token_type_string)
{
- char *sum = tp->string;
- size_t sum_len = strlen (sum);
+ mixed_string_ty *sum = tp->mixed_string;
for (;;)
{
phase5_get (&token3);
if (token3.type == token_type_string)
{
- char *addend = token3.string;
- size_t addend_len = strlen (addend);
-
- sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
- memcpy (sum + sum_len, addend, addend_len + 1);
- sum_len += addend_len;
+ sum = mixed_string_concat_free1 (sum, token3.mixed_string);
free_token (&token3);
free_token (&token2);
phase5_unget (&token2);
break;
}
- tp->string = sum;
+ tp->mixed_string = sum;
}
}
case token_type_string:
{
+ char *string;
lex_pos_ty pos;
+
+ string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
xgettext_current_source_encoding = po_charset_utf8;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
+ remember_a_message (mlp, NULL, string, inner_context,
&pos, NULL, token.comment);
else
- arglist_parser_remember (argparser, arg, token.string,
+ arglist_parser_remember (argparser, arg, string,
inner_context,
pos.file_name, pos.line_number,
token.comment);
static inline const char *
comment_line_end ()
{
- char *buffer = mixed_string_buffer_result (&comment_buffer);
+ char *buffer =
+ mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
size_t buflen = strlen (buffer);
while (buflen >= 1
struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_string, token_type_symbol */
+ char *string; /* for token_type_symbol; released in free_token */
+ mixed_string_ty *mixed_string; /* for token_type_string; released in
+                                    free_token */
refcounted_string_list_ty *comment; /* for token_type_string */
int line_number;
};
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_string || tp->type == token_type_symbol)
+ if (tp->type == token_type_symbol)
free (tp->string);
if (tp->type == token_type_string)
- drop_reference (tp->comment);
+ { /* a string token carries both a mixed_string and a comment reference;
+      release both */
+ mixed_string_free (tp->mixed_string);
+ drop_reference (tp->comment);
+ }
}
else
mixed_string_buffer_append_char (&msb, uc);
}
- tp->string = mixed_string_buffer_result (&msb);
+ tp->mixed_string = mixed_string_buffer_result (&msb);
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string;
phase5_get (tp);
if (tp->type == token_type_string)
{
- char *sum = tp->string;
- size_t sum_len = strlen (sum);
+ mixed_string_ty *sum = tp->mixed_string;
for (;;)
{
- token_ty token2, *tp2 = NULL;
+ token_ty token2;
token_ty token3;
+ token_ty *tp2 = NULL;
phase5_get (&token2);
switch (token2.type)
if (tp2)
{
- char *addend = tp2->string;
- size_t addend_len = strlen (addend);
-
- sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
- memcpy (sum + sum_len, addend, addend_len + 1);
- sum_len += addend_len;
+ sum = mixed_string_concat_free1 (sum, tp2->mixed_string);
free_token (tp2);
continue;
phase5_unget (&token2);
break;
}
- tp->string = sum;
+ tp->mixed_string = sum;
}
}
case token_type_string:
{
+ char *string;
lex_pos_ty pos;
+
+ string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
xgettext_current_source_encoding = po_charset_utf8;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
+ remember_a_message (mlp, NULL, string, inner_context,
&pos, NULL, token.comment);
else
- arglist_parser_remember (argparser, arg, token.string,
+ arglist_parser_remember (argparser, arg, string,
inner_context,
pos.file_name, pos.line_number,
token.comment);
char *s1;
if (parse_string () != pr_parsed)
goto invalid_json;
- s1 = mixed_string_buffer_result (&stringbuf);
+ s1 = mixed_string_contents_free1 (
+ mixed_string_buffer_result (&stringbuf));
/* Parse a colon. */
c = phase2_getc ();
char *s2;
if (parse_string () != pr_parsed)
goto invalid_json;
- s2 = mixed_string_buffer_result (&stringbuf);
+ s2 = mixed_string_contents_free1 (
+ mixed_string_buffer_result (&stringbuf));
/* Parse a colon. */
c = phase2_getc ();
goto invalid_rsj;
if (r == pr_syntax || location != NULL)
goto invalid_json;
- location = mixed_string_buffer_result (&stringbuf);
+ location =
+ mixed_string_contents_free1 (
+ mixed_string_buffer_result (&stringbuf));
}
else if (strcmp (s2, "sourcebytes") == 0)
{
goto invalid_rsj;
if (r == pr_syntax || msgid != NULL)
goto invalid_json;
- msgid = mixed_string_buffer_result (&stringbuf);
+ msgid =
+ mixed_string_contents_free1 (
+ mixed_string_buffer_result (&stringbuf));
}
else
goto invalid_rsj;
struct token_ty
{
token_type_ty type;
- char *string; /* for token_type_symbol, token_type_string_literal */
+ char *string; /* for token_type_symbol; released in free_token */
+ mixed_string_ty *mixed_string; /* for token_type_string_literal; released
+                                    in free_token */
refcounted_string_list_ty *comment; /* for token_type_string_literal */
int line_number;
};
static inline void
free_token (token_ty *tp)
{
- if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+ if (tp->type == token_type_symbol)
free (tp->string);
if (tp->type == token_type_string_literal)
- drop_reference (tp->comment);
+ { /* a string literal token carries both a mixed_string and a comment
+      reference; release both */
+ mixed_string_free (tp->mixed_string);
+ drop_reference (tp->comment);
+ }
}
mixed_string_buffer_append_char (&msb, c);
}
/* Done accumulating the string. */
- tp->type = last_token_type =
- template ? token_type_string_template : token_type_string_literal;
- tp->string = mixed_string_buffer_result (&msb);
- tp->comment = add_reference (savable_comment);
+ if (template)
+ {
+ tp->type = token_type_string_template;
+ mixed_string_buffer_destroy (&msb);
+ }
+ else
+ {
+ tp->type = token_type_string_literal;
+ tp->mixed_string = mixed_string_buffer_result (&msb);
+ tp->comment = add_reference (savable_comment);
+ }
+ last_token_type = tp->type;
return;
}
phase3_get (tp);
if (tp->type == token_type_string_literal)
{
- char *sum = tp->string;
- size_t sum_len = strlen (sum);
+ mixed_string_ty *sum = tp->mixed_string;
for (;;)
{
phase3_get (&token3);
if (token3.type == token_type_string_literal)
{
- char *addend = token3.string;
- size_t addend_len = strlen (addend);
-
- sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
- memcpy (sum + sum_len, addend, addend_len + 1);
- sum_len += addend_len;
+ sum = mixed_string_concat_free1 (sum, token3.mixed_string);
free_token (&token3);
free_token (&token2);
phase3_unget (&token2);
break;
}
- tp->string = sum;
+ tp->mixed_string = sum;
}
}
case token_type_string_literal:
{
+ char *string;
lex_pos_ty pos;
+
+ string = mixed_string_contents (token.mixed_string);
+ mixed_string_free (token.mixed_string);
+
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
xgettext_current_source_encoding = po_charset_utf8;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, inner_context,
+ remember_a_message (mlp, NULL, string, inner_context,
&pos, NULL, token.comment);
else
{
struct arglist_parser *tmp_argparser;
tmp_argparser = arglist_parser_alloc (mlp, next_shapes);
- arglist_parser_remember (tmp_argparser, 1, token.string,
+ arglist_parser_remember (tmp_argparser, 1, string,
inner_context, pos.file_name,
pos.line_number, token.comment);
arglist_parser_done (tmp_argparser, 1);
}
else
- arglist_parser_remember (argparser, arg, token.string,
+ arglist_parser_remember (argparser, arg, string,
inner_context, pos.file_name,
pos.line_number, token.comment);
}
/* Specification. */
#include "xg-mixed-string.h"
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "error.h"
#include "error-progname.h"
+#include "flexmember.h"
+#include "msgl-ascii.h"
+#include "po-charset.h"
#include "unistr.h"
#include "xalloc.h"
#define _(str) gettext (str)
+/* Creates a new heap-allocated segment of the given TYPE whose contents
+   are a copy of the LENGTH bytes starting at STRING.  No terminating NUL
+   is stored.  */
+static inline struct mixed_string_segment *
+segment_alloc (enum segment_type type, const char *string, size_t length)
+{
+  size_t nbytes = FLEXSIZEOF (struct mixed_string_segment, contents, length);
+  struct mixed_string_segment *seg =
+    (struct mixed_string_segment *) xmalloc (nbytes);
+
+  memcpy (seg->contents, string, length);
+  seg->length = length;
+  seg->type = type;
+  return seg;
+}
+
+/* Returns a freshly allocated copy of SEGMENT: same type, same length,
+   same bytes.  */
+static inline struct mixed_string_segment *
+segment_clone (const struct mixed_string_segment *segment)
+{
+  struct mixed_string_segment *copy =
+    segment_alloc (segment->type, segment->contents, segment->length);
+
+  return copy;
+}
+
+/* Creates a mixed_string from the NUL-terminated STRING, recording
+   LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER in it.  An empty STRING
+   yields a mixed_string with no segments; otherwise the result has exactly
+   one segment holding a copy of STRING's bytes.  */
+mixed_string_ty *
+mixed_string_alloc_simple (const char *string,
+                           lexical_context_ty lcontext,
+                           const char *logical_file_name,
+                           int line_number)
+{
+  struct mixed_string *result = XMALLOC (struct mixed_string);
+
+  result->lcontext = lcontext;
+  result->logical_file_name = logical_file_name;
+  result->line_number = line_number;
+
+  if (string[0] != '\0')
+    {
+      enum segment_type type;
+
+      /* An optimization: a pure ASCII string in an ASCII or UTF-8 source
+         is tagged as UTF-8 right away.  Everything else is kept in the
+         source encoding.  */
+      if ((xgettext_current_source_encoding == po_charset_ascii
+           || xgettext_current_source_encoding == po_charset_utf8)
+          && is_ascii_string (string))
+        type = utf8_encoded;
+      else
+        type = source_encoded;
+
+      result->segments = XNMALLOC (1, struct mixed_string_segment *);
+      result->segments[0] = segment_alloc (type, string, strlen (string));
+      result->nsegments = 1;
+    }
+  else
+    {
+      /* An empty string.  */
+      result->segments = NULL;
+      result->nsegments = 0;
+    }
+
+  return result;
+}
+
+/* Creates a mixed_string from the NUL-terminated STRING, whose bytes are
+   tagged as utf8_encoded, recording LCONTEXT, LOGICAL_FILE_NAME and
+   LINE_NUMBER in it.  STRING is copied; the caller keeps ownership of it.
+   An empty STRING yields a mixed_string with no segments.  */
+mixed_string_ty *
+mixed_string_alloc_utf8 (const char *string,
+                         lexical_context_ty lcontext,
+                         const char *logical_file_name,
+                         int line_number)
+{
+  struct mixed_string *result = XMALLOC (struct mixed_string);
+
+  result->lcontext = lcontext;
+  result->logical_file_name = logical_file_name;
+  result->line_number = line_number;
+
+  if (string[0] != '\0')
+    {
+      result->segments = XNMALLOC (1, struct mixed_string_segment *);
+      result->segments[0] =
+        segment_alloc (utf8_encoded, string, strlen (string));
+      result->nsegments = 1;
+    }
+  else
+    {
+      /* An empty string.  */
+      result->segments = NULL;
+      result->nsegments = 0;
+    }
+
+  return result;
+}
+
+/* Returns a newly allocated deep copy of MS1: every segment is cloned,
+   and the lexical context, file name pointer and line number are carried
+   over unchanged.  */
+mixed_string_ty *
+mixed_string_clone (const mixed_string_ty *ms1)
+{
+  struct mixed_string *copy = XMALLOC (struct mixed_string);
+  size_t count = ms1->nsegments;
+
+  copy->lcontext = ms1->lcontext;
+  copy->logical_file_name = ms1->logical_file_name;
+  copy->line_number = ms1->line_number;
+  copy->nsegments = count;
+
+  if (count > 0)
+    {
+      size_t k;
+
+      copy->segments = XNMALLOC (count, struct mixed_string_segment *);
+      for (k = 0; k < count; k++)
+        copy->segments[k] = segment_clone (ms1->segments[k]);
+    }
+  else
+    copy->segments = NULL;
+
+  return copy;
+}
+
+/* Returns the contents of MS as a freshly allocated, NUL-terminated
+   string.  Segments tagged source_encoded are passed through
+   from_current_source_encoding; all other segments are copied verbatim.
+   If a segment other than the last one contains a NUL byte, the segments
+   after it are ignored.  */
+char *
+mixed_string_contents (const mixed_string_ty *ms)
+{
+  size_t count = ms->nsegments;
+
+  /* Trivial case: no segments at all.  */
+  if (count == 0)
+    return xstrdup ("");
+
+  /* Trivial case: a lone UTF-8 segment only needs a NUL appended.  */
+  if (count == 1 && ms->segments[0]->type == utf8_encoded)
+    {
+      const struct mixed_string_segment *only = ms->segments[0];
+      char *copy = XNMALLOC (only->length + 1, char);
+
+      memcpy (copy, only->contents, only->length);
+      copy[only->length] = '\0';
+      return copy;
+    }
+
+  /* General case.  First, stop after the first non-final segment that
+     contains a NUL character.  */
+  {
+    size_t k;
+
+    for (k = 0; k + 1 < count; k++)
+      {
+        const struct mixed_string_segment *seg = ms->segments[k];
+
+        if (memchr (seg->contents, '\0', seg->length) != NULL)
+          {
+            count = k + 1;
+            break;
+          }
+      }
+  }
+
+  {
+    /* Only the slots of source_encoded segments are ever filled in.  */
+    char **converted = XNMALLOC (count, char *);
+    size_t total = 0;
+    char *result;
+    char *out;
+    size_t k;
+
+    /* Pass 1: convert what needs converting and add up the lengths.  */
+    for (k = 0; k < count; k++)
+      {
+        const struct mixed_string_segment *seg = ms->segments[k];
+
+        if (seg->type != source_encoded)
+          {
+            total += seg->length;
+            continue;
+          }
+
+        {
+          /* Make a NUL-terminated copy to hand to the converter.  */
+          char *raw = XNMALLOC (seg->length + 1, char);
+          char *utf8;
+
+          memcpy (raw, seg->contents, seg->length);
+          raw[seg->length] = '\0';
+
+          utf8 = from_current_source_encoding (raw,
+                                               ms->lcontext,
+                                               ms->logical_file_name,
+                                               ms->line_number);
+          /* The converter may return its argument itself; the copy is
+             released only when a distinct string came back.  */
+          if (utf8 != raw)
+            free (raw);
+
+          converted[k] = utf8;
+          total += strlen (utf8);
+        }
+      }
+
+    /* Pass 2: assemble the pieces.  */
+    result = XNMALLOC (total + 1, char);
+    out = result;
+    for (k = 0; k < count; k++)
+      {
+        const struct mixed_string_segment *seg = ms->segments[k];
+
+        if (seg->type == source_encoded)
+          {
+            out = stpcpy (out, converted[k]);
+            free (converted[k]);
+          }
+        else
+          {
+            memcpy (out, seg->contents, seg->length);
+            out += seg->length;
+          }
+      }
+    assert (out == result + total);
+    *out = '\0';
+
+    free (converted);
+    return result;
+  }
+}
+
+/* Releases MS together with its segment array and every segment stored
+   in that array.  */
+void
+mixed_string_free (mixed_string_ty *ms)
+{
+  size_t k;
+
+  for (k = 0; k < ms->nsegments; k++)
+    free (ms->segments[k]);
+  free (ms->segments);
+  free (ms);
+}
+
+/* Returns the contents of MS as a freshly allocated string (see
+   mixed_string_contents) and then frees MS.  The caller must not use MS
+   afterwards.  */
+char *
+mixed_string_contents_free1 (mixed_string_ty *ms)
+{
+  char *result = mixed_string_contents (ms);
+
+  mixed_string_free (ms);
+  return result;
+}
+
+/* Returns a newly allocated mixed_string holding the segments of MS1
+   followed by those of MS2.  Neither argument is modified or freed.  When
+   the last segment of MS1 and the first segment of MS2 have the same type,
+   the two are fused into a single segment.  In the general case the
+   lexical context, file name and line number are taken from MS1; when one
+   argument is empty, the result is a clone of the other one.  */
+mixed_string_ty *
+mixed_string_concat (const mixed_string_ty *ms1,
+                     const mixed_string_ty *ms2)
+{
+  /* Trivial cases: one side is empty.  */
+  if (ms2->nsegments == 0)
+    return mixed_string_clone (ms1);
+  if (ms1->nsegments == 0)
+    return mixed_string_clone (ms2);
+
+  /* General case.  */
+  {
+    const struct mixed_string_segment *tail1 =
+      ms1->segments[ms1->nsegments - 1];
+    const struct mixed_string_segment *head2 = ms2->segments[0];
+    /* keep1 = number of leading MS1 segments cloned one-to-one;
+       skip2 = number of leading MS2 segments consumed by the fusion.  */
+    size_t keep1 = ms1->nsegments;
+    size_t skip2 = 0;
+    struct mixed_string *result = XMALLOC (struct mixed_string);
+    size_t total;
+    size_t out = 0;
+    size_t k;
+
+    if (tail1->type == head2->type)
+      {
+        keep1 -= 1;
+        skip2 = 1;
+      }
+    total = keep1 + ms2->nsegments;
+
+    result->segments = XNMALLOC (total, struct mixed_string_segment *);
+
+    for (k = 0; k < keep1; k++)
+      result->segments[out++] = segment_clone (ms1->segments[k]);
+
+    if (skip2 > 0)
+      {
+        /* Combine the last segment of ms1 with the first segment of ms2.  */
+        size_t len1 = tail1->length;
+        size_t len2 = head2->length;
+        struct mixed_string_segment *joined =
+          (struct mixed_string_segment *)
+          xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
+                               len1 + len2));
+
+        joined->type = head2->type;
+        joined->length = len1 + len2;
+        memcpy (joined->contents, tail1->contents, len1);
+        memcpy (joined->contents + len1, head2->contents, len2);
+        result->segments[out++] = joined;
+      }
+
+    for (k = skip2; k < ms2->nsegments; k++)
+      result->segments[out++] = segment_clone (ms2->segments[k]);
+
+    assert (out == total);
+    result->nsegments = total;
+    result->lcontext = ms1->lcontext;
+    result->logical_file_name = ms1->logical_file_name;
+    result->line_number = ms1->line_number;
+
+    return result;
+  }
+}
+
+/* Like mixed_string_concat, but consumes MS1: its segments are moved into
+   the result rather than cloned, and MS1 itself is freed (or, when MS2 is
+   empty, returned as is).  The caller must continue with the return value
+   only.  MS2 is neither modified nor freed.  */
+mixed_string_ty *
+mixed_string_concat_free1 (mixed_string_ty *ms1, const mixed_string_ty *ms2)
+{
+  /* Trivial cases: one side is empty.  */
+  if (ms2->nsegments == 0)
+    return ms1;
+  if (ms1->nsegments == 0)
+    {
+      mixed_string_free (ms1);
+      return mixed_string_clone (ms2);
+    }
+
+  /* General case.  */
+  {
+    struct mixed_string_segment *tail1 = ms1->segments[ms1->nsegments - 1];
+    const struct mixed_string_segment *head2 = ms2->segments[0];
+    /* keep1 = number of leading MS1 segments moved over one-to-one;
+       skip2 = number of leading MS2 segments consumed by the fusion.  */
+    size_t keep1 = ms1->nsegments;
+    size_t skip2 = 0;
+    struct mixed_string *result = XMALLOC (struct mixed_string);
+    size_t total;
+    size_t out = 0;
+    size_t k;
+
+    if (tail1->type == head2->type)
+      {
+        keep1 -= 1;
+        skip2 = 1;
+      }
+    total = keep1 + ms2->nsegments;
+
+    result->segments = XNMALLOC (total, struct mixed_string_segment *);
+
+    /* Ownership of these segments passes from ms1 to the result.  */
+    for (k = 0; k < keep1; k++)
+      result->segments[out++] = ms1->segments[k];
+
+    if (skip2 > 0)
+      {
+        /* Combine the last segment of ms1 with the first segment of ms2.  */
+        size_t len1 = tail1->length;
+        size_t len2 = head2->length;
+        struct mixed_string_segment *joined =
+          (struct mixed_string_segment *)
+          xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
+                               len1 + len2));
+
+        joined->type = head2->type;
+        joined->length = len1 + len2;
+        memcpy (joined->contents, tail1->contents, len1);
+        memcpy (joined->contents + len1, head2->contents, len2);
+        result->segments[out++] = joined;
+
+        /* The old last segment was not moved, so it is released here.  */
+        free (tail1);
+      }
+
+    for (k = skip2; k < ms2->nsegments; k++)
+      result->segments[out++] = segment_clone (ms2->segments[k]);
+
+    assert (out == total);
+    free (ms1->segments);
+    result->nsegments = total;
+    result->lcontext = ms1->lcontext;
+    result->logical_file_name = ms1->logical_file_name;
+    result->line_number = ms1->line_number;
+    free (ms1);
+
+    return result;
+  }
+}
+
+
void
mixed_string_buffer_init (struct mixed_string_buffer *bp,
lexical_context_ty lcontext,
const char *logical_file_name,
int line_number)
{
- bp->utf8_buffer = NULL;
- bp->utf8_buflen = 0;
- bp->utf8_allocated = 0;
- bp->utf16_surr = 0;
+ bp->segments = NULL;
+ bp->nsegments = 0;
+ bp->nsegments_allocated = 0;
+ bp->curr_type = -1;
bp->curr_buffer = NULL;
bp->curr_buflen = 0;
bp->curr_allocated = 0;
+ bp->utf16_surr = 0;
bp->lcontext = lcontext;
bp->logical_file_name = logical_file_name;
bp->line_number = line_number;
bool
mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp)
{
- return (bp->utf8_buflen == 0 && bp->utf16_surr == 0 && bp->curr_buflen == 0);
+ return (bp->nsegments == 0 && bp->curr_buflen == 0); /* no segment stored
+    in bp->segments and no bytes pending in bp->curr_buffer */
+}
+
+/* Auxiliary function: Ensure count more bytes are available in
+ bp->curr_buffer.  When the buffer is too small, its capacity becomes
+ 2 * bp->curr_allocated + 10, or bp->curr_buflen + count if that is
+ larger.  bp->curr_buflen itself is not changed. */
+static inline void
+mixed_string_buffer_grow_curr_buffer (struct mixed_string_buffer *bp,
+ size_t count)
+{
+ if (bp->curr_buflen + count > bp->curr_allocated)
+ {
+ size_t new_allocated = 2 * bp->curr_allocated + 10;
+ if (new_allocated < bp->curr_buflen + count)
+ new_allocated = bp->curr_buflen + count;
+ bp->curr_allocated = new_allocated;
+ bp->curr_buffer = xrealloc (bp->curr_buffer, new_allocated);
+ }
}
/* Auxiliary function: Append a byte to bp->curr. */
bp->curr_buffer[bp->curr_buflen++] = c;
}
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8. */
-static inline void
-mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp,
- size_t count)
-{
- if (bp->utf8_buflen + count > bp->utf8_allocated)
- {
- size_t new_allocated = 2 * bp->utf8_allocated + 10;
- if (new_allocated < bp->utf8_buflen + count)
- new_allocated = bp->utf8_buflen + count;
- bp->utf8_allocated = new_allocated;
- bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
- }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
- uc must be < 0x110000. */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, append a
+ Unicode character to bp->curr_buffer. uc must be < 0x110000. */
static inline void
mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
ucs4_t uc)
/* The caller should have ensured that uc is not out-of-range. */
abort ();
- mixed_string_buffer_grow_utf8_buffer (bp, count);
- memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
- bp->utf8_buflen += count;
+ mixed_string_buffer_grow_curr_buffer (bp, count);
+ memcpy (bp->curr_buffer + bp->curr_buflen, utf8buf, count);
+ bp->curr_buflen += count;
}
-/* Auxiliary function: Handle the attempt to append a lone surrogate to
- bp->utf8. */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, handle the
+ attempt to append a lone surrogate to bp->curr_buffer. */
static void
mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp,
ucs4_t uc)
mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
}
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */
+/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, flush
+ bp->utf16_surr into bp->curr_buffer. */
static inline void
mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
{
}
}
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. */
+/* Auxiliary function: Append a segment to bp->segments.  The pointer
+ newseg is stored as is, not copied.  When the array is full, it is
+ first enlarged to 2 * bp->nsegments_allocated + 1 entries. */
static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
- int line_number)
+mixed_string_buffer_add_segment (struct mixed_string_buffer *bp,
+ struct mixed_string_segment *newseg)
{
- if (bp->curr_buflen > 0)
+ if (bp->nsegments == bp->nsegments_allocated)
{
- char *curr;
- size_t count;
-
- mixed_string_buffer_append_to_curr_buffer (bp, '\0');
-
- /* Convert from the source encoding to UTF-8. */
- curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
- bp->logical_file_name,
- line_number);
-
- /* Append it to bp->utf8_buffer. */
- count = strlen (curr);
- mixed_string_buffer_grow_utf8_buffer (bp, count);
- memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
- bp->utf8_buflen += count;
+ size_t new_allocated =
+ bp->nsegments_allocated = 2 * bp->nsegments_allocated + 1;
+ bp->segments =
+ (struct mixed_string_segment **)
+ xrealloc (bp->segments,
+ new_allocated * sizeof (struct mixed_string_segment *));
+ }
+ bp->segments[bp->nsegments++] = newseg;
+}
- if (curr != bp->curr_buffer)
- free (curr);
+/* Auxiliary function: Flush bp->curr_buffer and bp->utf16_surr into
+ bp->segments.
+ In utf8_encoded mode, a pending bp->utf16_surr is flushed into
+ bp->curr_buffer first. Then, unless bp->curr_type is -1, a non-empty
+ bp->curr_buffer is handed to segment_alloc and the resulting segment is
+ appended to bp->segments, and bp->curr_buflen is reset to 0.
+ bp->curr_type itself is left unchanged; callers set it afterwards. */
+static void
+mixed_string_buffer_flush_curr (struct mixed_string_buffer *bp)
+{
+ if (bp->curr_type == utf8_encoded)
+ mixed_string_buffer_flush_utf16_surr (bp);
+ if (bp->curr_type != -1)
+ {
+ if (bp->curr_buflen > 0)
+ {
+ struct mixed_string_segment *segment =
+ segment_alloc (bp->curr_type, bp->curr_buffer, bp->curr_buflen);
+ mixed_string_buffer_add_segment (bp, segment);
+ }
bp->curr_buflen = 0;
}
}
void
mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
{
- /* Switch from Unicode character mode to multibyte character mode. */
- mixed_string_buffer_flush_utf16_surr (bp);
-
- /* When a newline is seen, convert the accumulated multibyte sequence.
- This ensures a correct line number in the error message in case of
- a conversion error. The "- 1" is to account for the newline. */
- if (c == '\n')
- mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1);
+ /* Switch to multibyte character mode. If a segment of another type (or
+ no segment at all) was being accumulated, flush it into bp->segments
+ first, so that the byte below starts a fresh source_encoded segment. */
+ if (bp->curr_type != source_encoded)
+ {
+ mixed_string_buffer_flush_curr (bp);
+ bp->curr_type = source_encoded;
+ }
- mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
+ mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
}
void
mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
{
- /* Switch from multibyte character mode to Unicode character mode. */
- mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
+ /* Switch to Unicode character mode. */
+ if (bp->curr_type != utf8_encoded)
+ {
+ mixed_string_buffer_flush_curr (bp);
+ bp->curr_type = utf8_encoded;
+ assert (bp->utf16_surr == 0);
+ }
/* Test whether this character and the previous one form a Unicode
surrogate character pair. */
void
mixed_string_buffer_destroy (struct mixed_string_buffer *bp)
{
- free (bp->utf8_buffer);
+ struct mixed_string_segment **segments = bp->segments; /* Only memory referenced by *bp is released; bp itself is not freed. */
+ size_t nsegments = bp->nsegments;
+ if (nsegments > 0)
+ {
+ size_t i;
+ for (i = 0; i < nsegments; i++)
+ free (segments[i]); /* Free each finished segment. */
+ }
+ free (segments); /* Free the array of segment pointers. */
free (bp->curr_buffer);
}
-char *
+mixed_string_ty *
mixed_string_buffer_result (struct mixed_string_buffer *bp)
{
- char *utf8_buffer;
-
- /* Flush all into bp->utf8_buffer. */
- mixed_string_buffer_flush_utf16_surr (bp);
- mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
- /* NUL-terminate it. */
- mixed_string_buffer_grow_utf8_buffer (bp, 1);
- bp->utf8_buffer[bp->utf8_buflen] = '\0';
-
- /* Free curr_buffer. */
- utf8_buffer = bp->utf8_buffer;
- free (bp->curr_buffer);
-
- /* Return it. */
- return utf8_buffer;
+ mixed_string_buffer_flush_curr (bp); /* Move the segment being accumulated, if any, into bp->segments. */
+
+ {
+ struct mixed_string *ms = XMALLOC (struct mixed_string);
+ size_t nsegments = bp->nsegments;
+
+ if (nsegments > 0)
+ ms->segments =
+ (struct mixed_string_segment **)
+ xrealloc (bp->segments,
+ nsegments * sizeof (struct mixed_string_segment *)); /* Take over the buffer's array, resized to exactly nsegments entries. */
+ else
+ {
+ assert (bp->segments == NULL); /* With no segments, the array is expected to never have been allocated. */
+ ms->segments = NULL;
+ }
+ ms->nsegments = nsegments;
+ ms->lcontext = bp->lcontext;
+ ms->logical_file_name = bp->logical_file_name; /* The pointer is copied, not the string. */
+ ms->line_number = bp->line_number;
+
+ free (bp->curr_buffer); /* The accumulation buffer is not part of the result. */
+
+ return ms;
+ }
}
#endif
+/* A string that contains segments in the xgettext_current_source_encoding
+ and segments in UTF-8, in an alternating way. */
+
+enum segment_type
+{
+ source_encoded, /* Bytes in the xgettext_current_source_encoding. */
+ utf8_encoded /* Bytes in UTF-8. */
+};
+
+struct mixed_string_segment
+{
+ /*enum segment_type*/ unsigned char type;
+ size_t length; /* Presumably the number of bytes in contents - TODO confirm against segment_alloc. */
+ char contents[FLEXIBLE_ARRAY_MEMBER]; /* The segment's bytes, stored inline after the header. */
+};
+
+typedef struct mixed_string mixed_string_ty;
+struct mixed_string
+{
+ /* The alternating segments. */
+ struct mixed_string_segment **segments; /* NULL when nsegments is 0, as built by mixed_string_buffer_result. */
+ size_t nsegments; /* Number of entries in segments. */
+ /* The lexical context. Used only for error message purposes. */
+ lexical_context_ty lcontext;
+ const char *logical_file_name;
+ int line_number;
+};
+
+/* Creates a mixed_string that contains just a string in the
+ xgettext_current_source_encoding. */
+extern mixed_string_ty *
+ mixed_string_alloc_simple (const char *string,
+ lexical_context_ty lcontext,
+ const char *logical_file_name,
+ int line_number);
+
+/* Creates a mixed_string that contains just a UTF-8 string. */
+extern mixed_string_ty *
+ mixed_string_alloc_utf8 (const char *string,
+ lexical_context_ty lcontext,
+ const char *logical_file_name,
+ int line_number);
+
+/* Creates a copy of a mixed_string. */
+extern mixed_string_ty *
+ mixed_string_clone (const mixed_string_ty *ms1);
+
+/* Returns the contents of a mixed_string as a UTF-8 encoded string.
+ This may provoke an error if no source encoding has been specified
+ through --from-code. The result is freshly allocated. */
+extern char *
+ mixed_string_contents (const mixed_string_ty *ms);
+
+/* Frees a mixed_string. */
+extern void
+ mixed_string_free (mixed_string_ty *ms);
+
+/* Returns the contents of a mixed_string as a UTF-8 encoded string,
+ and frees the argument. */
+extern char *
+ mixed_string_contents_free1 (mixed_string_ty *ms);
+
+/* Concatenates two mixed_strings. */
+extern mixed_string_ty *
+ mixed_string_concat (const mixed_string_ty *ms1,
+ const mixed_string_ty *ms2);
+/* Concatenates two mixed_strings, and frees the first argument. */
+extern mixed_string_ty *
+ mixed_string_concat_free1 (mixed_string_ty *ms1,
+ const mixed_string_ty *ms2);
+
+
/* A string buffer type that allows appending bytes (in the
xgettext_current_source_encoding) or Unicode characters.
- Returns the entire string in UTF-8 encoding. */
+ When done, it returns the entire string as a mixed_string. */
struct mixed_string_buffer
{
- /* The part of the string that has already been converted to UTF-8. */
- char *utf8_buffer;
- size_t utf8_buflen;
- size_t utf8_allocated;
- /* The first half of an UTF-16 surrogate character. */
- unsigned short utf16_surr;
- /* The part of the string that is still in the source encoding. */
+ /* The alternating segments that are already finished. */
+ struct mixed_string_segment **segments;
+ size_t nsegments;
+ size_t nsegments_allocated;
+ /* The segment that is being accumulated. */
+ int curr_type; /* An enum segment_type, or -1. */
char *curr_buffer;
size_t curr_buflen;
size_t curr_allocated;
+ /* The first half of an UTF-16 surrogate character. */
+ unsigned short utf16_surr;
/* The lexical context. Used only for error message purposes. */
lexical_context_ty lcontext;
const char *logical_file_name;
int line_number);
/* Determines whether a mixed_string_buffer is still empty. */
-extern bool mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp);
+extern bool
+ mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp);
/* Appends a character to a mixed_string_buffer. */
-extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
- int c);
+extern void
+ mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c);
/* Appends a Unicode character to a mixed_string_buffer. */
-extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
- int c);
+extern void
+ mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
+ int c);
-/* Frees the memory pointed to by a 'struct mixed_string_buffer'. */
-extern void mixed_string_buffer_destroy (struct mixed_string_buffer *bp);
+/* Frees the memory pointed to by a 'struct mixed_string_buffer' and
+ discards the accumulated string. The struct itself is not freed. */
+extern void
+ mixed_string_buffer_destroy (struct mixed_string_buffer *bp);
/* Frees the memory pointed to by a 'struct mixed_string_buffer'
- and returns the accumulated string in UTF-8. */
-extern char * mixed_string_buffer_result (struct mixed_string_buffer *bp);
+ and returns the accumulated string as a newly allocated mixed_string. */
+extern mixed_string_ty *
+ mixed_string_buffer_result (struct mixed_string_buffer *bp);
#ifdef __cplusplus