From: Bruno Haible Date: Sun, 4 Nov 2018 19:23:31 +0000 (+0100) Subject: xgettext: Delay calls to from_current_source_encoding, part 1. X-Git-Tag: v0.20~259 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e91cc1c908a917ec5eefbf7eb110d9714a0b6211;p=thirdparty%2Fgettext.git xgettext: Delay calls to from_current_source_encoding, part 1. * xg-mixed-string.h (enum segment_type, struct mixed_string_segment, struct mixed_string, mixed_string_ty): New types. (mixed_string_alloc_simple, mixed_string_alloc_utf8, mixed_string_clone, mixed_string_contents, mixed_string_free, mixed_string_contents_free1, mixed_string_concat, mixed_string_concat_free1): New declarations. (struct mixed_string_buffer): Change internal representation to use segments. (mixed_string_buffer_result): Change return type to 'mixed_string_ty *'. * xg-mixed-string.c: Include flexmember.h, msgl-ascii.h, po-charset.h. (segment_alloc, segment_clone, mixed_string_alloc_simple, mixed_string_alloc_utf8, mixed_string_clone, mixed_string_contents, mixed_string_free, mixed_string_contents_free1, mixed_string_concat, mixed_string_concat_free1): New functions. (mixed_string_buffer_init, mixed_string_buffer_is_empty): Change to match new internal representation. (mixed_string_buffer_grow_curr_buffer): New function. (mixed_string_buffer_grow_utf8_buffer): Remove function. (mixed_string_buffer_append_to_utf8_buffer): Update accordingly. (mixed_string_buffer_flush_curr_buffer): Remove function. (mixed_string_buffer_add_segment, mixed_string_buffer_flush_curr): New functions. (mixed_string_buffer_append_char, mixed_string_buffer_append_unicode, mixed_string_buffer_destroy, mixed_string_buffer_result): Change to match new internal representation. * x-c.c (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase5_get, phase6_get, phase8a_get): For tokens of type token_type_string_literal, use a mixed_string. (phase8_get): Use mixed_string_concat_free1. 
(struct xgettext_token_ty): New field 'mixed_string'. (x_c_lex, extract_parenthesized): For tokens of type xgettext_token_type_string_literal, use a mixed_string. * x-csharp.c (comment_line_end): Update. (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase6_get): For tokens of type token_type_string_literal, use a mixed_string. (phase7_get): Use mixed_string_concat_free1. (extract_parenthesized): For tokens of type token_type_string_literal, use a mixed_string. * x-java.c (comment_line_end): Update. (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase5_get): For tokens of type token_type_string_literal, use a mixed_string. (phase6_get): Use mixed_string_concat_free1. (extract_parenthesized): For tokens of type token_type_string_literal, use a mixed_string. * x-javascript.c (comment_line_end): Update. (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase5_get): For tokens of type token_type_string, use a mixed_string. (x_javascript_lex): Use mixed_string_concat_free1. (extract_balanced): For tokens of type token_type_string, use a mixed_string. * x-python.c (comment_line_end): Update. (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase5_get): For tokens of type token_type_string, use a mixed_string. (x_python_lex): Use mixed_string_concat_free1. (extract_balanced): For tokens of type token_type_string, use a mixed_string. * x-rst.c (extract_rsj): Update. * x-vala.c (struct token_ty): New field 'mixed_string'. (free_token): Update accordingly. (phase3_get): For tokens of type token_type_string_literal, use a mixed_string. For tokens of type token_type_string_template, fix a memory leak. (x_vala_lex): Use mixed_string_concat_free1. (extract_balanced): For tokens of type token_type_string_literal, use a mixed_string. 
--- diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c index fe1c6ebe6..a8a0b84fa 100644 --- a/gettext-tools/src/x-c.c +++ b/gettext-tools/src/x-c.c @@ -952,7 +952,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_name, token_type_string_literal */ + char *string; /* for token_type_name */ + mixed_string_ty *mixed_string; /* for token_type_string_literal */ refcounted_string_list_ty *comment; /* for token_type_string_literal, token_type_objc_special */ long number; @@ -1175,8 +1176,10 @@ phase7_ungetc (int c) static inline void free_token (token_ty *tp) { - if (tp->type == token_type_name || tp->type == token_type_string_literal) + if (tp->type == token_type_name) free (tp->string); + if (tp->type == token_type_string_literal) + mixed_string_free (tp->mixed_string); if (tp->type == token_type_string_literal || tp->type == token_type_objc_special) drop_reference (tp->comment); @@ -1396,7 +1399,7 @@ phase5_get (token_ty *tp) if (relevant) { tp->type = token_type_string_literal; - tp->string = mixed_string_buffer_result (&msb); + tp->mixed_string = mixed_string_buffer_result (&msb); tp->comment = add_reference (savable_comment); } else @@ -1671,7 +1674,7 @@ phase5_get (token_ty *tp) mixed_string_buffer_append_char (&msb, c); } tp->type = token_type_string_literal; - tp->string = mixed_string_buffer_result (&msb); + tp->mixed_string = mixed_string_buffer_result (&msb); tp->comment = add_reference (savable_comment); return; } @@ -1838,13 +1841,13 @@ phase6_get (token_ty *tp) && buf[1].type == token_type_number && buf[2].type == token_type_string_literal) { - logical_file_name = xstrdup (buf[2].string); + logical_file_name = mixed_string_contents (buf[2].mixed_string); line_number = buf[1].number; } if (bufpos >= 2 && buf[0].type == token_type_number && buf[1].type == token_type_string_literal) { - logical_file_name = xstrdup (buf[1].string); + logical_file_name = mixed_string_contents 
(buf[1].mixed_string); line_number = buf[0].number; } @@ -1922,7 +1925,9 @@ phase8a_get (token_ty *tp) /* Turn PRIdXXX into "<PRIdXXX>". */ char *new_string = xasprintf ("<%s>", tp->string); free (tp->string); - tp->string = new_string; + tp->mixed_string = + mixed_string_alloc_utf8 (new_string, lc_string, + logical_file_name, line_number); tp->comment = add_reference (savable_comment); tp->type = token_type_string_literal; } @@ -2016,7 +2021,6 @@ phase8_get (token_ty *tp) for (;;) { token_ty tmp; - size_t len; phase8c_get (&tmp); if (tmp.type != token_type_string_literal) @@ -2024,9 +2028,8 @@ phase8_get (token_ty *tp) phase8c_unget (&tmp); return; } - len = strlen (tp->string); - tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1); - strcpy (tp->string + len, tmp.string); + tp->mixed_string = + mixed_string_concat_free1 (tp->mixed_string, tmp.mixed_string); free_token (&tmp); } } @@ -2057,16 +2060,18 @@ struct xgettext_token_ty /* This field is used only for xgettext_token_type_keyword. */ const struct callshapes *shapes; - /* This field is used only for xgettext_token_type_string_literal, - xgettext_token_type_keyword, xgettext_token_type_symbol. */ + /* This field is used only for xgettext_token_type_keyword, + xgettext_token_type_symbol. */ char *string; + /* This field is used only for xgettext_token_type_string_literal. */ + mixed_string_ty *mixed_string; + /* This field is used only for xgettext_token_type_string_literal. */ refcounted_string_list_ty *comment; - /* These fields are only for - xgettext_token_type_keyword, - xgettext_token_type_string_literal. */ + /* This field is used only for xgettext_token_type_keyword, + xgettext_token_type_string_literal. 
*/ lex_pos_ty pos; }; @@ -2135,7 +2140,7 @@ x_c_lex (xgettext_token_ty *tp) last_non_comment_line = newline_count; tp->type = xgettext_token_type_string_literal; - tp->string = token.string; + tp->mixed_string = token.mixed_string; tp->comment = token.comment; tp->pos.file_name = logical_file_name; tp->pos.line_number = token.line_number; @@ -2295,17 +2300,22 @@ extract_parenthesized (message_list_ty *mlp, continue; case xgettext_token_type_string_literal: - xgettext_current_source_encoding = po_charset_utf8; - if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, - &token.pos, NULL, token.comment); - else - arglist_parser_remember (argparser, arg, token.string, - inner_context, - token.pos.file_name, token.pos.line_number, - token.comment); - xgettext_current_source_encoding = xgettext_global_source_encoding; - drop_reference (token.comment); + { + char *string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + xgettext_current_source_encoding = po_charset_utf8; + if (extract_all) + remember_a_message (mlp, NULL, string, inner_context, + &token.pos, NULL, token.comment); + else + arglist_parser_remember (argparser, arg, string, + inner_context, + token.pos.file_name, + token.pos.line_number, + token.comment); + xgettext_current_source_encoding = xgettext_global_source_encoding; + drop_reference (token.comment); + } next_context_iter = null_context_list_iterator; selectorcall_context_iter = null_context_list_iterator; state = 0; diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c index 61c101140..c5a8e5ff6 100644 --- a/gettext-tools/src/x-csharp.c +++ b/gettext-tools/src/x-csharp.c @@ -550,7 +550,8 @@ comment_add (int c) static inline void comment_line_end (size_t chars_to_remove) { - char *buffer = mixed_string_buffer_result (&comment_buffer); + char *buffer = + mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer)); size_t buflen = strlen (buffer); buflen -= 
chars_to_remove; @@ -1258,7 +1259,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_string_literal, token_type_symbol */ + char *string; /* for token_type_symbol */ + mixed_string_ty *mixed_string; /* for token_type_string_literal */ refcounted_string_list_ty *comment; /* for token_type_string_literal */ int line_number; int logical_line_number; @@ -1269,10 +1271,13 @@ struct token_ty static inline void free_token (token_ty *tp) { - if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + if (tp->type == token_type_symbol) free (tp->string); if (tp->type == token_type_string_literal) - drop_reference (tp->comment); + { + mixed_string_free (tp->mixed_string); + drop_reference (tp->comment); + } } @@ -1581,7 +1586,7 @@ phase6_get (token_ty *tp) logical_file_name, logical_line_number); accumulate_escaped (&literal, '"'); - tp->string = mixed_string_buffer_result (&literal); + tp->mixed_string = mixed_string_buffer_result (&literal); tp->comment = add_reference (savable_comment); lexical_context = lc_outside; tp->type = token_type_string_literal; @@ -1641,7 +1646,7 @@ phase6_get (token_ty *tp) /* No special treatment of newline and backslash here. 
*/ mixed_string_buffer_append_unicode (&literal, c); } - tp->string = mixed_string_buffer_result (&literal); + tp->mixed_string = mixed_string_buffer_result (&literal); tp->comment = add_reference (savable_comment); lexical_context = lc_outside; tp->type = token_type_string_literal; @@ -1655,6 +1660,8 @@ phase6_get (token_ty *tp) if (is_identifier_start (c)) { struct mixed_string_buffer buffer; + mixed_string_ty *mixed_string; + mixed_string_buffer_init (&buffer, lexical_context, logical_file_name, logical_line_number); for (;;) @@ -1667,7 +1674,9 @@ phase6_get (token_ty *tp) break; } phase4_ungetc (c); - tp->string = mixed_string_buffer_result (&buffer); + mixed_string = mixed_string_buffer_result (&buffer); + tp->string = mixed_string_contents (mixed_string); + mixed_string_free (mixed_string); tp->type = token_type_symbol; return; } @@ -1714,8 +1723,7 @@ phase7_get (token_ty *tp) phase6_get (tp); if (tp->type == token_type_string_literal) { - char *sum = tp->string; - size_t sum_len = strlen (sum); + mixed_string_ty *sum = tp->mixed_string; for (;;) { @@ -1734,12 +1742,7 @@ phase7_get (token_ty *tp) phase6_get (&token_after); if (token_after.type != token_type_dot) { - char *addend = token3.string; - size_t addend_len = strlen (addend); - - sum = (char *) xrealloc (sum, sum_len + addend_len + 1); - memcpy (sum + sum_len, addend, addend_len + 1); - sum_len += addend_len; + sum = mixed_string_concat_free1 (sum, token3.mixed_string); phase6_unget (&token_after); free_token (&token3); @@ -1753,7 +1756,7 @@ phase7_get (token_ty *tp) phase6_unget (&token2); break; } - tp->string = sum; + tp->mixed_string = sum; } } @@ -2006,16 +2009,21 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, case token_type_string_literal: { + char *string; lex_pos_ty pos; + + string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + pos.file_name = logical_file_name; pos.line_number = token.line_number; 
xgettext_current_source_encoding = po_charset_utf8; if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, + remember_a_message (mlp, NULL, string, inner_context, &pos, NULL, token.comment); else - arglist_parser_remember (argparser, arg, token.string, + arglist_parser_remember (argparser, arg, string, inner_context, pos.file_name, pos.line_number, token.comment); diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c index 868c11d10..8e8477d50 100644 --- a/gettext-tools/src/x-java.c +++ b/gettext-tools/src/x-java.c @@ -434,7 +434,8 @@ comment_add (int c) static inline void comment_line_end (size_t chars_to_remove) { - char *buffer = mixed_string_buffer_result (&comment_buffer); + char *buffer = + mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer)); size_t buflen = strlen (buffer); buflen -= chars_to_remove; @@ -564,7 +565,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_string_literal, token_type_symbol */ + char *string; /* for token_type_symbol */ + mixed_string_ty *mixed_string; /* for token_type_string_literal */ refcounted_string_list_ty *comment; /* for token_type_string_literal */ int line_number; }; @@ -574,10 +576,13 @@ struct token_ty static inline void free_token (token_ty *tp) { - if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + if (tp->type == token_type_symbol) free (tp->string); if (tp->type == token_type_string_literal) - drop_reference (tp->comment); + { + free (tp->mixed_string); + drop_reference (tp->comment); + } } @@ -836,7 +841,7 @@ phase5_get (token_ty *tp) mixed_string_buffer_init (&literal, lc_string, logical_file_name, line_number); accumulate_escaped (&literal, '"'); - tp->string = mixed_string_buffer_result (&literal); + tp->mixed_string = mixed_string_buffer_result (&literal); tp->comment = add_reference (savable_comment); tp->type = token_type_string_literal; return; @@ -916,8 +921,7 
@@ phase6_get (token_ty *tp) phase5_get (tp); if (tp->type == token_type_string_literal && phase6_last != token_type_rparen) { - char *sum = tp->string; - size_t sum_len = strlen (sum); + mixed_string_ty *sum = tp->mixed_string; for (;;) { @@ -936,12 +940,7 @@ phase6_get (token_ty *tp) phase5_get (&token_after); if (token_after.type != token_type_dot) { - char *addend = token3.string; - size_t addend_len = strlen (addend); - - sum = (char *) xrealloc (sum, sum_len + addend_len + 1); - memcpy (sum + sum_len, addend, addend_len + 1); - sum_len += addend_len; + sum = mixed_string_concat_free1 (sum, token3.mixed_string); phase5_unget (&token_after); free_token (&token3); @@ -955,7 +954,7 @@ phase6_get (token_ty *tp) phase5_unget (&token2); break; } - tp->string = sum; + tp->mixed_string = sum; } phase6_last = tp->type; } @@ -1209,16 +1208,21 @@ extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, case token_type_string_literal: { + char *string; lex_pos_ty pos; + + string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + pos.file_name = logical_file_name; pos.line_number = token.line_number; xgettext_current_source_encoding = po_charset_utf8; if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, + remember_a_message (mlp, NULL, string, inner_context, &pos, NULL, token.comment); else - arglist_parser_remember (argparser, arg, token.string, + arglist_parser_remember (argparser, arg, string, inner_context, pos.file_name, pos.line_number, token.comment); diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c index 2879837c0..c2599dafe 100644 --- a/gettext-tools/src/x-javascript.c +++ b/gettext-tools/src/x-javascript.c @@ -479,7 +479,8 @@ comment_add (int c) static inline const char * comment_line_end (size_t chars_to_remove) { - char *buffer = mixed_string_buffer_result (&comment_buffer); + char *buffer = + mixed_string_contents_free1 (mixed_string_buffer_result 
(&comment_buffer)); size_t buflen = strlen (buffer) - chars_to_remove; while (buflen >= 1 @@ -673,8 +674,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_string, token_type_symbol, - token_type_keyword */ + char *string; /* for token_type_symbol, token_type_keyword */ + mixed_string_ty *mixed_string; /* for token_type_string */ refcounted_string_list_ty *comment; /* for token_type_string */ int line_number; }; @@ -684,11 +685,13 @@ struct token_ty static inline void free_token (token_ty *tp) { - if (tp->type == token_type_string || tp->type == token_type_symbol - || tp->type == token_type_keyword) + if (tp->type == token_type_symbol || tp->type == token_type_keyword) free (tp->string); if (tp->type == token_type_string) - drop_reference (tp->comment); + { + mixed_string_free (tp->mixed_string); + drop_reference (tp->comment); + } } @@ -1152,7 +1155,7 @@ phase5_get (token_ty *tp) else mixed_string_buffer_append_char (&msb, uc); } - tp->string = mixed_string_buffer_result (&msb); + tp->mixed_string = mixed_string_buffer_result (&msb); tp->comment = add_reference (savable_comment); lexical_context = lc_outside; tp->type = last_token_type = token_type_string; @@ -1331,8 +1334,7 @@ x_javascript_lex (token_ty *tp) phase5_get (tp); if (tp->type == token_type_string) { - char *sum = tp->string; - size_t sum_len = strlen (sum); + mixed_string_ty *sum = tp->mixed_string; for (;;) { @@ -1346,12 +1348,7 @@ x_javascript_lex (token_ty *tp) phase5_get (&token3); if (token3.type == token_type_string) { - char *addend = token3.string; - size_t addend_len = strlen (addend); - - sum = (char *) xrealloc (sum, sum_len + addend_len + 1); - memcpy (sum + sum_len, addend, addend_len + 1); - sum_len += addend_len; + sum = mixed_string_concat_free1 (sum, token3.mixed_string); free_token (&token3); free_token (&token2); @@ -1362,7 +1359,7 @@ x_javascript_lex (token_ty *tp) phase5_unget (&token2); break; } - tp->string = sum; + 
tp->mixed_string = sum; } } @@ -1512,16 +1509,21 @@ extract_balanced (message_list_ty *mlp, case token_type_string: { + char *string; lex_pos_ty pos; + + string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + pos.file_name = logical_file_name; pos.line_number = token.line_number; xgettext_current_source_encoding = po_charset_utf8; if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, + remember_a_message (mlp, NULL, string, inner_context, &pos, NULL, token.comment); else - arglist_parser_remember (argparser, arg, token.string, + arglist_parser_remember (argparser, arg, string, inner_context, pos.file_name, pos.line_number, token.comment); diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c index 5109d569a..50aa587c1 100644 --- a/gettext-tools/src/x-python.c +++ b/gettext-tools/src/x-python.c @@ -533,7 +533,8 @@ comment_add (int c) static inline const char * comment_line_end () { - char *buffer = mixed_string_buffer_result (&comment_buffer); + char *buffer = + mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer)); size_t buflen = strlen (buffer); while (buflen >= 1 @@ -780,7 +781,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_string, token_type_symbol */ + char *string; /* for token_type_symbol */ + mixed_string_ty *mixed_string; /* for token_type_string */ refcounted_string_list_ty *comment; /* for token_type_string */ int line_number; }; @@ -789,10 +791,13 @@ struct token_ty static inline void free_token (token_ty *tp) { - if (tp->type == token_type_string || tp->type == token_type_symbol) + if (tp->type == token_type_symbol) free (tp->string); if (tp->type == token_type_string) - drop_reference (tp->comment); + { + mixed_string_free (tp->mixed_string); + drop_reference (tp->comment); + } } @@ -1358,7 +1363,7 @@ phase5_get (token_ty *tp) else mixed_string_buffer_append_char (&msb, uc); } - 
tp->string = mixed_string_buffer_result (&msb); + tp->mixed_string = mixed_string_buffer_result (&msb); tp->comment = add_reference (savable_comment); lexical_context = lc_outside; tp->type = token_type_string; @@ -1429,13 +1434,13 @@ x_python_lex (token_ty *tp) phase5_get (tp); if (tp->type == token_type_string) { - char *sum = tp->string; - size_t sum_len = strlen (sum); + mixed_string_ty *sum = tp->mixed_string; for (;;) { - token_ty token2, *tp2 = NULL; + token_ty token2; token_ty token3; + token_ty *tp2 = NULL; phase5_get (&token2); switch (token2.type) @@ -1461,12 +1466,7 @@ x_python_lex (token_ty *tp) if (tp2) { - char *addend = tp2->string; - size_t addend_len = strlen (addend); - - sum = (char *) xrealloc (sum, sum_len + addend_len + 1); - memcpy (sum + sum_len, addend, addend_len + 1); - sum_len += addend_len; + sum = mixed_string_concat_free1 (sum, tp2->mixed_string); free_token (tp2); continue; @@ -1474,7 +1474,7 @@ x_python_lex (token_ty *tp) phase5_unget (&token2); break; } - tp->string = sum; + tp->mixed_string = sum; } } @@ -1624,16 +1624,21 @@ extract_balanced (message_list_ty *mlp, case token_type_string: { + char *string; lex_pos_ty pos; + + string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + pos.file_name = logical_file_name; pos.line_number = token.line_number; xgettext_current_source_encoding = po_charset_utf8; if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, + remember_a_message (mlp, NULL, string, inner_context, &pos, NULL, token.comment); else - arglist_parser_remember (argparser, arg, token.string, + arglist_parser_remember (argparser, arg, string, inner_context, pos.file_name, pos.line_number, token.comment); diff --git a/gettext-tools/src/x-rst.c b/gettext-tools/src/x-rst.c index 847d60bd8..860b43bef 100644 --- a/gettext-tools/src/x-rst.c +++ b/gettext-tools/src/x-rst.c @@ -502,7 +502,8 @@ extract_rsj (FILE *f, char *s1; if (parse_string () != pr_parsed) goto 
invalid_json; - s1 = mixed_string_buffer_result (&stringbuf); + s1 = mixed_string_contents_free1 ( + mixed_string_buffer_result (&stringbuf)); /* Parse a colon. */ c = phase2_getc (); @@ -549,7 +550,8 @@ extract_rsj (FILE *f, char *s2; if (parse_string () != pr_parsed) goto invalid_json; - s2 = mixed_string_buffer_result (&stringbuf); + s2 = mixed_string_contents_free1 ( + mixed_string_buffer_result (&stringbuf)); /* Parse a colon. */ c = phase2_getc (); @@ -570,7 +572,9 @@ extract_rsj (FILE *f, goto invalid_rsj; if (r == pr_syntax || location != NULL) goto invalid_json; - location = mixed_string_buffer_result (&stringbuf); + location = + mixed_string_contents_free1 ( + mixed_string_buffer_result (&stringbuf)); } else if (strcmp (s2, "sourcebytes") == 0) { @@ -606,7 +610,9 @@ extract_rsj (FILE *f, goto invalid_rsj; if (r == pr_syntax || msgid != NULL) goto invalid_json; - msgid = mixed_string_buffer_result (&stringbuf); + msgid = + mixed_string_contents_free1 ( + mixed_string_buffer_result (&stringbuf)); } else goto invalid_rsj; diff --git a/gettext-tools/src/x-vala.c b/gettext-tools/src/x-vala.c index 618f87a83..f765f5222 100644 --- a/gettext-tools/src/x-vala.c +++ b/gettext-tools/src/x-vala.c @@ -365,7 +365,8 @@ typedef struct token_ty token_ty; struct token_ty { token_type_ty type; - char *string; /* for token_type_symbol, token_type_string_literal */ + char *string; /* for token_type_symbol */ + mixed_string_ty *mixed_string; /* for token_type_string_literal */ refcounted_string_list_ty *comment; /* for token_type_string_literal */ int line_number; }; @@ -374,10 +375,13 @@ struct token_ty static inline void free_token (token_ty *tp) { - if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + if (tp->type == token_type_symbol) free (tp->string); if (tp->type == token_type_string_literal) - drop_reference (tp->comment); + { + mixed_string_free (tp->mixed_string); + drop_reference (tp->comment); + } } @@ -923,10 +927,18 @@ phase3_get 
(token_ty *tp) mixed_string_buffer_append_char (&msb, c); } /* Done accumulating the string. */ - tp->type = last_token_type = - template ? token_type_string_template : token_type_string_literal; - tp->string = mixed_string_buffer_result (&msb); - tp->comment = add_reference (savable_comment); + if (template) + { + tp->type = token_type_string_template; + mixed_string_buffer_destroy (&msb); + } + else + { + tp->type = token_type_string_literal; + tp->mixed_string = mixed_string_buffer_result (&msb); + tp->comment = add_reference (savable_comment); + } + last_token_type = tp->type; return; } @@ -1153,8 +1165,7 @@ x_vala_lex (token_ty *tp) phase3_get (tp); if (tp->type == token_type_string_literal) { - char *sum = tp->string; - size_t sum_len = strlen (sum); + mixed_string_ty *sum = tp->mixed_string; for (;;) { @@ -1168,12 +1179,7 @@ x_vala_lex (token_ty *tp) phase3_get (&token3); if (token3.type == token_type_string_literal) { - char *addend = token3.string; - size_t addend_len = strlen (addend); - - sum = (char *) xrealloc (sum, sum_len + addend_len + 1); - memcpy (sum + sum_len, addend, addend_len + 1); - sum_len += addend_len; + sum = mixed_string_concat_free1 (sum, token3.mixed_string); free_token (&token3); free_token (&token2); @@ -1184,7 +1190,7 @@ x_vala_lex (token_ty *tp) phase3_unget (&token2); break; } - tp->string = sum; + tp->mixed_string = sum; } } @@ -1315,13 +1321,18 @@ extract_balanced (message_list_ty *mlp, token_type_ty delim, case token_type_string_literal: { + char *string; lex_pos_ty pos; + + string = mixed_string_contents (token.mixed_string); + mixed_string_free (token.mixed_string); + pos.file_name = logical_file_name; pos.line_number = token.line_number; xgettext_current_source_encoding = po_charset_utf8; if (extract_all) - remember_a_message (mlp, NULL, token.string, inner_context, + remember_a_message (mlp, NULL, string, inner_context, &pos, NULL, token.comment); else { @@ -1331,13 +1342,13 @@ extract_balanced (message_list_ty *mlp, 
token_type_ty delim, struct arglist_parser *tmp_argparser; tmp_argparser = arglist_parser_alloc (mlp, next_shapes); - arglist_parser_remember (tmp_argparser, 1, token.string, + arglist_parser_remember (tmp_argparser, 1, string, inner_context, pos.file_name, pos.line_number, token.comment); arglist_parser_done (tmp_argparser, 1); } else - arglist_parser_remember (argparser, arg, token.string, + arglist_parser_remember (argparser, arg, string, inner_context, pos.file_name, pos.line_number, token.comment); } diff --git a/gettext-tools/src/xg-mixed-string.c b/gettext-tools/src/xg-mixed-string.c index 0023bc77f..1892267af 100644 --- a/gettext-tools/src/xg-mixed-string.c +++ b/gettext-tools/src/xg-mixed-string.c @@ -22,11 +22,15 @@ /* Specification. */ #include "xg-mixed-string.h" +#include <assert.h> #include <stdlib.h> #include <string.h> #include "error.h" #include "error-progname.h" +#include "flexmember.h" +#include "msgl-ascii.h" +#include "po-charset.h" #include "unistr.h" #include "xalloc.h" @@ -36,19 +40,375 @@ #define _(str) gettext (str) +/* Allocates a single segment. */ +static inline struct mixed_string_segment * +segment_alloc (enum segment_type type, const char *string, size_t length) +{ + struct mixed_string_segment *segment = + (struct mixed_string_segment *) + xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents, length)); + segment->type = type; + segment->length = length; + memcpy (segment->contents, string, length); + return segment; +} + +/* Clones a single segment. */ +static inline struct mixed_string_segment * +segment_clone (const struct mixed_string_segment *segment) +{ + return segment_alloc (segment->type, segment->contents, segment->length); +} + +mixed_string_ty * +mixed_string_alloc_simple (const char *string, + lexical_context_ty lcontext, + const char *logical_file_name, + int line_number) +{ + struct mixed_string *ms = XMALLOC (struct mixed_string); + + if (*string == '\0') + { + /* An empty string. 
*/ + ms->segments = NULL; + ms->nsegments = 0; + } + else + { + ms->segments = XNMALLOC (1, struct mixed_string_segment *); + if ((xgettext_current_source_encoding == po_charset_ascii + || xgettext_current_source_encoding == po_charset_utf8) + && is_ascii_string (string)) + /* An optimization. */ + ms->segments[0] = + segment_alloc (utf8_encoded, string, strlen (string)); + else + /* The general case. */ + ms->segments[0] = + segment_alloc (source_encoded, string, strlen (string)); + ms->nsegments = 1; + } + ms->lcontext = lcontext; + ms->logical_file_name = logical_file_name; + ms->line_number = line_number; + + return ms; +} + +mixed_string_ty * +mixed_string_alloc_utf8 (const char *string, + lexical_context_ty lcontext, + const char *logical_file_name, + int line_number) +{ + struct mixed_string *ms = XMALLOC (struct mixed_string); + + if (*string == '\0') + { + /* An empty string. */ + ms->segments = NULL; + ms->nsegments = 0; + } + else + { + ms->segments = XNMALLOC (1, struct mixed_string_segment *); + ms->segments[0] = segment_alloc (utf8_encoded, string, strlen (string)); + ms->nsegments = 1; + } + ms->lcontext = lcontext; + ms->logical_file_name = logical_file_name; + ms->line_number = line_number; + + return ms; +} + +mixed_string_ty * +mixed_string_clone (const mixed_string_ty *ms1) +{ + struct mixed_string *ms = XMALLOC (struct mixed_string); + size_t nsegments = ms1->nsegments; + + if (nsegments == 0) + { + ms->segments = NULL; + ms->nsegments = 0; + } + else + { + size_t i; + + ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *); + for (i = 0; i < nsegments; i++) + ms->segments[i] = segment_clone (ms1->segments[i]); + ms->nsegments = nsegments; + } + ms->lcontext = ms1->lcontext; + ms->logical_file_name = ms1->logical_file_name; + ms->line_number = ms1->line_number; + + return ms; +} + +char * +mixed_string_contents (const mixed_string_ty *ms) +{ + size_t nsegments = ms->nsegments; + /* Trivial cases. 
*/ + if (nsegments == 0) + return xstrdup (""); + if (nsegments == 1 && ms->segments[0]->type == utf8_encoded) + { + /* Return the segment, with a NUL at the end. */ + size_t len = ms->segments[0]->length; + char *string = XNMALLOC (len + 1, char); + memcpy (string, ms->segments[0]->contents, len); + string[len] = '\0'; + return string; + } + /* General case. */ + { + size_t i; + + for (i = 0; i < nsegments - 1; i++) + if (memchr (ms->segments[i]->contents, '\0', ms->segments[i]->length) + != NULL) + { + /* Segment i contains a NUL character. Ignore the remaining + segments. */ + nsegments = i + 1; + break; + } + } + { + char **converted_segments = XNMALLOC (nsegments, char *); + size_t length; + + length = 0; + { + size_t i; + + for (i = 0; i < nsegments; i++) + if (ms->segments[i]->type == source_encoded) + { + char *source_encoded_string; + char *utf8_encoded_string; + + /* Copy the segment's contents, with a NUL at the end. */ + { + size_t len = ms->segments[i]->length; + source_encoded_string = XNMALLOC (len + 1, char); + memcpy (source_encoded_string, ms->segments[i]->contents, len); + source_encoded_string[len] = '\0'; + } + /* Convert it to UTF-8 encoding. 
*/ + utf8_encoded_string = + from_current_source_encoding (source_encoded_string, + ms->lcontext, + ms->logical_file_name, + ms->line_number); + if (utf8_encoded_string != source_encoded_string) + free (source_encoded_string); + converted_segments[i] = utf8_encoded_string; + length += strlen (utf8_encoded_string); + } + else + length += ms->segments[i]->length; + } + + { + char *string = XNMALLOC (length + 1, char); + { + char *p; + size_t i; + + p = string; + for (i = 0; i < nsegments; i++) + if (ms->segments[i]->type == source_encoded) + { + p = stpcpy (p, converted_segments[i]); + free (converted_segments[i]); + } + else + { + memcpy (p, ms->segments[i]->contents, ms->segments[i]->length); + p += ms->segments[i]->length; + } + assert (p == string + length); + *p = '\0'; + } + + free (converted_segments); + return string; + } + } +} + +void +mixed_string_free (mixed_string_ty *ms) +{ + struct mixed_string_segment **segments = ms->segments; + size_t nsegments = ms->nsegments; + if (nsegments > 0) + { + size_t i; + for (i = 0; i < nsegments; i++) + free (segments[i]); + } + free (segments); + free (ms); +} + +char * +mixed_string_contents_free1 (mixed_string_ty *ms) +{ + char *contents = mixed_string_contents (ms); + mixed_string_free (ms); + return contents; +} + +mixed_string_ty * +mixed_string_concat (const mixed_string_ty *ms1, + const mixed_string_ty *ms2) +{ + /* Trivial cases. */ + if (ms2->nsegments == 0) + return mixed_string_clone (ms1); + if (ms1->nsegments == 0) + return mixed_string_clone (ms2); + /* General case. */ + { + struct mixed_string *ms = XMALLOC (struct mixed_string); + size_t nsegments = ms1->nsegments + ms2->nsegments; + size_t j; + if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type) + { + /* Combine the last segment of ms1 with the first segment of ms2. 
*/ + size_t i; + + nsegments -= 1; + ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *); + j = 0; + for (i = 0; i < ms1->nsegments - 1; i++) + ms->segments[j++] = segment_clone (ms1->segments[i]); + { + size_t len1 = ms1->segments[i]->length; + size_t len2 = ms2->segments[0]->length; + struct mixed_string_segment *newseg = + (struct mixed_string_segment *) + xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents, + len1 + len2)); + newseg->type = ms2->segments[0]->type; + newseg->length = len1 + len2; + memcpy (newseg->contents, ms1->segments[i]->contents, len1); + memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2); + ms->segments[j++] = newseg; + } + for (i = 1; i < ms2->nsegments; i++) + ms->segments[j++] = segment_clone (ms2->segments[i]); + } + else + { + size_t i; + + ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *); + j = 0; + for (i = 0; i < ms1->nsegments; i++) + ms->segments[j++] = segment_clone (ms1->segments[i]); + for (i = 0; i < ms2->nsegments; i++) + ms->segments[j++] = segment_clone (ms2->segments[i]); + } + assert (j == nsegments); + ms->nsegments = nsegments; + ms->lcontext = ms1->lcontext; + ms->logical_file_name = ms1->logical_file_name; + ms->line_number = ms1->line_number; + + return ms; + } +} + +mixed_string_ty * +mixed_string_concat_free1 (mixed_string_ty *ms1, const mixed_string_ty *ms2) +{ + /* Trivial cases. */ + if (ms2->nsegments == 0) + return ms1; + if (ms1->nsegments == 0) + { + mixed_string_free (ms1); + return mixed_string_clone (ms2); + } + /* General case. */ + { + struct mixed_string *ms = XMALLOC (struct mixed_string); + size_t nsegments = ms1->nsegments + ms2->nsegments; + size_t j; + if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type) + { + /* Combine the last segment of ms1 with the first segment of ms2. 
*/ + size_t i; + + nsegments -= 1; + ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *); + j = 0; + for (i = 0; i < ms1->nsegments - 1; i++) + ms->segments[j++] = ms1->segments[i]; + { + size_t len1 = ms1->segments[i]->length; + size_t len2 = ms2->segments[0]->length; + struct mixed_string_segment *newseg = + (struct mixed_string_segment *) + xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents, + len1 + len2)); + newseg->type = ms2->segments[0]->type; + newseg->length = len1 + len2; + memcpy (newseg->contents, ms1->segments[i]->contents, len1); + memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2); + ms->segments[j++] = newseg; + } + free (ms1->segments[i]); + for (i = 1; i < ms2->nsegments; i++) + ms->segments[j++] = segment_clone (ms2->segments[i]); + } + else + { + size_t i; + + ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *); + j = 0; + for (i = 0; i < ms1->nsegments; i++) + ms->segments[j++] = ms1->segments[i]; + for (i = 0; i < ms2->nsegments; i++) + ms->segments[j++] = segment_clone (ms2->segments[i]); + } + assert (j == nsegments); + free (ms1->segments); + ms->nsegments = nsegments; + ms->lcontext = ms1->lcontext; + ms->logical_file_name = ms1->logical_file_name; + ms->line_number = ms1->line_number; + free (ms1); + + return ms; + } +} + + void mixed_string_buffer_init (struct mixed_string_buffer *bp, lexical_context_ty lcontext, const char *logical_file_name, int line_number) { - bp->utf8_buffer = NULL; - bp->utf8_buflen = 0; - bp->utf8_allocated = 0; - bp->utf16_surr = 0; + bp->segments = NULL; + bp->nsegments = 0; + bp->nsegments_allocated = 0; + bp->curr_type = -1; bp->curr_buffer = NULL; bp->curr_buflen = 0; bp->curr_allocated = 0; + bp->utf16_surr = 0; bp->lcontext = lcontext; bp->logical_file_name = logical_file_name; bp->line_number = line_number; @@ -57,7 +417,23 @@ mixed_string_buffer_init (struct mixed_string_buffer *bp, bool mixed_string_buffer_is_empty (const struct mixed_string_buffer 
*bp) { - return (bp->utf8_buflen == 0 && bp->utf16_surr == 0 && bp->curr_buflen == 0); + return (bp->nsegments == 0 && bp->curr_buflen == 0); +} + +/* Auxiliary function: Ensure count more bytes are available in + bp->curr_buffer. */ +static inline void +mixed_string_buffer_grow_curr_buffer (struct mixed_string_buffer *bp, + size_t count) +{ + if (bp->curr_buflen + count > bp->curr_allocated) + { + size_t new_allocated = 2 * bp->curr_allocated + 10; + if (new_allocated < bp->curr_buflen + count) + new_allocated = bp->curr_buflen + count; + bp->curr_allocated = new_allocated; + bp->curr_buffer = xrealloc (bp->curr_buffer, new_allocated); + } } /* Auxiliary function: Append a byte to bp->curr. */ @@ -73,23 +449,8 @@ mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp, bp->curr_buffer[bp->curr_buflen++] = c; } -/* Auxiliary function: Ensure count more bytes are available in bp->utf8. */ -static inline void -mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp, - size_t count) -{ - if (bp->utf8_buflen + count > bp->utf8_allocated) - { - size_t new_allocated = 2 * bp->utf8_allocated + 10; - if (new_allocated < bp->utf8_buflen + count) - new_allocated = bp->utf8_buflen + count; - bp->utf8_allocated = new_allocated; - bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated); - } -} - -/* Auxiliary function: Append a Unicode character to bp->utf8. - uc must be < 0x110000. */ +/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, append a + Unicode character to bp->curr_buffer. uc must be < 0x110000. */ static inline void mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp, ucs4_t uc) @@ -101,13 +462,13 @@ mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp, /* The caller should have ensured that uc is not out-of-range. 
*/ abort (); - mixed_string_buffer_grow_utf8_buffer (bp, count); - memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count); - bp->utf8_buflen += count; + mixed_string_buffer_grow_curr_buffer (bp, count); + memcpy (bp->curr_buffer + bp->curr_buflen, utf8buf, count); + bp->curr_buflen += count; } -/* Auxiliary function: Handle the attempt to append a lone surrogate to - bp->utf8. */ +/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, handle the + attempt to append a lone surrogate to bp->curr_buffer. */ static void mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp, ucs4_t uc) @@ -138,7 +499,8 @@ mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp, mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd); } -/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */ +/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, flush + bp->utf16_surr into bp->curr_buffer. */ static inline void mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp) { @@ -149,31 +511,38 @@ mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp) } } -/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. */ +/* Auxiliary function: Append a segment to bp->segments. */ static inline void -mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, - int line_number) +mixed_string_buffer_add_segment (struct mixed_string_buffer *bp, + struct mixed_string_segment *newseg) { - if (bp->curr_buflen > 0) + if (bp->nsegments == bp->nsegments_allocated) { - char *curr; - size_t count; - - mixed_string_buffer_append_to_curr_buffer (bp, '\0'); - - /* Convert from the source encoding to UTF-8. */ - curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext, - bp->logical_file_name, - line_number); - - /* Append it to bp->utf8_buffer. 
*/ - count = strlen (curr); - mixed_string_buffer_grow_utf8_buffer (bp, count); - memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count); - bp->utf8_buflen += count; + size_t new_allocated = + bp->nsegments_allocated = 2 * bp->nsegments_allocated + 1; + bp->segments = + (struct mixed_string_segment **) + xrealloc (bp->segments, + new_allocated * sizeof (struct mixed_string_segment *)); + } + bp->segments[bp->nsegments++] = newseg; +} - if (curr != bp->curr_buffer) - free (curr); +/* Auxiliary function: Flush bp->curr_buffer and bp->utf16_surr into + bp->segments. */ +static void +mixed_string_buffer_flush_curr (struct mixed_string_buffer *bp) +{ + if (bp->curr_type == utf8_encoded) + mixed_string_buffer_flush_utf16_surr (bp); + if (bp->curr_type != -1) + { + if (bp->curr_buflen > 0) + { + struct mixed_string_segment *segment = + segment_alloc (bp->curr_type, bp->curr_buffer, bp->curr_buflen); + mixed_string_buffer_add_segment (bp, segment); + } bp->curr_buflen = 0; } } @@ -181,23 +550,26 @@ mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, void mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c) { - /* Switch from Unicode character mode to multibyte character mode. */ - mixed_string_buffer_flush_utf16_surr (bp); - - /* When a newline is seen, convert the accumulated multibyte sequence. - This ensures a correct line number in the error message in case of - a conversion error. The "- 1" is to account for the newline. */ - if (c == '\n') - mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1); + /* Switch to multibyte character mode. 
*/ + if (bp->curr_type != source_encoded) + { + mixed_string_buffer_flush_curr (bp); + bp->curr_type = source_encoded; + } - mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c); + mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c); } void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c) { - /* Switch from multibyte character mode to Unicode character mode. */ - mixed_string_buffer_flush_curr_buffer (bp, bp->line_number); + /* Switch to Unicode character mode. */ + if (bp->curr_type != utf8_encoded) + { + mixed_string_buffer_flush_curr (bp); + bp->curr_type = utf8_encoded; + assert (bp->utf16_surr == 0); + } /* Test whether this character and the previous one form a Unicode surrogate character pair. */ @@ -230,26 +602,44 @@ mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c) void mixed_string_buffer_destroy (struct mixed_string_buffer *bp) { - free (bp->utf8_buffer); + struct mixed_string_segment **segments = bp->segments; + size_t nsegments = bp->nsegments; + if (nsegments > 0) + { + size_t i; + for (i = 0; i < nsegments; i++) + free (segments[i]); + } + free (segments); free (bp->curr_buffer); } -char * +mixed_string_ty * mixed_string_buffer_result (struct mixed_string_buffer *bp) { - char *utf8_buffer; - - /* Flush all into bp->utf8_buffer. */ - mixed_string_buffer_flush_utf16_surr (bp); - mixed_string_buffer_flush_curr_buffer (bp, bp->line_number); - /* NUL-terminate it. */ - mixed_string_buffer_grow_utf8_buffer (bp, 1); - bp->utf8_buffer[bp->utf8_buflen] = '\0'; - - /* Free curr_buffer. */ - utf8_buffer = bp->utf8_buffer; - free (bp->curr_buffer); - - /* Return it. 
*/ - return utf8_buffer; + mixed_string_buffer_flush_curr (bp); + + { + struct mixed_string *ms = XMALLOC (struct mixed_string); + size_t nsegments = bp->nsegments; + + if (nsegments > 0) + ms->segments = + (struct mixed_string_segment **) + xrealloc (bp->segments, + nsegments * sizeof (struct mixed_string_segment *)); + else + { + assert (bp->segments == NULL); + ms->segments = NULL; + } + ms->nsegments = nsegments; + ms->lcontext = bp->lcontext; + ms->logical_file_name = bp->logical_file_name; + ms->line_number = bp->line_number; + + free (bp->curr_buffer); + + return ms; + } } diff --git a/gettext-tools/src/xg-mixed-string.h b/gettext-tools/src/xg-mixed-string.h index b456e1af9..8674c7545 100644 --- a/gettext-tools/src/xg-mixed-string.h +++ b/gettext-tools/src/xg-mixed-string.h @@ -28,22 +28,95 @@ extern "C" { #endif +/* A string that contains segments in the xgettext_current_source_encoding + and segments in UTF-8, in an alternating way. */ + +enum segment_type +{ + source_encoded, + utf8_encoded +}; + +struct mixed_string_segment +{ + /*enum segment_type*/ unsigned char type; + size_t length; + char contents[FLEXIBLE_ARRAY_MEMBER]; +}; + +typedef struct mixed_string mixed_string_ty; +struct mixed_string +{ + /* The alternating segments. */ + struct mixed_string_segment **segments; + size_t nsegments; + /* The lexical context. Used only for error message purposes. */ + lexical_context_ty lcontext; + const char *logical_file_name; + int line_number; +}; + +/* Creates a mixed_string that contains just a string in the + xgettext_current_source_encoding. */ +extern mixed_string_ty * + mixed_string_alloc_simple (const char *string, + lexical_context_ty lcontext, + const char *logical_file_name, + int line_number); + +/* Creates a mixed_string that contains just a UTF-8 string. 
*/ +extern mixed_string_ty * + mixed_string_alloc_utf8 (const char *string, + lexical_context_ty lcontext, + const char *logical_file_name, + int line_number); + +/* Creates a copy of a mixed_string. */ +extern mixed_string_ty * + mixed_string_clone (const mixed_string_ty *ms1); + +/* Returns the contents of a mixed_string as an UTF-8 encoded string. + This may provoke an error if no source encoding has been specified + through --from-code. The result is freshly allocated. */ +extern char * + mixed_string_contents (const mixed_string_ty *ms); + +/* Frees a mixed_string. */ +extern void + mixed_string_free (mixed_string_ty *ms); + +/* Returns the contents of a mixed_string as an UTF-8 encoded string, + and frees the argument. */ +extern char * + mixed_string_contents_free1 (mixed_string_ty *ms); + +/* Concatenates two mixed_strings. */ +extern mixed_string_ty * + mixed_string_concat (const mixed_string_ty *ms1, + const mixed_string_ty *ms2); +/* Concatenates two mixed_strings, and frees the first argument. */ +extern mixed_string_ty * + mixed_string_concat_free1 (mixed_string_ty *ms1, + const mixed_string_ty *ms2); + + /* A string buffer type that allows appending bytes (in the xgettext_current_source_encoding) or Unicode characters. - Returns the entire string in UTF-8 encoding. */ + When done, it returns the entire string as a mixed_string. */ struct mixed_string_buffer { - /* The part of the string that has already been converted to UTF-8. */ - char *utf8_buffer; - size_t utf8_buflen; - size_t utf8_allocated; - /* The first half of an UTF-16 surrogate character. */ - unsigned short utf16_surr; - /* The part of the string that is still in the source encoding. */ + /* The alternating segments that are already finished. */ + struct mixed_string_segment **segments; + size_t nsegments; + size_t nsegments_allocated; + /* The segment that is being accumulated. */ + int curr_type; /* An enum segment_type, or -1. 
*/ char *curr_buffer; size_t curr_buflen; size_t curr_allocated; + /* The first half of an UTF-16 surrogate character. */ + unsigned short utf16_surr; /* The lexical context. Used only for error message purposes. */ lexical_context_ty lcontext; const char *logical_file_name; @@ -58,22 +131,27 @@ extern void int line_number); /* Determines whether a mixed_string_buffer is still empty. */ -extern bool mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp); +extern bool + mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp); /* Appends a character to a mixed_string_buffer. */ -extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp, - int c); +extern void + mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c); /* Appends a Unicode character to a mixed_string_buffer. */ -extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, - int c); +extern void + mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, + int c); -/* Frees the memory pointed to by a 'struct mixed_string_buffer'. */ -extern void mixed_string_buffer_destroy (struct mixed_string_buffer *bp); +/* Frees the memory pointed to by a 'struct mixed_string_buffer' and + discards the accumulated string. */ +extern void + mixed_string_buffer_destroy (struct mixed_string_buffer *bp); /* Frees the memory pointed to by a 'struct mixed_string_buffer' - and returns the accumulated string in UTF-8. */ -extern char * mixed_string_buffer_result (struct mixed_string_buffer *bp); + and returns the accumulated string. */ +extern mixed_string_ty * + mixed_string_buffer_result (struct mixed_string_buffer *bp); #ifdef __cplusplus