}
-/* ========================== Reading of tokens. ========================== */
-
-
-/* A token consists of a sequence of characters. */
-struct token
-{
- int allocated; /* number of allocated 'token_char's */
- int charcount; /* number of used 'token_char's */
- char *chars; /* the token's constituents */
-};
-
-/* Initialize a 'struct token'. */
-static inline void
-init_token (struct token *tp)
-{
- tp->allocated = 10;
- tp->chars = XNMALLOC (tp->allocated, char);
- tp->charcount = 0;
-}
-
-/* Free the memory pointed to by a 'struct token'. */
-static inline void
-free_token (struct token *tp)
-{
- free (tp->chars);
-}
-
-/* Ensure there is enough room in the token for one more character. */
-static inline void
-grow_token (struct token *tp)
-{
- if (tp->charcount == tp->allocated)
- {
- tp->allocated *= 2;
- tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
- }
-}
-
-/* Convert a struct token * to a char*. */
-static char *
-string_of_token (const struct token *tp)
-{
- char *str;
- int n;
-
- n = tp->charcount;
- str = XNMALLOC (n + 1, char);
- memcpy (str, tp->chars, n);
- str[n] = '\0';
- return str;
-}
-
-
/* ========================= Accumulating messages ========================= */
struct word
{
enum word_type type;
- struct token *token; /* for t_string */
- int line_number_at_start; /* for t_string */
+ struct mixed_string_buffer *token; /* for t_string: heap-allocated buffer
+ that accumulates the word's contents */
+ int line_number_at_start; /* for t_string: value of line_number
+ when the word's buffer was set up */
};
/* Free the memory pointed to by a 'struct word'. */
{
if (wp->type == t_string)
{
- free_token (wp->token);
+ mixed_string_buffer_destroy (wp->token);
free (wp->token);
}
}
-/* Convert a t_string token to a char*. */
-static char *
-string_of_word (const struct word *wp)
-{
- char *str;
- int n;
-
- if (!(wp->type == t_string))
- abort ();
- n = wp->token->charcount;
- str = XNMALLOC (n + 1, char);
- memcpy (str, wp->token->chars, n);
- str[n] = '\0';
- return str;
-}
-
-/* Convert a t_string token to a char*, ignoring the first OFFSET bytes. */
-static char *
-substring_of_word (const struct word *wp, size_t offset)
-{
- char *str;
- int n;
-
- if (!(wp->type == t_string))
- abort ();
- n = wp->token->charcount;
- if (!(offset <= n))
- abort ();
- str = XNMALLOC (n - offset + 1, char);
- memcpy (str, wp->token->chars + offset, n - offset);
- str[n - offset] = '\0';
- return str;
-}
-
/* Whitespace recognition. */
}
wp->type = t_string;
- wp->token = XMALLOC (struct token);
- init_token (wp->token);
+ wp->token = XMALLOC (struct mixed_string_buffer);
+ mixed_string_buffer_init (wp->token, lc_string,
+ logical_file_name, line_number);
wp->line_number_at_start = line_number;
/* True while all characters in the token seen so far are digits. */
all_unquoted_digits = true;
phase2_ungetc (c2);
wp->type = t_redirect;
- free_token (wp->token);
+ mixed_string_buffer_destroy (wp->token);
free (wp->token);
last_non_comment_line = line_number;
all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');
- if (all_unquoted_name_characters && wp->token->charcount > 0 && c == '=')
+ if (all_unquoted_name_characters
+ && !mixed_string_buffer_is_empty (wp->token)
+ && c == '=')
{
wp->type = t_assignment;
continue;
all_unquoted_name_characters =
all_unquoted_name_characters
&& ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'
- || (wp->token->charcount > 0 && c >= '0' && c <= '9'));
+ || (!mixed_string_buffer_is_empty (wp->token)
+ && c >= '0' && c <= '9'));
if (c == '$')
{
c = n;
}
break;
+
+ case 'u': case 'U':
+ {
+ unsigned char buf[8];
+ int j;
+ unsigned int n;
+
+ n = 0;
+ for (j = 0; j < (c == 'u' ? 4 : 8); j++)
+ {
+ int c1 = phase1_getc ();
+
+ if (c1 >= '0' && c1 <= '9')
+ n = (n << 4) + (c1 - '0');
+ else if (c1 >= 'A' && c1 <= 'F')
+ n = (n << 4) + (c1 - 'A' + 10);
+ else if (c1 >= 'a' && c1 <= 'f')
+ n = (n << 4) + (c1 - 'a' + 10);
+ else
+ {
+ phase1_ungetc (c1);
+ break;
+ }
+
+ buf[j] = c1;
+ }
+ if (j > 0)
+ {
+ if (n < 0x110000)
+ {
+ if (wp->type == t_string)
+ mixed_string_buffer_append_unicode (wp->token, n);
+ goto done_escape_sequence;
+ }
+ if_error (IF_SEVERITY_WARNING,
+ logical_file_name, line_number, (size_t)(-1), false,
+ _("invalid Unicode character"));
+ while (--j >= 0)
+ phase1_ungetc (buf[j]);
+ }
+ phase1_ungetc (c);
+ c = '\\';
+ break;
+ }
}
}
if (wp->type == t_string)
- {
- grow_token (wp->token);
- wp->token->chars[wp->token->charcount++] =
- (unsigned char) c;
- }
+ mixed_string_buffer_append_char (wp->token,
+ (unsigned char) c);
+ done_escape_sequence: ;
}
/* The result is a literal string. Don't change wp->type. */
continue;
{
/* $"...": Bash builtin for internationalized string. */
lex_pos_ty pos;
- struct token string;
+ struct mixed_string_buffer string;
saw_opening_singlequote ();
open_singlequote_terminator = '"';
pos.file_name = logical_file_name;
pos.line_number = line_number;
- init_token (&string);
+ mixed_string_buffer_init (&string, lc_string,
+ logical_file_name, line_number);
for (;;)
{
c = phase2_getc ();
saw_closing_singlequote ();
break;
}
- grow_token (&string);
- string.chars[string.charcount++] = (unsigned char) c;
+ mixed_string_buffer_append_char (&string, (unsigned char) c);
}
- remember_a_message (mlp, NULL, string_of_token (&string),
- false, false, region, &pos,
+ remember_a_message (mlp, NULL,
+ mixed_string_contents_free1 (
+ mixed_string_buffer_result (&string)),
+ true, false, region, &pos,
NULL, savable_comment, false);
- free_token (&string);
if_error (IF_SEVERITY_WARNING,
pos.file_name, pos.line_number, (size_t)(-1), false,
break;
if (wp->type == t_string)
- {
- grow_token (wp->token);
- wp->token->chars[wp->token->charcount++] = (unsigned char) c;
- }
+ mixed_string_buffer_append_char (wp->token, (unsigned char) c);
}
phase2_ungetc (c);
if (wp->type != t_string)
{
- free_token (wp->token);
+ mixed_string_buffer_destroy (wp->token);
free (wp->token);
}
last_non_comment_line = line_number;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
- remember_a_message (mlp, NULL, string_of_word (&inner), false,
- false, inner_region, &pos,
+ remember_a_message (mlp, NULL,
+ mixed_string_contents_free1 (
+ mixed_string_buffer_cloned_result (inner.token)),
+ true, false, inner_region, &pos,
NULL, savable_comment, false);
}
}
}
else if (inner.type == t_string)
{
- char *function_name = string_of_word (&inner);
+ char *function_name =
+ mixed_string_contents_free1 (
+ mixed_string_buffer_cloned_result (inner.token));
if (strcmp (function_name, "env") == 0)
{
&& memcmp (argparser->keyword, "ngettext", 8) == 0));
if (accepts_context && argparser->next_is_msgctxt)
{
- char *s = string_of_word (&inner);
- mixed_string_ty *ms =
- mixed_string_alloc_simple (s, lc_string,
- logical_file_name,
- inner.line_number_at_start);
- free (s);
+ mixed_string_ty *ms = mixed_string_buffer_cloned_result (inner.token);
argparser->next_is_msgctxt = false;
arglist_parser_remember_msgctxt (argparser, ms,
inner_region,
matters_for_argparser = false;
}
else if (accepts_context
- && ((inner.token->charcount == 2
- && memcmp (inner.token->chars, "-c", 2) == 0)
- || (inner.token->charcount == 9
- && memcmp (inner.token->chars, "--context", 9) == 0)))
+ && (mixed_string_buffer_equals (inner.token, "-c")
+ || mixed_string_buffer_equals (inner.token, "--context")))
{
argparser->next_is_msgctxt = true;
matters_for_argparser = false;
}
else if (accepts_context
- && (inner.token->charcount >= 10
- && memcmp (inner.token->chars, "--context=", 10) == 0))
+ && mixed_string_buffer_startswith (inner.token, "--context="))
{
- char *s = substring_of_word (&inner, 10);
- mixed_string_ty *ms =
- mixed_string_alloc_simple (s, lc_string,
- logical_file_name,
- inner.line_number_at_start);
- free (s);
+ mixed_string_ty *ms = mixed_string_buffer_cloned_result (inner.token);
+ mixed_string_remove_prefix (ms, 10);
argparser->next_is_msgctxt = false;
arglist_parser_remember_msgctxt (argparser, ms,
inner_region,
matters_for_argparser = false;
}
else if (accepts_expand
- && inner.token->charcount == 2
- && memcmp (inner.token->chars, "-e", 2) == 0)
+ && mixed_string_buffer_equals (inner.token, "-e"))
{
must_expand_arg_strings = true;
matters_for_argparser = false;
}
else
{
- char *s = string_of_word (&inner);
- mixed_string_ty *ms;
+ mixed_string_ty *ms = mixed_string_buffer_cloned_result (inner.token);
/* When '-e' was specified, expand escape sequences in s. */
if (accepts_expand && must_expand_arg_strings)
{
+ char *s = mixed_string_contents (ms);
bool expands_backslash_c =
(argparser->keyword_len == 7
&& memcmp (argparser->keyword, "gettext", 7) == 0);
/* We can ignore the value of expands_backslash_c, because
here we don't support the gettext '-s' option. */
if (expanded != s)
- free (s);
- s = expanded;
+ {
+ mixed_string_ty *expanded_ms =
+ mixed_string_alloc_utf8 (expanded, ms->lcontext,
+ ms->logical_file_name,
+ ms->line_number);
+ mixed_string_free (ms);
+ ms = expanded_ms;
+ }
+ free (s);
}
- ms = mixed_string_alloc_simple (s, lc_string,
- logical_file_name,
- inner.line_number_at_start);
- free (s);
arglist_parser_remember (argparser, arg, ms,
inner_region,
logical_file_name,
/* Handling strings that are given partially in the source encoding and
partially in Unicode.
- Copyright (C) 2001-2024 Free Software Foundation, Inc.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
}
}
+void /* Replaces the first segment of MS by one that lacks its first
+ PREFIX_LENGTH content elements (presumably bytes - confirm against
+ the segment type). Does nothing when PREFIX_LENGTH is 0. Aborts
+ when MS has no segments or when its first segment is shorter than
+ PREFIX_LENGTH; the prefix is never taken from later segments. */
+mixed_string_remove_prefix (mixed_string_ty *ms, size_t prefix_length)
+{
+ if (prefix_length > 0)
+ {
+ if (ms->nsegments == 0)
+ abort ();
+ struct mixed_string_segment *old_segment0 = ms->segments[0];
+ if (!(old_segment0->length >= prefix_length))
+ abort ();
+ struct mixed_string_segment *new_segment0 =
+ segment_alloc (old_segment0->type,
+ old_segment0->contents + prefix_length,
+ old_segment0->length - prefix_length); /* the arguments point
+ into the old segment, so this call must come before the free below */
+ free (old_segment0);
+ ms->segments[0] = new_segment0;
+ }
+}
+
void
mixed_string_buffer_init (struct mixed_string_buffer *bp,
bool
mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp)
{
- return (bp->nsegments == 0 && bp->curr_buflen == 0);
+ return (bp->nsegments == 0 && bp->curr_buflen == 0 && bp->utf16_surr == 0); /* empty
+ means: no segments, nothing in the current buffer, and utf16_surr is 0
+ (presumably no pending UTF-16 surrogate - confirm) */
+}
+
+bool /* Returns true iff BP has no segments, its utf16_surr field is 0, and
+ its current buffer holds exactly the strlen (OTHER) bytes of the
+ NUL-terminated string OTHER. A buffer that already has at least one
+ segment never compares equal here, whatever that segment contains. */
+mixed_string_buffer_equals (const struct mixed_string_buffer *bp,
+ const char *other)
+{
+ size_t other_len = strlen (other);
+ return (bp->nsegments == 0
+ && bp->curr_buflen == other_len
+ && (other_len == 0 || memcmp (bp->curr_buffer, other, other_len) == 0) /* no
+ memcmp call is made when OTHER is the empty string */
+ && bp->utf16_surr == 0);
+}
+
+bool /* Returns true if the NUL-terminated string PREFIX is empty. Otherwise
+ compares PREFIX with the leading bytes of BP's current buffer when BP
+ has no segments, or with the leading bytes of BP's first segment when
+ it has some. Only that one piece is examined: a prefix that extends
+ past the first segment is reported as not matching. */
+mixed_string_buffer_startswith (const struct mixed_string_buffer *bp,
+ const char *prefix)
+{
+ size_t prefix_len = strlen (prefix);
+ return prefix_len == 0
+ || (bp->nsegments == 0
+ ? bp->curr_buflen >= prefix_len
+ && memcmp (bp->curr_buffer, prefix, prefix_len) == 0
+ : bp->segments[0]->length >= prefix_len
+ && memcmp (bp->segments[0]->contents, prefix, prefix_len) == 0);
}
/* Auxiliary function: Ensure count more bytes are available in
return ms;
}
}
+
+mixed_string_ty * /* Calls mixed_string_buffer_flush_curr on BP, then returns
+ a newly allocated mixed string whose segments are
+ segment_clone copies of BP's segments and whose lcontext,
+ logical_file_name and line_number are assigned from BP.
+ Nothing in BP is freed by this function. */
+mixed_string_buffer_cloned_result (struct mixed_string_buffer *bp)
+{
+ mixed_string_buffer_flush_curr (bp);
+
+ {
+ struct mixed_string *ms = XMALLOC (struct mixed_string);
+ size_t nsegments = bp->nsegments; /* read after the flush above */
+
+ if (nsegments > 0)
+ {
+ size_t i;
+
+ ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
+ for (i = 0; i < nsegments; i++)
+ ms->segments[i] = segment_clone (bp->segments[i]);
+ }
+ else
+ {
+ assert (bp->segments == NULL); /* a buffer without segments is
+ expected to have no segment array */
+ ms->segments = NULL;
+ }
+ ms->nsegments = nsegments;
+ ms->lcontext = bp->lcontext;
+ ms->logical_file_name = bp->logical_file_name;
+ ms->line_number = bp->line_number;
+
+ return ms;
+ }
+}