#define _(str) gettext(str)
-#if HAVE_ICONV
-# include "unistr.h"
-#endif
-
#if HAVE_DECL_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
/* Current position within the PO file. */
lex_pos_ty gram_pos;
-int gram_pos_column;
-
-/* Whether the PO file is in the role of a POT file. */
-bool gram_pot_role;
/* Error handling during the parsing of a PO file.
- These functions can access gram_pos and gram_pos_column. */
+ These functions can access gram_pos and ps->gram_pos_column. */
/* VARARGS1 */
void
error (EXIT_FAILURE, 0, _("memory exhausted"));
va_end (ap);
po_xerror (PO_SEVERITY_ERROR, NULL, gram_pos.file_name, gram_pos.line_number,
- gram_pos_column + 1, false, buffer);
+ ps->gram_pos_column + 1, false, buffer);
free (buffer);
if (error_message_count >= gram_max_allowed_errors)
character. If XY is a multibyte character, X \ newline Y is invalid.
*/
-/* Multibyte character data type. */
-/* Note this depends on po_lex_charset and po_lex_iconv, which get set
- while the file is being parsed. */
-
-#define MBCHAR_BUF_SIZE 24
-
-struct mbchar
-{
- size_t bytes; /* number of bytes of current character, > 0 */
-#if HAVE_ICONV
- bool uc_valid; /* true if uc is a valid Unicode character */
- ucs4_t uc; /* if uc_valid: the current character */
-#endif
- char buf[MBCHAR_BUF_SIZE]; /* room for the bytes */
-};
-
-/* We want to pass multibyte characters by reference automatically,
- therefore we use an array type. */
-typedef struct mbchar mbchar_t[1];
-
/* A version of memcpy optimized for the case n <= 1. */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
#define MB_UNPRINTABLE_WIDTH 1
static int
-mb_width (const mbchar_t mbc)
+mb_width (struct po_parser_state *ps, const mbchar_t mbc)
{
#if HAVE_ICONV
if (mbc->uc_valid)
if (uc >= 0x0000 && uc <= 0x001F)
{
if (uc == 0x0009)
- return 8 - (gram_pos_column & 7);
+ return 8 - (ps->gram_pos_column & 7);
return 0;
}
if ((uc >= 0x007F && uc <= 0x009F) || (uc >= 0x2028 && uc <= 0x2029))
mbc->buf[0] <= 0x1F)
{
if (mbc->buf[0] == 0x09)
- return 8 - (gram_pos_column & 7);
+ return 8 - (ps->gram_pos_column & 7);
return 0;
}
if (mbc->buf[0] == 0x7F)
/* Multibyte character input. */
-/* Number of characters that can be pushed back.
- We need 1 for lex_getc, plus 1 for lex_ungetc. */
-#define NPUSHBACK 2
-
-/* Data type of a multibyte character input stream. */
-struct mbfile
-{
- FILE *fp;
- bool eof_seen;
- int have_pushback;
- unsigned int bufcount;
- char buf[MBCHAR_BUF_SIZE];
- struct mbchar pushback[NPUSHBACK];
-};
-
-/* We want to pass multibyte streams by reference automatically,
- therefore we use an array type. */
-typedef struct mbfile mbfile_t[1];
-
-/* Whether invalid multibyte sequences in the input shall be signalled
- or silently tolerated. */
-static bool signal_eilseq;
-
static inline void
mbfile_init (mbfile_t mbf, FILE *stream)
{
/* Read the next multibyte character from mbf and put it into mbc.
If a read error occurs, errno is set and ferror (mbf->fp) becomes true. */
static void
-mbfile_getc (mbchar_t mbc, mbfile_t mbf)
+mbfile_getc (struct po_parser_state *ps, mbchar_t mbc, mbfile_t mbf)
{
size_t bytes;
{
/* An invalid multibyte sequence was encountered. */
/* Return a single byte. */
- if (signal_eilseq)
- po_gram_error (NULL, _("invalid multibyte sequence"));
+ if (ps->signal_eilseq)
+ po_gram_error (ps, _("invalid multibyte sequence"));
bytes = 1;
mbc->uc_valid = false;
break;
mbf->eof_seen = true;
if (ferror (mbf->fp))
goto eof;
- if (signal_eilseq)
- po_gram_error (NULL, _("incomplete multibyte sequence at end of file"));
+ if (ps->signal_eilseq)
+ po_gram_error (ps, _("incomplete multibyte sequence at end of file"));
bytes = mbf->bufcount;
mbc->uc_valid = false;
break;
mbf->buf[mbf->bufcount++] = (unsigned char) c;
if (c == '\n')
{
- if (signal_eilseq)
- po_gram_error (NULL, _("incomplete multibyte sequence at end of line"));
+ if (ps->signal_eilseq)
+ po_gram_error (ps, _("incomplete multibyte sequence at end of line"));
bytes = mbf->bufcount - 1;
mbc->uc_valid = false;
break;
{
/* scratchbuf contains an out-of-range Unicode character
(> 0x10ffff). */
- if (signal_eilseq)
- po_gram_error (NULL, _("invalid multibyte sequence"));
+ if (ps->signal_eilseq)
+ po_gram_error (ps, _("invalid multibyte sequence"));
mbc->uc_valid = false;
break;
}
/* Lexer variables. */
-static mbfile_t mbf;
unsigned int gram_max_allowed_errors = 20;
-static bool po_lex_obsolete;
-static bool po_lex_previous;
static bool pass_comments = false;
bool pass_obsolete_entries = false;
/* Prepare lexical analysis. */
void
-lex_start (FILE *fp, const char *real_filename, const char *logical_filename,
- bool is_pot_role)
+lex_start (struct po_parser_state *ps, /* PS: parser state whose lexer fields are reset below. */
+ FILE *fp, const char *real_filename, const char *logical_filename) /* NOTE(review): is_pot_role is dropped and nothing here sets the POT role any more -- confirm it is now set elsewhere. */
{
/* Ignore the logical_filename, because PO file entries already have
their file names attached. But use real_filename for error messages. */
gram_pos.file_name = xstrdup (real_filename);
- mbfile_init (mbf, fp);
+ mbfile_init (ps->mbf, fp); /* The multibyte input stream is now taken from PS. */
gram_pos.line_number = 1;
- gram_pos_column = 0;
- gram_pot_role = is_pot_role;
- signal_eilseq = true;
- po_lex_obsolete = false;
- po_lex_previous = false;
+ ps->gram_pos_column = 0; /* Column counting starts at 0 on line 1. */
+ ps->signal_eilseq = true; /* Report invalid multibyte sequences (the lexer turns this off while reading comments). */
+ ps->po_lex_obsolete = false; /* Set by the lexer when it sees a "#~" prefix. */
+ ps->po_lex_previous = false; /* Set by the lexer when it sees a "#|" or "#~|" prefix. */
po_lex_charset_init ();
}
void
lex_end ()
{
- mbf->fp = NULL;
gram_pos.file_name = NULL;
gram_pos.line_number = 0;
- gram_pos_column = 0;
- gram_pot_role = false;
- signal_eilseq = false;
- po_lex_obsolete = false;
- po_lex_previous = false;
po_lex_charset_close ();
}
/* Read a single character, dealing with backslash-newline.
Also keep track of the current line number and column number. */
static void
-lex_getc (mbchar_t mbc)
+lex_getc (struct po_parser_state *ps, mbchar_t mbc)
{
for (;;)
{
- mbfile_getc (mbc, mbf);
+ mbfile_getc (ps, mbc, ps->mbf);
if (mb_iseof (mbc))
{
- if (ferror (mbf->fp))
+ if (ferror (ps->mbf->fp))
bomb:
{
const char *errno_description = strerror (errno);
if (mb_iseq (mbc, '\n'))
{
gram_pos.line_number++;
- gram_pos_column = 0;
+ ps->gram_pos_column = 0;
break;
}
- gram_pos_column += mb_width (mbc);
+ ps->gram_pos_column += mb_width (ps, mbc);
if (mb_iseq (mbc, '\\'))
{
mbchar_t mbc2;
- mbfile_getc (mbc2, mbf);
+ mbfile_getc (ps, mbc2, ps->mbf);
if (mb_iseof (mbc2))
{
- if (ferror (mbf->fp))
+ if (ferror (ps->mbf->fp))
goto bomb;
break;
}
if (!mb_iseq (mbc2, '\n'))
{
- mbfile_ungetc (mbc2, mbf);
+ mbfile_ungetc (mbc2, ps->mbf);
break;
}
gram_pos.line_number++;
- gram_pos_column = 0;
+ ps->gram_pos_column = 0;
}
else
break;
static void
-lex_ungetc (const mbchar_t mbc)
+lex_ungetc (struct po_parser_state *ps, const mbchar_t mbc)
{
if (!mb_iseof (mbc))
{
gram_pos.line_number--;
else
/* Decrement the column number. Also works well enough for tabs. */
- gram_pos_column -= mb_width (mbc);
+ ps->gram_pos_column -= mb_width (ps, mbc);
- mbfile_ungetc (mbc, mbf);
+ mbfile_ungetc (mbc, ps->mbf);
}
}
static int
-keyword_p (const char *s)
+keyword_p (struct po_parser_state *ps, const char *s)
{
- if (!po_lex_previous)
+ if (!ps->po_lex_previous)
{
if (!strcmp (s, "domain"))
return DOMAIN;
static int
-control_sequence ()
+control_sequence (struct po_parser_state *ps)
{
mbchar_t mbc;
int val;
int max;
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_len (mbc) == 1)
switch (mb_ptr (mbc) [0])
{
val = val * 8 + (c - '0');
if (++max == 3)
break;
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_len (mbc) == 1)
switch (mb_ptr (mbc) [0])
{
default:
break;
}
- lex_ungetc (mbc);
+ lex_ungetc (ps, mbc);
break;
}
return val;
case 'x':
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_iseof (mbc) || mb_len (mbc) != 1
|| !c_isxdigit (mb_ptr (mbc) [0]))
break;
/* Warning: not portable, can't depend on 'a'..'f' ordering */
val += c - 'a' + 10;
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_len (mbc) == 1)
switch (mb_ptr (mbc) [0])
{
default:
break;
}
- lex_ungetc (mbc);
+ lex_ungetc (ps, mbc);
break;
}
return val;
/* FIXME: \u and \U are not handled. */
}
- lex_ungetc (mbc);
- po_gram_error (NULL, _("invalid control sequence"));
+ lex_ungetc (ps, mbc);
+ po_gram_error (ps, _("invalid control sequence"));
return ' ';
}
/* Return the next token in the PO file. The return codes are defined
in "po-gram-gen2.h". Associated data is put in 'po_gram_lval'. */
int
-po_gram_lex (union PO_GRAM_STYPE *lval)
+po_gram_lex (union PO_GRAM_STYPE *lval, struct po_parser_state *ps)
{
static char *buf;
static size_t bufmax;
for (;;)
{
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_iseof (mbc))
/* Yacc want this for end of file. */
switch (mb_ptr (mbc) [0])
{
case '\n':
- po_lex_obsolete = false;
- po_lex_previous = false;
+ ps->po_lex_obsolete = false;
+ ps->po_lex_previous = false;
/* Ignore whitespace, not relevant for the grammar. */
break;
break;
case '#':
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_iseq (mbc, '~'))
/* A pseudo-comment beginning with #~ is found. This is
not a comment. It is the format for obsolete entries.
We simply discard the "#~" prefix. The following
characters are expected to be well formed. */
{
- po_lex_obsolete = true;
+ ps->po_lex_obsolete = true;
/* A pseudo-comment beginning with #~| denotes a previous
untranslated string in an obsolete entry. This does not
make much sense semantically, and is implemented here
for completeness only. */
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_iseq (mbc, '|'))
- po_lex_previous = true;
+ ps->po_lex_previous = true;
else
- lex_ungetc (mbc);
+ lex_ungetc (ps, mbc);
break;
}
if (mb_iseq (mbc, '|'))
prefix, but change the keywords and string returns
accordingly. */
{
- po_lex_previous = true;
+ ps->po_lex_previous = true;
break;
}
/* Accumulate comments into a buffer. If we have been asked
to pass comments, generate a COMMENT token, otherwise
discard it. */
- signal_eilseq = false;
+ ps->signal_eilseq = false;
if (pass_comments)
{
bufpos = 0;
memcpy_small (&buf[bufpos], mb_ptr (mbc), mb_len (mbc));
bufpos += mb_len (mbc);
- lex_getc (mbc);
+ lex_getc (ps, mbc);
}
buf[bufpos] = '\0';
lval->string.string = buf;
lval->string.pos = gram_pos;
- lval->string.obsolete = po_lex_obsolete;
- po_lex_obsolete = false;
- signal_eilseq = true;
+ lval->string.obsolete = ps->po_lex_obsolete;
+ ps->po_lex_obsolete = false;
+ ps->signal_eilseq = true;
return COMMENT;
}
else
comments while they get not passed to the upper layers
is not very efficient. */
while (!mb_iseof (mbc) && !mb_iseq (mbc, '\n'))
- lex_getc (mbc);
- po_lex_obsolete = false;
- signal_eilseq = true;
+ lex_getc (ps, mbc);
+ ps->po_lex_obsolete = false;
+ ps->signal_eilseq = true;
}
break;
bufpos = 0;
for (;;)
{
- lex_getc (mbc);
+ lex_getc (ps, mbc);
while (bufpos + mb_len (mbc) >= bufmax)
{
bufmax += 100;
break;
if (mb_iseq (mbc, '\\'))
{
- buf[bufpos++] = control_sequence ();
+ buf[bufpos++] = control_sequence (ps);
continue;
}
/* FIXME: Treatment of embedded \000 chars is incorrect. */
lval->string.string = xstrdup (buf);
lval->string.pos = gram_pos;
- lval->string.obsolete = po_lex_obsolete;
- return (po_lex_previous ? PREV_STRING : STRING);
+ lval->string.obsolete = ps->po_lex_obsolete;
+ return (ps->po_lex_previous ? PREV_STRING : STRING);
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
buf = xrealloc (buf, bufmax);
}
buf[bufpos++] = c;
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_len (mbc) == 1)
switch (mb_ptr (mbc) [0])
{
}
break;
}
- lex_ungetc (mbc);
+ lex_ungetc (ps, mbc);
buf[bufpos] = '\0';
{
- int k = keyword_p (buf);
+ int k = keyword_p (ps, buf);
if (k == NAME)
{
lval->string.string = xstrdup (buf);
lval->string.pos = gram_pos;
- lval->string.obsolete = po_lex_obsolete;
+ lval->string.obsolete = ps->po_lex_obsolete;
}
else
{
lval->pos.pos = gram_pos;
- lval->pos.obsolete = po_lex_obsolete;
+ lval->pos.obsolete = ps->po_lex_obsolete;
}
return k;
}
buf = xrealloc (buf, bufmax + 1);
}
buf[bufpos++] = c;
- lex_getc (mbc);
+ lex_getc (ps, mbc);
if (mb_len (mbc) == 1)
switch (mb_ptr (mbc) [0])
{
}
break;
}
- lex_ungetc (mbc);
+ lex_ungetc (ps, mbc);
buf[bufpos] = '\0';
lval->number.number = atol (buf);
lval->number.pos = gram_pos;
- lval->number.obsolete = po_lex_obsolete;
+ lval->number.obsolete = ps->po_lex_obsolete;
return NUMBER;
case '[':
lval->pos.pos = gram_pos;
- lval->pos.obsolete = po_lex_obsolete;
+ lval->pos.obsolete = ps->po_lex_obsolete;
return '[';
case ']':
lval->pos.pos = gram_pos;
- lval->pos.obsolete = po_lex_obsolete;
+ lval->pos.obsolete = ps->po_lex_obsolete;
return ']';
default: