[thirdparty/glibc.git] / locale / programs / linereader.c

/* Copyright (C) 1996-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include "localedef.h"
#include "charmap.h"
#include "error.h"
#include "linereader.h"
#include "locfile.h"

/* Prototypes for local functions.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
				 const struct charmap_t *charmap,
				 struct localedef_t *locale,
				 const struct repertoire_t *repertoire,
				 int verbose);


/* Open FNAME for reading and wrap it in a line reader that uses HF to
   look up keywords.  A null FNAME, "-" and "/dev/stdin" all select
   standard input, which is reported under the name "<stdin>".
   Returns NULL if the file cannot be opened; otherwise returns
   whatever lr_create returns for the opened stream.  */
struct linereader *
lr_open (const char *fname, kw_hash_fct_t hf)
{
  FILE *stream;
  int use_stdin = (fname == NULL
		   || strcmp (fname, "-") == 0
		   || strcmp (fname, "/dev/stdin") == 0);

  if (use_stdin)
    return lr_create (stdin, "<stdin>", hf);

  stream = fopen (fname, "rm");
  if (stream == NULL)
    return NULL;

  return lr_create (stream, fname, hf);
}

/* Create a line reader for the already opened stream FP.  FNAME is
   copied and stored as the name of the input; HF is stored as the
   keyword lookup function.  The reader starts at line 1 with '#' as
   comment character and '\\' as escape character, and the first line
   of FP is read immediately.

   Returns the new reader, or NULL if the first line cannot be read.
   In the failure case FP is closed, everything allocated here is
   released and errno is preserved across the cleanup.  */
struct linereader *
lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
{
  struct linereader *result;
  int n;

  result = (struct linereader *) xmalloc (sizeof (*result));

  result->fp = fp;
  result->fname = xstrdup (fname);
  result->buf = NULL;
  result->bufsize = 0;
  result->lineno = 1;
  result->idx = 0;
  result->comment_char = '#';
  result->escape_char = '\\';
  result->translate_strings = 1;
  result->return_widestr = 0;

  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  if (n < 0)
    {
      int save = errno;
      fclose (result->fp);
      /* getdelim may have allocated a buffer even though it failed, so
	 it has to be released as well.  */
      free (result->buf);
      free ((char *) result->fname);
      free (result);
      errno = save;
      return NULL;
    }

  /* A line ending in <escape_char><newline> is continued: strip both
     characters so that the buffer does not end in a newline.  */
  if (n > 1 && result->buf[n - 2] == result->escape_char
      && result->buf[n - 1] == '\n')
    n -= 2;

  result->buf[n] = '\0';
  result->bufact = n;
  result->hash_fct = hf;

  return result;
}


/* Return nonzero if the current line buffer of LR holds no characters
   (bufact is zero), zero otherwise.  LR is not modified.

   The previous code used `=' instead of `==', which cleared
   lr->bufact as a side effect and always returned 0.  */
int
lr_eof (struct linereader *lr)
{
  return lr->bufact == 0;
}


/* Discard the remainder of the current logical line of LR.  If
   VERBOSE is nonzero, first skip blanks and report an error when
   anything other than a newline or a comment follows them.  */
void
lr_ignore_rest (struct linereader *lr, int verbose)
{
  if (verbose)
    {
      /* Step over whitespace, stopping at a newline or at the comment
	 character.  */
      for (;;)
	{
	  char cur = lr->buf[lr->idx];

	  if (!isspace (cur) || cur == '\n' || cur == lr->comment_char)
	    break;

	  if (cur == '\0')
	    {
	      if (lr_next (lr) < 0)
		return;
	    }
	  else
	    ++lr->idx;
	}

      if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
	  && lr->buf[lr->idx] != lr->comment_char)
	lr_error (lr, _("trailing garbage at end of line"));
    }

  /* A buffer that does not end in a newline belongs to a continued
     line: keep fetching lines until one ends in a newline, the buffer
     is empty or reading fails.  */
  while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n'
	 && lr_next (lr) >= 0)
    continue;

  /* Mark everything in the buffer as consumed.  */
  lr->idx = lr->bufact;
}


/* Release the line reader LR: close its stream and free the line
   buffer and the reader object itself.  LR must not be used
   afterwards.

   NOTE(review): lr->fname, which lr_create allocates with xstrdup, is
   not freed here.  This may be deliberate if other code keeps
   pointers to the file name after the reader is closed -- confirm
   before adding a free.  */
void
lr_close (struct linereader *lr)
{
  fclose (lr->fp);
  free (lr->buf);
  free (lr);
}


/* Read the next physical line of LR into its buffer and reset the
   read index to the start of that line.  Returns 0 on success and -1
   if no further line could be read (the reader state is then left
   unchanged).  */
int
lr_next (struct linereader *lr)
{
  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);

  if (len < 0)
    return -1;

  ++lr->lineno;

  /* An escaped newline marks a continued line.  Both the escape
     character and the newline are removed, so the buffer does not end
     in a newline.  (Replacing the pair by a single space was an
     alternative that has never been enabled.)  */
  if (len > 1 && lr->buf[len - 1] == '\n'
      && lr->buf[len - 2] == lr->escape_char)
    len -= 2;

  lr->buf[len] = '\0';
  lr->bufact = len;
  lr->idx = 0;

  return 0;
}


/* Defined in error.c.  */
/* This variable is incremented each time `error' is called.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;


/* Read the next token from LR and return a pointer to the token
   object embedded in LR (the same storage is reused by every call).

   Leading whitespace is skipped; a newline yields tok_eol and end of
   input yields tok_eof.  Text starting with the comment character is
   ignored.  CHARMAP, LOCALE, REPERTOIRE and VERBOSE are only passed on
   to get_string when a string literal is found.  */
struct token *
lr_token (struct linereader *lr, const struct charmap_t *charmap,
	  struct localedef_t *locale, const struct repertoire_t *repertoire,
	  int verbose)
{
  int ch;

  /* Skip whitespace and comments until the first character of a real
     token (or end of line / end of input) is found.  */
  while (1)
    {
      do
	{
	  ch = lr_getc (lr);

	  if (ch == EOF)
	    {
	      lr->token.tok = tok_eof;
	      return &lr->token;
	    };

	  if (ch == '\n')
	    {
	      lr->token.tok = tok_eol;
	      return &lr->token;
	    }
	}
      while (isspace (ch));

      if (ch != lr->comment_char)
	break;

      /* Is there a newline at the end of the buffer?  */
      if (lr->buf[lr->bufact - 1] != '\n')
	{
	  /* No.  Some people want this to mean that only the line in
	     the file not the logical, concatenated line is ignored.
	     Let's try this.  */
	  lr->idx = lr->bufact;
	  continue;
	}

      /* Ignore rest of line.  */
      lr_ignore_rest (lr, 0);
      lr->token.tok = tok_eol;
      return &lr->token;
    }

  /* Match escape sequences.  */
  if (ch == lr->escape_char)
    return get_toplvl_escape (lr);

  /* Match ellipsis.  CH is the first '.'; the comparisons below look
     at the characters which follow it in the buffer, so the longest
     forms have to be tested first.  */
  if (ch == '.')
    {
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 10; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis4_2;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis4;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis3;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 6; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis2_2;
	  return &lr->token;
	}
      if (lr->buf[lr->idx] == '.')
	{
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis2;
	  return &lr->token;
	}
      /* A single '.' falls through and is handled like any other
	 character below.  */
    }

  switch (ch)
    {
    case '<':
      return get_symname (lr);

    case '0' ... '9':
      /* Decimal number.  The value is accumulated without an overflow
	 check.  */
      lr->token.tok = tok_number;
      lr->token.val.num = ch - '0';

      while (isdigit (ch = lr_getc (lr)))
	{
	  lr->token.val.num *= 10;
	  lr->token.val.num += ch - '0';
	}
      if (isalpha (ch))
	lr_error (lr, _("garbage at end of number"));
      /* Give back the character which ended the number.  */
      lr_ungetn (lr, 1);

      return &lr->token;

    case ';':
      lr->token.tok = tok_semicolon;
      return &lr->token;

    case ',':
      lr->token.tok = tok_comma;
      return &lr->token;

    case '(':
      lr->token.tok = tok_open_brace;
      return &lr->token;

    case ')':
      lr->token.tok = tok_close_brace;
      return &lr->token;

    case '"':
      return get_string (lr, charmap, locale, repertoire, verbose);

    case '-':
      /* "-1" is a token of its own; anything else starting with '-' is
	 handed to get_ident.  */
      ch = lr_getc (lr);
      if (ch == '1')
	{
	  lr->token.tok = tok_minus1;
	  return &lr->token;
	}
      /* NOTE(review): this steps back over both the lookahead and the
	 '-' itself, yet get_ident starts by re-reading
	 lr->buf[lr->idx - 1], which would then be the character
	 before the '-'.  Looks like an off-by-one (unget 1 instead of
	 2) -- confirm against lr_getc/lr_ungetn, which are defined
	 elsewhere.  */
      lr_ungetn (lr, 2);
      break;
    }

  /* Everything else is an identifier or keyword.  */
  return get_ident (lr);
}


/* Parse a character code written as a run of escape sequences.  The
   caller (lr_token) has already read the first escape character.
   Each sequence is the escape character followed by two or three
   octal digits, by 'd' and two or three decimal digits, or by 'x' and
   exactly two hexadecimal digits, and contributes one byte.

   On success the token is tok_charcode with the bytes and their count
   stored in val.charcode.  On a malformed sequence the token is
   tok_error and val.str describes the offending text.  */
static struct token *
get_toplvl_escape (struct linereader *lr)
{
  /* This is supposed to be a numeric value.  We return the
     numerical value and the number of bytes.  */
  /* Presumably the index of the escape character just read by the
     caller; only used for the error token below.  */
  size_t start_idx = lr->idx - 1;
  unsigned char *bytes = lr->token.val.charcode.bytes;
  size_t nbytes = 0;
  int ch;

  do
    {
      unsigned int byte = 0;
      unsigned int base = 8;

      ch = lr_getc (lr);

      /* An optional prefix selects decimal or hexadecimal; octal is
	 the default.  */
      if (ch == 'd')
	{
	  base = 10;
	  ch = lr_getc (lr);
	}
      else if (ch == 'x')
	{
	  base = 16;
	  ch = lr_getc (lr);
	}

      /* The first digit is mandatory.  */
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	{
	esc_error:
	  /* Note that the error token points directly into the line
	     buffer of LR; it is not a copy.  */
	  lr->token.val.str.startmb = &lr->buf[start_idx];

	  /* Consume the rest of the word so that it is part of the
	     reported text.  */
	  while (ch != EOF && !isspace (ch))
	    ch = lr_getc (lr);
	  lr->token.val.str.lenmb = lr->idx - start_idx;

	  lr->token.tok = tok_error;
	  return &lr->token;
	}

      if (isdigit (ch))
	byte = ch - '0';
      else
	byte = tolower (ch) - 'a' + 10;

      /* The second digit is mandatory as well.  */
      ch = lr_getc (lr);
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	goto esc_error;

      byte *= base;
      if (isdigit (ch))
	byte += ch - '0';
      else
	byte += tolower (ch) - 'a' + 10;

      /* Octal and decimal codes may have a third digit.
	 NOTE(review): this test uses isdigit, so for octal input a
	 third digit of '8' or '9' is accepted, unlike the first two
	 digits which are checked against the base.  */
      ch = lr_getc (lr);
      if (base != 16 && isdigit (ch))
	{
	  byte *= base;
	  byte += ch - '0';

	  ch = lr_getc (lr);
	}

      /* NOTE(review): BYTE is not range checked; a value above 255 is
	 truncated when stored into the unsigned char array.  */
      bytes[nbytes++] = byte;
    }
  /* Continue as long as another escape sequence follows directly and
     there is room for one more byte.  */
  while (ch == lr->escape_char
	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));

  if (!isspace (ch))
    lr_error (lr, _("garbage at end of character code specification"));

  /* Give back the character which ended the sequence.  */
  lr_ungetn (lr, 1);

  lr->token.tok = tok_charcode;
  lr->token.val.charcode.nbytes = nbytes;

  return &lr->token;
}


/* Append the single byte CH to the growable byte buffer of the
   enclosing function.  Expects the locals BUF (the buffer), BUFACT
   (bytes used) and BUFMAX (bytes allocated, must not be zero) to be
   in scope; the capacity is doubled whenever the buffer is full.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufmax == bufact)						      \
	{								      \
	  bufmax = 2 * bufmax;						      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      buf[bufact] = (ch);						      \
      ++bufact;								      \
    }									      \
  while (0)


/* Append the L bytes starting at S to the growable byte buffer of the
   enclosing function.  Expects the locals BUF (the buffer), BUFACT
   (bytes used) and BUFMAX (bytes allocated) to be in scope.

   When the data does not fit, the capacity is at least doubled and
   always made large enough for the whole request.  The write position
   BUFACT is only ever advanced by L.  (The previous version
   overwrote BUFACT instead of enlarging BUFMAX and doubled only once,
   which could write beyond the allocation.)  */
#define ADDS(s, l) \
  do									      \
    {									      \
      size_t _l = (l);							      \
      size_t _need = bufact + _l;					      \
      if (_need > bufmax)						      \
	{								      \
	  bufmax = 2 * bufmax > _need ? 2 * bufmax : _need;		      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      memcpy (&buf[bufact], s, _l);					      \
      bufact += _l;							      \
    }									      \
  while (0)


/* Append the wide character CH to the growable wide-character buffer
   of the enclosing function.  Expects the locals BUF2 (the buffer),
   BUF2ACT (elements used) and BUF2MAX (element capacity, must not be
   zero) to be in scope.  BUF2MAX counts elements, not bytes: it is
   compared with the element index and multiplied by the element size
   of 4 when the buffer is enlarged.  */
#define ADDWC(ch) \
  do									      \
    {									      \
      if (buf2max == buf2act)						      \
	{								      \
	  buf2max = 2 * buf2max;					      \
	  buf2 = xrealloc (buf2, 4 * buf2max);				      \
	}								      \
      buf2[buf2act] = (ch);						      \
      ++buf2act;							      \
    }									      \
  while (0)


/* Read a symbolic name; the opening '<' has already been consumed by
   the caller.  The name ends at an unescaped '>'; a newline or the
   end of the input before that is reported as an error.

   The result is one of
   - tok_ucs4 with the value in val.ucs4, for names of the form Uxxxx
     or Uxxxxxxxx (hexadecimal digits),
   - the token of a reserved word whose symname_or_ident field is 1,
   - tok_bsymbol with a freshly allocated, NUL-terminated copy of the
     name (without the terminator character) in val.str.startmb and
     its length in val.str.lenmb.  */
static struct token *
get_symname (struct linereader *lr)
{
  /* Symbol in brackets.  We must distinguish three kinds:
     1. reserved words
     2. ISO 10646 position values
     3. all other.  */
  char *buf;
  size_t bufact = 0;
  size_t bufmax = 56;
  const struct keyword_t *kw;
  int ch;

  buf = (char *) xmalloc (bufmax);

  /* Collect the name including the terminating character, which is
     therefore always the last byte in BUF.  End of input is treated
     like a newline; otherwise the loop would never terminate once
     lr_getc starts returning EOF.  */
  do
    {
      ch = lr_getc (lr);
      if (ch == lr->escape_char)
	{
	  /* The escaped character is taken literally, so an escaped
	     '>' does not end the name.  */
	  int c2 = lr_getc (lr);

	  if (c2 == EOF)
	    c2 = '\n';
	  ADDC (c2);

	  if (c2 == '\n')
	    ch = '\n';
	}
      else
	{
	  if (ch == EOF)
	    ch = '\n';
	  ADDC (ch);
	}
    }
  while (ch != '>' && ch != '\n');

  if (ch == '\n')
    lr_error (lr, _("unterminated symbolic name"));

  /* Test for ISO 10646 position value.  The lengths include the
     terminator: 'U' + 4 or 8 digits + terminator.  */
  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
    {
      char *cp = buf + 1;
      while (cp < &buf[bufact - 1] && isxdigit (*cp))
	++cp;

      if (cp == &buf[bufact - 1])
	{
	  /* Yes, it is.  The conversion stops at the terminator
	     character stored after the digits.  */
	  lr->token.tok = tok_ucs4;
	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);

	  /* The name itself is not part of the token.  */
	  free (buf);

	  return &lr->token;
	}
    }

  /* It is a symbolic name.  Test for reserved words.  */
  kw = lr->hash_fct (buf, bufact - 1);

  if (kw != NULL && kw->symname_or_ident == 1)
    {
      lr->token.tok = kw->token;
      free (buf);
    }
  else
    {
      lr->token.tok = tok_bsymbol;

      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact - 1;
    }

  return &lr->token;
}


/* Read an identifier or keyword.  The first character has already
   been consumed by the caller and is picked up again from the line
   buffer.  The word extends up to whitespace, '"', ';', '<', ',' or
   the end of the input; the escape character makes the following
   character part of the word.

   If the word is a keyword whose symname_or_ident field is 0, its
   token is returned.  Otherwise the result is tok_ident with a
   freshly allocated, NUL-terminated copy of the word in
   val.str.startmb and its length in val.str.lenmb.  */
static struct token *
get_ident (struct linereader *lr)
{
  size_t bufmax = 56;
  size_t bufact = 0;
  char *buf = xmalloc (bufmax);
  const struct keyword_t *kw;
  int ch;

  /* Start with the character directly before the read position.  */
  ADDC (lr->buf[lr->idx - 1]);

  for (;;)
    {
      ch = lr_getc (lr);
      if (isspace (ch) || ch == '"' || ch == ';' || ch == '<' || ch == ','
	  || ch == EOF)
	break;

      if (ch == lr->escape_char)
	{
	  /* Take the next character literally; an escape at the end
	     of the line or of the input is an error.  */
	  ch = lr_getc (lr);
	  if (ch == '\n' || ch == EOF)
	    {
	      lr_error (lr, _("invalid escape sequence"));
	      break;
	    }
	}

      ADDC (ch);
    }

  /* The character which ended the word belongs to the next token.  */
  lr_ungetc (lr, ch);

  kw = lr->hash_fct (buf, bufact);

  if (kw == NULL || kw->symname_or_ident != 0)
    {
      /* Not a keyword usable in this position: plain identifier.  */
      lr->token.tok = tok_ident;

      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact;
    }
  else
    {
      lr->token.tok = kw->token;
      free (buf);
    }

  return &lr->token;
}


/* Read a string literal; the opening '"' has already been consumed by
   lr_token.  The token type is always tok_string.

   If LR->translate_strings is zero the bytes up to the closing quote
   are returned as they are.  Otherwise every <...> symbol is replaced
   by its byte sequence from CHARMAP; for <Uxxxx> names REPERTOIRE and,
   unless NO_TRANSLITERATION is defined, the transliteration data of
   LOCALE are consulted as fallbacks.  If LR->return_widestr is set a
   wide-character version of the string is built as well.

   On success val.str.startmb is a freshly allocated NUL-terminated
   string and val.str.lenmb its length including the NUL byte;
   val.str.startwc/lenwc are set likewise when a wide string was
   requested.  If a translated string is illegal or unterminated, all
   four fields are cleared (NULL/0).  VERBOSE is currently unused.  */
static struct token *
get_string (struct linereader *lr, const struct charmap_t *charmap,
	    struct localedef_t *locale, const struct repertoire_t *repertoire,
	    int verbose)
{
  int return_widestr = lr->return_widestr;
  char *buf;
  wchar_t *buf2 = NULL;
  size_t bufact;
  size_t bufmax = 56;

  /* We must return two different strings.  */
  buf = xmalloc (bufmax);
  bufact = 0;

  /* We know it'll be a string.  */
  lr->token.tok = tok_string;

  /* If we need not translate the strings (i.e., expand <...> parts)
     we can run a simple loop.  */
  if (!lr->translate_strings)
    {
      int ch;

      buf2 = NULL;
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	ADDC (ch);

      /* Catch errors with trailing escape character.  */
      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
	{
	  lr_error (lr, _("illegal escape sequence at end of string"));
	  --bufact;
	}
      else if (ch == '\n' || ch == EOF)
	lr_error (lr, _("unterminated string"));

      ADDC ('\0');
    }
  else
    {
      int illegal_string = 0;
      size_t buf2act = 0;
      /* BUF2MAX is a number of wide characters, not of bytes: ADDWC
	 compares it with the element count BUF2ACT and multiplies by
	 the element size when enlarging the buffer.  The initial
	 allocation has to follow the same convention, otherwise ADDWC
	 writes beyond the end of the buffer before it ever grows
	 it.  */
      size_t buf2max = 56;
      int ch;

      /* We have to provide the wide character result as well.  */
      if (return_widestr)
	buf2 = xmalloc (buf2max * sizeof (uint32_t));

      /* Read until the end of the string (or end of the line or file).  */
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	{
	  size_t startidx;
	  uint32_t wch;
	  struct charseq *seq;

	  if (ch != '<')
	    {
	      /* The standards leave it up to the implementation to decide
		 what to do with character which stand for themself.  We
		 could jump through hoops to find out the value relative to
		 the charmap and the repertoire map, but instead we leave
		 it up to the locale definition author to write a better
		 definition.  We assume here that every character which
		 stands for itself is encoded using ISO 8859-1.  Using the
		 escape character is allowed.  */
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}

	      ADDC (ch);
	      if (return_widestr)
		ADDWC ((uint32_t) ch);

	      continue;
	    }

	  /* Now we have to search for the end of the symbolic name, i.e.,
	     the closing '>'.  The name is collected temporarily at the
	     end of BUF, starting at STARTIDX.  */
	  startidx = bufact;
	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
	    {
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}
	      ADDC (ch);
	    }
	  if (ch == '\n' || ch == EOF)
	    /* Not a correct string.  */
	    break;
	  if (bufact == startidx)
	    {
	      /* <> is no correct name.  Ignore it and also signal an
		 error.  */
	      illegal_string = 1;
	      continue;
	    }

	  /* It might be a Uxxxx symbol.  */
	  if (buf[startidx] == 'U'
	      && (bufact - startidx == 5 || bufact - startidx == 9))
	    {
	      char *cp = buf + startidx + 1;
	      while (cp < &buf[bufact] && isxdigit (*cp))
		++cp;

	      if (cp == &buf[bufact])
		{
		  char utmp[10];

		  /* Yes, it is.  */
		  ADDC ('\0');
		  wch = strtoul (buf + startidx + 1, NULL, 16);

		  /* Now forget about the name we just added.  */
		  bufact = startidx;

		  if (return_widestr)
		    ADDWC (wch);

		  /* See whether the charmap contains the Uxxxxxxxx names.  */
		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
		  seq = charmap_find_value (charmap, utmp, 9);

		  if (seq == NULL)
		    {
		     /* No, this isn't the case.  Now determine from
			the repertoire the name of the character and
			find it in the charmap.  */
		      if (repertoire != NULL)
			{
			  const char *symbol;

			  symbol = repertoire_find_symbol (repertoire, wch);

			  if (symbol != NULL)
			    seq = charmap_find_value (charmap, symbol,
						      strlen (symbol));
			}

		      if (seq == NULL)
			{
#ifndef NO_TRANSLITERATION
			  /* Transliterate if possible.  */
			  if (locale != NULL)
			    {
			      uint32_t *translit;

			      if ((locale->avail & CTYPE_LOCALE) == 0)
				{
				  /* Load the CTYPE data now.  */
				  int old_needed = locale->needed;

				  locale->needed = 0;
				  locale = load_locale (LC_CTYPE,
							locale->name,
							locale->repertoire_name,
							charmap, locale);
				  locale->needed = old_needed;
				}

			      if ((locale->avail & CTYPE_LOCALE) != 0
				  && ((translit = find_translit (locale,
								 charmap, wch))
				      != NULL))
				/* The CTYPE data contains a matching
				   transliteration.  */
				{
				  int i;

				  for (i = 0; translit[i] != 0; ++i)
				    {
				      char utmp[10];

				      snprintf (utmp, sizeof (utmp), "U%08X",
						translit[i]);
				      seq = charmap_find_value (charmap, utmp,
								9);
				      assert (seq != NULL);
				      ADDS (seq->bytes, seq->nbytes);
				    }

				  continue;
				}
			    }
#endif	/* NO_TRANSLITERATION */

			  /* Not a known name.  */
			  illegal_string = 1;
			}
		    }

		  if (seq != NULL)
		    ADDS (seq->bytes, seq->nbytes);

		  continue;
		}
	    }

	  /* We now have the symbolic name in buf[startidx] to
	     buf[bufact-1].  Now find out the value for this character
	     in the charmap as well as in the repertoire map (in this
	     order).  */
	  seq = charmap_find_value (charmap, &buf[startidx],
				    bufact - startidx);

	  if (seq == NULL)
	    {
	      /* This name is not in the charmap.  */
	      lr_error (lr, _("symbol `%.*s' not in charmap"),
			(int) (bufact - startidx), &buf[startidx]);
	      illegal_string = 1;
	    }

	  if (return_widestr)
	    {
	      /* Now the same for the multibyte representation.  */
	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
		wch = seq->ucs4;
	      else
		{
		  wch = repertoire_find_value (repertoire, &buf[startidx],
					       bufact - startidx);
		  /* Remember the result in the charmap entry.  */
		  if (seq != NULL)
		    seq->ucs4 = wch;
		}

	      if (wch == ILLEGAL_CHAR_VALUE)
		{
		  /* This name is not in the repertoire map.  */
		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
			    (int) (bufact - startidx), &buf[startidx]);
		  illegal_string = 1;
		}
	      else
		ADDWC (wch);
	    }

	  /* Now forget about the name we just added.  */
	  bufact = startidx;

	  /* And copy the bytes.  */
	  if (seq != NULL)
	    ADDS (seq->bytes, seq->nbytes);
	}

      if (ch == '\n' || ch == EOF)
	{
	  lr_error (lr, _("unterminated string"));
	  illegal_string = 1;
	}

      if (illegal_string)
	{
	  free (buf);
	  free (buf2);
	  lr->token.val.str.startmb = NULL;
	  lr->token.val.str.lenmb = 0;
	  lr->token.val.str.startwc = NULL;
	  lr->token.val.str.lenwc = 0;

	  return &lr->token;
	}

      ADDC ('\0');

      if (return_widestr)
	{
	  ADDWC (0);
	  /* Trim the wide string to the size actually used.  */
	  lr->token.val.str.startwc = xrealloc (buf2,
						buf2act * sizeof (uint32_t));
	  lr->token.val.str.lenwc = buf2act;
	}
    }

  /* Trim the multibyte string to the size actually used.  */
  lr->token.val.str.startmb = xrealloc (buf, bufact);
  lr->token.val.str.lenmb = bufact;

  return &lr->token;
}
Commit	Line	Data
d614a753	1	/* Copyright (C) 1996-2020 Free Software Foundation, Inc.
5290baf0	2	This file is part of the GNU C Library.
4b10dd6c	3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
19bc17a9	4
43bc8ac6	5	This program is free software; you can redistribute it and/or modify
2e2efe65 RM	6	it under the terms of the GNU General Public License as published
	7	by the Free Software Foundation; version 2 of the License, or
	8	(at your option) any later version.
19bc17a9	9
43bc8ac6	10	This program is distributed in the hope that it will be useful,
5290baf0	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6 UD	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43bc8ac6 UD	13	GNU General Public License for more details.
19bc17a9	14
43bc8ac6	15	You should have received a copy of the GNU General Public License
5a82c748	16	along with this program; if not, see <https://www.gnu.org/licenses/>. */
19bc17a9 RM	17
	18	#ifdef HAVE_CONFIG_H
	19	# include <config.h>
	20	#endif
	21
47e8b443	22	#include <assert.h>
19bc17a9 RM	23	#include <ctype.h>
	24	#include <errno.h>
	25	#include <libintl.h>
	26	#include <stdarg.h>
	27	#include <stdlib.h>
	28	#include <string.h>
e054f494	29	#include <stdint.h>
19bc17a9	30
f2b98f97	31	#include "localedef.h"
4b10dd6c	32	#include "charmap.h"
19bc17a9 RM	33	#include "error.h"
19bc17a9 RM	34	#include "linereader.h"
47e8b443	35	#include "locfile.h"
93693c4d	36
4b10dd6c	37	/* Prototypes for local functions. */
19bc17a9 RM	38	static struct token get_toplvl_escape (struct linereader lr);
	39	static struct token get_symname (struct linereader lr);
	40	static struct token get_ident (struct linereader lr);
	41	static struct token get_string (struct linereader lr,
4b10dd6c	42	const struct charmap_t *charmap,
47e8b443	43	struct localedef_t *locale,
93693c4d UD	44	const struct repertoire_t *repertoire,
93693c4d UD	45	int verbose);
19bc17a9 RM	46
	47
	48	struct linereader *
	49	lr_open (const char *fname, kw_hash_fct_t hf)
	50	{
	51	FILE *fp;
19bc17a9 RM	52
	53	if (fname == NULL \|\| strcmp (fname, "-") == 0
	54	\|\| strcmp (fname, "/dev/stdin") == 0)
3e076219	55	return lr_create (stdin, "<stdin>", hf);
19bc17a9 RM	56	else
19bc17a9 RM	57	{
2e2dc1a5	58	fp = fopen (fname, "rm");
19bc17a9 RM	59	if (fp == NULL)
19bc17a9 RM	60	return NULL;
3e076219	61	return lr_create (fp, fname, hf);
19bc17a9	62	}
3e076219 UD	63	}
	64
	65	struct linereader *
	66	lr_create (FILE fp, const char fname, kw_hash_fct_t hf)
	67	{
	68	struct linereader *result;
	69	int n;
19bc17a9 RM	70
	71	result = (struct linereader ) xmalloc (sizeof (result));
	72
	73	result->fp = fp;
3e076219	74	result->fname = xstrdup (fname);
19bc17a9 RM	75	result->buf = NULL;
	76	result->bufsize = 0;
	77	result->lineno = 1;
	78	result->idx = 0;
	79	result->comment_char = '#';
	80	result->escape_char = '\\';
	81	result->translate_strings = 1;
7c11c4a1	82	result->return_widestr = 0;
19bc17a9 RM	83
	84	n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
	85	if (n < 0)
	86	{
	87	int save = errno;
	88	fclose (result->fp);
46ec036d	89	free ((char *) result->fname);
19bc17a9 RM	90	free (result);
	91	errno = save;
	92	return NULL;
	93	}
	94
	95	if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
	96	n -= 2;
	97
	98	result->buf[n] = '\0';
	99	result->bufact = n;
	100	result->hash_fct = hf;
	101
	102	return result;
	103	}
	104
	105
	106	int
	107	lr_eof (struct linereader *lr)
	108	{
	109	return lr->bufact = 0;
	110	}
	111
	112
dd9423a6 UD	113	void
	114	lr_ignore_rest (struct linereader *lr, int verbose)
	115	{
	116	if (verbose)
	117	{
	118	while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
	119	&& lr->buf[lr->idx] != lr->comment_char)
	120	if (lr->buf[lr->idx] == '\0')
	121	{
	122	if (lr_next (lr) < 0)
	123	return;
	124	}
	125	else
	126	++lr->idx;
	127
	128	if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
	129	&& lr->buf[lr->idx] != lr->comment_char)
	130	lr_error (lr, _("trailing garbage at end of line"));
	131	}
	132
	133	/* Ignore continued line. */
	134	while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
	135	if (lr_next (lr) < 0)
	136	break;
	137
	138	lr->idx = lr->bufact;
	139	}
	140
	141
19bc17a9 RM	142	void
	143	lr_close (struct linereader *lr)
	144	{
	145	fclose (lr->fp);
	146	free (lr->buf);
	147	free (lr);
	148	}
	149
	150
	151	int
	152	lr_next (struct linereader *lr)
	153	{
	154	int n;
	155
	156	n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
	157	if (n < 0)
	158	return -1;
	159
	160	++lr->lineno;
	161
	162	if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
	163	{
4b10dd6c UD	164	#if 0
4b10dd6c UD	165	/* XXX Is this correct? */
19bc17a9 RM	166	/* An escaped newline character is substituted with a single <SP>. */
	167	--n;
	168	lr->buf[n - 1] = ' ';
4b10dd6c UD	169	#else
	170	n -= 2;
	171	#endif
19bc17a9 RM	172	}
	173
	174	lr->buf[n] = '\0';
	175	lr->bufact = n;
	176	lr->idx = 0;
	177
	178	return 0;
	179	}
	180
	181
	182	/* Defined in error.c. */
	183	/* This variable is incremented each time `error' is called. */
	184	extern unsigned int error_message_count;
	185
	186	/* The calling program should define program_name and set it to the
	187	name of the executing program. */
	188	extern char *program_name;
	189
	190
	191	struct token *
4b10dd6c	192	lr_token (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	193	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	194	int verbose)
19bc17a9 RM	195	{
	196	int ch;
	197
	198	while (1)
	199	{
	200	do
	201	{
	202	ch = lr_getc (lr);
	203
76fbcfdd UD	204	if (ch == EOF)
	205	{
	206	lr->token.tok = tok_eof;
	207	return &lr->token;
	208	};
	209
19bc17a9 RM	210	if (ch == '\n')
	211	{
	212	lr->token.tok = tok_eol;
	213	return &lr->token;
	214	}
	215	}
	216	while (isspace (ch));
	217
19bc17a9 RM	218	if (ch != lr->comment_char)
	219	break;
	220
a0dc5206 UD	221	/* Is there an newline at the end of the buffer? */
	222	if (lr->buf[lr->bufact - 1] != '\n')
	223	{
	224	/* No. Some people want this to mean that only the line in
	225	the file not the logical, concatenated line is ignored.
	226	Let's try this. */
	227	lr->idx = lr->bufact;
	228	continue;
	229	}
	230
19bc17a9 RM	231	/* Ignore rest of line. */
	232	lr_ignore_rest (lr, 0);
	233	lr->token.tok = tok_eol;
	234	return &lr->token;
	235	}
	236
	237	/* Match escape sequences. */
	238	if (ch == lr->escape_char)
	239	return get_toplvl_escape (lr);
	240
	241	/* Match ellipsis. */
4b10dd6c	242	if (ch == '.')
19bc17a9	243	{
a0dc5206 UD	244	if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	245	{
	246	int cnt;
	247	for (cnt = 0; cnt < 10; ++cnt)
	248	lr_getc (lr);
	249	lr->token.tok = tok_ellipsis4_2;
	250	return &lr->token;
	251	}
4b10dd6c UD	252	if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	253	{
	254	lr_getc (lr);
	255	lr_getc (lr);
	256	lr_getc (lr);
	257	lr->token.tok = tok_ellipsis4;
	258	return &lr->token;
	259	}
	260	if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	261	{
	262	lr_getc (lr);
	263	lr_getc (lr);
	264	lr->token.tok = tok_ellipsis3;
	265	return &lr->token;
	266	}
a0dc5206 UD	267	if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	268	{
	269	int cnt;
	270	for (cnt = 0; cnt < 6; ++cnt)
	271	lr_getc (lr);
	272	lr->token.tok = tok_ellipsis2_2;
	273	return &lr->token;
	274	}
4b10dd6c UD	275	if (lr->buf[lr->idx] == '.')
	276	{
	277	lr_getc (lr);
	278	lr->token.tok = tok_ellipsis2;
	279	return &lr->token;
	280	}
19bc17a9 RM	281	}
	282
	283	switch (ch)
	284	{
	285	case '<':
	286	return get_symname (lr);
	287
	288	case '0' ... '9':
	289	lr->token.tok = tok_number;
	290	lr->token.val.num = ch - '0';
	291
	292	while (isdigit (ch = lr_getc (lr)))
	293	{
	294	lr->token.val.num *= 10;
	295	lr->token.val.num += ch - '0';
	296	}
	297	if (isalpha (ch))
5290baf0	298	lr_error (lr, _("garbage at end of number"));
19bc17a9 RM	299	lr_ungetn (lr, 1);
	300
	301	return &lr->token;
	302
	303	case ';':
	304	lr->token.tok = tok_semicolon;
	305	return &lr->token;
	306
	307	case ',':
	308	lr->token.tok = tok_comma;
	309	return &lr->token;
	310
	311	case '(':
	312	lr->token.tok = tok_open_brace;
	313	return &lr->token;
	314
	315	case ')':
	316	lr->token.tok = tok_close_brace;
	317	return &lr->token;
	318
	319	case '"':
47e8b443	320	return get_string (lr, charmap, locale, repertoire, verbose);
19bc17a9 RM	321
	322	case '-':
	323	ch = lr_getc (lr);
	324	if (ch == '1')
	325	{
	326	lr->token.tok = tok_minus1;
	327	return &lr->token;
	328	}
	329	lr_ungetn (lr, 2);
	330	break;
	331	}
	332
	333	return get_ident (lr);
	334	}
	335
	336
	337	static struct token *
	338	get_toplvl_escape (struct linereader *lr)
	339	{
	340	/* This is supposed to be a numeric value. We return the
	341	numerical value and the number of bytes. */
	342	size_t start_idx = lr->idx - 1;
9cfe5381 RM	343	unsigned char *bytes = lr->token.val.charcode.bytes;
9cfe5381 RM	344	size_t nbytes = 0;
19bc17a9 RM	345	int ch;
	346
	347	do
	348	{
	349	unsigned int byte = 0;
	350	unsigned int base = 8;
	351
	352	ch = lr_getc (lr);
	353
	354	if (ch == 'd')
	355	{
	356	base = 10;
	357	ch = lr_getc (lr);
	358	}
	359	else if (ch == 'x')
	360	{
	361	base = 16;
	362	ch = lr_getc (lr);
	363	}
	364
	365	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	366	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	367	{
19bc17a9 RM	368	esc_error:
4b10dd6c	369	lr->token.val.str.startmb = &lr->buf[start_idx];
19bc17a9	370
76fbcfdd	371	while (ch != EOF && !isspace (ch))
19bc17a9	372	ch = lr_getc (lr);
4b10dd6c	373	lr->token.val.str.lenmb = lr->idx - start_idx;
19bc17a9 RM	374
	375	lr->token.tok = tok_error;
	376	return &lr->token;
	377	}
	378
	379	if (isdigit (ch))
	380	byte = ch - '0';
	381	else
4b10dd6c	382	byte = tolower (ch) - 'a' + 10;
19bc17a9 RM	383
	384	ch = lr_getc (lr);
	385	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	386	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	387	goto esc_error;
	388
	389	byte *= base;
	390	if (isdigit (ch))
	391	byte += ch - '0';
	392	else
4b10dd6c	393	byte += tolower (ch) - 'a' + 10;
19bc17a9 RM	394
	395	ch = lr_getc (lr);
	396	if (base != 16 && isdigit (ch))
	397	{
	398	byte *= base;
679f5a56	399	byte += ch - '0';
19bc17a9 RM	400
	401	ch = lr_getc (lr);
	402	}
	403
4b10dd6c	404	bytes[nbytes++] = byte;
19bc17a9	405	}
c50ec4e0	406	while (ch == lr->escape_char
6dd67bd5	407	&& nbytes < (int) sizeof (lr->token.val.charcode.bytes));
19bc17a9 RM	408
	409	if (!isspace (ch))
	410	lr_error (lr, _("garbage at end of character code specification"));
	411
	412	lr_ungetn (lr, 1);
	413
	414	lr->token.tok = tok_charcode;
19bc17a9 RM	415	lr->token.val.charcode.nbytes = nbytes;
	416
	417	return &lr->token;
	418	}
	419
	420
4b10dd6c UD	421	#define ADDC(ch) \
	422	do \
	423	{ \
	424	if (bufact == bufmax) \
	425	{ \
	426	bufmax *= 2; \
	427	buf = xrealloc (buf, bufmax); \
	428	} \
	429	buf[bufact++] = (ch); \
	430	} \
	431	while (0)
	432
	433
	434	#define ADDS(s, l) \
	435	do \
	436	{ \
	437	size_t _l = (l); \
	438	if (bufact + _l > bufmax) \
	439	{ \
	440	if (bufact < _l) \
	441	bufact = _l; \
	442	bufmax *= 2; \
	443	buf = xrealloc (buf, bufmax); \
	444	} \
	445	memcpy (&buf[bufact], s, _l); \
	446	bufact += _l; \
	447	} \
	448	while (0)
	449
	450
	451	#define ADDWC(ch) \
	452	do \
	453	{ \
	454	if (buf2act == buf2max) \
	455	{ \
	456	buf2max *= 2; \
	457	buf2 = xrealloc (buf2, buf2max * 4); \
	458	} \
	459	buf2[buf2act++] = (ch); \
	460	} \
19bc17a9 RM	461	while (0)
	462
	463
	464	static struct token *
	465	get_symname (struct linereader *lr)
	466	{
	467	/* Symbol in brackets. We must distinguish three kinds:
	468	1. reserved words
	469	2. ISO 10646 position values
	470	3. all other. */
	471	char *buf;
	472	size_t bufact = 0;
	473	size_t bufmax = 56;
	474	const struct keyword_t *kw;
	475	int ch;
	476
	477	buf = (char *) xmalloc (bufmax);
	478
	479	do
	480	{
	481	ch = lr_getc (lr);
	482	if (ch == lr->escape_char)
	483	{
	484	int c2 = lr_getc (lr);
	485	ADDC (c2);
	486
	487	if (c2 == '\n')
	488	ch = '\n';
	489	}
	490	else
	491	ADDC (ch);
	492	}
	493	while (ch != '>' && ch != '\n');
	494
	495	if (ch == '\n')
	496	lr_error (lr, _("unterminated symbolic name"));
	497
	498	/* Test for ISO 10646 position value. */
	499	if (buf[0] == 'U' && (bufact == 6 \|\| bufact == 10))
	500	{
	501	char *cp = buf + 1;
	502	while (cp < &buf[bufact - 1] && isxdigit (*cp))
	503	++cp;
	504
	505	if (cp == &buf[bufact - 1])
	506	{
	507	/* Yes, it is. */
4b10dd6c UD	508	lr->token.tok = tok_ucs4;
4b10dd6c UD	509	lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
19bc17a9 RM	510
	511	return &lr->token;
	512	}
	513	}
	514
	515	/* It is a symbolic name. Test for reserved words. */
	516	kw = lr->hash_fct (buf, bufact - 1);
	517
	518	if (kw != NULL && kw->symname_or_ident == 1)
	519	{
	520	lr->token.tok = kw->token;
	521	free (buf);
	522	}
	523	else
	524	{
	525	lr->token.tok = tok_bsymbol;
	526
19bc17a9	527	buf = xrealloc (buf, bufact + 1);
b16dba4c	528	buf[bufact] = '\0';
19bc17a9	529
4b10dd6c UD	530	lr->token.val.str.startmb = buf;
4b10dd6c UD	531	lr->token.val.str.lenmb = bufact - 1;
19bc17a9 RM	532	}
	533
	534	return &lr->token;
	535	}
	536
	537
	538	static struct token *
	539	get_ident (struct linereader *lr)
	540	{
	541	char *buf;
	542	size_t bufact;
	543	size_t bufmax = 56;
	544	const struct keyword_t *kw;
	545	int ch;
	546
	547	buf = xmalloc (bufmax);
	548	bufact = 0;
	549
	550	ADDC (lr->buf[lr->idx - 1]);
	551
	552	while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
f126ef67	553	&& ch != '<' && ch != ',' && ch != EOF)
4b10dd6c UD	554	{
	555	if (ch == lr->escape_char)
	556	{
	557	ch = lr_getc (lr);
	558	if (ch == '\n' \|\| ch == EOF)
	559	{
	560	lr_error (lr, _("invalid escape sequence"));
	561	break;
	562	}
	563	}
	564	ADDC (ch);
	565	}
19bc17a9	566
f126ef67	567	lr_ungetc (lr, ch);
19bc17a9 RM	568
	569	kw = lr->hash_fct (buf, bufact);
	570
	571	if (kw != NULL && kw->symname_or_ident == 0)
	572	{
	573	lr->token.tok = kw->token;
	574	free (buf);
	575	}
	576	else
	577	{
	578	lr->token.tok = tok_ident;
	579
19bc17a9	580	buf = xrealloc (buf, bufact + 1);
b16dba4c	581	buf[bufact] = '\0';
19bc17a9	582
4b10dd6c UD	583	lr->token.val.str.startmb = buf;
4b10dd6c UD	584	lr->token.val.str.lenmb = bufact;
19bc17a9 RM	585	}
	586
	587	return &lr->token;
	588	}
	589
	590
	591	static struct token *
4b10dd6c	592	get_string (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	593	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	594	int verbose)
19bc17a9	595	{
4b10dd6c UD	596	int return_widestr = lr->return_widestr;
4b10dd6c UD	597	char *buf;
a9c27b3e	598	wchar_t *buf2 = NULL;
19bc17a9 RM	599	size_t bufact;
19bc17a9 RM	600	size_t bufmax = 56;
19bc17a9	601
4b10dd6c	602	/* We must return two different strings. */
19bc17a9 RM	603	buf = xmalloc (bufmax);
	604	bufact = 0;
	605
4b10dd6c UD	606	/* We know it'll be a string. */
	607	lr->token.tok = tok_string;
	608
	609	/* If we need not translate the strings (i.e., expand <...> parts)
	610	we can run a simple loop. */
	611	if (!lr->translate_strings)
	612	{
	613	int ch;
	614
	615	buf2 = NULL;
	616	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
19bc17a9	617	ADDC (ch);
4b10dd6c UD	618
	619	/* Catch errors with trailing escape character. */
	620	if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	621	&& (bufact == 1 \|\| buf[bufact - 2] != lr->escape_char))
	622	{
	623	lr_error (lr, _("illegal escape sequence at end of string"));
	624	--bufact;
	625	}
	626	else if (ch == '\n' \|\| ch == EOF)
	627	lr_error (lr, _("unterminated string"));
	628
	629	ADDC ('\0');
	630	}
	631	else
	632	{
	633	int illegal_string = 0;
	634	size_t buf2act = 0;
	635	size_t buf2max = 56 * sizeof (uint32_t);
	636	int ch;
4b10dd6c UD	637
	638	/* We have to provide the wide character result as well. */
	639	if (return_widestr)
	640	buf2 = xmalloc (buf2max);
	641
	642	/* Read until the end of the string (or end of the line or file). */
	643	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	644	{
	645	size_t startidx;
	646	uint32_t wch;
	647	struct charseq *seq;
	648
	649	if (ch != '<')
	650	{
	651	/* The standards leave it up to the implementation to decide
	652	what to do with character which stand for themself. We
	653	could jump through hoops to find out the value relative to
	654	the charmap and the repertoire map, but instead we leave
	655	it up to the locale definition author to write a better
	656	definition. We assume here that every character which
	657	stands for itself is encoded using ISO 8859-1. Using the
	658	escape character is allowed. */
	659	if (ch == lr->escape_char)
	660	{
	661	ch = lr_getc (lr);
	662	if (ch == '\n' \|\| ch == EOF)
	663	break;
	664	}
	665
4b10dd6c UD	666	ADDC (ch);
	667	if (return_widestr)
	668	ADDWC ((uint32_t) ch);
	669
	670	continue;
	671	}
	672
	673	/* Now we have to search for the end of the symbolic name, i.e.,
	674	the closing '>'. */
	675	startidx = bufact;
	676	while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
	677	{
	678	if (ch == lr->escape_char)
	679	{
	680	ch = lr_getc (lr);
	681	if (ch == '\n' \|\| ch == EOF)
	682	break;
	683	}
	684	ADDC (ch);
	685	}
	686	if (ch == '\n' \|\| ch == EOF)
	687	/* Not a correct string. */
	688	break;
	689	if (bufact == startidx)
	690	{
	691	/* <> is no correct name. Ignore it and also signal an
	692	error. */
19bc17a9	693	illegal_string = 1;
4b10dd6c UD	694	continue;
4b10dd6c UD	695	}
19bc17a9	696
4b10dd6c UD	697	/* It might be a Uxxxx symbol. */
	698	if (buf[startidx] == 'U'
	699	&& (bufact - startidx == 5 \|\| bufact - startidx == 9))
	700	{
	701	char *cp = buf + startidx + 1;
	702	while (cp < &buf[bufact] && isxdigit (*cp))
	703	++cp;
	704
	705	if (cp == &buf[bufact])
	706	{
3c833378	707	char utmp[10];
4b10dd6c UD	708
	709	/* Yes, it is. */
	710	ADDC ('\0');
	711	wch = strtoul (buf + startidx + 1, NULL, 16);
	712
	713	/* Now forget about the name we just added. */
	714	bufact = startidx;
	715
	716	if (return_widestr)
	717	ADDWC (wch);
	718
3c833378 UD	719	/* See whether the charmap contains the Uxxxxxxxx names. */
	720	snprintf (utmp, sizeof (utmp), "U%08X", wch);
	721	seq = charmap_find_value (charmap, utmp, 9);
4b10dd6c	722
3c833378	723	if (seq == NULL)
4b10dd6c	724	{
3c833378 UD	725	/* No, this isn't the case. Now determine from
	726	the repertoire the name of the character and
	727	find it in the charmap. */
	728	if (repertoire != NULL)
3c833378	729	{
47e8b443	730	const char *symbol;
3c833378	731
47e8b443 UD	732	symbol = repertoire_find_symbol (repertoire, wch);
	733
	734	if (symbol != NULL)
	735	seq = charmap_find_value (charmap, symbol,
	736	strlen (symbol));
	737	}
	738
	739	if (seq == NULL)
	740	{
	741	#ifndef NO_TRANSLITERATION
	742	/* Transliterate if possible. */
	743	if (locale != NULL)
	744	{
	745	uint32_t *translit;
	746
	747	if ((locale->avail & CTYPE_LOCALE) == 0)
	748	{
	749	/* Load the CTYPE data now. */
	750	int old_needed = locale->needed;
	751
	752	locale->needed = 0;
69f6a804	753	locale = load_locale (LC_CTYPE,
47e8b443 UD	754	locale->name,
	755	locale->repertoire_name,
	756	charmap, locale);
	757	locale->needed = old_needed;
	758	}
	759
	760	if ((locale->avail & CTYPE_LOCALE) != 0
	761	&& ((translit = find_translit (locale,
	762	charmap, wch))
	763	!= NULL))
	764	/* The CTYPE data contains a matching
	765	transliteration. */
	766	{
	767	int i;
	768
	769	for (i = 0; translit[i] != 0; ++i)
	770	{
	771	char utmp[10];
	772
	773	snprintf (utmp, sizeof (utmp), "U%08X",
	774	translit[i]);
	775	seq = charmap_find_value (charmap, utmp,
	776	9);
	777	assert (seq != NULL);
	778	ADDS (seq->bytes, seq->nbytes);
	779	}
	780
	781	continue;
	782	}
	783	}
	784	#endif /* NO_TRANSLITERATION */
	785
	786	/* Not a known name. */
	787	illegal_string = 1;
3c833378	788	}
4b10dd6c UD	789	}
4b10dd6c UD	790
3c833378 UD	791	if (seq != NULL)
	792	ADDS (seq->bytes, seq->nbytes);
	793
4b10dd6c UD	794	continue;
	795	}
	796	}
	797
3c833378 UD	798	/* We now have the symbolic name in buf[startidx] to
	799	buf[bufact-1]. Now find out the value for this character
	800	in the charmap as well as in the repertoire map (in this
	801	order). */
	802	seq = charmap_find_value (charmap, &buf[startidx],
	803	bufact - startidx);
	804
	805	if (seq == NULL)
	806	{
	807	/* This name is not in the charmap. */
	808	lr_error (lr, _("symbol `%.*s' not in charmap"),
	809	(int) (bufact - startidx), &buf[startidx]);
	810	illegal_string = 1;
	811	}
	812
4b10dd6c UD	813	if (return_widestr)
4b10dd6c UD	814	{
3c833378 UD	815	/* Now the same for the multibyte representation. */
	816	if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
	817	wch = seq->ucs4;
	818	else
	819	{
	820	wch = repertoire_find_value (repertoire, &buf[startidx],
	821	bufact - startidx);
	822	if (seq != NULL)
	823	seq->ucs4 = wch;
	824	}
	825
4b10dd6c UD	826	if (wch == ILLEGAL_CHAR_VALUE)
	827	{
	828	/* This name is not in the repertoire map. */
	829	lr_error (lr, _("symbol `%.*s' not in repertoire map"),
70e51ab9	830	(int) (bufact - startidx), &buf[startidx]);
4b10dd6c UD	831	illegal_string = 1;
	832	}
	833	else
	834	ADDWC (wch);
	835	}
	836
3c833378 UD	837	/* Now forget about the name we just added. */
3c833378 UD	838	bufact = startidx;
19bc17a9	839
3c833378 UD	840	/* And copy the bytes. */
	841	if (seq != NULL)
	842	ADDS (seq->bytes, seq->nbytes);
4b10dd6c	843	}
19bc17a9	844
4b10dd6c UD	845	if (ch == '\n' \|\| ch == EOF)
	846	{
	847	lr_error (lr, _("unterminated string"));
	848	illegal_string = 1;
	849	}
19bc17a9	850
4b10dd6c UD	851	if (illegal_string)
	852	{
	853	free (buf);
72e6cdfa	854	free (buf2);
4b10dd6c UD	855	lr->token.val.str.startmb = NULL;
4b10dd6c UD	856	lr->token.val.str.lenmb = 0;
d5fd1f3f UD	857	lr->token.val.str.startwc = NULL;
d5fd1f3f UD	858	lr->token.val.str.lenwc = 0;
19bc17a9	859
4b10dd6c UD	860	return &lr->token;
4b10dd6c UD	861	}
19bc17a9	862
4b10dd6c	863	ADDC ('\0');
19bc17a9	864
4b10dd6c UD	865	if (return_widestr)
	866	{
	867	ADDWC (0);
	868	lr->token.val.str.startwc = xrealloc (buf2,
	869	buf2act * sizeof (uint32_t));
	870	lr->token.val.str.lenwc = buf2act;
	871	}
19bc17a9 RM	872	}
19bc17a9 RM	873
4b10dd6c UD	874	lr->token.val.str.startmb = xrealloc (buf, bufact);
	875	lr->token.val.str.lenmb = bufact;
	876
19bc17a9 RM	877	return &lr->token;
19bc17a9 RM	878	}