[thirdparty/glibc.git] / locale / programs / linereader.c

/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include "localedef.h"
#include "charmap.h"
#include "error.h"
#include "linereader.h"
#include "locfile.h"

/* Prototypes for local functions.  Each of these helpers is entered
   from lr_token after the first character of the token has been read
   and returns a pointer to the token it filled in.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
				 const struct charmap_t *charmap,
				 struct localedef_t *locale,
				 const struct repertoire_t *repertoire,
				 int verbose);


/* Open the input named FNAME and wrap it in a line reader that uses HF
   for keyword lookup.  A NULL name, "-" and "/dev/stdin" all select
   standard input, which is reported under the name "<stdin>".
   Returns NULL if the file cannot be opened; otherwise returns
   whatever lr_create returns.  */
struct linereader *
lr_open (const char *fname, kw_hash_fct_t hf)
{
  FILE *stream;
  int use_stdin = (fname == NULL
		   || strcmp (fname, "-") == 0
		   || strcmp (fname, "/dev/stdin") == 0);

  if (use_stdin)
    return lr_create (stdin, "<stdin>", hf);

  stream = fopen (fname, "rm");
  if (stream == NULL)
    return NULL;

  return lr_create (stream, fname, hf);
}

/* Create a line reader on the already opened stream FP.  FNAME is
   copied with xstrdup and HF is stored as the keyword lookup function.
   The reader starts with '#' as comment character, '\\' as escape
   character, string translation enabled and wide string results
   disabled.  The first line is read immediately.

   Returns the new reader, or NULL with errno preserved if the first
   read fails; in that case FP is closed and everything allocated here
   is released again.  */
struct linereader *
lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
{
  struct linereader *result;
  int n;

  result = (struct linereader *) xmalloc (sizeof (*result));

  result->fp = fp;
  result->fname = xstrdup (fname);
  result->buf = NULL;
  result->bufsize = 0;
  result->lineno = 1;
  result->idx = 0;
  result->comment_char = '#';
  result->escape_char = '\\';
  result->translate_strings = 1;
  result->return_widestr = 0;

  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  if (n < 0)
    {
      int save = errno;
      fclose (result->fp);
      /* getdelim may have allocated the line buffer even though it
	 reported failure, so it has to be released as well.  */
      free (result->buf);
      free ((char *) result->fname);
      free (result);
      errno = save;
      return NULL;
    }

  /* An escaped newline continues the logical line: drop both the
     escape character and the newline, exactly as lr_next does.  */
  if (n > 1 && result->buf[n - 2] == result->escape_char
      && result->buf[n - 1] == '\n')
    n -= 2;

  result->buf[n] = '\0';
  result->bufact = n;
  result->hash_fct = hf;

  return result;
}


/* Return nonzero if the reader LR holds no more buffered input, i.e.
   the active length of its line buffer is zero, and zero otherwise.
   This is a pure query; the reader is not modified.  */
int
lr_eof (struct linereader *lr)
{
  return lr->bufact == 0;
}


/* Discard the remainder of the current logical line of LR.  With
   VERBOSE nonzero, whitespace is skipped first and an error is
   reported if anything other than a newline or a comment follows
   (unless the stream is already at end of file).  Afterwards any
   continuation lines are read and thrown away, and the read index is
   placed at the end of the buffer.  */
void
lr_ignore_rest (struct linereader *lr, int verbose)
{
  if (verbose)
    {
      char cur;

      for (;;)
	{
	  cur = lr->buf[lr->idx];

	  /* Stop at the first character that is not plain blank space.  */
	  if (!isspace (cur) || cur == '\n' || cur == lr->comment_char)
	    break;

	  if (cur != '\0')
	    ++lr->idx;
	  else if (lr_next (lr) < 0)
	    return;
	}

      cur = lr->buf[lr->idx];
      if (cur != '\n' && ! feof (lr->fp) && cur != lr->comment_char)
	lr_error (lr, _("trailing garbage at end of line"));
    }

  /* A buffer that does not end in a newline belongs to a continued
     line; keep reading until the physical end of the logical line.  */
  for (;;)
    {
      if (lr->bufact == 0 || lr->buf[lr->bufact - 1] == '\n')
	break;
      if (lr_next (lr) < 0)
	break;
    }

  lr->idx = lr->bufact;
}


/* Close the stream of the reader LR and release its line buffer and
   the reader object itself.
   NOTE(review): the file name stored by lr_create is not released
   here; presumably other code may still hold that pointer -- confirm
   before adding a free.  */
void
lr_close (struct linereader *lr)
{
  FILE *stream = lr->fp;
  char *linebuf = lr->buf;

  fclose (stream);
  free (linebuf);
  free (lr);
}


/* Read the next physical line of LR into its buffer, replacing the
   previous content, and reset the read index to the start.  Returns 0
   on success and -1 if nothing more could be read.  The line counter
   is advanced for every line read.  */
int
lr_next (struct linereader *lr)
{
  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);

  if (len < 0)
    return -1;

  ++lr->lineno;

  /* An escaped newline is removed together with the escape character
     so the following line continues this one.  (Substituting a single
     space instead was considered but is not enabled.)  */
  if (len > 1 && lr->buf[len - 1] == '\n'
      && lr->buf[len - 2] == lr->escape_char)
    len -= 2;

  lr->buf[len] = '\0';
  lr->idx = 0;
  lr->bufact = len;

  return 0;
}


/* Defined in error.c.  */
/* This variable is incremented each time `error' is called.  */
/* NOTE(review): no function in this file refers to the two objects
   below by name; presumably the error reporting helpers used here
   depend on them -- confirm before removing these declarations.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;


/* Read the next token from LR and return a pointer to it.

   Leading whitespace is skipped.  End of input yields tok_eof, a
   newline yields tok_eol, and a comment (introduced by
   lr->comment_char) is discarded and also reported as tok_eol unless
   it sits on a continued line.  Escape sequences, ellipses, symbolic
   names in angle brackets, decimal numbers, punctuation, strings and
   the literal "-1" are recognized here; everything else is handed to
   get_ident.

   CHARMAP, LOCALE, REPERTOIRE and VERBOSE are only passed on to
   get_string.  Every return path in this function hands back
   &lr->token or the result of one of the get_* helpers.  */
struct token *
lr_token (struct linereader *lr, const struct charmap_t *charmap,
	  struct localedef_t *locale, const struct repertoire_t *repertoire,
	  int verbose)
{
  int ch;

  /* Skip whitespace and comments until the first character of a real
     token is found.  */
  while (1)
    {
      do
	{
	  ch = lr_getc (lr);

	  if (ch == EOF)
	    {
	      lr->token.tok = tok_eof;
	      return &lr->token;
	    };

	  if (ch == '\n')
	    {
	      lr->token.tok = tok_eol;
	      return &lr->token;
	    }
	}
      while (isspace (ch));

      if (ch != lr->comment_char)
	break;

      /* Is there a newline at the end of the buffer?  */
      if (lr->buf[lr->bufact - 1] != '\n')
	{
	  /* No.  Some people want this to mean that only the line in
	     the file not the logical, concatenated line is ignored.
	     Let's try this.  */
	  lr->idx = lr->bufact;
	  continue;
	}

      /* Ignore rest of line.  */
      lr_ignore_rest (lr, 0);
      lr->token.tok = tok_eol;
      return &lr->token;
    }

  /* Match escape sequences.  */
  if (ch == lr->escape_char)
    return get_toplvl_escape (lr);

  /* Match ellipsis.  The first dot has already been consumed, so the
     patterns below describe what must follow it.  The longer patterns
     are tested before their prefixes.  */
  if (ch == '.')
    {
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 10; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis4_2;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis4;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis3;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 6; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis2_2;
	  return &lr->token;
	}
      if (lr->buf[lr->idx] == '.')
	{
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis2;
	  return &lr->token;
	}
      /* A single dot is not an ellipsis; it falls through and is
	 treated like the start of an identifier.  */
    }

  switch (ch)
    {
    case '<':
      return get_symname (lr);

    case '0' ... '9':
      /* Accumulate a decimal number; no overflow check is made.  */
      lr->token.tok = tok_number;
      lr->token.val.num = ch - '0';

      while (isdigit (ch = lr_getc (lr)))
	{
	  lr->token.val.num *= 10;
	  lr->token.val.num += ch - '0';
	}
      if (isalpha (ch))
	lr_error (lr, _("garbage at end of number"));
      /* Push back the character that ended the number.  */
      lr_ungetn (lr, 1);

      return &lr->token;

    case ';':
      lr->token.tok = tok_semicolon;
      return &lr->token;

    case ',':
      lr->token.tok = tok_comma;
      return &lr->token;

    case '(':
      lr->token.tok = tok_open_brace;
      return &lr->token;

    case ')':
      lr->token.tok = tok_close_brace;
      return &lr->token;

    case '"':
      return get_string (lr, charmap, locale, repertoire, verbose);

    case '-':
      ch = lr_getc (lr);
      if (ch == '1')
	{
	  lr->token.tok = tok_minus1;
	  return &lr->token;
	}
      /* NOTE(review): two characters are pushed back here, yet
	 get_ident starts from lr->buf[lr->idx - 1].  That looks like
	 it refers to the character before the '-' rather than the '-'
	 itself -- confirm against the definition of lr_ungetn.  */
      lr_ungetn (lr, 2);
      break;
    }

  return get_ident (lr);
}


/* Parse a character code written as one or more escaped numbers and
   store it in LR's token.

   Each number follows an escape character and is octal by default,
   decimal when prefixed with 'd' and hexadecimal when prefixed with
   'x'.  At least two digits are required; decimal and octal numbers
   may have a third digit.  Numbers are collected as long as another
   escape character follows and the byte array of the token has room.

   On success the token is tok_charcode with the bytes and their count
   filled in.  On a malformed number the token is tok_error and its
   string value refers to the offending text inside lr->buf, from the
   first escape character up to the position reached after skipping to
   the next whitespace or end of input.  */
static struct token *
get_toplvl_escape (struct linereader *lr)
{
  /* This is supposed to be a numeric value.  We return the
     numerical value and the number of bytes.  */
  /* Index of the character before the current read position,
     presumably the escape character the caller just consumed.  */
  size_t start_idx = lr->idx - 1;
  unsigned char *bytes = lr->token.val.charcode.bytes;
  size_t nbytes = 0;
  int ch;

  do
    {
      unsigned int byte = 0;
      unsigned int base = 8;

      ch = lr_getc (lr);

      if (ch == 'd')
	{
	  base = 10;
	  ch = lr_getc (lr);
	}
      else if (ch == 'x')
	{
	  base = 16;
	  ch = lr_getc (lr);
	}

      /* First digit: must be valid for the selected base.  */
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	{
	  /* Also reached by goto from the second-digit check below.  */
	esc_error:
	  lr->token.val.str.startmb = &lr->buf[start_idx];

	  while (ch != EOF && !isspace (ch))
	    ch = lr_getc (lr);
	  lr->token.val.str.lenmb = lr->idx - start_idx;

	  lr->token.tok = tok_error;
	  return &lr->token;
	}

      if (isdigit (ch))
	byte = ch - '0';
      else
	byte = tolower (ch) - 'a' + 10;

      /* Second digit: mandatory, same validation as the first.  */
      ch = lr_getc (lr);
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	goto esc_error;

      byte *= base;
      if (isdigit (ch))
	byte += ch - '0';
      else
	byte += tolower (ch) - 'a' + 10;

      /* Optional third digit for decimal and octal numbers.
	 NOTE(review): this test uses isdigit, so '8' and '9' are
	 accepted here even when the base is 8 -- confirm whether that
	 is intended.  */
      ch = lr_getc (lr);
      if (base != 16 && isdigit (ch))
	{
	  byte *= base;
	  byte += ch - '0';

	  ch = lr_getc (lr);
	}

      /* The value is stored into an unsigned char without a range
	 check.  */
      bytes[nbytes++] = byte;
    }
  while (ch == lr->escape_char
	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));

  if (!isspace (ch))
    lr_error (lr, _("garbage at end of character code specification"));

  /* Push back the character that ended the sequence.  */
  lr_ungetn (lr, 1);

  lr->token.tok = tok_charcode;
  lr->token.val.charcode.nbytes = nbytes;

  return &lr->token;
}


/* Append the single byte CH to the growing byte buffer.  The macro
   expects three variables to be in scope where it is used: `buf' (the
   buffer), `bufact' (bytes used) and `bufmax' (bytes allocated).  When
   the buffer is full its size is doubled with xrealloc first.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufact == bufmax)						      \
	{								      \
	  bufmax *= 2;							      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      buf[bufact++] = (ch);						      \
    }									      \
  while (0)


/* Append the L bytes starting at S to the growing byte buffer.  The
   macro expects three variables to be in scope where it is used: `buf'
   (the buffer), `bufact' (bytes used) and `bufmax' (bytes allocated).

   When the data does not fit, the capacity is first raised to at least
   L and then doubled.  Since `bufact' never exceeds `bufmax', the new
   capacity always covers bufact + L.  The number of bytes already in
   use is left untouched by the growth step, so the new data lands
   directly behind the existing content.  */
#define ADDS(s, l) \
  do									      \
    {									      \
      size_t _l = (l);							      \
      if (bufact + _l > bufmax)						      \
	{								      \
	  if (bufmax < _l)						      \
	    bufmax = _l;						      \
	  bufmax *= 2;							      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      memcpy (&buf[bufact], s, _l);					      \
      bufact += _l;							      \
    }									      \
  while (0)


/* Append the wide character CH to the growing wide character buffer.
   The macro expects three variables to be in scope where it is used:
   `buf2' (the buffer), `buf2act' (elements used) and `buf2max'.  The
   fullness test compares `buf2max' with the element count, and on
   growth `buf2max' is doubled and buf2max * 4 bytes are requested, so
   `buf2max' is treated as a number of 4-byte elements here.  */
#define ADDWC(ch) \
  do									      \
    {									      \
      if (buf2act == buf2max)						      \
	{								      \
	  buf2max *= 2;							      \
	  buf2 = xrealloc (buf2, buf2max * 4);				      \
	}								      \
      buf2[buf2act++] = (ch);						      \
    }									      \
  while (0)


/* Read a symbolic name in angle brackets; the opening '<' has already
   been consumed by the caller.  The result is stored in LR's token and
   a pointer to that token is returned.

   Three kinds of names are distinguished:
     1. reserved words: the token is the keyword's token,
     2. ISO 10646 position values (Uxxxx or Uxxxxxxxx): tok_ucs4 with
	the numeric value,
     3. everything else: tok_bsymbol with a newly allocated copy of the
	name; the caller takes over that allocation.

   The name ends at the first unescaped '>'.  A newline or the end of
   the input also ends it; in that case an error is reported.  */
static struct token *
get_symname (struct linereader *lr)
{
  char *buf;
  size_t bufact = 0;
  size_t bufmax = 56;
  const struct keyword_t *kw;
  int ch;

  buf = (char *) xmalloc (bufmax);

  /* Collect the name.  The terminating character is stored as well,
     which is why the name length used below is bufact - 1.  */
  do
    {
      ch = lr_getc (lr);
      if (ch == lr->escape_char)
	{
	  /* The escaped character is taken literally; in particular an
	     escaped '>' does not end the name.  */
	  int c2 = lr_getc (lr);
	  ADDC (c2);

	  if (c2 == '\n' || c2 == EOF)
	    ch = c2;
	}
      else
	ADDC (ch);
    }
  /* Without the EOF test an unterminated name at the end of the input
     would keep this loop running and the buffer growing.  */
  while (ch != '>' && ch != '\n' && ch != EOF);

  if (ch != '>')
    lr_error (lr, _("unterminated symbolic name"));

  /* Test for ISO 10646 position value.  */
  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
    {
      char *cp = buf + 1;
      while (cp < &buf[bufact - 1] && isxdigit (*cp))
	++cp;

      if (cp == &buf[bufact - 1])
	{
	  /* Yes, it is.  The terminating character stored after the
	     digits is not a hexadecimal digit, so the conversion stops
	     inside the buffer.  */
	  lr->token.tok = tok_ucs4;
	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);

	  /* The token carries only the number; release the name.  */
	  free (buf);

	  return &lr->token;
	}
    }

  /* It is a symbolic name.  Test for reserved words.  */
  kw = lr->hash_fct (buf, bufact - 1);

  if (kw != NULL && kw->symname_or_ident == 1)
    {
      lr->token.tok = kw->token;
      free (buf);
    }
  else
    {
      lr->token.tok = tok_bsymbol;

      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact - 1;
    }

  return &lr->token;
}


/* Read an identifier and store it in LR's token.  The identifier
   starts with the character just before the current read position and
   runs until whitespace, '"', ';', '<', ',' or the end of the input.
   An escape character makes the following character part of the
   identifier.  If the text is a keyword that is valid as an
   identifier, the keyword's token is returned; otherwise the token is
   tok_ident with a newly allocated, NUL-terminated copy of the text.  */
static struct token *
get_ident (struct linereader *lr)
{
  size_t bufmax = 56;
  size_t bufact = 0;
  char *buf = xmalloc (bufmax);
  const struct keyword_t *kw;
  int ch;

  /* Seed the identifier with the character in front of the read
     position (presumably the one lr_token consumed last).  */
  ADDC (lr->buf[lr->idx - 1]);

  for (;;)
    {
      ch = lr_getc (lr);

      /* Any of these ends the identifier.  */
      if (isspace (ch) || ch == '"' || ch == ';' || ch == '<'
	  || ch == ',' || ch == EOF)
	break;

      if (ch == lr->escape_char)
	{
	  ch = lr_getc (lr);
	  if (ch == '\n' || ch == EOF)
	    {
	      lr_error (lr, _("invalid escape sequence"));
	      break;
	    }
	}

      ADDC (ch);
    }

  /* Hand the terminating character back to the reader.  */
  lr_ungetc (lr, ch);

  kw = lr->hash_fct (buf, bufact);

  if (kw == NULL || kw->symname_or_ident != 0)
    {
      /* Not a keyword usable here: return the text itself.  */
      lr->token.tok = tok_ident;

      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact;

      return &lr->token;
    }

  lr->token.tok = kw->token;
  free (buf);

  return &lr->token;
}


/* Read a string; the opening '"' has already been consumed by the
   caller.  The token of LR is set to tok_string and a pointer to it is
   returned.

   If lr->translate_strings is zero the characters up to the closing
   quote are copied verbatim.  Otherwise every <name> inside the string
   is replaced by the byte sequence CHARMAP assigns to it; Uxxxx and
   Uxxxxxxxx names are looked up through CHARMAP, then REPERTOIRE, and
   finally through the transliteration data of LOCALE.  If
   lr->return_widestr is set, the wide character form of the string is
   produced as well.  VERBOSE enables a one-time complaint about
   characters that are not written symbolically.

   On success the multibyte result (and the wide result, if requested)
   is newly allocated, terminated by a zero element that is included in
   the stored length.  If the translated string is invalid or
   unterminated, both results are NULL with length zero.  */
static struct token *
get_string (struct linereader *lr, const struct charmap_t *charmap,
	    struct localedef_t *locale, const struct repertoire_t *repertoire,
	    int verbose)
{
  int return_widestr = lr->return_widestr;
  char *buf;
  wchar_t *buf2 = NULL;
  size_t bufact;
  size_t bufmax = 56;

  /* We must return two different strings.  */
  buf = xmalloc (bufmax);
  bufact = 0;

  /* We know it'll be a string.  */
  lr->token.tok = tok_string;

  /* If we need not translate the strings (i.e., expand <...> parts)
     we can run a simple loop.  */
  if (!lr->translate_strings)
    {
      int ch;

      buf2 = NULL;
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	ADDC (ch);

      /* Catch errors with trailing escape character.  */
      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
	{
	  lr_error (lr, _("illegal escape sequence at end of string"));
	  --bufact;
	}
      else if (ch == '\n' || ch == EOF)
	lr_error (lr, _("unterminated string"));

      ADDC ('\0');
    }
  else
    {
      int illegal_string = 0;
      size_t buf2act = 0;
      /* Capacity of BUF2 counted in wide characters, not bytes: ADDWC
	 compares it with the element count BUF2ACT and multiplies it
	 by the element size itself when it reallocates.  */
      size_t buf2max = 56;
      int ch;
      int warned = 0;

      /* We have to provide the wide character result as well.  */
      if (return_widestr)
	buf2 = xmalloc (buf2max * sizeof (uint32_t));

      /* Read until the end of the string (or end of the line or file).  */
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	{
	  size_t startidx;
	  uint32_t wch;
	  struct charseq *seq;

	  if (ch != '<')
	    {
	      /* The standards leave it up to the implementation to decide
		 what to do with character which stand for themself.  We
		 could jump through hoops to find out the value relative to
		 the charmap and the repertoire map, but instead we leave
		 it up to the locale definition author to write a better
		 definition.  We assume here that every character which
		 stands for itself is encoded using ISO 8859-1.  Using the
		 escape character is allowed.  */
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}

	      if (verbose && !warned)
		{
		  lr_error (lr, _("\
non-symbolic character value should not be used"));
		  warned = 1;
		}

	      ADDC (ch);
	      if (return_widestr)
		ADDWC ((uint32_t) ch);

	      continue;
	    }

	  /* Now we have to search for the end of the symbolic name, i.e.,
	     the closing '>'.  The name is stored temporarily behind the
	     bytes collected so far and removed again afterwards.  */
	  startidx = bufact;
	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
	    {
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}
	      ADDC (ch);
	    }
	  if (ch == '\n' || ch == EOF)
	    /* Not a correct string.  */
	    break;
	  if (bufact == startidx)
	    {
	      /* <> is no correct name.  Ignore it and also signal an
		 error.  */
	      illegal_string = 1;
	      continue;
	    }

	  /* It might be a Uxxxx symbol.  */
	  if (buf[startidx] == 'U'
	      && (bufact - startidx == 5 || bufact - startidx == 9))
	    {
	      char *cp = buf + startidx + 1;
	      while (cp < &buf[bufact] && isxdigit (*cp))
		++cp;

	      if (cp == &buf[bufact])
		{
		  char utmp[10];

		  /* Yes, it is.  */
		  ADDC ('\0');
		  wch = strtoul (buf + startidx + 1, NULL, 16);

		  /* Now forget about the name we just added.  */
		  bufact = startidx;

		  if (return_widestr)
		    ADDWC (wch);

		  /* See whether the charmap contains the Uxxxxxxxx names.  */
		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
		  seq = charmap_find_value (charmap, utmp, 9);

		  if (seq == NULL)
		    {
		     /* No, this isn't the case.  Now determine from
			the repertoire the name of the character and
			find it in the charmap.  */
		      if (repertoire != NULL)
			{
			  const char *symbol;

			  symbol = repertoire_find_symbol (repertoire, wch);

			  if (symbol != NULL)
			    seq = charmap_find_value (charmap, symbol,
						      strlen (symbol));
			}

		      if (seq == NULL)
			{
#ifndef NO_TRANSLITERATION
			  /* Transliterate if possible.  */
			  if (locale != NULL)
			    {
			      uint32_t *translit;

			      if ((locale->avail & CTYPE_LOCALE) == 0)
				{
				  /* Load the CTYPE data now.  */
				  int old_needed = locale->needed;

				  locale->needed = 0;
				  locale = load_locale (LC_CTYPE,
							locale->name,
							locale->repertoire_name,
							charmap, locale);
				  locale->needed = old_needed;
				}

			      if ((locale->avail & CTYPE_LOCALE) != 0
				  && ((translit = find_translit (locale,
								 charmap, wch))
				      != NULL))
				/* The CTYPE data contains a matching
				   transliteration.  */
				{
				  int i;

				  for (i = 0; translit[i] != 0; ++i)
				    {
				      char utmp[10];

				      snprintf (utmp, sizeof (utmp), "U%08X",
						translit[i]);
				      seq = charmap_find_value (charmap, utmp,
								9);
				      assert (seq != NULL);
				      ADDS (seq->bytes, seq->nbytes);
				    }

				  continue;
				}
			    }
#endif	/* NO_TRANSLITERATION */

			  /* Not a known name.  */
			  illegal_string = 1;
			}
		    }

		  if (seq != NULL)
		    ADDS (seq->bytes, seq->nbytes);

		  continue;
		}
	    }

	  /* We now have the symbolic name in buf[startidx] to
	     buf[bufact-1].  Now find out the value for this character
	     in the charmap as well as in the repertoire map (in this
	     order).  */
	  seq = charmap_find_value (charmap, &buf[startidx],
				    bufact - startidx);

	  if (seq == NULL)
	    {
	      /* This name is not in the charmap.  */
	      lr_error (lr, _("symbol `%.*s' not in charmap"),
			(int) (bufact - startidx), &buf[startidx]);
	      illegal_string = 1;
	    }

	  if (return_widestr)
	    {
	      /* Now the same for the wide character representation.  A
		 value found in the repertoire map is cached in the
		 charmap entry.  */
	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
		wch = seq->ucs4;
	      else
		{
		  wch = repertoire_find_value (repertoire, &buf[startidx],
					       bufact - startidx);
		  if (seq != NULL)
		    seq->ucs4 = wch;
		}

	      if (wch == ILLEGAL_CHAR_VALUE)
		{
		  /* This name is not in the repertoire map.  */
		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
			    (int) (bufact - startidx), &buf[startidx]);
		  illegal_string = 1;
		}
	      else
		ADDWC (wch);
	    }

	  /* Now forget about the name we just added.  */
	  bufact = startidx;

	  /* And copy the bytes.  */
	  if (seq != NULL)
	    ADDS (seq->bytes, seq->nbytes);
	}

      if (ch == '\n' || ch == EOF)
	{
	  lr_error (lr, _("unterminated string"));
	  illegal_string = 1;
	}

      if (illegal_string)
	{
	  /* Nothing usable was produced; hand back empty results.  */
	  free (buf);
	  free (buf2);
	  lr->token.val.str.startmb = NULL;
	  lr->token.val.str.lenmb = 0;
	  lr->token.val.str.startwc = NULL;
	  lr->token.val.str.lenwc = 0;

	  return &lr->token;
	}

      ADDC ('\0');

      if (return_widestr)
	{
	  ADDWC (0);
	  lr->token.val.str.startwc = xrealloc (buf2,
						buf2act * sizeof (uint32_t));
	  lr->token.val.str.lenwc = buf2act;
	}
    }

  /* Trim the multibyte buffer to the size actually used.  */
  lr->token.val.str.startmb = xrealloc (buf, bufact);
  lr->token.val.str.lenmb = bufact;

  return &lr->token;
}
Commit	Line	Data
f7a9f785	1	/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
5290baf0	2	This file is part of the GNU C Library.
4b10dd6c	3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
19bc17a9	4
43bc8ac6	5	This program is free software; you can redistribute it and/or modify
2e2efe65 RM	6	it under the terms of the GNU General Public License as published
	7	by the Free Software Foundation; version 2 of the License, or
	8	(at your option) any later version.
19bc17a9	9
43bc8ac6	10	This program is distributed in the hope that it will be useful,
5290baf0	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6 UD	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43bc8ac6 UD	13	GNU General Public License for more details.
19bc17a9	14
43bc8ac6	15	You should have received a copy of the GNU General Public License
59ba27a6	16	along with this program; if not, see <http://www.gnu.org/licenses/>. */
19bc17a9 RM	17
	18	#ifdef HAVE_CONFIG_H
	19	# include <config.h>
	20	#endif
	21
47e8b443	22	#include <assert.h>
19bc17a9 RM	23	#include <ctype.h>
	24	#include <errno.h>
	25	#include <libintl.h>
	26	#include <stdarg.h>
	27	#include <stdlib.h>
	28	#include <string.h>
e054f494	29	#include <stdint.h>
19bc17a9	30
f2b98f97	31	#include "localedef.h"
4b10dd6c	32	#include "charmap.h"
19bc17a9 RM	33	#include "error.h"
19bc17a9 RM	34	#include "linereader.h"
47e8b443	35	#include "locfile.h"
93693c4d	36
4b10dd6c	37	/* Prototypes for local functions. */
19bc17a9 RM	38	static struct token get_toplvl_escape (struct linereader lr);
	39	static struct token get_symname (struct linereader lr);
	40	static struct token get_ident (struct linereader lr);
	41	static struct token get_string (struct linereader lr,
4b10dd6c	42	const struct charmap_t *charmap,
47e8b443	43	struct localedef_t *locale,
93693c4d UD	44	const struct repertoire_t *repertoire,
93693c4d UD	45	int verbose);
19bc17a9 RM	46
	47
	48	struct linereader *
	49	lr_open (const char *fname, kw_hash_fct_t hf)
	50	{
	51	FILE *fp;
19bc17a9 RM	52
	53	if (fname == NULL \|\| strcmp (fname, "-") == 0
	54	\|\| strcmp (fname, "/dev/stdin") == 0)
3e076219	55	return lr_create (stdin, "<stdin>", hf);
19bc17a9 RM	56	else
19bc17a9 RM	57	{
2e2dc1a5	58	fp = fopen (fname, "rm");
19bc17a9 RM	59	if (fp == NULL)
19bc17a9 RM	60	return NULL;
3e076219	61	return lr_create (fp, fname, hf);
19bc17a9	62	}
3e076219 UD	63	}
	64
	65	struct linereader *
	66	lr_create (FILE fp, const char fname, kw_hash_fct_t hf)
	67	{
	68	struct linereader *result;
	69	int n;
19bc17a9 RM	70
	71	result = (struct linereader ) xmalloc (sizeof (result));
	72
	73	result->fp = fp;
3e076219	74	result->fname = xstrdup (fname);
19bc17a9 RM	75	result->buf = NULL;
	76	result->bufsize = 0;
	77	result->lineno = 1;
	78	result->idx = 0;
	79	result->comment_char = '#';
	80	result->escape_char = '\\';
	81	result->translate_strings = 1;
7c11c4a1	82	result->return_widestr = 0;
19bc17a9 RM	83
	84	n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
	85	if (n < 0)
	86	{
	87	int save = errno;
	88	fclose (result->fp);
46ec036d	89	free ((char *) result->fname);
19bc17a9 RM	90	free (result);
	91	errno = save;
	92	return NULL;
	93	}
	94
	95	if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
	96	n -= 2;
	97
	98	result->buf[n] = '\0';
	99	result->bufact = n;
	100	result->hash_fct = hf;
	101
	102	return result;
	103	}
	104
	105
	106	int
	107	lr_eof (struct linereader *lr)
	108	{
	109	return lr->bufact = 0;
	110	}
	111
	112
dd9423a6 UD	113	void
	114	lr_ignore_rest (struct linereader *lr, int verbose)
	115	{
	116	if (verbose)
	117	{
	118	while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
	119	&& lr->buf[lr->idx] != lr->comment_char)
	120	if (lr->buf[lr->idx] == '\0')
	121	{
	122	if (lr_next (lr) < 0)
	123	return;
	124	}
	125	else
	126	++lr->idx;
	127
	128	if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
	129	&& lr->buf[lr->idx] != lr->comment_char)
	130	lr_error (lr, _("trailing garbage at end of line"));
	131	}
	132
	133	/* Ignore continued line. */
	134	while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
	135	if (lr_next (lr) < 0)
	136	break;
	137
	138	lr->idx = lr->bufact;
	139	}
	140
	141
19bc17a9 RM	142	void
	143	lr_close (struct linereader *lr)
	144	{
	145	fclose (lr->fp);
	146	free (lr->buf);
	147	free (lr);
	148	}
	149
	150
	151	int
	152	lr_next (struct linereader *lr)
	153	{
	154	int n;
	155
	156	n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
	157	if (n < 0)
	158	return -1;
	159
	160	++lr->lineno;
	161
	162	if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
	163	{
4b10dd6c UD	164	#if 0
4b10dd6c UD	165	/* XXX Is this correct? */
19bc17a9 RM	166	/* An escaped newline character is substituted with a single <SP>. */
	167	--n;
	168	lr->buf[n - 1] = ' ';
4b10dd6c UD	169	#else
	170	n -= 2;
	171	#endif
19bc17a9 RM	172	}
	173
	174	lr->buf[n] = '\0';
	175	lr->bufact = n;
	176	lr->idx = 0;
	177
	178	return 0;
	179	}
	180
	181
	182	/* Defined in error.c. */
	183	/* This variable is incremented each time `error' is called. */
	184	extern unsigned int error_message_count;
	185
	186	/* The calling program should define program_name and set it to the
	187	name of the executing program. */
	188	extern char *program_name;
	189
	190
	191	struct token *
4b10dd6c	192	lr_token (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	193	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	194	int verbose)
19bc17a9 RM	195	{
	196	int ch;
	197
	198	while (1)
	199	{
	200	do
	201	{
	202	ch = lr_getc (lr);
	203
76fbcfdd UD	204	if (ch == EOF)
	205	{
	206	lr->token.tok = tok_eof;
	207	return &lr->token;
	208	};
	209
19bc17a9 RM	210	if (ch == '\n')
	211	{
	212	lr->token.tok = tok_eol;
	213	return &lr->token;
	214	}
	215	}
	216	while (isspace (ch));
	217
19bc17a9 RM	218	if (ch != lr->comment_char)
	219	break;
	220
a0dc5206 UD	221	/* Is there an newline at the end of the buffer? */
	222	if (lr->buf[lr->bufact - 1] != '\n')
	223	{
	224	/* No. Some people want this to mean that only the line in
	225	the file not the logical, concatenated line is ignored.
	226	Let's try this. */
	227	lr->idx = lr->bufact;
	228	continue;
	229	}
	230
19bc17a9 RM	231	/* Ignore rest of line. */
	232	lr_ignore_rest (lr, 0);
	233	lr->token.tok = tok_eol;
	234	return &lr->token;
	235	}
	236
	237	/* Match escape sequences. */
	238	if (ch == lr->escape_char)
	239	return get_toplvl_escape (lr);
	240
	241	/* Match ellipsis. */
4b10dd6c	242	if (ch == '.')
19bc17a9	243	{
a0dc5206 UD	244	if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	245	{
	246	int cnt;
	247	for (cnt = 0; cnt < 10; ++cnt)
	248	lr_getc (lr);
	249	lr->token.tok = tok_ellipsis4_2;
	250	return &lr->token;
	251	}
4b10dd6c UD	252	if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	253	{
	254	lr_getc (lr);
	255	lr_getc (lr);
	256	lr_getc (lr);
	257	lr->token.tok = tok_ellipsis4;
	258	return &lr->token;
	259	}
	260	if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	261	{
	262	lr_getc (lr);
	263	lr_getc (lr);
	264	lr->token.tok = tok_ellipsis3;
	265	return &lr->token;
	266	}
a0dc5206 UD	267	if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	268	{
	269	int cnt;
	270	for (cnt = 0; cnt < 6; ++cnt)
	271	lr_getc (lr);
	272	lr->token.tok = tok_ellipsis2_2;
	273	return &lr->token;
	274	}
4b10dd6c UD	275	if (lr->buf[lr->idx] == '.')
	276	{
	277	lr_getc (lr);
	278	lr->token.tok = tok_ellipsis2;
	279	return &lr->token;
	280	}
19bc17a9 RM	281	}
	282
	283	switch (ch)
	284	{
	285	case '<':
	286	return get_symname (lr);
	287
	288	case '0' ... '9':
	289	lr->token.tok = tok_number;
	290	lr->token.val.num = ch - '0';
	291
	292	while (isdigit (ch = lr_getc (lr)))
	293	{
	294	lr->token.val.num *= 10;
	295	lr->token.val.num += ch - '0';
	296	}
	297	if (isalpha (ch))
5290baf0	298	lr_error (lr, _("garbage at end of number"));
19bc17a9 RM	299	lr_ungetn (lr, 1);
	300
	301	return &lr->token;
	302
	303	case ';':
	304	lr->token.tok = tok_semicolon;
	305	return &lr->token;
	306
	307	case ',':
	308	lr->token.tok = tok_comma;
	309	return &lr->token;
	310
	311	case '(':
	312	lr->token.tok = tok_open_brace;
	313	return &lr->token;
	314
	315	case ')':
	316	lr->token.tok = tok_close_brace;
	317	return &lr->token;
	318
	319	case '"':
47e8b443	320	return get_string (lr, charmap, locale, repertoire, verbose);
19bc17a9 RM	321
	322	case '-':
	323	ch = lr_getc (lr);
	324	if (ch == '1')
	325	{
	326	lr->token.tok = tok_minus1;
	327	return &lr->token;
	328	}
	329	lr_ungetn (lr, 2);
	330	break;
	331	}
	332
	333	return get_ident (lr);
	334	}
	335
	336
	337	static struct token *
	338	get_toplvl_escape (struct linereader *lr)
	339	{
	340	/* This is supposed to be a numeric value. We return the
	341	numerical value and the number of bytes. */
	342	size_t start_idx = lr->idx - 1;
9cfe5381 RM	343	unsigned char *bytes = lr->token.val.charcode.bytes;
9cfe5381 RM	344	size_t nbytes = 0;
19bc17a9 RM	345	int ch;
	346
	347	do
	348	{
	349	unsigned int byte = 0;
	350	unsigned int base = 8;
	351
	352	ch = lr_getc (lr);
	353
	354	if (ch == 'd')
	355	{
	356	base = 10;
	357	ch = lr_getc (lr);
	358	}
	359	else if (ch == 'x')
	360	{
	361	base = 16;
	362	ch = lr_getc (lr);
	363	}
	364
	365	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	366	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	367	{
19bc17a9 RM	368	esc_error:
4b10dd6c	369	lr->token.val.str.startmb = &lr->buf[start_idx];
19bc17a9	370
76fbcfdd	371	while (ch != EOF && !isspace (ch))
19bc17a9	372	ch = lr_getc (lr);
4b10dd6c	373	lr->token.val.str.lenmb = lr->idx - start_idx;
19bc17a9 RM	374
	375	lr->token.tok = tok_error;
	376	return &lr->token;
	377	}
	378
	379	if (isdigit (ch))
	380	byte = ch - '0';
	381	else
4b10dd6c	382	byte = tolower (ch) - 'a' + 10;
19bc17a9 RM	383
	384	ch = lr_getc (lr);
	385	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	386	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	387	goto esc_error;
	388
	389	byte *= base;
	390	if (isdigit (ch))
	391	byte += ch - '0';
	392	else
4b10dd6c	393	byte += tolower (ch) - 'a' + 10;
19bc17a9 RM	394
	395	ch = lr_getc (lr);
	396	if (base != 16 && isdigit (ch))
	397	{
	398	byte *= base;
679f5a56	399	byte += ch - '0';
19bc17a9 RM	400
	401	ch = lr_getc (lr);
	402	}
	403
4b10dd6c	404	bytes[nbytes++] = byte;
19bc17a9	405	}
c50ec4e0	406	while (ch == lr->escape_char
6dd67bd5	407	&& nbytes < (int) sizeof (lr->token.val.charcode.bytes));
19bc17a9 RM	408
	409	if (!isspace (ch))
	410	lr_error (lr, _("garbage at end of character code specification"));
	411
	412	lr_ungetn (lr, 1);
	413
	414	lr->token.tok = tok_charcode;
19bc17a9 RM	415	lr->token.val.charcode.nbytes = nbytes;
	416
	417	return &lr->token;
	418	}
	419
	420
4b10dd6c UD	421	#define ADDC(ch) \
	422	do \
	423	{ \
	424	if (bufact == bufmax) \
	425	{ \
	426	bufmax *= 2; \
	427	buf = xrealloc (buf, bufmax); \
	428	} \
	429	buf[bufact++] = (ch); \
	430	} \
	431	while (0)
	432
	433
	434	#define ADDS(s, l) \
	435	do \
	436	{ \
	437	size_t _l = (l); \
	438	if (bufact + _l > bufmax) \
	439	{ \
	440	if (bufact < _l) \
	441	bufact = _l; \
	442	bufmax *= 2; \
	443	buf = xrealloc (buf, bufmax); \
	444	} \
	445	memcpy (&buf[bufact], s, _l); \
	446	bufact += _l; \
	447	} \
	448	while (0)
	449
	450
	451	#define ADDWC(ch) \
	452	do \
	453	{ \
	454	if (buf2act == buf2max) \
	455	{ \
	456	buf2max *= 2; \
	457	buf2 = xrealloc (buf2, buf2max * 4); \
	458	} \
	459	buf2[buf2act++] = (ch); \
	460	} \
19bc17a9 RM	461	while (0)
	462
	463
	464	static struct token *
	465	get_symname (struct linereader *lr)
	466	{
	467	/* Symbol in brackets. We must distinguish three kinds:
	468	1. reserved words
	469	2. ISO 10646 position values
	470	3. all other. */
	471	char *buf;
	472	size_t bufact = 0;
	473	size_t bufmax = 56;
	474	const struct keyword_t *kw;
	475	int ch;
	476
	477	buf = (char *) xmalloc (bufmax);
	478
	479	do
	480	{
	481	ch = lr_getc (lr);
	482	if (ch == lr->escape_char)
	483	{
	484	int c2 = lr_getc (lr);
	485	ADDC (c2);
	486
	487	if (c2 == '\n')
	488	ch = '\n';
	489	}
	490	else
	491	ADDC (ch);
	492	}
	493	while (ch != '>' && ch != '\n');
	494
	495	if (ch == '\n')
	496	lr_error (lr, _("unterminated symbolic name"));
	497
	498	/* Test for ISO 10646 position value. */
	499	if (buf[0] == 'U' && (bufact == 6 \|\| bufact == 10))
	500	{
	501	char *cp = buf + 1;
	502	while (cp < &buf[bufact - 1] && isxdigit (*cp))
	503	++cp;
	504
	505	if (cp == &buf[bufact - 1])
	506	{
	507	/* Yes, it is. */
4b10dd6c UD	508	lr->token.tok = tok_ucs4;
4b10dd6c UD	509	lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
19bc17a9 RM	510
	511	return &lr->token;
	512	}
	513	}
	514
	515	/* It is a symbolic name. Test for reserved words. */
	516	kw = lr->hash_fct (buf, bufact - 1);
	517
	518	if (kw != NULL && kw->symname_or_ident == 1)
	519	{
	520	lr->token.tok = kw->token;
	521	free (buf);
	522	}
	523	else
	524	{
	525	lr->token.tok = tok_bsymbol;
	526
19bc17a9	527	buf = xrealloc (buf, bufact + 1);
b16dba4c	528	buf[bufact] = '\0';
19bc17a9	529
4b10dd6c UD	530	lr->token.val.str.startmb = buf;
4b10dd6c UD	531	lr->token.val.str.lenmb = bufact - 1;
19bc17a9 RM	532	}
	533
	534	return &lr->token;
	535	}
	536
	537
	538	static struct token *
	539	get_ident (struct linereader *lr)
	540	{
	541	char *buf;
	542	size_t bufact;
	543	size_t bufmax = 56;
	544	const struct keyword_t *kw;
	545	int ch;
	546
	547	buf = xmalloc (bufmax);
	548	bufact = 0;
	549
	550	ADDC (lr->buf[lr->idx - 1]);
	551
	552	while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
f126ef67	553	&& ch != '<' && ch != ',' && ch != EOF)
4b10dd6c UD	554	{
	555	if (ch == lr->escape_char)
	556	{
	557	ch = lr_getc (lr);
	558	if (ch == '\n' \|\| ch == EOF)
	559	{
	560	lr_error (lr, _("invalid escape sequence"));
	561	break;
	562	}
	563	}
	564	ADDC (ch);
	565	}
19bc17a9	566
f126ef67	567	lr_ungetc (lr, ch);
19bc17a9 RM	568
	569	kw = lr->hash_fct (buf, bufact);
	570
	571	if (kw != NULL && kw->symname_or_ident == 0)
	572	{
	573	lr->token.tok = kw->token;
	574	free (buf);
	575	}
	576	else
	577	{
	578	lr->token.tok = tok_ident;
	579
19bc17a9	580	buf = xrealloc (buf, bufact + 1);
b16dba4c	581	buf[bufact] = '\0';
19bc17a9	582
4b10dd6c UD	583	lr->token.val.str.startmb = buf;
4b10dd6c UD	584	lr->token.val.str.lenmb = bufact;
19bc17a9 RM	585	}
	586
	587	return &lr->token;
	588	}
	589
	590
	591	static struct token *
4b10dd6c	592	get_string (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	593	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	594	int verbose)
19bc17a9	595	{
4b10dd6c UD	596	int return_widestr = lr->return_widestr;
4b10dd6c UD	597	char *buf;
a9c27b3e	598	wchar_t *buf2 = NULL;
19bc17a9 RM	599	size_t bufact;
19bc17a9 RM	600	size_t bufmax = 56;
19bc17a9	601
4b10dd6c	602	/* We must return two different strings. */
19bc17a9 RM	603	buf = xmalloc (bufmax);
	604	bufact = 0;
	605
4b10dd6c UD	606	/* We know it'll be a string. */
	607	lr->token.tok = tok_string;
	608
	609	/* If we need not translate the strings (i.e., expand <...> parts)
	610	we can run a simple loop. */
	611	if (!lr->translate_strings)
	612	{
	613	int ch;
	614
	615	buf2 = NULL;
	616	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
19bc17a9	617	ADDC (ch);
4b10dd6c UD	618
	619	/* Catch errors with trailing escape character. */
	620	if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	621	&& (bufact == 1 \|\| buf[bufact - 2] != lr->escape_char))
	622	{
	623	lr_error (lr, _("illegal escape sequence at end of string"));
	624	--bufact;
	625	}
	626	else if (ch == '\n' \|\| ch == EOF)
	627	lr_error (lr, _("unterminated string"));
	628
	629	ADDC ('\0');
	630	}
	631	else
	632	{
	633	int illegal_string = 0;
	634	size_t buf2act = 0;
	635	size_t buf2max = 56 * sizeof (uint32_t);
	636	int ch;
	637	int warned = 0;
	638
	639	/* We have to provide the wide character result as well. */
	640	if (return_widestr)
	641	buf2 = xmalloc (buf2max);
	642
	643	/* Read until the end of the string (or end of the line or file). */
	644	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	645	{
	646	size_t startidx;
	647	uint32_t wch;
	648	struct charseq *seq;
	649
	650	if (ch != '<')
	651	{
	652	/* The standards leave it up to the implementation to decide
	653	what to do with character which stand for themself. We
	654	could jump through hoops to find out the value relative to
	655	the charmap and the repertoire map, but instead we leave
	656	it up to the locale definition author to write a better
	657	definition. We assume here that every character which
	658	stands for itself is encoded using ISO 8859-1. Using the
	659	escape character is allowed. */
	660	if (ch == lr->escape_char)
	661	{
	662	ch = lr_getc (lr);
	663	if (ch == '\n' \|\| ch == EOF)
	664	break;
	665	}
	666
	667	if (verbose && !warned)
	668	{
	669	lr_error (lr, _("\
	670	non-symbolic character value should not be used"));
	671	warned = 1;
	672	}
	673
	674	ADDC (ch);
	675	if (return_widestr)
	676	ADDWC ((uint32_t) ch);
	677
	678	continue;
	679	}
	680
	681	/* Now we have to search for the end of the symbolic name, i.e.,
682	the closing '>'. */
683	startidx = bufact;
684	while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
685	{
686	if (ch == lr->escape_char)
687	{
688	ch = lr_getc (lr);
689	if (ch == '\n' \|\| ch == EOF)
690	break;
691	}
692	ADDC (ch);
693	}
694	if (ch == '\n' \|\| ch == EOF)
695	/* Not a correct string. */
696	break;
697	if (bufact == startidx)
698	{
699	/* <> is no correct name. Ignore it and also signal an
700	error. */
19bc17a9	701	illegal_string = 1;
4b10dd6c UD	702	continue;
4b10dd6c UD	703	}
19bc17a9	704
4b10dd6c UD	705	/* It might be a Uxxxx symbol. */
	706	if (buf[startidx] == 'U'
	707	&& (bufact - startidx == 5 \|\| bufact - startidx == 9))
	708	{
	709	char *cp = buf + startidx + 1;
	710	while (cp < &buf[bufact] && isxdigit (*cp))
	711	++cp;
	712
	713	if (cp == &buf[bufact])
	714	{
3c833378	715	char utmp[10];
4b10dd6c UD	716
	717	/* Yes, it is. */
	718	ADDC ('\0');
	719	wch = strtoul (buf + startidx + 1, NULL, 16);
	720
	721	/* Now forget about the name we just added. */
	722	bufact = startidx;
	723
	724	if (return_widestr)
	725	ADDWC (wch);
	726
3c833378 UD	727	/* See whether the charmap contains the Uxxxxxxxx names. */
	728	snprintf (utmp, sizeof (utmp), "U%08X", wch);
	729	seq = charmap_find_value (charmap, utmp, 9);
4b10dd6c	730
3c833378	731	if (seq == NULL)
4b10dd6c	732	{
3c833378 UD	733	/* No, this isn't the case. Now determine from
	734	the repertoire the name of the character and
	735	find it in the charmap. */
	736	if (repertoire != NULL)
3c833378	737	{
47e8b443	738	const char *symbol;
3c833378	739
47e8b443 UD	740	symbol = repertoire_find_symbol (repertoire, wch);
	741
	742	if (symbol != NULL)
	743	seq = charmap_find_value (charmap, symbol,
	744	strlen (symbol));
	745	}
	746
	747	if (seq == NULL)
	748	{
	749	#ifndef NO_TRANSLITERATION
	750	/* Transliterate if possible. */
	751	if (locale != NULL)
	752	{
	753	uint32_t *translit;
	754
	755	if ((locale->avail & CTYPE_LOCALE) == 0)
	756	{
	757	/* Load the CTYPE data now. */
	758	int old_needed = locale->needed;
	759
	760	locale->needed = 0;
69f6a804	761	locale = load_locale (LC_CTYPE,
47e8b443 UD	762	locale->name,
	763	locale->repertoire_name,
	764	charmap, locale);
	765	locale->needed = old_needed;
	766	}
	767
	768	if ((locale->avail & CTYPE_LOCALE) != 0
	769	&& ((translit = find_translit (locale,
	770	charmap, wch))
	771	!= NULL))
	772	/* The CTYPE data contains a matching
	773	transliteration. */
	774	{
	775	int i;
	776
	777	for (i = 0; translit[i] != 0; ++i)
	778	{
	779	char utmp[10];
	780
	781	snprintf (utmp, sizeof (utmp), "U%08X",
	782	translit[i]);
	783	seq = charmap_find_value (charmap, utmp,
	784	9);
	785	assert (seq != NULL);
	786	ADDS (seq->bytes, seq->nbytes);
	787	}
	788
	789	continue;
	790	}
	791	}
	792	#endif /* NO_TRANSLITERATION */
	793
	794	/* Not a known name. */
	795	illegal_string = 1;
3c833378	796	}
4b10dd6c UD	797	}
4b10dd6c UD	798
3c833378 UD	799	if (seq != NULL)
	800	ADDS (seq->bytes, seq->nbytes);
	801
4b10dd6c UD	802	continue;
	803	}
	804	}
	805
3c833378 UD	806	/* We now have the symbolic name in buf[startidx] to
	807	buf[bufact-1]. Now find out the value for this character
	808	in the charmap as well as in the repertoire map (in this
	809	order). */
	810	seq = charmap_find_value (charmap, &buf[startidx],
	811	bufact - startidx);
	812
	813	if (seq == NULL)
	814	{
	815	/* This name is not in the charmap. */
	816	lr_error (lr, _("symbol `%.*s' not in charmap"),
	817	(int) (bufact - startidx), &buf[startidx]);
	818	illegal_string = 1;
	819	}
	820
4b10dd6c UD	821	if (return_widestr)
4b10dd6c UD	822	{
3c833378 UD	823	/* Now the same for the multibyte representation. */
	824	if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
	825	wch = seq->ucs4;
	826	else
	827	{
	828	wch = repertoire_find_value (repertoire, &buf[startidx],
	829	bufact - startidx);
	830	if (seq != NULL)
	831	seq->ucs4 = wch;
	832	}
	833
4b10dd6c UD	834	if (wch == ILLEGAL_CHAR_VALUE)
	835	{
	836	/* This name is not in the repertoire map. */
	837	lr_error (lr, _("symbol `%.*s' not in repertoire map"),
70e51ab9	838	(int) (bufact - startidx), &buf[startidx]);
4b10dd6c UD	839	illegal_string = 1;
	840	}
	841	else
	842	ADDWC (wch);
	843	}
	844
3c833378 UD	845	/* Now forget about the name we just added. */
3c833378 UD	846	bufact = startidx;
19bc17a9	847
3c833378 UD	848	/* And copy the bytes. */
	849	if (seq != NULL)
	850	ADDS (seq->bytes, seq->nbytes);
4b10dd6c	851	}
19bc17a9	852
4b10dd6c UD	853	if (ch == '\n' \|\| ch == EOF)
	854	{
	855	lr_error (lr, _("unterminated string"));
	856	illegal_string = 1;
	857	}
19bc17a9	858
4b10dd6c UD	859	if (illegal_string)
	860	{
	861	free (buf);
72e6cdfa	862	free (buf2);
4b10dd6c UD	863	lr->token.val.str.startmb = NULL;
4b10dd6c UD	864	lr->token.val.str.lenmb = 0;
d5fd1f3f UD	865	lr->token.val.str.startwc = NULL;
d5fd1f3f UD	866	lr->token.val.str.lenwc = 0;
19bc17a9	867
4b10dd6c UD	868	return &lr->token;
4b10dd6c UD	869	}
19bc17a9	870
4b10dd6c	871	ADDC ('\0');
19bc17a9	872
4b10dd6c UD	873	if (return_widestr)
	874	{
	875	ADDWC (0);
	876	lr->token.val.str.startwc = xrealloc (buf2,
	877	buf2act * sizeof (uint32_t));
	878	lr->token.val.str.lenwc = buf2act;
	879	}
19bc17a9 RM	880	}
19bc17a9 RM	881
4b10dd6c UD	882	lr->token.val.str.startmb = xrealloc (buf, bufact);
	883	lr->token.val.str.lenmb = bufact;
	884
19bc17a9 RM	885	return &lr->token;
19bc17a9 RM	886	}