[thirdparty/glibc.git] / locale / programs / linereader.c

/* Copyright (C) 1996-2001, 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#include "localedef.h"
#include "charmap.h"
#include "error.h"
#include "linereader.h"
#include "locfile.h"

/* Prototypes for local functions.  Each of these scanners is called
   from lr_token after it has read the first character of a token, and
   each returns a pointer to a `struct token'.  Only get_string needs
   the charmap, locale and repertoire arguments.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
				 const struct charmap_t *charmap,
				 struct localedef_t *locale,
				 const struct repertoire_t *repertoire,
				 int verbose);


/* Create a line reader for the file FNAME, using HF to look up
   keywords.  A null FNAME, "-" and "/dev/stdin" all select standard
   input, which is registered under the name "<stdin>".  Returns NULL
   if the file cannot be opened; otherwise returns the result of
   lr_create.  */
struct linereader *
lr_open (const char *fname, kw_hash_fct_t hf)
{
  int use_stdin = (fname == NULL
		   || strcmp (fname, "-") == 0
		   || strcmp (fname, "/dev/stdin") == 0);
  FILE *stream;

  if (use_stdin)
    return lr_create (stdin, "<stdin>", hf);

  stream = fopen (fname, "rm");
  if (stream == NULL)
    return NULL;

  return lr_create (stream, fname, hf);
}

/* Allocate a line reader for the already opened stream FP.  FNAME is
   copied and stored as the reader's file name; HF is stored as the
   keyword lookup function.  The comment character defaults to '#', the
   escape character to '\\', strings are translated and no wide string
   is requested.  The first line is read immediately.

   Returns the new reader.  If the first line cannot be read, FP is
   closed, everything allocated here is released and NULL is returned
   with errno as left by getdelim.  */
struct linereader *
lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
{
  struct linereader *result;
  int n;

  result = (struct linereader *) xmalloc (sizeof (*result));

  result->fp = fp;
  result->fname = xstrdup (fname);
  result->buf = NULL;
  result->bufsize = 0;
  result->lineno = 1;
  result->idx = 0;
  result->comment_char = '#';
  result->escape_char = '\\';
  result->translate_strings = 1;
  /* get_string reads this flag, so give it a defined value; callers
     that want wide strings set it afterwards.  */
  result->return_widestr = 0;

  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  if (n < 0)
    {
      int save = errno;
      fclose (result->fp);
      /* getdelim may have allocated a buffer even though it failed.  */
      free (result->buf);
      free ((char *) result->fname);
      free (result);
      errno = save;
      return NULL;
    }

  /* An escaped newline at the end of the line is dropped.  The escape
     character is still the default '\\' at this point.  */
  if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
    n -= 2;

  result->buf[n] = '\0';
  result->bufact = n;
  result->hash_fct = hf;

  return result;
}


/* Return nonzero if the line buffer of LR is empty (bufact is zero)
   and zero otherwise.  This is a pure query: LR is not modified.
   NOTE(review): an empty buffer is presumed to be how lr_getc marks
   end of input; confirm against linereader.h.  */
int
lr_eof (struct linereader *lr)
{
  return lr->bufact == 0;
}


/* Close the stream of LR and release its line buffer and the reader
   object itself.  The file name copy made by lr_create is not
   released here.  */
void
lr_close (struct linereader *lr)
{
  FILE *stream = lr->fp;
  char *line = lr->buf;

  fclose (stream);
  free (line);
  free (lr);
}


/* Read the next physical line of LR into its buffer and reset the read
   index to the start of that line.  Returns 0 on success and -1 if
   getdelim reports failure or end of file, in which case LR is left
   unchanged.  */
int
lr_next (struct linereader *lr)
{
  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);

  if (len < 0)
    return -1;

  lr->lineno++;

  /* A line that ends in <escape_char><newline> loses both characters.
     (Substituting a single <SP> for the escaped newline was considered
     in the original code but is deliberately not done.)  */
  if (len >= 2
      && lr->buf[len - 1] == '\n'
      && lr->buf[len - 2] == lr->escape_char)
    len -= 2;

  lr->buf[len] = '\0';
  lr->bufact = len;
  lr->idx = 0;

  return 0;
}


/* Defined in error.c.  */
/* This variable is incremented each time `error' is called.  */
/* NOTE(review): neither of the two declarations below is referenced by
   the code visible in this part of the file; confirm whether they are
   still needed here.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;


/* Read the next token from LR and return a pointer to LR's embedded
   token structure, which is overwritten by every call.

   Leading whitespace is skipped.  End of input yields tok_eof, a
   newline yields tok_eol, and a comment (introduced by LR's comment
   character) is skipped.  The first significant character then selects
   the scanner: the escape character starts a character code, '.'
   starts one of the ellipsis tokens, '<' a symbolic name, a digit a
   decimal number, '"' a string, and "-1" is tok_minus1.  Anything else
   is scanned as an identifier.

   CHARMAP, LOCALE, REPERTOIRE and VERBOSE are only passed on to
   get_string.  */
struct token *
lr_token (struct linereader *lr, const struct charmap_t *charmap,
	  struct localedef_t *locale, const struct repertoire_t *repertoire,
	  int verbose)
{
  int ch;

  while (1)
    {
      /* Skip whitespace; EOF and newline are tokens of their own.  */
      do
	{
	  ch = lr_getc (lr);

	  if (ch == EOF)
	    {
	      lr->token.tok = tok_eof;
	      return &lr->token;
	    };

	  if (ch == '\n')
	    {
	      lr->token.tok = tok_eol;
	      return &lr->token;
	    }
	}
      while (isspace (ch));

      /* The loop above already returns on EOF; this is a second check
	 of the same condition.  */
      if (ch == EOF)
	{
	  lr->token.tok = tok_eof;
	  return &lr->token;
	};

      /* Anything but a comment character starts a real token.  */
      if (ch != lr->comment_char)
	break;

      /* Is there an newline at the end of the buffer?  */
      if (lr->buf[lr->bufact - 1] != '\n')
	{
	  /* No.  Some people want this to mean that only the line in
	     the file not the logical, concatenated line is ignored.
	     Let's try this.  */
	  lr->idx = lr->bufact;
	  continue;
	}

      /* Ignore rest of line.  */
      lr_ignore_rest (lr, 0);
      lr->token.tok = tok_eol;
      return &lr->token;
    }

  /* Match escape sequences.  */
  if (ch == lr->escape_char)
    return get_toplvl_escape (lr);

  /* Match ellipsis.  The first '.' has already been consumed, so each
     pattern below is the remainder of the token.  Longer patterns are
     tried before their prefixes.  */
  if (ch == '.')
    {
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 10; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis4_2;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis4;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	{
	  lr_getc (lr);
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis3;
	  return &lr->token;
	}
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	{
	  int cnt;
	  for (cnt = 0; cnt < 6; ++cnt)
	    lr_getc (lr);
	  lr->token.tok = tok_ellipsis2_2;
	  return &lr->token;
	}
      if (lr->buf[lr->idx] == '.')
	{
	  lr_getc (lr);
	  lr->token.tok = tok_ellipsis2;
	  return &lr->token;
	}
    }

  switch (ch)
    {
    case '<':
      return get_symname (lr);

    case '0' ... '9':
      /* Decimal number; accumulated without an overflow check.  */
      lr->token.tok = tok_number;
      lr->token.val.num = ch - '0';

      while (isdigit (ch = lr_getc (lr)))
	{
	  lr->token.val.num *= 10;
	  lr->token.val.num += ch - '0';
	}
      if (isalpha (ch))
	lr_error (lr, _("garbage at end of number"));
      /* Give back the character that ended the number.  */
      lr_ungetn (lr, 1);

      return &lr->token;

    case ';':
      lr->token.tok = tok_semicolon;
      return &lr->token;

    case ',':
      lr->token.tok = tok_comma;
      return &lr->token;

    case '(':
      lr->token.tok = tok_open_brace;
      return &lr->token;

    case ')':
      lr->token.tok = tok_close_brace;
      return &lr->token;

    case '"':
      return get_string (lr, charmap, locale, repertoire, verbose);

    case '-':
      ch = lr_getc (lr);
      if (ch == '1')
	{
	  lr->token.tok = tok_minus1;
	  return &lr->token;
	}
      /* NOTE(review): this backs up over both the lookahead character
	 and the '-' itself, while get_ident takes its first character
	 from lr->buf[lr->idx - 1].  Confirm against lr_ungetn that
	 backing up two positions (rather than one) is intended.  */
      lr_ungetn (lr, 2);
      break;
    }

  return get_ident (lr);
}


/* Scan a character code written as one or more escaped numeric bytes.
   The caller has already consumed the first escape character.  Each
   byte is the escape character followed by an optional 'd' (decimal)
   or 'x' (hexadecimal) and then digits; without a prefix the digits
   are octal.  Two digits are required, and a third is accepted for
   the non-hexadecimal bases.

   On success the token is tok_charcode with the bytes and their count
   stored in val.charcode.  On a malformed byte the token is tok_error
   and val.str describes the text from the first escape character up
   to the position reached while skipping to whitespace or EOF.  */
static struct token *
get_toplvl_escape (struct linereader *lr)
{
  /* This is supposed to be a numeric value.  We return the
     numerical value and the number of bytes.  */
  size_t start_idx = lr->idx - 1;
  char *bytes = lr->token.val.charcode.bytes;
  int nbytes = 0;
  int ch;

  do
    {
      unsigned int byte = 0;
      unsigned int base = 8;

      ch = lr_getc (lr);

      /* An optional prefix selects decimal or hexadecimal.  */
      if (ch == 'd')
	{
	  base = 10;
	  ch = lr_getc (lr);
	}
      else if (ch == 'x')
	{
	  base = 16;
	  ch = lr_getc (lr);
	}

      /* The first digit is mandatory.  */
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	{
	esc_error:
	  /* Also reached by goto when the second digit is invalid.  */
	  lr->token.val.str.startmb = &lr->buf[start_idx];

	  while (ch != EOF && !isspace (ch))
	    ch = lr_getc (lr);
	  lr->token.val.str.lenmb = lr->idx - start_idx;

	  lr->token.tok = tok_error;
	  return &lr->token;
	}

      if (isdigit (ch))
	byte = ch - '0';
      else
	byte = tolower (ch) - 'a' + 10;

      /* The second digit is mandatory as well.  */
      ch = lr_getc (lr);
      if ((base == 16 && !isxdigit (ch))
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
	goto esc_error;

      byte *= base;
      if (isdigit (ch))
	byte += ch - '0';
      else
	byte += tolower (ch) - 'a' + 10;

      /* Octal and decimal values may have a third digit.  Only
	 isdigit is checked here, not the range for the base.  */
      ch = lr_getc (lr);
      if (base != 16 && isdigit (ch))
	{
	  byte *= base;
	  byte += ch - '0';

	  ch = lr_getc (lr);
	}

      /* The value is stored in a char without a range check.  */
      bytes[nbytes++] = byte;
    }
  /* Another escape character starts the next byte, as long as there is
     room left in the token's byte array.  */
  while (ch == lr->escape_char
	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));

  if (!isspace (ch))
    lr_error (lr, _("garbage at end of character code specification"));

  /* Give back the character that ended the character code.  */
  lr_ungetn (lr, 1);

  lr->token.tok = tok_charcode;
  lr->token.val.charcode.nbytes = nbytes;

  return &lr->token;
}


/* Append the single byte CH to the buffer described by the caller's
   local variables `buf' (storage), `bufact' (bytes in use) and
   `bufmax' (capacity).  The capacity is doubled with xrealloc when the
   buffer is full.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufmax == bufact)						      \
	{								      \
	  bufmax += bufmax;						      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      buf[bufact++] = (ch);						      \
    }									      \
  while (0)


/* Append the L bytes starting at S to the buffer described by the
   caller's local variables `buf' (storage), `bufact' (bytes in use)
   and `bufmax' (capacity).  When the bytes do not fit, the capacity
   becomes twice the larger of the old capacity and L, which is always
   enough for bufact + L bytes because bufact never exceeds bufmax.
   The write position `bufact' is only ever advanced by L.  */
#define ADDS(s, l) \
  do									      \
    {									      \
      size_t _l = (l);							      \
      if (bufact + _l > bufmax)						      \
	{								      \
	  if (bufmax < _l)						      \
	    bufmax = _l;						      \
	  bufmax *= 2;							      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      memcpy (&buf[bufact], s, _l);					      \
      bufact += _l;							      \
    }									      \
  while (0)


/* Append the wide character CH to the buffer described by the caller's
   local variables `buf2' (storage), `buf2act' (elements in use) and
   `buf2max' (capacity).  Both counters are in elements, not bytes; the
   capacity is doubled with xrealloc when the buffer is full.  */
#define ADDWC(ch) \
  do									      \
    {									      \
      if (buf2act == buf2max)						      \
	{								      \
	  buf2max *= 2;							      \
	  buf2 = xrealloc (buf2, buf2max * sizeof (*buf2));		      \
	}								      \
      buf2[buf2act++] = (ch);						      \
    }									      \
  while (0)


/* Scan a symbolic name; the caller has already consumed the opening
   '<'.  Characters are collected up to and including the closing '>'.
   A character preceded by the escape character is taken literally.
   A newline or end of input before the '>' is reported as an
   unterminated name.

   The result is one of:
     - tok_ucs4, for a name of the form Uxxxx or Uxxxxxxxx made of
       hexadecimal digits; val.ucs4 holds the value;
     - the keyword's token, when the hash function knows the name as a
       keyword usable as a symbolic name;
     - tok_bsymbol otherwise; val.str.startmb is a newly allocated
       buffer and val.str.lenmb the length of the name without the
       terminating character.  */
static struct token *
get_symname (struct linereader *lr)
{
  /* Symbol in brackets.  We must distinguish three kinds:
     1. reserved words
     2. ISO 10646 position values
     3. all other.  */
  char *buf;
  size_t bufact = 0;
  size_t bufmax = 56;
  const struct keyword_t *kw;
  int ch;

  buf = (char *) xmalloc (bufmax);

  /* The last character stored is always the one that ended the scan,
     so the name itself is buf[0] .. buf[bufact - 2].  */
  do
    {
      ch = lr_getc (lr);
      if (ch == lr->escape_char)
	{
	  int c2 = lr_getc (lr);
	  ADDC (c2);

	  /* An escape cannot protect the end of the line or input.  */
	  if (c2 == '\n' || c2 == EOF)
	    ch = c2;
	}
      else
	ADDC (ch);
    }
  /* Stop at EOF as well; lr_getc keeps returning EOF, so the loop
     would otherwise never end.  */
  while (ch != '>' && ch != '\n' && ch != EOF);

  if (ch != '>')
    lr_error (lr, _("unterminated symbolic name"));

  /* Test for ISO 10646 position value.  */
  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
    {
      char *cp = buf + 1;
      while (cp < &buf[bufact - 1] && isxdigit ((unsigned char) *cp))
	++cp;

      if (cp == &buf[bufact - 1])
	{
	  /* Yes, it is.  Overwrite the terminating character so that
	     strtoul sees a proper string.  */
	  buf[bufact - 1] = '\0';
	  lr->token.tok = tok_ucs4;
	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);

	  /* The name is not part of the token, so release it.  */
	  free (buf);

	  return &lr->token;
	}
    }

  /* It is a symbolic name.  Test for reserved words.  */
  kw = lr->hash_fct (buf, bufact - 1);

  if (kw != NULL && kw->symname_or_ident == 1)
    {
      lr->token.tok = kw->token;
      free (buf);
    }
  else
    {
      lr->token.tok = tok_bsymbol;

      /* Resize first: bufact may equal bufmax, in which case there is
	 no room for the terminator yet.  */
      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact - 1;
    }

  return &lr->token;
}


/* Scan an identifier.  Its first character is taken from the position
   just before LR's read index.  Further characters are collected until
   whitespace, '"', ';', '<', ',' or end of input; a character preceded
   by the escape character is taken literally.  An escape character
   directly before a newline or the end of input is reported as an
   error and ends the identifier.  The character that ended the scan is
   pushed back.

   The result is the keyword's token when the hash function knows the
   text as a keyword usable as an identifier.  Otherwise it is
   tok_ident, with val.str.startmb a newly allocated NUL-terminated
   copy and val.str.lenmb its length.  */
static struct token *
get_ident (struct linereader *lr)
{
  char *buf;
  size_t bufact;
  size_t bufmax = 56;
  const struct keyword_t *kw;
  int ch;

  buf = xmalloc (bufmax);
  bufact = 0;

  ADDC (lr->buf[lr->idx - 1]);

  while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
	 && ch != '<' && ch != ',' && ch != EOF)
    {
      if (ch == lr->escape_char)
	{
	  ch = lr_getc (lr);
	  if (ch == '\n' || ch == EOF)
	    {
	      lr_error (lr, _("invalid escape sequence"));
	      break;
	    }
	}
      ADDC (ch);
    }

  lr_ungetc (lr, ch);

  kw = lr->hash_fct (buf, bufact);

  if (kw != NULL && kw->symname_or_ident == 0)
    {
      lr->token.tok = kw->token;
      free (buf);
    }
  else
    {
      lr->token.tok = tok_ident;

      /* Resize first: bufact may equal bufmax, in which case there is
	 no room for the terminator yet.  */
      buf = xrealloc (buf, bufact + 1);
      buf[bufact] = '\0';

      lr->token.val.str.startmb = buf;
      lr->token.val.str.lenmb = bufact;
    }

  return &lr->token;
}


/* Scan a string; the caller has already consumed the opening '"'.  The
   token is always tok_string.

   If LR does not translate strings, the characters up to the closing
   '"', a newline or the end of input are copied unchanged and only the
   multibyte result is produced.

   Otherwise every <name> inside the string is replaced by the byte
   sequence CHARMAP gives for that name.  Names of the form <Uxxxx> or
   <Uxxxxxxxx> are looked up in CHARMAP by their UCS4 value, then via
   REPERTOIRE, and finally, unless NO_TRANSLITERATION is defined,
   through the transliteration data of LOCALE.  Characters outside
   <...> stand for themselves; with VERBOSE set the first such
   character in a string produces a message.  When LR requests it, a
   wide character version of the string is built in parallel.

   On success val.str.startmb and val.str.lenmb describe a newly
   allocated buffer whose length includes the terminating NUL, and
   likewise startwc and lenwc for the wide string if requested.  If
   the translated string is unterminated or contains an unknown or
   empty name, both buffers are freed and all four fields are
   cleared.  */
static struct token *
get_string (struct linereader *lr, const struct charmap_t *charmap,
	    struct localedef_t *locale, const struct repertoire_t *repertoire,
	    int verbose)
{
  int return_widestr = lr->return_widestr;
  char *buf;
  wchar_t *buf2 = NULL;
  size_t bufact;
  size_t bufmax = 56;

  /* We must return two different strings.  */
  buf = xmalloc (bufmax);
  bufact = 0;

  /* We know it'll be a string.  */
  lr->token.tok = tok_string;

  /* If we need not translate the strings (i.e., expand <...> parts)
     we can run a simple loop.  */
  if (!lr->translate_strings)
    {
      int ch;

      buf2 = NULL;
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	ADDC (ch);

      /* Catch errors with trailing escape character.  */
      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
	{
	  lr_error (lr, _("illegal escape sequence at end of string"));
	  --bufact;
	}
      else if (ch == '\n' || ch == EOF)
	lr_error (lr, _("unterminated string"));

      ADDC ('\0');
    }
  else
    {
      int illegal_string = 0;
      size_t buf2act = 0;
      /* Capacity of buf2 in wide characters.  ADDWC compares and
	 doubles this as an element count, so it must not be a byte
	 count.  */
      size_t buf2max = 56;
      int ch;
      int warned = 0;

      /* We have to provide the wide character result as well.  */
      if (return_widestr)
	buf2 = xmalloc (buf2max * sizeof (uint32_t));

      /* Read until the end of the string (or end of the line or file).  */
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	{
	  size_t startidx;
	  uint32_t wch;
	  struct charseq *seq;

	  if (ch != '<')
	    {
	      /* The standards leave it up to the implementation to decide
		 what to do with character which stand for themself.  We
		 could jump through hoops to find out the value relative to
		 the charmap and the repertoire map, but instead we leave
		 it up to the locale definition author to write a better
		 definition.  We assume here that every character which
		 stands for itself is encoded using ISO 8859-1.  Using the
		 escape character is allowed.  */
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}

	      if (verbose && !warned)
		{
		  lr_error (lr, _("\
non-symbolic character value should not be used"));
		  warned = 1;
		}

	      ADDC (ch);
	      if (return_widestr)
		ADDWC ((uint32_t) ch);

	      continue;
	    }

	  /* Now we have to search for the end of the symbolic name, i.e.,
	     the closing '>'.  The name is collected temporarily at the
	     end of buf, starting at startidx.  */
	  startidx = bufact;
	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
	    {
	      if (ch == lr->escape_char)
		{
		  ch = lr_getc (lr);
		  if (ch == '\n' || ch == EOF)
		    break;
		}
	      ADDC (ch);
	    }
	  if (ch == '\n' || ch == EOF)
	    /* Not a correct string.  */
	    break;
	  if (bufact == startidx)
	    {
	      /* <> is no correct name.  Ignore it and also signal an
		 error.  */
	      illegal_string = 1;
	      continue;
	    }

	  /* It might be a Uxxxx symbol.  */
	  if (buf[startidx] == 'U'
	      && (bufact - startidx == 5 || bufact - startidx == 9))
	    {
	      char *cp = buf + startidx + 1;
	      while (cp < &buf[bufact] && isxdigit ((unsigned char) *cp))
		++cp;

	      if (cp == &buf[bufact])
		{
		  char utmp[10];

		  /* Yes, it is.  */
		  ADDC ('\0');
		  wch = strtoul (buf + startidx + 1, NULL, 16);

		  /* Now forget about the name we just added.  */
		  bufact = startidx;

		  if (return_widestr)
		    ADDWC (wch);

		  /* See whether the charmap contains the Uxxxxxxxx names.  */
		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
		  seq = charmap_find_value (charmap, utmp, 9);

		  if (seq == NULL)
		    {
		     /* No, this isn't the case.  Now determine from
			the repertoire the name of the character and
			find it in the charmap.  */
		      if (repertoire != NULL)
			{
			  const char *symbol;

			  symbol = repertoire_find_symbol (repertoire, wch);

			  if (symbol != NULL)
			    seq = charmap_find_value (charmap, symbol,
						      strlen (symbol));
			}

		      if (seq == NULL)
			{
#ifndef NO_TRANSLITERATION
			  /* Transliterate if possible.  */
			  if (locale != NULL)
			    {
			      uint32_t *translit;

			      if ((locale->avail & CTYPE_LOCALE) == 0)
				{
				  /* Load the CTYPE data now.  */
				  int old_needed = locale->needed;

				  locale->needed = 0;
				  locale = load_locale (LC_CTYPE,
							locale->name,
							locale->repertoire_name,
							charmap, locale);
				  locale->needed = old_needed;
				}

			      if ((locale->avail & CTYPE_LOCALE) != 0
				  && ((translit = find_translit (locale,
								 charmap, wch))
				      != NULL))
				/* The CTYPE data contains a matching
				   transliteration.  */
				{
				  int i;

				  for (i = 0; translit[i] != 0; ++i)
				    {
				      char utmp[10];

				      snprintf (utmp, sizeof (utmp), "U%08X",
						translit[i]);
				      seq = charmap_find_value (charmap, utmp,
								9);
				      assert (seq != NULL);
				      ADDS (seq->bytes, seq->nbytes);
				    }

				  continue;
				}
			    }
#endif	/* NO_TRANSLITERATION */

			  /* Not a known name.  */
			  illegal_string = 1;
			}
		    }

		  if (seq != NULL)
		    ADDS (seq->bytes, seq->nbytes);

		  continue;
		}
	    }

	  /* We now have the symbolic name in buf[startidx] to
	     buf[bufact-1].  Now find out the value for this character
	     in the charmap as well as in the repertoire map (in this
	     order).  */
	  seq = charmap_find_value (charmap, &buf[startidx],
				    bufact - startidx);

	  if (seq == NULL)
	    {
	      /* This name is not in the charmap.  */
	      lr_error (lr, _("symbol `%.*s' not in charmap"),
			(int) (bufact - startidx), &buf[startidx]);
	      illegal_string = 1;
	    }

	  if (return_widestr)
	    {
	      /* Now the same for the multibyte representation.  */
	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
		wch = seq->ucs4;
	      else
		{
		  wch = repertoire_find_value (repertoire, &buf[startidx],
					       bufact - startidx);
		  /* Remember the value in the charmap entry so that the
		     repertoire need not be searched again.  */
		  if (seq != NULL)
		    seq->ucs4 = wch;
		}

	      if (wch == ILLEGAL_CHAR_VALUE)
		{
		  /* This name is not in the repertoire map.  */
		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
			    (int) (bufact - startidx), &buf[startidx]);
		  illegal_string = 1;
		}
	      else
		ADDWC (wch);
	    }

	  /* Now forget about the name we just added.  */
	  bufact = startidx;

	  /* And copy the bytes.  */
	  if (seq != NULL)
	    ADDS (seq->bytes, seq->nbytes);
	}

      if (ch == '\n' || ch == EOF)
	{
	  lr_error (lr, _("unterminated string"));
	  illegal_string = 1;
	}

      if (illegal_string)
	{
	  free (buf);
	  if (buf2 != NULL)
	    free (buf2);
	  lr->token.val.str.startmb = NULL;
	  lr->token.val.str.lenmb = 0;
	  lr->token.val.str.startwc = NULL;
	  lr->token.val.str.lenwc = 0;

	  return &lr->token;
	}

      ADDC ('\0');

      if (return_widestr)
	{
	  ADDWC (0);
	  lr->token.val.str.startwc = xrealloc (buf2,
						buf2act * sizeof (uint32_t));
	  lr->token.val.str.lenwc = buf2act;
	}
    }

  /* Shrink the multibyte buffer to the size actually used.  */
  lr->token.val.str.startmb = xrealloc (buf, bufact);
  lr->token.val.str.lenmb = bufact;

  return &lr->token;
}
Commit	Line	Data
f2b98f97	1	/* Copyright (C) 1996-2001, 2002 Free Software Foundation, Inc.
5290baf0	2	This file is part of the GNU C Library.
4b10dd6c	3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
19bc17a9	4
5290baf0	5	The GNU C Library is free software; you can redistribute it and/or
41bdb6e2 AJ	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
19bc17a9	9
5290baf0 UD	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2	13	Lesser General Public License for more details.
19bc17a9	14
41bdb6e2 AJ	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, write to the Free
	17	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	18	02111-1307 USA. */
19bc17a9 RM	19
	20	#ifdef HAVE_CONFIG_H
	21	# include <config.h>
	22	#endif
	23
47e8b443	24	#include <assert.h>
19bc17a9 RM	25	#include <ctype.h>
	26	#include <errno.h>
	27	#include <libintl.h>
	28	#include <stdarg.h>
	29	#include <stdlib.h>
	30	#include <string.h>
	31
f2b98f97	32	#include "localedef.h"
4b10dd6c	33	#include "charmap.h"
19bc17a9 RM	34	#include "error.h"
19bc17a9 RM	35	#include "linereader.h"
47e8b443	36	#include "locfile.h"
93693c4d	37
4b10dd6c	38	/* Prototypes for local functions. */
19bc17a9 RM	39	static struct token get_toplvl_escape (struct linereader lr);
	40	static struct token get_symname (struct linereader lr);
	41	static struct token get_ident (struct linereader lr);
	42	static struct token get_string (struct linereader lr,
4b10dd6c	43	const struct charmap_t *charmap,
47e8b443	44	struct localedef_t *locale,
93693c4d UD	45	const struct repertoire_t *repertoire,
93693c4d UD	46	int verbose);
19bc17a9 RM	47
	48
	49	struct linereader *
	50	lr_open (const char *fname, kw_hash_fct_t hf)
	51	{
	52	FILE *fp;
19bc17a9 RM	53
	54	if (fname == NULL \|\| strcmp (fname, "-") == 0
	55	\|\| strcmp (fname, "/dev/stdin") == 0)
3e076219	56	return lr_create (stdin, "<stdin>", hf);
19bc17a9 RM	57	else
19bc17a9 RM	58	{
2e2dc1a5	59	fp = fopen (fname, "rm");
19bc17a9 RM	60	if (fp == NULL)
19bc17a9 RM	61	return NULL;
3e076219	62	return lr_create (fp, fname, hf);
19bc17a9	63	}
3e076219 UD	64	}
	65
	66	struct linereader *
	67	lr_create (FILE fp, const char fname, kw_hash_fct_t hf)
	68	{
	69	struct linereader *result;
	70	int n;
19bc17a9 RM	71
	72	result = (struct linereader ) xmalloc (sizeof (result));
	73
	74	result->fp = fp;
3e076219	75	result->fname = xstrdup (fname);
19bc17a9 RM	76	result->buf = NULL;
	77	result->bufsize = 0;
	78	result->lineno = 1;
	79	result->idx = 0;
	80	result->comment_char = '#';
	81	result->escape_char = '\\';
	82	result->translate_strings = 1;
	83
	84	n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
	85	if (n < 0)
	86	{
	87	int save = errno;
	88	fclose (result->fp);
46ec036d	89	free ((char *) result->fname);
19bc17a9 RM	90	free (result);
	91	errno = save;
	92	return NULL;
	93	}
	94
	95	if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
	96	n -= 2;
	97
	98	result->buf[n] = '\0';
	99	result->bufact = n;
	100	result->hash_fct = hf;
	101
	102	return result;
	103	}
	104
	105
	106	int
	107	lr_eof (struct linereader *lr)
	108	{
	109	return lr->bufact = 0;
	110	}
	111
	112
	113	void
	114	lr_close (struct linereader *lr)
	115	{
	116	fclose (lr->fp);
	117	free (lr->buf);
	118	free (lr);
	119	}
	120
	121
	122	int
	123	lr_next (struct linereader *lr)
	124	{
	125	int n;
	126
	127	n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
	128	if (n < 0)
	129	return -1;
	130
	131	++lr->lineno;
	132
	133	if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
	134	{
4b10dd6c UD	135	#if 0
4b10dd6c UD	136	/* XXX Is this correct? */
19bc17a9 RM	137	/* An escaped newline character is substituted with a single <SP>. */
	138	--n;
	139	lr->buf[n - 1] = ' ';
4b10dd6c UD	140	#else
	141	n -= 2;
	142	#endif
19bc17a9 RM	143	}
	144
	145	lr->buf[n] = '\0';
	146	lr->bufact = n;
	147	lr->idx = 0;
	148
	149	return 0;
	150	}
	151
	152
	153	/* Defined in error.c. */
	154	/* This variable is incremented each time `error' is called. */
	155	extern unsigned int error_message_count;
	156
	157	/* The calling program should define program_name and set it to the
	158	name of the executing program. */
	159	extern char *program_name;
	160
	161
	162	struct token *
4b10dd6c	163	lr_token (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	164	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	165	int verbose)
19bc17a9 RM	166	{
	167	int ch;
	168
	169	while (1)
	170	{
	171	do
	172	{
	173	ch = lr_getc (lr);
	174
76fbcfdd UD	175	if (ch == EOF)
	176	{
	177	lr->token.tok = tok_eof;
	178	return &lr->token;
	179	};
	180
19bc17a9 RM	181	if (ch == '\n')
	182	{
	183	lr->token.tok = tok_eol;
	184	return &lr->token;
	185	}
	186	}
	187	while (isspace (ch));
	188
	189	if (ch == EOF)
	190	{
	191	lr->token.tok = tok_eof;
	192	return &lr->token;
	193	};
	194
	195	if (ch != lr->comment_char)
	196	break;
	197
a0dc5206 UD	198	/* Is there an newline at the end of the buffer? */
	199	if (lr->buf[lr->bufact - 1] != '\n')
	200	{
	201	/* No. Some people want this to mean that only the line in
	202	the file not the logical, concatenated line is ignored.
	203	Let's try this. */
	204	lr->idx = lr->bufact;
	205	continue;
	206	}
	207
19bc17a9 RM	208	/* Ignore rest of line. */
	209	lr_ignore_rest (lr, 0);
	210	lr->token.tok = tok_eol;
	211	return &lr->token;
	212	}
	213
	214	/* Match escape sequences. */
	215	if (ch == lr->escape_char)
	216	return get_toplvl_escape (lr);
	217
	218	/* Match ellipsis. */
4b10dd6c	219	if (ch == '.')
19bc17a9	220	{
a0dc5206 UD	221	if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
	222	{
	223	int cnt;
	224	for (cnt = 0; cnt < 10; ++cnt)
	225	lr_getc (lr);
	226	lr->token.tok = tok_ellipsis4_2;
	227	return &lr->token;
	228	}
4b10dd6c UD	229	if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
	230	{
	231	lr_getc (lr);
	232	lr_getc (lr);
	233	lr_getc (lr);
	234	lr->token.tok = tok_ellipsis4;
	235	return &lr->token;
	236	}
	237	if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
	238	{
	239	lr_getc (lr);
	240	lr_getc (lr);
	241	lr->token.tok = tok_ellipsis3;
	242	return &lr->token;
	243	}
a0dc5206 UD	244	if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
	245	{
	246	int cnt;
	247	for (cnt = 0; cnt < 6; ++cnt)
	248	lr_getc (lr);
	249	lr->token.tok = tok_ellipsis2_2;
	250	return &lr->token;
	251	}
4b10dd6c UD	252	if (lr->buf[lr->idx] == '.')
	253	{
	254	lr_getc (lr);
	255	lr->token.tok = tok_ellipsis2;
	256	return &lr->token;
	257	}
19bc17a9 RM	258	}
	259
	260	switch (ch)
	261	{
	262	case '<':
	263	return get_symname (lr);
	264
	265	case '0' ... '9':
	266	lr->token.tok = tok_number;
	267	lr->token.val.num = ch - '0';
	268
	269	while (isdigit (ch = lr_getc (lr)))
	270	{
	271	lr->token.val.num *= 10;
	272	lr->token.val.num += ch - '0';
	273	}
	274	if (isalpha (ch))
5290baf0	275	lr_error (lr, _("garbage at end of number"));
19bc17a9 RM	276	lr_ungetn (lr, 1);
	277
	278	return &lr->token;
	279
	280	case ';':
	281	lr->token.tok = tok_semicolon;
	282	return &lr->token;
	283
	284	case ',':
	285	lr->token.tok = tok_comma;
	286	return &lr->token;
	287
	288	case '(':
	289	lr->token.tok = tok_open_brace;
	290	return &lr->token;
	291
	292	case ')':
	293	lr->token.tok = tok_close_brace;
	294	return &lr->token;
	295
	296	case '"':
47e8b443	297	return get_string (lr, charmap, locale, repertoire, verbose);
19bc17a9 RM	298
	299	case '-':
	300	ch = lr_getc (lr);
	301	if (ch == '1')
	302	{
	303	lr->token.tok = tok_minus1;
	304	return &lr->token;
	305	}
	306	lr_ungetn (lr, 2);
	307	break;
	308	}
	309
	310	return get_ident (lr);
	311	}
	312
	313
	314	static struct token *
	315	get_toplvl_escape (struct linereader *lr)
	316	{
	317	/* This is supposed to be a numeric value. We return the
	318	numerical value and the number of bytes. */
	319	size_t start_idx = lr->idx - 1;
4b10dd6c	320	char *bytes = lr->token.val.charcode.bytes;
19bc17a9 RM	321	int nbytes = 0;
	322	int ch;
	323
	324	do
	325	{
	326	unsigned int byte = 0;
	327	unsigned int base = 8;
	328
	329	ch = lr_getc (lr);
	330
	331	if (ch == 'd')
	332	{
	333	base = 10;
	334	ch = lr_getc (lr);
	335	}
	336	else if (ch == 'x')
	337	{
	338	base = 16;
	339	ch = lr_getc (lr);
	340	}
	341
	342	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	343	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	344	{
19bc17a9 RM	345	esc_error:
4b10dd6c	346	lr->token.val.str.startmb = &lr->buf[start_idx];
19bc17a9	347
76fbcfdd	348	while (ch != EOF && !isspace (ch))
19bc17a9	349	ch = lr_getc (lr);
4b10dd6c	350	lr->token.val.str.lenmb = lr->idx - start_idx;
19bc17a9 RM	351
	352	lr->token.tok = tok_error;
	353	return &lr->token;
	354	}
	355
	356	if (isdigit (ch))
	357	byte = ch - '0';
	358	else
4b10dd6c	359	byte = tolower (ch) - 'a' + 10;
19bc17a9 RM	360
	361	ch = lr_getc (lr);
	362	if ((base == 16 && !isxdigit (ch))
ba1ffaa1	363	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
19bc17a9 RM	364	goto esc_error;
	365
	366	byte *= base;
	367	if (isdigit (ch))
	368	byte += ch - '0';
	369	else
4b10dd6c	370	byte += tolower (ch) - 'a' + 10;
19bc17a9 RM	371
	372	ch = lr_getc (lr);
	373	if (base != 16 && isdigit (ch))
	374	{
	375	byte *= base;
679f5a56	376	byte += ch - '0';
19bc17a9 RM	377
	378	ch = lr_getc (lr);
	379	}
	380
4b10dd6c	381	bytes[nbytes++] = byte;
19bc17a9	382	}
c50ec4e0	383	while (ch == lr->escape_char
6dd67bd5	384	&& nbytes < (int) sizeof (lr->token.val.charcode.bytes));
19bc17a9 RM	385
	386	if (!isspace (ch))
	387	lr_error (lr, _("garbage at end of character code specification"));
	388
	389	lr_ungetn (lr, 1);
	390
	391	lr->token.tok = tok_charcode;
19bc17a9 RM	392	lr->token.val.charcode.nbytes = nbytes;
	393
	394	return &lr->token;
	395	}
	396
	397
4b10dd6c UD	398	#define ADDC(ch) \
	399	do \
	400	{ \
	401	if (bufact == bufmax) \
	402	{ \
	403	bufmax *= 2; \
	404	buf = xrealloc (buf, bufmax); \
	405	} \
	406	buf[bufact++] = (ch); \
	407	} \
	408	while (0)
	409
	410
	411	#define ADDS(s, l) \
	412	do \
	413	{ \
	414	size_t _l = (l); \
	415	if (bufact + _l > bufmax) \
	416	{ \
	417	if (bufact < _l) \
	418	bufact = _l; \
	419	bufmax *= 2; \
	420	buf = xrealloc (buf, bufmax); \
	421	} \
	422	memcpy (&buf[bufact], s, _l); \
	423	bufact += _l; \
	424	} \
	425	while (0)
	426
	427
	428	#define ADDWC(ch) \
	429	do \
	430	{ \
	431	if (buf2act == buf2max) \
	432	{ \
	433	buf2max *= 2; \
	434	buf2 = xrealloc (buf2, buf2max * 4); \
	435	} \
	436	buf2[buf2act++] = (ch); \
	437	} \
19bc17a9 RM	438	while (0)
	439
	440
	441	static struct token *
	442	get_symname (struct linereader *lr)
	443	{
	444	/* Symbol in brackets. We must distinguish three kinds:
	445	1. reserved words
	446	2. ISO 10646 position values
	447	3. all other. */
	448	char *buf;
	449	size_t bufact = 0;
	450	size_t bufmax = 56;
	451	const struct keyword_t *kw;
	452	int ch;
	453
	454	buf = (char *) xmalloc (bufmax);
	455
	456	do
	457	{
	458	ch = lr_getc (lr);
	459	if (ch == lr->escape_char)
	460	{
	461	int c2 = lr_getc (lr);
	462	ADDC (c2);
	463
	464	if (c2 == '\n')
	465	ch = '\n';
	466	}
	467	else
	468	ADDC (ch);
	469	}
	470	while (ch != '>' && ch != '\n');
	471
	472	if (ch == '\n')
	473	lr_error (lr, _("unterminated symbolic name"));
	474
	475	/* Test for ISO 10646 position value. */
	476	if (buf[0] == 'U' && (bufact == 6 \|\| bufact == 10))
	477	{
	478	char *cp = buf + 1;
	479	while (cp < &buf[bufact - 1] && isxdigit (*cp))
	480	++cp;
	481
	482	if (cp == &buf[bufact - 1])
	483	{
	484	/* Yes, it is. */
4b10dd6c UD	485	lr->token.tok = tok_ucs4;
4b10dd6c UD	486	lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
19bc17a9 RM	487
	488	return &lr->token;
	489	}
	490	}
	491
	492	/* It is a symbolic name. Test for reserved words. */
	493	kw = lr->hash_fct (buf, bufact - 1);
	494
	495	if (kw != NULL && kw->symname_or_ident == 1)
	496	{
	497	lr->token.tok = kw->token;
	498	free (buf);
	499	}
	500	else
	501	{
	502	lr->token.tok = tok_bsymbol;
	503
	504	buf[bufact] = '\0';
	505	buf = xrealloc (buf, bufact + 1);
	506
4b10dd6c UD	507	lr->token.val.str.startmb = buf;
4b10dd6c UD	508	lr->token.val.str.lenmb = bufact - 1;
19bc17a9 RM	509	}
	510
	511	return &lr->token;
	512	}
	513
	514
	515	static struct token *
	516	get_ident (struct linereader *lr)
	517	{
	518	char *buf;
	519	size_t bufact;
	520	size_t bufmax = 56;
	521	const struct keyword_t *kw;
	522	int ch;
	523
	524	buf = xmalloc (bufmax);
	525	bufact = 0;
	526
	527	ADDC (lr->buf[lr->idx - 1]);
	528
	529	while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
f126ef67	530	&& ch != '<' && ch != ',' && ch != EOF)
4b10dd6c UD	531	{
	532	if (ch == lr->escape_char)
	533	{
	534	ch = lr_getc (lr);
	535	if (ch == '\n' \|\| ch == EOF)
	536	{
	537	lr_error (lr, _("invalid escape sequence"));
	538	break;
	539	}
	540	}
	541	ADDC (ch);
	542	}
19bc17a9	543
f126ef67	544	lr_ungetc (lr, ch);
19bc17a9 RM	545
	546	kw = lr->hash_fct (buf, bufact);
	547
	548	if (kw != NULL && kw->symname_or_ident == 0)
	549	{
	550	lr->token.tok = kw->token;
	551	free (buf);
	552	}
	553	else
	554	{
	555	lr->token.tok = tok_ident;
	556
	557	buf[bufact] = '\0';
	558	buf = xrealloc (buf, bufact + 1);
	559
4b10dd6c UD	560	lr->token.val.str.startmb = buf;
4b10dd6c UD	561	lr->token.val.str.lenmb = bufact;
19bc17a9 RM	562	}
	563
	564	return &lr->token;
	565	}
	566
	567
	568	static struct token *
4b10dd6c	569	get_string (struct linereader lr, const struct charmap_t charmap,
47e8b443 UD	570	struct localedef_t locale, const struct repertoire_t repertoire,
47e8b443 UD	571	int verbose)
19bc17a9	572	{
4b10dd6c UD	573	int return_widestr = lr->return_widestr;
4b10dd6c UD	574	char *buf;
a9c27b3e	575	wchar_t *buf2 = NULL;
19bc17a9 RM	576	size_t bufact;
19bc17a9 RM	577	size_t bufmax = 56;
19bc17a9	578
4b10dd6c	579	/* We must return two different strings. */
19bc17a9 RM	580	buf = xmalloc (bufmax);
	581	bufact = 0;
	582
4b10dd6c UD	583	/* We know it'll be a string. */
	584	lr->token.tok = tok_string;
	585
	586	/* If we need not translate the strings (i.e., expand <...> parts)
	587	we can run a simple loop. */
	588	if (!lr->translate_strings)
	589	{
	590	int ch;
	591
	592	buf2 = NULL;
	593	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
19bc17a9	594	ADDC (ch);
4b10dd6c UD	595
	596	/* Catch errors with trailing escape character. */
	597	if (bufact > 0 && buf[bufact - 1] == lr->escape_char
	598	&& (bufact == 1 \|\| buf[bufact - 2] != lr->escape_char))
	599	{
	600	lr_error (lr, _("illegal escape sequence at end of string"));
	601	--bufact;
	602	}
	603	else if (ch == '\n' \|\| ch == EOF)
	604	lr_error (lr, _("unterminated string"));
	605
	606	ADDC ('\0');
	607	}
	608	else
	609	{
	610	int illegal_string = 0;
	611	size_t buf2act = 0;
	612	size_t buf2max = 56 * sizeof (uint32_t);
	613	int ch;
	614	int warned = 0;
	615
	616	/* We have to provide the wide character result as well. */
	617	if (return_widestr)
	618	buf2 = xmalloc (buf2max);
	619
	620	/* Read until the end of the string (or end of the line or file). */
	621	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
	622	{
	623	size_t startidx;
	624	uint32_t wch;
	625	struct charseq *seq;
	626
	627	if (ch != '<')
	628	{
	629	/* The standards leave it up to the implementation to decide
	630	what to do with character which stand for themself. We
	631	could jump through hoops to find out the value relative to
	632	the charmap and the repertoire map, but instead we leave
	633	it up to the locale definition author to write a better
	634	definition. We assume here that every character which
	635	stands for itself is encoded using ISO 8859-1. Using the
	636	escape character is allowed. */
	637	if (ch == lr->escape_char)
	638	{
	639	ch = lr_getc (lr);
	640	if (ch == '\n' \|\| ch == EOF)
	641	break;
	642	}
	643
	644	if (verbose && !warned)
	645	{
	646	lr_error (lr, _("\
	647	non-symbolic character value should not be used"));
	648	warned = 1;
	649	}
	650
	651	ADDC (ch);
	652	if (return_widestr)
	653	ADDWC ((uint32_t) ch);
	654
	655	continue;
	656	}
	657
	658	/* Now we have to search for the end of the symbolic name, i.e.,
659	the closing '>'. */
660	startidx = bufact;
661	while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
662	{
663	if (ch == lr->escape_char)
664	{
665	ch = lr_getc (lr);
666	if (ch == '\n' \|\| ch == EOF)
667	break;
668	}
669	ADDC (ch);
670	}
671	if (ch == '\n' \|\| ch == EOF)
672	/* Not a correct string. */
673	break;
674	if (bufact == startidx)
675	{
676	/* <> is no correct name. Ignore it and also signal an
677	error. */
19bc17a9	678	illegal_string = 1;
4b10dd6c UD	679	continue;
4b10dd6c UD	680	}
19bc17a9	681
4b10dd6c UD	682	/* It might be a Uxxxx symbol. */
	683	if (buf[startidx] == 'U'
	684	&& (bufact - startidx == 5 \|\| bufact - startidx == 9))
	685	{
	686	char *cp = buf + startidx + 1;
	687	while (cp < &buf[bufact] && isxdigit (*cp))
	688	++cp;
	689
	690	if (cp == &buf[bufact])
	691	{
3c833378	692	char utmp[10];
4b10dd6c UD	693
	694	/* Yes, it is. */
	695	ADDC ('\0');
	696	wch = strtoul (buf + startidx + 1, NULL, 16);
	697
	698	/* Now forget about the name we just added. */
	699	bufact = startidx;
	700
	701	if (return_widestr)
	702	ADDWC (wch);
	703
3c833378 UD	704	/* See whether the charmap contains the Uxxxxxxxx names. */
	705	snprintf (utmp, sizeof (utmp), "U%08X", wch);
	706	seq = charmap_find_value (charmap, utmp, 9);
4b10dd6c	707
3c833378	708	if (seq == NULL)
4b10dd6c	709	{
3c833378 UD	710	/* No, this isn't the case. Now determine from
	711	the repertoire the name of the character and
	712	find it in the charmap. */
	713	if (repertoire != NULL)
3c833378	714	{
47e8b443	715	const char *symbol;
3c833378	716
47e8b443 UD	717	symbol = repertoire_find_symbol (repertoire, wch);
	718
	719	if (symbol != NULL)
	720	seq = charmap_find_value (charmap, symbol,
	721	strlen (symbol));
	722	}
	723
	724	if (seq == NULL)
	725	{
	726	#ifndef NO_TRANSLITERATION
	727	/* Transliterate if possible. */
	728	if (locale != NULL)
	729	{
	730	uint32_t *translit;
	731
	732	if ((locale->avail & CTYPE_LOCALE) == 0)
	733	{
	734	/* Load the CTYPE data now. */
	735	int old_needed = locale->needed;
	736
	737	locale->needed = 0;
69f6a804	738	locale = load_locale (LC_CTYPE,
47e8b443 UD	739	locale->name,
	740	locale->repertoire_name,
	741	charmap, locale);
	742	locale->needed = old_needed;
	743	}
	744
	745	if ((locale->avail & CTYPE_LOCALE) != 0
	746	&& ((translit = find_translit (locale,
	747	charmap, wch))
	748	!= NULL))
	749	/* The CTYPE data contains a matching
	750	transliteration. */
	751	{
	752	int i;
	753
	754	for (i = 0; translit[i] != 0; ++i)
	755	{
	756	char utmp[10];
	757
	758	snprintf (utmp, sizeof (utmp), "U%08X",
	759	translit[i]);
	760	seq = charmap_find_value (charmap, utmp,
	761	9);
	762	assert (seq != NULL);
	763	ADDS (seq->bytes, seq->nbytes);
	764	}
	765
	766	continue;
	767	}
	768	}
	769	#endif /* NO_TRANSLITERATION */
	770
	771	/* Not a known name. */
	772	illegal_string = 1;
3c833378	773	}
4b10dd6c UD	774	}
4b10dd6c UD	775
3c833378 UD	776	if (seq != NULL)
	777	ADDS (seq->bytes, seq->nbytes);
	778
4b10dd6c UD	779	continue;
	780	}
	781	}
	782
3c833378 UD	783	/* We now have the symbolic name in buf[startidx] to
	784	buf[bufact-1]. Now find out the value for this character
	785	in the charmap as well as in the repertoire map (in this
	786	order). */
	787	seq = charmap_find_value (charmap, &buf[startidx],
	788	bufact - startidx);
	789
	790	if (seq == NULL)
	791	{
	792	/* This name is not in the charmap. */
	793	lr_error (lr, _("symbol `%.*s' not in charmap"),
	794	(int) (bufact - startidx), &buf[startidx]);
	795	illegal_string = 1;
	796	}
	797
4b10dd6c UD	798	if (return_widestr)
4b10dd6c UD	799	{
3c833378 UD	800	/* Now the same for the multibyte representation. */
	801	if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
	802	wch = seq->ucs4;
	803	else
	804	{
	805	wch = repertoire_find_value (repertoire, &buf[startidx],
	806	bufact - startidx);
	807	if (seq != NULL)
	808	seq->ucs4 = wch;
	809	}
	810
4b10dd6c UD	811	if (wch == ILLEGAL_CHAR_VALUE)
	812	{
	813	/* This name is not in the repertoire map. */
	814	lr_error (lr, _("symbol `%.*s' not in repertoire map"),
70e51ab9	815	(int) (bufact - startidx), &buf[startidx]);
4b10dd6c UD	816	illegal_string = 1;
	817	}
	818	else
	819	ADDWC (wch);
	820	}
	821
3c833378 UD	822	/* Now forget about the name we just added. */
3c833378 UD	823	bufact = startidx;
19bc17a9	824
3c833378 UD	825	/* And copy the bytes. */
	826	if (seq != NULL)
	827	ADDS (seq->bytes, seq->nbytes);
4b10dd6c	828	}
19bc17a9	829
4b10dd6c UD	830	if (ch == '\n' \|\| ch == EOF)
	831	{
	832	lr_error (lr, _("unterminated string"));
	833	illegal_string = 1;
	834	}
19bc17a9	835
4b10dd6c UD	836	if (illegal_string)
	837	{
	838	free (buf);
	839	if (buf2 != NULL)
	840	free (buf2);
	841	lr->token.val.str.startmb = NULL;
	842	lr->token.val.str.lenmb = 0;
d5fd1f3f UD	843	lr->token.val.str.startwc = NULL;
d5fd1f3f UD	844	lr->token.val.str.lenwc = 0;
19bc17a9	845
4b10dd6c UD	846	return &lr->token;
4b10dd6c UD	847	}
19bc17a9	848
4b10dd6c	849	ADDC ('\0');
19bc17a9	850
4b10dd6c UD	851	if (return_widestr)
	852	{
	853	ADDWC (0);
	854	lr->token.val.str.startwc = xrealloc (buf2,
	855	buf2act * sizeof (uint32_t));
	856	lr->token.val.str.lenwc = buf2act;
	857	}
19bc17a9 RM	858	}
19bc17a9 RM	859
4b10dd6c UD	860	lr->token.val.str.startmb = xrealloc (buf, bufact);
	861	lr->token.val.str.lenmb = bufact;
	862
19bc17a9 RM	863	return &lr->token;
19bc17a9 RM	864	}