]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/linereader.c
Update.
[thirdparty/glibc.git] / locale / programs / linereader.c
CommitLineData
f2b98f97 1/* Copyright (C) 1996-2001, 2002 Free Software Foundation, Inc.
5290baf0 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
19bc17a9 4
5290baf0 5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
19bc17a9 9
5290baf0
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
19bc17a9 14
41bdb6e2
AJ
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
47e8b443 24#include <assert.h>
19bc17a9
RM
25#include <ctype.h>
26#include <errno.h>
27#include <libintl.h>
28#include <stdarg.h>
29#include <stdlib.h>
30#include <string.h>
31
f2b98f97 32#include "localedef.h"
4b10dd6c 33#include "charmap.h"
19bc17a9
RM
34#include "error.h"
35#include "linereader.h"
47e8b443 36#include "locfile.h"
93693c4d 37
4b10dd6c 38/* Prototypes for local functions. */
19bc17a9
RM
/* Scanners for the individual token classes.  All four are entered
   from lr_token once it has looked at the first character of the
   token.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
                                 const struct charmap_t *charmap,
                                 struct localedef_t *locale,
                                 const struct repertoire_t *repertoire,
                                 int verbose);
19bc17a9
RM
47
48
49struct linereader *
50lr_open (const char *fname, kw_hash_fct_t hf)
51{
52 FILE *fp;
19bc17a9
RM
53
54 if (fname == NULL || strcmp (fname, "-") == 0
55 || strcmp (fname, "/dev/stdin") == 0)
3e076219 56 return lr_create (stdin, "<stdin>", hf);
19bc17a9
RM
57 else
58 {
2e2dc1a5 59 fp = fopen (fname, "rm");
19bc17a9
RM
60 if (fp == NULL)
61 return NULL;
3e076219 62 return lr_create (fp, fname, hf);
19bc17a9 63 }
3e076219
UD
64}
65
66struct linereader *
67lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
68{
69 struct linereader *result;
70 int n;
19bc17a9
RM
71
72 result = (struct linereader *) xmalloc (sizeof (*result));
73
74 result->fp = fp;
3e076219 75 result->fname = xstrdup (fname);
19bc17a9
RM
76 result->buf = NULL;
77 result->bufsize = 0;
78 result->lineno = 1;
79 result->idx = 0;
80 result->comment_char = '#';
81 result->escape_char = '\\';
82 result->translate_strings = 1;
83
84 n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
85 if (n < 0)
86 {
87 int save = errno;
88 fclose (result->fp);
46ec036d 89 free ((char *) result->fname);
19bc17a9
RM
90 free (result);
91 errno = save;
92 return NULL;
93 }
94
95 if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
96 n -= 2;
97
98 result->buf[n] = '\0';
99 result->bufact = n;
100 result->hash_fct = hf;
101
102 return result;
103}
104
105
106int
107lr_eof (struct linereader *lr)
108{
109 return lr->bufact = 0;
110}
111
112
113void
114lr_close (struct linereader *lr)
115{
116 fclose (lr->fp);
117 free (lr->buf);
118 free (lr);
119}
120
121
122int
123lr_next (struct linereader *lr)
124{
125 int n;
126
127 n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
128 if (n < 0)
129 return -1;
130
131 ++lr->lineno;
132
133 if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
134 {
4b10dd6c
UD
135#if 0
136 /* XXX Is this correct? */
19bc17a9
RM
137 /* An escaped newline character is substituted with a single <SP>. */
138 --n;
139 lr->buf[n - 1] = ' ';
4b10dd6c
UD
140#else
141 n -= 2;
142#endif
19bc17a9
RM
143 }
144
145 lr->buf[n] = '\0';
146 lr->bufact = n;
147 lr->idx = 0;
148
149 return 0;
150}
151
152
/* Counter defined in error.c; it is incremented on every call to
   `error'.  */
extern unsigned int error_message_count;

/* Name of the running program.  The program that links this file is
   expected to define and set it.  */
extern char *program_name;
160
161
162struct token *
4b10dd6c 163lr_token (struct linereader *lr, const struct charmap_t *charmap,
47e8b443
UD
164 struct localedef_t *locale, const struct repertoire_t *repertoire,
165 int verbose)
19bc17a9
RM
166{
167 int ch;
168
169 while (1)
170 {
171 do
172 {
173 ch = lr_getc (lr);
174
76fbcfdd
UD
175 if (ch == EOF)
176 {
177 lr->token.tok = tok_eof;
178 return &lr->token;
179 };
180
19bc17a9
RM
181 if (ch == '\n')
182 {
183 lr->token.tok = tok_eol;
184 return &lr->token;
185 }
186 }
187 while (isspace (ch));
188
189 if (ch == EOF)
190 {
191 lr->token.tok = tok_eof;
192 return &lr->token;
193 };
194
195 if (ch != lr->comment_char)
196 break;
197
a0dc5206
UD
198 /* Is there an newline at the end of the buffer? */
199 if (lr->buf[lr->bufact - 1] != '\n')
200 {
201 /* No. Some people want this to mean that only the line in
202 the file not the logical, concatenated line is ignored.
203 Let's try this. */
204 lr->idx = lr->bufact;
205 continue;
206 }
207
19bc17a9
RM
208 /* Ignore rest of line. */
209 lr_ignore_rest (lr, 0);
210 lr->token.tok = tok_eol;
211 return &lr->token;
212 }
213
214 /* Match escape sequences. */
215 if (ch == lr->escape_char)
216 return get_toplvl_escape (lr);
217
218 /* Match ellipsis. */
4b10dd6c 219 if (ch == '.')
19bc17a9 220 {
a0dc5206
UD
221 if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
222 {
223 int cnt;
224 for (cnt = 0; cnt < 10; ++cnt)
225 lr_getc (lr);
226 lr->token.tok = tok_ellipsis4_2;
227 return &lr->token;
228 }
4b10dd6c
UD
229 if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
230 {
231 lr_getc (lr);
232 lr_getc (lr);
233 lr_getc (lr);
234 lr->token.tok = tok_ellipsis4;
235 return &lr->token;
236 }
237 if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
238 {
239 lr_getc (lr);
240 lr_getc (lr);
241 lr->token.tok = tok_ellipsis3;
242 return &lr->token;
243 }
a0dc5206
UD
244 if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
245 {
246 int cnt;
247 for (cnt = 0; cnt < 6; ++cnt)
248 lr_getc (lr);
249 lr->token.tok = tok_ellipsis2_2;
250 return &lr->token;
251 }
4b10dd6c
UD
252 if (lr->buf[lr->idx] == '.')
253 {
254 lr_getc (lr);
255 lr->token.tok = tok_ellipsis2;
256 return &lr->token;
257 }
19bc17a9
RM
258 }
259
260 switch (ch)
261 {
262 case '<':
263 return get_symname (lr);
264
265 case '0' ... '9':
266 lr->token.tok = tok_number;
267 lr->token.val.num = ch - '0';
268
269 while (isdigit (ch = lr_getc (lr)))
270 {
271 lr->token.val.num *= 10;
272 lr->token.val.num += ch - '0';
273 }
274 if (isalpha (ch))
5290baf0 275 lr_error (lr, _("garbage at end of number"));
19bc17a9
RM
276 lr_ungetn (lr, 1);
277
278 return &lr->token;
279
280 case ';':
281 lr->token.tok = tok_semicolon;
282 return &lr->token;
283
284 case ',':
285 lr->token.tok = tok_comma;
286 return &lr->token;
287
288 case '(':
289 lr->token.tok = tok_open_brace;
290 return &lr->token;
291
292 case ')':
293 lr->token.tok = tok_close_brace;
294 return &lr->token;
295
296 case '"':
47e8b443 297 return get_string (lr, charmap, locale, repertoire, verbose);
19bc17a9
RM
298
299 case '-':
300 ch = lr_getc (lr);
301 if (ch == '1')
302 {
303 lr->token.tok = tok_minus1;
304 return &lr->token;
305 }
306 lr_ungetn (lr, 2);
307 break;
308 }
309
310 return get_ident (lr);
311}
312
313
314static struct token *
315get_toplvl_escape (struct linereader *lr)
316{
317 /* This is supposed to be a numeric value. We return the
318 numerical value and the number of bytes. */
319 size_t start_idx = lr->idx - 1;
4b10dd6c 320 char *bytes = lr->token.val.charcode.bytes;
19bc17a9
RM
321 int nbytes = 0;
322 int ch;
323
324 do
325 {
326 unsigned int byte = 0;
327 unsigned int base = 8;
328
329 ch = lr_getc (lr);
330
331 if (ch == 'd')
332 {
333 base = 10;
334 ch = lr_getc (lr);
335 }
336 else if (ch == 'x')
337 {
338 base = 16;
339 ch = lr_getc (lr);
340 }
341
342 if ((base == 16 && !isxdigit (ch))
ba1ffaa1 343 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
19bc17a9
RM
344 {
345 esc_error:
4b10dd6c 346 lr->token.val.str.startmb = &lr->buf[start_idx];
19bc17a9 347
76fbcfdd 348 while (ch != EOF && !isspace (ch))
19bc17a9 349 ch = lr_getc (lr);
4b10dd6c 350 lr->token.val.str.lenmb = lr->idx - start_idx;
19bc17a9
RM
351
352 lr->token.tok = tok_error;
353 return &lr->token;
354 }
355
356 if (isdigit (ch))
357 byte = ch - '0';
358 else
4b10dd6c 359 byte = tolower (ch) - 'a' + 10;
19bc17a9
RM
360
361 ch = lr_getc (lr);
362 if ((base == 16 && !isxdigit (ch))
ba1ffaa1 363 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
19bc17a9
RM
364 goto esc_error;
365
366 byte *= base;
367 if (isdigit (ch))
368 byte += ch - '0';
369 else
4b10dd6c 370 byte += tolower (ch) - 'a' + 10;
19bc17a9
RM
371
372 ch = lr_getc (lr);
373 if (base != 16 && isdigit (ch))
374 {
375 byte *= base;
679f5a56 376 byte += ch - '0';
19bc17a9
RM
377
378 ch = lr_getc (lr);
379 }
380
4b10dd6c 381 bytes[nbytes++] = byte;
19bc17a9 382 }
c50ec4e0 383 while (ch == lr->escape_char
6dd67bd5 384 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
19bc17a9
RM
385
386 if (!isspace (ch))
387 lr_error (lr, _("garbage at end of character code specification"));
388
389 lr_ungetn (lr, 1);
390
391 lr->token.tok = tok_charcode;
19bc17a9
RM
392 lr->token.val.charcode.nbytes = nbytes;
393
394 return &lr->token;
395}
396
397
4b10dd6c
UD
/* Append the single byte CH to the byte buffer under construction.

   The macro expands in a scope that provides `buf' (the buffer),
   `bufact' (number of bytes in use) and `bufmax' (number of bytes
   allocated, which must not be zero).  A full buffer is doubled with
   xrealloc before the byte is stored.  CH is evaluated once.  */
#define ADDC(ch) \
  do                                                                          \
    {                                                                         \
      if (bufmax == bufact)                                                   \
        buf = xrealloc (buf, bufmax *= 2);                                    \
      buf[bufact++] = (ch);                                                   \
    }                                                                         \
  while (0)
409
410
/* Append the L bytes starting at S to the byte buffer under
   construction.

   The macro expands in a scope that provides `buf' (the buffer),
   `bufact' (number of bytes in use) and `bufmax' (number of bytes
   allocated).  If the bytes do not fit, the capacity is doubled, or
   raised to exactly the required size when doubling is not enough, and
   the buffer is reallocated with xrealloc.  Only the capacity is
   adjusted while growing; `bufact' always keeps counting the bytes
   really stored.  L is evaluated once.  */
#define ADDS(s, l) \
  do                                                                          \
    {                                                                         \
      size_t _l = (l);                                                        \
      size_t _need = bufact + _l;                                             \
      if (_need > bufmax)                                                     \
        {                                                                     \
          bufmax *= 2;                                                        \
          if (bufmax < _need)                                                 \
            bufmax = _need;                                                   \
          buf = xrealloc (buf, bufmax);                                       \
        }                                                                     \
      memcpy (&buf[bufact], (s), _l);                                         \
      bufact += _l;                                                           \
    }                                                                         \
  while (0)
426
427
/* Append the wide character CH to the wide character buffer under
   construction.

   The macro expands in a scope that provides `buf2' (the buffer),
   `buf2act' (number of elements in use) and `buf2max' (number of
   elements allocated, which must not be zero).  A full buffer is
   doubled with xrealloc; the byte size passed is four bytes per
   element.  CH is evaluated once.  */
#define ADDWC(ch) \
  do                                                                          \
    {                                                                         \
      if (buf2max == buf2act)                                                 \
        buf2 = xrealloc (buf2, (buf2max *= 2) * 4);                           \
      buf2[buf2act++] = (ch);                                                 \
    }                                                                         \
  while (0)
439
440
441static struct token *
442get_symname (struct linereader *lr)
443{
444 /* Symbol in brackets. We must distinguish three kinds:
445 1. reserved words
446 2. ISO 10646 position values
447 3. all other. */
448 char *buf;
449 size_t bufact = 0;
450 size_t bufmax = 56;
451 const struct keyword_t *kw;
452 int ch;
453
454 buf = (char *) xmalloc (bufmax);
455
456 do
457 {
458 ch = lr_getc (lr);
459 if (ch == lr->escape_char)
460 {
461 int c2 = lr_getc (lr);
462 ADDC (c2);
463
464 if (c2 == '\n')
465 ch = '\n';
466 }
467 else
468 ADDC (ch);
469 }
470 while (ch != '>' && ch != '\n');
471
472 if (ch == '\n')
473 lr_error (lr, _("unterminated symbolic name"));
474
475 /* Test for ISO 10646 position value. */
476 if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
477 {
478 char *cp = buf + 1;
479 while (cp < &buf[bufact - 1] && isxdigit (*cp))
480 ++cp;
481
482 if (cp == &buf[bufact - 1])
483 {
484 /* Yes, it is. */
4b10dd6c
UD
485 lr->token.tok = tok_ucs4;
486 lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
19bc17a9
RM
487
488 return &lr->token;
489 }
490 }
491
492 /* It is a symbolic name. Test for reserved words. */
493 kw = lr->hash_fct (buf, bufact - 1);
494
495 if (kw != NULL && kw->symname_or_ident == 1)
496 {
497 lr->token.tok = kw->token;
498 free (buf);
499 }
500 else
501 {
502 lr->token.tok = tok_bsymbol;
503
504 buf[bufact] = '\0';
505 buf = xrealloc (buf, bufact + 1);
506
4b10dd6c
UD
507 lr->token.val.str.startmb = buf;
508 lr->token.val.str.lenmb = bufact - 1;
19bc17a9
RM
509 }
510
511 return &lr->token;
512}
513
514
515static struct token *
516get_ident (struct linereader *lr)
517{
518 char *buf;
519 size_t bufact;
520 size_t bufmax = 56;
521 const struct keyword_t *kw;
522 int ch;
523
524 buf = xmalloc (bufmax);
525 bufact = 0;
526
527 ADDC (lr->buf[lr->idx - 1]);
528
529 while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
f126ef67 530 && ch != '<' && ch != ',' && ch != EOF)
4b10dd6c
UD
531 {
532 if (ch == lr->escape_char)
533 {
534 ch = lr_getc (lr);
535 if (ch == '\n' || ch == EOF)
536 {
537 lr_error (lr, _("invalid escape sequence"));
538 break;
539 }
540 }
541 ADDC (ch);
542 }
19bc17a9 543
f126ef67 544 lr_ungetc (lr, ch);
19bc17a9
RM
545
546 kw = lr->hash_fct (buf, bufact);
547
548 if (kw != NULL && kw->symname_or_ident == 0)
549 {
550 lr->token.tok = kw->token;
551 free (buf);
552 }
553 else
554 {
555 lr->token.tok = tok_ident;
556
557 buf[bufact] = '\0';
558 buf = xrealloc (buf, bufact + 1);
559
4b10dd6c
UD
560 lr->token.val.str.startmb = buf;
561 lr->token.val.str.lenmb = bufact;
19bc17a9
RM
562 }
563
564 return &lr->token;
565}
566
567
568static struct token *
4b10dd6c 569get_string (struct linereader *lr, const struct charmap_t *charmap,
47e8b443
UD
570 struct localedef_t *locale, const struct repertoire_t *repertoire,
571 int verbose)
19bc17a9 572{
4b10dd6c
UD
573 int return_widestr = lr->return_widestr;
574 char *buf;
a9c27b3e 575 wchar_t *buf2 = NULL;
19bc17a9
RM
576 size_t bufact;
577 size_t bufmax = 56;
19bc17a9 578
4b10dd6c 579 /* We must return two different strings. */
19bc17a9
RM
580 buf = xmalloc (bufmax);
581 bufact = 0;
582
4b10dd6c
UD
583 /* We know it'll be a string. */
584 lr->token.tok = tok_string;
585
586 /* If we need not translate the strings (i.e., expand <...> parts)
587 we can run a simple loop. */
588 if (!lr->translate_strings)
589 {
590 int ch;
591
592 buf2 = NULL;
593 while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
19bc17a9 594 ADDC (ch);
4b10dd6c
UD
595
596 /* Catch errors with trailing escape character. */
597 if (bufact > 0 && buf[bufact - 1] == lr->escape_char
598 && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
599 {
600 lr_error (lr, _("illegal escape sequence at end of string"));
601 --bufact;
602 }
603 else if (ch == '\n' || ch == EOF)
604 lr_error (lr, _("unterminated string"));
605
606 ADDC ('\0');
607 }
608 else
609 {
610 int illegal_string = 0;
611 size_t buf2act = 0;
612 size_t buf2max = 56 * sizeof (uint32_t);
613 int ch;
614 int warned = 0;
615
616 /* We have to provide the wide character result as well. */
617 if (return_widestr)
618 buf2 = xmalloc (buf2max);
619
620 /* Read until the end of the string (or end of the line or file). */
621 while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
622 {
623 size_t startidx;
624 uint32_t wch;
625 struct charseq *seq;
626
627 if (ch != '<')
628 {
629 /* The standards leave it up to the implementation to decide
630 what to do with character which stand for themself. We
631 could jump through hoops to find out the value relative to
632 the charmap and the repertoire map, but instead we leave
633 it up to the locale definition author to write a better
634 definition. We assume here that every character which
635 stands for itself is encoded using ISO 8859-1. Using the
636 escape character is allowed. */
637 if (ch == lr->escape_char)
638 {
639 ch = lr_getc (lr);
640 if (ch == '\n' || ch == EOF)
641 break;
642 }
643
644 if (verbose && !warned)
645 {
646 lr_error (lr, _("\
647non-symbolic character value should not be used"));
648 warned = 1;
649 }
650
651 ADDC (ch);
652 if (return_widestr)
653 ADDWC ((uint32_t) ch);
654
655 continue;
656 }
657
658 /* Now we have to search for the end of the symbolic name, i.e.,
659 the closing '>'. */
660 startidx = bufact;
661 while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
662 {
663 if (ch == lr->escape_char)
664 {
665 ch = lr_getc (lr);
666 if (ch == '\n' || ch == EOF)
667 break;
668 }
669 ADDC (ch);
670 }
671 if (ch == '\n' || ch == EOF)
672 /* Not a correct string. */
673 break;
674 if (bufact == startidx)
675 {
676 /* <> is no correct name. Ignore it and also signal an
677 error. */
19bc17a9 678 illegal_string = 1;
4b10dd6c
UD
679 continue;
680 }
19bc17a9 681
4b10dd6c
UD
682 /* It might be a Uxxxx symbol. */
683 if (buf[startidx] == 'U'
684 && (bufact - startidx == 5 || bufact - startidx == 9))
685 {
686 char *cp = buf + startidx + 1;
687 while (cp < &buf[bufact] && isxdigit (*cp))
688 ++cp;
689
690 if (cp == &buf[bufact])
691 {
3c833378 692 char utmp[10];
4b10dd6c
UD
693
694 /* Yes, it is. */
695 ADDC ('\0');
696 wch = strtoul (buf + startidx + 1, NULL, 16);
697
698 /* Now forget about the name we just added. */
699 bufact = startidx;
700
701 if (return_widestr)
702 ADDWC (wch);
703
3c833378
UD
704 /* See whether the charmap contains the Uxxxxxxxx names. */
705 snprintf (utmp, sizeof (utmp), "U%08X", wch);
706 seq = charmap_find_value (charmap, utmp, 9);
4b10dd6c 707
3c833378 708 if (seq == NULL)
4b10dd6c 709 {
3c833378
UD
710 /* No, this isn't the case. Now determine from
711 the repertoire the name of the character and
712 find it in the charmap. */
713 if (repertoire != NULL)
3c833378 714 {
47e8b443 715 const char *symbol;
3c833378 716
47e8b443
UD
717 symbol = repertoire_find_symbol (repertoire, wch);
718
719 if (symbol != NULL)
720 seq = charmap_find_value (charmap, symbol,
721 strlen (symbol));
722 }
723
724 if (seq == NULL)
725 {
726#ifndef NO_TRANSLITERATION
727 /* Transliterate if possible. */
728 if (locale != NULL)
729 {
730 uint32_t *translit;
731
732 if ((locale->avail & CTYPE_LOCALE) == 0)
733 {
734 /* Load the CTYPE data now. */
735 int old_needed = locale->needed;
736
737 locale->needed = 0;
69f6a804 738 locale = load_locale (LC_CTYPE,
47e8b443
UD
739 locale->name,
740 locale->repertoire_name,
741 charmap, locale);
742 locale->needed = old_needed;
743 }
744
745 if ((locale->avail & CTYPE_LOCALE) != 0
746 && ((translit = find_translit (locale,
747 charmap, wch))
748 != NULL))
749 /* The CTYPE data contains a matching
750 transliteration. */
751 {
752 int i;
753
754 for (i = 0; translit[i] != 0; ++i)
755 {
756 char utmp[10];
757
758 snprintf (utmp, sizeof (utmp), "U%08X",
759 translit[i]);
760 seq = charmap_find_value (charmap, utmp,
761 9);
762 assert (seq != NULL);
763 ADDS (seq->bytes, seq->nbytes);
764 }
765
766 continue;
767 }
768 }
769#endif /* NO_TRANSLITERATION */
770
771 /* Not a known name. */
772 illegal_string = 1;
3c833378 773 }
4b10dd6c
UD
774 }
775
3c833378
UD
776 if (seq != NULL)
777 ADDS (seq->bytes, seq->nbytes);
778
4b10dd6c
UD
779 continue;
780 }
781 }
782
3c833378
UD
783 /* We now have the symbolic name in buf[startidx] to
784 buf[bufact-1]. Now find out the value for this character
785 in the charmap as well as in the repertoire map (in this
786 order). */
787 seq = charmap_find_value (charmap, &buf[startidx],
788 bufact - startidx);
789
790 if (seq == NULL)
791 {
792 /* This name is not in the charmap. */
793 lr_error (lr, _("symbol `%.*s' not in charmap"),
794 (int) (bufact - startidx), &buf[startidx]);
795 illegal_string = 1;
796 }
797
4b10dd6c
UD
798 if (return_widestr)
799 {
3c833378
UD
800 /* Now the same for the multibyte representation. */
801 if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
802 wch = seq->ucs4;
803 else
804 {
805 wch = repertoire_find_value (repertoire, &buf[startidx],
806 bufact - startidx);
807 if (seq != NULL)
808 seq->ucs4 = wch;
809 }
810
4b10dd6c
UD
811 if (wch == ILLEGAL_CHAR_VALUE)
812 {
813 /* This name is not in the repertoire map. */
814 lr_error (lr, _("symbol `%.*s' not in repertoire map"),
70e51ab9 815 (int) (bufact - startidx), &buf[startidx]);
4b10dd6c
UD
816 illegal_string = 1;
817 }
818 else
819 ADDWC (wch);
820 }
821
3c833378
UD
822 /* Now forget about the name we just added. */
823 bufact = startidx;
19bc17a9 824
3c833378
UD
825 /* And copy the bytes. */
826 if (seq != NULL)
827 ADDS (seq->bytes, seq->nbytes);
4b10dd6c 828 }
19bc17a9 829
4b10dd6c
UD
830 if (ch == '\n' || ch == EOF)
831 {
832 lr_error (lr, _("unterminated string"));
833 illegal_string = 1;
834 }
19bc17a9 835
4b10dd6c
UD
836 if (illegal_string)
837 {
838 free (buf);
839 if (buf2 != NULL)
840 free (buf2);
841 lr->token.val.str.startmb = NULL;
842 lr->token.val.str.lenmb = 0;
d5fd1f3f
UD
843 lr->token.val.str.startwc = NULL;
844 lr->token.val.str.lenwc = 0;
19bc17a9 845
4b10dd6c
UD
846 return &lr->token;
847 }
19bc17a9 848
4b10dd6c 849 ADDC ('\0');
19bc17a9 850
4b10dd6c
UD
851 if (return_widestr)
852 {
853 ADDWC (0);
854 lr->token.val.str.startwc = xrealloc (buf2,
855 buf2act * sizeof (uint32_t));
856 lr->token.val.str.lenwc = buf2act;
857 }
19bc17a9
RM
858 }
859
4b10dd6c
UD
860 lr->token.val.str.startmb = xrealloc (buf, bufact);
861 lr->token.val.str.lenmb = bufact;
862
19bc17a9
RM
863 return &lr->token;
864}