]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/linereader.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / locale / programs / linereader.c
CommitLineData
f7a9f785 1/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
5290baf0 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
19bc17a9 4
43bc8ac6 5 This program is free software; you can redistribute it and/or modify
2e2efe65
RM
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
19bc17a9 9
43bc8ac6 10 This program is distributed in the hope that it will be useful,
5290baf0 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6
UD
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
19bc17a9 14
43bc8ac6 15 You should have received a copy of the GNU General Public License
59ba27a6 16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19bc17a9
RM
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
47e8b443 22#include <assert.h>
19bc17a9
RM
23#include <ctype.h>
24#include <errno.h>
25#include <libintl.h>
26#include <stdarg.h>
27#include <stdlib.h>
28#include <string.h>
e054f494 29#include <stdint.h>
19bc17a9 30
f2b98f97 31#include "localedef.h"
4b10dd6c 32#include "charmap.h"
19bc17a9
RM
33#include "error.h"
34#include "linereader.h"
47e8b443 35#include "locfile.h"
93693c4d 36
4b10dd6c 37/* Prototypes for local functions. */
19bc17a9
RM
38static struct token *get_toplvl_escape (struct linereader *lr);
39static struct token *get_symname (struct linereader *lr);
40static struct token *get_ident (struct linereader *lr);
41static struct token *get_string (struct linereader *lr,
4b10dd6c 42 const struct charmap_t *charmap,
47e8b443 43 struct localedef_t *locale,
93693c4d
UD
44 const struct repertoire_t *repertoire,
45 int verbose);
19bc17a9
RM
46
47
48struct linereader *
49lr_open (const char *fname, kw_hash_fct_t hf)
50{
51 FILE *fp;
19bc17a9
RM
52
53 if (fname == NULL || strcmp (fname, "-") == 0
54 || strcmp (fname, "/dev/stdin") == 0)
3e076219 55 return lr_create (stdin, "<stdin>", hf);
19bc17a9
RM
56 else
57 {
2e2dc1a5 58 fp = fopen (fname, "rm");
19bc17a9
RM
59 if (fp == NULL)
60 return NULL;
3e076219 61 return lr_create (fp, fname, hf);
19bc17a9 62 }
3e076219
UD
63}
64
65struct linereader *
66lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
67{
68 struct linereader *result;
69 int n;
19bc17a9
RM
70
71 result = (struct linereader *) xmalloc (sizeof (*result));
72
73 result->fp = fp;
3e076219 74 result->fname = xstrdup (fname);
19bc17a9
RM
75 result->buf = NULL;
76 result->bufsize = 0;
77 result->lineno = 1;
78 result->idx = 0;
79 result->comment_char = '#';
80 result->escape_char = '\\';
81 result->translate_strings = 1;
7c11c4a1 82 result->return_widestr = 0;
19bc17a9
RM
83
84 n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
85 if (n < 0)
86 {
87 int save = errno;
88 fclose (result->fp);
46ec036d 89 free ((char *) result->fname);
19bc17a9
RM
90 free (result);
91 errno = save;
92 return NULL;
93 }
94
95 if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
96 n -= 2;
97
98 result->buf[n] = '\0';
99 result->bufact = n;
100 result->hash_fct = hf;
101
102 return result;
103}
104
105
106int
107lr_eof (struct linereader *lr)
108{
109 return lr->bufact = 0;
110}
111
112
dd9423a6
UD
113void
114lr_ignore_rest (struct linereader *lr, int verbose)
115{
116 if (verbose)
117 {
118 while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
119 && lr->buf[lr->idx] != lr->comment_char)
120 if (lr->buf[lr->idx] == '\0')
121 {
122 if (lr_next (lr) < 0)
123 return;
124 }
125 else
126 ++lr->idx;
127
128 if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
129 && lr->buf[lr->idx] != lr->comment_char)
130 lr_error (lr, _("trailing garbage at end of line"));
131 }
132
133 /* Ignore continued line. */
134 while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
135 if (lr_next (lr) < 0)
136 break;
137
138 lr->idx = lr->bufact;
139}
140
141
19bc17a9
RM
142void
143lr_close (struct linereader *lr)
144{
145 fclose (lr->fp);
146 free (lr->buf);
147 free (lr);
148}
149
150
151int
152lr_next (struct linereader *lr)
153{
154 int n;
155
156 n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
157 if (n < 0)
158 return -1;
159
160 ++lr->lineno;
161
162 if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
163 {
4b10dd6c
UD
164#if 0
165 /* XXX Is this correct? */
19bc17a9
RM
166 /* An escaped newline character is substituted with a single <SP>. */
167 --n;
168 lr->buf[n - 1] = ' ';
4b10dd6c
UD
169#else
170 n -= 2;
171#endif
19bc17a9
RM
172 }
173
174 lr->buf[n] = '\0';
175 lr->bufact = n;
176 lr->idx = 0;
177
178 return 0;
179}
180
181
182/* Defined in error.c. */
183/* This variable is incremented each time `error' is called. */
184extern unsigned int error_message_count;
185
186/* The calling program should define program_name and set it to the
187 name of the executing program. */
188extern char *program_name;
189
190
191struct token *
4b10dd6c 192lr_token (struct linereader *lr, const struct charmap_t *charmap,
47e8b443
UD
193 struct localedef_t *locale, const struct repertoire_t *repertoire,
194 int verbose)
19bc17a9
RM
195{
196 int ch;
197
198 while (1)
199 {
200 do
201 {
202 ch = lr_getc (lr);
203
76fbcfdd
UD
204 if (ch == EOF)
205 {
206 lr->token.tok = tok_eof;
207 return &lr->token;
208 };
209
19bc17a9
RM
210 if (ch == '\n')
211 {
212 lr->token.tok = tok_eol;
213 return &lr->token;
214 }
215 }
216 while (isspace (ch));
217
19bc17a9
RM
218 if (ch != lr->comment_char)
219 break;
220
a0dc5206
UD
221 /* Is there an newline at the end of the buffer? */
222 if (lr->buf[lr->bufact - 1] != '\n')
223 {
224 /* No. Some people want this to mean that only the line in
225 the file not the logical, concatenated line is ignored.
226 Let's try this. */
227 lr->idx = lr->bufact;
228 continue;
229 }
230
19bc17a9
RM
231 /* Ignore rest of line. */
232 lr_ignore_rest (lr, 0);
233 lr->token.tok = tok_eol;
234 return &lr->token;
235 }
236
237 /* Match escape sequences. */
238 if (ch == lr->escape_char)
239 return get_toplvl_escape (lr);
240
241 /* Match ellipsis. */
4b10dd6c 242 if (ch == '.')
19bc17a9 243 {
a0dc5206
UD
244 if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
245 {
246 int cnt;
247 for (cnt = 0; cnt < 10; ++cnt)
248 lr_getc (lr);
249 lr->token.tok = tok_ellipsis4_2;
250 return &lr->token;
251 }
4b10dd6c
UD
252 if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
253 {
254 lr_getc (lr);
255 lr_getc (lr);
256 lr_getc (lr);
257 lr->token.tok = tok_ellipsis4;
258 return &lr->token;
259 }
260 if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
261 {
262 lr_getc (lr);
263 lr_getc (lr);
264 lr->token.tok = tok_ellipsis3;
265 return &lr->token;
266 }
a0dc5206
UD
267 if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
268 {
269 int cnt;
270 for (cnt = 0; cnt < 6; ++cnt)
271 lr_getc (lr);
272 lr->token.tok = tok_ellipsis2_2;
273 return &lr->token;
274 }
4b10dd6c
UD
275 if (lr->buf[lr->idx] == '.')
276 {
277 lr_getc (lr);
278 lr->token.tok = tok_ellipsis2;
279 return &lr->token;
280 }
19bc17a9
RM
281 }
282
283 switch (ch)
284 {
285 case '<':
286 return get_symname (lr);
287
288 case '0' ... '9':
289 lr->token.tok = tok_number;
290 lr->token.val.num = ch - '0';
291
292 while (isdigit (ch = lr_getc (lr)))
293 {
294 lr->token.val.num *= 10;
295 lr->token.val.num += ch - '0';
296 }
297 if (isalpha (ch))
5290baf0 298 lr_error (lr, _("garbage at end of number"));
19bc17a9
RM
299 lr_ungetn (lr, 1);
300
301 return &lr->token;
302
303 case ';':
304 lr->token.tok = tok_semicolon;
305 return &lr->token;
306
307 case ',':
308 lr->token.tok = tok_comma;
309 return &lr->token;
310
311 case '(':
312 lr->token.tok = tok_open_brace;
313 return &lr->token;
314
315 case ')':
316 lr->token.tok = tok_close_brace;
317 return &lr->token;
318
319 case '"':
47e8b443 320 return get_string (lr, charmap, locale, repertoire, verbose);
19bc17a9
RM
321
322 case '-':
323 ch = lr_getc (lr);
324 if (ch == '1')
325 {
326 lr->token.tok = tok_minus1;
327 return &lr->token;
328 }
329 lr_ungetn (lr, 2);
330 break;
331 }
332
333 return get_ident (lr);
334}
335
336
337static struct token *
338get_toplvl_escape (struct linereader *lr)
339{
340 /* This is supposed to be a numeric value. We return the
341 numerical value and the number of bytes. */
342 size_t start_idx = lr->idx - 1;
9cfe5381
RM
343 unsigned char *bytes = lr->token.val.charcode.bytes;
344 size_t nbytes = 0;
19bc17a9
RM
345 int ch;
346
347 do
348 {
349 unsigned int byte = 0;
350 unsigned int base = 8;
351
352 ch = lr_getc (lr);
353
354 if (ch == 'd')
355 {
356 base = 10;
357 ch = lr_getc (lr);
358 }
359 else if (ch == 'x')
360 {
361 base = 16;
362 ch = lr_getc (lr);
363 }
364
365 if ((base == 16 && !isxdigit (ch))
ba1ffaa1 366 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
19bc17a9
RM
367 {
368 esc_error:
4b10dd6c 369 lr->token.val.str.startmb = &lr->buf[start_idx];
19bc17a9 370
76fbcfdd 371 while (ch != EOF && !isspace (ch))
19bc17a9 372 ch = lr_getc (lr);
4b10dd6c 373 lr->token.val.str.lenmb = lr->idx - start_idx;
19bc17a9
RM
374
375 lr->token.tok = tok_error;
376 return &lr->token;
377 }
378
379 if (isdigit (ch))
380 byte = ch - '0';
381 else
4b10dd6c 382 byte = tolower (ch) - 'a' + 10;
19bc17a9
RM
383
384 ch = lr_getc (lr);
385 if ((base == 16 && !isxdigit (ch))
ba1ffaa1 386 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
19bc17a9
RM
387 goto esc_error;
388
389 byte *= base;
390 if (isdigit (ch))
391 byte += ch - '0';
392 else
4b10dd6c 393 byte += tolower (ch) - 'a' + 10;
19bc17a9
RM
394
395 ch = lr_getc (lr);
396 if (base != 16 && isdigit (ch))
397 {
398 byte *= base;
679f5a56 399 byte += ch - '0';
19bc17a9
RM
400
401 ch = lr_getc (lr);
402 }
403
4b10dd6c 404 bytes[nbytes++] = byte;
19bc17a9 405 }
c50ec4e0 406 while (ch == lr->escape_char
6dd67bd5 407 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
19bc17a9
RM
408
409 if (!isspace (ch))
410 lr_error (lr, _("garbage at end of character code specification"));
411
412 lr_ungetn (lr, 1);
413
414 lr->token.tok = tok_charcode;
19bc17a9
RM
415 lr->token.val.charcode.nbytes = nbytes;
416
417 return &lr->token;
418}
419
420
4b10dd6c
UD
/* Append the single character CH to the byte buffer `buf' of the
   calling function.  `bufact' is the number of bytes in use and
   `bufmax' the capacity; the capacity is doubled when it is used up.  */
#define ADDC(ch) \
  do \
    { \
      if (bufmax == bufact) \
        { \
          bufmax += bufmax; \
          buf = xrealloc (buf, bufmax); \
        } \
      buf[bufact] = (ch); \
      ++bufact; \
    } \
  while (0)
432
433
/* Append the L bytes starting at S to the byte buffer `buf' of the
   calling function.  `bufact' is the number of bytes in use and
   `bufmax' the capacity.  When the bytes do not fit, the capacity is
   doubled and, should that still be too small, raised to exactly the
   required size.  The write position `bufact' is only ever advanced by
   L; the previous version overwrote it while growing, which misplaced
   the data and could write past the end of the buffer.  */
#define ADDS(s, l) \
  do \
    { \
      size_t _l = (l); \
      if (bufact + _l > bufmax) \
        { \
          bufmax *= 2; \
          if (bufmax < bufact + _l) \
            bufmax = bufact + _l; \
          buf = xrealloc (buf, bufmax); \
        } \
      memcpy (&buf[bufact], (s), _l); \
      bufact += _l; \
    } \
  while (0)
449
450
/* Append the wide character CH to the array `buf2' of the calling
   function.  `buf2act' and `buf2max' count elements, not bytes; the
   capacity is doubled when it is used up.  The allocation size is
   derived from the element type of `buf2' instead of a literal 4.  */
#define ADDWC(ch) \
  do \
    { \
      if (buf2act == buf2max) \
        { \
          buf2max *= 2; \
          buf2 = xrealloc (buf2, buf2max * sizeof (*buf2)); \
        } \
      buf2[buf2act++] = (ch); \
    } \
  while (0)
462
463
464static struct token *
465get_symname (struct linereader *lr)
466{
467 /* Symbol in brackets. We must distinguish three kinds:
468 1. reserved words
469 2. ISO 10646 position values
470 3. all other. */
471 char *buf;
472 size_t bufact = 0;
473 size_t bufmax = 56;
474 const struct keyword_t *kw;
475 int ch;
476
477 buf = (char *) xmalloc (bufmax);
478
479 do
480 {
481 ch = lr_getc (lr);
482 if (ch == lr->escape_char)
483 {
484 int c2 = lr_getc (lr);
485 ADDC (c2);
486
487 if (c2 == '\n')
488 ch = '\n';
489 }
490 else
491 ADDC (ch);
492 }
493 while (ch != '>' && ch != '\n');
494
495 if (ch == '\n')
496 lr_error (lr, _("unterminated symbolic name"));
497
498 /* Test for ISO 10646 position value. */
499 if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
500 {
501 char *cp = buf + 1;
502 while (cp < &buf[bufact - 1] && isxdigit (*cp))
503 ++cp;
504
505 if (cp == &buf[bufact - 1])
506 {
507 /* Yes, it is. */
4b10dd6c
UD
508 lr->token.tok = tok_ucs4;
509 lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
19bc17a9
RM
510
511 return &lr->token;
512 }
513 }
514
515 /* It is a symbolic name. Test for reserved words. */
516 kw = lr->hash_fct (buf, bufact - 1);
517
518 if (kw != NULL && kw->symname_or_ident == 1)
519 {
520 lr->token.tok = kw->token;
521 free (buf);
522 }
523 else
524 {
525 lr->token.tok = tok_bsymbol;
526
19bc17a9 527 buf = xrealloc (buf, bufact + 1);
b16dba4c 528 buf[bufact] = '\0';
19bc17a9 529
4b10dd6c
UD
530 lr->token.val.str.startmb = buf;
531 lr->token.val.str.lenmb = bufact - 1;
19bc17a9
RM
532 }
533
534 return &lr->token;
535}
536
537
538static struct token *
539get_ident (struct linereader *lr)
540{
541 char *buf;
542 size_t bufact;
543 size_t bufmax = 56;
544 const struct keyword_t *kw;
545 int ch;
546
547 buf = xmalloc (bufmax);
548 bufact = 0;
549
550 ADDC (lr->buf[lr->idx - 1]);
551
552 while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
f126ef67 553 && ch != '<' && ch != ',' && ch != EOF)
4b10dd6c
UD
554 {
555 if (ch == lr->escape_char)
556 {
557 ch = lr_getc (lr);
558 if (ch == '\n' || ch == EOF)
559 {
560 lr_error (lr, _("invalid escape sequence"));
561 break;
562 }
563 }
564 ADDC (ch);
565 }
19bc17a9 566
f126ef67 567 lr_ungetc (lr, ch);
19bc17a9
RM
568
569 kw = lr->hash_fct (buf, bufact);
570
571 if (kw != NULL && kw->symname_or_ident == 0)
572 {
573 lr->token.tok = kw->token;
574 free (buf);
575 }
576 else
577 {
578 lr->token.tok = tok_ident;
579
19bc17a9 580 buf = xrealloc (buf, bufact + 1);
b16dba4c 581 buf[bufact] = '\0';
19bc17a9 582
4b10dd6c
UD
583 lr->token.val.str.startmb = buf;
584 lr->token.val.str.lenmb = bufact;
19bc17a9
RM
585 }
586
587 return &lr->token;
588}
589
590
591static struct token *
4b10dd6c 592get_string (struct linereader *lr, const struct charmap_t *charmap,
47e8b443
UD
593 struct localedef_t *locale, const struct repertoire_t *repertoire,
594 int verbose)
19bc17a9 595{
4b10dd6c
UD
596 int return_widestr = lr->return_widestr;
597 char *buf;
a9c27b3e 598 wchar_t *buf2 = NULL;
19bc17a9
RM
599 size_t bufact;
600 size_t bufmax = 56;
19bc17a9 601
4b10dd6c 602 /* We must return two different strings. */
19bc17a9
RM
603 buf = xmalloc (bufmax);
604 bufact = 0;
605
4b10dd6c
UD
606 /* We know it'll be a string. */
607 lr->token.tok = tok_string;
608
609 /* If we need not translate the strings (i.e., expand <...> parts)
610 we can run a simple loop. */
611 if (!lr->translate_strings)
612 {
613 int ch;
614
615 buf2 = NULL;
616 while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
19bc17a9 617 ADDC (ch);
4b10dd6c
UD
618
619 /* Catch errors with trailing escape character. */
620 if (bufact > 0 && buf[bufact - 1] == lr->escape_char
621 && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
622 {
623 lr_error (lr, _("illegal escape sequence at end of string"));
624 --bufact;
625 }
626 else if (ch == '\n' || ch == EOF)
627 lr_error (lr, _("unterminated string"));
628
629 ADDC ('\0');
630 }
631 else
632 {
633 int illegal_string = 0;
634 size_t buf2act = 0;
635 size_t buf2max = 56 * sizeof (uint32_t);
636 int ch;
637 int warned = 0;
638
639 /* We have to provide the wide character result as well. */
640 if (return_widestr)
641 buf2 = xmalloc (buf2max);
642
643 /* Read until the end of the string (or end of the line or file). */
644 while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
645 {
646 size_t startidx;
647 uint32_t wch;
648 struct charseq *seq;
649
650 if (ch != '<')
651 {
652 /* The standards leave it up to the implementation to decide
653 what to do with character which stand for themself. We
654 could jump through hoops to find out the value relative to
655 the charmap and the repertoire map, but instead we leave
656 it up to the locale definition author to write a better
657 definition. We assume here that every character which
658 stands for itself is encoded using ISO 8859-1. Using the
659 escape character is allowed. */
660 if (ch == lr->escape_char)
661 {
662 ch = lr_getc (lr);
663 if (ch == '\n' || ch == EOF)
664 break;
665 }
666
667 if (verbose && !warned)
668 {
669 lr_error (lr, _("\
670non-symbolic character value should not be used"));
671 warned = 1;
672 }
673
674 ADDC (ch);
675 if (return_widestr)
676 ADDWC ((uint32_t) ch);
677
678 continue;
679 }
680
681 /* Now we have to search for the end of the symbolic name, i.e.,
682 the closing '>'. */
683 startidx = bufact;
684 while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
685 {
686 if (ch == lr->escape_char)
687 {
688 ch = lr_getc (lr);
689 if (ch == '\n' || ch == EOF)
690 break;
691 }
692 ADDC (ch);
693 }
694 if (ch == '\n' || ch == EOF)
695 /* Not a correct string. */
696 break;
697 if (bufact == startidx)
698 {
699 /* <> is no correct name. Ignore it and also signal an
700 error. */
19bc17a9 701 illegal_string = 1;
4b10dd6c
UD
702 continue;
703 }
19bc17a9 704
4b10dd6c
UD
705 /* It might be a Uxxxx symbol. */
706 if (buf[startidx] == 'U'
707 && (bufact - startidx == 5 || bufact - startidx == 9))
708 {
709 char *cp = buf + startidx + 1;
710 while (cp < &buf[bufact] && isxdigit (*cp))
711 ++cp;
712
713 if (cp == &buf[bufact])
714 {
3c833378 715 char utmp[10];
4b10dd6c
UD
716
717 /* Yes, it is. */
718 ADDC ('\0');
719 wch = strtoul (buf + startidx + 1, NULL, 16);
720
721 /* Now forget about the name we just added. */
722 bufact = startidx;
723
724 if (return_widestr)
725 ADDWC (wch);
726
3c833378
UD
727 /* See whether the charmap contains the Uxxxxxxxx names. */
728 snprintf (utmp, sizeof (utmp), "U%08X", wch);
729 seq = charmap_find_value (charmap, utmp, 9);
4b10dd6c 730
3c833378 731 if (seq == NULL)
4b10dd6c 732 {
3c833378
UD
733 /* No, this isn't the case. Now determine from
734 the repertoire the name of the character and
735 find it in the charmap. */
736 if (repertoire != NULL)
3c833378 737 {
47e8b443 738 const char *symbol;
3c833378 739
47e8b443
UD
740 symbol = repertoire_find_symbol (repertoire, wch);
741
742 if (symbol != NULL)
743 seq = charmap_find_value (charmap, symbol,
744 strlen (symbol));
745 }
746
747 if (seq == NULL)
748 {
749#ifndef NO_TRANSLITERATION
750 /* Transliterate if possible. */
751 if (locale != NULL)
752 {
753 uint32_t *translit;
754
755 if ((locale->avail & CTYPE_LOCALE) == 0)
756 {
757 /* Load the CTYPE data now. */
758 int old_needed = locale->needed;
759
760 locale->needed = 0;
69f6a804 761 locale = load_locale (LC_CTYPE,
47e8b443
UD
762 locale->name,
763 locale->repertoire_name,
764 charmap, locale);
765 locale->needed = old_needed;
766 }
767
768 if ((locale->avail & CTYPE_LOCALE) != 0
769 && ((translit = find_translit (locale,
770 charmap, wch))
771 != NULL))
772 /* The CTYPE data contains a matching
773 transliteration. */
774 {
775 int i;
776
777 for (i = 0; translit[i] != 0; ++i)
778 {
779 char utmp[10];
780
781 snprintf (utmp, sizeof (utmp), "U%08X",
782 translit[i]);
783 seq = charmap_find_value (charmap, utmp,
784 9);
785 assert (seq != NULL);
786 ADDS (seq->bytes, seq->nbytes);
787 }
788
789 continue;
790 }
791 }
792#endif /* NO_TRANSLITERATION */
793
794 /* Not a known name. */
795 illegal_string = 1;
3c833378 796 }
4b10dd6c
UD
797 }
798
3c833378
UD
799 if (seq != NULL)
800 ADDS (seq->bytes, seq->nbytes);
801
4b10dd6c
UD
802 continue;
803 }
804 }
805
3c833378
UD
806 /* We now have the symbolic name in buf[startidx] to
807 buf[bufact-1]. Now find out the value for this character
808 in the charmap as well as in the repertoire map (in this
809 order). */
810 seq = charmap_find_value (charmap, &buf[startidx],
811 bufact - startidx);
812
813 if (seq == NULL)
814 {
815 /* This name is not in the charmap. */
816 lr_error (lr, _("symbol `%.*s' not in charmap"),
817 (int) (bufact - startidx), &buf[startidx]);
818 illegal_string = 1;
819 }
820
4b10dd6c
UD
821 if (return_widestr)
822 {
3c833378
UD
823 /* Now the same for the multibyte representation. */
824 if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
825 wch = seq->ucs4;
826 else
827 {
828 wch = repertoire_find_value (repertoire, &buf[startidx],
829 bufact - startidx);
830 if (seq != NULL)
831 seq->ucs4 = wch;
832 }
833
4b10dd6c
UD
834 if (wch == ILLEGAL_CHAR_VALUE)
835 {
836 /* This name is not in the repertoire map. */
837 lr_error (lr, _("symbol `%.*s' not in repertoire map"),
70e51ab9 838 (int) (bufact - startidx), &buf[startidx]);
4b10dd6c
UD
839 illegal_string = 1;
840 }
841 else
842 ADDWC (wch);
843 }
844
3c833378
UD
845 /* Now forget about the name we just added. */
846 bufact = startidx;
19bc17a9 847
3c833378
UD
848 /* And copy the bytes. */
849 if (seq != NULL)
850 ADDS (seq->bytes, seq->nbytes);
4b10dd6c 851 }
19bc17a9 852
4b10dd6c
UD
853 if (ch == '\n' || ch == EOF)
854 {
855 lr_error (lr, _("unterminated string"));
856 illegal_string = 1;
857 }
19bc17a9 858
4b10dd6c
UD
859 if (illegal_string)
860 {
861 free (buf);
72e6cdfa 862 free (buf2);
4b10dd6c
UD
863 lr->token.val.str.startmb = NULL;
864 lr->token.val.str.lenmb = 0;
d5fd1f3f
UD
865 lr->token.val.str.startwc = NULL;
866 lr->token.val.str.lenwc = 0;
19bc17a9 867
4b10dd6c
UD
868 return &lr->token;
869 }
19bc17a9 870
4b10dd6c 871 ADDC ('\0');
19bc17a9 872
4b10dd6c
UD
873 if (return_widestr)
874 {
875 ADDWC (0);
876 lr->token.val.str.startwc = xrealloc (buf2,
877 buf2act * sizeof (uint32_t));
878 lr->token.val.str.lenwc = buf2act;
879 }
19bc17a9
RM
880 }
881
4b10dd6c
UD
882 lr->token.val.str.startmb = xrealloc (buf, bufact);
883 lr->token.val.str.lenmb = bufact;
884
19bc17a9
RM
885 return &lr->token;
886}