]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgfortran/io/format.c
re PR fortran/40508 (memory leak in internal write of gfortran)
[thirdparty/gcc.git] / libgfortran / io / format.c
1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
5
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27
28 /* format.c-- parse a FORMAT string into a binary format suitable for
29 * interpretation during I/O statements */
30
31 #include "io.h"
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdbool.h>
35
36 #define FARRAY_SIZE 64
37
38 typedef struct fnode_array
39 {
40 struct fnode_array *next;
41 fnode array[FARRAY_SIZE];
42 }
43 fnode_array;
44
45 typedef struct format_data
46 {
47 char *format_string, *string;
48 const char *error;
49 char error_element;
50 format_token saved_token;
51 int value, format_string_len, reversion_ok;
52 fnode *avail;
53 const fnode *saved_format;
54 fnode_array *last;
55 fnode_array array;
56 }
57 format_data;
58
59 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
60 NULL };
61
62 /* Error messages. */
63
64 static const char posint_required[] = "Positive width required in format",
65 period_required[] = "Period required in format",
66 nonneg_required[] = "Nonnegative width required in format",
67 unexpected_element[] = "Unexpected element '%c' in format\n",
68 unexpected_end[] = "Unexpected end of format string",
69 bad_string[] = "Unterminated character constant in format",
70 bad_hollerith[] = "Hollerith constant extends past the end of the format",
71 reversion_error[] = "Exhausted data descriptors in format",
72 zero_width[] = "Zero width in format descriptor";
73
74 /* The following routines support caching format data from parsed format strings
75 into a hash table. This avoids repeatedly parsing duplicate format strings
76 or format strings in I/O statements that are repeated in loops. */
77
78
79 /* Traverse the table and free all data. */
80
81 void
82 free_format_hash_table (gfc_unit *u)
83 {
84 size_t i;
85
86 /* free_format_data handles any NULL pointers. */
87 for (i = 0; i < FORMAT_HASH_SIZE; i++)
88 {
89 if (u->format_hash_table[i].hashed_fmt != NULL)
90 {
91 free_format_data (u->format_hash_table[i].hashed_fmt);
92 free_mem (u->format_hash_table[i].key);
93 }
94 u->format_hash_table[i].key = NULL;
95 u->format_hash_table[i].key_len = 0;
96 u->format_hash_table[i].hashed_fmt = NULL;
97 }
98 }
99
100 /* Traverse the format_data structure and reset the fnode counters. */
101
102 static void
103 reset_node (fnode *fn)
104 {
105 fnode *f;
106
107 fn->count = 0;
108 fn->current = NULL;
109
110 if (fn->format != FMT_LPAREN)
111 return;
112
113 for (f = fn->u.child; f; f = f->next)
114 {
115 if (f->format == FMT_RPAREN)
116 break;
117 reset_node (f);
118 }
119 }
120
121 static void
122 reset_fnode_counters (st_parameter_dt *dtp)
123 {
124 fnode *f;
125 format_data *fmt;
126
127 fmt = dtp->u.p.fmt;
128
129 /* Clear this pointer at the head so things start at the right place. */
130 fmt->array.array[0].current = NULL;
131
132 for (f = fmt->last->array[0].u.child; f; f = f->next)
133 reset_node (f);
134 }
135
136
137 /* A simple hashing function to generate an index into the hash table. */
138
139 static inline
140 uint32_t format_hash (st_parameter_dt *dtp)
141 {
142 char *key;
143 gfc_charlen_type key_len;
144 uint32_t hash = 0;
145 gfc_charlen_type i;
146
147 /* Hash the format string. Super simple, but what the heck! */
148 key = dtp->format;
149 key_len = dtp->format_len;
150 for (i = 0; i < key_len; i++)
151 hash ^= key[i];
152 hash &= (FORMAT_HASH_SIZE - 1);
153 return hash;
154 }
155
156
157 static void
158 save_parsed_format (st_parameter_dt *dtp)
159 {
160 uint32_t hash;
161 gfc_unit *u;
162
163 hash = format_hash (dtp);
164 u = dtp->u.p.current_unit;
165
166 /* Index into the hash table. We are simply replacing whatever is there
167 relying on probability. */
168 if (u->format_hash_table[hash].hashed_fmt != NULL)
169 free_format_data (u->format_hash_table[hash].hashed_fmt);
170 u->format_hash_table[hash].hashed_fmt = NULL;
171
172 if (u->format_hash_table[hash].key != NULL)
173 free_mem (u->format_hash_table[hash].key);
174 u->format_hash_table[hash].key = get_mem (dtp->format_len);
175 memcpy (u->format_hash_table[hash].key, dtp->format, dtp->format_len);
176
177 u->format_hash_table[hash].key_len = dtp->format_len;
178 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
179 }
180
181
182 static format_data *
183 find_parsed_format (st_parameter_dt *dtp)
184 {
185 uint32_t hash;
186 gfc_unit *u;
187
188 hash = format_hash (dtp);
189 u = dtp->u.p.current_unit;
190
191 if (u->format_hash_table[hash].key != NULL)
192 {
193 /* See if it matches. */
194 if (u->format_hash_table[hash].key_len == dtp->format_len)
195 {
196 /* So far so good. */
197 if (strncmp (u->format_hash_table[hash].key,
198 dtp->format, dtp->format_len) == 0)
199 return u->format_hash_table[hash].hashed_fmt;
200 }
201 }
202 return NULL;
203 }
204
205
206 /* next_char()-- Return the next character in the format string.
207 * Returns -1 when the string is done. If the literal flag is set,
208 * spaces are significant, otherwise they are not. */
209
210 static int
211 next_char (format_data *fmt, int literal)
212 {
213 int c;
214
215 do
216 {
217 if (fmt->format_string_len == 0)
218 return -1;
219
220 fmt->format_string_len--;
221 c = toupper (*fmt->format_string++);
222 fmt->error_element = c;
223 }
224 while ((c == ' ' || c == '\t') && !literal);
225
226 return c;
227 }
228
229
230 /* unget_char()-- Back up one character position. */
231
232 #define unget_char(fmt) \
233 { fmt->format_string--; fmt->format_string_len++; }
234
235
236 /* get_fnode()-- Allocate a new format node, inserting it into the
237 * current singly linked list. These are initially allocated from the
238 * static buffer. */
239
240 static fnode *
241 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
242 {
243 fnode *f;
244
245 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
246 {
247 fmt->last->next = get_mem (sizeof (fnode_array));
248 fmt->last = fmt->last->next;
249 fmt->last->next = NULL;
250 fmt->avail = &fmt->last->array[0];
251 }
252 f = fmt->avail++;
253 memset (f, '\0', sizeof (fnode));
254
255 if (*head == NULL)
256 *head = *tail = f;
257 else
258 {
259 (*tail)->next = f;
260 *tail = f;
261 }
262
263 f->format = t;
264 f->repeat = -1;
265 f->source = fmt->format_string;
266 return f;
267 }
268
269
270 /* free_format_data()-- Free all allocated format data. */
271
272 void
273 free_format_data (format_data *fmt)
274 {
275 fnode_array *fa, *fa_next;
276
277
278 if (fmt == NULL)
279 return;
280
281 for (fa = fmt->array.next; fa; fa = fa_next)
282 {
283 fa_next = fa->next;
284 free_mem (fa);
285 }
286
287 free_mem (fmt);
288 fmt = NULL;
289 }
290
291
292 /* format_lex()-- Simple lexical analyzer for getting the next token
293 * in a FORMAT string. We support a one-level token pushback in the
294 * fmt->saved_token variable. */
295
296 static format_token
297 format_lex (format_data *fmt)
298 {
299 format_token token;
300 int negative_flag;
301 int c;
302 char delim;
303
304 if (fmt->saved_token != FMT_NONE)
305 {
306 token = fmt->saved_token;
307 fmt->saved_token = FMT_NONE;
308 return token;
309 }
310
311 negative_flag = 0;
312 c = next_char (fmt, 0);
313
314 switch (c)
315 {
316 case '(':
317 token = FMT_LPAREN;
318 break;
319
320 case ')':
321 token = FMT_RPAREN;
322 break;
323
324 case '-':
325 negative_flag = 1;
326 /* Fall Through */
327
328 case '+':
329 c = next_char (fmt, 0);
330 if (!isdigit (c))
331 {
332 token = FMT_UNKNOWN;
333 break;
334 }
335
336 fmt->value = c - '0';
337
338 for (;;)
339 {
340 c = next_char (fmt, 0);
341 if (!isdigit (c))
342 break;
343
344 fmt->value = 10 * fmt->value + c - '0';
345 }
346
347 unget_char (fmt);
348
349 if (negative_flag)
350 fmt->value = -fmt->value;
351 token = FMT_SIGNED_INT;
352 break;
353
354 case '0':
355 case '1':
356 case '2':
357 case '3':
358 case '4':
359 case '5':
360 case '6':
361 case '7':
362 case '8':
363 case '9':
364 fmt->value = c - '0';
365
366 for (;;)
367 {
368 c = next_char (fmt, 0);
369 if (!isdigit (c))
370 break;
371
372 fmt->value = 10 * fmt->value + c - '0';
373 }
374
375 unget_char (fmt);
376 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
377 break;
378
379 case '.':
380 token = FMT_PERIOD;
381 break;
382
383 case ',':
384 token = FMT_COMMA;
385 break;
386
387 case ':':
388 token = FMT_COLON;
389 break;
390
391 case '/':
392 token = FMT_SLASH;
393 break;
394
395 case '$':
396 token = FMT_DOLLAR;
397 break;
398
399 case 'T':
400 switch (next_char (fmt, 0))
401 {
402 case 'L':
403 token = FMT_TL;
404 break;
405 case 'R':
406 token = FMT_TR;
407 break;
408 default:
409 token = FMT_T;
410 unget_char (fmt);
411 break;
412 }
413
414 break;
415
416 case 'X':
417 token = FMT_X;
418 break;
419
420 case 'S':
421 switch (next_char (fmt, 0))
422 {
423 case 'S':
424 token = FMT_SS;
425 break;
426 case 'P':
427 token = FMT_SP;
428 break;
429 default:
430 token = FMT_S;
431 unget_char (fmt);
432 break;
433 }
434
435 break;
436
437 case 'B':
438 switch (next_char (fmt, 0))
439 {
440 case 'N':
441 token = FMT_BN;
442 break;
443 case 'Z':
444 token = FMT_BZ;
445 break;
446 default:
447 token = FMT_B;
448 unget_char (fmt);
449 break;
450 }
451
452 break;
453
454 case '\'':
455 case '"':
456 delim = c;
457
458 fmt->string = fmt->format_string;
459 fmt->value = 0; /* This is the length of the string */
460
461 for (;;)
462 {
463 c = next_char (fmt, 1);
464 if (c == -1)
465 {
466 token = FMT_BADSTRING;
467 fmt->error = bad_string;
468 break;
469 }
470
471 if (c == delim)
472 {
473 c = next_char (fmt, 1);
474
475 if (c == -1)
476 {
477 token = FMT_BADSTRING;
478 fmt->error = bad_string;
479 break;
480 }
481
482 if (c != delim)
483 {
484 unget_char (fmt);
485 token = FMT_STRING;
486 break;
487 }
488 }
489
490 fmt->value++;
491 }
492
493 break;
494
495 case 'P':
496 token = FMT_P;
497 break;
498
499 case 'I':
500 token = FMT_I;
501 break;
502
503 case 'O':
504 token = FMT_O;
505 break;
506
507 case 'Z':
508 token = FMT_Z;
509 break;
510
511 case 'F':
512 token = FMT_F;
513 break;
514
515 case 'E':
516 switch (next_char (fmt, 0))
517 {
518 case 'N':
519 token = FMT_EN;
520 break;
521 case 'S':
522 token = FMT_ES;
523 break;
524 default:
525 token = FMT_E;
526 unget_char (fmt);
527 break;
528 }
529 break;
530
531 case 'G':
532 token = FMT_G;
533 break;
534
535 case 'H':
536 token = FMT_H;
537 break;
538
539 case 'L':
540 token = FMT_L;
541 break;
542
543 case 'A':
544 token = FMT_A;
545 break;
546
547 case 'D':
548 switch (next_char (fmt, 0))
549 {
550 case 'P':
551 token = FMT_DP;
552 break;
553 case 'C':
554 token = FMT_DC;
555 break;
556 default:
557 token = FMT_D;
558 unget_char (fmt);
559 break;
560 }
561 break;
562
563 case -1:
564 token = FMT_END;
565 break;
566
567 default:
568 token = FMT_UNKNOWN;
569 break;
570 }
571
572 return token;
573 }
574
575
576 /* parse_format_list()-- Parse a format list. Assumes that a left
577 * paren has already been seen. Returns a list representing the
578 * parenthesis node which contains the rest of the list. */
579
580 static fnode *
581 parse_format_list (st_parameter_dt *dtp)
582 {
583 fnode *head, *tail;
584 format_token t, u, t2;
585 int repeat;
586 format_data *fmt = dtp->u.p.fmt;
587 bool save_format;
588
589 head = tail = NULL;
590 save_format = !is_internal_unit (dtp);
591
592 /* Get the next format item */
593 format_item:
594 t = format_lex (fmt);
595 format_item_1:
596 switch (t)
597 {
598 case FMT_POSINT:
599 repeat = fmt->value;
600
601 t = format_lex (fmt);
602 switch (t)
603 {
604 case FMT_LPAREN:
605 get_fnode (fmt, &head, &tail, FMT_LPAREN);
606 tail->repeat = repeat;
607 tail->u.child = parse_format_list (dtp);
608 if (fmt->error != NULL)
609 goto finished;
610
611 goto between_desc;
612
613 case FMT_SLASH:
614 get_fnode (fmt, &head, &tail, FMT_SLASH);
615 tail->repeat = repeat;
616 goto optional_comma;
617
618 case FMT_X:
619 get_fnode (fmt, &head, &tail, FMT_X);
620 tail->repeat = 1;
621 tail->u.k = fmt->value;
622 goto between_desc;
623
624 case FMT_P:
625 goto p_descriptor;
626
627 default:
628 goto data_desc;
629 }
630
631 case FMT_LPAREN:
632 get_fnode (fmt, &head, &tail, FMT_LPAREN);
633 tail->repeat = 1;
634 tail->u.child = parse_format_list (dtp);
635 if (fmt->error != NULL)
636 goto finished;
637
638 goto between_desc;
639
640 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
641 case FMT_ZERO: /* Same for zero. */
642 t = format_lex (fmt);
643 if (t != FMT_P)
644 {
645 fmt->error = "Expected P edit descriptor in format";
646 goto finished;
647 }
648
649 p_descriptor:
650 get_fnode (fmt, &head, &tail, FMT_P);
651 tail->u.k = fmt->value;
652 tail->repeat = 1;
653
654 t = format_lex (fmt);
655 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
656 || t == FMT_G || t == FMT_E)
657 {
658 repeat = 1;
659 goto data_desc;
660 }
661
662 fmt->saved_token = t;
663 goto optional_comma;
664
665 case FMT_P: /* P and X require a prior number */
666 fmt->error = "P descriptor requires leading scale factor";
667 goto finished;
668
669 case FMT_X:
670 /*
671 EXTENSION!
672
673 If we would be pedantic in the library, we would have to reject
674 an X descriptor without an integer prefix:
675
676 fmt->error = "X descriptor requires leading space count";
677 goto finished;
678
679 However, this is an extension supported by many Fortran compilers,
680 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
681 runtime library, and make the front end reject it if the compiler
682 is in pedantic mode. The interpretation of 'X' is '1X'.
683 */
684 get_fnode (fmt, &head, &tail, FMT_X);
685 tail->repeat = 1;
686 tail->u.k = 1;
687 goto between_desc;
688
689 case FMT_STRING:
690 get_fnode (fmt, &head, &tail, FMT_STRING);
691
692 tail->u.string.p = fmt->string;
693 tail->u.string.length = fmt->value;
694 tail->repeat = 1;
695 goto optional_comma;
696
697 case FMT_DC:
698 case FMT_DP:
699 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
700 "descriptor not allowed");
701 save_format = true;
702 /* Fall through. */
703 case FMT_S:
704 case FMT_SS:
705 case FMT_SP:
706 case FMT_BN:
707 case FMT_BZ:
708 get_fnode (fmt, &head, &tail, t);
709 tail->repeat = 1;
710 goto between_desc;
711
712 case FMT_COLON:
713 get_fnode (fmt, &head, &tail, FMT_COLON);
714 tail->repeat = 1;
715 goto optional_comma;
716
717 case FMT_SLASH:
718 get_fnode (fmt, &head, &tail, FMT_SLASH);
719 tail->repeat = 1;
720 tail->u.r = 1;
721 goto optional_comma;
722
723 case FMT_DOLLAR:
724 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
725 tail->repeat = 1;
726 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
727 save_format = false;
728 goto between_desc;
729
730
731 case FMT_T:
732 case FMT_TL:
733 case FMT_TR:
734 t2 = format_lex (fmt);
735 if (t2 != FMT_POSINT)
736 {
737 fmt->error = posint_required;
738 goto finished;
739 }
740 get_fnode (fmt, &head, &tail, t);
741 tail->u.n = fmt->value;
742 tail->repeat = 1;
743 goto between_desc;
744
745 case FMT_I:
746 case FMT_B:
747 case FMT_O:
748 case FMT_Z:
749 case FMT_E:
750 case FMT_EN:
751 case FMT_ES:
752 case FMT_D:
753 case FMT_L:
754 case FMT_A:
755 case FMT_F:
756 case FMT_G:
757 repeat = 1;
758 goto data_desc;
759
760 case FMT_H:
761 get_fnode (fmt, &head, &tail, FMT_STRING);
762
763 if (fmt->format_string_len < 1)
764 {
765 fmt->error = bad_hollerith;
766 goto finished;
767 }
768
769 tail->u.string.p = fmt->format_string;
770 tail->u.string.length = 1;
771 tail->repeat = 1;
772
773 fmt->format_string++;
774 fmt->format_string_len--;
775
776 goto between_desc;
777
778 case FMT_END:
779 fmt->error = unexpected_end;
780 goto finished;
781
782 case FMT_BADSTRING:
783 goto finished;
784
785 case FMT_RPAREN:
786 goto finished;
787
788 default:
789 fmt->error = unexpected_element;
790 goto finished;
791 }
792
793 /* In this state, t must currently be a data descriptor. Deal with
794 things that can/must follow the descriptor */
795 data_desc:
796 switch (t)
797 {
798 case FMT_P:
799 t = format_lex (fmt);
800 if (t == FMT_POSINT)
801 {
802 fmt->error = "Repeat count cannot follow P descriptor";
803 goto finished;
804 }
805
806 fmt->saved_token = t;
807 get_fnode (fmt, &head, &tail, FMT_P);
808
809 goto optional_comma;
810
811 case FMT_L:
812 t = format_lex (fmt);
813 if (t != FMT_POSINT)
814 {
815 if (notification_std(GFC_STD_GNU) == ERROR)
816 {
817 fmt->error = posint_required;
818 goto finished;
819 }
820 else
821 {
822 fmt->saved_token = t;
823 fmt->value = 1; /* Default width */
824 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
825 save_format = false;
826 }
827 }
828
829 get_fnode (fmt, &head, &tail, FMT_L);
830 tail->u.n = fmt->value;
831 tail->repeat = repeat;
832 break;
833
834 case FMT_A:
835 t = format_lex (fmt);
836 if (t == FMT_ZERO)
837 {
838 fmt->error = zero_width;
839 goto finished;
840 }
841
842 if (t != FMT_POSINT)
843 {
844 fmt->saved_token = t;
845 fmt->value = -1; /* Width not present */
846 }
847
848 get_fnode (fmt, &head, &tail, FMT_A);
849 tail->repeat = repeat;
850 tail->u.n = fmt->value;
851 break;
852
853 case FMT_D:
854 case FMT_E:
855 case FMT_F:
856 case FMT_G:
857 case FMT_EN:
858 case FMT_ES:
859 get_fnode (fmt, &head, &tail, t);
860 tail->repeat = repeat;
861
862 u = format_lex (fmt);
863 if (t == FMT_G && u == FMT_ZERO)
864 {
865 if (notification_std (GFC_STD_F2008) == ERROR
866 || dtp->u.p.mode == READING)
867 {
868 fmt->error = zero_width;
869 goto finished;
870 }
871 tail->u.real.w = 0;
872 u = format_lex (fmt);
873 if (u != FMT_PERIOD)
874 {
875 fmt->saved_token = u;
876 break;
877 }
878
879 u = format_lex (fmt);
880 if (u != FMT_POSINT)
881 {
882 fmt->error = posint_required;
883 goto finished;
884 }
885 tail->u.real.d = fmt->value;
886 break;
887 }
888 if (t == FMT_F || dtp->u.p.mode == WRITING)
889 {
890 if (u != FMT_POSINT && u != FMT_ZERO)
891 {
892 fmt->error = nonneg_required;
893 goto finished;
894 }
895 }
896 else
897 {
898 if (u != FMT_POSINT)
899 {
900 fmt->error = posint_required;
901 goto finished;
902 }
903 }
904
905 tail->u.real.w = fmt->value;
906 t2 = t;
907 t = format_lex (fmt);
908 if (t != FMT_PERIOD)
909 {
910 /* We treat a missing decimal descriptor as 0. Note: This is only
911 allowed if -std=legacy, otherwise an error occurs. */
912 if (compile_options.warn_std != 0)
913 {
914 fmt->error = period_required;
915 goto finished;
916 }
917 fmt->saved_token = t;
918 tail->u.real.d = 0;
919 break;
920 }
921
922 t = format_lex (fmt);
923 if (t != FMT_ZERO && t != FMT_POSINT)
924 {
925 fmt->error = nonneg_required;
926 goto finished;
927 }
928
929 tail->u.real.d = fmt->value;
930
931 if (t == FMT_D || t == FMT_F)
932 break;
933
934 tail->u.real.e = -1;
935
936 /* Look for optional exponent */
937 t = format_lex (fmt);
938 if (t != FMT_E)
939 fmt->saved_token = t;
940 else
941 {
942 t = format_lex (fmt);
943 if (t != FMT_POSINT)
944 {
945 fmt->error = "Positive exponent width required in format";
946 goto finished;
947 }
948
949 tail->u.real.e = fmt->value;
950 }
951
952 break;
953
954 case FMT_H:
955 if (repeat > fmt->format_string_len)
956 {
957 fmt->error = bad_hollerith;
958 goto finished;
959 }
960
961 get_fnode (fmt, &head, &tail, FMT_STRING);
962
963 tail->u.string.p = fmt->format_string;
964 tail->u.string.length = repeat;
965 tail->repeat = 1;
966
967 fmt->format_string += fmt->value;
968 fmt->format_string_len -= repeat;
969
970 break;
971
972 case FMT_I:
973 case FMT_B:
974 case FMT_O:
975 case FMT_Z:
976 get_fnode (fmt, &head, &tail, t);
977 tail->repeat = repeat;
978
979 t = format_lex (fmt);
980
981 if (dtp->u.p.mode == READING)
982 {
983 if (t != FMT_POSINT)
984 {
985 fmt->error = posint_required;
986 goto finished;
987 }
988 }
989 else
990 {
991 if (t != FMT_ZERO && t != FMT_POSINT)
992 {
993 fmt->error = nonneg_required;
994 goto finished;
995 }
996 }
997
998 tail->u.integer.w = fmt->value;
999 tail->u.integer.m = -1;
1000
1001 t = format_lex (fmt);
1002 if (t != FMT_PERIOD)
1003 {
1004 fmt->saved_token = t;
1005 }
1006 else
1007 {
1008 t = format_lex (fmt);
1009 if (t != FMT_ZERO && t != FMT_POSINT)
1010 {
1011 fmt->error = nonneg_required;
1012 goto finished;
1013 }
1014
1015 tail->u.integer.m = fmt->value;
1016 }
1017
1018 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1019 {
1020 fmt->error = "Minimum digits exceeds field width";
1021 goto finished;
1022 }
1023
1024 break;
1025
1026 default:
1027 fmt->error = unexpected_element;
1028 goto finished;
1029 }
1030
1031 /* Between a descriptor and what comes next */
1032 between_desc:
1033 t = format_lex (fmt);
1034 switch (t)
1035 {
1036 case FMT_COMMA:
1037 goto format_item;
1038
1039 case FMT_RPAREN:
1040 goto finished;
1041
1042 case FMT_SLASH:
1043 case FMT_COLON:
1044 get_fnode (fmt, &head, &tail, t);
1045 tail->repeat = 1;
1046 goto optional_comma;
1047
1048 case FMT_END:
1049 fmt->error = unexpected_end;
1050 goto finished;
1051
1052 default:
1053 /* Assume a missing comma, this is a GNU extension */
1054 goto format_item_1;
1055 }
1056
1057 /* Optional comma is a weird between state where we've just finished
1058 reading a colon, slash or P descriptor. */
1059 optional_comma:
1060 t = format_lex (fmt);
1061 switch (t)
1062 {
1063 case FMT_COMMA:
1064 break;
1065
1066 case FMT_RPAREN:
1067 goto finished;
1068
1069 default: /* Assume that we have another format item */
1070 fmt->saved_token = t;
1071 break;
1072 }
1073
1074 goto format_item;
1075
1076 finished:
1077 return head;
1078 }
1079
1080
1081 /* format_error()-- Generate an error message for a format statement.
1082 * If the node that gives the location of the error is NULL, the error
1083 * is assumed to happen at parse time, and the current location of the
1084 * parser is shown.
1085 *
1086 * We generate a message showing where the problem is. We take extra
1087 * care to print only the relevant part of the format if it is longer
1088 * than a standard 80 column display. */
1089
1090 void
1091 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1092 {
1093 int width, i, j, offset;
1094 char *p, buffer[300];
1095 format_data *fmt = dtp->u.p.fmt;
1096
1097 if (f != NULL)
1098 fmt->format_string = f->source;
1099
1100 if (message == unexpected_element)
1101 sprintf (buffer, message, fmt->error_element);
1102 else
1103 sprintf (buffer, "%s\n", message);
1104
1105 j = fmt->format_string - dtp->format;
1106
1107 offset = (j > 60) ? j - 40 : 0;
1108
1109 j -= offset;
1110 width = dtp->format_len - offset;
1111
1112 if (width > 80)
1113 width = 80;
1114
1115 /* Show the format */
1116
1117 p = strchr (buffer, '\0');
1118
1119 memcpy (p, dtp->format + offset, width);
1120
1121 p += width;
1122 *p++ = '\n';
1123
1124 /* Show where the problem is */
1125
1126 for (i = 1; i < j; i++)
1127 *p++ = ' ';
1128
1129 *p++ = '^';
1130 *p = '\0';
1131
1132 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1133 }
1134
1135
1136 /* revert()-- Do reversion of the format. Control reverts to the left
1137 * parenthesis that matches the rightmost right parenthesis. From our
1138 * tree structure, we are looking for the rightmost parenthesis node
1139 * at the second level, the first level always being a single
1140 * parenthesis node. If this node doesn't exit, we use the top
1141 * level. */
1142
1143 static void
1144 revert (st_parameter_dt *dtp)
1145 {
1146 fnode *f, *r;
1147 format_data *fmt = dtp->u.p.fmt;
1148
1149 dtp->u.p.reversion_flag = 1;
1150
1151 r = NULL;
1152
1153 for (f = fmt->array.array[0].u.child; f; f = f->next)
1154 if (f->format == FMT_LPAREN)
1155 r = f;
1156
1157 /* If r is NULL because no node was found, the whole tree will be used */
1158
1159 fmt->array.array[0].current = r;
1160 fmt->array.array[0].count = 0;
1161 }
1162
1163 /* parse_format()-- Parse a format string. */
1164
1165 void
1166 parse_format (st_parameter_dt *dtp)
1167 {
1168 format_data *fmt;
1169
1170 /* Lookup format string to see if it has already been parsed. */
1171
1172 dtp->u.p.fmt = find_parsed_format (dtp);
1173
1174 if (dtp->u.p.fmt != NULL)
1175 {
1176 dtp->u.p.fmt->reversion_ok = 0;
1177 dtp->u.p.fmt->saved_token = FMT_NONE;
1178 dtp->u.p.fmt->saved_format = NULL;
1179 reset_fnode_counters (dtp);
1180 return;
1181 }
1182
1183 /* Not found so proceed as follows. */
1184
1185 dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
1186 fmt->format_string = dtp->format;
1187 fmt->format_string_len = dtp->format_len;
1188
1189 fmt->string = NULL;
1190 fmt->saved_token = FMT_NONE;
1191 fmt->error = NULL;
1192 fmt->value = 0;
1193
1194 /* Initialize variables used during traversal of the tree */
1195
1196 fmt->reversion_ok = 0;
1197 fmt->saved_format = NULL;
1198
1199 /* Allocate the first format node as the root of the tree */
1200
1201 fmt->last = &fmt->array;
1202 fmt->last->next = NULL;
1203 fmt->avail = &fmt->array.array[0];
1204
1205 memset (fmt->avail, 0, sizeof (*fmt->avail));
1206 fmt->avail->format = FMT_LPAREN;
1207 fmt->avail->repeat = 1;
1208 fmt->avail++;
1209
1210 if (format_lex (fmt) == FMT_LPAREN)
1211 fmt->array.array[0].u.child = parse_format_list (dtp);
1212 else
1213 fmt->error = "Missing initial left parenthesis in format";
1214
1215 if (fmt->error)
1216 {
1217 format_error (dtp, NULL, fmt->error);
1218 free_format_hash_table (dtp->u.p.current_unit);
1219 return;
1220 }
1221
1222 /* TODO: Interim fix for PR40508. Revise this for PR40330. */
1223 if (!is_internal_unit(dtp))
1224 save_parsed_format (dtp);
1225 }
1226
1227
1228 /* next_format0()-- Get the next format node without worrying about
1229 * reversion. Returns NULL when we hit the end of the list.
1230 * Parenthesis nodes are incremented after the list has been
1231 * exhausted, other nodes are incremented before they are returned. */
1232
1233 static const fnode *
1234 next_format0 (fnode * f)
1235 {
1236 const fnode *r;
1237
1238 if (f == NULL)
1239 return NULL;
1240
1241 if (f->format != FMT_LPAREN)
1242 {
1243 f->count++;
1244 if (f->count <= f->repeat)
1245 return f;
1246
1247 f->count = 0;
1248 return NULL;
1249 }
1250
1251 /* Deal with a parenthesis node */
1252
1253 for (; f->count < f->repeat; f->count++)
1254 {
1255 if (f->current == NULL)
1256 f->current = f->u.child;
1257
1258 for (; f->current != NULL; f->current = f->current->next)
1259 {
1260 r = next_format0 (f->current);
1261 if (r != NULL)
1262 return r;
1263 }
1264 }
1265
1266 f->count = 0;
1267 return NULL;
1268 }
1269
1270
1271 /* next_format()-- Return the next format node. If the format list
1272 * ends up being exhausted, we do reversion. Reversion is only
1273 * allowed if we've seen a data descriptor since the
1274 * initialization or the last reversion. We return NULL if there
1275 * are no more data descriptors to return (which is an error
1276 * condition). */
1277
1278 const fnode *
1279 next_format (st_parameter_dt *dtp)
1280 {
1281 format_token t;
1282 const fnode *f;
1283 format_data *fmt = dtp->u.p.fmt;
1284
1285 if (fmt->saved_format != NULL)
1286 { /* Deal with a pushed-back format node */
1287 f = fmt->saved_format;
1288 fmt->saved_format = NULL;
1289 goto done;
1290 }
1291
1292 f = next_format0 (&fmt->array.array[0]);
1293 if (f == NULL)
1294 {
1295 if (!fmt->reversion_ok)
1296 return NULL;
1297
1298 fmt->reversion_ok = 0;
1299 revert (dtp);
1300
1301 f = next_format0 (&fmt->array.array[0]);
1302 if (f == NULL)
1303 {
1304 format_error (dtp, NULL, reversion_error);
1305 return NULL;
1306 }
1307
1308 /* Push the first reverted token and return a colon node in case
1309 * there are no more data items. */
1310
1311 fmt->saved_format = f;
1312 return &colon_node;
1313 }
1314
1315 /* If this is a data edit descriptor, then reversion has become OK. */
1316 done:
1317 t = f->format;
1318
1319 if (!fmt->reversion_ok &&
1320 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1321 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1322 t == FMT_A || t == FMT_D))
1323 fmt->reversion_ok = 1;
1324 return f;
1325 }
1326
1327
1328 /* unget_format()-- Push the given format back so that it will be
1329 * returned on the next call to next_format() without affecting
1330 * counts. This is necessary when we've encountered a data
1331 * descriptor, but don't know what the data item is yet. The format
1332 * node is pushed back, and we return control to the main program,
1333 * which calls the library back with the data item (or not). */
1334
1335 void
1336 unget_format (st_parameter_dt *dtp, const fnode *f)
1337 {
1338 dtp->u.p.fmt->saved_format = f;
1339 }
1340