From 1139d69306d67aeeb0ee13a8d2a367933afda8e4 Mon Sep 17 00:00:00 2001 From: Robert Dubner Date: Thu, 6 Nov 2025 07:26:18 -0500 Subject: [PATCH] cobol: Mainly extends compilation and execution in finternal-ebcdic. We expanded our extended testing regime to execute many testcases in EBCDIC mode as well as in ASCII. This exposed hundreds of problems in both compilation (where conversions must be made between the ASCII source code and the EBCDIC execution environment) and in run-time functionality, where results from calls to system routines and internal calculations that must be done in ASCII have to be converted to EBCDIC. These changes also switch to using FIXED_WIDE_INT(128) instead of REAL_VALUE_TYPE when initializing fixed-point COBOL variable types. This provides for accurate initialization up to 37 digits, instead of losing accuracy after 33 digits. These changes also support the implementation of the COBOL DELETE FILE (Format 2) statement. These changes also introduce expanded support for specifying character encodings, including support for locales. co-authored-by: Robert Dubner co-authored-by: James K. Lowden gcc/cobol/ChangeLog: * Make-lang.in: Repair documentation generation. * cdf.y: Changes to tokens. * cobol1.cc (cobol_langhook_handle_option): Add comment. * genapi.cc (function_pointer_from_name): Use data.original() for function name. (parser_initialize_programs): Likewise. (cobol_compare): Make sure encodings of comparands are the same. (move_tree): Change name of DEFAULT_SOURCE_ENCODING macro. (parser_enter_program): Typo. (psa_FldLiteralN): Break out dirty_to_binary() support routine. (dirty_to_binary): Likewise. (parser_alphabet): Rename 'alphabet' to 'collation_sequence'. (parser_allocate): Change wsclear() to be uint32_t instead of char. (parser_label_label): Formatting. (parser_label_goto): Likewise. (get_the_filename): Breakout get_the_filename(), which handles encoding. (parser_file_open): Likewise. (set_up_delete_file_label): Implement DELETE FILE (Format 2). (parser_file_delete_file): Likewise. (parser_file_delete_on_exception): Likewise. (parser_file_delete_not_exception): Likewise. (parser_file_delete_end): Likewise. (parser_call): Use data.original(). (parser_entry): Use data.original(). (mh_source_is_literalN): Convert from sourceref.field->codeset.encoding. (binary_initial_from_float128): Change to "binary_initial". (binary_initial): Calculate in FIXED_WIDE_INT(128) instead of REAL_VALUE_TYPE. (digits_from_int128): New routine uses binary_initial. (digits_from_float128): Removed. Kept as comment for reference. (initial_from_initial): Use binary_initial. (actually_create_the_static_field): Use correct encoding. (parser_symbol_add): Likewise. * genapi.h (parser_file_delete_file): Implement FILE DELETE. (parser_file_delete_on_exception): Implement FILE DELETE. (parser_file_delete_not_exception): Implement FILE DELETE. (parser_file_delete_end): Implement FILE DELETE. * genmath.cc: Include charmaps.h. * genutil.cc (get_literal_string): Change name of DEFAULT_SOURCE_ENCODING macro. * parse.y: Token changes; numerous changes in support of encoding; support for DELETE FILE. * parse_ante.h (name_of): Use data.original(). (class prog_descr_t): Support of locales. (current_options): Formatting. (current_encoding): Formatting. (current_program_index): Formatting. (current_section): Formatting. (current_paragraph): Formatting. (is_integer_literal): Use correct encoding. (value_encoding_check): Handle encoding changes. (alphabet_add): Likewise. (data_division_ready): Likewise. * scan.l: Use data.original(). * show_parse.h: Use correct encoding. * symbols.cc (elementize): Likewise. (symbol_elem_cmp): Handle locale. (struct symbol_elem_t): Likewise. (symbol_locale): Likewise. (field_str): Change DEFAULT_SOURCE_ENCODING macro name. (symbols_alphabet_set): Formatting. (symbols_update): Modify consistency checks. (symbol_locale_add): Locale support. (cbl_locale_t::cbl_locale_t): Locale support. (cbl_alphabet_t::cbl_alphabet_t): New structure. (cbl_alphabet_t::reencode): Formatting. (cbl_alphabet_t::assign): Change name of collation_sequence. (cbl_alphabet_t::also): Likewise. (new_literal_add): Anticipate the need for four-byte characters. (guess_encoding): Eliminate. (cbl_field_t::internalize): Refine conversion of data.initial to specified encoding. * symbols.h (enum symbol_type_t): Add SymLocale. (struct cbl_field_data_t): Incorporate data.orig. (struct cbl_field_t): Likewise. (struct cbl_delete_file_t): New structure. (struct cbl_label_t): Incorporate cbl_delete_file_t. (struct cbl_locale_t): Support for locale. (hex_decode): Comment. (struct cbl_alphabet_t): Incorporate locale; change variable name to collation_sequence. (struct symbol_elem_t): Incorporate locale. (cbl_locale_of): Likewise. (cbl_alphabet_of): Likewise. (symbol_locale_add): Likewise. (wsclear): Type is now uint32_t instead of char. * util.cc (symbol_type_str): Incorporate locale. (cbl_field_t::report_invalid_initial_value): Change test so that pure PIC A() variables are limited to [a-zA-Z] and space. (valid_move): Use DEFAULT_SOURCE_ENCODING macro. (cobol_filename): Formatting. libgcobol/ChangeLog: * charmaps.cc (__gg__encoding_iconv_type): Eliminate trailing '/' characters from encoding names. (__gg__get_charmap): Switch to DEFAULT_SOURCE_ENCODING macro name. * charmaps.h (DEFAULT_CHARMAP_SOURCE): Likewise. (DEFAULT_SOURCE_ENCODING): Likewise. (class charmap_t): Enhance constructor. * encodings.h (valid_encoding): New routine. * gcobolio.h (enum cblc_file_prior_op_t): Support DELETE FILE. * gfileio.cc (get_filename): Likewise. (__io__file_remove): Likewise. (__gg__file_reopen): Likewise. (__io__file_open): Likewise. (gcobol_fileops): Likewise. (__gg__file_delete): Likewise. (__gg__file_remove): Likewise. * intrinsic.cc (get_all_time): Switch to DEFAULT_SOURCE_ENCODING macro name. (ftime_replace): Support ASCII/EBCDIC encoding. (__gg__current_date): Likewise. (__gg__max): Likewise. (__gg__lower_case): Likewise. (numval): Likewise. (numval_c): Likewise. (__gg__upper_case): Likewise. (__gg__when_compiled): Likewise. (gets_int): Likewise. (gets_nanoseconds): Likewise. (fill_cobol_tm): Likewise. (floating_format_tester): Likewise. (__gg__numval_f): Likewise. (__gg__test_numval_f): Likewise. (iscasematch): Likewise. (strcasestr): Likewise. (strcaselaststr): Likewise. (__gg__substitute): Likewise. (__gg__locale_compare): Support for locale. (__gg__locale_date): Likewise. (__gg__locale_time): Likewise. (__gg__locale_time_from_seconds): Likewise. * libgcobol.cc (class ec_status_t): Support for encoding. (int128_to_field): Likewise. (__gg__dirty_to_float): Likewise. (format_for_display_internal): Likewise. (get_float128): Likewise. (compare_field_class): Likewise. (__gg__compare_2): Likewise. (init_var_both): Likewise. (__gg__move): Likewise. (display_both): Likewise. (is_numeric_display_numeric): Likewise. (accept_envar): Likewise. (__gg__get_argv): Likewise. (__gg__unstring): Likewise. (__gg__check_fatal_exception): Likewise. (__gg__adjust_encoding): Likewise. (__gg__func_exception_location): Likewise. (__gg__func_exception_statement): Likewise. (__gg__func_exception_status): Likewise. (__gg__func_exception_file): Likewise. (__gg__just_mangle_name): Likewise. (__gg__function_handle_from_name): Likewise. (get_the_byte): Likewise. (__gg__module_name): Likewise. (__gg__accept_arg_value): Likewise. * xmlparse.cc (fatalError): Formatting. (setDocumentLocator): Formatting. (xmlchar_of): Formatting. (xmlParserErrors_str): Formatting. --- gcc/cobol/Make-lang.in | 2 +- gcc/cobol/cdf.y | 16 +- gcc/cobol/cobol1.cc | 1 + gcc/cobol/genapi.cc | 768 +++++++++++++++++++++++++++-------------- gcc/cobol/genapi.h | 6 + gcc/cobol/genmath.cc | 1 + gcc/cobol/genutil.cc | 2 +- gcc/cobol/parse.y | 320 +++++++++++++---- gcc/cobol/parse_ante.h | 142 ++++---- gcc/cobol/scan.l | 4 +- gcc/cobol/show_parse.h | 8 +- gcc/cobol/symbols.cc | 292 +++++++++++----- gcc/cobol/symbols.h | 137 ++++++-- gcc/cobol/util.cc | 33 +- libgcobol/charmaps.cc | 18 +- libgcobol/charmaps.h | 118 ++++++- libgcobol/encodings.h | 5 + libgcobol/gcobolio.h | 1 + libgcobol/gfileio.cc | 111 +++++- libgcobol/intrinsic.cc | 475 +++++++++++++------------ libgcobol/libgcobol.cc | 91 +++-- libgcobol/xmlparse.cc | 5 +- 22 files changed, 1741 insertions(+), 815 deletions(-) diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in index ed6b588fe72..9f28752f165 100644 --- a/gcc/cobol/Make-lang.in +++ b/gcc/cobol/Make-lang.in @@ -330,7 +330,7 @@ cobol.srcpdf: gcobol.pdf gcobol-io.pdf ln $^ $(srcdir)/cobol/ gcobol.pdf: $(srcdir)/cobol/gcobol.1 - groff -mdoc -T pdf $^ > $@~ + groff -mdoc -t -T pdf $^ > $@~ @mv $@~ $@ gcobol-io.pdf: $(srcdir)/cobol/gcobol.3 groff -mdoc -T pdf $^ > $@~ diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y index f72ed77c964..2d3f8192bc6 100644 --- a/gcc/cobol/cdf.y +++ b/gcc/cobol/cdf.y @@ -244,21 +244,21 @@ apply_cdf_turn( const exception_turn_t& turn ) { %type DEFINED %token OTHER 699 PARAMETER_kw 369 "PARAMETER" %token OFF 688 OVERRIDE 370 -%token THRU 949 -%token TRUE_kw 814 "True" +%token THRU 950 +%token TRUE_kw 815 "True" %token CALL_COBOL 393 "CALL" %token CALL_VERBATIM 394 "CALL (as C)" -%token TURN 816 CHECKING 497 LOCATION 650 ON 690 WITH 843 +%token TURN 817 CHECKING 497 LOCATION 650 ON 690 WITH 844 -%left OR 950 -%left AND 951 -%right NOT 952 -%left '<' '>' '=' NE 953 LE 954 GE 955 +%left OR 951 +%left AND 952 +%right NOT 953 +%left '<' '>' '=' NE 954 LE 955 GE 956 %left '-' '+' %left '*' '/' -%right NEG 957 +%right NEG 958 %define api.prefix {ydf} %define api.token.prefix{YDF_} diff --git a/gcc/cobol/cobol1.cc b/gcc/cobol/cobol1.cc index 3146da57899..77c457d496c 100644 --- a/gcc/cobol/cobol1.cc +++ b/gcc/cobol/cobol1.cc @@ -365,6 +365,7 @@ cobol_langhook_handle_option (size_t scode, return true; case OPT_fdefaultbyte: + // cobol_default_byte is an unsigned ing wsclear(cobol_default_byte); return true; diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 9d30dde96eb..8c5f28ac07d 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -863,8 +863,12 @@ function_pointer_from_name(const cbl_refer_t &name, NULL); // And, hence, no types // Fetch the FUNCTION_DECL for that FUNCTION_TYPE - tree function_decl = gg_build_fn_decl(name.field->data.initial, + char *tname = static_cast(xmalloc(name.field->data.capacity+1)); + memcpy(tname, name.field->data.original(), name.field->data.capacity); + tname[name.field->data.capacity] = '\0'; + tree function_decl = gg_build_fn_decl(tname, fndecl_type); + free(tname); // Take the address of the function decl: tree address_of_function = gg_get_address_of(function_decl); gg_assign(function_pointer, address_of_function); @@ -877,11 +881,11 @@ function_pointer_from_name(const cbl_refer_t &name, gg_assign(function_pointer, gg_cast(build_pointer_type(function_type), gg_call_expr( VOID_P, - "__gg__function_handle_from_literal", - build_int_cst_type(INT, - current_function->our_symbol_table_index), - gg_string_literal(name.field->data.initial), - NULL_TREE))); + "__gg__function_handle_from_literal", + build_int_cst_type(INT, + current_function->our_symbol_table_index), + gg_string_literal(name.field->data.original()), + NULL_TREE))); } else { @@ -919,7 +923,7 @@ parser_initialize_programs( size_t nprogs, if( progs[i].field->type == FldLiteralA ) { SHOW_PARSE_TEXT("\"") - SHOW_PARSE_TEXT(progs[i].field->data.initial) + SHOW_PARSE_TEXT(progs[i].field->data.original()) SHOW_PARSE_TEXT("\"") } else @@ -2246,21 +2250,19 @@ cobol_compare( tree return_int, { // Comparing a FldLiteralN to an alphanumeric - // CONVERSION ALERT. lefty->field->data.initial is an ASCII - // string. We want to convert it to the same encoding as the - // right side. - - cbl_encoding_t enc_left = DEFAULT_CHARMAP_SOURCE; - cbl_encoding_t enc_right = - static_cast(righty->field->codeset.encoding); - + // This next conversion may be overkill. But just in case + // the encodings of the two variables are different, we are + // going to convert left-side text to the right-side encoding + cbl_encoding_t enc_left = lefty->field->codeset.encoding; + cbl_encoding_t enc_right = righty->field->codeset.encoding; size_t outlength; - char *converted = __gg__iconverter(enc_left, - enc_right, - lefty->field->data.initial, - strlen(lefty->field->data.initial)+1, - &outlength ); - + size_t inlength = strlen(lefty->field->data.initial); + char *converted = __gg__iconverter( + enc_left, + enc_right, + lefty->field->data.initial, + inlength, + &outlength ); gg_assign( return_int, gg_call_expr( INT, "__gg__literaln_alpha_compare", @@ -2458,7 +2460,7 @@ move_tree( cbl_field_t *dest, gg_call(VOID, "__gg__string_to_alpha_edited", location, - build_int_cst_type(INT, DEFAULT_CHARMAP_SOURCE), + build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING), psz_source, min_length, member(dest->var_decl_node, "picture"), @@ -3956,7 +3958,7 @@ parser_enter_program( const char *funcname_, if( strcmp(funcname_, "main") == 0 && this_module_has_main ) { - // setting 'retval' to 1 let's the caller know that we are being told + // Setting 'retval' to 1 lets the caller know that we are being told // both to synthesize a main() entry point to duplicate GCC's default // behavior, and to create an explicit entry point named "main". This will // eventually result in a link error (because of the duplicated entry @@ -4164,178 +4166,197 @@ parser_init_list() gg_call(VOID, "__gg__variables_to_init", gg_get_address_of(array), - wsclear() ? gg_string_literal(wsclear()) : null_pointer_node, + wsclear() ? build_string_literal(1, (const char *)wsclear()) + : null_pointer_node, NULL_TREE); } -static void -psa_FldLiteralN(struct cbl_field_t *field ) +static +FIXED_WIDE_INT(128) +dirty_to_binary(const char *instring, + uint32_t &capacity, + uint32_t &digits, + int32_t &rdigits, + uint64_t &attr) { - Analyze(); - SHOW_PARSE - { - SHOW_PARSE_HEADER - SHOW_PARSE_FIELD(" ", field) - SHOW_PARSE_END - } - // We are constructing a completely static constant structure, based on the - // text string in .initial - - CHECK_FIELD(field); + digits = 0; + rdigits = 0; + attr = 0; FIXED_WIDE_INT(128) value = 0; - do + // We need to convert data.initial to an FIXED_WIDE_INT(128) value + const char *p = instring; + int sign = 1; + if( *p == '-' ) { - // This is a false do{}while, to isolate the variables: + attr |= signable_e; + sign = -1; + p += 1; + } + else if( *p == '+' ) + { + // We set it signable so that the instruction DISPLAY +1 + // actually outputs "+1" + attr |= signable_e; + p += 1; + } - // We need to convert data.initial to an FIXED_WIDE_INT(128) value - char *p = const_cast(field->data.initial); - int sign = 1; - if( *p == '-' ) - { - field->attr |= signable_e; - sign = -1; - p += 1; - } - else if( *p == '+' ) - { - // We set it signable so that the instruction DISPLAY +1 - // actually outputs "+1" - field->attr |= signable_e; - p += 1; - } + // We need to be able to handle + // 123 + // 123.456 + // 123E + // 123.456E + // where can be N, +N and -N + // + // Oh, yeah, and we're talking handling up to 32 digits, or more, so using + // library routines is off the table. + + int rdigit_delta = 0; + int exponent = 0; + const char *exp = strchr(p, 'E'); + if( !exp ) + { + exp = strchr(p, 'e'); + } + if(exp) + { + exponent = atoi(exp+1); + } - // We need to be able to handle - // 123 - // 123.456 - // 123E - // 123.456E - // where can be N, +N and -N - // - // Oh, yeah, and we're talking handling up to 32 digits, or more, so using - // library routines is off the table. + // We can now calculate the value, and the number of digits and rdigits. - int digits = 0; - int rdigits = 0; - int rdigit_delta = 0; - int exponent = 0; + // We count up leading zeroes as part of the attr->digits calculation. + // It turns out that certain comparisons need to know the number of digits, + // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, + // we need to count up leading zeroes. - const char *exp = strchr(p, 'E'); - if( !exp ) + for(;;) + { + char ch = *p++; + if( ch == symbol_decimal_point() ) { - exp = strchr(p, 'e'); + rdigit_delta = 1; + continue; } - if(exp) + if( ch < '0' || ch > '9' ) { - exponent = atoi(exp+1); + break; } + digits += 1; + rdigits += rdigit_delta; + value *= 10; + value += ch - '0'; + } - // We can now calculate the value, and the number of digits and rdigits. - - // We count up leading zeroes as part of the attr->digits calculation. - // It turns out that certain comparisons need to know the number of digits, - // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, - // we need to count up leading zeroes. - - for(;;) + if( exponent < 0 ) + { + rdigits += -exponent; + } + else + { + while(exponent--) { - char ch = *p++; - if( ch == symbol_decimal_point() ) + if(rdigits) { - rdigit_delta = 1; - continue; + rdigits -= 1; } - if( ch < '0' || ch > '9' ) + else { - break; + digits += 1; + value *= 10; } - digits += 1; - rdigits += rdigit_delta; - value *= 10; - value += ch - '0'; } + } - if( exponent < 0 ) - { - rdigits += -exponent; - } - else - { - while(exponent--) - { - if(rdigits) - { - rdigits -= 1; - } - else - { - digits += 1; - value *= 10; - } - } - } + if( (int32_t)digits < rdigits ) + { + digits = rdigits; + } - if(digits < rdigits) - { - digits = rdigits; - } - field->data.digits = digits; - field->data.rdigits = rdigits; + // We now need to calculate the capacity. - // We now need to calculate the capacity. + unsigned int min_prec = wi::min_precision(value, UNSIGNED); + if( min_prec > 64 ) + { + // Bytes 15 through 8 are non-zero + capacity = 16; + } + else if( min_prec > 32 ) + { + // Bytes 7 through 4 are non-zero + capacity = 8; + } + else if( min_prec > 16 ) + { + // Bytes 3 and 2 + capacity = 4; + } + else if( min_prec > 8 ) + { + // Byte 1 is non-zero + capacity = 2; + } + else + { + // The value is zero through 0xFF + capacity = 1; + } - unsigned int min_prec = wi::min_precision(value, UNSIGNED); - int capacity; - if( min_prec > 64 ) - { - // Bytes 15 through 8 are non-zero - capacity = 16; - } - else if( min_prec > 32 ) - { - // Bytes 7 through 4 are non-zero - capacity = 8; - } - else if( min_prec > 16 ) - { - // Bytes 3 and 2 - capacity = 4; - } - else if( min_prec > 8 ) + value *= sign; + + // One last adjustment. The number is signable, so the binary value + // is going to be treated as twos complement. That means that the highest + // bit has to be 1 for negative signable numbers, and 0 for positive. If + // necessary, adjust capacity up by one byte so that the variable fits: + + if( capacity < 16 && (attr & signable_e) ) + { + FIXED_WIDE_INT(128) mask + = wi::set_bit_in_zero(capacity * 8 - 1); + if( wi::neg_p (value) && (value & mask) == 0 ) { - // Byte 1 is non-zero - capacity = 2; + capacity *= 2; } - else + else if( !wi::neg_p (value) && (value & mask) != 0 ) { - // The value is zero through 0xFF - capacity = 1; + capacity *= 2; } + } - value *= sign; + return value; + } - // One last adjustment. The number is signable, so the binary value - // is going to be treated as twos complement. That means that the highest - // bit has to be 1 for negative signable numbers, and 0 for positive. If - // necessary, adjust capacity up by one byte so that the variable fits: +static void +psa_FldLiteralN(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + // We are constructing a completely static constant structure, based on the + // text string in .initial - if( capacity < 16 && (field->attr & signable_e) ) - { - FIXED_WIDE_INT(128) mask - = wi::set_bit_in_zero(capacity * 8 - 1); - if( wi::neg_p (value) && (value & mask) == 0 ) - { - capacity *= 2; - } - else if( !wi::neg_p (value) && (value & mask) != 0 ) - { - capacity *= 2; - } - } - field->data.capacity = capacity; + CHECK_FIELD(field); - }while(0); + uint32_t capacity; + uint32_t digits; + int32_t rdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(), + capacity, + digits, + rdigits, + attr); + // This is a rare occurrence of a parser_xxx call changing the entry + // in the symbol table. + field->data.capacity = capacity; + field->data.digits = digits; + field->data.rdigits = rdigits; + field->attr |= attr; char base_name[257]; char id_string[32] = ""; @@ -5136,9 +5157,9 @@ parser_alphabet( const cbl_alphabet_t& alphabet ) // character i has the ordinal alphabet[i] unsigned char ch = i; - ach[ch] = (alphabet.alphabet[i]); + ach[ch] = (alphabet.collation_sequence[i]); gg_assign( gg_array_value(table256, ch), - build_int_cst_type(UCHAR, (alphabet.alphabet[i])) ); + build_int_cst_type(UCHAR, (alphabet.collation_sequence[i])) ); } unsigned int low_char = alphabet.low_char; @@ -6811,7 +6832,7 @@ parser_allocate(cbl_refer_t size_or_based, cbl_field_t *f_working = current_options().initial_working(); cbl_field_t *f_local = current_options().initial_local(); - int default_byte = wsclear() ? *wsclear() : -1; + unsigned int default_byte = wsclear() ? *wsclear() : (uint32_t)(-1); gg_call(VOID, "__gg__allocate", @@ -8201,7 +8222,7 @@ parser_label_label(struct cbl_label_t *label) } CHECK_LABEL(label); - + #if 1 // At the present time, label_verify.lay is returning true, so I edited // out the if( !... ) to quiet cppcheck @@ -8252,7 +8273,7 @@ parser_label_goto(struct cbl_label_t *label) } CHECK_LABEL(label); - + label_verify.go_to(label); label_verify.go_to(label); @@ -9933,6 +9954,44 @@ parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char ) } } +static +tree get_the_filename(bool "ed_name, const cbl_file_t *file) + { + // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. + // The runtime has a (char *)filename, so we need to + // do a runtime conversion. + + tree psz; // This is going to be either the name of the file, or the + // possible run-time environment variable that will contain + // the name of the file. + + cbl_field_t *field_of_name = symbol_field_forward(file->filename); + quoted_name = false; + if( field_of_name->type == FldForward ) + { + // The target of ASSIGN TO was unquoted, but didn't resolve to a + // cbl_field_t. This means that the name of the field is an + // environment variable that will hold the file name + psz = gg_define_char_star(); + gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); + } + else + { + // The name is coming from a presumably FldAlphaNumeric variable + psz = get_string_from(field_of_name); + gg_call( CHAR_P, + "__gg__convert_encoding", + psz, + build_int_cst_type(INT, + field_of_name->codeset.encoding), + build_int_cst_type(INT, + DEFAULT_SOURCE_ENCODING), + NULL_TREE); + quoted_name = true; + } + return psz; + } + void parser_file_open( struct cbl_file_t *file, int mode_char ) { @@ -9985,45 +10044,15 @@ parser_file_open( struct cbl_file_t *file, int mode_char ) TRACE1_END } - // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. - // The runtime has a (char *)filename, so we need to - // do a runtime conversion. - - tree psz; // This is going to be either the name of the file, or the - // possible run-time environment variable that will contain - // the name of the file. - - cbl_field_t *field_of_name = symbol_field_forward(file->filename); - bool quoted_name = false; - if( field_of_name->type == FldForward ) - { - // The target of ASSIGN TO was unquoted, but didn't resolve to a - // cbl_field_t. This means that the name of the field is an - // environment variable that will hold the file name - psz = gg_define_char_star(); - gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); - } - else - { - // The name is coming from a presumably FldAlphaNumeric variable - psz = get_string_from(field_of_name); - gg_call( CHAR_P, - "__gg__convert_encoding", - psz, - build_int_cst_type(INT, - field_of_name->codeset.encoding), - build_int_cst_type(INT, - DEFAULT_CHARMAP_SOURCE), - NULL_TREE); - quoted_name = true; - } + bool quoted_name; + tree pszFilename = get_the_filename(quoted_name, file); sv_is_i_o = true; store_location_stuff("OPEN"); gg_call(VOID, "__gg__file_open", gg_get_address_of(file->var_decl_node), - psz, + pszFilename, build_int_cst_type(INT, mode_char), quoted_name ? integer_one_node : integer_zero_node, NULL_TREE); @@ -10384,6 +10413,121 @@ parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ ) } } +static void +set_up_delete_file_label(cbl_label_t *delete_file_label) + { + if( delete_file_label ) + { + if( !delete_file_label->structs.delete_file ) + { + delete_file_label->structs.delete_file + = static_cast + (xmalloc(sizeof(struct cbl_delete_file_t))); + // Set up the address pairs for this clause + gg_create_goto_pair( + &delete_file_label->structs.delete_file->over.go_to, + &delete_file_label->structs.delete_file->over.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->exception.go_to, + &delete_file_label->structs.delete_file->exception.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->no_exception.go_to, + &delete_file_label->structs.delete_file->no_exception.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->bottom.go_to, + &delete_file_label->structs.delete_file->bottom.label); + } + } + } + +void +parser_file_delete_file( cbl_label_t *name, + std::vector filenames ) + { + // This removes a file from the file system. It is distinct from the + // FILE DELETE statement, which deletes a record from a file. + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + for(size_t i=0; iname) + } + SHOW_PARSE_END + } + set_up_delete_file_label(name); + tree there_was_an_error = gg_define_int(0); + for(size_t i=0; ivar_decl_node), + pszFilename, + quoted_name ? integer_one_node : integer_zero_node, + NULL_TREE))); + set_user_status(filenames[i]); + } + IF( there_was_an_error, eq_op, integer_zero_node ) + { + // There was no error detected. + gg_append_statement(name->structs.delete_file->no_exception.go_to); + } + ELSE + { + // There was an error detected. + gg_append_statement(name->structs.delete_file->exception.go_to); + } + } + +void +parser_file_delete_on_exception( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.go_to); + gg_append_statement(name->structs.delete_file->exception.label); + } + +void +parser_file_delete_not_exception( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.go_to); + gg_append_statement(name->structs.delete_file->no_exception.label); + } + +void +parser_file_delete_end( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.label); + } + void parser_file_rewrite(cbl_file_t *file, cbl_field_t *record_area, @@ -13639,7 +13783,7 @@ parser_call( cbl_refer_t name, create_and_call(narg, args, NULL_TREE, - name.field->data.initial, + name.field->data.original(), returned_value_type, returned, not_except); @@ -13747,7 +13891,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args ) { SHOW_PARSE_HEADER SHOW_PARSE_TEXT(" ") - SHOW_PARSE_TEXT(name->data.initial) + SHOW_PARSE_TEXT(name->data.original()) SHOW_PARSE_END } @@ -13756,7 +13900,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args ) // Get the name of the ENTRY point. // cppcheck-suppress nullPointerRedundantCheck - char *psz = cobol_name_mangler(name->data.initial); + char *psz = cobol_name_mangler(name->data.original()); // Create a goto/label pair. The label will be set up here; the goto will // be used when we re-enter the containing function: @@ -14642,13 +14786,12 @@ mh_source_is_literalN(cbl_refer_t &destref, SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move") } - // We know that the encoding of the literal::initial is in ASCII - // We need the data sent to __gg__psz_to_alpha_move to be in the // encoding of the destination size_t charsout; - const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + const char *converted = __gg__iconverter( + sourceref.field->codeset.encoding, destref.field->codeset.encoding, sourceref.field->data.initial, strlen(sourceref.field->data.initial), @@ -16086,54 +16229,50 @@ real_powi10 (uint32_t x) return pow10; } +static char * -binary_initial_from_float128(cbl_field_t *field, int rdigits, - REAL_VALUE_TYPE value) +binary_initial(cbl_field_t *field) { // This routine returns an xmalloced buffer designed to replace the // data.initial member of the incoming field char *retval = NULL; - // We need to adjust value so that it has no decimal places - if( rdigits ) + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(), + capacity, + ddigits, + drdigits, + attr); + int scaled_rdigits = get_scaled_rdigits(field); + + int i = field->data.rdigits; + while( i<0 ) { - REAL_VALUE_TYPE pow10 = real_powi10 (rdigits); - real_arithmetic (&value, MULT_EXPR, &value, &pow10); - real_convert (&value, TYPE_MODE (float128_type_node), &value); + value128 = value128/10; + i += 1; } - // We need to make sure that the resulting string will fit into - // a number with 'digits' digits - // Keep in mind that pure binary types, like BINARY-CHAR, have no digits - if( field->data.digits ) + // We take the digits of value128, and put them into ach. We line up + // the rdigits, and we truncate the string after desired_digits + while(drdigits < scaled_rdigits) { - REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits); - mpfr_t m0, m1; - - mpfr_inits2 (REAL_MODE_FORMAT (TYPE_MODE (float128_type_node))->p, - m0, m1, NULL); - mpfr_from_real (m0, &value, MPFR_RNDN); - mpfr_from_real (m1, &pow10, MPFR_RNDN); - mpfr_clear_flags (); - mpfr_fmod (m0, m0, m1, MPFR_RNDN); - real_from_mpfr (&value, m0, - REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)), - MPFR_RNDN); - real_convert (&value, TYPE_MODE (float128_type_node), &value); - mpfr_clears (m0, m1, NULL); + value128 *= 10; + drdigits += 1; + } + while(drdigits > scaled_rdigits) + { + value128 = value128 / 10; + drdigits -= 1; } - - real_roundeven (&value, TYPE_MODE (float128_type_node), &value); - - bool fail = false; - FIXED_WIDE_INT(128) i - = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED); retval = static_cast(xmalloc(field->data.capacity)); gcc_assert(retval); switch(field->data.capacity) { - tree type; + tree type; case 1: case 2: case 4: @@ -16141,12 +16280,12 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits, case 16: type = build_nonstandard_integer_type ( field->data.capacity * BITS_PER_UNIT, 0); - native_encode_wide_int (type, i, PTRCAST(unsigned char, retval), + native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval), field->data.capacity); break; default: fprintf(stderr, - "Trouble in binary_initial_from_float128 at %s() %s:%d\n", + "Trouble in binary_initial at %s() %s:%d\n", __func__, __FILE__, __LINE__); @@ -16157,6 +16296,60 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits, return retval; } +static void +digits_from_int128( char *ach, + cbl_field_t *field, + uint32_t desired_digits, + FIXED_WIDE_INT(128) value128, // cppcheck-suppress unknownMacro + int32_t rdigits) + { + if( value128 < 0 ) + { + value128 = -value128; + } + + // 'rdigits' are the number of rdigits in value128. + + int scaled_rdigits = get_scaled_rdigits(field); + + int i = field->data.rdigits; + while( i<0 ) + { + value128 = value128/10; + i += 1; + } + + // We take the digits of value128, and put them into ach. We line up + // the rdigits, and we truncate the string after desired_digits + while(rdigits < scaled_rdigits) + { + value128 *= 10; + rdigits += 1; + } + while(rdigits > scaled_rdigits) + { + value128 = value128 / 10; + rdigits -= 1; + } + char conv[128]; + print_dec (value128, conv, SIGNED); + size_t len = strlen(conv); + + if( lenname, (double)value, ach); - gcc_assert( strlen(ach) <= field->data.digits ); if( strlen(ach) < width ) { @@ -16203,6 +16394,7 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits } strcpy(retval + (width-strlen(ach)), ach); } +#endif static char * initial_from_initial(cbl_field_t *field) @@ -16211,10 +16403,9 @@ initial_from_initial(cbl_field_t *field) // This routine returns an xmalloced buffer that is intended to replace the // data.initial member of the incoming field. - //fprintf(stderr, "initial_from_initial %s\n", field->name); + //fprintf(stderr, " %s\n", field->name); char *retval = NULL; - int rdigits; // Let's handle the possibility of a figurative constant cbl_figconst_t figconst = cbl_figconst_of(field->data.initial); @@ -16253,6 +16444,8 @@ initial_from_initial(cbl_field_t *field) if( field->data.etc_type == cbl_field_data_t::value_e ) value = TREE_REAL_CST (field->data.value_of ()); +#if 0 + int rdigits; // There is always the infuriating possibility of a P-scaled number if( field->attr & scaled_e ) { @@ -16288,17 +16481,18 @@ initial_from_initial(cbl_field_t *field) // Not P-scaled rdigits = field->data.rdigits; } +#endif switch(field->type) { case FldNumericBin5: case FldIndex: - retval = binary_initial_from_float128(field, rdigits, value); + retval = binary_initial(field); break; case FldNumericBinary: { - retval = binary_initial_from_float128(field, rdigits, value); + retval = binary_initial(field); size_t left = 0; size_t right = field->data.capacity - 1; while(left < right) @@ -16328,7 +16522,17 @@ initial_from_initial(cbl_field_t *field) negative = false; } - digits_from_float128(ach, field, field->data.digits, rdigits, value); + // Convert the data.initial to a __int128 + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial, + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, field->data.digits, value128, drdigits); const char *digits = ach; if( (field->attr & signable_e) @@ -16404,7 +16608,16 @@ initial_from_initial(cbl_field_t *field) size_t ndigits = (field->attr & separate_e) ? field->data.capacity * 2 : field->data.capacity * 2 - 1; - digits_from_float128(ach, field, ndigits, rdigits, value); + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial, + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, ndigits, value128, drdigits); const char *digits = ach; for(size_t i=0; idata.capacity; - digits_from_float128(ach, field, ndigits, rdigits, value); - /* ??? This resides in libgcobol valconv.cc. */ + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(), + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, ndigits, value128, drdigits); + + // __gg__string_to_numeric_edited operates in ASCII space: __gg__string_to_numeric_edited( retval, ach, field->data.rdigits, negative, field->data.picture); + // So now we convert it to the target encoding: + size_t nbytes; + const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING, + field->codeset.encoding, + retval, + strlen(retval), + &nbytes); + strcpy(retval, converted); } } break; @@ -16556,10 +16787,32 @@ initial_from_initial(cbl_field_t *field) case FldLiteralN: { -//// retval = static_cast(xmalloc(field->data.capacity+1)); -//// gcc_assert(retval); -//// memcpy(retval, field->data.initial, field->data.capacity); -//// retval[field->data.capacity] = '\0'; + // This requires annotation. + + // The compiler originally used ASCII for field->data.initial. Later we + // expanded the field with the addition of the codeset.encoding + // For consistency in the parser processing, the FldLiteralN is arriving + // with the Object-Computer's character encoding, and field->data.initial + // is showing up encoded. + + // But on the run-time side, if the initial string is needed, it is + // invariably more useful in ASCII. Consider converting that string to + // a floating-point value, for example. + + // So, we are going to convert the data.initial string back to ASCII + // here. Later on, when we establish the run-time encoding, we will + // check for FldLiteralN and set that to ASCII as well. See + // actually_create_the_static_field(). + + size_t nbytes; + const char *converted = __gg__iconverter(field->codeset.encoding, + DEFAULT_SOURCE_ENCODING, + field->data.initial, + strlen(field->data.initial), + &nbytes); + retval = static_cast(xmalloc(strlen(field->data.initial)+1)); + gcc_assert(retval); + strcpy(retval, converted); break; } @@ -16716,9 +16969,14 @@ actually_create_the_static_field( cbl_field_t *new_var, next_field = TREE_CHAIN(next_field); // INT, "encoding", + // For FldLiteralN we force the encoding to be ASCII. + // See initial_from_initial() for an explanation. CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr), next_field, - build_int_cst_type(INT, new_var->codeset.encoding)); + build_int_cst_type(INT, + new_var->type == FldLiteralN ? + DEFAULT_SOURCE_ENCODING + : new_var->codeset.encoding)); next_field = TREE_CHAIN(next_field); // INT, "alphabet", @@ -17643,6 +17901,10 @@ parser_symbol_add(struct cbl_field_t *new_var ) length_of_initial_string = new_var->data.capacity+1; break; + case FldLiteralN: + length_of_initial_string = strlen(new_initial)+1; + break; + default: length_of_initial_string = new_var->data.capacity; break; diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h index 6582d2e8898..802bba75944 100644 --- a/gcc/cobol/genapi.h +++ b/gcc/cobol/genapi.h @@ -400,6 +400,12 @@ parser_file_rewrite( cbl_file_t *file, cbl_field_t *field, void parser_file_delete( cbl_file_t *file, bool sequentially ); +void parser_file_delete_file( cbl_label_t *name, + std::vector filenames ); +void parser_file_delete_on_exception( cbl_label_t *name ); +void parser_file_delete_not_exception( cbl_label_t *name ); +void parser_file_delete_end( cbl_label_t *name ); + #if condition_lists struct cbl_conditional_t { cbl_field_t *tgt; diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc index 320e6bf4366..7d6ae8c93d4 100644 --- a/gcc/cobol/genmath.cc +++ b/gcc/cobol/genmath.cc @@ -42,6 +42,7 @@ #include "gengen.h" #include "structs.h" #include "../../libgcobol/gcobolio.h" +#include "../../libgcobol/charmaps.h" #include "show_parse.h" void diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 56b6b83728b..63f37f68806 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -1744,7 +1744,7 @@ get_literal_string(cbl_field_t *field) char *buffer = static_cast(xcalloc(1, buffer_length)); size_t charsout; - const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING, field->codeset.encoding, field->data.initial, field->data.capacity, diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 9187a59a3cf..d54a686511f 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -51,7 +51,7 @@ accept_envar_e, }; - struct collating_an_t { + struct coll_alphanat_t { const char *alpha, *national; }; @@ -575,7 +575,7 @@ class locale_tgt_t { RD RECORD RECORDING RECORDS RECURSIVE REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS - REPOSITORY RERUN RESERVE RESTRICTED RESUME + REPOSITORY RERUN RESERVE RESTRICTED RESUME RETRY REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN SAME SCREEN SD @@ -702,8 +702,8 @@ class locale_tgt_t { %type open_io alphabet_etc %type device_name %type numed context_word ctx_name locale_spec -%type collating_sequences collating_ans -%type collating_an +%type char_class_locales coll_alphanats +%type coll_alphanat %type namestr alphabet_lit program_as repo_as %type perform_cond kind_of_name %type alloc_ret @@ -738,6 +738,9 @@ class locale_tgt_t { relative_key_clause reserve_clause sharing_clause %type filename read_body write_body delete_body +%type