From: Robert Dubner Date: Fri, 10 Oct 2025 15:35:44 +0000 (-0400) Subject: cobol: Convert to individual variable character-set encoding. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0e95ebf465c2c05be8cddb8d4659c7b26b1e1a19;p=thirdparty%2Fgcc.git cobol: Convert to individual variable character-set encoding. Prior to this "patch", the GCOBOL compiler was capable of producing binaries that operated internally in either ASCII or EBCDIC. The COBOL specification, however, allows for the concurrent presence of two encodings, known as "alphanumeric" and "national". In order to support this capability, we have chosen to establish an "encoding" characteristic that gets carried along with every variable, This change affected many parts of the COBOL front end compiler. If anybody looks at any of the changes listed below, they will find they fall into two classes: 1) Removing the dependence on a global ASCII vs EBCDIC determination. 2) Adding a dependence on a new ::encoding characteristic of the compile time and run time cbl_field_t and cblc_field_t variable structures. Those structures now contain the cbl_encoding_t ::encoding members, which drive the use of the iconv(3) function in moving back and forth between variable encodings. Although the effort is not complete, these changes represent the bulk of what needs to be done. With these changes in place, all of our current ASCII and EBCDIC tests run properly. gcc/cobol/ChangeLog: * cdf.y: In support of the described changes. * gcobol.1: Likewise. * genapi.cc (level_88_helper): Likewise. (get_level_88_domain): Likewise. (get_class_condition_string): Likewise. (initialize_variable_internal): Likewise. (gg_default_qualification): Likewise. (cobol_compare): Likewise. (move_tree): Likewise. (move_tree_to_field): Likewise. (psa_FldBlob): Likewise. (parser_accept_date_yymmdd): Likewise. (parser_accept_date_yyyymmdd): Likewise. (parser_accept_date_yyddd): Likewise. (parser_accept_date_yyyyddd): Likewise. (parser_accept_date_dow): Likewise. (parser_accept_date_hhmmssff): Likewise. (parser_alphabet): Likewise. (parser_alphabet_use): Likewise. (parser_display_internal): Likewise. (parser_display): Likewise. (is_valuable): Likewise. (parser_division): Likewise. (parser_relop_long): Likewise. (parser_setop): Likewise. (parser_set_conditional88): Likewise. (parser_file_add): Likewise. (parser_file_open): Likewise. (create_and_call): Likewise. (parser_call): Likewise. (mh_identical): Likewise. (mh_source_is_literalN): Likewise. (picky_memcpy): Likewise. (mh_numeric_display): Likewise. (mh_source_is_group): Likewise. (mh_source_is_literalA): Likewise. (move_helper): Likewise. (initial_from_initial): Likewise. (actually_create_the_static_field): Likewise. (psa_FldLiteralA): Likewise. (parser_symbol_add): Likewise. * genmath.cc (arithmetic_operation): Likewise. * genutil.cc (get_binary_value): Likewise. (get_literal_string): Likewise. * genutil.h (EBCDIC_MINUS): Likewise. (EBCDIC_PLUS): Likewise. (EBCDIC_ZERO): Likewise. (EBCDIC_NINE): Likewise. * parse.y: Likewise. * parse_ante.h (name_of): Likewise. (class prog_descr_t): Likewise. (current_encoding): Likewise. (needs_picture): Likewise. (is_callable): Likewise. (field_attr_str): Likewise. (value_encoding_check): Likewise. (field_alloc): Likewise. (file_add): Likewise. * scan.l: Likewise. * structs.cc (create_cblc_field_t): Likewise. * symbols.cc (elementize): Likewise. (cbl_field_attr_str): Likewise. (is_variable_length): Likewise. (field_str): Likewise. (extend_66_capacity): Likewise. (assert): Likewise. (symbols_update): Likewise. (symbol_field_parent_set): Likewise. (add_token): Likewise. (symbol_table_init): Likewise. (symbol_field_add): Likewise. (symbol_field_forward_add): Likewise. (symbol_field_same_as): Likewise. (cbl_alphabet_t::reencode): Likewise. (new_temporary_impl): Likewise. (parser_symbol_add2): Likewise. (new_literal_add): Likewise. (temporaries_t::literal): Likewise. (new_literal): Likewise. (standard_internal): Likewise. (new_temporary): Likewise. (cbl_field_t::holds_ascii): Likewise. (cbl_field_t::is_ascii): Likewise. (cbl_field_t::internalize): Likewise. (symbol_label_add): Likewise. (symbol_label_section_exists): Likewise. (cbl_occurs_t::subscript_ok): Likewise. (cbl_file_t::deforward): Likewise. (has_value): Likewise. * symbols.h (is_numeric): Likewise. (__gg__encoding_iconv_name): Likewise. (current_encoding): Likewise. (struct cbl_field_t): Likewise. (new_literal): Likewise. (class temporaries_t): Likewise. (struct function_descr_t): Likewise. (hex_decode): Likewise. (struct cbl_alphabet_t): Likewise. (struct cbl_file_t): Likewise. * symfind.cc (field_structure): Likewise. (erase_symbol_map_fwds): Likewise. (symbol_find): Likewise. * token_names.h: Likewise. * util.cc (cbl_field_type_str): Likewise. (is_elementary): Likewise. (symbol_field_type_update): Likewise. (cbl_field_t::report_invalid_initial_value): Likewise. (valid_move): Likewise. (valid_picture): Likewise. (type_capacity): Likewise. (gcc_location_set_impl): Likewise. (cbl_unimplementedw): Likewise. libgcobol/ChangeLog: * charmaps.cc (raw_is_SBC): Likewise. (extract_next_code_point): Likewise. (flipper): Likewise. (__gg__ascii_to_ascii_chr): Likewise. (__gg__ascii_to_ebcdic_chr): Likewise. (__gg__raw_to_ascii): Likewise. (__gg__raw_to_ebcdic): Likewise. (convert_cp1252_to_utf8): Likewise. (__gg__text_conversion_override): Likewise. (__gg__ascii_to_ascii): Likewise. (__gg__encoding_iconv_name): Likewise. (__gg__encoding_iconv_type): Likewise. (__gg__ascii_to_ebcdic): Likewise. (__gg__iconverter): Likewise. (__gg__ebcdic_to_ascii): Likewise. (__gg__ascii_to_console): Likewise. (__gg__ebcdic_to_console): Likewise. (__gg__console_to_ascii): Likewise. (__gg__console_to_ebcdic): Likewise. (_to_ctype): Likewise. (_from_ctype): Likewise. (__gg__get_charmap): Likewise. * charmaps.h (internal_is_ebcdic): Likewise. (internal_space): Likewise. (internal_zero): Likewise. (internal_period): Likewise. (internal_comma): Likewise. (internal_dquote): Likewise. (internal_asterisk): Likewise. (internal_plus): Likewise. (internal_minus): Likewise. (internal_cr): Likewise. (internal_ff): Likewise. (internal_newline): Likewise. (internal_return): Likewise. (internal_0): Likewise. (internal_1): Likewise. (internal_2): Likewise. (internal_3): Likewise. (internal_4): Likewise. (internal_5): Likewise. (internal_6): Likewise. (internal_7): Likewise. (internal_8): Likewise. (internal_9): Likewise. (internal_colon): Likewise. (internal_query): Likewise. (internal_A): Likewise. (internal_B): Likewise. (internal_C): Likewise. (internal_D): Likewise. (internal_E): Likewise. (internal_F): Likewise. (internal_G): Likewise. (internal_H): Likewise. (internal_I): Likewise. (internal_J): Likewise. (internal_K): Likewise. (internal_L): Likewise. (internal_M): Likewise. (internal_N): Likewise. (internal_O): Likewise. (internal_P): Likewise. (internal_Q): Likewise. (internal_R): Likewise. (internal_S): Likewise. (internal_T): Likewise. (internal_U): Likewise. (internal_V): Likewise. (internal_W): Likewise. (internal_X): Likewise. (internal_Y): Likewise. (internal_Z): Likewise. (internal_a): Likewise. (internal_b): Likewise. (internal_c): Likewise. (internal_d): Likewise. (internal_e): Likewise. (internal_f): Likewise. (internal_g): Likewise. (internal_h): Likewise. (internal_i): Likewise. (internal_j): Likewise. (internal_k): Likewise. (internal_l): Likewise. (internal_m): Likewise. (internal_n): Likewise. (internal_o): Likewise. (internal_p): Likewise. (internal_q): Likewise. (internal_r): Likewise. (internal_s): Likewise. (internal_t): Likewise. (internal_u): Likewise. (internal_v): Likewise. (internal_w): Likewise. (internal_x): Likewise. (internal_y): Likewise. (internal_z): Likewise. (enum text_codeset_t): Likewise. (__gg__ascii_to_ascii_chr): Likewise. (__gg__ascii_to_ebcdic_chr): Likewise. (ascii_to_internal): Likewise. (__gg__ascii_to_ascii): Likewise. (__gg__ascii_to_ebcdic): Likewise. (ascii_to_internal_str): Likewise. (__gg__raw_to_ascii): Likewise. (__gg__raw_to_ebcdic): Likewise. (raw_to_internal): Likewise. (__gg__ascii_to_console): Likewise. (__gg__ebcdic_to_console): Likewise. (internal_to_console): Likewise. (__gg__console_to_ascii): Likewise. (__gg__console_to_ebcdic): Likewise. (console_to_internal): Likewise. (__gg__ebcdic_to_ascii): Likewise. (internal_to_ascii): Likewise. (__gg__encoding_iconv_name): Likewise. (__gg__encoding_iconv_type): Likewise. (__gg__iconverter): Likewise. (DEFAULT_CHARMAP_SOURCE): Likewise. (class charmap_t): Likewise. (__gg__get_charmap): Likewise. * common-defs.h (EBCDIC_MINUS): Likewise. (EBCDIC_PLUS): Likewise. (EBCDIC_ZERO): Likewise. (EBCDIC_NINE): Likewise. (PACKED_NYBBLE_PLUS): Likewise. (PACKED_NYBBLE_MINUS): Likewise. (PACKED_NYBBLE_UNSIGNED): Likewise. (NUMERIC_DISPLAY_SIGN_BIT_EBCDIC): Likewise. (NUMERIC_DISPLAY_SIGN_BIT): Likewise. (SEPARATE_PLUS): Likewise. (SEPARATE_MINUS): Likewise. (ZONED_ZERO): Likewise. (ZONE_SIGNED_EBCDIC): Likewise. (enum cbl_field_type_t): Likewise. (enum cbl_field_attr_t): Likewise. (enum cbl_figconst_t): Likewise. (enum cbl_encoding_t): Likewise. * constants.cc (struct cblc_field_t): Likewise. (X): Likewise. (S9): Likewise. * gcobolio.h: Likewise. * gfileio.cc (get_filename): Likewise. (relative_file_delete): Likewise. (relative_file_start): Likewise. (relative_file_rewrite): Likewise. (relative_file_write_varying): Likewise. (relative_file_write): Likewise. (sequential_file_write): Likewise. (line_sequential_file_read): Likewise. (sequential_file_read): Likewise. (relative_file_read): Likewise. (file_indexed_open): Likewise. (__gg__file_reopen): Likewise. (__io__file_open): Likewise. (__io__file_close): Likewise. (__gg__file_open): Likewise. * intrinsic.cc (trim_trailing_spaces): Likewise. (is_zulu_format): Likewise. (string_to_dest): Likewise. (get_all_time): Likewise. (ftime_replace): Likewise. (__gg__char): Likewise. (__gg__current_date): Likewise. (__gg__seconds_past_midnight): Likewise. (__gg__formatted_current_date): Likewise. (__gg__formatted_date): Likewise. (__gg__formatted_datetime): Likewise. (__gg__formatted_time): Likewise. (__gg__lower_case): Likewise. (numval): Likewise. (numval_c): Likewise. (__gg__ord): Likewise. (__gg__trim): Likewise. (__gg__random): Likewise. (__gg__random_next): Likewise. (__gg__reverse): Likewise. (__gg__upper_case): Likewise. (__gg__when_compiled): Likewise. (gets_int): Likewise. (gets_year): Likewise. (gets_month): Likewise. (gets_day): Likewise. (gets_day_of_week): Likewise. (gets_day_of_year): Likewise. (gets_week): Likewise. (gets_hours): Likewise. (gets_minutes): Likewise. (gets_seconds): Likewise. (gets_nanoseconds): Likewise. (fill_cobol_tm): Likewise. (__gg__hex_of): Likewise. (floating_format_tester): Likewise. (__gg__numval_f): Likewise. (__gg__test_numval_f): Likewise. (strcasestr): Likewise. (strlaststr): Likewise. (__gg__locale_compare): Likewise. (__gg__locale_date): Likewise. (__gg__locale_time): Likewise. (__gg__locale_time_from_seconds): Likewise. * libgcobol.cc (struct program_state): Likewise. (turn_sign_bit_on): Likewise. (turn_sign_bit_off): Likewise. (is_sign_bit_on): Likewise. (__gg__string_to_alpha_edited_ascii): Likewise. (int128_to_field): Likewise. (edited_to_binary): Likewise. (get_binary_value_local): Likewise. (__gg__get_date_yymmdd): Likewise. (__gg__get_date_yyyymmdd): Likewise. (__gg__get_date_yyddd): Likewise. (__gg__get_yyyyddd): Likewise. (__gg__get_date_dow): Likewise. (__gg__get_date_hhmmssff): Likewise. (__gg__dirty_to_binary_internal): Likewise. (__gg__dirty_to_binary): Likewise. (__gg__dirty_to_float): Likewise. (psz_to_internal): Likewise. (get_scaled_rdigits): Likewise. (format_for_display_internal): Likewise. (format_for_display_local): Likewise. (compare_88): Likewise. (compare_field_class): Likewise. (compare_strings): Likewise. (__gg__compare_2): Likewise. (init_var_both): Likewise. (alpha_to_alpha_move_from_location): Likewise. (alpha_to_alpha_move): Likewise. (__gg__move): Likewise. (__gg__move_literala): Likewise. (normalize_id): Likewise. (inspect_backward_format_1): Likewise. (__gg__inspect_format_1): Likewise. (inspect_backward_format_2): Likewise. (__gg__inspect_format_2): Likewise. (normalize_for_inspect_format_4): Likewise. (__gg__inspect_format_4): Likewise. (move_string): Likewise. (brute_force_trim): Likewise. (__gg__string): Likewise. (display_both): Likewise. (__gg__display_string): Likewise. (not_mangled_core): Likewise. (__gg__accept): Likewise. (__gg__set_initial_switch_value): Likewise. (__gg__onetime_initialization): Likewise. (is_numeric_display_numeric): Likewise. (is_alpha_a_number): Likewise. (__gg__classify): Likewise. (__gg__convert_encoding): Likewise. (__gg__convert_encoding_length): Likewise. (accept_envar): Likewise. (__gg__accept_envar): Likewise. (__gg__set_envar): Likewise. (__gg__get_argc): Likewise. (__gg__get_argv): Likewise. (__gg__get_command_line): Likewise. (__gg__alphabet_use): Likewise. (__gg__ascii_to_internal_field): Likewise. (__gg__ascii_to_internal): Likewise. (__gg__console_to_internal): Likewise. (__gg__parser_set_conditional): Likewise. (__gg__internal_to_console_in_place): Likewise. (__gg__literaln_alpha_compare): Likewise. (__gg__unstring): Likewise. (struct cbl_exception_t): Likewise. (__gg__codeset_figurative_constants): Likewise. (__gg__function_handle_from_cobpath): Likewise. (__gg__just_mangle_name): Likewise. (__gg__function_handle_from_name): Likewise. (get_the_byte): Likewise. (__gg__set_env_name): Likewise. (__gg__get_env_name): Likewise. (__gg__get_env_value): Likewise. (__gg__set_env_value): Likewise. (__gg__fprintf_stderr): Likewise. (__gg__accept_arg_value): Likewise. (__gg__fc_char): Likewise. * libgcobol.h (__gg__dirty_to_binary_internal): Likewise. (__gg__dirty_to_binary): Likewise. (__gg__internal_to_console_in_place): Likewise. (__gg__fc_char): Likewise. (__gg__convert_encoding): Likewise. (__gg__convert_encoding_length): Likewise. * stringbin.cc (string_from_combined): Likewise. (__gg__binary_to_string_internal): Likewise. (__gg__binary_to_string_encoded): Likewise. (__gg__numeric_display_to_binary): Likewise. (__gg__packed_to_binary): Likewise. * stringbin.h (__gg__binary_to_string_internal): Likewise. (__gg__binary_to_string_encoded): Likewise. (__gg__numeric_display_to_binary): Likewise. * valconv.cc (__gg__alphabet_create): Likewise. (__gg__string_to_numeric_edited): Likewise. (__gg__string_to_alpha_edited): Likewise. (__gg__remove_trailing_zeroes): Likewise. * valconv.h (__VALCONV_H): Likewise. * encodings.h: New file. gcc/testsuite/ChangeLog: * cobol.dg/group1/check_88.cob: Likewise. --- diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y index 53fea5d894c..f01c8f6848a 100644 --- a/gcc/cobol/cdf.y +++ b/gcc/cobol/cdf.y @@ -198,7 +198,7 @@ apply_cdf_turn( const exception_turn_t& turn ) { %type namelit name_any name_one %type name subscript subscripts inof %token BOOL -%token FEATURE 365 NUMBER 303 EXCEPTION_NAME 280 "EXCEPTION NAME" +%token FEATURE 366 NUMBER 303 EXCEPTION_NAME 280 "EXCEPTION NAME" %type cdf_expr %type cdf_relexpr cdf_reloper cdf_and cdf_bool_expr @@ -210,55 +210,55 @@ apply_cdf_turn( const exception_turn_t& turn ) { %type cdf_stackable -%token BY 486 -%token COPY 362 -%token CDF_DISPLAY 384 ">>DISPLAY" -%token IN 605 +%token BY 487 +%token COPY 363 +%token CDF_DISPLAY 385 ">>DISPLAY" +%token IN 606 %token NAME 286 %token NUMSTR 305 "numeric literal" -%token OF 686 -%token PSEUDOTEXT 721 -%token REPLACING 743 +%token OF 687 +%token PSEUDOTEXT 723 +%token REPLACING 745 %token LITERAL 298 -%token SUPPRESS 376 - -%token LSUB 367 "(" -%token SUBSCRIPT 375 RSUB 372 ")" - -%token CDF_DEFINE 383 ">>DEFINE" -%token CDF_IF 385 ">>IF" -%token CDF_ELSE 386 ">>ELSE" -%token CDF_END_IF 387 ">>END-IF" -%token CDF_EVALUATE 388 ">>EVALUATE" -%token CDF_WHEN 389 ">>WHEN" -%token CDF_END_EVALUATE 390 ">>END-EVALUATE" - -%token ALL 450 -%token CALL_CONVENTION 391 ">>CALL-CONVENTION" -%token COBOL_WORDS 380 ">>COBOL-WORDS" -%token CDF_PUSH 394 ">>PUSH" -%token CDF_POP 395 ">>POP" -%token SOURCE_FORMAT 396 ">>SOURCE FORMAT" - -%token AS 468 CONSTANT 361 DEFINED 363 +%token SUPPRESS 377 + +%token LSUB 368 "(" +%token SUBSCRIPT 376 RSUB 373 ")" + +%token CDF_DEFINE 384 ">>DEFINE" +%token CDF_IF 386 ">>IF" +%token CDF_ELSE 387 ">>ELSE" +%token CDF_END_IF 388 ">>END-IF" +%token CDF_EVALUATE 389 ">>EVALUATE" +%token CDF_WHEN 390 ">>WHEN" +%token CDF_END_EVALUATE 391 ">>END-EVALUATE" + +%token ALL 451 +%token CALL_CONVENTION 392 ">>CALL-CONVENTION" +%token COBOL_WORDS 381 ">>COBOL-WORDS" +%token CDF_PUSH 395 ">>PUSH" +%token CDF_POP 396 ">>POP" +%token SOURCE_FORMAT 397 ">>SOURCE FORMAT" + +%token AS 469 CONSTANT 362 DEFINED 364 %type DEFINED -%token OTHER 698 PARAMETER_kw 368 "PARAMETER" -%token OFF 687 OVERRIDE 369 -%token THRU 939 -%token TRUE_kw 813 "True" +%token OTHER 699 PARAMETER_kw 369 "PARAMETER" +%token OFF 688 OVERRIDE 370 +%token THRU 952 +%token TRUE_kw 815 "True" -%token CALL_COBOL 392 "CALL" -%token CALL_VERBATIM 393 "CALL (as C)" +%token CALL_COBOL 393 "CALL" +%token CALL_VERBATIM 394 "CALL (as C)" -%token TURN 815 CHECKING 496 LOCATION 649 ON 689 WITH 841 +%token TURN 817 CHECKING 497 LOCATION 650 ON 690 WITH 844 -%left OR 940 -%left AND 941 -%right NOT 942 -%left '<' '>' '=' NE 943 LE 944 GE 945 +%left OR 953 +%left AND 954 +%right NOT 955 +%left '<' '>' '=' NE 956 LE 957 GE 958 %left '-' '+' %left '*' '/' -%right NEG 947 +%right NEG 960 %define api.prefix {ydf} %define api.token.prefix{YDF_} diff --git a/gcc/cobol/gcobol.1 b/gcc/cobol/gcobol.1 index ebb833c793b..9ea9bfd1478 100644 --- a/gcc/cobol/gcobol.1 +++ b/gcc/cobol/gcobol.1 @@ -1417,6 +1417,15 @@ it may contain several directory names separated by a colon .Ev COBPATH is searched first, followed by .Ev LD_LIBRARY_PATH . +Note that +.Ev COBPATH does not change where the runtime linker looks for +.Pa libgcobol.so +itself. +How the runtime linker searches for +.Pa libgcobol.so +when the executable loads is controlled by +.Xr ld.so 8 , +not libgcobol. .Pp Each directory is searched for files whose name ends in .Ql ".so" . diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 11242c19285..99dfc071f2a 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -366,7 +366,8 @@ static char * level_88_helper(size_t parent_capacity, const cbl_domain_elem_t &elem, - size_t &returned_size) + size_t &returned_size, + cbl_encoding_t encoding) { // We return a MALLOCed return value, which the caller must free. char *retval = static_cast(xmalloc(parent_capacity + 64)); @@ -419,11 +420,20 @@ level_88_helper(size_t parent_capacity, memcpy(first_name, elem.name(), first_name_length); first_name[first_name_length] = '\0'; - // Convert it to EBCDIC, when necessary; leave it alone when not necessary. - for(size_t i=0; imapped_character(first_name[i]); } + ///////////////// end of conversion if( parent_capacity == 0 ) { @@ -505,7 +515,10 @@ get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_s char *stream; // Do the first element of the domain - stream = level_88_helper(parent_capacity, domain->first, stream_len); + stream = level_88_helper( parent_capacity, + domain->first, + stream_len, + var->codeset.encoding); if( output_index + stream_len > retval_capacity ) { retval_capacity *= 2; @@ -518,7 +531,10 @@ get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_s free(stream); // Do the second element of the domain - stream = level_88_helper(parent_capacity, domain->last, stream_len); + stream = level_88_helper( parent_capacity, + domain->last, + stream_len, + var->codeset.encoding); if( output_index + stream_len > retval_capacity ) { retval_capacity *= 2; @@ -591,15 +607,9 @@ get_class_condition_string(cbl_field_t *var) uint8_t value1; uint8_t value2; - char achFirstName[256]; - char achLastName[256]; - size_t first_name_length = domain->first.size() ? domain->first.size() : strlen(domain->first.name()); - size_t last_name_length = domain->last.size() - ? domain->last.size() - : strlen(domain->last.name()); if( domain->first.is_numeric ) { @@ -629,21 +639,11 @@ get_class_condition_string(cbl_field_t *var) { // Since the first.name is a single character, we can do this as // a single-character pair. - uint8_t ch1; uint8_t ch2; - char *p2; - size_t one; - p2 = achFirstName; - one = 8; - raw_to_internal(&p2, &one, domain->last.name(), last_name_length); - ch2 = achFirstName[0]; - - p2 = achLastName; - one = 8; - raw_to_internal(&p2, &one, domain->first.name(), first_name_length); - ch1 = achLastName[0]; + ch2 = domain->last.name()[0]; + ch1 = domain->first.name()[0]; if( ch1 < ch2 ) { @@ -670,15 +670,12 @@ get_class_condition_string(cbl_field_t *var) // We are working with a string larger than 1 character. The COBOL // spec says there can't be a THROUGH, so we ignore the last.name: - char *p2; - size_t one; - p2 = achFirstName; - one = 8; - raw_to_internal(&p2, &one, domain->last.name(), last_name_length); - + // size_t first_name_length = domain->first.size() + // ? domain->first.size() + // : strlen(domain->first.name()); for(size_t i=0; ifirst.name()[i]); } } domain += 1; @@ -1232,11 +1229,6 @@ initialize_variable_internal( cbl_refer_t refer, return; } - if( parsed_var->type == FldBlob ) - { - return; - } - Analyze(); SHOW_PARSE { @@ -1663,12 +1655,6 @@ gg_attribute_bit_set(struct cbl_field_t *var, cbl_field_attr_t bits) } #pragma GCC diagnostic pop -static void -gg_default_qualification(struct cbl_field_t * /*var*/) - { -// gg_attribute_bit_clear(var, refmod_e); - } - static void depending_on_value(tree depending_on, cbl_field_t *current_sizer) @@ -2259,21 +2245,27 @@ cobol_compare( tree return_int, case FldLiteralA: { // Comparing a FldLiteralN to an alphanumeric - // It is the case that data.initial is in the original form seen - // in the source code, which means that even in EBCDIC mode the - // characters are in the "ASCII" state. - static size_t buffer_size = 0; - static char *buffer = NULL; - raw_to_internal(&buffer, - &buffer_size, - lefty->field->data.initial, - strlen(lefty->field->data.initial)); + // CONVERSION ALERT. lefty->field->data.initial is an ASCII + // string. We want to convert it to the same encoding as the + // right side. + + cbl_encoding_t enc_left = DEFAULT_CHARMAP_SOURCE; + cbl_encoding_t enc_right = + static_cast(righty->field->codeset.encoding); + + size_t outlength; + char *converted = __gg__iconverter(enc_left, + enc_right, + lefty->field->data.initial, + strlen(lefty->field->data.initial)+1, + &outlength ); gg_assign( return_int, gg_call_expr( INT, "__gg__literaln_alpha_compare", - gg_string_literal(buffer), + build_string_literal(strlen(lefty->field->data.initial)+1, + converted), gg_get_address_of(righty->field->var_decl_node), refer_offset(*righty), refer_size_source( *righty), @@ -2364,9 +2356,12 @@ cobol_compare( tree return_int, static void move_tree( cbl_field_t *dest, tree offset, - tree psz_source, - tree length_bump=integer_zero_node) // psz_source is a null-terminated string + tree psz_source, // psz_source is a null-terminated string + tree length_bump=integer_zero_node) { + // This routine assumes that the psz_source is in the same codeset as the + // dest. + Analyze(); SHOW_PARSE { @@ -2411,15 +2406,20 @@ move_tree( cbl_field_t *dest, { case FldGroup: case FldAlphanumeric: + { // Space out the alphanumeric destination: + charmap_t *charmap = __gg__get_charmap(dest->codeset.encoding); + gg_memset( location, - build_int_cst_type(INT, internal_space), + build_int_cst_type(INT, + charmap->mapped_character(ascii_space)), length ); // Copy the alphanumeric result over. gg_memcpy( location, psz_source, min_length ); break; + } case FldNumericDisplay: case FldNumericEdited: @@ -2433,8 +2433,9 @@ move_tree( cbl_field_t *dest, gg_assign(value, gg_call_expr( INT128, - "__gg__dirty_to_binary_internal", + "__gg__dirty_to_binary", psz_source, + build_int_cst_type(INT, dest->codeset.encoding), source_length, gg_get_address_of(rdigits), NULL_TREE)); @@ -2455,8 +2456,9 @@ move_tree( cbl_field_t *dest, case FldAlphaEdited: { gg_call(VOID, - "__gg__string_to_alpha_edited_ascii", + "__gg__string_to_alpha_edited", location, + build_int_cst_type(INT, DEFAULT_CHARMAP_SOURCE), psz_source, min_length, member(dest->var_decl_node, "picture"), @@ -2493,6 +2495,7 @@ move_tree( cbl_field_t *dest, static void move_tree_to_field(cbl_field_t *field, tree psz) { + // psz has to be in the same encoding as field move_tree(field, integer_zero_node, psz); } @@ -4341,55 +4344,6 @@ psa_FldLiteralN(struct cbl_field_t *field ) // wi::to_wide( DECL_INITIAL(new_var_decl) ) } -static void -psa_FldBlob(struct cbl_field_t *var ) - { - Analyze(); - SHOW_PARSE - { - SHOW_PARSE_HEADER - SHOW_PARSE_FIELD(" ", var) - SHOW_PARSE_END - } - - CHECK_FIELD(var); - - // We are constructing a completely static constant structure. We know the - // capacity. We'll create it from the data.initial. The var_decl_node will - // be a pointer to the data - - char base_name[257]; - char id_string[32] = ""; - - static size_t our_index = 0; - - sprintf(id_string, "." HOST_SIZE_T_PRINT_DEC, (fmt_size_t)++our_index); - strcpy(base_name, var->name); - strcat(base_name, id_string); - - // Build the constructor for the array of bytes - - tree array_type = build_array_type_nelts(UCHAR, var->data.capacity); - tree array_constructor = make_node(CONSTRUCTOR); - TREE_TYPE(array_constructor) = array_type; - TREE_STATIC(array_constructor) = 1; - TREE_CONSTANT(array_constructor) = 1; - - for(size_t i=0; idata.capacity; i++) - { - CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(array_constructor), - build_int_cst_type(INT, i), - build_int_cst_type(UCHAR, var->data.initial[i])); - } - - // The array constructor is ready to be used - tree var_decl_node = gg_define_variable( array_type, - base_name, - vs_static); - DECL_INITIAL(var_decl_node) = array_constructor; - var->var_decl_node = gg_get_address_of(var_decl_node); - } - void parser_accept(const struct cbl_refer_t &tgt, special_name_t special_e, @@ -4923,8 +4877,8 @@ parser_accept_date_yymmdd( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_date_yymmdd", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -4953,8 +4907,8 @@ parser_accept_date_yyyymmdd( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_date_yyyymmdd", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -4983,8 +4937,8 @@ parser_accept_date_yyddd( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_date_yyddd", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -5013,8 +4967,8 @@ parser_accept_date_yyyyddd( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_yyyyddd", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -5043,8 +4997,8 @@ parser_accept_date_dow( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_date_dow", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -5073,8 +5027,8 @@ parser_accept_date_hhmmssff( struct cbl_field_t *target ) tree pointer = gg_define_char_star(); gg_assign(pointer, gg_call_expr(CHAR_P, "__gg__get_date_hhmmssff", + gg_get_address_of(target->var_decl_node), NULL_TREE)); - gg_default_qualification(target); move_tree_to_field( target, pointer); @@ -5097,6 +5051,7 @@ parser_accept_date_hhmmssff( struct cbl_field_t *target ) * * The parameter is always a reference to an element in the symbol table. */ + void parser_alphabet( cbl_alphabet_t& alphabet ) { @@ -5116,24 +5071,32 @@ parser_alphabet( cbl_alphabet_t& alphabet ) case EBCDIC_e: fprintf(stderr, "EBCDIC\n"); break; + case UTF8_e: + fprintf(stderr, "UTF8\n"); + break; case custom_encoding_e: fprintf(stderr, "%s\n", alphabet.name); break; + default: + { const char * p = __gg__encoding_iconv_name( alphabet.encoding ); + fprintf(stderr, "%s\n", p? p : "[unknown]"); + } } SHOW_PARSE_END } - size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); - switch(alphabet.encoding) { case ASCII_e: case iso646_e: case EBCDIC_e: + case UTF8_e: break; case custom_encoding_e: { + size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); + unsigned char ach[256]; tree table_type = build_array_type_nelts(UCHAR, 256); @@ -5141,7 +5104,7 @@ parser_alphabet( cbl_alphabet_t& alphabet ) for( int i=0; i<256; i++ ) { // character i has the ordinal alphabet[i] - unsigned char ch = ascii_to_internal(i); + unsigned char ch = i; ach[ch] = (alphabet.alphabet[i]); gg_assign( gg_array_value(table256, ch), @@ -5162,6 +5125,8 @@ parser_alphabet( cbl_alphabet_t& alphabet ) NULL_TREE ); break; } + default: + gcc_unreachable(); } } @@ -5183,9 +5148,14 @@ parser_alphabet_use( cbl_alphabet_t& alphabet ) case EBCDIC_e: fprintf(stderr, "EBCDIC\n"); break; + case UTF8_e: + fprintf(stderr, "UTF8\n"); + break; case custom_encoding_e: fprintf(stderr, "%s\n", alphabet.name); break; + default: + gcc_unreachable(); } SHOW_PARSE_END } @@ -5194,9 +5164,12 @@ parser_alphabet_use( cbl_alphabet_t& alphabet ) switch(alphabet.encoding) { + default: + gcc_unreachable(); case ASCII_e: case iso646_e: case EBCDIC_e: + case UTF8_e: __gg__low_value_character = DEGENERATE_LOW_VALUE; __gg__high_value_character = DEGENERATE_HIGH_VALUE; gg_call(VOID, @@ -5279,6 +5252,7 @@ parser_display_internal(tree file_descriptor, gg_call(VOID, "__gg__display_string", file_descriptor, + build_int_cst_type(INT, refer.field->codeset.encoding), build_string_literal(refer.field->data.capacity, refer.field->data.initial), build_int_cst_type(SIZE_T, refer.field->data.capacity), @@ -5582,7 +5556,8 @@ parser_display( const struct cbl_special_name_t *upon, } else { - gg_assign(file_descriptor,integer_one_node); // stdout is file descriptor 1. + // stdout is file descriptor 1. + gg_assign(file_descriptor, integer_one_node); } for(size_t i=0; icodeset.encoding), NULL_TREE), ne_op, integer_zero_node)); @@ -9472,12 +9449,24 @@ parser_set_conditional88( const cbl_refer_t& refer, bool which_way ) if( !figconst ) { // We are dealing with an ordinary string. - static size_t buffer_size = 0; - static char *buffer = NULL; - size_t length = src->first.size(); - raw_to_internal(&buffer, &buffer_size, src->first.name(), length); + + // When Jim gets around to converting the domain to the target encoding, + // this code will have to be removed +#if 1 + char *fname = xstrdup(src->first.name()); + charmap_t *charmap = __gg__get_charmap(tgt->codeset.encoding); + for(size_t i=0; imapped_character(fname[i]); + } + move_tree_to_field( parent, + build_string_literal(strlen(fname)+1, fname)); + free(fname); +#else move_tree_to_field( parent, - gg_string_literal(buffer)); + build_string_literal(src->first.size()+1, + src->first.name())); +#endif } else { @@ -9694,6 +9683,8 @@ parser_file_add(struct cbl_file_t *file) build_int_cst_type(INT, (int)file->optional), build_int_cst_type(SIZE_T, varies.min), build_int_cst_type(SIZE_T, varies.max), + build_int_cst_type(INT, (int)file->codeset.encoding), + build_int_cst_type(INT, (int)file->codeset.alphabet), NULL_TREE); file->var_decl_node = new_var_decl; } @@ -9782,6 +9773,14 @@ parser_file_open( struct cbl_file_t *file, int mode_char ) { // The name is coming from a presumably FldAlphaNumeric variable psz = get_string_from(field_of_name); + gg_call( CHAR_P, + "__gg__convert_encoding", + psz, + build_int_cst_type(INT, + field_of_name->codeset.encoding), + build_int_cst_type(INT, + DEFAULT_CHARMAP_SOURCE), + NULL_TREE); quoted_name = true; } @@ -13191,8 +13190,10 @@ create_and_call(size_t narg, { // Somebody was discourteous enough to return a NULL pointer // We'll jam in spaces: + charmap_t *charmap = __gg__get_charmap(returned.field->codeset.encoding); + int dest_space = charmap->mapped_character(ascii_space); gg_memset( returned_location, - char_nodes[(unsigned char)internal_space], + char_nodes[(unsigned char)dest_space], returned_length ); } ELSE @@ -13417,13 +13418,13 @@ parser_call( cbl_refer_t name, ne_op, gg_cast(TREE_TYPE(function_pointer), null_pointer_node) ) { - create_and_call(narg, - args, - function_pointer, - nullptr, - returned_value_type, - returned, - not_except); + create_and_call(narg, + args, + function_pointer, + nullptr, + returned_value_type, + returned, + not_except); } ELSE { @@ -13450,7 +13451,8 @@ parser_call( cbl_refer_t name, NULL_TREE); gg_printf("WARNING: %s:%d \"CALL %s\" not found" - " with no \"CALL ON EXCEPTION\" phrase\n", + " with no \"CALL ON EXCEPTION\" phrase.\n" + "(You might need -rdynamic or --export-dynamic for symbols in the executable.)\n", gg_string_literal(current_filename.back().c_str()), build_int_cst_type(INT, CURRENT_LINE_NUMBER), mangled_name, @@ -14268,6 +14270,7 @@ mh_identical(const cbl_refer_t &destref, && destref.field->data.rdigits == sourceref.field->data.rdigits && (destref.field->attr & (signable_e|separate_e|leading_e)) == (sourceref.field->attr & (signable_e|separate_e|leading_e)) + && destref.field->codeset.encoding == sourceref.field->codeset.encoding && !destref.field->occurs.depending_on && !sourceref.field->occurs.depending_on && !destref.refmod.from @@ -14321,19 +14324,24 @@ mh_source_is_literalN(cbl_refer_t &destref, SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move") } - static char *buffer = NULL; - static size_t buffer_size = 0; - raw_to_internal(&buffer, - &buffer_size, - sourceref.field->data.initial, - strlen(sourceref.field->data.initial)); + // We know that the encoding of the literal::initial is in ASCII + + // We need the data sent to __gg__psz_to_alpha_move to be in the + // encoding of the destination + + size_t charsout; + const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + destref.field->codeset.encoding, + sourceref.field->data.initial, + strlen(sourceref.field->data.initial), + &charsout); gg_call(VOID, "__gg__psz_to_alpha_move", gg_get_address_of(destref.field->var_decl_node), refer_offset(destref), refer_size_dest(destref), - gg_string_literal(buffer), - build_int_cst_type(SIZE_T, strlen(sourceref.field->data.initial)), + gg_string_literal(converted), + build_int_cst_type(SIZE_T, charsout), NULL_TREE); moved = true; break; @@ -14510,14 +14518,32 @@ mh_source_is_literalN(cbl_refer_t &destref, SHOW_PARSE_INDENT SHOW_PARSE_TEXT(" FldAlphaEdited") } + + // __gg__string_to_alpha_edited expects the source string to be in + // the same encoding as the target: + size_t len = strlen(sourceref.field->data.initial); + char *src = + static_cast(xmalloc(len+1)); + memcpy( src, + sourceref.field->data.initial, + strlen(sourceref.field->data.initial)); + size_t charsout; + const char *converted = __gg__iconverter( + sourceref.field->codeset.encoding, + destref.field->codeset.encoding, + src, + len, + &charsout); gg_call(VOID, - "__gg__string_to_alpha_edited_ascii", + "__gg__string_to_alpha_edited", gg_add( member(destref.field->var_decl_node, "data"), refer_offset(destref) ), - gg_string_literal(sourceref.field->data.initial), - build_int_cst_type(INT, strlen(sourceref.field->data.initial)), + build_int_cst_type(INT, destref.field->codeset.encoding), + gg_string_literal(converted), + build_int_cst_type(INT, len), gg_string_literal(destref.field->data.picture), NULL_TREE); + free(src); moved = true; break; } @@ -14829,8 +14855,11 @@ picky_memset(tree &dest_p, unsigned char value, size_t length) } static void -picky_memcpy(tree &dest_p, const tree &source_p, size_t length) +picky_memcpy(tree &dest_p, const tree &source_p, size_t length, tree zero) { + // This is the routine that copies digits for NumericDisplay. In addition + // to just moving digits from source to destination, it has to handle + // clearing up embedded sign information. if( length ) { tree dest_ep = gg_define_variable(TREE_TYPE(dest_p)); @@ -14839,7 +14868,10 @@ picky_memcpy(tree &dest_p, const tree &source_p, size_t length) build_int_cst_type(SIZE_T, length))); WHILE( dest_p, lt_op, dest_ep ) { - gg_assign(gg_indirect(dest_p), gg_indirect(source_p)); + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(zero, + gg_bitwise_and(gg_indirect(source_p), + build_int_cst_type(UCHAR, 0x0F)))); gg_increment(dest_p); gg_increment(source_p); } @@ -14869,10 +14901,10 @@ mh_numeric_display( const cbl_refer_t &destref, // Fasten your seat belts. - // This routine is complicated by the fact that although I had several + // This routine is complicated by the fact that although I had several // false starts of putting this into libgcobol, I keep coming back to the // fact that assignment of zoned values is common. And, so, there are all - // kinds of things that are known at compile time that would turn into + // kinds of things that are known at compile time that would turn into // execution-time decisions if I moved them to the library. So, complex // or not, I am doing all this code here at compile time because it will // minimize the code at execution time. @@ -14885,11 +14917,19 @@ mh_numeric_display( const cbl_refer_t &destref, // nybble is 0xC0 for positive values, and 0xD0 for negative; all other // digits are 0x70. + charmap_t *charmap_source = + __gg__get_charmap(sourceref.field->codeset.encoding); + charmap_t *charmap_dest = + __gg__get_charmap( destref.field->codeset.encoding); + static tree source_sign_loc = gg_define_variable(UCHAR_P, "..mhnd_sign_loc", vs_file_static); - static tree source_sign_byte = gg_define_variable(UCHAR, - "..mhnd_sign_byte", + static tree dest_sign_loc = gg_define_variable(UCHAR_P, + "..mhnd_dest_sign_loc", + vs_file_static); + static tree source_sign = gg_define_variable(INT, + "..mhnd_sign", vs_file_static); // The destination data pointer static tree dest_p = gg_define_variable( UCHAR_P, @@ -14904,10 +14944,6 @@ mh_numeric_display( const cbl_refer_t &destref, "..mhnd_source_e", vs_file_static); - gg_assign(dest_p, qualified_data_location(destref)); - gg_assign(source_p, gg_add(member(sourceref.field, "data"), - tsource.offset)); - bool source_is_signable = sourceref.field->attr & signable_e; bool source_is_leading = sourceref.field->attr & leading_e; bool source_is_separate = sourceref.field->attr & separate_e; @@ -14916,108 +14952,157 @@ mh_numeric_display( const cbl_refer_t &destref, bool dest_is_leading = destref.field->attr & leading_e; bool dest_is_separate = destref.field->attr & separate_e; - if( source_is_signable ) - { - // The source is signable, so we are going to calculate the location of - // the source sign information. + int switch_source = (source_is_signable ? 4 : 0 ) + + (source_is_leading ? 2 : 0 ) + + (source_is_separate ? 1 : 0 ) ; - gg_assign(source_sign_loc, - gg_add(member(sourceref.field->var_decl_node, "data"), - tsource.offset)); + int switch_dest = (dest_is_signable ? 4 : 0 ) + + (dest_is_leading ? 2 : 0 ) + + (dest_is_separate ? 1 : 0 ) ; - if( (source_is_leading) ) - { - // The source sign location is in the leading position. - if( source_is_separate ) - { - // We have LEADING SEPARATE, so the first actual digit is at - // source_p+1. - gg_increment(source_p); - } - } - else - { - // The sign location is trailing. Whether separate or not, the - // location is the final byte of the data: + // Calculate the start of the source data: + gg_assign(source_p, gg_add(member(sourceref.field, "data"), + tsource.offset)); + + // Calculate the start of the destination data + gg_assign(dest_p, qualified_data_location(destref)); + + // Figure out exactly where the sign is, if any, and where the input + // digits are. + + switch( switch_source ) + { + case 0: + case 1: + case 2: + case 3: + // not signable + gg_assign(source_sign, integer_zero_node); + break; + case 4: + // signable, not leading, not separate + // Calculate location of the sign byte; it's the last byte of the data gg_assign(source_sign_loc, - gg_add(source_sign_loc, + gg_add(source_p, build_int_cst_type(SIZE_T, sourceref.field->data.capacity-1))); - } - // Pick up the byte that contains the sign data, whether internal or - // external: - gg_assign(source_sign_byte, gg_indirect(source_sign_loc)); + break; + case 5: + // signable, not leading, separate + // Calculate location of the sign byte; it's the last byte of the data + gg_assign(source_sign_loc, + gg_add(source_p, + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1))); + break; + case 6: + // signable, leading, not separate + // Calculate location of the sign byte; it's the first byte of the data + gg_assign(source_sign_loc, source_p); + break; + case 7: + // signable, leading, separate + // Calculate location of the sign byte; it's the first byte of the data + gg_assign(source_sign_loc, source_p); + gg_increment(source_p); + break; + } + // At this point, the source sign is at source_sign_loc, and the digits + // start at source_p - if( !source_is_separate ) + // Let's learn what the source sign is + if( source_is_signable && source_is_separate ) + { + IF( gg_indirect(source_sign_loc), + eq_op, + build_int_cst_type(UCHAR, + charmap_source->mapped_character(ascii_minus)) ) + { + // Flag the source as negative + gg_assign(source_sign, integer_one_node); + } + ELSE { - // The source is signable and internal. We will modify the zone of - // the source sign byte to force it to be plain vanilla positive. - - // When the move is done, we will replace that byte with the original - // value. - gg_assign(gg_indirect(source_sign_loc), - gg_bitwise_or(build_int_cst_type(UCHAR, ZONED_ZERO), - gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, 0x0F)))); + // Flag the source as positive + gg_assign(source_sign, integer_zero_node); } + ENDIF } - - // Let the shenanigans begin. - - // We are now ready to output the very first byte. - - // The first thing to do is see if we need to output a leading sign - // character - if( dest_is_signable - && dest_is_leading - && dest_is_separate ) + if( source_is_signable && !source_is_separate ) { - // The output is signed, separate, and leading, so the first character - // needs to be either '+' or '-' - if( source_is_separate ) + // We need to look for an indication that we are internally signed. We + // can tell that by checking to see if the digit is between '0' and '9' + IF( gg_indirect(source_sign_loc), + lt_op, + build_int_cst_type(UCHAR, + charmap_source->mapped_character(ascii_0)) ) { - // The source and dest are both signable/separate. - // Oooh. Shiny. We already have the sign character from the source, - // so we assign that to the destination. - gg_assign(gg_indirect(dest_p), source_sign_byte); + // The sign byte is less than '0', so we are negative + gg_assign(source_sign, integer_one_node); } - else + ELSE { - // The source is internal. - if( source_is_signable ) + IF( gg_indirect(source_sign_loc), + gt_op, + build_int_cst_type(UCHAR, + charmap_source->mapped_character(ascii_9)) ) { - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, SEPARATE_MINUS)); - - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, SEPARATE_PLUS)); - } - ENDIF + // The sign byte is greater than '9', so we are negative + gg_assign(source_sign, integer_one_node); } - else + ELSE { - // The source is not signable, so the signed becomes positive no - // matter what the sign of the source. - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, SEPARATE_PLUS)); + // The sign byte is betwixt '0' and '9', so we are positive + gg_assign(source_sign, integer_zero_node); } + ENDIF } - gg_increment(dest_p); + ENDIF } - // We have the leading '+' or '-', assuming one is needed. We can - // now start outputting the digits to the left of the decimal place + // We now know the source's sign, and where its digits are. + + // The first order of business is to move the digits into place. To do + // that, we need to know where things go in the destination: + + switch( switch_dest ) + { + case 0: + case 1: + case 2: + case 3: + // not signable + break; + case 4: + // signable, not leading, not separate + // Calculate location of the sign byte; it's the last byte of the data + gg_assign(dest_sign_loc, + gg_add(dest_p, + build_int_cst_type(SIZE_T, + destref.field->data.capacity-1))); + break; + case 5: + // signable, not leading, separate + // Calculate location of the sign byte; it's the last byte of the data + gg_assign(dest_sign_loc, + gg_add(dest_p, + build_int_cst_type(SIZE_T, + destref.field->data.capacity-1))); + break; + case 6: + // signable, leading, not separate + // Calculate location of the sign byte; it's the first byte of the data + gg_assign(dest_sign_loc, dest_p); + break; + case 7: + // signable, leading, separate + // Calculate location of the sign byte; it's the first byte of the data + gg_assign(dest_sign_loc, dest_p); + gg_increment(dest_p); + break; + } + + // We can now start copying the digits to the left of the decimal place int dest_ldigits = (int)destref.field->data.digits - destref.field->data.rdigits; @@ -15031,9 +15116,9 @@ mh_numeric_display( const cbl_refer_t &destref, // The destination has more ldigits than the source, and needs some // leading zeroes: picky_memset( dest_p, - ZONED_ZERO , + charmap_dest->mapped_character(ascii_0) , dest_ldigits - source_ldigits); - // With the leading zeros set, copy over the ldigits: + // With the leading zeros set, set the number of ldigits to copy: digit_count = source_ldigits; } else if( dest_ldigits == source_ldigits ) @@ -15041,7 +15126,7 @@ mh_numeric_display( const cbl_refer_t &destref, // This is the Goldilocks zone. Everything is *just* right. digit_count = dest_ldigits; } - else + else // dest_ldigits < source_ldigits { // The destination is smaller than the source. We have to throw away the // the high-order digits of the source. If any of them are non-zero, then @@ -15057,7 +15142,7 @@ mh_numeric_display( const cbl_refer_t &destref, IF( gg_indirect(source_p), ne_op, build_int_cst_type( UCHAR, - ZONED_ZERO) ) + charmap_source->mapped_character(ascii_0)) ) { set_exception_code(ec_size_truncation_e); gg_assign(size_error, integer_one_node); @@ -15073,9 +15158,8 @@ mh_numeric_display( const cbl_refer_t &destref, // remaining digits digit_count = dest_ldigits; } - - // The ldigits are in place. We now go the very similar exercise for the - // rdigits: + // We now have digit_count, which will cover the ldigits. Augment it by + // the number of rdigits: int dest_rdigits = destref.field->data.rdigits; int source_rdigits = sourceref.field->data.rdigits; @@ -15103,136 +15187,79 @@ mh_numeric_display( const cbl_refer_t &destref, // over only the necessary rdigits, discarding the ones to the right. digit_count += dest_rdigits; } - picky_memcpy(dest_p, source_p, digit_count); + picky_memcpy(dest_p, + source_p, + digit_count, + build_int_cst_type(UCHAR, + charmap_dest->mapped_character(ascii_0))); picky_memset( dest_p, - ZONED_ZERO , + charmap_dest->mapped_character(ascii_0), trailing_zeros); - // With the digits in place, we need to sort out what to do if the target - // is signable: - if( dest_is_signable ) + // With the digits in place, the only thing left is to establish the sign + + switch( switch_dest ) { - if( dest_is_separate - && !dest_is_leading ) - { - // The target is separate/trailing, so we need to tack a '+' - // or '-' character - if( source_is_separate ) - { - // The source was separate, so we already have what we need in the - // source_sign_byte: - gg_assign(gg_indirect(dest_p), source_sign_byte); - gg_increment(dest_p); - } - else + case 0: + case 1: + case 2: + case 3: + // not signable, so there is nothing to do. + break; + case 4: + case 6: + // signable, not leading, not separate + if( charmap_dest->is_like_ebcdic() ) { - // The source is either internal, or unsigned - if( source_is_signable ) + IF( source_sign, ne_op, integer_zero_node ) { - // The source is signable/internal, so we need to extract the - // sign bit from source_sign_byte - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, SEPARATE_MINUS)); - - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, SEPARATE_PLUS)); - } - ENDIF + // It's negative ebcdic, so we have to turn the bit off. + gg_assign(gg_indirect(dest_sign_loc), + gg_bitwise_and(gg_indirect(dest_sign_loc), + build_int_cst_type(UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT_EBCDIC))); } - else + ELSE { - // The source is unsigned, so dest is positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, - SEPARATE_PLUS)); } - } - gg_increment(dest_p); - } - else if( !dest_is_separate ) - { - // The destination is signed/internal - if( dest_is_leading ) - { - // The sign bit goes into the first byte: - gg_assign(dest_p, qualified_data_location(destref)); + ENDIF } else { - // The sign bit goes into the last byte: - gg_decrement(dest_p); - } - // dest_p now points to the internal sign location - if( internal_codeset_is_ebcdic() ) - { - // For EBCDIC, the zone is going to end up being 0xC0 or 0xD0 - gg_assign(gg_indirect(dest_p), - gg_bitwise_and(gg_indirect(dest_p), - build_int_cst_type(UCHAR, - ZONE_SIGNED_EBCDIC+0x0F))); - } - - if( source_is_signable ) - { - if( source_is_separate ) + IF( source_sign, ne_op, integer_zero_node ) { - // The source is separate, so source_sign_byte is '+' or '-' - IF( source_sign_byte, - eq_op, - build_int_cst_type(UCHAR, SEPARATE_MINUS) ) - { - // The source is negative, so turn on the internal "is minus" bit - gg_assign(gg_indirect(dest_p), - gg_bitwise_or(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - ELSE - ENDIF + // It's negative ascii, so we have to turn the bit on. + gg_assign(gg_indirect(dest_sign_loc), + gg_bitwise_or(gg_indirect(dest_sign_loc), + build_int_cst_type(UCHAR, + NUMERIC_DISPLAY_SIGN_BIT_ASCII))); } - else + ELSE { - // The source is signable/internal, so the sign bit is in - // source_sign_byte. Whatever it is, it has to go into dest_p: - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type(UCHAR, 0) ) - { - // The source was negative, so make the dest negative - gg_assign(gg_indirect(dest_p), - gg_bitwise_or(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - ELSE - ENDIF } + ENDIF } - } - } + break; + case 5: + case 7: + // signable, not leading, separate + // signable, leading, separate + // Calculate location of the sign byte; it's the last byte of the data - if( source_is_signable - && !source_is_separate) - { - // The source is signable internal, so we need to restore the original - // sign byte in the original source data: - gg_assign(gg_indirect(source_sign_loc), source_sign_byte); + IF( source_sign, eq_op, integer_zero_node ) + { + gg_assign(gg_indirect(dest_sign_loc), + build_int_cst_type(UCHAR, + charmap_dest->mapped_character(ascii_plus))); + } + ELSE + { + gg_assign(gg_indirect(dest_sign_loc), + build_int_cst_type(UCHAR, + charmap_dest->mapped_character(ascii_minus))); + } + ENDIF + break; } moved = true; } @@ -15337,7 +15364,9 @@ mh_source_is_group( const cbl_refer_t &destref, ELSE { // There are too-few source bytes: - gg_memset(tdest, build_int_cst_type(INT, internal_space), dbytes); + charmap_t *charmap = __gg__get_charmap(destref.field->codeset.encoding); + int dest_space = charmap->mapped_character(ascii_space); + gg_memset(tdest, build_int_cst_type(INT, dest_space), dbytes); gg_memcpy(tdest, tsource, sbytes); } ENDIF @@ -15346,6 +15375,141 @@ mh_source_is_group( const cbl_refer_t &destref, return retval; } +static bool +mh_source_is_literalA(const cbl_refer_t &destref, + const cbl_refer_t &sourceref, + cbl_round_t rounded, + tree size_error) + { + bool moved = false; + if( sourceref.field->type == FldLiteralA ) + { + // We are moving a literal somewhere. Because a program-id can take + // variables of ANY LENGTH, we don't know the length of the target + // variable. We do, however, know its encoding. So, we are going to + // construct a string with the same number of characters as the source, but + // in the target variable's encoding. + + // We will then call a library routine that will be in charge of trimming + // and space filling. + + cbl_encoding_t encoding_dest = destref.field->codeset.encoding; + charmap_t *charmap_dest = __gg__get_charmap(encoding_dest); + + if( destref.refmod.from + || destref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } + + static char *buffer = NULL; + static size_t buffer_size = 0; + size_t source_length = sourceref.field->data.capacity; + + if( buffer_size < source_length ) + { + buffer_size = source_length; + buffer = static_cast(xrealloc(buffer, source_length)); + } + gcc_assert(buffer); + + cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); + if( figconst ) + { + // We are going to fill 'buffer' with a solid run of the figurative + // constant in the destination codeset. + char const_char = 0x7F; // Head off a compiler warning about + // // uninitialized variables + switch(figconst) + { + case normal_value_e : + // This is not possible, it says here in the fine print. + abort(); + break; + case low_value_e : + const_char = charmap_dest->low_value_character(); + break; + case zero_value_e : + const_char = charmap_dest->mapped_character(ascii_zero); + break; + case space_value_e : + const_char = charmap_dest->mapped_character(ascii_space); + break; + case quote_value_e : + const_char = charmap_dest->quote_character(); + break; + case high_value_e : + const_char = charmap_dest->high_value_character(); + break; + case null_value_e: + const_char = 0x00; + break; + } + memset(buffer, const_char, source_length); + } + else + { + // We are going to convert the source string to the destination codeset, + // and then copy it to 'buffer', trimming if necessary, and space-filling + // to the right if necessary: + cbl_encoding_t encoding_src = sourceref.field->codeset.encoding; + + size_t outlength; + const char *source_string = __gg__iconverter( encoding_src, + encoding_dest, + sourceref.field->data.initial, + source_length, + &outlength ); + // Copy over the converted string + memcpy( buffer, + source_string, + outlength ); + } + + // If the source is flagged ALL, or if we are setting the destination to + // a figurative constant, pass along the ALL bit: + int rounded_parameter = rounded + | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0); + + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__move_literala", + gg_get_address_of(destref.field->var_decl_node), + refer_offset(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE)); + } + else + { + gg_call ( INT, + "__gg__move_literala", + gg_get_address_of(destref.field->var_decl_node), + refer_offset(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE); + } + if( destref.refmod.from + || destref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } + moved = true; + } + return moved; + } + static void move_helper(tree size_error, // This is an INT cbl_refer_t destref, @@ -15451,112 +15615,10 @@ move_helper(tree size_error, // This is an INT if( !moved && sourceref.field->type == FldLiteralA) { - SHOW_PARSE1 - { - SHOW_PARSE_INDENT - SHOW_PARSE_TEXT("__gg__move_literala") - } - - cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); - - if( destref.refmod.from - || destref.refmod.len ) - { - // Let the move routine know to treat the destination as alphanumeric - gg_attribute_bit_set(destref.field, refmod_e); - } - - static char *buffer = NULL; - static size_t buffer_size = 0; - size_t source_length = sourceref.field->data.capacity; - - if( buffer_size < source_length ) - { - buffer_size = source_length; - buffer = static_cast(xrealloc(buffer, buffer_size)); - } - gcc_assert(buffer); - - if( figconst ) - { - char const_char = 0x7F; // Head off a compiler warning about - // // uninitialized variables - switch(figconst) - { - case normal_value_e : - // This is not possible, it says here in the fine print. - abort(); - break; - case low_value_e : - const_char = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e : - const_char = internal_zero; - break; - case space_value_e : - const_char = internal_space; - break; - case quote_value_e : - const_char = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - const_char = ascii_to_internal(__gg__high_value_character); - break; - case null_value_e: - const_char = 0x00; - break; - } - memset(buffer, const_char, source_length); - } - else - { - memset( buffer, ascii_space, source_length); - memcpy( buffer, - sourceref.field->data.initial, - std::min(source_length, (size_t)sourceref.field->data.capacity) ); - for( size_t i=0; ivar_decl_node), - refer_offset(destref), - refer_size_dest(destref), - build_int_cst_type(INT, rounded_parameter), - build_string_literal(source_length, - buffer), - build_int_cst_type( SIZE_T, source_length), - NULL_TREE)); - } - else - { - gg_call ( INT, - "__gg__move_literala", - gg_get_address_of(destref.field->var_decl_node), - refer_offset(destref), - refer_size_dest(destref), - build_int_cst_type(INT, rounded_parameter), - build_string_literal(source_length, - buffer), - build_int_cst_type( SIZE_T, source_length), - NULL_TREE); - } - if( destref.refmod.from - || destref.refmod.len ) - { - // Return that value to its original form - gg_attribute_bit_clear(destref.field, refmod_e); - } - moved = true; + moved = mh_source_is_literalA(destref, + sourceref, + rounded, + size_error); } if( !moved ) @@ -15837,43 +15899,11 @@ initial_from_initial(cbl_field_t *field) int rdigits; // Let's handle the possibility of a figurative constant - cbl_figconst_t figconst = cbl_figconst_of( field->data.initial); - //cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + cbl_figconst_t figconst = cbl_figconst_of(field->data.initial); if( figconst ) { - int const_char = 0xFF; // Head off a compiler warning about uninitialized - // // variables - switch(figconst) - { - case normal_value_e : - // This really should never happen because normal_value_e is zero - abort(); - break; - case low_value_e : - const_char = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e : - const_char = internal_zero; - break; - case space_value_e : - const_char = internal_space; - break; - case quote_value_e : - const_char = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - const_char = __gg__high_value_character; - } - else - { - const_char = ascii_to_internal(__gg__high_value_character); - } - break; - case null_value_e: - break; - } + charmap_t *charmap = __gg__get_charmap(field->codeset.encoding); + int const_char = charmap->figconst_character(figconst); bool set_return = figconst != zero_value_e; if( !set_return ) { @@ -15962,6 +15992,8 @@ initial_from_initial(cbl_field_t *field) case FldNumericDisplay: { + charmap_t *charmap = __gg__get_charmap(field->codeset.encoding); + retval = static_cast(xmalloc(field->data.capacity)); gcc_assert(retval); char *pretval = retval; @@ -15988,17 +16020,17 @@ initial_from_initial(cbl_field_t *field) // This zoned decimal value is signable, separate, and leading. if( negative ) { - *pretval++ = internal_minus; + *pretval++ = charmap->mapped_character(ascii_minus); } else { - *pretval++ = internal_plus; + *pretval++ = charmap->mapped_character(ascii_plus); } } for(size_t i=0; idata.digits; i++) { - // Start by assuming its an value that can't be signed - *pretval++ = internal_zero + ((*digits++) & 0x0F); + // Start by assuming it's an value that can't be signed + *pretval++ = charmap->mapped_character(ascii_0) + ((*digits++) & 0x0F); } if( (field->attr & signable_e) && (field->attr & separate_e) @@ -16007,11 +16039,11 @@ initial_from_initial(cbl_field_t *field) // The value is signable, separate, and trailing if( negative ) { - *pretval++ = internal_minus; + *pretval++ = charmap->mapped_character(ascii_minus); } else { - *pretval++ = internal_plus; + *pretval++ = charmap->mapped_character(ascii_plus); } } if( (field->attr & signable_e) @@ -16019,18 +16051,10 @@ initial_from_initial(cbl_field_t *field) { // This value is signable, and not separate. So, the sign information // goes into the first or last byte: - char *sign_location = field->attr & leading_e ? + char *sign_location = field->attr & leading_e ? retval : retval + field->data.digits - 1 ; - if( internal_codeset_is_ebcdic() ) - { - // Change the zone from 0xFO to 0xC0 - *sign_location &= (ZONE_SIGNED_EBCDIC + 0x0F); - } - if( negative ) - { - // Turn on the sign bit: - *sign_location |= NUMERIC_DISPLAY_SIGN_BIT; - } + *sign_location = charmap->set_digit_negative(*sign_location, + negative); } break; } @@ -16113,10 +16137,8 @@ initial_from_initial(cbl_field_t *field) } else { - size_t buffer_size = 0; size_t length = field->data.capacity; - memset(retval, internal_space, length); - raw_to_internal(&retval, &buffer_size, field->data.initial, length); + memcpy(retval, field->data.initial, length); if( strlen(field->data.initial) < length ) { // If this is true, then the initial string must've been Z'xyz' @@ -16130,6 +16152,7 @@ initial_from_initial(cbl_field_t *field) case FldNumericEdited: { + charmap_t *charmap = __gg__get_charmap(field->codeset.encoding); retval = static_cast(xmalloc(field->data.capacity+1)); gcc_assert(retval); if( field->data.initial && field->attr & quoted_e ) @@ -16140,12 +16163,12 @@ initial_from_initial(cbl_field_t *field) strlen(field->data.initial)); for(size_t i=0; idata.initial[i]); + retval[i] = field->data.initial[i]; } if( length < (size_t)field->data.capacity ) { memset( retval+length, - internal_space, + charmap->mapped_character(ascii_space), (size_t)field->data.capacity - length); } } @@ -16169,7 +16192,9 @@ initial_from_initial(cbl_field_t *field) if( (field->attr & blank_zero_e) && real_iszero (&value) ) { - memset(retval, internal_space, field->data.capacity); + memset( retval, + charmap->mapped_character(ascii_space), + field->data.capacity); } else { @@ -16213,6 +16238,10 @@ initial_from_initial(cbl_field_t *field) case FldLiteralN: { +//// retval = static_cast(xmalloc(field->data.capacity+1)); +//// gcc_assert(retval); +//// memcpy(retval, field->data.initial, field->data.capacity); +//// retval[field->data.capacity] = '\0'; break; } @@ -16368,13 +16397,24 @@ actually_create_the_static_field( cbl_field_t *new_var, build_int_cst_type(SCHAR, new_var->data.rdigits) ); next_field = TREE_CHAIN(next_field); + // INT, "encoding", + CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(INT, new_var->codeset.encoding)); + next_field = TREE_CHAIN(next_field); + + // INT, "alphabet", + CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(INT, new_var->codeset.alphabet)); + next_field = TREE_CHAIN(next_field); + DECL_INITIAL(new_var_decl) = constr; } static void psa_global(cbl_field_t *new_var) { - if( strcmp(new_var->name, "_VERY_TRUE") == 0 ) { new_var->var_decl_node = boolean_true_node; @@ -16579,11 +16619,10 @@ psa_FldLiteralA(struct cbl_field_t *field ) // We are constructing a completely static constant structure. We know the // capacity. We'll create it from the data.initial. The cblc_field_t:data - // will be an ASCII/EBCDIC copy of the .initial data. The .initial will be - // left as ASCII. The var_decl_node will be an ordinary cblc_field_t, which - // means that at this point in time, a FldLiteralA can be used anywhere a - // FldGroup or FldAlphanumeric can be used. We are counting on the parser - // not allowing a FldLiteralA to be a left-hand-side variable. + // will be a copy of the .initial data. The var_decl_node will be an ordinary + // cblc_field_t, which means that at this point in time, a FldLiteralA can be + // used anywhere a FldGroup or FldAlphanumeric can be used. We are counting + // on the parser not allowing a FldLiteralA to be a left-hand-side variable. // First make room static size_t buffer_size = 1024; @@ -16598,17 +16637,7 @@ psa_FldLiteralA(struct cbl_field_t *field ) cbl_figconst_t figconst = cbl_figconst_of( field->data.initial ); gcc_assert(figconst == normal_value_e); - if( internal_codeset_is_ebcdic() ) - { - for( size_t i=0; idata.capacity; i++ ) - { - buffer[i] = ascii_to_internal(field->data.initial[i]); - } - } - else - { - memcpy(buffer, field->data.initial, field->data.capacity); - } + memcpy(buffer, field->data.initial, field->data.capacity); buffer[field->data.capacity] = '\0'; // We have the original nul-terminated text at data.initial. We have a @@ -16673,12 +16702,6 @@ psa_FldLiteralA(struct cbl_field_t *field ) TREE_STATIC(field->var_decl_node) = 1; DECL_PRESERVE_P (field->var_decl_node) = 1; } -// TRACE1 -// { -// TRACE1_INDENT -// TRACE1_TEXT("Finished") -// TRACE1_END -// } } #endif @@ -16859,12 +16882,6 @@ parser_symbol_add(struct cbl_field_t *new_var ) goto done; } - if( new_var->type == FldBlob ) - { - psa_FldBlob(new_var); - goto done; - } - if( new_var->type == FldLiteralA ) { new_var->data.picture = ""; @@ -16873,13 +16890,7 @@ parser_symbol_add(struct cbl_field_t *new_var ) } size_t length_of_initial_string = 0; - const char *new_initial = NULL; - - // gg_printf("parser_symbol_add %s\n", build_string_literal( strlen(new_var->name)+1, new_var->name), NULL_TREE); - - // If we are dealing with an alphanumeric, and it is not hex_encoded, we - // want to convert to single-byte-encoding (if it happens to be UTF-8) and - // to EBCDIC, if EBCDIC is in force: + char *new_initial = NULL; // Make sure we have a new variable to work with. if( !new_var ) @@ -17321,7 +17332,9 @@ parser_symbol_add(struct cbl_field_t *new_var ) } else { - new_initial = new_var->data.initial; + new_initial = static_cast(xmalloc(length_of_initial_string)); + gcc_assert(new_initial); + memcpy(new_initial, new_var->data.initial, length_of_initial_string); } actual_allocate: @@ -17331,6 +17344,7 @@ parser_symbol_add(struct cbl_field_t *new_var ) new_initial, immediate_parent, new_var_decl); + free(new_initial); if( level_88_string ) { diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc index 27d5c1ee65f..320e6bf4366 100644 --- a/gcc/cobol/genmath.cc +++ b/gcc/cobol/genmath.cc @@ -178,6 +178,7 @@ arithmetic_operation(size_t nC, cbl_num_result_t *C, temp_field.data.rdigits = remainder->field->data.rdigits ; temp_field.data.initial = remainder->field->data.initial ; temp_field.data.picture = remainder->field->data.picture ; + temp_field.codeset = remainder->field->codeset ; parser_symbol_add(&temp_field); temp_remainder.field = &temp_field; diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 4b296e46e87..33057ff9345 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -890,7 +890,8 @@ get_binary_value( tree value, signp, pointer, build_int_cst_type(INT, field->data.digits), - NULL_TREE)); + build_int_cst_type(INT, field->codeset.encoding), + NULL_TREE)); // Assign the value we got from the string to our "return" value: gg_assign(value, gg_cast(TREE_TYPE(value), val128)); } @@ -1739,11 +1740,13 @@ get_literal_string(cbl_field_t *field) size_t buffer_length = field->data.capacity+1; char *buffer = static_cast(xcalloc(1, buffer_length)); - for(size_t i=0; idata.capacity; i++) - { - buffer[i] = ascii_to_internal(field->data.initial[i]); - } - + size_t charsout; + const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + field->codeset.encoding, + field->data.initial, + field->data.capacity, + &charsout); + memcpy(buffer, converted, field->data.capacity+1); return buffer; } diff --git a/gcc/cobol/genutil.h b/gcc/cobol/genutil.h index f12124ecc5b..e53e11a46bb 100644 --- a/gcc/cobol/genutil.h +++ b/gcc/cobol/genutil.h @@ -30,11 +30,6 @@ #ifndef _GENUTIL_H_ #define _GENUTIL_H_ -#define EBCDIC_MINUS (0x60) -#define EBCDIC_PLUS (0x4E) -#define EBCDIC_ZERO (0xF0) -#define EBCDIC_NINE (0xF9) - bool internal_codeset_is_ebcdic(); extern bool exception_location_active; diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 039cb957de0..c497b8f12f7 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -51,9 +51,14 @@ accept_envar_e, }; + struct collating_an_t { + const char *alpha, *national; + }; + class literal_t { size_t isym; public: + cbl_encoding_t encoding; char prefix[3]; size_t len; char *data; @@ -96,9 +101,32 @@ } literal_t& set_prefix( const char *input, size_t len ) { + encoding = current_encoding('A'); assert(len < sizeof(prefix)); std::fill(prefix, prefix + sizeof(prefix), '\0'); std::transform(input, input + len, prefix, toupper); + switch(prefix[0]) { + case '\0': case 'Z': + encoding = current_encoding('A'); + break; + case 'N': + encoding = current_encoding('N'); + if( 'X' == prefix[1] ) { + cbl_unimplemented("NX literals"); + } + break; + case 'G': + cbl_unimplemented("DBCS encoding not supported"); + break; + case 'U': + encoding = UTF8_e; + break; + case 'X': + break; + default: + gcc_unreachable(); + } + assert(encoding <= iconv_YU_e); return *this; } bool @@ -300,6 +328,7 @@ #include "genapi.h" #include "../../libgcobol/exceptl.h" #include "exceptg.h" +#include "../../libgcobol/charmaps.h" #include "parse_ante.h" %} @@ -364,6 +393,7 @@ %token MIGHT_BE "IS or IS NOT" FUNCTION_UDF "UDF name" FUNCTION_UDF_0 "UDF" + DEFAULT %token DATE_FMT "date format" TIME_FMT "time format" @@ -445,13 +475,13 @@ DAY_OF_WEEK "DAY-OF-WEEK" DAY_TO_YYYYDDD "DAY-TO-YYYYDDD" DBCS DE DEBUGGING DECIMAL_POINT - DECLARATIVES DEFAULT DELIMITED DELIMITER DEPENDING + DECLARATIVES DELIMITED DELIMITER DEPENDING DESCENDING DETAIL DIRECT DIRECT_ACCESS "DIRECT-ACCESS" DOWN DUPLICATES DYNAMIC - E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL EVERY + E EBCDIC EC EGCS ENCODING ENTRY ENVIRONMENT EQUAL EVERY EXAMINE EXHIBIT EXP EXP10 EXTEND EXTERNAL EXCEPTION_FILE "EXCEPTION-FILE" @@ -539,7 +569,7 @@ PAGE_COUNTER "PAGE-COUNTER" PF PH PI PIC PICTURE PLUS PRESENT_VALUE PRINT_SWITCH - PROCEDURE PROCEDURES PROCEED PROCESS + PROCEDURE PROCEDURES PROCEED PROCESS PROCESSING PROGRAM_ID "PROGRAM-ID" PROGRAM_kw "Program" PROPERTY PROTOTYPE PSEUDOTEXT @@ -587,10 +617,9 @@ UP UPON UPOS UPPER_CASE USAGE USING USUBSTR USUPPLEMENTARY UTILITY UUID4 UVALID UWIDTH - VALUE VARIANCE VARYING VOLATILE + VALIDATING VALUE VARIANCE VARYING VOLATILE WHEN_COMPILED WITH WORKING_STORAGE - XML XMLGENERATE XMLPARSE YEAR_TO_YYYY YYYYDDD YYYYMMDD /* unused Context Words */ @@ -655,6 +684,7 @@ END_SUBTRACT "END-SUBTRACT" END_UNSTRING "END-UNSTRING" END_WRITE "END-WRITE" + END_XML "END-XML" END_IF "END-IF" /* end tokens without semantic value */ @@ -665,7 +695,7 @@ %type sentence statements statement %type star_cbl_opt close_how -%type test_before usage_clause1 might_be +%type test_before usage_clause1 might_be alphanational %type all optional sign_leading on_off initialized strong is_signed %type count data_clauses data_clause %type nine nines nps relop spaces_etc reserved_value signed @@ -673,7 +703,9 @@ %type true_false posneg eval_posneg %type open_io alphabet_etc %type device_name -%type numed collating_sequence context_word ctx_name locale_spec +%type numed context_word ctx_name locale_spec +%type collating_sequences collating_ans +%type collating_an %type namestr alphabet_lit program_as repo_as %type perform_cond kind_of_name %type alloc_ret @@ -842,6 +874,8 @@ %type repo_func_name %type repo_func_names +%type codeset_name +%type locale_phrase %union { bool boolean; @@ -859,6 +893,10 @@ struct { radix_t radix; char *string; } numstr; struct { YYLTYPE loc; int token; literal_t name; } prog_end; struct { int token; special_name_t id; } special_type; + struct { char locale_type; const char * name; } locale_phrase; + collating_an_t collating_sequences; + struct collating_name_t { int token; const char *name; } collating_name; + struct { size_t isym; cbl_encoding_t encoding; } codeset; struct { cbl_field_type_t type; uint32_t capacity; bool signable; } computational; struct cbl_special_name_t *special; @@ -870,7 +908,7 @@ struct { cbl_file_t *file; file_status_t handled; } file_op; struct cbl_label_t *label; struct { cbl_label_t *label; int token; } exception; - struct cbl_field_data_t *field_data; + struct { cbl_encoding_t encoding; cbl_field_data_t *data; } field_data; struct cbl_field_t *field; struct { bool tf; cbl_field_t *field; } bool_field; struct { int token; cbl_field_t *cond; } cond_field; @@ -948,7 +986,10 @@ } %printer { fprintf(yyo, "clauses: 0x%04x", $$); } data_clauses -%printer { fprintf(yyo, "%s %s", refer_type_str($$), $$? $$->name() : ""); } +%printer { fprintf(yyo, "%s %s %s", + refer_type_str($$), + $$? $$->name() : "", + $$ && $$->field? $$->field->codeset.name() : ""); } %printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret %printer { fprintf(yyo, "%s %s '%s' (%s)", $$? cbl_field_type_str($$->type) : "<%empty>", @@ -1023,6 +1064,8 @@ SEARCH SET SELECT SORT SORT_MERGE STRING_kw STOP SUBTRACT START UNSTRING WRITE WHEN INVALID + XMLGENERATE "XML GENERATE" + XMLPARSE "XML PARSE" %left ABS ACCESS ACOS ACTUAL ADVANCING AFP_5A AFTER ALL ALLOCATE @@ -1031,7 +1074,7 @@ ALPHANUMERIC ALPHANUMERIC_EDITED ALPHED ALSO ALTERNATE ANNUITY ANUM ANY ANYCASE APPLY ARE - AREA AREAS AS + AREA AREAS AS ATTRIBUTES ASCENDING ACTIVATING ASIN ASSIGN AT ATAN BACKWARD BASED BASECONVERT @@ -1072,7 +1115,8 @@ DOWN DUPLICATES DYNAMIC - E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL ERROR EVERY + E EBCDIC EC EGCS ELEMENT + ENTRY ENVIRONMENT EQUAL ERROR EVERY EXAMINE EXCEPTION EXHIBIT EXP EXP10 EXTEND EXTERNAL EXCEPTION_FILE @@ -1143,12 +1187,13 @@ MIGHT_BE MINN MULTIPLE MOD MODE MODULE_NAME - NAMED NAT NATIONAL + NAMED NAMESPACE NAMESPACE_PREFIX "NAMESPACE-PREFIX" + NAT NATIONAL NATIONAL_EDITED NATIONAL_OF NATIVE NEGATIVE NESTED NEXT NINEDOT NINES NINEV NO NOTE NO_CONDITION - NULLS NULLPTR NUMBER + NONNUMERIC NULLS NULLPTR NUMBER NUME NUMED NUMED_CR NUMED_DB NUMERIC NUMERIC_EDITED NUMSTR NUMVAL NUMVAL_C @@ -1216,7 +1261,7 @@ VALUE VARIANCE VARYING VOLATILE WHEN_COMPILED WITH WORKING_STORAGE - XML XMLGENERATE XMLPARSE + XML_DECLARATION "XML-DECLARATION" YEAR_TO_YYYY YYYYDDD YYYYMMDD ZERO @@ -1269,7 +1314,7 @@ END_EVALUATE END_MULTIPLY END_PERFORM END_READ END_RETURN END_REWRITE END_SEARCH END_START END_STRING END_SUBTRACT - END_UNSTRING END_WRITE + END_UNSTRING END_WRITE END_XML error END_IF @@ -1937,11 +1982,12 @@ selected_name: external scalar { $$ = $2; } YYERROR; } uint32_t len = $name.len; - cbl_field_t field { - 0, FldLiteralA, FldInvalid, quoted_e | constant_e, - 0, 0, 0, nonarray, 0, "", 0, cbl_field_t::linkage_t(), - {len,len,0,0, $name.data}, NULL }; + // Pretend hex-encoded because that means use verbatim. + cbl_field_t field { FldLiteralA, + hex_encoded_e | quoted_e | constant_e, + {len,len,0,0, $name.data} }; field.attr |= literal_attr($name.prefix); + field.codeset.set(); $$ = new cbl_refer_t( field_add(@name, &field) ); } ; @@ -2315,20 +2361,12 @@ config_paragraphs: config_paragraph config_paragraph: SPECIAL_NAMES '.' - | SPECIAL_NAMES '.' specials '.' + | SPECIAL_NAMES '.' special_names '.' | SOURCE_COMPUTER '.' + | SOURCE_COMPUTER '.' NAME '.' | SOURCE_COMPUTER '.' NAME with_debug '.' | OBJECT_COMPUTER '.' - | OBJECT_COMPUTER '.' NAME collating_sequence[name] '.' - { - if( $name ) { - if( !current.collating_sequence($name) ) { - error_msg(@name, "collating sequence already defined as '%s'", - current.collating_sequence()); - YYERROR; - } - } - } + | OBJECT_COMPUTER '.' NAME[computer] collations '.' | REPOSITORY dot | REPOSITORY dot repo_members '.' ; @@ -2452,23 +2490,84 @@ repo_program: PROGRAM_kw NAME repo_as repo_property: PROPERTY NAME repo_as ; -with_debug: %empty - | with DEBUGGING MODE { +with_debug: with DEBUGGING MODE { if( ! set_debug(true) ) { error_msg(@2, "DEBUGGING MODE valid only in fixed format"); } } ; -collating_sequence: %empty { $$ = NULL; } - | PROGRAM_kw COLLATING SEQUENCE is NAME[name] { $$ = $name; } - | PROGRAM_kw SEQUENCE is NAME[name] { $$ = $name; } - | COLLATING SEQUENCE is NAME[name] { $$ = $name; } - | SEQUENCE is NAME[name] { $$ = $name; } +collations: %empty + | collation_classification + | collation_sequence + | collation_classification collation_sequence + | collation_sequence collation_classification + ; +collation_classification: + character CLASSIFICATION collating_sequences[seq] + { + warn_msg(@seq, "CHARACTER CLASSIFICATION ignored"); + } + ; +collation_sequence: + program_kw collating SEQUENCE collating_sequences[seq] + { + if( !current.collating_sequence($seq.alpha) ) { + error_msg(@seq, "collating sequence already defined as '%s'", + current.collating_sequence()); + YYERROR; + } + } ; -specials: special_names +collating_sequences: + is NAME[name] { + $$.alpha = $name; + $$.national = nullptr; + } + | collating_ans { $$ = $1; } + ; +collating_ans: collating_an[encoding] { + $$ = collating_an_t(); + const char **pname = + $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national; + *pname = $encoding.name; + } + | collating_ans collating_an[encoding] + { + const char **pname = + $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national; + if( *pname ) { + error_msg(@encoding, + "%qs is repeated", keyword_str($encoding.token)); + } + *pname = $encoding.name; + } ; +collating_an: for alphanational is locale_phrase[locale] { + $$.token = $alphanational; + $$.name = $locale.name; + if( ! $locale.name ) { + const char *locale_name = "???"; + switch($locale.locale_type) { + case 'L': locale_name = "LOCALE"; break; + case 'D': locale_name = "DEFAULT"; break; + case 'S': locale_name = "SYSTEM-DEFAULT"; break; + case 'U': locale_name = "USER-DEFAULT"; break; + } + cbl_unimplemented("FOR %s IS %s", + keyword_str($$.token), + locale_name); + } + warn_msg(@locale, "LOCALE phrase ignored"); + } + ; + +locale_phrase: NAME { $$.name = $1; $$.locale_type = '\0'; } + | LOCALE { $$.name = nullptr; $$.locale_type = 'L'; } + | DEFAULT { $$.name = nullptr; $$.locale_type = $1; } + ; + special_names: special_name | special_names special_name ; @@ -2481,12 +2580,26 @@ special_name: dev_mnemonic if( !namcpy(@name, $abc->name, $name) ) YYERROR; if( yydebug ) $abc->dump(); } + | ALPHABET NAME[name] for alphanational is alphabet_name[abc] + { + if( !$abc ) YYERROR; + assert($abc); // already in symbol table + if( !namcpy(@name, $abc->name, $name) ) YYERROR; + if( yydebug ) $abc->dump(); + const size_t isym = symbol_index(symbol_elem_of($abc)); + switch($alphanational) { + case ALPHANUMERIC: + current.alpha_encoding(isym, $abc->encoding); + break; + case NATIONAL: + current.national_encoding(isym, $abc->encoding); + break; + default: gcc_unreachable(); + } + } | CLASS NAME is domains { - struct cbl_field_t field = { 0, - FldClass, FldInvalid, 0, 0, 0, 0, nonarray, @NAME.first_line, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + struct cbl_field_t field = { FldClass, 0, {}, 0, $NAME }; if( !namcpy(@NAME, field.name, $2) ) YYERROR; struct cbl_domain_t *domain = @@ -2496,6 +2609,7 @@ special_name: dev_mnemonic field.data.false_value_as($domains); field.data.domain_as(domain); + field.codeset.set(); domains.clear(); if( field_add(@2, &field) == NULL ) { @@ -2520,10 +2634,9 @@ special_name: dev_mnemonic { symbol_decimal_point_set(','); } - | LOCALE NAME is locale_spec - { - current.locale($NAME, $locale_spec); - cbl_unimplemented("LOCALE syntax"); + | LOCALE NAME is locale_spec[spec] { + current.locale($NAME, $spec); + cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec); } ; | upsi @@ -2626,6 +2739,7 @@ alphabet_name: STANDARD_ALPHABET { $$ = alphabet_add(@1, ASCII_e); } | EBCDIC { $$ = alphabet_add(@1, EBCDIC_e); } | alphabet_seqs { + $1->reencode(); $$ = cbl_alphabet_of(symbol_alphabet_add(PROGRAM, $1)); } | error @@ -2825,22 +2939,12 @@ domains: domain domain: all LITERAL[a] { - if( ! string_of($a) ) { - gcc_location_set(@a); - yywarn("'%s' has embedded NUL", $a.data); - } $$ = NULL; cbl_domain_t domain(@a, $all, $a.len, $a.data); domains.push_back(domain); } | all[a_all] LITERAL[a] THRU all[z_all] LITERAL[z] { - if( ! string_of($a) ) { - yywarn("'%s' has embedded NUL", $a.data); - } - if( ! string_of($z) ) { - yywarn("'%s' has embedded NUL", $z.data); - } $$ = NULL; cbl_domain_elem_t first(@a, $a_all, $a.len, $a.data), last(@z, $z_all, $z.len, $z.data); @@ -2867,9 +2971,6 @@ domain: all LITERAL[a] domains.push_back(domain); } | all[a_all] reserved_value[a] THRU all[z_all] LITERAL[z] { - if( ! string_of($z) ) { - yywarn("'%s' has embedded NUL", $z.data); - } $$ = NULL; if( $a == NULLS ) YYERROR; auto value = constant_of(constant_index($a))->data.initial; @@ -2887,9 +2988,6 @@ domain: all LITERAL[a] } | when_set_to FALSE_kw is LITERAL[value] { - if( ! string_of($value) ) { - yywarn("'%s' has embedded NUL", $value.data); - } const char *dom = $value.data; $$ = new cbl_domain_t(@value, false, $value.len, dom); } @@ -2994,7 +3092,37 @@ fd_clause: record_desc cbl_unimplementedw("RECORDING MODE was ignored, not defined by ISO 2023"); } | VALUE OF fd_values - | CODESET is NAME + | CODESET is codeset_name[codeset] { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->codeset = cbl_file_t::codeset_t($codeset.encoding, + $codeset.isym); + cbl_unimplementedw("sorry, unimplemented CODE-SET"); + } + | CODESET for alphanational is codeset_name[codeset] + { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->codeset = cbl_file_t::codeset_t($codeset.encoding, + $codeset.isym); + if( $codeset.isym == 0 ) { + switch( $alphanational) { + case ALPHANUMERIC: + if( $codeset.encoding != ASCII_e ) { + error_msg(@alphanational, + "FOR ALPHANUMERIC: invalid codeset"); + } + break; + case NATIONAL: + if( $codeset.encoding != EBCDIC_e ) { + error_msg(@alphanational, + "FOR ALPHANUMERIC: invalid codeset"); + } + break; + default: + gcc_unreachable(); + } + } + cbl_unimplemented("CODE-SET"); + } | is GLOBAL { auto f = cbl_file_of(symbol_at(file_section_fd)); @@ -3018,6 +3146,24 @@ fd_clause: record_desc } ; +alphanational: ALPHANUMERIC { $$ = ALPHANUMERIC; } + | NATIONAL { $$ = NATIONAL; } + ; +codeset_name: STANDARD_ALPHABET { $$.isym = 0; $$.encoding = ASCII_e; } + | NATIVE { $$.isym = 0; $$.encoding = EBCDIC_e; } + | EBCDIC { $$.isym = 0; $$.encoding = EBCDIC_e; } + | NAME + { + auto e = symbol_alphabet(PROGRAM, $NAME); + if( !e ) { + error_msg(@NAME, "invalid CODE-SET: %qs", $NAME); + YYERROR; + } + $$.isym = symbol_index(e); + $$.encoding = custom_encoding_e; + } + ; + block_desc: BLOCK_kw contains rec_contains chars_recs ; rec_contains: NUMSTR[min] { @@ -3377,11 +3523,8 @@ level_name: LEVEL ctx_name error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL); YYERROR; } - struct cbl_field_t field = { 0, - FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1), - nonarray, @ctx_name.first_line, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + cbl_field_t field = { FldInvalid, capacity_cast($LEVEL), + @ctx_name.first_line }; if( !namcpy(@ctx_name, field.name, $2) ) YYERROR; $$ = field_add(@$, &field); @@ -3402,10 +3545,9 @@ level_name: LEVEL ctx_name error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL); YYERROR; } - struct cbl_field_t field = { 0, - FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1), - nonarray, @LEVEL.first_line, "", - 0, {}, {}, NULL }; + struct cbl_field_t field = { FldInvalid, + capacity_cast($LEVEL), + @LEVEL.first_line }; $$ = field_add(@1, &field); if( !$$ ) { @@ -3433,20 +3575,21 @@ const_value: cce_expr value78: literalism { cbl_field_data_t data = {}; - data.capacity = capacity_cast(strlen($1.data)); - data.initial = $1.data; - $$ = new cbl_field_data_t(data); + data.capacity = capacity_cast(strlen($1.data)); + data.initial = $1.data; + $$.encoding = $1.encoding; + $$.data = new cbl_field_data_t(data); } | const_value { cbl_field_data_t data = {}; data = build_real (float128_type_node, $1); - $$ = new cbl_field_data_t(data); + $$.data = new cbl_field_data_t(data); } | reserved_value[value] { const auto field = constant_of(constant_index($value)); - $$ = new cbl_field_data_t(field->data); + $$.data = new cbl_field_data_t(field->data); } | true_false @@ -3513,7 +3656,13 @@ data_descr1: level_name if( !cdf_value(field.name, $lit.data) ) { error_msg(@1, "%s was defined by CDF", field.name); } - value_encoding_check(@lit, $1); + if( ! field.codeset.valid() ) { + if( ! field.codeset.set(field.codeset.standard_internal.type) ) { + error_msg(@lit, "CONSTANT inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field.codeset.encoding)); + } + } + value_encoding_check(@lit, $1, $lit.encoding); } | level_name CONSTANT is_global FROM NAME { @@ -3540,12 +3689,11 @@ data_descr1: level_name dialect_error(@1, "level 78", "mf or gnu"); YYERROR; } - struct cbl_field_t field = { 0, FldLiteralA, FldInvalid, - constant_e, 0, 0, 78, nonarray, - @name.first_line, "", 0, {}, *$data, NULL }; - if( !namcpy(@name, field.name, $name) ) YYERROR; + cbl_field_t field = { FldLiteralA, constant_e, *$data.data, + 78, $name, @name.first_line }; if( field.data.initial ) { field.attr |= quoted_e; + field.codeset.set($data.encoding); if( !cdf_value(field.name, field.data.initial) ) { yywarn("%s was defined by CDF", field.name); } @@ -3564,10 +3712,8 @@ data_descr1: level_name | LEVEL88 NAME /* VALUE */ NULLPTR { - struct cbl_field_t field = { 0, - FldClass, FldInvalid, 0, 0, 0, 88, nonarray, @NAME.first_line, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + struct cbl_field_t field = {FldClass, 0, {}, + 88, $NAME, @NAME.first_line}; if( !namcpy(@NAME, field.name, $2) ) YYERROR; auto fig = constant_of(constant_index(NULLS))->data.initial; @@ -3590,19 +3736,16 @@ data_descr1: level_name } | LEVEL88 NAME VALUE domains { - struct cbl_field_t field = { 0, - FldClass, FldInvalid, 0, 0, 0, 88, nonarray, @NAME.first_line, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; - if( !namcpy(@NAME, field.name, $2) ) YYERROR; - - struct cbl_domain_t *domain = + cbl_field_t field = { + FldClass, 0, {}, 88, $NAME, @NAME.first_line}; + cbl_domain_t *domain = new cbl_domain_t[ domains.size() + 1]; std::copy(domains.begin(), domains.end(), domain); field.data.domain_as(domain); field.data.false_value_as($domains); + field.codeset.set(); domains.clear(); if( ($$ = field_add(@2, &field)) == NULL ) { @@ -3799,15 +3942,14 @@ data_descr1: level_name } // Ensure signed initial VALUE is for signed numeric type - if( is_numeric($field) && - $field->data.initial && - $field->type != FldFloat ) - { - switch( $field->data.initial[0] ) { - case '-': - if( !$field->has_attr(signable_e) ) { - error_msg(@field, "%s is unsigned but has signed VALUE '%s'", - $field->name, $field->data.initial); + if( is_numeric($field) ) { + if( $field->data.initial && $field->type != FldFloat ) { + switch( $field->data.initial[0] ) { + case '-': + if( !$field->has_attr(signable_e) ) { + error_msg(@field, "%s is unsigned but has signed VALUE '%s'", + $field->name, $field->data.initial); + } } } } @@ -4219,6 +4361,11 @@ alphanum_pic: alphanum_part { ; alphanum_part: ALNUM[picture] count { + auto field = current_field(); + if( ! field->codeset.set($picture) ) { + error_msg(@picture, "PICTURE inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field->codeset.encoding)); + } $$.attr = uniform_picture($picture); $$.nbyte = strlen($picture); auto count($count); @@ -4309,7 +4456,7 @@ usage_clause1: usage BIT { cbl_unimplemented("Boolean type not implemented"); } -| usage BINARY_INTEGER [comp] is_signed + | usage BINARY_INTEGER [comp] is_signed { // action for BINARY_INTEGER is repeated for COMPUTATIONAL, below. // If it changes, consolidate in a function. @@ -4498,6 +4645,13 @@ usage_clause1: usage BIT | usage INDEX { $$ = symbol_field_index_set( current_field() )->type; } + | usage NATIONAL { + auto field = current_field(); + if( ! field->codeset.set(EBCDIC_e) ) { + error_msg(@2, "usage NATIONAL conflicts with PICTURE"); + } + $$ = FldInvalid; + } // We should enforce data/code pointers with a different type. | usage POINTER { @@ -4535,6 +4689,10 @@ usage_clause1: usage BIT value_clause: VALUE all LITERAL[lit] { cbl_field_t *field = current_field(); + if( ! field->codeset.set($lit.encoding) ) { + error_msg(@lit, "VALUE inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field->codeset.encoding)); + } field->data.initial = $lit.data; field->attr |= literal_attr($lit.prefix); // The __gg__initialize_data routine needs to know that VALUE is a @@ -4555,7 +4713,7 @@ value_clause: VALUE all LITERAL[lit] { } } } - value_encoding_check(@lit, field); + value_encoding_check(@lit, field, $lit.encoding); } | VALUE all cce_expr[value] { cbl_field_t *field = current_field(); @@ -4583,6 +4741,13 @@ value_clause: VALUE all LITERAL[lit] { } | VALUE all reserved_value[value] { + cbl_field_t *field = current_field(); + if( ! field->codeset.valid() ) { + if( ! field->codeset.set(field->codeset.standard_internal.type) ) { + error_msg(@value, "VALUE inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field->codeset.encoding)); + } + } if( $value != NULLS ) { auto fig = constant_of(constant_index($value)); current_field()->data.initial = fig->data.initial; @@ -5082,7 +5247,9 @@ statement: error { | subtract { $$ = SUBTRACT; } | unstring { $$ = UNSTRING; } | write { $$ = WRITE; } - ; + | xmlgenerate { $$ = XMLGENERATE; } + | xmlparse { $$ = XMLPARSE; } + ; /* * ISO defines ON EXCEPTION only for Format 3 (screen). We @@ -6676,13 +6843,13 @@ typename: NAME name: qname { build_symbol_map(); - auto namelocs( name_queue.pop() ); - auto names( name_queue.namelist_of(namelocs) ); - auto inner = namelocs.back(); + auto namelocs( name_queue.pop() ); + auto names( name_queue.namelist_of(namelocs) ); + auto inner = namelocs.back(); if( ($$ = field_find(names)) == NULL ) { if( procedure_div_e == current_division ) { - error_msg(inner.loc, - "DATA-ITEM '%s' not found", inner.name ); + error_msg(inner.loc, + "DATA-ITEM '%s' not found", inner.name ); YYERROR; } /* @@ -6695,7 +6862,7 @@ name: qname auto e = symbol_field_forward_add(PROGRAM, parent, name, @1.first_line); if( !e ) YYERROR; - symbol_field_location( symbol_index(e), @qname ); + symbol_field_location( symbol_index(e), @qname ); parent = symbol_index(e); $$ = cbl_field_of(e); } @@ -6731,6 +6898,8 @@ context_word: APPLY { static char s[] ="APPLY"; $$ = s; } // OPTIONS paragraph | ATTRIBUTE { static char s[] ="ATTRIBUTE"; $$ = s; } // SET statement + | ATTRIBUTES { static char s[] ="ATTRIBUTES"; + $$ = s; } // XML GENERATE | AUTO { static char s[] ="AUTO"; $$ = s; } // screen description entry | AUTOMATIC { static char s[] ="AUTOMATIC"; @@ -9260,9 +9429,12 @@ inspect: INSPECT backward inspected TALLYING tallies if( is_literal(match) && is_literal(replace) ) { if( !$match->all && !$replace_oper->all) { if( match->data.capacity != replace->data.capacity ) { + // Make a copy of replace, because nice_name returns a static + char *replace_name = xstrdup(nice_name_of(replace)); error_msg(@match, "%qs, size %u NOT EQUAL %qs, size %u", nice_name_of(match), match->data.capacity, - nice_name_of(replace), replace->data.capacity); + replace_name, replace->data.capacity); + free(replace_name); YYERROR; } } @@ -9728,7 +9900,12 @@ ffi_name: scalar $$->field = new_literal(strlen(L.name), L.name, quoted_e); } } - | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + | LITERAL + { + // Pretend hex-encoded because that means use verbatim. + auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e); + $$ = new_reference(new_literal($1, attr)); + } ; parameters: parameter { $$ = new ffi_args_t($1); } @@ -11158,6 +11335,10 @@ first_last: %empty { $$ = 0; } | LAST { $$ = 'L'; } ; +for: %empty + | FOR + ; + is_global: %empty %prec GLOBAL { $$ = false; } | is GLOBAL { $$ = true; } ; @@ -11419,7 +11600,134 @@ cdf_none: ENTER | SERVICE_RELOAD ; +xmlgenerate: xmlgen_impl end_xml { + cbl_unimplemented("XML GENERATE"); + } + | xmlgen_cond end_xml { + cbl_unimplemented("XML GENERATE"); + } + ; +xmlgen_impl: + XMLGENERATE xmlgen_body + ; +xmlgen_cond: XMLGENERATE xmlgen_body[body] xmlexcepts[err] + ; + +xmlgen_body: XMLGENERATE name[id1] FROM name[id2] + xmlgen_count xmlencoding xmlgen_decl xmlgen_namespace + xmlgen_nameof xmlgen_typeof xmlgen_suppress + ; +xmlgen_count: %empty + | COUNT in name[id3] + ; +xmlgen_decl: %empty + | with XML_DECLARATION with ATTRIBUTES + ; +xmlgen_namespace: + %empty + | NAMESPACE is name[id4] namespace_prefix + ; +namespace_prefix: + %empty + | NAMESPACE_PREFIX is namestr[id5] + ; +xmlgen_nameof: %empty + | NAME of xmlgen_ids + ; +xmlgen_ids: xmlgen_id + | xmlgen_ids xmlgen_id + ; +xmlgen_id: name[id6] is LITERAL[lit] + ; + +xmlgen_typeof: %empty + | TYPE of xmlgen_types + ; +xmlgen_types: xmlgen_type + | xmlgen_types xmlgen_type + ; +xmlgen_type: name[id6] is xmlgen_eltype + ; +xmlgen_eltype: ATTRIBUTE + | ELEMENT + | CONTENT + ; + +xmlgen_suppress: + %empty + | SUPPRESS xml_suppressions + ; +xml_suppressions: + xml_suppression + | xml_suppressions xml_suppression + ; +xml_suppression: + name[id8] xml_when_phrase + | xml_generic_suppression xml_when_figs + ; +xml_when_phrase: + %empty %prec ZERO + | xml_when_figs + ; +xml_when_figs: xml_when_fig + | xml_when_figs OR xml_when_fig + ; +xml_when_fig: ZERO + | SPACES + | LOW_VALUES + | HIGH_VALUES + ; +xml_generic_suppression: + %empty + | EVERY xml_generic_numeric xmlgen_eltype + ; +xml_generic_numeric: + %empty + | NUMERIC + | NONNUMERIC + ; + +xmlparse: xmlparse_impl end_xml { + cbl_unimplemented("XML PARSE"); + } + | xmlparse_cond end_xml { + cbl_unimplemented("XML PARSE"); + } + ; +xmlparse_impl: XMLPARSE xmlparse_body + ; +xmlparse_cond: XMLPARSE xmlparse_body[body] xmlexcepts[err] + ; + +xmlparse_body: XMLPARSE name xmlencoding xmlreturning xmlvalidating + PROCESSING PROCEDURE is xmlprocs + ; + +xmlencoding: %empty %prec NAME + | with ENCODING name [codepage] + ; + +xmlreturning: %empty + | RETURNING NATIONAL + ; +xmlvalidating: %empty + | VALIDATING with name + | VALIDATING with FILE_KW name + ; +xmlprocs: label_1[proc] + | label_1[proc1] THRU label_1[proc2] + ; + +xmlexcepts: xmlexcept[a] statements %prec XMLPARSE + | xmlexcepts[a] xmlexcept[b] statements %prec XMLPARSE + ; +xmlexcept: EXCEPTION + ; + +end_xml: %empty %prec XMLPARSE + | END_XML %prec XMLPARSE + ; %% static YYLTYPE @@ -11436,11 +11744,9 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin bool is_function) { if( is_literal(name.field) ) { - cbl_field_t called = { 0, FldLiteralA, FldInvalid, quoted_e | constant_e, - 0, 0, 77, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), {}, NULL }; + cbl_field_t called = { FldLiteralA, quoted_e | constant_e, + name.field->data, 77 }; snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial); - called.data = name.field->data; name.field = cbl_field_of(symbol_field_add(PROGRAM, &called)); symbol_field_location(field_index(name.field), loc); parser_symbol_add(name.field); @@ -12410,7 +12716,6 @@ data_category_of( const cbl_refer_t& refer ) { case FldIndex: case FldSwitch: case FldDisplay: - case FldBlob: return data_category_none; } gcc_unreachable(); @@ -12443,7 +12748,6 @@ valid_target( const cbl_refer_t& refer ) { case FldIndex: case FldSwitch: case FldDisplay: - case FldBlob: return false; } gcc_unreachable(); @@ -12988,7 +13292,7 @@ new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) { attrs |= constant_e; attrs |= literal_attr(lit.prefix); - return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs)); + return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding); } bool @@ -13096,7 +13400,8 @@ literal_attr( const char prefix[] ) { case 1: switch(prefix[0]) { case 'B': return bool_encoded_e; - case 'N': cbl_unimplemented("National"); return none_e; + case 'N': + case 'U': return none_e; // nothing to say yet case 'X': return hex_encoded_e; case 'Z': return quoted_e; } @@ -13107,7 +13412,8 @@ literal_attr( const char prefix[] ) { case 'X': switch(prefix[0]) { case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e); - case 'N': cbl_unimplemented("National"); return none_e; + case 'N': + case 'U': cbl_unimplemented("National"); return none_e; } break; } @@ -13181,6 +13487,8 @@ bool cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on ) { if( gcobol_feature == feature_internal_ebcdic_e ) { if( internal_ebcdic_locked ) return false; + if( ! on ) gcc_unreachable(); + current.default_encoding.set(EBCDIC_e); } if( on ) { cbl_gcobol_features |= gcobol_feature; diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h index 03cb0a0492e..c3e34956cd0 100644 --- a/gcc/cobol/parse_ante.h +++ b/gcc/cobol/parse_ante.h @@ -273,8 +273,35 @@ static inline char * dequote( char input[] ) { static const char * name_of( cbl_field_t *field ) { assert(field); + // Because this can be called after .initial has been converted to the + // field->codeset.encoding, we have to undo that. There may be some danger + // associated with returning a static. I don't actually know. -- RJD. + static size_t static_length = 0; + static char * static_buffer = nullptr; + if( field->name[0] == '_' ) + { + // Make a copy of .initial + if( static_length < field->data.capacity+1 ) + { + static_length = field->data.capacity+1; + static_buffer = static_cast(xrealloc(static_buffer, + static_length)); + memcpy(static_buffer, field->data.initial, field->data.capacity); + static_buffer[field->data.capacity] = '\0'; + } + // Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE + size_t charsout; + char *converted = __gg__iconverter(field->codeset.encoding, + DEFAULT_CHARMAP_SOURCE, + field->data.initial, + field->data.capacity, + &charsout ); + memcpy(static_buffer, converted, charsout); + static_buffer[charsout] = '\0'; + } + return field->name[0] == '_' && field->data.initial? - field->data.initial : field->name; + static_buffer : field->name; } static const char * @@ -1305,11 +1332,30 @@ std::map> class prog_descr_t { std::set call_targets, subprograms; - public: +public: std::set function_repository; size_t program_index; cbl_label_t *declaratives_eval, *paragraph, *section; const char *collating_sequence; + struct encoding_t { + struct encoding_base_t { + size_t isym; + cbl_encoding_t encoding; + encoding_base_t() : isym(0), encoding(CP1252_e) {} + encoding_base_t(cbl_encoding_t encoding) : isym(0), encoding(encoding) {} + void set( size_t isym, cbl_encoding_t encoding ) { + this->isym = isym; + this->encoding = encoding; + } + void set( cbl_encoding_t encoding ) { + assert(encoding != custom_encoding_e); + this->isym = 0; + this->encoding = encoding; + } + + } alpha, national; + encoding_t() : national(EBCDIC_e) {} + } alphabet; struct locale_t { cbl_name_t name; const char *os_name; locale_t() : name(""), os_name(nullptr) {} @@ -1599,6 +1645,8 @@ static class current_t { rel_part_t antecedent_cache; public: + static prog_descr_t::encoding_t::encoding_base_t default_encoding; + current_t() : first_statement(0) , in_declaratives(false) @@ -1836,6 +1884,26 @@ static class current_t { return client->second; } + void alpha_encoding( size_t isym, cbl_encoding_t encoding ) { + prog_descr_t& program = programs.top(); + program.alphabet.alpha.set(isym, encoding); + } + void national_encoding( size_t isym, cbl_encoding_t encoding ) { + prog_descr_t& program = programs.top(); + program.alphabet.national.set(isym, encoding); + } + + cbl_encoding_t alpha_encoding() const { + if( programs.empty() ) return CP1252_e; + const prog_descr_t& program = programs.top(); + return program.alphabet.alpha.encoding; + } + cbl_encoding_t national_encoding() const { + if( programs.empty() ) return EBCDIC_e; + const prog_descr_t& program = programs.top(); + return program.alphabet.national.encoding; + } + bool collating_sequence( const cbl_name_t name ) { assert(name); @@ -1891,7 +1959,16 @@ static class current_t { const cbl_label_t *L; if( (L = symbol_program_add(parent, &label)) == NULL ) return false; - programs.push( prog_descr_t(symbol_index(symbol_elem_of(L))) ); + prog_descr_t program(symbol_index(symbol_elem_of(L))); +#if 1 //EBCDIC // enable when ready + auto alpha_encoding = + programs.empty()? default_encoding : programs.top().alphabet.alpha; + if( alpha_encoding.encoding == EBCDIC_e ) { + dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__); + } + program.alphabet.alpha = alpha_encoding; +#endif + programs.push( program ); programs.apply_pending(); bool fOK = symbol_at(programs.top().program_index) + 1 == symbols_end(); @@ -2009,6 +2086,14 @@ static class current_t { parser_leave_section( programs.top().section ); programs.pop(); +#if 0 + if( programs.empty() ) { + // The default encoding can be changed only with -finternal-ebcdic, and + // remains in effect for all programs while the compiler runs. + // This comment here to remind us. + default_encoding = prog_descr_t::encoding_t::encoding_base_t(); + } +#endif debugging_clients.clear(); error_clients.clear(); exception_clients.clear(); @@ -2189,6 +2274,8 @@ static class current_t { cbl_label_t * compute_label() { return error_labels.compute_error; } } current; +prog_descr_t::encoding_t::encoding_base_t current_t::default_encoding; + void current_enabled_ecs( tree ena ) { current.declaratives.runtime.ena = ena; } @@ -2208,6 +2295,22 @@ cbl_options_t current_options() { return current.options_paragraph; } +cbl_encoding_t current_encoding( char a_or_n ) { + cbl_encoding_t retval; + switch(a_or_n) { + case 'A': + retval = current.alpha_encoding(); + break; + case 'N': + retval = current.national_encoding(); + break; + default: + gcc_unreachable(); + break; + } + return retval; +} + size_t current_program_index() { return current.program()? current.program_index() : 0; } @@ -2338,7 +2441,6 @@ needs_picture( cbl_field_type_t type ) { case FldNumericBin5: return false; - case FldBlob: case FldClass: case FldConditional: case FldForward: @@ -2367,7 +2469,6 @@ is_callable( const cbl_field_t *field ) { case FldForward: case FldSwitch: case FldDisplay: - case FldBlob: case FldNumericDisplay: case FldNumericBinary: case FldFloat: @@ -2763,7 +2864,7 @@ field_attr_str( const cbl_field_t *field ) { intermediate_e, embiggened_e, all_alpha_e, all_x_e, all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e, global_e, external_e, blank_zero_e, linkage_e, local_e, leading_e, - separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e, + separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e, depends_on_e, initialized_e, has_value_e, ieeedec_e, big_endian_e, same_as_e, record_key_e, typedef_e, strongdef_e, }; @@ -2871,29 +2972,27 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) { return p; } -static bool -value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) { +static void +value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) { if( ! field->internalize() ) { error_msg(loc, "inconsistent string literal encoding for '%s'", field->data.initial); - return false; } - return true; + if( encoding != field->codeset.encoding ) { + warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs", + __gg__encoding_iconv_name(encoding), field->codeset.name()); + } } - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-field-initializers" static struct cbl_field_t * field_alloc( const YYLTYPE& loc, cbl_field_type_t type, size_t parent, const char name[] ) { - cbl_field_t *f, field = {}; - field.type = type; - field.usage = FldInvalid; + static const uint32_t level = 0; + cbl_field_t *f, field = { type, 0, cbl_field_data_t(), level, name, yylineno }; field.parent = parent; - field.line = yylineno; - if( !namcpy(loc, field.name, name) ) return NULL; f = field_add(loc, &field); assert(f); return f; @@ -2909,7 +3008,7 @@ static cbl_file_t * file_add( YYLTYPE loc, cbl_file_t *file ) { gcc_assert(file); enum { level = 1 }; - struct cbl_field_t area = { 0, FldAlphanumeric, FldInvalid, 0, 0,0, level, {}, yylineno }, + struct cbl_field_t area{ FldAlphanumeric, level, yylineno }, *field = field_add(loc, &area); file->default_record = field_index(field); @@ -2928,6 +3027,7 @@ file_add( YYLTYPE loc, cbl_file_t *file ) { "%s%s", record_area_name_stem, file->name); } field->file = field->parent = symbol_index(e); + field->codeset.set(); return file; } diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index 5773f09a970..53d88cbd54e 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -104,7 +104,9 @@ NP P+|(P{COUNT}) UNSIGNED [[:space:]]+UNSIGNED SIGNED [[:space:]]+SIGNED -ALNUM [AX9]+ +PREFIX G|N|U|Z + +ALNUM [AX9]+|N+|U+ AX [AX]{COUNT}? B0 [B0/]{COUNT}? @@ -452,16 +454,26 @@ COPY { myless(0); } -EXTEND { return EXTEND;} -INITIALIZE { return INITIALIZE; } -INSPECT { return INSPECT; } -INVOKE { return INVOKE; } -INTRINSIC { return INTRINSIC; } -MERGE { return MERGE; } -UNSTRING { return UNSTRING; } -XML { return XML; } -XMLGENERATE { return XMLGENERATE; } -XMLPARSE { return XMLPARSE; } +ATTRIBUTES { return ATTRIBUTES; } +ELEMENT { return ELEMENT; } +ENCODING { return ENCODING; } +EXTEND { return EXTEND;} +INITIALIZE { return INITIALIZE; } +INSPECT { return INSPECT; } +INTRINSIC { return INTRINSIC; } +INVOKE { return INVOKE; } +MERGE { return MERGE; } +NAMESPACE { return NAMESPACE; } +NAMESPACE-PREFIX { return NAMESPACE_PREFIX; } +NONNUMERIC { return NONNUMERIC; } +PROCESSING { return PROCESSING; } +UNSTRING { return UNSTRING; } +VALIDATING { return VALIDATING; } + +XML{SPC}GENERATE { return XMLGENERATE; } +XML{SPC}PARSE { return XMLPARSE; } +XML-DECLARATION { return XML_DECLARATION; } +END-XML { return END_XML; } ZEROE?S? { return ZERO; } @@ -802,7 +814,11 @@ DEPENDING { return DEPENDING; } DELIMITER { return DELIMITER; } DELETE { return DELETE; } -DEFAULT { return DEFAULT; } + +DEFAULT { yylval.number = 'D'; return DEFAULT; } +SYSTEM-DEFAULT { yylval.number = 'S'; return DEFAULT; } +USER-DEFAULT { yylval.number = 'U'; return DEFAULT; } + DECLARATIVES { return DECLARATIVES; } DECIMAL-POINT { return DECIMAL_POINT; } DEBUGGING { return DEBUGGING; } @@ -1142,9 +1158,9 @@ USE({SPC}FOR)? { return USE; } return token == NAME88? NAME : token; } - Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1); yy_push_state(quoted1); } - Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1); yy_push_state(quoted2); } N?X/{hexseq} { yylval.literal.set_prefix(yytext, yyleng); yy_push_state(hex_state); } @@ -1289,12 +1305,12 @@ USE({SPC}FOR)? { return USE; } } /* CDF REPLACING needs quotes to distinquish strings from identifiers. */ - Z?['']{STRING1}[''] { auto *s = xstrdup(yytext); + {PREFIX}?['']{STRING1}[''] { auto *s = xstrdup(yytext); std::replace(s, s + strlen(s), '\'', '"'); ydflval.string = s; update_location_col(s); return LITERAL; } - Z?[""]{STRING}[""] { ydflval.string = xstrdup(yytext); + {PREFIX}?[""]{STRING}[""] { ydflval.string = xstrdup(yytext); update_location_col(yytext); return LITERAL; } [=]{4} { static char nullstring[] = ""; @@ -1403,9 +1419,9 @@ USE({SPC}FOR)? { return USE; } yylval.string = xstrdup(yytext); return NAME; } - Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1); BEGIN(quoted1); } - Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1); BEGIN(quoted2); } . { myless(0); yy_pop_state(); @@ -1445,11 +1461,11 @@ USE({SPC}FOR)? { return USE; } BX/{hexseq} { yylval.numstr.radix = hexadecimal_e; yy_push_state(numstr_state); } - Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[''] { yylval.literal.set_prefix(yytext, yyleng-1); yy_push_state(quoted1); } - Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[""] { yylval.literal.set_prefix(yytext, yyleng-1); yy_push_state(quoted2); } - Z?[""]/{STRING}[""] { yylval.literal.set_prefix(yytext, yyleng-1); + {PREFIX}?[""]/{STRING}[""] { yylval.literal.set_prefix(yytext, yyleng-1); yy_push_state(quoted2); } {INTEGERZ}/[[:punct:]][[:space:]]{BLANK_OEOL} { return numstr_of(yytext); } diff --git a/gcc/cobol/structs.cc b/gcc/cobol/structs.cc index 2393dfbe5b9..2b13b1fa0a6 100644 --- a/gcc/cobol/structs.cc +++ b/gcc/cobol/structs.cc @@ -181,11 +181,13 @@ create_cblc_field_t() signed char level; // This variable's level in the naming heirarchy signed char digits; // Digits specified in PIC string; e.g. 5 for 99v999 signed char rdigits; // Digits to the right of the decimal point. 3 for 99v999 + cbl_encoding_t encoding; // Same as cbl_field_t::codeset::encoding + int alphabet; // Same as cbl_field_t::codeset::language } cblc_field_t; */ tree retval = NULL_TREE; retval = gg_get_filelevel_struct_type_decl( "cblc_field_t", - 16, + 17, UCHAR_P, "data", SIZE_T, "capacity", SIZE_T, "allocated", @@ -201,7 +203,8 @@ create_cblc_field_t() SCHAR, "level", SCHAR, "digits", SCHAR, "rdigits", - INT, "dummy"); // Needed to make it an even number of 32-bit ints + INT, "encoding", + INT, "alphabet"); retval = TREE_TYPE(retval); return retval; @@ -245,13 +248,15 @@ typedef struct cblc_file_t int recent_char; // This is the most recent char sent to the file int recent_key; cblc_file_prior_op_t prior_op; + int encoding; // Actually cbl_encoding_t + int alphabet; // Actually cbl_encoding_t int dummy // We need an even number of INT } cblc_file_t; */ tree retval = NULL_TREE; retval = gg_get_filelevel_struct_type_decl( "cblc_file_t", - 31, + 33, CHAR_P, "name", SIZE_T, "symbol_table_index", CHAR_P, "filename", @@ -282,6 +287,8 @@ typedef struct cblc_file_t INT, "recent_char", INT, "recent_key", INT, "prior_op", + INT, "encoding", // Actually cbl_encoding_t + INT, "alphabet", INT, "dummy"); retval = TREE_TYPE(retval); return retval; diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index 205465b2c64..d575119ba28 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -47,6 +47,7 @@ #include "inspect.h" #include "../../libgcobol/io.h" #include "genapi.h" +#include "../../libgcobol/charmaps.h" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -289,6 +290,10 @@ static symbol_elem_t elementize( const cbl_field_t& field ) { symbol_elem_t sym (SymField); sym.elem.field = field; + // Dubner did the following because he didn't feel like creating yet another + // cbl_field_t constructor that included the hardcoded encoding for the + // global special registers. + sym.elem.field.codeset.encoding = iconv_CP1252_e; return sym; } @@ -760,7 +765,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) { case leading_e: return "leading"; case separate_e: return "separate"; case envar_e: return "envar"; - case dnu_1_e: return "dnu_1"; + case encoded_e: return "encoded"; case bool_encoded_e: return "bool"; case hex_encoded_e: return "hex"; case depends_on_e: return "depends_on"; @@ -1347,7 +1352,7 @@ bool is_variable_length( const cbl_field_t *field ) { // RENAMES may be included in end_of_group. size_t isym = field_index(field), esym = end_of_group(isym); - bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym), + bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym), [field]( const auto& elem ) { if( elem.type == SymField ) { auto f = cbl_field_of(&elem); @@ -1451,7 +1456,7 @@ cbl_field_t::attr_str( const std::vector& attrs ) const char * field_str( const cbl_field_t *field ) { - static char string[3*sizeof(cbl_name_t)]; + static char string[4*sizeof(cbl_name_t)]; char *pend = string; char name[2*sizeof(cbl_name_t)] = ""; @@ -1468,6 +1473,12 @@ field_str( const cbl_field_t *field ) { } } + if( field->codeset.valid() ) { + strcat(name, " ("); + strcat(name, field->codeset.name()); + strcat(name, ") "); + } + pend += snprintf(pend, string + sizeof(string) - pend, "%02u %-20s ", field->level, name); @@ -1481,58 +1492,66 @@ field_str( const cbl_field_t *field ) { if( 'r' == parredef && field->level == 0 ) parredef = 'p'; if( field->has_attr(typedef_e) ) parredef = 'T'; - const char *data = field->data.initial? field->data.initial : NULL; - if( data ) { - auto fig = cbl_figconst_of(data); + const char *init = field->data.initial? field->data.initial : NULL; + if( init ) { + auto fig = cbl_figconst_of(init); if( normal_value_e != fig ) { - data = cbl_figconst_str(fig); + init = cbl_figconst_str(fig); } else { - char *s; - auto n = asprintf(&s, "'%s'", data); - gcc_assert(n); - auto eodata = data + field->data.capacity; - // It is possible for data.initial to be shorter than capacity. - - // This whole thing needs to be reexamined. There is an assumption for - // FldAlphanumeric values that the valid data in data.initial be the same - // length as data.capacity. But that does not hold true for other types. - // For example, a PIC 9V9 has a capacity of two, but the initial - // string provided by the COBOL programmer might be "1.2". Likewise, a - // PIC 999999 (capacity 5) might have a value of "1". - - for(size_t i = 0; idata.capacity; i++) +#if 0 + // At this point, we might have to convert 'init' back to ASCII + char *false_init = static_cast(xmalloc(field->init.capacity+1)); + memcpy(false_init, field->init.initial, field->data.capacity); + false_data[field->data.capacity] = '\0'; + size_t charsout; + + cbl_encoding_t enc_from = field->codeset.encoding; + if( field->type == FldNumericDisplay ) { - if( data[i] == '\0' ) - { - eodata = data + i; - break; - } + // Apparently we need to trace back the meaning of data.literal for + // field::type == FldNumericDisplay + enc_from = DEFAULT_CHARMAP_SOURCE; } - if( eodata != std::find_if_not(data, eodata, fisprint) ) { - char *p = reinterpret_cast(xrealloc(s, n + 8 + 2 * field->data.capacity)); - if( is_elementary(field->type) && - field->type != FldPointer && p != NULL ) { - s = p; - p += n; - strcat( p, "(0x" ); - p += 3; - for( auto d=data; d < eodata; d++ ) { - p += sprintf(p, "%02x", *d); + + init = __gg__iconverter(enc_from, + DEFAULT_CHARMAP_SOURCE, + false_data, + field->data.capacity, + &charsout); +#endif + auto eoinit = init + strlen(init); + char *s = xasprintf("'%s'", init); + + // No NUL within the initial data. + auto ok = std::none_of( init, eoinit, + []( char ch ) { return ch == '\0'; } ); + assert(ok); + + // If any of the init are unprintable, provide a hex version. + if( ! std::all_of(init, eoinit, fisprint) ) { + if( is_elementary(field->type) && field->type != FldPointer ) { + const size_t len = strlen(s) + 8 + 2 * field->data.capacity; + s = reinterpret_cast(xrealloc(s, len)); + strcat( s, " (0x" ); + char *p = s + strlen(s); + for( auto d=init; d < eoinit; d++ ) { + p += sprintf(p, "%02x", static_cast(*d)); } - strcat( p++, ")" ); + strcat( s, ")" ); + assert(strlen(s) < len); } } - data = s; + init = s; } } else { - data = "NULL"; + init = "NULL"; if( field->type == FldSwitch ) { - data = xasprintf("0x%02x", field->data.upsi_mask_of()->value); + init = xasprintf("0x%02x", field->data.upsi_mask_of()->value); } } if( field->level == 88 ) { const auto& dom = *field->data.domain_of(); - data = xasprintf("%s%s %s - %s%s", + init = xasprintf("%s%s %s - %s%s", dom.first.all? "A" : "", value_or_figconst_name(dom.first.name()) , dom.first.is_numeric? "(num)" : "", @@ -1551,7 +1570,7 @@ field_str( const cbl_field_t *field ) { intermediate_e, embiggened_e, all_alpha_e, all_x_e, all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e, /* global_e, external_e, */ blank_zero_e, /* linkage_e, local_e, */ leading_e, - separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e, + separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e, depends_on_e, /* initialized_e, */ has_value_e, ieeedec_e, big_endian_e, same_as_e, record_key_e, typedef_e, strongdef_e, }; @@ -1564,7 +1583,7 @@ field_str( const cbl_field_t *field ) { storage_type, field->data.memsize, field->data.capacity, field->data.digits, field->data.rdigits, - data, field->attr_str(attrs), field->line ); + init, field->attr_str(attrs), field->line ); return string; } @@ -1593,22 +1612,14 @@ static void extend_66_capacity( cbl_field_t *alias ) { static_assert(sizeof(symbol_elem_t*) == sizeof(const char *), "all pointers must be same size"); - assert(alias->data.picture); + assert(alias->level == 66); assert(alias->type == FldGroup); + assert(alias->data.picture); + // If data.picture is not NULL, it is the THRU symbol, see symbol_field_alias2. symbol_elem_t *e = symbol_at(alias->parent); symbol_elem_t *e2 = reinterpret_cast(const_cast(alias->data.picture)); -#ifndef __OPTIMIZE__ -#pragma message "The assert(e < e2) needs fixing" - // The following assert fails when valgrind is involved. This is the known - // problem of expecting mmap() to put new memory maps after older memory - // maps; that assumption fails when valgrind is involved. - - // For now I am defeating the assert when using -O0 so that I can run the - // NIST "make valgrind" tests. But this should be fixed so that the - // symbol table index is used, not the entry locations. - assert(e < e2); -#endif + assert(symbol_index(e) < symbol_index(e2)); alias->data.picture = NULL; capacity_of cap; @@ -1824,7 +1835,7 @@ symbols_update( size_t first, bool parsed_ok ) { if( field->level == 0 && field->is_key_name() ) continue; if( is_literal(field) && field->var_decl_node != NULL ) continue; - // If the field is a constant for a figconstant, just use it. + // If the field is a constant for a figconstant, just use it. if( field->level != 0 && field->has_attr(constant_e) ) { auto fig = cbl_figconst_field_of(field->data.initial); if( fig ) { @@ -1832,7 +1843,7 @@ symbols_update( size_t first, bool parsed_ok ) { continue; } } - + if( field->is_typedef() ) { auto isym = end_of_group( symbol_index(p) ); p = symbol_at(--isym); @@ -1853,6 +1864,44 @@ symbols_update( size_t first, bool parsed_ok ) { field->line, field->level_str(), field->name); continue; } + if( is_numeric(field) && ! field->has_attr(constant_e) ) { + if( field->data.capacity == 0 ) { + ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s", + field->name, field->data.initial); + } + } + + if( ! field->codeset.valid() ) { + switch(field->type) { + case FldForward: + case FldInvalid: + gcc_unreachable(); + case FldAlphaEdited: + case FldAlphanumeric: + case FldClass: + case FldDisplay: + case FldGroup: + case FldLiteralA: + case FldNumericDisplay: + case FldNumericEdited: + if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) { + error_msg(symbol_field_location(field_index(field)), + "internal: %qs encoding not defined", field->name); + } + break; + case FldConditional: + case FldFloat: + case FldIndex: + case FldLiteralN: + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldPointer: + case FldSwitch: + break; + } + } + assert( ! field->is_typedef() ); @@ -2076,6 +2125,12 @@ symbol_field_parent_set( cbl_field_t *field ) return NULL; } prior->type = FldGroup; + prior->codeset.set(); +//// if( ! prior->codeset.set() ) { // maybe just ignore? +//// Dubner sez: Ignore. This was triggering with -finternal-ebcdic +//// ERROR_FIELD(prior, "%qs is already National", prior->name); +//// return NULL; +//// } field->attr |= numeric_group_attrs(prior); } // verify level 88 domain value @@ -2134,7 +2189,7 @@ add_token( symbol_elem_t sym ) { } /* - * When adding registers, be sure to add a complementary cblc_field_t + * When adding special registers, be sure to create the actual cblc_field_t * in libgcobol/constants.cc. */ void @@ -2153,41 +2208,43 @@ symbol_table_init(void) { // These should match the definitions in libgcobol/constants.cc static cbl_field_t constants[] = { - { 0, FldAlphanumeric, FldInvalid, space_value_e | constq | register_e, 0, 0, 0, nonarray, 0, - "SPACE", 0, {}, {1,1,0,0, " \0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, space_value_e | constq | register_e, 0, 0, 0, nonarray, 0, - "SPACES", 0, {}, {1,1,0,0, " \0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, low_value_e | constq | register_e, 0, 0, 0, nonarray, 0, - "LOW_VALUES", 0, {}, {1,1,0,0, "L\0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, zero_value_e | constq | register_e, 0, 0, 0, nonarray, 0, - "ZEROS", 0, {}, {1,1,0,0, "0"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, high_value_e | constq | register_e, 0, 0, 0, nonarray, 0, - "HIGH_VALUES", 0, {}, {1,1,0,0, "H\0\xFF"}, NULL }, + { FldAlphanumeric, space_value_e | constq | register_e, + {1,1,0,0, " \0\xFF"}, 0, "SPACE" }, + { FldAlphanumeric, space_value_e | constq | register_e, + {1,1,0,0, " \0\xFF"}, 0, "SPACES" }, + { FldAlphanumeric, low_value_e | constq | register_e, + {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" }, + { FldAlphanumeric, zero_value_e | constq | register_e, + {1,1,0,0, "0"}, 0, "ZEROS" }, + { FldAlphanumeric, high_value_e | constq | register_e, + {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" }, // IBM standard: QUOTE is a double-quote unless APOST compiler option - { 0, FldAlphanumeric, FldInvalid, quote_value_e | constq | register_e , 0, 0, 0, nonarray, 0, - "QUOTES", 0, {}, {1,1,0,0, "\"\0\xFF"}, NULL }, - { 0, FldPointer, FldPointer, constq | register_e , 0, 0, 0, nonarray, 0, - "NULLS", 0, {}, {8,8,0,0, zeroes_for_null_pointer}, NULL }, + { FldAlphanumeric, quote_value_e | constq | register_e , + {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" }, + { FldPointer, constq | register_e , + {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" }, // IBM defines TALLY // 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO. - { 0, FldNumericBin5, FldInvalid, signable_e | register_e, 0, 0, 0, nonarray, 0, - "_TALLY", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }, + { FldNumericBin5, signable_e | register_e, + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" }, // 01 ARGI is the current index into the argv array - { 0, FldNumericBin5, FldInvalid, signable_e | register_e, 0, 0, 0, nonarray, 0, - "_ARGI", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }, + { FldNumericBin5, signable_e | register_e, + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" }, // These last two don't require actual storage; they get BOOL var_decl_node // in parser_symbol_add() - { 0, FldConditional, FldInvalid, constant_e | register_e , 0, 0, 0, nonarray, 0, - "_VERY_TRUE", 0, {}, {1,1,0,0, ""}, NULL }, - { 0, FldConditional, FldInvalid, constant_e | register_e , 0, 0, 0, nonarray, 0, - "_VERY_FALSE", 0, {}, {1,1,0,0, ""}, NULL }, + { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" }, + { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" }, }; for( struct cbl_field_t *f = constants; f < constants + COUNT_OF(constants); f++ ) { f->our_index = table.nelem; struct symbol_elem_t sym(SymField, 0); sym.elem.field = *f; + // The following makes these constants match the definitions in + // constants.cc. Consider expanding the constructor fo cbl_field_t to + // handle encoding. + sym.elem.field.codeset.encoding = iconv_CP1252_e; table.elems[table.nelem++] = sym; } @@ -2253,30 +2310,30 @@ symbol_table_init(void) { **/ static cbl_field_t debug_registers[] = { - { 0, FldGroup, FldInvalid, register_e, 0,0,1, nonarray, 0, - "DEBUG-ITEM", 0, {}, {132,132,0,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e, 0,0,2, nonarray, 0, - "DEBUG-LINE", 0, {}, {6,6,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e, 0,0,2, nonarray, 0, - "DEBUG-NAME", 0, {}, {30,30,0,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-1", 0, {}, {5,5,4,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-2", 0, {}, {5,5,4,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | register_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-3", 0, {}, {5,5,4,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, register_e|filler_e, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, signable_e | register_e, 0,0,2, nonarray, 0, - "DEBUG-CONTENTS", 0, {}, {76,76,0,0, NULL}, NULL }, + { FldGroup, register_e, + {132,132,0,0, NULL}, 1, "DEBUG-ITEM" }, + { FldAlphanumeric, register_e, + {6,6,0,0, " "}, 2, "DEBUG-LINE" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldAlphanumeric, register_e, + {30,30,0,0, NULL}, 2, "DEBUG-NAME" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-1" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-2" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-3" }, + { FldAlphanumeric, register_e | filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldAlphanumeric, signable_e | register_e, + {76,76,0,0, NULL}, 2, "DEBUG-CONTENTS" }, }; // debug registers @@ -2296,22 +2353,14 @@ symbol_table_init(void) { std::for_each(debug_start+1, p, parent_elem_set(debug_start - table.elems)); static cbl_field_t special_registers[] = { - { 0, FldNumericDisplay, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "_FILE_STATUS", - 0, {}, {2,2,2,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "UPSI-0", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, signable_e|register_e, 0, 0, 0, nonarray, 0, "RETURN-CODE", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "LINAGE-COUNTER", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldLiteralA, FldInvalid, register_e, 0, 0, 0, nonarray, 0, "_dev_stdin", - 0, {}, {0,0,0,0, "/dev/stdin"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_stdout", - 0, {}, {0,0,0,0, "/dev/stdout"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_stderr", - 0, {}, {0,0,0,0, "/dev/stderr"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq|register_e, 0, 0, 0, nonarray, 0, "_dev_null", - 0, {}, {0,0,0,0, "/dev/null"}, NULL }, + { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" }, + { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0" }, + { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" }, + { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" }, + { FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" }, + { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" }, + { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" }, + { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null" }, }; // special registers @@ -2528,6 +2577,9 @@ symbol_field_add( size_t program, struct cbl_field_t *field ) field->attr = inherit & parent->attr; field->attr |= numeric_group_attrs(parent); field->usage = parent->usage; + if( field->level == 66 || field->level == 88 ) { + field->codeset = parent->codeset; + } // BINARY-LONG, for example, sets capacity. if( is_numeric(parent->usage) && parent->data.capacity > 0 ) { field->type = parent->usage; @@ -2696,11 +2748,8 @@ symbol_field_forward_add( size_t program, size_t parent, auto e = symbol_field(program, parent, name); if( e ) return e; - struct cbl_field_t field = { 0, - FldForward, FldInvalid, 0, parent, 0, 0, - nonarray, line, "", - 0, cbl_field_t::linkage_t(), - {0,0,0,0, " "}, NULL }; + cbl_field_t field = { FldForward, 0, line }; + field.parent = parent; if( sizeof(field.name) < strlen(name) ) { dbgmsg("%s:%d: logic error: name %s too long", __func__, __LINE__, name); return NULL; @@ -2886,6 +2935,7 @@ symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) { cbl_field_t dup = {}; dup.parent = field_index(tgt); dup.line = tgt->line; + dup.codeset = tgt->codeset; elem_group_t group(++bog, eog); @@ -3097,6 +3147,87 @@ constant_of( size_t isym ) return field; } +/* + * As parsed, the alphabet reflects the encoding of the source code. If the + * program uses a different encoding for alphanumeric, convert the alphabet to + * that. + * + * Because a custom alphabet is rare and occurs at most only once per program, + * we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at + * most 256 calls to iconv(3). + */ +void +cbl_alphabet_t::reencode() { + + const unsigned char * const pend = alphabet + sizeof(alphabet); + std::vector tgt(256, (char)0xFF); + + /* + * For now, assume CP1252 source-code encoding because we're not capturing it + * anywhere except in cbl_field_t::internalize(). The only known examples of + * a custom alphabet are from NIST, which of course are ASCII. + */ + const char *fromcode = __gg__encoding_iconv_name(CP1252_e); + const char *tocode = __gg__encoding_iconv_name(current_encoding('A')); + iconv_t cd = iconv_open(tocode, fromcode); + +#if optimal_reencode + if( fromcode == tocode ) { // semantically + tgt.resize(0); + return tgt; // Return empty vector; caller copies zero bytes. + } +#endif + + /* + * Each position in the alphabet array represents a letter in the source-code + * encoding. The value at that position represents the letter's collation + * position, its sort order. For each letter in alphabet, determine value of + * that letter in the alphanumeric encoding, and set its collation position + * in that alphabet. + */ + for( const unsigned char *p = alphabet; p < pend; p++ ) { + if( *p == 0xFF ) continue; + unsigned char ch = p - alphabet; + unsigned char pos[8] = {}; + size_t inbytesleft = 1, outbytesleft = sizeof(pos); + char *inbuf = reinterpret_cast(&ch), + *outbuf = reinterpret_cast(pos); + + size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + + if( n == size_t(-1) ) { + error_msg(loc, "%s character '%c' (%x hex) cannot be converted to %s", + fromcode, ch, ch, tocode); + continue; + } + if( n != 0 ) { + dbgmsg("%s character '%c' (%x hex) irreversibly converted to %s", + fromcode, ch, ch, tocode); + } + assert(outbytesleft < sizeof(pos)); + n = sizeof(pos) - outbytesleft; + if( 1 < n ) { + error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s", + fromcode, ch, ch, n, tocode); + continue; + } + + if( ch == low_index ) { + low_index = pos[0]; + } + if( ch == last_index ) { + last_index = pos[0]; + } + if( ch == high_index ) { + high_index = pos[0]; + } + + tgt.at(pos[0]) = *p; + } + + std::copy(tgt.begin(), tgt.end(), alphabet); +} + bool cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) { if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) { @@ -3158,33 +3289,21 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr { extern int yylineno; static const struct cbl_field_t empty_alpha = { - 0, FldAlphanumeric, FldInvalid, - intermediate_e, 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {MAXIMUM_ALPHA_LENGTH, MAXIMUM_ALPHA_LENGTH, - 0, 0, NULL}, NULL }; + FldAlphanumeric, intermediate_e, + {MAXIMUM_ALPHA_LENGTH, + MAXIMUM_ALPHA_LENGTH, 0, 0, NULL} }; static const struct cbl_field_t empty_float = { - 0, FldFloat, FldInvalid, - intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {16, 16, 32, 0, NULL}, NULL }; + FldFloat, intermediate_e, + {16, 16, 32, 0, NULL} }; static const struct cbl_field_t empty_comp5 = { - 0, FldNumericBin5, FldInvalid, + FldNumericBin5, signable_e | intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }; + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL} }; static const struct cbl_field_t empty_conditional = { - 0, FldConditional, FldInvalid, intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + FldConditional, intermediate_e, cbl_field_data_t{} }; static struct cbl_field_t empty_literal = { - 0, FldInvalid, FldInvalid, CONSTANT_E, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + FldInvalid, CONSTANT_E, cbl_field_data_t{} }; + struct cbl_field_t *f = new cbl_field_t; f->type = type; @@ -3200,7 +3319,6 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr case FldSwitch: case FldDisplay: case FldPointer: - case FldBlob: break; case FldConditional: *f = empty_conditional; @@ -3232,7 +3350,9 @@ new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr snprintf(f->name, sizeof(f->name), "_stack%d",++nstack); } - f->data.initial = name; // capture e.g. the function name + f->data.initial = name; // capture e.g. the function name + + f->codeset.set(); return f; } @@ -3246,12 +3366,17 @@ new_temporary_decl() { static inline cbl_field_t * parser_symbol_add2( cbl_field_t *field ) { + if( ! field->codeset.valid() ) { + dbgmsg( "%s:%d: %s (%s) has no encoding", __func__, __LINE__, + field->name, cbl_field_type_str(field->type) ); + } parser_symbol_add(field); return field; } static cbl_field_t * -new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr ) { +new_literal_add( const char initial[], uint32_t len, + cbl_field_attr_t attr, cbl_encoding_t encoding ) { cbl_field_t *field = NULL; if( !(attr & quoted_e) ) { @@ -3266,12 +3391,14 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr field->attr |= attr; field->data.initial = len > 0? initial : empty; field->data.capacity = len; + } - if( ! field->internalize() ) - { - ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial); - } + if( ! field->has_attr(hex_encoded_e) ) { + field->codeset.set(encoding); + if( ! field->internalize() ) { + ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial); } + } static size_t literal_count = 1; sprintf(field->name, @@ -3286,22 +3413,26 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr static temporaries_t temporaries; cbl_field_t * -temporaries_t::literal( const char value[], uint32_t len, cbl_field_attr_t attr ) { - auto key = literal_an(value, quoted_e == (attr & quoted_e)); +temporaries_t::literal( uint32_t len, const char value[], + cbl_field_attr_t attr, cbl_encoding_t encoding ) { + bool is_quoted2 = quoted_e == (attr & quoted_e); + bool is_verbatim = hex_encoded_e == (attr & hex_encoded_e); + auto key = literal_an(value, is_quoted2, is_verbatim); - if( 0 == (attr & hex_encoded_e) ) { + if( ! is_verbatim ) { // TODO: try without this test once National is ready auto p = literals.find(key); if( p != literals.end() ) { cbl_field_t *field = p->second; return field; } } - return literals[key] = new_literal_add(value, len, attr); + return literals[key] = new_literal_add(value, len, attr, encoding); } cbl_field_t * -new_literal( uint32_t len, const char initial[], enum cbl_field_attr_t attr ) { - return temporaries.literal(initial, len, attr); +new_literal( uint32_t len, const char initial[], + cbl_field_attr_t attr, cbl_encoding_t encoding ) { + return temporaries.literal(len, initial, attr, encoding); } void @@ -3400,6 +3531,11 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) { return parser_symbol_add2(field); } +extern os_locale_t os_locale; + +const encodings_t cbl_field_t::codeset_t::standard_internal = { iconv_CP1252_e, "CP1252" }; +#define standard_internal cbl_field_t::codeset_t::standard_internal + cbl_field_t * new_temporary( enum cbl_field_type_t type, const char *initial ) { if( ! initial ) { @@ -3407,8 +3543,9 @@ new_temporary( enum cbl_field_type_t type, const char *initial ) { return temporaries.acquire(type, initial); } if( is_literal(type) ) { - auto field = temporaries.literal(initial, - type == FldLiteralA? quoted_e : none_e); + auto field = temporaries.literal(strlen(initial), initial, + type == FldLiteralA? quoted_e : none_e, + standard_internal.type); return field; } cbl_field_t *field = new_temporary_impl(type, initial); @@ -3451,12 +3588,38 @@ new_temporary_clone( const cbl_field_t *orig) { temporaries.add(field); } field->data = orig->data; - if( field->type == FldNumericBin5 ) field->type = orig->type; + if( field->type == FldNumericBin5 ) { + field->type = orig->type; + field->codeset = orig->codeset; + } field->attr = intermediate_e; return parser_symbol_add2(field); } +/* + * This set of ASCII-like encodings is incomplete and possibly wrong. A + * complete definition would better supported with a Boolean in enccodings_t. + * If it returns false pessimistically, the only consequence is inefficiency: + * the string is processed by iconv(3). + */ +bool +cbl_field_t::holds_ascii() const { + // True if the encoding is a superset of ASCII. + switch(codeset.encoding) { + case ASCII_e: + case CP1252_e: + case iso646_e: + return true; + default: + if( iconv_1026_e <= codeset.encoding && + codeset.encoding <= iconv_ANSI_X3_4_e ) { + return true; + } + } + return false; +} + bool cbl_field_t::is_ascii() const { return std::all_of( data.initial, @@ -3482,8 +3645,6 @@ cbl_field_t::is_ascii() const { * compilation, if it moves off the default, it adjusts only once, and * never reverts. */ -static const char standard_internal[] = "CP1252"; -extern os_locale_t os_locale; static const char * guess_encoding() { @@ -3500,52 +3661,88 @@ guess_encoding() { } } - return standard_internal; + return standard_internal.name; } const char * cbl_field_t::internalize() { - static const char *tocode = standard_internal; static const char *fromcode = guess_encoding(); - static iconv_t cd = iconv_open(tocode, fromcode); static const size_t noconv = size_t(-1); + static std::map tocodes; - if (cd == (iconv_t)-1) { - yywarn("failed % tocode = %<%s%> fromcode = %s", tocode, fromcode); + if( ! codeset.valid() ) { + dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial); + return data.initial; + } + + const char *tocode = __gg__encoding_iconv_name(codeset.encoding); + + std::string toname(tocode); + auto p = tocodes.find(toname); + if( p == tocodes.end() ) { + tocodes[toname] = iconv_open(tocode, fromcode); } + iconv_t cd = tocodes[toname]; - bool using_assumed = fromcode == os_locale.assumed; + if (cd == (iconv_t)-1) { + yywarn("failed % tocode = %qs fromcode = %qs", tocode, fromcode); + } if( fromcode == tocode || has_attr(hex_encoded_e) ) { return data.initial; } - if( is_ascii() ) return data.initial; + if( data.capacity == 0 ) { + assert(0 == strlen(data.initial)); + return data.initial; + } + if( holds_ascii() && is_ascii() ) return data.initial; assert(data.capacity > 0); - std::vector output(data.capacity + 2, '\0'); - char *out = output.data(); - char *in = const_cast(data.initial); - size_t n, inbytesleft = data.capacity, outbytesleft = output.size(); + // The final 2 bytes of the output are "!\0". It's a debugging sentinel. + size_t n; + size_t inbytesleft = data.capacity; + size_t outbytesleft = inbytesleft; + char *in = const_cast(data.initial); + char *out = static_cast( xcalloc(1, outbytesleft + 2) ), *output = out; if( !is_literal(this) && inbytesleft < strlen(data.initial) ) { inbytesleft = strlen(data.initial); } + const unsigned int in_len = inbytesleft; assert(fromcode != tocode); - while( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { - if( !using_assumed ) break; // change only once - fromcode = guess_encoding(); - cd = iconv_open(tocode, fromcode); - dbgmsg("%s: trying input encoding %s", __func__, fromcode); - if( fromcode == tocode ) break; - } + /* + * If we're currently assuming the source code is encoded according to the + * locale (the default), and there's an iconv failure, try once more using a + * different assumption, that the source code is encoded as CP1252. + * + * This heuristic means that some UTF-8 literals could be converted until a + * CP1252 byte is encountered. We could be stricter about that. + * + * Also possible is a failure to avoid iconv with fromcode and tocode denote + * the same encoding but with different spellings, e.g. CP1252 and CP1252//. + */ - if( n == noconv ) { - if( !using_assumed ) { - yywarn("failed to decode '%s' as %s", data.initial, fromcode); - return NULL; + do { + if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { + if( fromcode == os_locale.assumed ) { + fromcode = standard_internal.name; + tocodes.clear(); + cd = tocodes[toname] = iconv_open(tocode, fromcode); + dbgmsg("%s: trying input encoding %s", __func__, fromcode); + if( fromcode == tocode ) return data.initial; // no conversion required. + n = noconv - 1; // try again + } } + if( n == 0 ) break; + } while( n != noconv ); + + if( n == noconv ) { + size_t i = in_len - inbytesleft; + yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)", + fromcode, data.initial + i, tocode, inbytesleft, in_len); + if( false ) return NULL; return data.initial; } @@ -3558,27 +3755,47 @@ cbl_field_t::internalize() { } // Replace data.initial only if iconv output differs. - if( 0 != memcmp(data.initial, output.data(), out - output.data()) ) { - assert(out <= output.data() + data.capacity); - + if( 0 != memcmp(data.initial, output, out - output) ) { + assert(out <= output + data.capacity); dbgmsg("%s: converted '%.*s' to %s", __func__, data.capacity, data.initial, tocode); - - int len = int(out - output.data()); - char *mem = static_cast( xcalloc(1, output.size()) ); - - // Set the new memory to all blanks, tacking a '!' on the end. - memset(mem, 0x20, output.size() - 1); - mem[ output.size() - 2] = '!'; + struct localspace_t { + char space[4]; + size_t len, erc; + explicit localspace_t( iconv_t cd ) { + static char input[1] = { 0x20 }; + size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space); + char *in2 = input, *out2 = space; + + erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2); + len = out2 - space; + } + bool valid() const { return 0 < len && erc != size_t(-1); } + } spc(cd); + + if( ! spc.valid() ) { + dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__, + tocode, xstrerror(errno)); + ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno)); + return data.initial; + } + assert( 0 < spc.len && spc.valid() ); if( is_literal(this) ) { - data.capacity = len; // trailing '!' will be overwritten + data.capacity = out - output; // trailing '!' will be overwritten } - - memcpy(mem, output.data(), len); // copy only as much as iconv converted - + // Pad with trailing blanks, tacking a '!' on the end. + for( const char *eout = output + data.capacity; + out < eout; + out += spc.len ) { + memcpy(out, spc.space, spc.len); + } + out[0] = '!'; + assert(out[1] == '\0'); free(const_cast(data.initial)); - data.initial = mem; + data.initial = output; + } else { + free(output); } return data.initial; @@ -3724,7 +3941,7 @@ symbol_label_add( size_t program, cbl_label_t *input ) cbl_errx("%s:%d: could not add '%s'", __func__, __LINE__, label->name); } assert(e); - + common_callables_update( symbol_index(e) ); // restore munged line number unless symbol_add returned an existing label @@ -3753,7 +3970,7 @@ symbol_label_section_exists( size_t eval_label_index ) { if( program == sym.program && sym.type == SymLabel ) { const auto& L(sym.elem.label); // true if the symbol is an explicit label. - return L.type == LblSection && L.name[0] != '_'; + return L.type == LblSection && L.name[0] != '_'; } return false; } ); @@ -3761,7 +3978,7 @@ symbol_label_section_exists( size_t eval_label_index ) { symbols_dump(eval_label_index, true); } // Return true if a user-defined SECTION was found after the Declaratives - // label section. + // label section. return has_section; } @@ -4374,7 +4591,7 @@ cbl_occurs_t::subscript_ok( const cbl_field_t *subscript ) const { return bounds.lower <= (size_t)sub && (size_t)sub <= bounds.upper; } -const cbl_field_t * +const cbl_field_t * symbol_unresolved_file_key( const cbl_file_t * file, const cbl_name_t key_field_name ) { const symbol_elem_t *file_sym = symbol_elem_of(file); @@ -4597,6 +4814,8 @@ cbl_file_key_t::str() const { */ void cbl_file_t::deforward() { + const size_t ifile( symbol_index(symbol_elem_of(this)) ); + if( user_status ) { user_status = symbol_forward_to(user_status); @@ -4608,7 +4827,7 @@ cbl_file_t::deforward() { } for( auto p = keys; p < keys + nkey; p++ ) { - p->deforward( symbol_index(symbol_elem_of(this)) ); + p->deforward(ifile); } } @@ -4728,7 +4947,6 @@ has_value( cbl_field_type_t type ) { case FldForward: case FldSwitch: case FldDisplay: - case FldBlob: return false; case FldIndex: case FldPointer: diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h index c8b37a61409..ea607db4dbb 100644 --- a/gcc/cobol/symbols.h +++ b/gcc/cobol/symbols.h @@ -118,7 +118,6 @@ is_numeric( cbl_field_type_t type ) { case FldSwitch: case FldDisplay: case FldPointer: // not numeric because not computable, only settable - case FldBlob: return false; // These types are computable or, in the case of FldIndex, may be // arbitrarily set and incremented. @@ -500,8 +499,12 @@ struct cbl_subtable_t { size_t offset, isym; }; +const char * __gg__encoding_iconv_name( cbl_encoding_t encoding ); + bool is_elementary( enum cbl_field_type_t type ); +cbl_encoding_t current_encoding( char a_or_n ); + /* In cbl_field_t: * 'offset' is overloaded for FldAlphanumeric/temporary/intermediate variables * For such variables, offset is a copy of the initial capacity. This is in @@ -512,13 +515,72 @@ bool is_elementary( enum cbl_field_type_t type ); struct cbl_field_t { size_t offset; - enum cbl_field_type_t type, usage; + cbl_field_type_t type, usage; uint64_t attr; static_assert(sizeof(attr) == sizeof(cbl_field_attr_t), "wrong attr size"); size_t parent; // symbols[] index of our parent size_t our_index; // symbols[] index of this field, set in symbol_add() uint32_t level; - struct cbl_occurs_t occurs; + cbl_occurs_t occurs; + struct codeset_t { + static const encodings_t standard_internal; + cbl_encoding_t encoding; + size_t alphabet; // unlikely + explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e, + size_t alphabet = 0) // combination means "not set" + : encoding(encoding), alphabet(alphabet) + {} + bool valid() const { + return + (alphabet == 0 && encoding != custom_encoding_e) + || + (alphabet != 0 && encoding == custom_encoding_e); + } + bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) { + assert(encoding <= iconv_YU_e); + if( ! valid() ) { // setting first time + this->encoding = encoding; + this->alphabet = alphabet; + return valid(); + } + // DUBNER override. Encoding has to change when + // 01 FOO VALUE ZERO. Just 0 is okay; ZERO is not. + this->encoding = encoding; + return this->encoding == encoding && this->alphabet == alphabet; + } + bool set( const char picture_fragment[] = nullptr) { + if( ! picture_fragment ) { + cbl_encoding_t currenc = current_encoding('A'); + bool retval = set(currenc); + return retval; + } + size_t len = strlen(picture_fragment); + std::vector frag(len); + std::transform(picture_fragment, picture_fragment + len, + frag.begin(), ftoupper); + switch(frag[0]) { + case 'A': case 'X': case '9': + return set(current_encoding('A')); + case 'N': case 'U': + if( std::all_of(frag.begin(), frag.end(), + [first = frag[0]]( char ch ) { + return first == ch; + } ) ) { + // All N's indicates National; all U's indicates UTF-8. + auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e; + return set(enc); + } + return false; // They all must be the same. + } + gcc_unreachable(); + } + cbl_encoding_t set() const { + return valid()? encoding : cbl_encoding_t(-1); + } + const char *name() const { + return valid()? __gg__encoding_iconv_name(encoding) : "nocoding"; + } + } codeset; int line; // Where it appears in the file. cbl_name_t name; // Appears in the GIMPLE dump. size_t file; // nonzero if field is 01 record for a file @@ -527,18 +589,45 @@ struct cbl_field_t { cbl_ffi_crv_t crv; // Using by C/R/V in Linkage linkage_t() : optional(false), crv(by_default_e) {} } linkage; - struct cbl_field_data_t data; + cbl_field_data_t data; tree var_decl_node; // Reference to the pointer to the cblc_field_t structure tree data_decl_node; // Reference to the run-time data of the COBOL variable // // For linkage_e variables, data_decl_node is a pointer // // to the data, rather than the actual data + cbl_field_t() + : offset(0), type(FldInvalid), usage(FldInvalid), attr(0) + , parent(0), our_index(0), level(0) + , line(0), name(""), file(0) + , var_decl_node(nullptr), data_decl_node(nullptr) + {} + + cbl_field_t( cbl_field_type_t type, uint64_t attr, + const cbl_field_data_t& data, + uint32_t level = 0, const cbl_name_t name = "", int line = 0 ) + : offset(0), type(type), usage(FldInvalid), attr(attr) + , parent(0), our_index(0), level(level) + , line(line), file(0), data(data) + , var_decl_node(nullptr), data_decl_node(nullptr) + { + gcc_assert(strlen(name) < sizeof this->name); + strcpy(this->name, name); + } + + cbl_field_t( cbl_field_type_t type, uint32_t level, int line, uint64_t attr = 0 ) + : offset(0), type(type), usage(FldInvalid), attr(attr) + , parent(0), our_index(0), level(level) + , line(line), name(""), file(0) + , var_decl_node(nullptr), data_decl_node(nullptr) + {} void set_linkage( cbl_ffi_crv_t crv, bool optional ) { linkage.optional = optional; linkage.crv = crv; assert(crv != by_content_e); } + bool holds_ascii() const; + inline bool is_typedef() const { return has_attr(typedef_e); } @@ -582,7 +671,8 @@ struct cbl_field_t { attr |= same_as_e; data = that.data; - + codeset = that.codeset; + if( ! (is_typedef || that.type == FldClass) ) { data.initial = NULL; data = build_zero_cst (float128_type_node); @@ -1202,27 +1292,40 @@ cbl_field_t * new_temporary_clone( const cbl_field_t *orig); cbl_field_t * keep_temporary( cbl_field_type_t type ); cbl_field_t * new_literal( uint32_t len, const char initial[], - enum cbl_field_attr_t attr = none_e ); + cbl_field_attr_t attr, + cbl_encoding_t encoding = ASCII_e ); + +static inline cbl_field_t * +new_literal( uint32_t len, const char initial[] ) { + return new_literal(len, initial, none_e); +} void symbol_temporaries_free(); class temporaries_t { friend void symbol_temporaries_free(); struct literal_an { - bool is_quoted; + bool is_quoted, is_verbatim; // verbatim: don't use codeset std::string value; - literal_an() : is_quoted(false), value("???") {} - literal_an( const char value[], bool is_quoted ) - : is_quoted(is_quoted), value(value) {} + literal_an() : is_quoted(false), is_verbatim(false), value("???") {} + literal_an( const char value[], bool is_quoted, bool is_verbatim = false ) + : is_quoted(is_quoted), is_verbatim(is_verbatim), value(value) {} literal_an( const literal_an& that ) - : is_quoted(that.is_quoted), value(that.value) {} + : is_quoted(that.is_quoted), + is_verbatim(that.is_verbatim), + value(that.value) + {} literal_an& operator=( const literal_an& that ) { is_quoted = that.is_quoted; + is_verbatim = that.is_verbatim; value = that.value; return *this; } bool operator<( const literal_an& that ) const { if( value == that.value ) { // alpha before numeric + if( is_quoted == that.is_quoted ) { // verbatim before not + return (is_verbatim? 0 : 1) < (that.is_verbatim? 0 : 1); + } return (is_quoted? 0 : 1) < (that.is_quoted? 0 : 1); } return value < that.value; @@ -1235,7 +1338,8 @@ class temporaries_t { fieldmap_t used, freed; public: - cbl_field_t * literal( const char value[], uint32_t len, cbl_field_attr_t attr = none_e ); + cbl_field_t * literal( uint32_t len, const char value[], + cbl_field_attr_t attr, cbl_encoding_t encoding ); cbl_field_t * reuse( cbl_field_type_t type ); cbl_field_t * acquire( cbl_field_type_t type, const cbl_name_t name = nullptr ); cbl_field_t * add( cbl_field_t *field ); @@ -1338,7 +1442,6 @@ struct function_descr_t { case FldForward: case FldIndex: case FldSwitch: - case FldBlob: return '?'; case FldPointer: return 'O'; @@ -1410,6 +1513,13 @@ struct cbl_special_name_t { char * hex_decode( const char text[] ); +/* + * For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet + * holds the collation position of each encoded value. + * If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1. + * If the encoding is ASCII, then 'A' is 65 and alphabet[ 65] == 1. + * If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1. + */ struct cbl_alphabet_t { YYLTYPE loc; cbl_name_t name; @@ -1482,6 +1592,7 @@ struct cbl_alphabet_t { void also( const YYLTYPE& loc, size_t ch ); bool assign( const YYLTYPE& loc, unsigned char ch, unsigned char value ); + void reencode(); static const char * encoding_str( cbl_encoding_t encoding ) { @@ -1489,7 +1600,13 @@ struct cbl_alphabet_t { case ASCII_e: return "ascii"; case iso646_e: return "iso646"; case EBCDIC_e: return "ebcdic"; + case UTF8_e: return "utf8"; case custom_encoding_e: return "custom"; + default: + { + auto p = __gg__encoding_iconv_name( encoding ); + if( p ) return p; + } } return "???"; } @@ -1644,6 +1761,13 @@ struct cbl_file_t { size_t user_status; // index into symbol table for file status size_t vsam_status; // index into symbol table for vsam status PIC X(6) size_t record_length; // DEPENDS ON + struct codeset_t { + cbl_encoding_t encoding; + size_t alphabet; // unlikely + explicit codeset_t(cbl_encoding_t encoding = CP1252_e, size_t alphabet = 0) + : encoding(encoding), alphabet(alphabet) + {} + } codeset; int line; cbl_name_t name; cbl_sortreturn_t *addresses; // Used during parser_return_start, et al. diff --git a/gcc/cobol/symfind.cc b/gcc/cobol/symfind.cc index 8141b2206e6..472d37a20b9 100644 --- a/gcc/cobol/symfind.cc +++ b/gcc/cobol/symfind.cc @@ -205,6 +205,7 @@ field_structure( symbol_elem_t& sym ) { if( !is_data_field(sym) ) return none; cbl_field_t *field = cbl_field_of(&sym); + assert(field->type != FldForward); // eliminated by is_data_field symbol_map_t::key_type key( sym.program, field->name, field->parent ); symbol_map_t::value_type elem( key, std::vector() ); @@ -232,16 +233,6 @@ field_structure( symbol_elem_t& sym ) { return elem; } -void erase_symbol_map_fwds( size_t beg ) { - for( auto p = symbols_begin(beg); p < symbols_end(); p++ ) { - if( p->type != SymField ) continue; - const auto& field(*cbl_field_of(p)); - if( field.type == FldForward ) { - symbol_map.erase( sym_name_t(p->program, field.name, field.parent) ); - } - } -} - void build_symbol_map() { static size_t beg = 0; @@ -539,13 +530,13 @@ symbol_find( size_t program, std::list names ) { auto unique = items.size() == 1; - if( !unique ) { + if( ! unique ) { if( items.empty() ) { return std::pair(NULL, false); } if( yydebug ) { dbgmsg( "%s:%d: '%s' has " HOST_SIZE_T_PRINT_UNSIGNED " possible matches", - __func__, __LINE__, names.back(), (fmt_size_t)items.size() ); + __func__, __LINE__, names.back(), (fmt_size_t)items.size() ); std::for_each( items.begin(), items.end(), dump_symbol_map_value1 ); } } diff --git a/gcc/cobol/token_names.h b/gcc/cobol/token_names.h index ca51510fcb7..77029f73ce5 100644 --- a/gcc/cobol/token_names.h +++ b/gcc/cobol/token_names.h @@ -1,5 +1,5 @@ // generated by ./token_names.h.gen ../../build/gcc/cobol/parse.h -// Tue Jul 8 19:21:28 EDT 2025 +// Mon Sep 15 22:47:12 EDT 2025 tokens = { { "identification", IDENTIFICATION_DIV }, // 258 { "environment", ENVIRONMENT_DIV }, // 259 @@ -102,190 +102,190 @@ tokens = { { "might-be", MIGHT_BE }, // 353 { "function-udf", FUNCTION_UDF }, // 354 { "function-udf-0", FUNCTION_UDF_0 }, // 355 - { "date-fmt", DATE_FMT }, // 356 - { "time-fmt", TIME_FMT }, // 357 - { "datetime-fmt", DATETIME_FMT }, // 358 - { "basis", BASIS }, // 359 - { "cbl", CBL }, // 360 - { "constant", CONSTANT }, // 361 - { "copy", COPY }, // 362 - { "defined", DEFINED }, // 363 - { "enter", ENTER }, // 364 - { "feature", FEATURE }, // 365 - { "insertt", INSERTT }, // 366 - { "lsub", LSUB }, // 367 - { "parameter", PARAMETER_kw }, // 368 - { "override", OVERRIDE }, // 369 - { "ready", READY }, // 370 - { "reset", RESET }, // 371 - { "rsub", RSUB }, // 372 - { "service-reload", SERVICE_RELOAD }, // 373 - { "star-cbl", STAR_CBL }, // 374 - { "subscript", SUBSCRIPT }, // 375 - { "suppress", SUPPRESS }, // 376 - { "title", TITLE }, // 377 - { "trace", TRACE }, // 378 - { "use", USE }, // 379 - { "cobol-words", COBOL_WORDS }, // 380 - { "equate", EQUATE }, // 381 - { "undefine", UNDEFINE }, // 382 - { "cdf-define", CDF_DEFINE }, // 383 - { "cdf-display", CDF_DISPLAY }, // 384 - { "cdf-if", CDF_IF }, // 385 - { "cdf-else", CDF_ELSE }, // 386 - { "cdf-end-if", CDF_END_IF }, // 387 - { "cdf-evaluate", CDF_EVALUATE }, // 388 - { "cdf-when", CDF_WHEN }, // 389 - { "cdf-end-evaluate", CDF_END_EVALUATE }, // 390 - { "call-convention", CALL_CONVENTION }, // 391 - { "call-cobol", CALL_COBOL }, // 392 - { "call-verbatim", CALL_VERBATIM }, // 393 - { "cdf-push", CDF_PUSH }, // 394 - { "cdf-pop", CDF_POP }, // 395 - { "source-format", SOURCE_FORMAT }, // 396 - { "if", IF }, // 397 - { "then", THEN }, // 398 - { "else", ELSE }, // 399 - { "sentence", SENTENCE }, // 400 - { "accept", ACCEPT }, // 401 - { "add", ADD }, // 402 - { "alter", ALTER }, // 403 - { "call", CALL }, // 404 - { "cancel", CANCEL }, // 405 - { "close", CLOSE }, // 406 - { "compute", COMPUTE }, // 407 - { "continue", CONTINUE }, // 408 - { "delete", DELETE }, // 409 - { "display", DISPLAY }, // 410 - { "divide", DIVIDE }, // 411 - { "evaluate", EVALUATE }, // 412 - { "exit", EXIT }, // 413 - { "filler", FILLER_kw }, // 414 - { "goback", GOBACK }, // 415 - { "goto", GOTO }, // 416 - { "initialize", INITIALIZE }, // 417 - { "inspect", INSPECT }, // 418 - { "merge", MERGE }, // 419 - { "move", MOVE }, // 420 - { "multiply", MULTIPLY }, // 421 - { "open", OPEN }, // 422 - { "paragraph", PARAGRAPH }, // 423 - { "read", READ }, // 424 - { "release", RELEASE }, // 425 - { "return", RETURN }, // 426 - { "rewrite", REWRITE }, // 427 - { "search", SEARCH }, // 428 - { "set", SET }, // 429 - { "select", SELECT }, // 430 - { "sort", SORT }, // 431 - { "sort-merge", SORT_MERGE }, // 432 - { "string", STRING_kw }, // 433 - { "stop", STOP }, // 434 - { "subtract", SUBTRACT }, // 435 - { "start", START }, // 436 - { "unstring", UNSTRING }, // 437 - { "write", WRITE }, // 438 - { "when", WHEN }, // 439 - { "argument-number", ARGUMENT_NUMBER }, // 440 - { "argument-value", ARGUMENT_VALUE }, // 441 - { "environment-name", ENVIRONMENT_NAME }, // 442 - { "environment-value", ENVIRONMENT_VALUE }, // 443 - { "abs", ABS }, // 444 - { "access", ACCESS }, // 445 - { "acos", ACOS }, // 446 - { "actual", ACTUAL }, // 447 - { "advancing", ADVANCING }, // 448 - { "after", AFTER }, // 449 - { "all", ALL }, // 450 - { "allocate", ALLOCATE }, // 451 - { "alphabet", ALPHABET }, // 452 - { "alphabetic", ALPHABETIC }, // 453 - { "alphabetic-lower", ALPHABETIC_LOWER }, // 454 - { "alphabetic-upper", ALPHABETIC_UPPER }, // 455 - { "alphanumeric", ALPHANUMERIC }, // 456 - { "alphanumeric-edited", ALPHANUMERIC_EDITED }, // 457 - { "also", ALSO }, // 458 - { "alternate", ALTERNATE }, // 459 - { "annuity", ANNUITY }, // 460 - { "anum", ANUM }, // 461 - { "any", ANY }, // 462 - { "anycase", ANYCASE }, // 463 - { "apply", APPLY }, // 464 - { "are", ARE }, // 465 - { "area", AREA }, // 466 - { "areas", AREAS }, // 467 - { "as", AS }, // 468 - { "ascending", ASCENDING }, // 469 - { "activating", ACTIVATING }, // 470 - { "asin", ASIN }, // 471 - { "assign", ASSIGN }, // 472 - { "at", AT }, // 473 - { "atan", ATAN }, // 474 - { "based", BASED }, // 475 - { "baseconvert", BASECONVERT }, // 476 - { "before", BEFORE }, // 477 - { "binary", BINARY }, // 478 - { "bit", BIT }, // 479 - { "bit-of", BIT_OF }, // 480 - { "bit-to-char", BIT_TO_CHAR }, // 481 - { "blank", BLANK }, // 482 - { "block", BLOCK_kw }, // 483 - { "boolean-of-integer", BOOLEAN_OF_INTEGER }, // 484 - { "bottom", BOTTOM }, // 485 - { "by", BY }, // 486 - { "byte", BYTE }, // 487 - { "byte-length", BYTE_LENGTH }, // 488 - { "cf", CF }, // 489 - { "ch", CH }, // 490 - { "changed", CHANGED }, // 491 - { "char", CHAR }, // 492 - { "char-national", CHAR_NATIONAL }, // 493 - { "character", CHARACTER }, // 494 - { "characters", CHARACTERS }, // 495 - { "checking", CHECKING }, // 496 - { "class", CLASS }, // 497 - { "cobol", COBOL }, // 498 - { "code", CODE }, // 499 - { "code-set", CODESET }, // 500 - { "collating", COLLATING }, // 501 - { "column", COLUMN }, // 502 - { "combined-datetime", COMBINED_DATETIME }, // 503 - { "comma", COMMA }, // 504 - { "command-line", COMMAND_LINE }, // 505 - { "command-line-count", COMMAND_LINE_COUNT }, // 506 - { "commit", COMMIT }, // 507 - { "common", COMMON }, // 508 - { "concat", CONCAT }, // 509 - { "condition", CONDITION }, // 510 - { "configuration", CONFIGURATION_SECT }, // 511 - { "contains", CONTAINS }, // 512 - { "content", CONTENT }, // 513 - { "control", CONTROL }, // 514 - { "controls", CONTROLS }, // 515 - { "convert", CONVERT }, // 516 - { "converting", CONVERTING }, // 517 - { "corresponding", CORRESPONDING }, // 518 - { "cos", COS }, // 519 - { "count", COUNT }, // 520 - { "currency", CURRENCY }, // 521 - { "current", CURRENT }, // 522 - { "current-date", CURRENT_DATE }, // 523 - { "data", DATA }, // 524 - { "date", DATE }, // 525 - { "date-compiled", DATE_COMPILED }, // 526 - { "date-of-integer", DATE_OF_INTEGER }, // 527 - { "date-to-yyyymmdd", DATE_TO_YYYYMMDD }, // 528 - { "date-written", DATE_WRITTEN }, // 529 - { "day", DAY }, // 530 - { "day-of-integer", DAY_OF_INTEGER }, // 531 - { "day-of-week", DAY_OF_WEEK }, // 532 - { "day-to-yyyyddd", DAY_TO_YYYYDDD }, // 533 - { "dbcs", DBCS }, // 534 - { "de", DE }, // 535 - { "debugging", DEBUGGING }, // 536 - { "decimal-point", DECIMAL_POINT }, // 537 - { "declaratives", DECLARATIVES }, // 538 - { "default", DEFAULT }, // 539 + { "default", DEFAULT }, // 356 + { "date-fmt", DATE_FMT }, // 357 + { "time-fmt", TIME_FMT }, // 358 + { "datetime-fmt", DATETIME_FMT }, // 359 + { "basis", BASIS }, // 360 + { "cbl", CBL }, // 361 + { "constant", CONSTANT }, // 362 + { "copy", COPY }, // 363 + { "defined", DEFINED }, // 364 + { "enter", ENTER }, // 365 + { "feature", FEATURE }, // 366 + { "insertt", INSERTT }, // 367 + { "lsub", LSUB }, // 368 + { "parameter", PARAMETER_kw }, // 369 + { "override", OVERRIDE }, // 370 + { "ready", READY }, // 371 + { "reset", RESET }, // 372 + { "rsub", RSUB }, // 373 + { "service-reload", SERVICE_RELOAD }, // 374 + { "star-cbl", STAR_CBL }, // 375 + { "subscript", SUBSCRIPT }, // 376 + { "suppress", SUPPRESS }, // 377 + { "title", TITLE }, // 378 + { "trace", TRACE }, // 379 + { "use", USE }, // 380 + { "cobol-words", COBOL_WORDS }, // 381 + { "equate", EQUATE }, // 382 + { "undefine", UNDEFINE }, // 383 + { "cdf-define", CDF_DEFINE }, // 384 + { "cdf-display", CDF_DISPLAY }, // 385 + { "cdf-if", CDF_IF }, // 386 + { "cdf-else", CDF_ELSE }, // 387 + { "cdf-end-if", CDF_END_IF }, // 388 + { "cdf-evaluate", CDF_EVALUATE }, // 389 + { "cdf-when", CDF_WHEN }, // 390 + { "cdf-end-evaluate", CDF_END_EVALUATE }, // 391 + { "call-convention", CALL_CONVENTION }, // 392 + { "call-cobol", CALL_COBOL }, // 393 + { "call-verbatim", CALL_VERBATIM }, // 394 + { "cdf-push", CDF_PUSH }, // 395 + { "cdf-pop", CDF_POP }, // 396 + { "source-format", SOURCE_FORMAT }, // 397 + { "if", IF }, // 398 + { "then", THEN }, // 399 + { "else", ELSE }, // 400 + { "sentence", SENTENCE }, // 401 + { "accept", ACCEPT }, // 402 + { "add", ADD }, // 403 + { "alter", ALTER }, // 404 + { "call", CALL }, // 405 + { "cancel", CANCEL }, // 406 + { "close", CLOSE }, // 407 + { "compute", COMPUTE }, // 408 + { "continue", CONTINUE }, // 409 + { "delete", DELETE }, // 410 + { "display", DISPLAY }, // 411 + { "divide", DIVIDE }, // 412 + { "evaluate", EVALUATE }, // 413 + { "exit", EXIT }, // 414 + { "filler", FILLER_kw }, // 415 + { "goback", GOBACK }, // 416 + { "goto", GOTO }, // 417 + { "initialize", INITIALIZE }, // 418 + { "inspect", INSPECT }, // 419 + { "merge", MERGE }, // 420 + { "move", MOVE }, // 421 + { "multiply", MULTIPLY }, // 422 + { "open", OPEN }, // 423 + { "paragraph", PARAGRAPH }, // 424 + { "read", READ }, // 425 + { "release", RELEASE }, // 426 + { "return", RETURN }, // 427 + { "rewrite", REWRITE }, // 428 + { "search", SEARCH }, // 429 + { "set", SET }, // 430 + { "select", SELECT }, // 431 + { "sort", SORT }, // 432 + { "sort-merge", SORT_MERGE }, // 433 + { "string", STRING_kw }, // 434 + { "stop", STOP }, // 435 + { "subtract", SUBTRACT }, // 436 + { "start", START }, // 437 + { "unstring", UNSTRING }, // 438 + { "write", WRITE }, // 439 + { "when", WHEN }, // 440 + { "argument-number", ARGUMENT_NUMBER }, // 441 + { "argument-value", ARGUMENT_VALUE }, // 442 + { "environment-name", ENVIRONMENT_NAME }, // 443 + { "environment-value", ENVIRONMENT_VALUE }, // 444 + { "abs", ABS }, // 445 + { "access", ACCESS }, // 446 + { "acos", ACOS }, // 447 + { "actual", ACTUAL }, // 448 + { "advancing", ADVANCING }, // 449 + { "after", AFTER }, // 450 + { "all", ALL }, // 451 + { "allocate", ALLOCATE }, // 452 + { "alphabet", ALPHABET }, // 453 + { "alphabetic", ALPHABETIC }, // 454 + { "alphabetic-lower", ALPHABETIC_LOWER }, // 455 + { "alphabetic-upper", ALPHABETIC_UPPER }, // 456 + { "alphanumeric", ALPHANUMERIC }, // 457 + { "alphanumeric-edited", ALPHANUMERIC_EDITED }, // 458 + { "also", ALSO }, // 459 + { "alternate", ALTERNATE }, // 460 + { "annuity", ANNUITY }, // 461 + { "anum", ANUM }, // 462 + { "any", ANY }, // 463 + { "anycase", ANYCASE }, // 464 + { "apply", APPLY }, // 465 + { "are", ARE }, // 466 + { "area", AREA }, // 467 + { "areas", AREAS }, // 468 + { "as", AS }, // 469 + { "ascending", ASCENDING }, // 470 + { "activating", ACTIVATING }, // 471 + { "asin", ASIN }, // 472 + { "assign", ASSIGN }, // 473 + { "at", AT }, // 474 + { "atan", ATAN }, // 475 + { "based", BASED }, // 476 + { "baseconvert", BASECONVERT }, // 477 + { "before", BEFORE }, // 478 + { "binary", BINARY }, // 479 + { "bit", BIT }, // 480 + { "bit-of", BIT_OF }, // 481 + { "bit-to-char", BIT_TO_CHAR }, // 482 + { "blank", BLANK }, // 483 + { "block", BLOCK_kw }, // 484 + { "boolean-of-integer", BOOLEAN_OF_INTEGER }, // 485 + { "bottom", BOTTOM }, // 486 + { "by", BY }, // 487 + { "byte", BYTE }, // 488 + { "byte-length", BYTE_LENGTH }, // 489 + { "cf", CF }, // 490 + { "ch", CH }, // 491 + { "changed", CHANGED }, // 492 + { "char", CHAR }, // 493 + { "char-national", CHAR_NATIONAL }, // 494 + { "character", CHARACTER }, // 495 + { "characters", CHARACTERS }, // 496 + { "checking", CHECKING }, // 497 + { "class", CLASS }, // 498 + { "cobol", COBOL }, // 499 + { "code", CODE }, // 500 + { "code-set", CODESET }, // 501 + { "collating", COLLATING }, // 502 + { "column", COLUMN }, // 503 + { "combined-datetime", COMBINED_DATETIME }, // 504 + { "comma", COMMA }, // 505 + { "command-line", COMMAND_LINE }, // 506 + { "command-line-count", COMMAND_LINE_COUNT }, // 507 + { "commit", COMMIT }, // 508 + { "common", COMMON }, // 509 + { "concat", CONCAT }, // 510 + { "condition", CONDITION }, // 511 + { "configuration", CONFIGURATION_SECT }, // 512 + { "contains", CONTAINS }, // 513 + { "content", CONTENT }, // 514 + { "control", CONTROL }, // 515 + { "controls", CONTROLS }, // 516 + { "convert", CONVERT }, // 517 + { "converting", CONVERTING }, // 518 + { "corresponding", CORRESPONDING }, // 519 + { "cos", COS }, // 520 + { "count", COUNT }, // 521 + { "currency", CURRENCY }, // 522 + { "current", CURRENT }, // 523 + { "current-date", CURRENT_DATE }, // 524 + { "data", DATA }, // 525 + { "date", DATE }, // 526 + { "date-compiled", DATE_COMPILED }, // 527 + { "date-of-integer", DATE_OF_INTEGER }, // 528 + { "date-to-yyyymmdd", DATE_TO_YYYYMMDD }, // 529 + { "date-written", DATE_WRITTEN }, // 530 + { "day", DAY }, // 531 + { "day-of-integer", DAY_OF_INTEGER }, // 532 + { "day-of-week", DAY_OF_WEEK }, // 533 + { "day-to-yyyyddd", DAY_TO_YYYYDDD }, // 534 + { "dbcs", DBCS }, // 535 + { "de", DE }, // 536 + { "debugging", DEBUGGING }, // 537 + { "decimal-point", DECIMAL_POINT }, // 538 + { "declaratives", DECLARATIVES }, // 539 { "delimited", DELIMITED }, // 540 { "delimiter", DELIMITER }, // 541 { "depending", DEPENDING }, // 542 @@ -300,300 +300,300 @@ tokens = { { "ebcdic", EBCDIC }, // 551 { "ec", EC }, // 552 { "egcs", EGCS }, // 553 - { "entry", ENTRY }, // 554 - { "environment", ENVIRONMENT }, // 555 - { "equal", EQUAL }, // 556 - { "every", EVERY }, // 557 - { "examine", EXAMINE }, // 558 - { "exhibit", EXHIBIT }, // 559 - { "exp", EXP }, // 560 - { "exp10", EXP10 }, // 561 - { "extend", EXTEND }, // 562 - { "external", EXTERNAL }, // 563 - { "exception-file", EXCEPTION_FILE }, // 564 - { "exception-file-n", EXCEPTION_FILE_N }, // 565 - { "exception-location", EXCEPTION_LOCATION }, // 566 - { "exception-location-n", EXCEPTION_LOCATION_N }, // 567 - { "exception-statement", EXCEPTION_STATEMENT }, // 568 - { "exception-status", EXCEPTION_STATUS }, // 569 - { "factorial", FACTORIAL }, // 570 - { "false", FALSE_kw }, // 571 - { "fd", FD }, // 572 - { "file-control", FILE_CONTROL }, // 573 - { "file", FILE_KW }, // 574 - { "file-limit", FILE_LIMIT }, // 575 - { "final", FINAL }, // 576 - { "finally", FINALLY }, // 577 - { "find-string", FIND_STRING }, // 578 - { "first", FIRST }, // 579 - { "fixed", FIXED }, // 580 - { "footing", FOOTING }, // 581 - { "for", FOR }, // 582 - { "formatted-current-date", FORMATTED_CURRENT_DATE }, // 583 - { "formatted-date", FORMATTED_DATE }, // 584 - { "formatted-datetime", FORMATTED_DATETIME }, // 585 - { "formatted-time", FORMATTED_TIME }, // 586 - { "form-overflow", FORM_OVERFLOW }, // 587 - { "free", FREE }, // 588 - { "fraction-part", FRACTION_PART }, // 589 - { "from", FROM }, // 590 - { "function", FUNCTION }, // 591 - { "generate", GENERATE }, // 592 - { "giving", GIVING }, // 593 - { "global", GLOBAL }, // 594 - { "go", GO }, // 595 - { "group", GROUP }, // 596 - { "heading", HEADING }, // 597 - { "hex", HEX }, // 598 - { "hex-of", HEX_OF }, // 599 - { "hex-to-char", HEX_TO_CHAR }, // 600 - { "high-values", HIGH_VALUES }, // 601 - { "highest-algebraic", HIGHEST_ALGEBRAIC }, // 602 - { "hold", HOLD }, // 603 - { "ibm-360", IBM_360 }, // 604 - { "in", IN }, // 605 - { "include", INCLUDE }, // 606 - { "index", INDEX }, // 607 - { "indexed", INDEXED }, // 608 - { "indicate", INDICATE }, // 609 - { "initial", INITIAL_kw }, // 610 - { "initiate", INITIATE }, // 611 - { "input", INPUT }, // 612 - { "installation", INSTALLATION }, // 613 - { "interface", INTERFACE }, // 614 - { "integer", INTEGER }, // 615 - { "integer-of-boolean", INTEGER_OF_BOOLEAN }, // 616 - { "integer-of-date", INTEGER_OF_DATE }, // 617 - { "integer-of-day", INTEGER_OF_DAY }, // 618 - { "integer-of-formatted-date", INTEGER_OF_FORMATTED_DATE }, // 619 - { "integer-part", INTEGER_PART }, // 620 - { "into", INTO }, // 621 - { "intrinsic", INTRINSIC }, // 622 - { "invoke", INVOKE }, // 623 - { "i-o", IO }, // 624 - { "i-o-control", IO_CONTROL }, // 625 - { "is", IS }, // 626 - { "isnt", ISNT }, // 627 - { "kanji", KANJI }, // 628 - { "key", KEY }, // 629 - { "label", LABEL }, // 630 - { "last", LAST }, // 631 - { "leading", LEADING }, // 632 - { "left", LEFT }, // 633 - { "length", LENGTH }, // 634 - { "length-of", LENGTH_OF }, // 635 - { "limit", LIMIT }, // 636 - { "limits", LIMITS }, // 637 - { "line", LINE }, // 638 - { "lines", LINES }, // 639 - { "line-counter", LINE_COUNTER }, // 640 - { "linage", LINAGE }, // 641 - { "linkage", LINKAGE }, // 642 - { "locale", LOCALE }, // 643 - { "locale-compare", LOCALE_COMPARE }, // 644 - { "locale-date", LOCALE_DATE }, // 645 - { "locale-time", LOCALE_TIME }, // 646 - { "locale-time-from-seconds", LOCALE_TIME_FROM_SECONDS }, // 647 - { "local-storage", LOCAL_STORAGE }, // 648 - { "location", LOCATION }, // 649 - { "lock", LOCK }, // 650 - { "lock-on", LOCK_ON }, // 651 - { "log", LOG }, // 652 - { "log10", LOG10 }, // 653 - { "lower-case", LOWER_CASE }, // 654 - { "low-values", LOW_VALUES }, // 655 - { "lowest-algebraic", LOWEST_ALGEBRAIC }, // 656 - { "lparen", LPAREN }, // 657 - { "manual", MANUAL }, // 658 - { "maxx", MAXX }, // 659 - { "mean", MEAN }, // 660 - { "median", MEDIAN }, // 661 - { "midrange", MIDRANGE }, // 662 - { "minn", MINN }, // 663 - { "multiple", MULTIPLE }, // 664 - { "mod", MOD }, // 665 - { "mode", MODE }, // 666 - { "module-name", MODULE_NAME }, // 667 - { "named", NAMED }, // 668 - { "nat", NAT }, // 669 - { "national", NATIONAL }, // 670 - { "national-edited", NATIONAL_EDITED }, // 671 - { "national-of", NATIONAL_OF }, // 672 - { "native", NATIVE }, // 673 - { "nested", NESTED }, // 674 - { "next", NEXT }, // 675 - { "no", NO }, // 676 - { "note", NOTE }, // 677 - { "nulls", NULLS }, // 678 - { "null", NULLS }, // 678 - { "nullptr", NULLPTR }, // 679 - { "numeric", NUMERIC }, // 680 - { "numeric-edited", NUMERIC_EDITED }, // 681 - { "numval", NUMVAL }, // 682 - { "numval-c", NUMVAL_C }, // 683 - { "numval-f", NUMVAL_F }, // 684 - { "occurs", OCCURS }, // 685 - { "of", OF }, // 686 - { "off", OFF }, // 687 - { "omitted", OMITTED }, // 688 - { "on", ON }, // 689 - { "only", ONLY }, // 690 - { "optional", OPTIONAL }, // 691 - { "options", OPTIONS }, // 692 - { "ord", ORD }, // 693 - { "order", ORDER }, // 694 - { "ord-max", ORD_MAX }, // 695 - { "ord-min", ORD_MIN }, // 696 - { "organization", ORGANIZATION }, // 697 - { "other", OTHER }, // 698 - { "otherwise", OTHERWISE }, // 699 - { "output", OUTPUT }, // 700 - { "packed-decimal", PACKED_DECIMAL }, // 701 - { "padding", PADDING }, // 702 - { "page", PAGE }, // 703 - { "page-counter", PAGE_COUNTER }, // 704 - { "pf", PF }, // 705 - { "ph", PH }, // 706 - { "pi", PI }, // 707 - { "pic", PIC }, // 708 - { "picture", PICTURE }, // 709 - { "plus", PLUS }, // 710 - { "present-value", PRESENT_VALUE }, // 711 - { "print-switch", PRINT_SWITCH }, // 712 - { "procedure", PROCEDURE }, // 713 - { "procedures", PROCEDURES }, // 714 - { "proceed", PROCEED }, // 715 - { "process", PROCESS }, // 716 - { "program-id", PROGRAM_ID }, // 717 - { "program", PROGRAM_kw }, // 718 - { "property", PROPERTY }, // 719 - { "prototype", PROTOTYPE }, // 720 - { "pseudotext", PSEUDOTEXT }, // 721 - { "quotes", QUOTES }, // 722 - { "quote", QUOTES }, // 722 - { "random", RANDOM }, // 723 - { "random-seed", RANDOM_SEED }, // 724 - { "range", RANGE }, // 725 - { "raise", RAISE }, // 726 - { "raising", RAISING }, // 727 - { "rd", RD }, // 728 - { "record", RECORD }, // 729 - { "recording", RECORDING }, // 730 - { "records", RECORDS }, // 731 - { "recursive", RECURSIVE }, // 732 - { "redefines", REDEFINES }, // 733 - { "reel", REEL }, // 734 - { "reference", REFERENCE }, // 735 - { "relative", RELATIVE }, // 736 - { "rem", REM }, // 737 - { "remainder", REMAINDER }, // 738 - { "remarks", REMARKS }, // 739 - { "removal", REMOVAL }, // 740 - { "renames", RENAMES }, // 741 - { "replace", REPLACE }, // 742 - { "replacing", REPLACING }, // 743 - { "report", REPORT }, // 744 - { "reporting", REPORTING }, // 745 - { "reports", REPORTS }, // 746 - { "repository", REPOSITORY }, // 747 - { "rerun", RERUN }, // 748 - { "reserve", RESERVE }, // 749 - { "restricted", RESTRICTED }, // 750 - { "resume", RESUME }, // 751 - { "reverse", REVERSE }, // 752 - { "reversed", REVERSED }, // 753 - { "rewind", REWIND }, // 754 - { "rf", RF }, // 755 - { "rh", RH }, // 756 - { "right", RIGHT }, // 757 - { "rounded", ROUNDED }, // 758 - { "run", RUN }, // 759 - { "same", SAME }, // 760 - { "screen", SCREEN }, // 761 - { "sd", SD }, // 762 - { "seconds-from-formatted-time", SECONDS_FROM_FORMATTED_TIME }, // 763 - { "seconds-past-midnight", SECONDS_PAST_MIDNIGHT }, // 764 - { "security", SECURITY }, // 765 - { "separate", SEPARATE }, // 766 - { "sequence", SEQUENCE }, // 767 - { "sequential", SEQUENTIAL }, // 768 - { "sharing", SHARING }, // 769 - { "simple-exit", SIMPLE_EXIT }, // 770 - { "sign", SIGN }, // 771 - { "sin", SIN }, // 772 - { "size", SIZE }, // 773 - { "smallest-algebraic", SMALLEST_ALGEBRAIC }, // 774 - { "source", SOURCE }, // 775 - { "source-computer", SOURCE_COMPUTER }, // 776 - { "special-names", SPECIAL_NAMES }, // 777 - { "sqrt", SQRT }, // 778 - { "stack", STACK }, // 779 - { "standard", STANDARD }, // 780 - { "standard-1", STANDARD_1 }, // 781 - { "standard-deviation", STANDARD_DEVIATION }, // 782 - { "standard-compare", STANDARD_COMPARE }, // 783 - { "status", STATUS }, // 784 - { "strong", STRONG }, // 785 - { "substitute", SUBSTITUTE }, // 786 - { "sum", SUM }, // 787 - { "symbol", SYMBOL }, // 788 - { "symbolic", SYMBOLIC }, // 789 - { "synchronized", SYNCHRONIZED }, // 790 - { "tally", TALLY }, // 791 - { "tallying", TALLYING }, // 792 - { "tan", TAN }, // 793 - { "terminate", TERMINATE }, // 794 - { "test", TEST }, // 795 - { "test-date-yyyymmdd", TEST_DATE_YYYYMMDD }, // 796 - { "test-day-yyyyddd", TEST_DAY_YYYYDDD }, // 797 - { "test-formatted-datetime", TEST_FORMATTED_DATETIME }, // 798 - { "test-numval", TEST_NUMVAL }, // 799 - { "test-numval-c", TEST_NUMVAL_C }, // 800 - { "test-numval-f", TEST_NUMVAL_F }, // 801 - { "than", THAN }, // 802 - { "time", TIME }, // 803 - { "times", TIMES }, // 804 - { "to", TO }, // 805 - { "top", TOP }, // 806 - { "top-level", TOP_LEVEL }, // 807 - { "tracks", TRACKS }, // 808 - { "track-area", TRACK_AREA }, // 809 - { "trailing", TRAILING }, // 810 - { "transform", TRANSFORM }, // 811 - { "trim", TRIM }, // 812 - { "true", TRUE_kw }, // 813 - { "try", TRY }, // 814 - { "turn", TURN }, // 815 - { "type", TYPE }, // 816 - { "typedef", TYPEDEF }, // 817 - { "ulength", ULENGTH }, // 818 - { "unbounded", UNBOUNDED }, // 819 - { "unit", UNIT }, // 820 - { "units", UNITS }, // 821 - { "unit-record", UNIT_RECORD }, // 822 - { "until", UNTIL }, // 823 - { "up", UP }, // 824 - { "upon", UPON }, // 825 - { "upos", UPOS }, // 826 - { "upper-case", UPPER_CASE }, // 827 - { "usage", USAGE }, // 828 - { "using", USING }, // 829 - { "usubstr", USUBSTR }, // 830 - { "usupplementary", USUPPLEMENTARY }, // 831 - { "utility", UTILITY }, // 832 - { "uuid4", UUID4 }, // 833 - { "uvalid", UVALID }, // 834 - { "uwidth", UWIDTH }, // 835 - { "value", VALUE }, // 836 - { "variance", VARIANCE }, // 837 - { "varying", VARYING }, // 838 - { "volatile", VOLATILE }, // 839 - { "when-compiled", WHEN_COMPILED }, // 840 - { "with", WITH }, // 841 - { "working-storage", WORKING_STORAGE }, // 842 - { "xml", XML }, // 843 - { "xmlgenerate", XMLGENERATE }, // 844 - { "xmlparse", XMLPARSE }, // 845 + { "encoding", ENCODING }, // 554 + { "entry", ENTRY }, // 555 + { "environment", ENVIRONMENT }, // 556 + { "equal", EQUAL }, // 557 + { "every", EVERY }, // 558 + { "examine", EXAMINE }, // 559 + { "exhibit", EXHIBIT }, // 560 + { "exp", EXP }, // 561 + { "exp10", EXP10 }, // 562 + { "extend", EXTEND }, // 563 + { "external", EXTERNAL }, // 564 + { "exception-file", EXCEPTION_FILE }, // 565 + { "exception-file-n", EXCEPTION_FILE_N }, // 566 + { "exception-location", EXCEPTION_LOCATION }, // 567 + { "exception-location-n", EXCEPTION_LOCATION_N }, // 568 + { "exception-statement", EXCEPTION_STATEMENT }, // 569 + { "exception-status", EXCEPTION_STATUS }, // 570 + { "factorial", FACTORIAL }, // 571 + { "false", FALSE_kw }, // 572 + { "fd", FD }, // 573 + { "file-control", FILE_CONTROL }, // 574 + { "file", FILE_KW }, // 575 + { "file-limit", FILE_LIMIT }, // 576 + { "final", FINAL }, // 577 + { "finally", FINALLY }, // 578 + { "find-string", FIND_STRING }, // 579 + { "first", FIRST }, // 580 + { "fixed", FIXED }, // 581 + { "footing", FOOTING }, // 582 + { "for", FOR }, // 583 + { "formatted-current-date", FORMATTED_CURRENT_DATE }, // 584 + { "formatted-date", FORMATTED_DATE }, // 585 + { "formatted-datetime", FORMATTED_DATETIME }, // 586 + { "formatted-time", FORMATTED_TIME }, // 587 + { "form-overflow", FORM_OVERFLOW }, // 588 + { "free", FREE }, // 589 + { "fraction-part", FRACTION_PART }, // 590 + { "from", FROM }, // 591 + { "function", FUNCTION }, // 592 + { "generate", GENERATE }, // 593 + { "giving", GIVING }, // 594 + { "global", GLOBAL }, // 595 + { "go", GO }, // 596 + { "group", GROUP }, // 597 + { "heading", HEADING }, // 598 + { "hex", HEX }, // 599 + { "hex-of", HEX_OF }, // 600 + { "hex-to-char", HEX_TO_CHAR }, // 601 + { "high-values", HIGH_VALUES }, // 602 + { "highest-algebraic", HIGHEST_ALGEBRAIC }, // 603 + { "hold", HOLD }, // 604 + { "ibm-360", IBM_360 }, // 605 + { "in", IN }, // 606 + { "include", INCLUDE }, // 607 + { "index", INDEX }, // 608 + { "indexed", INDEXED }, // 609 + { "indicate", INDICATE }, // 610 + { "initial", INITIAL_kw }, // 611 + { "initiate", INITIATE }, // 612 + { "input", INPUT }, // 613 + { "installation", INSTALLATION }, // 614 + { "interface", INTERFACE }, // 615 + { "integer", INTEGER }, // 616 + { "integer-of-boolean", INTEGER_OF_BOOLEAN }, // 617 + { "integer-of-date", INTEGER_OF_DATE }, // 618 + { "integer-of-day", INTEGER_OF_DAY }, // 619 + { "integer-of-formatted-date", INTEGER_OF_FORMATTED_DATE }, // 620 + { "integer-part", INTEGER_PART }, // 621 + { "into", INTO }, // 622 + { "intrinsic", INTRINSIC }, // 623 + { "invoke", INVOKE }, // 624 + { "i-o", IO }, // 625 + { "i-o-control", IO_CONTROL }, // 626 + { "is", IS }, // 627 + { "isnt", ISNT }, // 628 + { "kanji", KANJI }, // 629 + { "key", KEY }, // 630 + { "label", LABEL }, // 631 + { "last", LAST }, // 632 + { "leading", LEADING }, // 633 + { "left", LEFT }, // 634 + { "length", LENGTH }, // 635 + { "length-of", LENGTH_OF }, // 636 + { "limit", LIMIT }, // 637 + { "limits", LIMITS }, // 638 + { "line", LINE }, // 639 + { "lines", LINES }, // 640 + { "line-counter", LINE_COUNTER }, // 641 + { "linage", LINAGE }, // 642 + { "linkage", LINKAGE }, // 643 + { "locale", LOCALE }, // 644 + { "locale-compare", LOCALE_COMPARE }, // 645 + { "locale-date", LOCALE_DATE }, // 646 + { "locale-time", LOCALE_TIME }, // 647 + { "locale-time-from-seconds", LOCALE_TIME_FROM_SECONDS }, // 648 + { "local-storage", LOCAL_STORAGE }, // 649 + { "location", LOCATION }, // 650 + { "lock", LOCK }, // 651 + { "lock-on", LOCK_ON }, // 652 + { "log", LOG }, // 653 + { "log10", LOG10 }, // 654 + { "lower-case", LOWER_CASE }, // 655 + { "low-values", LOW_VALUES }, // 656 + { "lowest-algebraic", LOWEST_ALGEBRAIC }, // 657 + { "lparen", LPAREN }, // 658 + { "manual", MANUAL }, // 659 + { "maxx", MAXX }, // 660 + { "mean", MEAN }, // 661 + { "median", MEDIAN }, // 662 + { "midrange", MIDRANGE }, // 663 + { "minn", MINN }, // 664 + { "multiple", MULTIPLE }, // 665 + { "mod", MOD }, // 666 + { "mode", MODE }, // 667 + { "module-name", MODULE_NAME }, // 668 + { "named", NAMED }, // 669 + { "nat", NAT }, // 670 + { "national", NATIONAL }, // 671 + { "national-edited", NATIONAL_EDITED }, // 672 + { "national-of", NATIONAL_OF }, // 673 + { "native", NATIVE }, // 674 + { "nested", NESTED }, // 675 + { "next", NEXT }, // 676 + { "no", NO }, // 677 + { "note", NOTE }, // 678 + { "nulls", NULLS }, // 679 + { "null", NULLS }, // 679 + { "nullptr", NULLPTR }, // 680 + { "numeric", NUMERIC }, // 681 + { "numeric-edited", NUMERIC_EDITED }, // 682 + { "numval", NUMVAL }, // 683 + { "numval-c", NUMVAL_C }, // 684 + { "numval-f", NUMVAL_F }, // 685 + { "occurs", OCCURS }, // 686 + { "of", OF }, // 687 + { "off", OFF }, // 688 + { "omitted", OMITTED }, // 689 + { "on", ON }, // 690 + { "only", ONLY }, // 691 + { "optional", OPTIONAL }, // 692 + { "options", OPTIONS }, // 693 + { "ord", ORD }, // 694 + { "order", ORDER }, // 695 + { "ord-max", ORD_MAX }, // 696 + { "ord-min", ORD_MIN }, // 697 + { "organization", ORGANIZATION }, // 698 + { "other", OTHER }, // 699 + { "otherwise", OTHERWISE }, // 700 + { "output", OUTPUT }, // 701 + { "packed-decimal", PACKED_DECIMAL }, // 702 + { "padding", PADDING }, // 703 + { "page", PAGE }, // 704 + { "page-counter", PAGE_COUNTER }, // 705 + { "pf", PF }, // 706 + { "ph", PH }, // 707 + { "pi", PI }, // 708 + { "pic", PIC }, // 709 + { "picture", PICTURE }, // 710 + { "plus", PLUS }, // 711 + { "present-value", PRESENT_VALUE }, // 712 + { "print-switch", PRINT_SWITCH }, // 713 + { "procedure", PROCEDURE }, // 714 + { "procedures", PROCEDURES }, // 715 + { "proceed", PROCEED }, // 716 + { "process", PROCESS }, // 717 + { "processing", PROCESSING }, // 718 + { "program-id", PROGRAM_ID }, // 719 + { "program", PROGRAM_kw }, // 720 + { "property", PROPERTY }, // 721 + { "prototype", PROTOTYPE }, // 722 + { "pseudotext", PSEUDOTEXT }, // 723 + { "quotes", QUOTES }, // 724 + { "quote", QUOTES }, // 724 + { "random", RANDOM }, // 725 + { "random-seed", RANDOM_SEED }, // 726 + { "range", RANGE }, // 727 + { "raise", RAISE }, // 728 + { "raising", RAISING }, // 729 + { "rd", RD }, // 730 + { "record", RECORD }, // 731 + { "recording", RECORDING }, // 732 + { "records", RECORDS }, // 733 + { "recursive", RECURSIVE }, // 734 + { "redefines", REDEFINES }, // 735 + { "reel", REEL }, // 736 + { "reference", REFERENCE }, // 737 + { "relative", RELATIVE }, // 738 + { "rem", REM }, // 739 + { "remainder", REMAINDER }, // 740 + { "remarks", REMARKS }, // 741 + { "removal", REMOVAL }, // 742 + { "renames", RENAMES }, // 743 + { "replace", REPLACE }, // 744 + { "replacing", REPLACING }, // 745 + { "report", REPORT }, // 746 + { "reporting", REPORTING }, // 747 + { "reports", REPORTS }, // 748 + { "repository", REPOSITORY }, // 749 + { "rerun", RERUN }, // 750 + { "reserve", RESERVE }, // 751 + { "restricted", RESTRICTED }, // 752 + { "resume", RESUME }, // 753 + { "reverse", REVERSE }, // 754 + { "reversed", REVERSED }, // 755 + { "rewind", REWIND }, // 756 + { "rf", RF }, // 757 + { "rh", RH }, // 758 + { "right", RIGHT }, // 759 + { "rounded", ROUNDED }, // 760 + { "run", RUN }, // 761 + { "same", SAME }, // 762 + { "screen", SCREEN }, // 763 + { "sd", SD }, // 764 + { "seconds-from-formatted-time", SECONDS_FROM_FORMATTED_TIME }, // 765 + { "seconds-past-midnight", SECONDS_PAST_MIDNIGHT }, // 766 + { "security", SECURITY }, // 767 + { "separate", SEPARATE }, // 768 + { "sequence", SEQUENCE }, // 769 + { "sequential", SEQUENTIAL }, // 770 + { "sharing", SHARING }, // 771 + { "simple-exit", SIMPLE_EXIT }, // 772 + { "sign", SIGN }, // 773 + { "sin", SIN }, // 774 + { "size", SIZE }, // 775 + { "smallest-algebraic", SMALLEST_ALGEBRAIC }, // 776 + { "source", SOURCE }, // 777 + { "source-computer", SOURCE_COMPUTER }, // 778 + { "special-names", SPECIAL_NAMES }, // 779 + { "sqrt", SQRT }, // 780 + { "stack", STACK }, // 781 + { "standard", STANDARD }, // 782 + { "standard-1", STANDARD_1 }, // 783 + { "standard-deviation", STANDARD_DEVIATION }, // 784 + { "standard-compare", STANDARD_COMPARE }, // 785 + { "status", STATUS }, // 786 + { "strong", STRONG }, // 787 + { "substitute", SUBSTITUTE }, // 788 + { "sum", SUM }, // 789 + { "symbol", SYMBOL }, // 790 + { "symbolic", SYMBOLIC }, // 791 + { "synchronized", SYNCHRONIZED }, // 792 + { "tally", TALLY }, // 793 + { "tallying", TALLYING }, // 794 + { "tan", TAN }, // 795 + { "terminate", TERMINATE }, // 796 + { "test", TEST }, // 797 + { "test-date-yyyymmdd", TEST_DATE_YYYYMMDD }, // 798 + { "test-day-yyyyddd", TEST_DAY_YYYYDDD }, // 799 + { "test-formatted-datetime", TEST_FORMATTED_DATETIME }, // 800 + { "test-numval", TEST_NUMVAL }, // 801 + { "test-numval-c", TEST_NUMVAL_C }, // 802 + { "test-numval-f", TEST_NUMVAL_F }, // 803 + { "than", THAN }, // 804 + { "time", TIME }, // 805 + { "times", TIMES }, // 806 + { "to", TO }, // 807 + { "top", TOP }, // 808 + { "top-level", TOP_LEVEL }, // 809 + { "tracks", TRACKS }, // 810 + { "track-area", TRACK_AREA }, // 811 + { "trailing", TRAILING }, // 812 + { "transform", TRANSFORM }, // 813 + { "trim", TRIM }, // 814 + { "true", TRUE_kw }, // 815 + { "try", TRY }, // 816 + { "turn", TURN }, // 817 + { "type", TYPE }, // 818 + { "typedef", TYPEDEF }, // 819 + { "ulength", ULENGTH }, // 820 + { "unbounded", UNBOUNDED }, // 821 + { "unit", UNIT }, // 822 + { "units", UNITS }, // 823 + { "unit-record", UNIT_RECORD }, // 824 + { "until", UNTIL }, // 825 + { "up", UP }, // 826 + { "upon", UPON }, // 827 + { "upos", UPOS }, // 828 + { "upper-case", UPPER_CASE }, // 829 + { "usage", USAGE }, // 830 + { "using", USING }, // 831 + { "usubstr", USUBSTR }, // 832 + { "usupplementary", USUPPLEMENTARY }, // 833 + { "utility", UTILITY }, // 834 + { "uuid4", UUID4 }, // 835 + { "uvalid", UVALID }, // 836 + { "uwidth", UWIDTH }, // 837 + { "validating", VALIDATING }, // 838 + { "value", VALUE }, // 839 + { "variance", VARIANCE }, // 840 + { "varying", VARYING }, // 841 + { "volatile", VOLATILE }, // 842 + { "when-compiled", WHEN_COMPILED }, // 843 + { "with", WITH }, // 844 + { "working-storage", WORKING_STORAGE }, // 845 { "year-to-yyyy", YEAR_TO_YYYY }, // 846 { "yyyyddd", YYYYDDD }, // 847 { "yyyymmdd", YYYYMMDD }, // 848 @@ -686,17 +686,26 @@ tokens = { { "end-subtract", END_SUBTRACT }, // 935 { "end-unstring", END_UNSTRING }, // 936 { "end-write", END_WRITE }, // 937 - { "end-if", END_IF }, // 938 - { "thru", THRU }, // 939 - { "through", THRU }, // 939 - { "or", OR }, // 940 - { "and", AND }, // 941 - { "not", NOT }, // 942 - { "ne", NE }, // 943 - { "le", LE }, // 944 - { "ge", GE }, // 945 - { "pow", POW }, // 946 - { "neg", NEG }, // 947 + { "end-xml", END_XML }, // 938 + { "end-if", END_IF }, // 939 + { "xmlgenerate", XMLGENERATE }, // 940 + { "xmlparse", XMLPARSE }, // 942 + { "attributes", ATTRIBUTES }, // 944 + { "element", ELEMENT }, // 945 + { "namespace", NAMESPACE }, // 946 + { "namespace-prefix", NAMESPACE_PREFIX }, // 947 + { "nonnumeric", NONNUMERIC }, // 949 + { "xml-declaration", XML_DECLARATION }, // 950 + { "thru", THRU }, // 952 + { "through", THRU }, // 952 + { "or", OR }, // 953 + { "and", AND }, // 954 + { "not", NOT }, // 955 + { "ne", NE }, // 956 + { "le", LE }, // 957 + { "ge", GE }, // 958 + { "pow", POW }, // 959 + { "neg", NEG }, // 960 }; // cppcheck-suppress useInitializationList @@ -799,190 +808,190 @@ token_names = { "MIGHT-BE", // 95 (353) "FUNCTION-UDF", // 96 (354) "FUNCTION-UDF-0", // 97 (355) - "DATE-FMT", // 98 (356) - "TIME-FMT", // 99 (357) - "DATETIME-FMT", // 100 (358) - "BASIS", // 101 (359) - "CBL", // 102 (360) - "CONSTANT", // 103 (361) - "COPY", // 104 (362) - "DEFINED", // 105 (363) - "ENTER", // 106 (364) - "FEATURE", // 107 (365) - "INSERTT", // 108 (366) - "LSUB", // 109 (367) - "PARAMETER", // 110 (368) - "OVERRIDE", // 111 (369) - "READY", // 112 (370) - "RESET", // 113 (371) - "RSUB", // 114 (372) - "SERVICE-RELOAD", // 115 (373) - "STAR-CBL", // 116 (374) - "SUBSCRIPT", // 117 (375) - "SUPPRESS", // 118 (376) - "TITLE", // 119 (377) - "TRACE", // 120 (378) - "USE", // 121 (379) - "COBOL-WORDS", // 122 (380) - "EQUATE", // 123 (381) - "UNDEFINE", // 124 (382) - "CDF-DEFINE", // 125 (383) - "CDF-DISPLAY", // 126 (384) - "CDF-IF", // 127 (385) - "CDF-ELSE", // 128 (386) - "CDF-END-IF", // 129 (387) - "CDF-EVALUATE", // 130 (388) - "CDF-WHEN", // 131 (389) - "CDF-END-EVALUATE", // 132 (390) - "CALL-CONVENTION", // 133 (391) - "CALL-COBOL", // 134 (392) - "CALL-VERBATIM", // 135 (393) - "CDF-PUSH", // 136 (394) - "CDF-POP", // 137 (395) - "SOURCE-FORMAT", // 138 (396) - "IF", // 139 (397) - "THEN", // 140 (398) - "ELSE", // 141 (399) - "SENTENCE", // 142 (400) - "ACCEPT", // 143 (401) - "ADD", // 144 (402) - "ALTER", // 145 (403) - "CALL", // 146 (404) - "CANCEL", // 147 (405) - "CLOSE", // 148 (406) - "COMPUTE", // 149 (407) - "CONTINUE", // 150 (408) - "DELETE", // 151 (409) - "DISPLAY", // 152 (410) - "DIVIDE", // 153 (411) - "EVALUATE", // 154 (412) - "EXIT", // 155 (413) - "FILLER", // 156 (414) - "GOBACK", // 157 (415) - "GOTO", // 158 (416) - "INITIALIZE", // 159 (417) - "INSPECT", // 160 (418) - "MERGE", // 161 (419) - "MOVE", // 162 (420) - "MULTIPLY", // 163 (421) - "OPEN", // 164 (422) - "PARAGRAPH", // 165 (423) - "READ", // 166 (424) - "RELEASE", // 167 (425) - "RETURN", // 168 (426) - "REWRITE", // 169 (427) - "SEARCH", // 170 (428) - "SET", // 171 (429) - "SELECT", // 172 (430) - "SORT", // 173 (431) - "SORT-MERGE", // 174 (432) - "STRING", // 175 (433) - "STOP", // 176 (434) - "SUBTRACT", // 177 (435) - "START", // 178 (436) - "UNSTRING", // 179 (437) - "WRITE", // 180 (438) - "WHEN", // 181 (439) - "ARGUMENT-NUMBER", // 182 (440) - "ARGUMENT-VALUE", // 183 (441) - "ENVIRONMENT-NAME", // 184 (442) - "ENVIRONMENT-VALUE", // 185 (443) - "ABS", // 186 (444) - "ACCESS", // 187 (445) - "ACOS", // 188 (446) - "ACTUAL", // 189 (447) - "ADVANCING", // 190 (448) - "AFTER", // 191 (449) - "ALL", // 192 (450) - "ALLOCATE", // 193 (451) - "ALPHABET", // 194 (452) - "ALPHABETIC", // 195 (453) - "ALPHABETIC-LOWER", // 196 (454) - "ALPHABETIC-UPPER", // 197 (455) - "ALPHANUMERIC", // 198 (456) - "ALPHANUMERIC-EDITED", // 199 (457) - "ALSO", // 200 (458) - "ALTERNATE", // 201 (459) - "ANNUITY", // 202 (460) - "ANUM", // 203 (461) - "ANY", // 204 (462) - "ANYCASE", // 205 (463) - "APPLY", // 206 (464) - "ARE", // 207 (465) - "AREA", // 208 (466) - "AREAS", // 209 (467) - "AS", // 210 (468) - "ASCENDING", // 211 (469) - "ACTIVATING", // 212 (470) - "ASIN", // 213 (471) - "ASSIGN", // 214 (472) - "AT", // 215 (473) - "ATAN", // 216 (474) - "BASED", // 217 (475) - "BASECONVERT", // 218 (476) - "BEFORE", // 219 (477) - "BINARY", // 220 (478) - "BIT", // 221 (479) - "BIT-OF", // 222 (480) - "BIT-TO-CHAR", // 223 (481) - "BLANK", // 224 (482) - "BLOCK", // 225 (483) - "BOOLEAN-OF-INTEGER", // 226 (484) - "BOTTOM", // 227 (485) - "BY", // 228 (486) - "BYTE", // 229 (487) - "BYTE-LENGTH", // 230 (488) - "CF", // 231 (489) - "CH", // 232 (490) - "CHANGED", // 233 (491) - "CHAR", // 234 (492) - "CHAR-NATIONAL", // 235 (493) - "CHARACTER", // 236 (494) - "CHARACTERS", // 237 (495) - "CHECKING", // 238 (496) - "CLASS", // 239 (497) - "COBOL", // 240 (498) - "CODE", // 241 (499) - "CODE-SET", // 242 (500) - "COLLATING", // 243 (501) - "COLUMN", // 244 (502) - "COMBINED-DATETIME", // 245 (503) - "COMMA", // 246 (504) - "COMMAND-LINE", // 247 (505) - "COMMAND-LINE-COUNT", // 248 (506) - "COMMIT", // 249 (507) - "COMMON", // 250 (508) - "CONCAT", // 251 (509) - "CONDITION", // 252 (510) - "CONFIGURATION", // 253 (511) - "CONTAINS", // 254 (512) - "CONTENT", // 255 (513) - "CONTROL", // 256 (514) - "CONTROLS", // 257 (515) - "CONVERT", // 258 (516) - "CONVERTING", // 259 (517) - "CORRESPONDING", // 260 (518) - "COS", // 261 (519) - "COUNT", // 262 (520) - "CURRENCY", // 263 (521) - "CURRENT", // 264 (522) - "CURRENT-DATE", // 265 (523) - "DATA", // 266 (524) - "DATE", // 267 (525) - "DATE-COMPILED", // 268 (526) - "DATE-OF-INTEGER", // 269 (527) - "DATE-TO-YYYYMMDD", // 270 (528) - "DATE-WRITTEN", // 271 (529) - "DAY", // 272 (530) - "DAY-OF-INTEGER", // 273 (531) - "DAY-OF-WEEK", // 274 (532) - "DAY-TO-YYYYDDD", // 275 (533) - "DBCS", // 276 (534) - "DE", // 277 (535) - "DEBUGGING", // 278 (536) - "DECIMAL-POINT", // 279 (537) - "DECLARATIVES", // 280 (538) - "DEFAULT", // 281 (539) + "DEFAULT", // 98 (356) + "DATE-FMT", // 99 (357) + "TIME-FMT", // 100 (358) + "DATETIME-FMT", // 101 (359) + "BASIS", // 102 (360) + "CBL", // 103 (361) + "CONSTANT", // 104 (362) + "COPY", // 105 (363) + "DEFINED", // 106 (364) + "ENTER", // 107 (365) + "FEATURE", // 108 (366) + "INSERTT", // 109 (367) + "LSUB", // 110 (368) + "PARAMETER", // 111 (369) + "OVERRIDE", // 112 (370) + "READY", // 113 (371) + "RESET", // 114 (372) + "RSUB", // 115 (373) + "SERVICE-RELOAD", // 116 (374) + "STAR-CBL", // 117 (375) + "SUBSCRIPT", // 118 (376) + "SUPPRESS", // 119 (377) + "TITLE", // 120 (378) + "TRACE", // 121 (379) + "USE", // 122 (380) + "COBOL-WORDS", // 123 (381) + "EQUATE", // 124 (382) + "UNDEFINE", // 125 (383) + "CDF-DEFINE", // 126 (384) + "CDF-DISPLAY", // 127 (385) + "CDF-IF", // 128 (386) + "CDF-ELSE", // 129 (387) + "CDF-END-IF", // 130 (388) + "CDF-EVALUATE", // 131 (389) + "CDF-WHEN", // 132 (390) + "CDF-END-EVALUATE", // 133 (391) + "CALL-CONVENTION", // 134 (392) + "CALL-COBOL", // 135 (393) + "CALL-VERBATIM", // 136 (394) + "CDF-PUSH", // 137 (395) + "CDF-POP", // 138 (396) + "SOURCE-FORMAT", // 139 (397) + "IF", // 140 (398) + "THEN", // 141 (399) + "ELSE", // 142 (400) + "SENTENCE", // 143 (401) + "ACCEPT", // 144 (402) + "ADD", // 145 (403) + "ALTER", // 146 (404) + "CALL", // 147 (405) + "CANCEL", // 148 (406) + "CLOSE", // 149 (407) + "COMPUTE", // 150 (408) + "CONTINUE", // 151 (409) + "DELETE", // 152 (410) + "DISPLAY", // 153 (411) + "DIVIDE", // 154 (412) + "EVALUATE", // 155 (413) + "EXIT", // 156 (414) + "FILLER", // 157 (415) + "GOBACK", // 158 (416) + "GOTO", // 159 (417) + "INITIALIZE", // 160 (418) + "INSPECT", // 161 (419) + "MERGE", // 162 (420) + "MOVE", // 163 (421) + "MULTIPLY", // 164 (422) + "OPEN", // 165 (423) + "PARAGRAPH", // 166 (424) + "READ", // 167 (425) + "RELEASE", // 168 (426) + "RETURN", // 169 (427) + "REWRITE", // 170 (428) + "SEARCH", // 171 (429) + "SET", // 172 (430) + "SELECT", // 173 (431) + "SORT", // 174 (432) + "SORT-MERGE", // 175 (433) + "STRING", // 176 (434) + "STOP", // 177 (435) + "SUBTRACT", // 178 (436) + "START", // 179 (437) + "UNSTRING", // 180 (438) + "WRITE", // 181 (439) + "WHEN", // 182 (440) + "ARGUMENT-NUMBER", // 183 (441) + "ARGUMENT-VALUE", // 184 (442) + "ENVIRONMENT-NAME", // 185 (443) + "ENVIRONMENT-VALUE", // 186 (444) + "ABS", // 187 (445) + "ACCESS", // 188 (446) + "ACOS", // 189 (447) + "ACTUAL", // 190 (448) + "ADVANCING", // 191 (449) + "AFTER", // 192 (450) + "ALL", // 193 (451) + "ALLOCATE", // 194 (452) + "ALPHABET", // 195 (453) + "ALPHABETIC", // 196 (454) + "ALPHABETIC-LOWER", // 197 (455) + "ALPHABETIC-UPPER", // 198 (456) + "ALPHANUMERIC", // 199 (457) + "ALPHANUMERIC-EDITED", // 200 (458) + "ALSO", // 201 (459) + "ALTERNATE", // 202 (460) + "ANNUITY", // 203 (461) + "ANUM", // 204 (462) + "ANY", // 205 (463) + "ANYCASE", // 206 (464) + "APPLY", // 207 (465) + "ARE", // 208 (466) + "AREA", // 209 (467) + "AREAS", // 210 (468) + "AS", // 211 (469) + "ASCENDING", // 212 (470) + "ACTIVATING", // 213 (471) + "ASIN", // 214 (472) + "ASSIGN", // 215 (473) + "AT", // 216 (474) + "ATAN", // 217 (475) + "BASED", // 218 (476) + "BASECONVERT", // 219 (477) + "BEFORE", // 220 (478) + "BINARY", // 221 (479) + "BIT", // 222 (480) + "BIT-OF", // 223 (481) + "BIT-TO-CHAR", // 224 (482) + "BLANK", // 225 (483) + "BLOCK", // 226 (484) + "BOOLEAN-OF-INTEGER", // 227 (485) + "BOTTOM", // 228 (486) + "BY", // 229 (487) + "BYTE", // 230 (488) + "BYTE-LENGTH", // 231 (489) + "CF", // 232 (490) + "CH", // 233 (491) + "CHANGED", // 234 (492) + "CHAR", // 235 (493) + "CHAR-NATIONAL", // 236 (494) + "CHARACTER", // 237 (495) + "CHARACTERS", // 238 (496) + "CHECKING", // 239 (497) + "CLASS", // 240 (498) + "COBOL", // 241 (499) + "CODE", // 242 (500) + "CODE-SET", // 243 (501) + "COLLATING", // 244 (502) + "COLUMN", // 245 (503) + "COMBINED-DATETIME", // 246 (504) + "COMMA", // 247 (505) + "COMMAND-LINE", // 248 (506) + "COMMAND-LINE-COUNT", // 249 (507) + "COMMIT", // 250 (508) + "COMMON", // 251 (509) + "CONCAT", // 252 (510) + "CONDITION", // 253 (511) + "CONFIGURATION", // 254 (512) + "CONTAINS", // 255 (513) + "CONTENT", // 256 (514) + "CONTROL", // 257 (515) + "CONTROLS", // 258 (516) + "CONVERT", // 259 (517) + "CONVERTING", // 260 (518) + "CORRESPONDING", // 261 (519) + "COS", // 262 (520) + "COUNT", // 263 (521) + "CURRENCY", // 264 (522) + "CURRENT", // 265 (523) + "CURRENT-DATE", // 266 (524) + "DATA", // 267 (525) + "DATE", // 268 (526) + "DATE-COMPILED", // 269 (527) + "DATE-OF-INTEGER", // 270 (528) + "DATE-TO-YYYYMMDD", // 271 (529) + "DATE-WRITTEN", // 272 (530) + "DAY", // 273 (531) + "DAY-OF-INTEGER", // 274 (532) + "DAY-OF-WEEK", // 275 (533) + "DAY-TO-YYYYDDD", // 276 (534) + "DBCS", // 277 (535) + "DE", // 278 (536) + "DEBUGGING", // 279 (537) + "DECIMAL-POINT", // 280 (538) + "DECLARATIVES", // 281 (539) "DELIMITED", // 282 (540) "DELIMITER", // 283 (541) "DEPENDING", // 284 (542) @@ -997,298 +1006,298 @@ token_names = { "EBCDIC", // 293 (551) "EC", // 294 (552) "EGCS", // 295 (553) - "ENTRY", // 296 (554) - "ENVIRONMENT", // 297 (555) - "EQUAL", // 298 (556) - "EVERY", // 299 (557) - "EXAMINE", // 300 (558) - "EXHIBIT", // 301 (559) - "EXP", // 302 (560) - "EXP10", // 303 (561) - "EXTEND", // 304 (562) - "EXTERNAL", // 305 (563) - "EXCEPTION-FILE", // 306 (564) - "EXCEPTION-FILE-N", // 307 (565) - "EXCEPTION-LOCATION", // 308 (566) - "EXCEPTION-LOCATION-N", // 309 (567) - "EXCEPTION-STATEMENT", // 310 (568) - "EXCEPTION-STATUS", // 311 (569) - "FACTORIAL", // 312 (570) - "FALSE", // 313 (571) - "FD", // 314 (572) - "FILE-CONTROL", // 315 (573) - "FILE", // 316 (574) - "FILE-LIMIT", // 317 (575) - "FINAL", // 318 (576) - "FINALLY", // 319 (577) - "FIND-STRING", // 320 (578) - "FIRST", // 321 (579) - "FIXED", // 322 (580) - "FOOTING", // 323 (581) - "FOR", // 324 (582) - "FORMATTED-CURRENT-DATE", // 325 (583) - "FORMATTED-DATE", // 326 (584) - "FORMATTED-DATETIME", // 327 (585) - "FORMATTED-TIME", // 328 (586) - "FORM-OVERFLOW", // 329 (587) - "FREE", // 330 (588) - "FRACTION-PART", // 331 (589) - "FROM", // 332 (590) - "FUNCTION", // 333 (591) - "GENERATE", // 334 (592) - "GIVING", // 335 (593) - "GLOBAL", // 336 (594) - "GO", // 337 (595) - "GROUP", // 338 (596) - "HEADING", // 339 (597) - "HEX", // 340 (598) - "HEX-OF", // 341 (599) - "HEX-TO-CHAR", // 342 (600) - "HIGH-VALUES", // 343 (601) - "HIGHEST-ALGEBRAIC", // 344 (602) - "HOLD", // 345 (603) - "IBM-360", // 346 (604) - "IN", // 347 (605) - "INCLUDE", // 348 (606) - "INDEX", // 349 (607) - "INDEXED", // 350 (608) - "INDICATE", // 351 (609) - "INITIAL", // 352 (610) - "INITIATE", // 353 (611) - "INPUT", // 354 (612) - "INSTALLATION", // 355 (613) - "INTERFACE", // 356 (614) - "INTEGER", // 357 (615) - "INTEGER-OF-BOOLEAN", // 358 (616) - "INTEGER-OF-DATE", // 359 (617) - "INTEGER-OF-DAY", // 360 (618) - "INTEGER-OF-FORMATTED-DATE", // 361 (619) - "INTEGER-PART", // 362 (620) - "INTO", // 363 (621) - "INTRINSIC", // 364 (622) - "INVOKE", // 365 (623) - "I-O", // 366 (624) - "I-O-CONTROL", // 367 (625) - "IS", // 368 (626) - "ISNT", // 369 (627) - "KANJI", // 370 (628) - "KEY", // 371 (629) - "LABEL", // 372 (630) - "LAST", // 373 (631) - "LEADING", // 374 (632) - "LEFT", // 375 (633) - "LENGTH", // 376 (634) - "LENGTH-OF", // 377 (635) - "LIMIT", // 378 (636) - "LIMITS", // 379 (637) - "LINE", // 380 (638) - "LINES", // 381 (639) - "LINE-COUNTER", // 382 (640) - "LINAGE", // 383 (641) - "LINKAGE", // 384 (642) - "LOCALE", // 385 (643) - "LOCALE-COMPARE", // 386 (644) - "LOCALE-DATE", // 387 (645) - "LOCALE-TIME", // 388 (646) - "LOCALE-TIME-FROM-SECONDS", // 389 (647) - "LOCAL-STORAGE", // 390 (648) - "LOCATION", // 391 (649) - "LOCK", // 392 (650) - "LOCK-ON", // 393 (651) - "LOG", // 394 (652) - "LOG10", // 395 (653) - "LOWER-CASE", // 396 (654) - "LOW-VALUES", // 397 (655) - "LOWEST-ALGEBRAIC", // 398 (656) - "LPAREN", // 399 (657) - "MANUAL", // 400 (658) - "MAXX", // 401 (659) - "MEAN", // 402 (660) - "MEDIAN", // 403 (661) - "MIDRANGE", // 404 (662) - "MINN", // 405 (663) - "MULTIPLE", // 406 (664) - "MOD", // 407 (665) - "MODE", // 408 (666) - "MODULE-NAME", // 409 (667) - "NAMED", // 410 (668) - "NAT", // 411 (669) - "NATIONAL", // 412 (670) - "NATIONAL-EDITED", // 413 (671) - "NATIONAL-OF", // 414 (672) - "NATIVE", // 415 (673) - "NESTED", // 416 (674) - "NEXT", // 417 (675) - "NO", // 418 (676) - "NOTE", // 419 (677) - "NULLS", // 420 (678) - "NULLPTR", // 421 (679) - "NUMERIC", // 422 (680) - "NUMERIC-EDITED", // 423 (681) - "NUMVAL", // 424 (682) - "NUMVAL-C", // 425 (683) - "NUMVAL-F", // 426 (684) - "OCCURS", // 427 (685) - "OF", // 428 (686) - "OFF", // 429 (687) - "OMITTED", // 430 (688) - "ON", // 431 (689) - "ONLY", // 432 (690) - "OPTIONAL", // 433 (691) - "OPTIONS", // 434 (692) - "ORD", // 435 (693) - "ORDER", // 436 (694) - "ORD-MAX", // 437 (695) - "ORD-MIN", // 438 (696) - "ORGANIZATION", // 439 (697) - "OTHER", // 440 (698) - "OTHERWISE", // 441 (699) - "OUTPUT", // 442 (700) - "PACKED-DECIMAL", // 443 (701) - "PADDING", // 444 (702) - "PAGE", // 445 (703) - "PAGE-COUNTER", // 446 (704) - "PF", // 447 (705) - "PH", // 448 (706) - "PI", // 449 (707) - "PIC", // 450 (708) - "PICTURE", // 451 (709) - "PLUS", // 452 (710) - "PRESENT-VALUE", // 453 (711) - "PRINT-SWITCH", // 454 (712) - "PROCEDURE", // 455 (713) - "PROCEDURES", // 456 (714) - "PROCEED", // 457 (715) - "PROCESS", // 458 (716) - "PROGRAM-ID", // 459 (717) - "PROGRAM", // 460 (718) - "PROPERTY", // 461 (719) - "PROTOTYPE", // 462 (720) - "PSEUDOTEXT", // 463 (721) - "QUOTES", // 464 (722) - "RANDOM", // 465 (723) - "RANDOM-SEED", // 466 (724) - "RANGE", // 467 (725) - "RAISE", // 468 (726) - "RAISING", // 469 (727) - "RD", // 470 (728) - "RECORD", // 471 (729) - "RECORDING", // 472 (730) - "RECORDS", // 473 (731) - "RECURSIVE", // 474 (732) - "REDEFINES", // 475 (733) - "REEL", // 476 (734) - "REFERENCE", // 477 (735) - "RELATIVE", // 478 (736) - "REM", // 479 (737) - "REMAINDER", // 480 (738) - "REMARKS", // 481 (739) - "REMOVAL", // 482 (740) - "RENAMES", // 483 (741) - "REPLACE", // 484 (742) - "REPLACING", // 485 (743) - "REPORT", // 486 (744) - "REPORTING", // 487 (745) - "REPORTS", // 488 (746) - "REPOSITORY", // 489 (747) - "RERUN", // 490 (748) - "RESERVE", // 491 (749) - "RESTRICTED", // 492 (750) - "RESUME", // 493 (751) - "REVERSE", // 494 (752) - "REVERSED", // 495 (753) - "REWIND", // 496 (754) - "RF", // 497 (755) - "RH", // 498 (756) - "RIGHT", // 499 (757) - "ROUNDED", // 500 (758) - "RUN", // 501 (759) - "SAME", // 502 (760) - "SCREEN", // 503 (761) - "SD", // 504 (762) - "SECONDS-FROM-FORMATTED-TIME", // 505 (763) - "SECONDS-PAST-MIDNIGHT", // 506 (764) - "SECURITY", // 507 (765) - "SEPARATE", // 508 (766) - "SEQUENCE", // 509 (767) - "SEQUENTIAL", // 510 (768) - "SHARING", // 511 (769) - "SIMPLE-EXIT", // 512 (770) - "SIGN", // 513 (771) - "SIN", // 514 (772) - "SIZE", // 515 (773) - "SMALLEST-ALGEBRAIC", // 516 (774) - "SOURCE", // 517 (775) - "SOURCE-COMPUTER", // 518 (776) - "SPECIAL-NAMES", // 519 (777) - "SQRT", // 520 (778) - "STACK", // 521 (779) - "STANDARD", // 522 (780) - "STANDARD-1", // 523 (781) - "STANDARD-DEVIATION", // 524 (782) - "STANDARD-COMPARE", // 525 (783) - "STATUS", // 526 (784) - "STRONG", // 527 (785) - "SUBSTITUTE", // 528 (786) - "SUM", // 529 (787) - "SYMBOL", // 530 (788) - "SYMBOLIC", // 531 (789) - "SYNCHRONIZED", // 532 (790) - "TALLY", // 533 (791) - "TALLYING", // 534 (792) - "TAN", // 535 (793) - "TERMINATE", // 536 (794) - "TEST", // 537 (795) - "TEST-DATE-YYYYMMDD", // 538 (796) - "TEST-DAY-YYYYDDD", // 539 (797) - "TEST-FORMATTED-DATETIME", // 540 (798) - "TEST-NUMVAL", // 541 (799) - "TEST-NUMVAL-C", // 542 (800) - "TEST-NUMVAL-F", // 543 (801) - "THAN", // 544 (802) - "TIME", // 545 (803) - "TIMES", // 546 (804) - "TO", // 547 (805) - "TOP", // 548 (806) - "TOP-LEVEL", // 549 (807) - "TRACKS", // 550 (808) - "TRACK-AREA", // 551 (809) - "TRAILING", // 552 (810) - "TRANSFORM", // 553 (811) - "TRIM", // 554 (812) - "TRUE", // 555 (813) - "TRY", // 556 (814) - "TURN", // 557 (815) - "TYPE", // 558 (816) - "TYPEDEF", // 559 (817) - "ULENGTH", // 560 (818) - "UNBOUNDED", // 561 (819) - "UNIT", // 562 (820) - "UNITS", // 563 (821) - "UNIT-RECORD", // 564 (822) - "UNTIL", // 565 (823) - "UP", // 566 (824) - "UPON", // 567 (825) - "UPOS", // 568 (826) - "UPPER-CASE", // 569 (827) - "USAGE", // 570 (828) - "USING", // 571 (829) - "USUBSTR", // 572 (830) - "USUPPLEMENTARY", // 573 (831) - "UTILITY", // 574 (832) - "UUID4", // 575 (833) - "UVALID", // 576 (834) - "UWIDTH", // 577 (835) - "VALUE", // 578 (836) - "VARIANCE", // 579 (837) - "VARYING", // 580 (838) - "VOLATILE", // 581 (839) - "WHEN-COMPILED", // 582 (840) - "WITH", // 583 (841) - "WORKING-STORAGE", // 584 (842) - "XML", // 585 (843) - "XMLGENERATE", // 586 (844) - "XMLPARSE", // 587 (845) + "ENCODING", // 296 (554) + "ENTRY", // 297 (555) + "ENVIRONMENT", // 298 (556) + "EQUAL", // 299 (557) + "EVERY", // 300 (558) + "EXAMINE", // 301 (559) + "EXHIBIT", // 302 (560) + "EXP", // 303 (561) + "EXP10", // 304 (562) + "EXTEND", // 305 (563) + "EXTERNAL", // 306 (564) + "EXCEPTION-FILE", // 307 (565) + "EXCEPTION-FILE-N", // 308 (566) + "EXCEPTION-LOCATION", // 309 (567) + "EXCEPTION-LOCATION-N", // 310 (568) + "EXCEPTION-STATEMENT", // 311 (569) + "EXCEPTION-STATUS", // 312 (570) + "FACTORIAL", // 313 (571) + "FALSE", // 314 (572) + "FD", // 315 (573) + "FILE-CONTROL", // 316 (574) + "FILE", // 317 (575) + "FILE-LIMIT", // 318 (576) + "FINAL", // 319 (577) + "FINALLY", // 320 (578) + "FIND-STRING", // 321 (579) + "FIRST", // 322 (580) + "FIXED", // 323 (581) + "FOOTING", // 324 (582) + "FOR", // 325 (583) + "FORMATTED-CURRENT-DATE", // 326 (584) + "FORMATTED-DATE", // 327 (585) + "FORMATTED-DATETIME", // 328 (586) + "FORMATTED-TIME", // 329 (587) + "FORM-OVERFLOW", // 330 (588) + "FREE", // 331 (589) + "FRACTION-PART", // 332 (590) + "FROM", // 333 (591) + "FUNCTION", // 334 (592) + "GENERATE", // 335 (593) + "GIVING", // 336 (594) + "GLOBAL", // 337 (595) + "GO", // 338 (596) + "GROUP", // 339 (597) + "HEADING", // 340 (598) + "HEX", // 341 (599) + "HEX-OF", // 342 (600) + "HEX-TO-CHAR", // 343 (601) + "HIGH-VALUES", // 344 (602) + "HIGHEST-ALGEBRAIC", // 345 (603) + "HOLD", // 346 (604) + "IBM-360", // 347 (605) + "IN", // 348 (606) + "INCLUDE", // 349 (607) + "INDEX", // 350 (608) + "INDEXED", // 351 (609) + "INDICATE", // 352 (610) + "INITIAL", // 353 (611) + "INITIATE", // 354 (612) + "INPUT", // 355 (613) + "INSTALLATION", // 356 (614) + "INTERFACE", // 357 (615) + "INTEGER", // 358 (616) + "INTEGER-OF-BOOLEAN", // 359 (617) + "INTEGER-OF-DATE", // 360 (618) + "INTEGER-OF-DAY", // 361 (619) + "INTEGER-OF-FORMATTED-DATE", // 362 (620) + "INTEGER-PART", // 363 (621) + "INTO", // 364 (622) + "INTRINSIC", // 365 (623) + "INVOKE", // 366 (624) + "I-O", // 367 (625) + "I-O-CONTROL", // 368 (626) + "IS", // 369 (627) + "ISNT", // 370 (628) + "KANJI", // 371 (629) + "KEY", // 372 (630) + "LABEL", // 373 (631) + "LAST", // 374 (632) + "LEADING", // 375 (633) + "LEFT", // 376 (634) + "LENGTH", // 377 (635) + "LENGTH-OF", // 378 (636) + "LIMIT", // 379 (637) + "LIMITS", // 380 (638) + "LINE", // 381 (639) + "LINES", // 382 (640) + "LINE-COUNTER", // 383 (641) + "LINAGE", // 384 (642) + "LINKAGE", // 385 (643) + "LOCALE", // 386 (644) + "LOCALE-COMPARE", // 387 (645) + "LOCALE-DATE", // 388 (646) + "LOCALE-TIME", // 389 (647) + "LOCALE-TIME-FROM-SECONDS", // 390 (648) + "LOCAL-STORAGE", // 391 (649) + "LOCATION", // 392 (650) + "LOCK", // 393 (651) + "LOCK-ON", // 394 (652) + "LOG", // 395 (653) + "LOG10", // 396 (654) + "LOWER-CASE", // 397 (655) + "LOW-VALUES", // 398 (656) + "LOWEST-ALGEBRAIC", // 399 (657) + "LPAREN", // 400 (658) + "MANUAL", // 401 (659) + "MAXX", // 402 (660) + "MEAN", // 403 (661) + "MEDIAN", // 404 (662) + "MIDRANGE", // 405 (663) + "MINN", // 406 (664) + "MULTIPLE", // 407 (665) + "MOD", // 408 (666) + "MODE", // 409 (667) + "MODULE-NAME", // 410 (668) + "NAMED", // 411 (669) + "NAT", // 412 (670) + "NATIONAL", // 413 (671) + "NATIONAL-EDITED", // 414 (672) + "NATIONAL-OF", // 415 (673) + "NATIVE", // 416 (674) + "NESTED", // 417 (675) + "NEXT", // 418 (676) + "NO", // 419 (677) + "NOTE", // 420 (678) + "NULLS", // 421 (679) + "NULLPTR", // 422 (680) + "NUMERIC", // 423 (681) + "NUMERIC-EDITED", // 424 (682) + "NUMVAL", // 425 (683) + "NUMVAL-C", // 426 (684) + "NUMVAL-F", // 427 (685) + "OCCURS", // 428 (686) + "OF", // 429 (687) + "OFF", // 430 (688) + "OMITTED", // 431 (689) + "ON", // 432 (690) + "ONLY", // 433 (691) + "OPTIONAL", // 434 (692) + "OPTIONS", // 435 (693) + "ORD", // 436 (694) + "ORDER", // 437 (695) + "ORD-MAX", // 438 (696) + "ORD-MIN", // 439 (697) + "ORGANIZATION", // 440 (698) + "OTHER", // 441 (699) + "OTHERWISE", // 442 (700) + "OUTPUT", // 443 (701) + "PACKED-DECIMAL", // 444 (702) + "PADDING", // 445 (703) + "PAGE", // 446 (704) + "PAGE-COUNTER", // 447 (705) + "PF", // 448 (706) + "PH", // 449 (707) + "PI", // 450 (708) + "PIC", // 451 (709) + "PICTURE", // 452 (710) + "PLUS", // 453 (711) + "PRESENT-VALUE", // 454 (712) + "PRINT-SWITCH", // 455 (713) + "PROCEDURE", // 456 (714) + "PROCEDURES", // 457 (715) + "PROCEED", // 458 (716) + "PROCESS", // 459 (717) + "PROCESSING", // 460 (718) + "PROGRAM-ID", // 461 (719) + "PROGRAM", // 462 (720) + "PROPERTY", // 463 (721) + "PROTOTYPE", // 464 (722) + "PSEUDOTEXT", // 465 (723) + "QUOTES", // 466 (724) + "RANDOM", // 467 (725) + "RANDOM-SEED", // 468 (726) + "RANGE", // 469 (727) + "RAISE", // 470 (728) + "RAISING", // 471 (729) + "RD", // 472 (730) + "RECORD", // 473 (731) + "RECORDING", // 474 (732) + "RECORDS", // 475 (733) + "RECURSIVE", // 476 (734) + "REDEFINES", // 477 (735) + "REEL", // 478 (736) + "REFERENCE", // 479 (737) + "RELATIVE", // 480 (738) + "REM", // 481 (739) + "REMAINDER", // 482 (740) + "REMARKS", // 483 (741) + "REMOVAL", // 484 (742) + "RENAMES", // 485 (743) + "REPLACE", // 486 (744) + "REPLACING", // 487 (745) + "REPORT", // 488 (746) + "REPORTING", // 489 (747) + "REPORTS", // 490 (748) + "REPOSITORY", // 491 (749) + "RERUN", // 492 (750) + "RESERVE", // 493 (751) + "RESTRICTED", // 494 (752) + "RESUME", // 495 (753) + "REVERSE", // 496 (754) + "REVERSED", // 497 (755) + "REWIND", // 498 (756) + "RF", // 499 (757) + "RH", // 500 (758) + "RIGHT", // 501 (759) + "ROUNDED", // 502 (760) + "RUN", // 503 (761) + "SAME", // 504 (762) + "SCREEN", // 505 (763) + "SD", // 506 (764) + "SECONDS-FROM-FORMATTED-TIME", // 507 (765) + "SECONDS-PAST-MIDNIGHT", // 508 (766) + "SECURITY", // 509 (767) + "SEPARATE", // 510 (768) + "SEQUENCE", // 511 (769) + "SEQUENTIAL", // 512 (770) + "SHARING", // 513 (771) + "SIMPLE-EXIT", // 514 (772) + "SIGN", // 515 (773) + "SIN", // 516 (774) + "SIZE", // 517 (775) + "SMALLEST-ALGEBRAIC", // 518 (776) + "SOURCE", // 519 (777) + "SOURCE-COMPUTER", // 520 (778) + "SPECIAL-NAMES", // 521 (779) + "SQRT", // 522 (780) + "STACK", // 523 (781) + "STANDARD", // 524 (782) + "STANDARD-1", // 525 (783) + "STANDARD-DEVIATION", // 526 (784) + "STANDARD-COMPARE", // 527 (785) + "STATUS", // 528 (786) + "STRONG", // 529 (787) + "SUBSTITUTE", // 530 (788) + "SUM", // 531 (789) + "SYMBOL", // 532 (790) + "SYMBOLIC", // 533 (791) + "SYNCHRONIZED", // 534 (792) + "TALLY", // 535 (793) + "TALLYING", // 536 (794) + "TAN", // 537 (795) + "TERMINATE", // 538 (796) + "TEST", // 539 (797) + "TEST-DATE-YYYYMMDD", // 540 (798) + "TEST-DAY-YYYYDDD", // 541 (799) + "TEST-FORMATTED-DATETIME", // 542 (800) + "TEST-NUMVAL", // 543 (801) + "TEST-NUMVAL-C", // 544 (802) + "TEST-NUMVAL-F", // 545 (803) + "THAN", // 546 (804) + "TIME", // 547 (805) + "TIMES", // 548 (806) + "TO", // 549 (807) + "TOP", // 550 (808) + "TOP-LEVEL", // 551 (809) + "TRACKS", // 552 (810) + "TRACK-AREA", // 553 (811) + "TRAILING", // 554 (812) + "TRANSFORM", // 555 (813) + "TRIM", // 556 (814) + "TRUE", // 557 (815) + "TRY", // 558 (816) + "TURN", // 559 (817) + "TYPE", // 560 (818) + "TYPEDEF", // 561 (819) + "ULENGTH", // 562 (820) + "UNBOUNDED", // 563 (821) + "UNIT", // 564 (822) + "UNITS", // 565 (823) + "UNIT-RECORD", // 566 (824) + "UNTIL", // 567 (825) + "UP", // 568 (826) + "UPON", // 569 (827) + "UPOS", // 570 (828) + "UPPER-CASE", // 571 (829) + "USAGE", // 572 (830) + "USING", // 573 (831) + "USUBSTR", // 574 (832) + "USUPPLEMENTARY", // 575 (833) + "UTILITY", // 576 (834) + "UUID4", // 577 (835) + "UVALID", // 578 (836) + "UWIDTH", // 579 (837) + "VALIDATING", // 580 (838) + "VALUE", // 581 (839) + "VARIANCE", // 582 (840) + "VARYING", // 583 (841) + "VOLATILE", // 584 (842) + "WHEN-COMPILED", // 585 (843) + "WITH", // 586 (844) + "WORKING-STORAGE", // 587 (845) "YEAR-TO-YYYY", // 588 (846) "YYYYDDD", // 589 (847) "YYYYMMDD", // 590 (848) @@ -1381,14 +1390,23 @@ token_names = { "END-SUBTRACT", // 677 (935) "END-UNSTRING", // 678 (936) "END-WRITE", // 679 (937) - "END-IF", // 680 (938) - "THRU", // 681 (939) - "OR", // 682 (940) - "AND", // 683 (941) - "NOT", // 684 (942) - "NE", // 685 (943) - "LE", // 686 (944) - "GE", // 687 (945) - "POW", // 688 (946) - "NEG", // 689 (947) + "END-XML", // 680 (938) + "END-IF", // 681 (939) + "XMLGENERATE", // 682 (940) + "XMLPARSE", // 684 (942) + "ATTRIBUTES", // 686 (944) + "ELEMENT", // 687 (945) + "NAMESPACE", // 688 (946) + "NAMESPACE-PREFIX", // 689 (947) + "NONNUMERIC", // 691 (949) + "XML-DECLARATION", // 692 (950) + "THRU", // 694 (952) + "OR", // 695 (953) + "AND", // 696 (954) + "NOT", // 697 (955) + "NE", // 698 (956) + "LE", // 699 (957) + "GE", // 700 (958) + "POW", // 701 (959) + "NEG", // 702 (960) }; diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index 2a7bf2ba86d..d3a4b01a089 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -61,6 +61,7 @@ #include "../../libgcobol/io.h" #include "genapi.h" #include "genutil.h" +#include "../../libgcobol/charmaps.h" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -323,8 +324,6 @@ cbl_field_type_str( enum cbl_field_type_t type ) return "FldSwitch"; case FldPointer: return "FldPointer"; - case FldBlob: - return "FldBlob"; } cbl_internal_error("%s:%d: invalid % %d", __func__, __LINE__, type); return "???"; @@ -613,7 +612,6 @@ is_elementary( enum cbl_field_type_t type ) case FldForward: case FldIndex: case FldSwitch: - case FldBlob: return false; case FldPointer: case FldAlphanumeric: @@ -805,6 +803,7 @@ symbol_field_type_update( cbl_field_t *field, // type matches itself if( field->type == candidate ) { if( is_usage ) field->usage = candidate; + field->codeset.set(); return true; } if( is_usage && field->usage == candidate ) return true; @@ -831,7 +830,6 @@ symbol_field_type_update( cbl_field_t *field, */ if( is_usage ) { switch(field->type) { - case FldBlob: case FldDisplay: gcc_unreachable(); // type is never just "display" break; @@ -882,11 +880,24 @@ symbol_field_type_update( cbl_field_t *field, case FldInvalid: field->type = candidate; field->attr |= numeric_group_attrs(field); + // update encoding + switch( field->type ) { + case FldNumericDisplay: + case FldAlphaEdited: + case FldNumericEdited: + { + bool retval = field->codeset.set(); + return retval; + } + default: + break; + } return true; case FldDisplay: if( is_displayable(candidate) ) { field->type = candidate; field->attr |= numeric_group_attrs(field); + if( ! field->codeset.valid() ) return field->codeset.set(); return true; } break; @@ -897,6 +908,7 @@ symbol_field_type_update( cbl_field_t *field, field->clear_attr(all_x_e); field->type = field->usage; field->attr |= numeric_group_attrs(field); + if( ! field->codeset.valid() ) return field->codeset.set(); return true; case FldNumericDisplay: case FldNumericEdited: @@ -908,7 +920,6 @@ symbol_field_type_update( cbl_field_t *field, case FldForward: case FldSwitch: case FldPointer: - case FldBlob: // invalid usage value gcc_unreachable(); break; @@ -1082,11 +1093,21 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { // consider all-alphabetic if( has_attr(all_alpha_e) ) { bool alpha_value = fig != zero_value_e; + + // In order to check for all alphabetic characters, we have to convert + // data.initial back to ASCII: + + size_t outchars; + char *initial = __gg__iconverter(codeset.encoding, + DEFAULT_CHARMAP_SOURCE, + data.initial, + data.capacity, + &outchars); if( fig == normal_value_e ) { - alpha_value = std::all_of( data.initial, - data.initial + - strlen(data.initial), + alpha_value = std::all_of( initial, + initial + + data.capacity, []( char ch ) { return ISSPACE(ch) || ISPUNCT(ch) || @@ -1094,7 +1115,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { } if( ! alpha_value ) { error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data", - name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial); + name, fig == zero_value_e? cbl_figconst_str(fig) : initial); } } @@ -1262,7 +1283,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) return false; // parser should not allow the following types here case FldForward: - case FldBlob: default: if( sizeof(matrix[0]) < field->type ) { cbl_internal_error("logic error: MOVE %s %s invalid type:", @@ -1292,8 +1312,16 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) case 0: if( src->type == FldLiteralA && is_numericish(tgt) && !is_literal(tgt) ) { // Allow if input string is an integer. - const char *p = src->data.initial, *pend = p + src->data.capacity; - if( p[0] == '+' || p[0] == '-' ) p++; + size_t outcount; + char *in_ascii = static_cast(xmalloc(4 * src->data.capacity)); + const char *in_asciip = __gg__iconverter( src->codeset.encoding, + DEFAULT_CHARMAP_SOURCE, + src->data.initial, + src->data.capacity, + &outcount ); + memcpy(in_ascii, in_asciip, outcount); + const char *p = in_ascii, *pend = p + src->data.capacity; + if( (p[0] == ascii_plus) || (p[0] == ascii_minus) ) p++; retval = std::all_of( p, pend, isdigit ); if( yydebug && ! retval ) { auto bad = std::find_if( p, pend, @@ -1302,6 +1330,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) HOST_SIZE_T_PRINT_UNSIGNED, __func__, __LINE__, *bad, (fmt_size_t)(bad - p)); } + free(in_ascii); } break; case 1: @@ -1340,8 +1369,6 @@ bool valid_picture( enum cbl_field_type_t type, const char picture[] ) { switch(type) { - case FldBlob: - gcc_unreachable(); // can't get here via the parser case FldInvalid: case FldGroup: case FldLiteralA: @@ -1386,7 +1413,6 @@ uint32_t type_capacity( enum cbl_field_type_t type, uint32_t digits ) { switch(type) { - case FldBlob: gcc_unreachable(); case FldInvalid: case FldGroup: case FldAlphanumeric: @@ -2085,11 +2111,6 @@ template static void gcc_location_set_impl( const LOC& loc ) { // Set the position to the first line & column in the location. - if( getenv("KILROY") ) - { - fprintf(stderr, "********** KILROY %d\n", loc.first_line); - } - static location_t loc_m_1 = 0; token_location = linemap_line_start( line_table, loc.first_line, 80 ); @@ -2503,7 +2524,7 @@ cbl_unimplementedw(const char *gmsgid, ...) { auto_diagnostic_group d; va_list ap; va_start(ap, gmsgid); - emit_diagnostic_valist( diagnostics::kind::sorry, + emit_diagnostic_valist( diagnostics::kind::warning, token_location, option_zero, gmsgid, &ap ); va_end(ap); } diff --git a/gcc/testsuite/cobol.dg/group1/check_88.cob b/gcc/testsuite/cobol.dg/group1/check_88.cob index f1d0685e478..25e2a709c0b 100644 --- a/gcc/testsuite/cobol.dg/group1/check_88.cob +++ b/gcc/testsuite/cobol.dg/group1/check_88.cob @@ -13,11 +13,11 @@ *> { dg-output { (\n|\r\n|\r)} } *> { dg-output {There should be no garbage after character 32(\n|\r\n|\r)} } *> { dg-output {\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\*\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-(\n|\r\n|\r)} } -*> { dg-output {.* Bundesstra.e (\n|\r\n|\r)} } -*> { dg-output {.* Bundesstra.e (\n|\r\n|\r)} } +*> { dg-output {.*(\n|\r\n|\r)} } +*> { dg-output {.*(\n|\r\n|\r)} } *> { dg-output { (\n|\r\n|\r)} } *> { dg-output {There should be no spaces before the final quote(\n|\r\n|\r)} } -*> { dg-output {".* Bundesstra.e"(\n|\r\n|\r)} } +*> { dg-output {".*"(\n|\r\n|\r)} } *> { dg-output { (\n|\r\n|\r)} } *> { dg-output { IsLow ""(\n|\r\n|\r)} } *> { dg-output { IsZero "000"(\n|\r\n|\r)} } @@ -39,7 +39,7 @@ 88 CheckZero VALUE ZERO. 88 CheckQuotes VALUE QUOTE. 88 CheckBob VALUE "bob". - 88 CheckBinary VALUE X"000102". *> { dg-warning embedded } + 88 CheckBinary VALUE X"000102". *> 01 000VARL PIC XXX VALUE LOW-VALUE. 01 000VARS PIC XXX VALUE SPACE. 01 000VARQ PIC XXX VALUE QUOTE. diff --git a/libgcobol/charmaps.cc b/libgcobol/charmaps.cc index eb826096084..5d43f0b756a 100644 --- a/libgcobol/charmaps.cc +++ b/libgcobol/charmaps.cc @@ -45,8 +45,22 @@ #include "common-defs.h" #include "io.h" #include "gcobolio.h" -#include "charmaps.h" #include "valconv.h" +#include "charmaps.h" +#include "encodings.h" + +// These values are in the ASCII space. +int __gg__decimal_point = '.' ; +int __gg__decimal_separator = ',' ; +int __gg__quote_character = '"' ; +int __gg__low_value_character = 0x00 ; +int __gg__high_value_character = 0xFF ; +char **__gg__currency_signs ; + +int __gg__default_currency_sign; + +char *__gg__ct_currency_signs[256]; // Compile-time currency signs + // First: single-byte-coded (SBC) character sets: @@ -129,7 +143,6 @@ __gg__one_to_one_values[256] = // Programmers who use the ALPHABET clause to change the HIGH-VALUE are on their // own. - const unsigned short __gg__cp1140_to_cp1252_values[256] = { @@ -222,365 +235,10 @@ __gg__ebcdic_to_cp1252_collation[256] = 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0xFF, }; -// This table is used for converting code page 1252 to the subset of UTF-8 that -// that contains CP1252 - -static const unsigned short -cp1252_to_utf8_values[256] = - { - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, // 00 - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, // 10 - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, // 20 - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, // 30 - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, // 40 - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, // 50 - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, // 60 - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, // 70 - 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f, // 80 - 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178, // 90 - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // A0 - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // B0 - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // C0 - 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // D0 - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // E0 - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // F0 - }; - -// This map table does the reverse UTF-8 conversion back to cp1252 -static const std::unordered_maputf8_to_cp1252_values = - { - {0x0000, 0x00}, {0x0001, 0x01}, {0x0002, 0x02}, {0x0003, 0x03}, {0x0004, 0x04}, {0x0005, 0x05}, {0x0006, 0x06}, {0x0007, 0x07}, - {0x0008, 0x08}, {0x0009, 0x09}, {0x000a, 0x0a}, {0x000b, 0x0b}, {0x000c, 0x0c}, {0x000d, 0x0d}, {0x000e, 0x0e}, {0x000f, 0x0f}, - {0x0010, 0x10}, {0x0011, 0x11}, {0x0012, 0x12}, {0x0013, 0x13}, {0x0014, 0x14}, {0x0015, 0x15}, {0x0016, 0x16}, {0x0017, 0x17}, - {0x0018, 0x18}, {0x0019, 0x19}, {0x001a, 0x1a}, {0x001b, 0x1b}, {0x001c, 0x1c}, {0x001d, 0x1d}, {0x001e, 0x1e}, {0x001f, 0x1f}, - {0x0020, 0x20}, {0x0021, 0x21}, {0x0022, 0x22}, {0x0023, 0x23}, {0x0024, 0x24}, {0x0025, 0x25}, {0x0026, 0x26}, {0x0027, 0x27}, - {0x0028, 0x28}, {0x0029, 0x29}, {0x002a, 0x2a}, {0x002b, 0x2b}, {0x002c, 0x2c}, {0x002d, 0x2d}, {0x002e, 0x2e}, {0x002f, 0x2f}, - {0x0030, 0x30}, {0x0031, 0x31}, {0x0032, 0x32}, {0x0033, 0x33}, {0x0034, 0x34}, {0x0035, 0x35}, {0x0036, 0x36}, {0x0037, 0x37}, - {0x0038, 0x38}, {0x0039, 0x39}, {0x003a, 0x3a}, {0x003b, 0x3b}, {0x003c, 0x3c}, {0x003d, 0x3d}, {0x003e, 0x3e}, {0x003f, 0x3f}, - {0x0040, 0x40}, {0x0041, 0x41}, {0x0042, 0x42}, {0x0043, 0x43}, {0x0044, 0x44}, {0x0045, 0x45}, {0x0046, 0x46}, {0x0047, 0x47}, - {0x0048, 0x48}, {0x0049, 0x49}, {0x004a, 0x4a}, {0x004b, 0x4b}, {0x004c, 0x4c}, {0x004d, 0x4d}, {0x004e, 0x4e}, {0x004f, 0x4f}, - {0x0050, 0x50}, {0x0051, 0x51}, {0x0052, 0x52}, {0x0053, 0x53}, {0x0054, 0x54}, {0x0055, 0x55}, {0x0056, 0x56}, {0x0057, 0x57}, - {0x0058, 0x58}, {0x0059, 0x59}, {0x005a, 0x5a}, {0x005b, 0x5b}, {0x005c, 0x5c}, {0x005d, 0x5d}, {0x005e, 0x5e}, {0x005f, 0x5f}, - {0x0060, 0x60}, {0x0061, 0x61}, {0x0062, 0x62}, {0x0063, 0x63}, {0x0064, 0x64}, {0x0065, 0x65}, {0x0066, 0x66}, {0x0067, 0x67}, - {0x0068, 0x68}, {0x0069, 0x69}, {0x006a, 0x6a}, {0x006b, 0x6b}, {0x006c, 0x6c}, {0x006d, 0x6d}, {0x006e, 0x6e}, {0x006f, 0x6f}, - {0x0070, 0x70}, {0x0071, 0x71}, {0x0072, 0x72}, {0x0073, 0x73}, {0x0074, 0x74}, {0x0075, 0x75}, {0x0076, 0x76}, {0x0077, 0x77}, - {0x0078, 0x78}, {0x0079, 0x79}, {0x007a, 0x7a}, {0x007b, 0x7b}, {0x007c, 0x7c}, {0x007d, 0x7d}, {0x007e, 0x7e}, {0x007f, 0x7f}, - {0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83}, {0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87}, - {0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b}, {0x0152, 0x8c}, {0x008d, 0x8d}, {0x017d, 0x8e}, {0x008f, 0x8f}, - {0x0090, 0x90}, {0x2018, 0x91}, {0x2019, 0x92}, {0x201c, 0x93}, {0x201d, 0x94}, {0x2022, 0x95}, {0x2013, 0x96}, {0x2014, 0x97}, - {0x02dc, 0x98}, {0x2122, 0x99}, {0x0161, 0x9a}, {0x203a, 0x9b}, {0x0153, 0x9c}, {0x009d, 0x9d}, {0x017e, 0x9e}, {0x0178, 0x9f}, - {0x00a0, 0xa0}, {0x00a1, 0xa1}, {0x00a2, 0xa2}, {0x00a3, 0xa3}, {0x00a4, 0xa4}, {0x00a5, 0xa5}, {0x00a6, 0xa6}, {0x00a7, 0xa7}, - {0x00a8, 0xa8}, {0x00a9, 0xa9}, {0x00aa, 0xaa}, {0x00ab, 0xab}, {0x00ac, 0xac}, {0x00ad, 0xad}, {0x00ae, 0xae}, {0x00af, 0xaf}, - {0x00b0, 0xb0}, {0x00b1, 0xb1}, {0x00b2, 0xb2}, {0x00b3, 0xb3}, {0x00b4, 0xb4}, {0x00b5, 0xb5}, {0x00b6, 0xb6}, {0x00b7, 0xb7}, - {0x00b8, 0xb8}, {0x00b9, 0xb9}, {0x00ba, 0xba}, {0x00bb, 0xbb}, {0x00bc, 0xbc}, {0x00bd, 0xbd}, {0x00be, 0xbe}, {0x00bf, 0xbf}, - {0x00c0, 0xc0}, {0x00c1, 0xc1}, {0x00c2, 0xc2}, {0x00c3, 0xc3}, {0x00c4, 0xc4}, {0x00c5, 0xc5}, {0x00c6, 0xc6}, {0x00c7, 0xc7}, - {0x00c8, 0xc8}, {0x00c9, 0xc9}, {0x00ca, 0xca}, {0x00cb, 0xcb}, {0x00cc, 0xcc}, {0x00cd, 0xcd}, {0x00ce, 0xce}, {0x00cf, 0xcf}, - {0x00d0, 0xd0}, {0x00d1, 0xd1}, {0x00d2, 0xd2}, {0x00d3, 0xd3}, {0x00d4, 0xd4}, {0x00d5, 0xd5}, {0x00d6, 0xd6}, {0x00d7, 0xd7}, - {0x00d8, 0xd8}, {0x00d9, 0xd9}, {0x00da, 0xda}, {0x00db, 0xdb}, {0x00dc, 0xdc}, {0x00dd, 0xdd}, {0x00de, 0xde}, {0x00df, 0xdf}, - {0x00e0, 0xe0}, {0x00e1, 0xe1}, {0x00e2, 0xe2}, {0x00e3, 0xe3}, {0x00e4, 0xe4}, {0x00e5, 0xe5}, {0x00e6, 0xe6}, {0x00e7, 0xe7}, - {0x00e8, 0xe8}, {0x00e9, 0xe9}, {0x00ea, 0xea}, {0x00eb, 0xeb}, {0x00ec, 0xec}, {0x00ed, 0xed}, {0x00ee, 0xee}, {0x00ef, 0xef}, - {0x00f0, 0xf0}, {0x00f1, 0xf1}, {0x00f2, 0xf2}, {0x00f3, 0xf3}, {0x00f4, 0xf4}, {0x00f5, 0xf5}, {0x00f6, 0xf6}, {0x00f7, 0xf7}, - {0x00f8, 0xf8}, {0x00f9, 0xf9}, {0x00fa, 0xfa}, {0x00fb, 0xfb}, {0x00fc, 0xfc}, {0x00fd, 0xfd}, {0x00fe, 0xfe}, {0x00ff, 0xff}, - }; - -// This function extracts the next unicode code point from a stream of UTF-8 -// data. - -static bool -raw_is_SBC() - { - bool retval = false; - switch(source_codeset) - { - case cs_cp1252_e: - retval = true; - break; - default: - break; - } - return retval; - } - - -static size_t -extract_next_code_point(const unsigned char *utf8, - const size_t /*length_in_bytes*/, - size_t &position) - { - long retval = -1; // Means a badly formed code point - unsigned char ch = utf8[position++]; - long under_construction = 0; - int countdown = 0; - - if( (ch & 0x80) == 0x00 ) - { - // We are in the ASCII subset of UTF-8, and we are done - retval = ch; - goto done; - } - else if( (ch & 0xE0) == 0xC0 ) - { - // There is one byte to follow - countdown = 1; - under_construction = ch & 0x1F; - } - else if( (ch & 0xF0) == 0xE0 ) - { - countdown = 2; - under_construction = ch & 0x0F; - } - else if( (ch & 0xF8) == 0xF0 ) - { - countdown = 3; - under_construction = ch & 0x07; - } - else - { - // We have a poorly-constructed UTF-8 encoding - goto done; - } - while( countdown-- ) - { - ch = utf8[position++]; - // We are in a follow-up encoded byte: - if( (ch & 0xC0) == 0x80 ) - { - // The top two bits are 10, so build in the bottom six bits - under_construction <<= 6; - under_construction |= (ch & 0x3F); - } - else - { - // This is a poorly-formed encoding - goto done; - } - } - retval = under_construction; - - done: - return retval; - } - -void flipper(void) - { - for(int i=0; i<256; i++) - { - fprintf(stderr, "{0x%4.4x, 0x%2.2x}, ", cp1252_to_utf8_values[i], i); - if( (i % 8) == 7 ) - { - fprintf(stderr, "\n"); - } - } - } - -extern "C" -char __gg__ascii_to_ascii_chr(char ch) - { - return ch; - } - -extern "C" -char __gg__ascii_to_ebcdic_chr(char ch) - { - return (char)__gg__cp1252_to_cp1140_values[(ch&0xFF)]; - } - -extern "C" -char * -__gg__raw_to_ascii(char **dest, size_t *dest_size, const char *in, size_t length) - { - // We are anticipating `length` characters, some of which might be multi- - // character UTF-8 codepoints. We are sending back a nul-terminated string - // of SBC ASCII values. - - __gg__realloc_if_necessary(dest, dest_size, length+1); - - // This is the byte position of the output - size_t index = 0; - - // This is the byte position of the input - size_t position = 0; - - while( index < length ) - { - // In the case of "display "âêîôû", when the source code is encoded in - // UTF-8, the field->data.capacity is showing up as 10, because that - // UTF-8 string is ten bytes long, and the parser is not counting - // characters. The data.initial field is indeed nul-terminated, so when we - // hit a nul, we bug out: - if( in[position] == '\0' ) - { - // We have hit the end. We want to space-fill to the right: - while( index < length ) - { - (*dest)[index++] = internal_space; - } - break; - } - - // Special handling for PIC X VALUE HIGH-VALUE. If we just hand default - // 0xFF values to the rest of the routine, the utf-8 detection will give - // us a result that confuses the remainder of the processing. - if( (in[position]&0xFF) == 0xFF ) - { - (*dest)[index++] = in[position++]; - continue; - } - - if( raw_is_SBC() ) - { - (*dest)[index++] = in[position++]; - continue; - } - - size_t code_point; - // Pull the next code_point from the UTF-8 stream - long unicode_point = extract_next_code_point(reinterpret_cast(in), - length, - position ); - - // Check for that unicode code point in the subset of characters we - // know about: - auto it = utf8_to_cp1252_values.find(unicode_point); - if( it == utf8_to_cp1252_values.end() ) - { - // That unicode character isn't in our list - code_point = ASCII_REPLACEMENT; - } - else - { - code_point = it->second; - } - (*dest)[index++] = (char)code_point; - } - (*dest)[index++] = '\0'; - - return *dest; - } - -extern "C" -char * -__gg__raw_to_ebcdic(char **dest, size_t *dest_size, const char *in, size_t length) - { - // A UTF-8 string is at least as long as the single-byte-coded resulting - // string: - __gg__realloc_if_necessary(dest, dest_size, length+1); - - size_t index = 0; - - size_t position = 0; - size_t code_point; - while( index < length ) - { - // See comments in __gg__raw_to_ascii - if( in[position] == '\0' ) - { - // We have hit the end. We want to space-fill to the right: - while( index < length ) - { - (*dest)[index++] = internal_space; - } - break; - } - if( raw_is_SBC() ) - { - code_point = in[position++]; - long ebcdic_code_point = __gg__cp1252_to_cp1140_values[code_point&0xFF]; - (*dest)[index++] = ebcdic_code_point; - continue; - } - if( (in[position]&0xff) == 0xff ) - { - // HIGH-VALUE is a special case - (*dest)[index++] = in[position++]; - continue; - } - - // Pull the next code_point from the UTF-8 stream - long unicode_point = extract_next_code_point( reinterpret_cast(in), - length, - position ); - // Check for that unicode code point in the subset of characters we - // know about: - auto it = utf8_to_cp1252_values.find(unicode_point); - if( it == utf8_to_cp1252_values.end() ) - { - // That unicode character isn't in our list - code_point = ASCII_REPLACEMENT; - } - else - { - code_point = it->second; - } - // TODO: This could be sped up by creating a utf8_to_cp1140_values map. - // But sufficient unto the day are the evils thereof - long ebcdic_code_point = __gg__cp1252_to_cp1140_values[code_point&0xFF]; - (*dest)[index++] = ebcdic_code_point; - } - (*dest)[index++] = '\0'; - - return *dest; - } - -static -char * -convert_cp1252_to_utf8(char **dest, size_t *dest_size, const char *in, size_t length) - { - // Worst case is all unicode characters. - __gg__realloc_if_necessary(dest, dest_size, 4 * length + 1); - - size_t index = 0; - for(size_t i=0; i>6); - (*dest)[index++] = 0x80 + ((unicode_point>>0) & 0x3F); - } - else if(unicode_point < 0x10000) - { - // Three-byte: - (*dest)[index++] = 0xE0 + (unicode_point>>12); - (*dest)[index++] = 0x80 + ((unicode_point>>6) & 0x3F); - (*dest)[index++] = 0x80 + ((unicode_point>>0) & 0x3F); - } - else - { - // Four-byte: - (*dest)[index++] = 0xF0 + (unicode_point>>18); - (*dest)[index++] = 0x80 + ((unicode_point>>12) & 0x3F); - (*dest)[index++] = 0x80 + ((unicode_point>>6) & 0x3F); - (*dest)[index++] = 0x80 + ((unicode_point>>0) & 0x3F); - } - } - (*dest)[index++] = '\0'; - - return *dest; - } - -// This is the address of the 256-character map for internal characters -// It'll be set to one-to-one for ASCII, and to cp1252-to-cp1140_values for -// EBCDIC. -unsigned short const *__gg__internal_codeset_map; // Here is the list of function pointers establish which ones of the paired // possibilities of conversion routines are actually in use. -char (*__gg__ascii_to_internal_chr)(char); -void (*__gg__ascii_to_internal_str)(char *str, size_t length); -char *(*__gg__raw_to_internal)(char **dest, size_t *dest_size, const char *in, const size_t length); -char *(*__gg__internal_to_console_cm)(char **dest, size_t *dest_size, const char *in, size_t length); -void (*__gg__console_to_internal_cm)(char * const str, size_t length); -void (*__gg__internal_to_ascii)(char *str, size_t length); - extern "C" void __gg__set_internal_codeset(int use_ebcdic) { @@ -594,29 +252,6 @@ void __gg__text_conversion_override(text_device_t device, // Establish the default sourcecode and console codesets, and // establish the codeset conversion routines: - if( internal_is_ebcdic ) - { -// fprintf(stderr, "Setting up EBCDIC\n"); - __gg__internal_codeset_map = __gg__cp1252_to_cp1140_values; - __gg__ascii_to_internal_chr = &__gg__ascii_to_ebcdic_chr; - __gg__ascii_to_internal_str = &__gg__ascii_to_ebcdic; - __gg__raw_to_internal = &__gg__raw_to_ebcdic; - __gg__internal_to_console_cm = &__gg__ebcdic_to_console; - __gg__console_to_internal_cm = &__gg__console_to_ebcdic; - __gg__internal_to_ascii = &__gg__ebcdic_to_ascii; - } - else - { -// fprintf(stderr, "Setting up ASCII\n"); - __gg__internal_codeset_map = __gg__one_to_one_values; - __gg__ascii_to_internal_chr = &__gg__ascii_to_ascii_chr; - __gg__ascii_to_internal_str = &__gg__ascii_to_ascii; - __gg__raw_to_internal = &__gg__raw_to_ascii; - __gg__internal_to_console_cm = &__gg__ascii_to_console; - __gg__console_to_internal_cm = &__gg__console_to_ascii; - __gg__internal_to_ascii = &__gg__ascii_to_ascii; - } - switch(device) { case td_default_e: @@ -668,255 +303,1336 @@ void __gg__text_conversion_override(text_device_t device, } } -extern "C" -void -__gg__ascii_to_ascii(char *, size_t ) - { - return; - } +static encodings_t encodings[] = { + { iconv_437_e, "437" }, + { iconv_500_e, "500" }, + { iconv_500V1_e, "500V1" }, + { iconv_850_e, "850" }, + { iconv_851_e, "851" }, + { iconv_852_e, "852" }, + { iconv_855_e, "855" }, + { iconv_856_e, "856" }, + { iconv_857_e, "857" }, + { iconv_858_e, "858" }, + { iconv_860_e, "860" }, + { iconv_861_e, "861" }, + { iconv_862_e, "862" }, + { iconv_863_e, "863" }, + { iconv_864_e, "864" }, + { iconv_865_e, "865" }, + { iconv_866_e, "866" }, + { iconv_866NAV_e, "866NAV" }, + { iconv_869_e, "869" }, + { iconv_874_e, "874" }, + { iconv_904_e, "904" }, + { iconv_1026_e, "1026" }, + { iconv_1046_e, "1046" }, + { iconv_1047_e, "1047" }, + { iconv_8859_1_e, "8859_1" }, + { iconv_8859_2_e, "8859_2" }, + { iconv_8859_3_e, "8859_3" }, + { iconv_8859_4_e, "8859_4" }, + { iconv_8859_5_e, "8859_5" }, + { iconv_8859_6_e, "8859_6" }, + { iconv_8859_7_e, "8859_7" }, + { iconv_8859_8_e, "8859_8" }, + { iconv_8859_9_e, "8859_9" }, + { iconv_10646_1_1993_e, "10646-1:1993" }, + { iconv_10646_1_1993_e, "UCS4/ 10646-1:1993/UCS4/" }, + { iconv_ANSI_X3_4_1968_e, "ANSI_X3.4-1968" }, + { iconv_ANSI_X3_4_1986_e, "ANSI_X3.4-1986" }, + { iconv_ANSI_X3_4_e, "ANSI_X3.4" }, + { iconv_ANSI_X3_110_1983_e, "ANSI_X3.110-1983" }, + { iconv_ANSI_X3_110_e, "ANSI_X3.110" }, + { iconv_ARABIC_e, "ARABIC" }, + { iconv_ARABIC7_e, "ARABIC7" }, + { iconv_ARMSCII_8_e, "ARMSCII-8" }, + { iconv_ARMSCII8_e, "ARMSCII8" }, + { iconv_ASCII_e, "ASCII" }, + { iconv_ASMO_708_e, "ASMO-708" }, + { iconv_ASMO_449_e, "ASMO_449" }, + { iconv_BALTIC_e, "BALTIC" }, + { iconv_BIG_5_e, "BIG-5" }, + { iconv_BIG_FIVE_e, "BIG-FIVE" }, + { iconv_BIG5_HKSCS_e, "BIG5-HKSCS" }, + { iconv_BIG5_e, "BIG5" }, + { iconv_BIG5HKSCS_e, "BIG5HKSCS" }, + { iconv_BIGFIVE_e, "BIGFIVE" }, + { iconv_BRF_e, "BRF" }, + { iconv_BS_4730_e, "BS_4730" }, + { iconv_CA_e, "CA" }, + { iconv_CN_BIG5_e, "CN-BIG5" }, + { iconv_CN_GB_e, "CN-GB" }, + { iconv_CN_e, "CN" }, + { iconv_CP_AR_e, "CP-AR" }, + { iconv_CP_GR_e, "CP-GR" }, + { iconv_CP_HU_e, "CP-HU" }, + { iconv_CP037_e, "CP037" }, + { iconv_CP038_e, "CP038" }, + { iconv_CP273_e, "CP273" }, + { iconv_CP274_e, "CP274" }, + { iconv_CP275_e, "CP275" }, + { iconv_CP278_e, "CP278" }, + { iconv_CP280_e, "CP280" }, + { iconv_CP281_e, "CP281" }, + { iconv_CP282_e, "CP282" }, + { iconv_CP284_e, "CP284" }, + { iconv_CP285_e, "CP285" }, + { iconv_CP290_e, "CP290" }, + { iconv_CP297_e, "CP297" }, + { iconv_CP367_e, "CP367" }, + { iconv_CP420_e, "CP420" }, + { iconv_CP423_e, "CP423" }, + { iconv_CP424_e, "CP424" }, + { iconv_CP437_e, "CP437" }, + { iconv_CP500_e, "CP500" }, + { iconv_CP737_e, "CP737" }, + { iconv_CP770_e, "CP770" }, + { iconv_CP771_e, "CP771" }, + { iconv_CP772_e, "CP772" }, + { iconv_CP773_e, "CP773" }, + { iconv_CP774_e, "CP774" }, + { iconv_CP775_e, "CP775" }, + { iconv_CP803_e, "CP803" }, + { iconv_CP813_e, "CP813" }, + { iconv_CP819_e, "CP819" }, + { iconv_CP850_e, "CP850" }, + { iconv_CP851_e, "CP851" }, + { iconv_CP852_e, "CP852" }, + { iconv_CP855_e, "CP855" }, + { iconv_CP856_e, "CP856" }, + { iconv_CP857_e, "CP857" }, + { iconv_CP858_e, "CP858" }, + { iconv_CP860_e, "CP860" }, + { iconv_CP861_e, "CP861" }, + { iconv_CP862_e, "CP862" }, + { iconv_CP863_e, "CP863" }, + { iconv_CP864_e, "CP864" }, + { iconv_CP865_e, "CP865" }, + { iconv_CP866_e, "CP866" }, + { iconv_CP866NAV_e, "CP866NAV" }, + { iconv_CP868_e, "CP868" }, + { iconv_CP869_e, "CP869" }, + { iconv_CP870_e, "CP870" }, + { iconv_CP871_e, "CP871" }, + { iconv_CP874_e, "CP874" }, + { iconv_CP875_e, "CP875" }, + { iconv_CP880_e, "CP880" }, + { iconv_CP891_e, "CP891" }, + { iconv_CP901_e, "CP901" }, + { iconv_CP902_e, "CP902" }, + { iconv_CP903_e, "CP903" }, + { iconv_CP904_e, "CP904" }, + { iconv_CP905_e, "CP905" }, + { iconv_CP912_e, "CP912" }, + { iconv_CP915_e, "CP915" }, + { iconv_CP916_e, "CP916" }, + { iconv_CP918_e, "CP918" }, + { iconv_CP920_e, "CP920" }, + { iconv_CP921_e, "CP921" }, + { iconv_CP922_e, "CP922" }, + { iconv_CP930_e, "CP930" }, + { iconv_CP932_e, "CP932" }, + { iconv_CP933_e, "CP933" }, + { iconv_CP935_e, "CP935" }, + { iconv_CP936_e, "CP936" }, + { iconv_CP937_e, "CP937" }, + { iconv_CP939_e, "CP939" }, + { iconv_CP949_e, "CP949" }, + { iconv_CP950_e, "CP950" }, + { iconv_CP1004_e, "CP1004" }, + { iconv_CP1008_e, "CP1008" }, + { iconv_CP1025_e, "CP1025" }, + { iconv_CP1026_e, "CP1026" }, + { iconv_CP1046_e, "CP1046" }, + { iconv_CP1047_e, "CP1047" }, + { iconv_CP1070_e, "CP1070" }, + { iconv_CP1079_e, "CP1079" }, + { iconv_CP1081_e, "CP1081" }, + { iconv_CP1084_e, "CP1084" }, + { iconv_CP1089_e, "CP1089" }, + { iconv_CP1097_e, "CP1097" }, + { iconv_CP1112_e, "CP1112" }, + { iconv_CP1122_e, "CP1122" }, + { iconv_CP1123_e, "CP1123" }, + { iconv_CP1124_e, "CP1124" }, + { iconv_CP1125_e, "CP1125" }, + { iconv_CP1129_e, "CP1129" }, + { iconv_CP1130_e, "CP1130" }, + { iconv_CP1132_e, "CP1132" }, + { iconv_CP1133_e, "CP1133" }, + { iconv_CP1137_e, "CP1137" }, + { iconv_CP1140_e, "CP1140" }, + { iconv_CP1141_e, "CP1141" }, + { iconv_CP1142_e, "CP1142" }, + { iconv_CP1143_e, "CP1143" }, + { iconv_CP1144_e, "CP1144" }, + { iconv_CP1145_e, "CP1145" }, + { iconv_CP1146_e, "CP1146" }, + { iconv_CP1147_e, "CP1147" }, + { iconv_CP1148_e, "CP1148" }, + { iconv_CP1149_e, "CP1149" }, + { iconv_CP1153_e, "CP1153" }, + { iconv_CP1154_e, "CP1154" }, + { iconv_CP1155_e, "CP1155" }, + { iconv_CP1156_e, "CP1156" }, + { iconv_CP1157_e, "CP1157" }, + { iconv_CP1158_e, "CP1158" }, + { iconv_CP1160_e, "CP1160" }, + { iconv_CP1161_e, "CP1161" }, + { iconv_CP1162_e, "CP1162" }, + { iconv_CP1163_e, "CP1163" }, + { iconv_CP1164_e, "CP1164" }, + { iconv_CP1166_e, "CP1166" }, + { iconv_CP1167_e, "CP1167" }, + { iconv_CP1250_e, "CP1250" }, + { iconv_CP1251_e, "CP1251" }, + { iconv_CP1252_e, "CP1252" }, + { iconv_CP1253_e, "CP1253" }, + { iconv_CP1254_e, "CP1254" }, + { iconv_CP1255_e, "CP1255" }, + { iconv_CP1256_e, "CP1256" }, + { iconv_CP1257_e, "CP1257" }, + { iconv_CP1258_e, "CP1258" }, + { iconv_CP1282_e, "CP1282" }, + { iconv_CP1361_e, "CP1361" }, + { iconv_CP1364_e, "CP1364" }, + { iconv_CP1371_e, "CP1371" }, + { iconv_CP1388_e, "CP1388" }, + { iconv_CP1390_e, "CP1390" }, + { iconv_CP1399_e, "CP1399" }, + { iconv_CP4517_e, "CP4517" }, + { iconv_CP4899_e, "CP4899" }, + { iconv_CP4909_e, "CP4909" }, + { iconv_CP4971_e, "CP4971" }, + { iconv_CP5347_e, "CP5347" }, + { iconv_CP9030_e, "CP9030" }, + { iconv_CP9066_e, "CP9066" }, + { iconv_CP9448_e, "CP9448" }, + { iconv_CP10007_e, "CP10007" }, + { iconv_CP12712_e, "CP12712" }, + { iconv_CP16804_e, "CP16804" }, + { iconv_CPIBM861_e, "CPIBM861" }, + { iconv_CSA7_1_e, "CSA7-1" }, + { iconv_CSA7_2_e, "CSA7-2" }, + { iconv_CSASCII_e, "CSASCII" }, + { iconv_CSA_T500_1983_e, "CSA_T500-1983" }, + { iconv_CSA_T500_e, "CSA_T500" }, + { iconv_CSA_Z243_4_1985_1_e, "CSA_Z243.4-1985-1" }, + { iconv_CSA_Z243_4_1985_2_e, "CSA_Z243.4-1985-2" }, + { iconv_CSA_Z243_419851_e, "CSA_Z243.419851" }, + { iconv_CSA_Z243_419852_e, "CSA_Z243.419852" }, + { iconv_CSDECMCS_e, "CSDECMCS" }, + { iconv_CSEBCDICATDE_e, "CSEBCDICATDE" }, + { iconv_CSEBCDICATDEA_e, "CSEBCDICATDEA" }, + { iconv_CSEBCDICCAFR_e, "CSEBCDICCAFR" }, + { iconv_CSEBCDICDKNO_e, "CSEBCDICDKNO" }, + { iconv_CSEBCDICDKNOA_e, "CSEBCDICDKNOA" }, + { iconv_CSEBCDICES_e, "CSEBCDICES" }, + { iconv_CSEBCDICESA_e, "CSEBCDICESA" }, + { iconv_CSEBCDICESS_e, "CSEBCDICESS" }, + { iconv_CSEBCDICFISE_e, "CSEBCDICFISE" }, + { iconv_CSEBCDICFISEA_e, "CSEBCDICFISEA" }, + { iconv_CSEBCDICFR_e, "CSEBCDICFR" }, + { iconv_CSEBCDICIT_e, "CSEBCDICIT" }, + { iconv_CSEBCDICPT_e, "CSEBCDICPT" }, + { iconv_CSEBCDICUK_e, "CSEBCDICUK" }, + { iconv_CSEBCDICUS_e, "CSEBCDICUS" }, + { iconv_CSEUCKR_e, "CSEUCKR" }, + { iconv_CSEUCPKDFMTJAPANESE_e, "CSEUCPKDFMTJAPANESE" }, + { iconv_CSGB2312_e, "CSGB2312" }, + { iconv_CSHPROMAN8_e, "CSHPROMAN8" }, + { iconv_CSIBM037_e, "CSIBM037" }, + { iconv_CSIBM038_e, "CSIBM038" }, + { iconv_CSIBM273_e, "CSIBM273" }, + { iconv_CSIBM274_e, "CSIBM274" }, + { iconv_CSIBM275_e, "CSIBM275" }, + { iconv_CSIBM277_e, "CSIBM277" }, + { iconv_CSIBM278_e, "CSIBM278" }, + { iconv_CSIBM280_e, "CSIBM280" }, + { iconv_CSIBM281_e, "CSIBM281" }, + { iconv_CSIBM284_e, "CSIBM284" }, + { iconv_CSIBM285_e, "CSIBM285" }, + { iconv_CSIBM290_e, "CSIBM290" }, + { iconv_CSIBM297_e, "CSIBM297" }, + { iconv_CSIBM420_e, "CSIBM420" }, + { iconv_CSIBM423_e, "CSIBM423" }, + { iconv_CSIBM424_e, "CSIBM424" }, + { iconv_CSIBM500_e, "CSIBM500" }, + { iconv_CSIBM803_e, "CSIBM803" }, + { iconv_CSIBM851_e, "CSIBM851" }, + { iconv_CSIBM855_e, "CSIBM855" }, + { iconv_CSIBM856_e, "CSIBM856" }, + { iconv_CSIBM857_e, "CSIBM857" }, + { iconv_CSIBM860_e, "CSIBM860" }, + { iconv_CSIBM863_e, "CSIBM863" }, + { iconv_CSIBM864_e, "CSIBM864" }, + { iconv_CSIBM865_e, "CSIBM865" }, + { iconv_CSIBM866_e, "CSIBM866" }, + { iconv_CSIBM868_e, "CSIBM868" }, + { iconv_CSIBM869_e, "CSIBM869" }, + { iconv_CSIBM870_e, "CSIBM870" }, + { iconv_CSIBM871_e, "CSIBM871" }, + { iconv_CSIBM880_e, "CSIBM880" }, + { iconv_CSIBM891_e, "CSIBM891" }, + { iconv_CSIBM901_e, "CSIBM901" }, + { iconv_CSIBM902_e, "CSIBM902" }, + { iconv_CSIBM903_e, "CSIBM903" }, + { iconv_CSIBM904_e, "CSIBM904" }, + { iconv_CSIBM905_e, "CSIBM905" }, + { iconv_CSIBM918_e, "CSIBM918" }, + { iconv_CSIBM921_e, "CSIBM921" }, + { iconv_CSIBM922_e, "CSIBM922" }, + { iconv_CSIBM930_e, "CSIBM930" }, + { iconv_CSIBM932_e, "CSIBM932" }, + { iconv_CSIBM933_e, "CSIBM933" }, + { iconv_CSIBM935_e, "CSIBM935" }, + { iconv_CSIBM937_e, "CSIBM937" }, + { iconv_CSIBM939_e, "CSIBM939" }, + { iconv_CSIBM943_e, "CSIBM943" }, + { iconv_CSIBM1008_e, "CSIBM1008" }, + { iconv_CSIBM1025_e, "CSIBM1025" }, + { iconv_CSIBM1026_e, "CSIBM1026" }, + { iconv_CSIBM1097_e, "CSIBM1097" }, + { iconv_CSIBM1112_e, "CSIBM1112" }, + { iconv_CSIBM1122_e, "CSIBM1122" }, + { iconv_CSIBM1123_e, "CSIBM1123" }, + { iconv_CSIBM1124_e, "CSIBM1124" }, + { iconv_CSIBM1129_e, "CSIBM1129" }, + { iconv_CSIBM1130_e, "CSIBM1130" }, + { iconv_CSIBM1132_e, "CSIBM1132" }, + { iconv_CSIBM1133_e, "CSIBM1133" }, + { iconv_CSIBM1137_e, "CSIBM1137" }, + { iconv_CSIBM1140_e, "CSIBM1140" }, + { iconv_CSIBM1141_e, "CSIBM1141" }, + { iconv_CSIBM1142_e, "CSIBM1142" }, + { iconv_CSIBM1143_e, "CSIBM1143" }, + { iconv_CSIBM1144_e, "CSIBM1144" }, + { iconv_CSIBM1145_e, "CSIBM1145" }, + { iconv_CSIBM1146_e, "CSIBM1146" }, + { iconv_CSIBM1147_e, "CSIBM1147" }, + { iconv_CSIBM1148_e, "CSIBM1148" }, + { iconv_CSIBM1149_e, "CSIBM1149" }, + { iconv_CSIBM1153_e, "CSIBM1153" }, + { iconv_CSIBM1154_e, "CSIBM1154" }, + { iconv_CSIBM1155_e, "CSIBM1155" }, + { iconv_CSIBM1156_e, "CSIBM1156" }, + { iconv_CSIBM1157_e, "CSIBM1157" }, + { iconv_CSIBM1158_e, "CSIBM1158" }, + { iconv_CSIBM1160_e, "CSIBM1160" }, + { iconv_CSIBM1161_e, "CSIBM1161" }, + { iconv_CSIBM1163_e, "CSIBM1163" }, + { iconv_CSIBM1164_e, "CSIBM1164" }, + { iconv_CSIBM1166_e, "CSIBM1166" }, + { iconv_CSIBM1167_e, "CSIBM1167" }, + { iconv_CSIBM1364_e, "CSIBM1364" }, + { iconv_CSIBM1371_e, "CSIBM1371" }, + { iconv_CSIBM1388_e, "CSIBM1388" }, + { iconv_CSIBM1390_e, "CSIBM1390" }, + { iconv_CSIBM1399_e, "CSIBM1399" }, + { iconv_CSIBM4517_e, "CSIBM4517" }, + { iconv_CSIBM4899_e, "CSIBM4899" }, + { iconv_CSIBM4909_e, "CSIBM4909" }, + { iconv_CSIBM4971_e, "CSIBM4971" }, + { iconv_CSIBM5347_e, "CSIBM5347" }, + { iconv_CSIBM9030_e, "CSIBM9030" }, + { iconv_CSIBM9066_e, "CSIBM9066" }, + { iconv_CSIBM9448_e, "CSIBM9448" }, + { iconv_CSIBM12712_e, "CSIBM12712" }, + { iconv_CSIBM16804_e, "CSIBM16804" }, + { iconv_CSIBM11621162_e, "CSIBM11621162" }, + { iconv_CSISO4UNITEDKINGDOM_e, "CSISO4UNITEDKINGDOM" }, + { iconv_CSISO10SWEDISH_e, "CSISO10SWEDISH" }, + { iconv_CSISO11SWEDISHFORNAMES_e, "CSISO11SWEDISHFORNAMES" }, + { iconv_CSISO14JISC6220RO_e, "CSISO14JISC6220RO" }, + { iconv_CSISO15ITALIAN_e, "CSISO15ITALIAN" }, + { iconv_CSISO16PORTUGESE_e, "CSISO16PORTUGESE" }, + { iconv_CSISO17SPANISH_e, "CSISO17SPANISH" }, + { iconv_CSISO18GREEK7OLD_e, "CSISO18GREEK7OLD" }, + { iconv_CSISO19LATINGREEK_e, "CSISO19LATINGREEK" }, + { iconv_CSISO21GERMAN_e, "CSISO21GERMAN" }, + { iconv_CSISO25FRENCH_e, "CSISO25FRENCH" }, + { iconv_CSISO27LATINGREEK1_e, "CSISO27LATINGREEK1" }, + { iconv_CSISO49INIS_e, "CSISO49INIS" }, + { iconv_CSISO50INIS8_e, "CSISO50INIS8" }, + { iconv_CSISO51INISCYRILLIC_e, "CSISO51INISCYRILLIC" }, + { iconv_CSISO58GB1988_e, "CSISO58GB1988" }, + { iconv_CSISO60DANISHNORWEGIAN_e, "CSISO60DANISHNORWEGIAN" }, + { iconv_CSISO60NORWEGIAN1_e, "CSISO60NORWEGIAN1" }, + { iconv_CSISO61NORWEGIAN2_e, "CSISO61NORWEGIAN2" }, + { iconv_CSISO69FRENCH_e, "CSISO69FRENCH" }, + { iconv_CSISO84PORTUGUESE2_e, "CSISO84PORTUGUESE2" }, + { iconv_CSISO85SPANISH2_e, "CSISO85SPANISH2" }, + { iconv_CSISO86HUNGARIAN_e, "CSISO86HUNGARIAN" }, + { iconv_CSISO88GREEK7_e, "CSISO88GREEK7" }, + { iconv_CSISO89ASMO449_e, "CSISO89ASMO449" }, + { iconv_CSISO90_e, "CSISO90" }, + { iconv_CSISO92JISC62991984B_e, "CSISO92JISC62991984B" }, + { iconv_CSISO99NAPLPS_e, "CSISO99NAPLPS" }, + { iconv_CSISO103T618BIT_e, "CSISO103T618BIT" }, + { iconv_CSISO111ECMACYRILLIC_e, "CSISO111ECMACYRILLIC" }, + { iconv_CSISO121CANADIAN1_e, "CSISO121CANADIAN1" }, + { iconv_CSISO122CANADIAN2_e, "CSISO122CANADIAN2" }, + { iconv_CSISO139CSN369103_e, "CSISO139CSN369103" }, + { iconv_CSISO141JUSIB1002_e, "CSISO141JUSIB1002" }, + { iconv_CSISO143IECP271_e, "CSISO143IECP271" }, + { iconv_CSISO150_e, "CSISO150" }, + { iconv_CSISO150GREEKCCITT_e, "CSISO150GREEKCCITT" }, + { iconv_CSISO151CUBA_e, "CSISO151CUBA" }, + { iconv_CSISO153GOST1976874_e, "CSISO153GOST1976874" }, + { iconv_CSISO646DANISH_e, "CSISO646DANISH" }, + { iconv_CSISO2022CN_e, "CSISO2022CN" }, + { iconv_CSISO2022JP_e, "CSISO2022JP" }, + { iconv_CSISO2022JP2_e, "CSISO2022JP2" }, + { iconv_CSISO2022KR_e, "CSISO2022KR" }, + { iconv_CSISO2033_e, "CSISO2033" }, + { iconv_CSISO5427CYRILLIC_e, "CSISO5427CYRILLIC" }, + { iconv_CSISO5427CYRILLIC1981_e, "CSISO5427CYRILLIC1981" }, + { iconv_CSISO5428GREEK_e, "CSISO5428GREEK" }, + { iconv_CSISO10367BOX_e, "CSISO10367BOX" }, + { iconv_CSISOLATIN1_e, "CSISOLATIN1" }, + { iconv_CSISOLATIN2_e, "CSISOLATIN2" }, + { iconv_CSISOLATIN3_e, "CSISOLATIN3" }, + { iconv_CSISOLATIN4_e, "CSISOLATIN4" }, + { iconv_CSISOLATIN5_e, "CSISOLATIN5" }, + { iconv_CSISOLATIN6_e, "CSISOLATIN6" }, + { iconv_CSISOLATINARABIC_e, "CSISOLATINARABIC" }, + { iconv_CSISOLATINCYRILLIC_e, "CSISOLATINCYRILLIC" }, + { iconv_CSISOLATINGREEK_e, "CSISOLATINGREEK" }, + { iconv_CSISOLATINHEBREW_e, "CSISOLATINHEBREW" }, + { iconv_CSKOI8R_e, "CSKOI8R" }, + { iconv_CSKSC5636_e, "CSKSC5636" }, + { iconv_CSMACINTOSH_e, "CSMACINTOSH" }, + { iconv_CSNATSDANO_e, "CSNATSDANO" }, + { iconv_CSNATSSEFI_e, "CSNATSSEFI" }, + { iconv_CSN_369103_e, "CSN_369103" }, + { iconv_CSPC8CODEPAGE437_e, "CSPC8CODEPAGE437" }, + { iconv_CSPC775BALTIC_e, "CSPC775BALTIC" }, + { iconv_CSPC850MULTILINGUAL_e, "CSPC850MULTILINGUAL" }, + { iconv_CSPC858MULTILINGUAL_e, "CSPC858MULTILINGUAL" }, + { iconv_CSPC862LATINHEBREW_e, "CSPC862LATINHEBREW" }, + { iconv_CSPCP852_e, "CSPCP852" }, + { iconv_CSSHIFTJIS_e, "CSSHIFTJIS" }, + { iconv_CSUCS4_e, "CSUCS4" }, + { iconv_CSUNICODE_e, "CSUNICODE" }, + { iconv_CSWINDOWS31J_e, "CSWINDOWS31J" }, + { iconv_CUBA_e, "CUBA" }, + { iconv_CWI_2_e, "CWI-2" }, + { iconv_CWI_e, "CWI" }, + { iconv_CYRILLIC_e, "CYRILLIC" }, + { iconv_DE_e, "DE" }, + { iconv_DEC_MCS_e, "DEC-MCS" }, + { iconv_DEC_e, "DEC" }, + { iconv_DECMCS_e, "DECMCS" }, + { iconv_DIN_66003_e, "DIN_66003" }, + { iconv_DK_e, "DK" }, + { iconv_DS2089_e, "DS2089" }, + { iconv_DS_2089_e, "DS_2089" }, + { iconv_E13B_e, "E13B" }, + { iconv_EBCDIC_AT_DE_A_e, "EBCDIC-AT-DE-A" }, + { iconv_EBCDIC_AT_DE_e, "EBCDIC-AT-DE" }, + { iconv_EBCDIC_BE_e, "EBCDIC-BE" }, + { iconv_EBCDIC_BR_e, "EBCDIC-BR" }, + { iconv_EBCDIC_CA_FR_e, "EBCDIC-CA-FR" }, + { iconv_EBCDIC_CP_AR1_e, "EBCDIC-CP-AR1" }, + { iconv_EBCDIC_CP_AR2_e, "EBCDIC-CP-AR2" }, + { iconv_EBCDIC_CP_BE_e, "EBCDIC-CP-BE" }, + { iconv_EBCDIC_CP_CA_e, "EBCDIC-CP-CA" }, + { iconv_EBCDIC_CP_CH_e, "EBCDIC-CP-CH" }, + { iconv_EBCDIC_CP_DK_e, "EBCDIC-CP-DK" }, + { iconv_EBCDIC_CP_ES_e, "EBCDIC-CP-ES" }, + { iconv_EBCDIC_CP_FI_e, "EBCDIC-CP-FI" }, + { iconv_EBCDIC_CP_FR_e, "EBCDIC-CP-FR" }, + { iconv_EBCDIC_CP_GB_e, "EBCDIC-CP-GB" }, + { iconv_EBCDIC_CP_GR_e, "EBCDIC-CP-GR" }, + { iconv_EBCDIC_CP_HE_e, "EBCDIC-CP-HE" }, + { iconv_EBCDIC_CP_IS_e, "EBCDIC-CP-IS" }, + { iconv_EBCDIC_CP_IT_e, "EBCDIC-CP-IT" }, + { iconv_EBCDIC_CP_NL_e, "EBCDIC-CP-NL" }, + { iconv_EBCDIC_CP_NO_e, "EBCDIC-CP-NO" }, + { iconv_EBCDIC_CP_ROECE_e, "EBCDIC-CP-ROECE" }, + { iconv_EBCDIC_CP_SE_e, "EBCDIC-CP-SE" }, + { iconv_EBCDIC_CP_TR_e, "EBCDIC-CP-TR" }, + { iconv_EBCDIC_CP_US_e, "EBCDIC-CP-US" }, + { iconv_EBCDIC_CP_WT_e, "EBCDIC-CP-WT" }, + { iconv_EBCDIC_CP_YU_e, "EBCDIC-CP-YU" }, + { iconv_EBCDIC_CYRILLIC_e, "EBCDIC-CYRILLIC" }, + { iconv_EBCDIC_DK_NO_A_e, "EBCDIC-DK-NO-A" }, + { iconv_EBCDIC_DK_NO_e, "EBCDIC-DK-NO" }, + { iconv_EBCDIC_ES_A_e, "EBCDIC-ES-A" }, + { iconv_EBCDIC_ES_S_e, "EBCDIC-ES-S" }, + { iconv_EBCDIC_ES_e, "EBCDIC-ES" }, + { iconv_EBCDIC_FI_SE_A_e, "EBCDIC-FI-SE-A" }, + { iconv_EBCDIC_FI_SE_e, "EBCDIC-FI-SE" }, + { iconv_EBCDIC_FR_e, "EBCDIC-FR" }, + { iconv_EBCDIC_GREEK_e, "EBCDIC-GREEK" }, + { iconv_EBCDIC_INT_e, "EBCDIC-INT" }, + { iconv_EBCDIC_INT1_e, "EBCDIC-INT1" }, + { iconv_EBCDIC_IS_FRISS_e, "EBCDIC-IS-FRISS" }, + { iconv_EBCDIC_IT_e, "EBCDIC-IT" }, + { iconv_EBCDIC_JP_E_e, "EBCDIC-JP-E" }, + { iconv_EBCDIC_JP_KANA_e, "EBCDIC-JP-KANA" }, + { iconv_EBCDIC_PT_e, "EBCDIC-PT" }, + { iconv_EBCDIC_UK_e, "EBCDIC-UK" }, + { iconv_EBCDIC_US_e, "EBCDIC-US" }, + { iconv_EBCDICATDE_e, "EBCDICATDE" }, + { iconv_EBCDICATDEA_e, "EBCDICATDEA" }, + { iconv_EBCDICCAFR_e, "EBCDICCAFR" }, + { iconv_EBCDICDKNO_e, "EBCDICDKNO" }, + { iconv_EBCDICDKNOA_e, "EBCDICDKNOA" }, + { iconv_EBCDICES_e, "EBCDICES" }, + { iconv_EBCDICESA_e, "EBCDICESA" }, + { iconv_EBCDICESS_e, "EBCDICESS" }, + { iconv_EBCDICFISE_e, "EBCDICFISE" }, + { iconv_EBCDICFISEA_e, "EBCDICFISEA" }, + { iconv_EBCDICFR_e, "EBCDICFR" }, + { iconv_EBCDICISFRISS_e, "EBCDICISFRISS" }, + { iconv_EBCDICIT_e, "EBCDICIT" }, + { iconv_EBCDICPT_e, "EBCDICPT" }, + { iconv_EBCDICUK_e, "EBCDICUK" }, + { iconv_EBCDICUS_e, "EBCDICUS" }, + { iconv_ECMA_114_e, "ECMA-114" }, + { iconv_ECMA_118_e, "ECMA-118" }, + { iconv_ECMA_128_e, "ECMA-128" }, + { iconv_ECMA_CYRILLIC_e, "ECMA-CYRILLIC" }, + { iconv_ECMACYRILLIC_e, "ECMACYRILLIC" }, + { iconv_ELOT_928_e, "ELOT_928" }, + { iconv_ES_e, "ES" }, + { iconv_ES2_e, "ES2" }, + { iconv_EUC_CN_e, "EUC-CN" }, + { iconv_EUC_JISX0213_e, "EUC-JISX0213" }, + { iconv_EUC_JP_MS_e, "EUC-JP-MS" }, + { iconv_EUC_JP_e, "EUC-JP" }, + { iconv_EUC_KR_e, "EUC-KR" }, + { iconv_EUC_TW_e, "EUC-TW" }, + { iconv_EUCCN_e, "EUCCN" }, + { iconv_EUCJP_MS_e, "EUCJP-MS" }, + { iconv_EUCJP_OPEN_e, "EUCJP-OPEN" }, + { iconv_EUCJP_WIN_e, "EUCJP-WIN" }, + { iconv_EUCJP_e, "EUCJP" }, + { iconv_EUCKR_e, "EUCKR" }, + { iconv_EUCTW_e, "EUCTW" }, + { iconv_FI_e, "FI" }, + { iconv_FR_e, "FR" }, + { iconv_GB_e, "GB" }, + { iconv_GB2312_e, "GB2312" }, + { iconv_GB13000_e, "GB13000" }, + { iconv_GB18030_e, "GB18030" }, + { iconv_GBK_e, "GBK" }, + { iconv_GB_1988_80_e, "GB_1988-80" }, + { iconv_GB_198880_e, "GB_198880" }, + { iconv_GEORGIAN_ACADEMY_e, "GEORGIAN-ACADEMY" }, + { iconv_GEORGIAN_PS_e, "GEORGIAN-PS" }, + { iconv_GOST_19768_74_e, "GOST_19768-74" }, + { iconv_GOST_19768_e, "GOST_19768" }, + { iconv_GOST_1976874_e, "GOST_1976874" }, + { iconv_GREEK_CCITT_e, "GREEK-CCITT" }, + { iconv_GREEK_e, "GREEK" }, + { iconv_GREEK7_OLD_e, "GREEK7-OLD" }, + { iconv_GREEK7_e, "GREEK7" }, + { iconv_GREEK7OLD_e, "GREEK7OLD" }, + { iconv_GREEK8_e, "GREEK8" }, + { iconv_GREEKCCITT_e, "GREEKCCITT" }, + { iconv_HEBREW_e, "HEBREW" }, + { iconv_HP_GREEK8_e, "HP-GREEK8" }, + { iconv_HP_ROMAN8_e, "HP-ROMAN8" }, + { iconv_HP_ROMAN9_e, "HP-ROMAN9" }, + { iconv_HP_THAI8_e, "HP-THAI8" }, + { iconv_HP_TURKISH8_e, "HP-TURKISH8" }, + { iconv_HPGREEK8_e, "HPGREEK8" }, + { iconv_HPROMAN8_e, "HPROMAN8" }, + { iconv_HPROMAN9_e, "HPROMAN9" }, + { iconv_HPTHAI8_e, "HPTHAI8" }, + { iconv_HPTURKISH8_e, "HPTURKISH8" }, + { iconv_HU_e, "HU" }, + { iconv_IBM_803_e, "IBM-803" }, + { iconv_IBM_856_e, "IBM-856" }, + { iconv_IBM_901_e, "IBM-901" }, + { iconv_IBM_902_e, "IBM-902" }, + { iconv_IBM_921_e, "IBM-921" }, + { iconv_IBM_922_e, "IBM-922" }, + { iconv_IBM_930_e, "IBM-930" }, + { iconv_IBM_932_e, "IBM-932" }, + { iconv_IBM_933_e, "IBM-933" }, + { iconv_IBM_935_e, "IBM-935" }, + { iconv_IBM_937_e, "IBM-937" }, + { iconv_IBM_939_e, "IBM-939" }, + { iconv_IBM_943_e, "IBM-943" }, + { iconv_IBM_1008_e, "IBM-1008" }, + { iconv_IBM_1025_e, "IBM-1025" }, + { iconv_IBM_1046_e, "IBM-1046" }, + { iconv_IBM_1047_e, "IBM-1047" }, + { iconv_IBM_1097_e, "IBM-1097" }, + { iconv_IBM_1112_e, "IBM-1112" }, + { iconv_IBM_1122_e, "IBM-1122" }, + { iconv_IBM_1123_e, "IBM-1123" }, + { iconv_IBM_1124_e, "IBM-1124" }, + { iconv_IBM_1129_e, "IBM-1129" }, + { iconv_IBM_1130_e, "IBM-1130" }, + { iconv_IBM_1132_e, "IBM-1132" }, + { iconv_IBM_1133_e, "IBM-1133" }, + { iconv_IBM_1137_e, "IBM-1137" }, + { iconv_IBM_1140_e, "IBM-1140" }, + { iconv_IBM_1141_e, "IBM-1141" }, + { iconv_IBM_1142_e, "IBM-1142" }, + { iconv_IBM_1143_e, "IBM-1143" }, + { iconv_IBM_1144_e, "IBM-1144" }, + { iconv_IBM_1145_e, "IBM-1145" }, + { iconv_IBM_1146_e, "IBM-1146" }, + { iconv_IBM_1147_e, "IBM-1147" }, + { iconv_IBM_1148_e, "IBM-1148" }, + { iconv_IBM_1149_e, "IBM-1149" }, + { iconv_IBM_1153_e, "IBM-1153" }, + { iconv_IBM_1154_e, "IBM-1154" }, + { iconv_IBM_1155_e, "IBM-1155" }, + { iconv_IBM_1156_e, "IBM-1156" }, + { iconv_IBM_1157_e, "IBM-1157" }, + { iconv_IBM_1158_e, "IBM-1158" }, + { iconv_IBM_1160_e, "IBM-1160" }, + { iconv_IBM_1161_e, "IBM-1161" }, + { iconv_IBM_1162_e, "IBM-1162" }, + { iconv_IBM_1163_e, "IBM-1163" }, + { iconv_IBM_1164_e, "IBM-1164" }, + { iconv_IBM_1166_e, "IBM-1166" }, + { iconv_IBM_1167_e, "IBM-1167" }, + { iconv_IBM_1364_e, "IBM-1364" }, + { iconv_IBM_1371_e, "IBM-1371" }, + { iconv_IBM_1388_e, "IBM-1388" }, + { iconv_IBM_1390_e, "IBM-1390" }, + { iconv_IBM_1399_e, "IBM-1399" }, + { iconv_IBM_4517_e, "IBM-4517" }, + { iconv_IBM_4899_e, "IBM-4899" }, + { iconv_IBM_4909_e, "IBM-4909" }, + { iconv_IBM_4971_e, "IBM-4971" }, + { iconv_IBM_5347_e, "IBM-5347" }, + { iconv_IBM_9030_e, "IBM-9030" }, + { iconv_IBM_9066_e, "IBM-9066" }, + { iconv_IBM_9448_e, "IBM-9448" }, + { iconv_IBM_12712_e, "IBM-12712" }, + { iconv_IBM_16804_e, "IBM-16804" }, + { iconv_IBM037_e, "IBM037" }, + { iconv_IBM038_e, "IBM038" }, + { iconv_IBM256_e, "IBM256" }, + { iconv_IBM273_e, "IBM273" }, + { iconv_IBM274_e, "IBM274" }, + { iconv_IBM275_e, "IBM275" }, + { iconv_IBM277_e, "IBM277" }, + { iconv_IBM278_e, "IBM278" }, + { iconv_IBM280_e, "IBM280" }, + { iconv_IBM281_e, "IBM281" }, + { iconv_IBM284_e, "IBM284" }, + { iconv_IBM285_e, "IBM285" }, + { iconv_IBM290_e, "IBM290" }, + { iconv_IBM297_e, "IBM297" }, + { iconv_IBM367_e, "IBM367" }, + { iconv_IBM420_e, "IBM420" }, + { iconv_IBM423_e, "IBM423" }, + { iconv_IBM424_e, "IBM424" }, + { iconv_IBM437_e, "IBM437" }, + { iconv_IBM500_e, "IBM500" }, + { iconv_IBM775_e, "IBM775" }, + { iconv_IBM803_e, "IBM803" }, + { iconv_IBM813_e, "IBM813" }, + { iconv_IBM819_e, "IBM819" }, + { iconv_IBM848_e, "IBM848" }, + { iconv_IBM850_e, "IBM850" }, + { iconv_IBM851_e, "IBM851" }, + { iconv_IBM852_e, "IBM852" }, + { iconv_IBM855_e, "IBM855" }, + { iconv_IBM856_e, "IBM856" }, + { iconv_IBM857_e, "IBM857" }, + { iconv_IBM858_e, "IBM858" }, + { iconv_IBM860_e, "IBM860" }, + { iconv_IBM861_e, "IBM861" }, + { iconv_IBM862_e, "IBM862" }, + { iconv_IBM863_e, "IBM863" }, + { iconv_IBM864_e, "IBM864" }, + { iconv_IBM865_e, "IBM865" }, + { iconv_IBM866_e, "IBM866" }, + { iconv_IBM866NAV_e, "IBM866NAV" }, + { iconv_IBM868_e, "IBM868" }, + { iconv_IBM869_e, "IBM869" }, + { iconv_IBM870_e, "IBM870" }, + { iconv_IBM871_e, "IBM871" }, + { iconv_IBM874_e, "IBM874" }, + { iconv_IBM875_e, "IBM875" }, + { iconv_IBM880_e, "IBM880" }, + { iconv_IBM891_e, "IBM891" }, + { iconv_IBM901_e, "IBM901" }, + { iconv_IBM902_e, "IBM902" }, + { iconv_IBM903_e, "IBM903" }, + { iconv_IBM904_e, "IBM904" }, + { iconv_IBM905_e, "IBM905" }, + { iconv_IBM912_e, "IBM912" }, + { iconv_IBM915_e, "IBM915" }, + { iconv_IBM916_e, "IBM916" }, + { iconv_IBM918_e, "IBM918" }, + { iconv_IBM920_e, "IBM920" }, + { iconv_IBM921_e, "IBM921" }, + { iconv_IBM922_e, "IBM922" }, + { iconv_IBM930_e, "IBM930" }, + { iconv_IBM932_e, "IBM932" }, + { iconv_IBM933_e, "IBM933" }, + { iconv_IBM935_e, "IBM935" }, + { iconv_IBM937_e, "IBM937" }, + { iconv_IBM939_e, "IBM939" }, + { iconv_IBM943_e, "IBM943" }, + { iconv_IBM1004_e, "IBM1004" }, + { iconv_IBM1008_e, "IBM1008" }, + { iconv_IBM1025_e, "IBM1025" }, + { iconv_IBM1026_e, "IBM1026" }, + { iconv_IBM1046_e, "IBM1046" }, + { iconv_IBM1047_e, "IBM1047" }, + { iconv_IBM1089_e, "IBM1089" }, + { iconv_IBM1097_e, "IBM1097" }, + { iconv_IBM1112_e, "IBM1112" }, + { iconv_IBM1122_e, "IBM1122" }, + { iconv_IBM1123_e, "IBM1123" }, + { iconv_IBM1124_e, "IBM1124" }, + { iconv_IBM1129_e, "IBM1129" }, + { iconv_IBM1130_e, "IBM1130" }, + { iconv_IBM1132_e, "IBM1132" }, + { iconv_IBM1133_e, "IBM1133" }, + { iconv_IBM1137_e, "IBM1137" }, + { iconv_IBM1140_e, "IBM1140" }, + { iconv_IBM1141_e, "IBM1141" }, + { iconv_IBM1142_e, "IBM1142" }, + { iconv_IBM1143_e, "IBM1143" }, + { iconv_IBM1144_e, "IBM1144" }, + { iconv_IBM1145_e, "IBM1145" }, + { iconv_IBM1146_e, "IBM1146" }, + { iconv_IBM1147_e, "IBM1147" }, + { iconv_IBM1148_e, "IBM1148" }, + { iconv_IBM1149_e, "IBM1149" }, + { iconv_IBM1153_e, "IBM1153" }, + { iconv_IBM1154_e, "IBM1154" }, + { iconv_IBM1155_e, "IBM1155" }, + { iconv_IBM1156_e, "IBM1156" }, + { iconv_IBM1157_e, "IBM1157" }, + { iconv_IBM1158_e, "IBM1158" }, + { iconv_IBM1160_e, "IBM1160" }, + { iconv_IBM1161_e, "IBM1161" }, + { iconv_IBM1162_e, "IBM1162" }, + { iconv_IBM1163_e, "IBM1163" }, + { iconv_IBM1164_e, "IBM1164" }, + { iconv_IBM1166_e, "IBM1166" }, + { iconv_IBM1167_e, "IBM1167" }, + { iconv_IBM1364_e, "IBM1364" }, + { iconv_IBM1371_e, "IBM1371" }, + { iconv_IBM1388_e, "IBM1388" }, + { iconv_IBM1390_e, "IBM1390" }, + { iconv_IBM1399_e, "IBM1399" }, + { iconv_IBM4517_e, "IBM4517" }, + { iconv_IBM4899_e, "IBM4899" }, + { iconv_IBM4909_e, "IBM4909" }, + { iconv_IBM4971_e, "IBM4971" }, + { iconv_IBM5347_e, "IBM5347" }, + { iconv_IBM9030_e, "IBM9030" }, + { iconv_IBM9066_e, "IBM9066" }, + { iconv_IBM9448_e, "IBM9448" }, + { iconv_IBM12712_e, "IBM12712" }, + { iconv_IBM16804_e, "IBM16804" }, + { iconv_IEC_P27_1_e, "IEC_P27-1" }, + { iconv_IEC_P271_e, "IEC_P271" }, + { iconv_INIS_8_e, "INIS-8" }, + { iconv_INIS_CYRILLIC_e, "INIS-CYRILLIC" }, + { iconv_INIS_e, "INIS" }, + { iconv_INIS8_e, "INIS8" }, + { iconv_INISCYRILLIC_e, "INISCYRILLIC" }, + { iconv_ISIRI_3342_e, "ISIRI-3342" }, + { iconv_ISIRI3342_e, "ISIRI3342" }, + { iconv_ISO_2022_CN_EXT_e, "ISO-2022-CN-EXT" }, + { iconv_ISO_2022_CN_e, "ISO-2022-CN" }, + { iconv_ISO_2022_JP_2_e, "ISO-2022-JP-2" }, + { iconv_ISO_2022_JP_3_e, "ISO-2022-JP-3" }, + { iconv_ISO_2022_JP_e, "ISO-2022-JP" }, + { iconv_ISO_2022_KR_e, "ISO-2022-KR" }, + { iconv_ISO_8859_1_e, "ISO-8859-1" }, + { iconv_ISO_8859_2_e, "ISO-8859-2" }, + { iconv_ISO_8859_3_e, "ISO-8859-3" }, + { iconv_ISO_8859_4_e, "ISO-8859-4" }, + { iconv_ISO_8859_5_e, "ISO-8859-5" }, + { iconv_ISO_8859_6_e, "ISO-8859-6" }, + { iconv_ISO_8859_7_e, "ISO-8859-7" }, + { iconv_ISO_8859_8_e, "ISO-8859-8" }, + { iconv_ISO_8859_9_e, "ISO-8859-9" }, + { iconv_ISO_8859_9E_e, "ISO-8859-9E" }, + { iconv_ISO_8859_10_e, "ISO-8859-10" }, + { iconv_ISO_8859_11_e, "ISO-8859-11" }, + { iconv_ISO_8859_13_e, "ISO-8859-13" }, + { iconv_ISO_8859_14_e, "ISO-8859-14" }, + { iconv_ISO_8859_15_e, "ISO-8859-15" }, + { iconv_ISO_8859_16_e, "ISO-8859-16" }, + { iconv_ISO_10646_e, "ISO-10646" }, + { iconv_ISO_10646_e, "UCS2/ ISO-10646/UCS2/" }, + { iconv_ISO_10646_e, "UCS4/ ISO-10646/UCS4/" }, + { iconv_ISO_10646_e, "UTF-8/ ISO-10646/UTF-8/" }, + { iconv_ISO_10646_e, "UTF8/ ISO-10646/UTF8/" }, + { iconv_ISO_CELTIC_e, "ISO-CELTIC" }, + { iconv_ISO_IR_4_e, "ISO-IR-4" }, + { iconv_ISO_IR_6_e, "ISO-IR-6" }, + { iconv_ISO_IR_8_1_e, "ISO-IR-8-1" }, + { iconv_ISO_IR_9_1_e, "ISO-IR-9-1" }, + { iconv_ISO_IR_10_e, "ISO-IR-10" }, + { iconv_ISO_IR_11_e, "ISO-IR-11" }, + { iconv_ISO_IR_14_e, "ISO-IR-14" }, + { iconv_ISO_IR_15_e, "ISO-IR-15" }, + { iconv_ISO_IR_16_e, "ISO-IR-16" }, + { iconv_ISO_IR_17_e, "ISO-IR-17" }, + { iconv_ISO_IR_18_e, "ISO-IR-18" }, + { iconv_ISO_IR_19_e, "ISO-IR-19" }, + { iconv_ISO_IR_21_e, "ISO-IR-21" }, + { iconv_ISO_IR_25_e, "ISO-IR-25" }, + { iconv_ISO_IR_27_e, "ISO-IR-27" }, + { iconv_ISO_IR_37_e, "ISO-IR-37" }, + { iconv_ISO_IR_49_e, "ISO-IR-49" }, + { iconv_ISO_IR_50_e, "ISO-IR-50" }, + { iconv_ISO_IR_51_e, "ISO-IR-51" }, + { iconv_ISO_IR_54_e, "ISO-IR-54" }, + { iconv_ISO_IR_55_e, "ISO-IR-55" }, + { iconv_ISO_IR_57_e, "ISO-IR-57" }, + { iconv_ISO_IR_60_e, "ISO-IR-60" }, + { iconv_ISO_IR_61_e, "ISO-IR-61" }, + { iconv_ISO_IR_69_e, "ISO-IR-69" }, + { iconv_ISO_IR_84_e, "ISO-IR-84" }, + { iconv_ISO_IR_85_e, "ISO-IR-85" }, + { iconv_ISO_IR_86_e, "ISO-IR-86" }, + { iconv_ISO_IR_88_e, "ISO-IR-88" }, + { iconv_ISO_IR_89_e, "ISO-IR-89" }, + { iconv_ISO_IR_90_e, "ISO-IR-90" }, + { iconv_ISO_IR_92_e, "ISO-IR-92" }, + { iconv_ISO_IR_98_e, "ISO-IR-98" }, + { iconv_ISO_IR_99_e, "ISO-IR-99" }, + { iconv_ISO_IR_100_e, "ISO-IR-100" }, + { iconv_ISO_IR_101_e, "ISO-IR-101" }, + { iconv_ISO_IR_103_e, "ISO-IR-103" }, + { iconv_ISO_IR_109_e, "ISO-IR-109" }, + { iconv_ISO_IR_110_e, "ISO-IR-110" }, + { iconv_ISO_IR_111_e, "ISO-IR-111" }, + { iconv_ISO_IR_121_e, "ISO-IR-121" }, + { iconv_ISO_IR_122_e, "ISO-IR-122" }, + { iconv_ISO_IR_126_e, "ISO-IR-126" }, + { iconv_ISO_IR_127_e, "ISO-IR-127" }, + { iconv_ISO_IR_138_e, "ISO-IR-138" }, + { iconv_ISO_IR_139_e, "ISO-IR-139" }, + { iconv_ISO_IR_141_e, "ISO-IR-141" }, + { iconv_ISO_IR_143_e, "ISO-IR-143" }, + { iconv_ISO_IR_144_e, "ISO-IR-144" }, + { iconv_ISO_IR_148_e, "ISO-IR-148" }, + { iconv_ISO_IR_150_e, "ISO-IR-150" }, + { iconv_ISO_IR_151_e, "ISO-IR-151" }, + { iconv_ISO_IR_153_e, "ISO-IR-153" }, + { iconv_ISO_IR_155_e, "ISO-IR-155" }, + { iconv_ISO_IR_156_e, "ISO-IR-156" }, + { iconv_ISO_IR_157_e, "ISO-IR-157" }, + { iconv_ISO_IR_166_e, "ISO-IR-166" }, + { iconv_ISO_IR_179_e, "ISO-IR-179" }, + { iconv_ISO_IR_193_e, "ISO-IR-193" }, + { iconv_ISO_IR_197_e, "ISO-IR-197" }, + { iconv_ISO_IR_199_e, "ISO-IR-199" }, + { iconv_ISO_IR_203_e, "ISO-IR-203" }, + { iconv_ISO_IR_209_e, "ISO-IR-209" }, + { iconv_ISO_IR_226_e, "ISO-IR-226" }, + { iconv_ISO_e, "TR_11548-1/ ISO/TR_11548-1/" }, + { iconv_ISO646_CA_e, "ISO646-CA" }, + { iconv_ISO646_CA2_e, "ISO646-CA2" }, + { iconv_ISO646_CN_e, "ISO646-CN" }, + { iconv_ISO646_CU_e, "ISO646-CU" }, + { iconv_ISO646_DE_e, "ISO646-DE" }, + { iconv_ISO646_DK_e, "ISO646-DK" }, + { iconv_ISO646_ES_e, "ISO646-ES" }, + { iconv_ISO646_ES2_e, "ISO646-ES2" }, + { iconv_ISO646_FI_e, "ISO646-FI" }, + { iconv_ISO646_FR_e, "ISO646-FR" }, + { iconv_ISO646_FR1_e, "ISO646-FR1" }, + { iconv_ISO646_GB_e, "ISO646-GB" }, + { iconv_ISO646_HU_e, "ISO646-HU" }, + { iconv_ISO646_IT_e, "ISO646-IT" }, + { iconv_ISO646_JP_OCR_B_e, "ISO646-JP-OCR-B" }, + { iconv_ISO646_JP_e, "ISO646-JP" }, + { iconv_ISO646_KR_e, "ISO646-KR" }, + { iconv_ISO646_NO_e, "ISO646-NO" }, + { iconv_ISO646_NO2_e, "ISO646-NO2" }, + { iconv_ISO646_PT_e, "ISO646-PT" }, + { iconv_ISO646_PT2_e, "ISO646-PT2" }, + { iconv_ISO646_SE_e, "ISO646-SE" }, + { iconv_ISO646_SE2_e, "ISO646-SE2" }, + { iconv_ISO646_US_e, "ISO646-US" }, + { iconv_ISO646_YU_e, "ISO646-YU" }, + { iconv_ISO2022CN_e, "ISO2022CN" }, + { iconv_ISO2022CNEXT_e, "ISO2022CNEXT" }, + { iconv_ISO2022JP_e, "ISO2022JP" }, + { iconv_ISO2022JP2_e, "ISO2022JP2" }, + { iconv_ISO2022KR_e, "ISO2022KR" }, + { iconv_ISO6937_e, "ISO6937" }, + { iconv_ISO8859_1_e, "ISO8859-1" }, + { iconv_ISO8859_2_e, "ISO8859-2" }, + { iconv_ISO8859_3_e, "ISO8859-3" }, + { iconv_ISO8859_4_e, "ISO8859-4" }, + { iconv_ISO8859_5_e, "ISO8859-5" }, + { iconv_ISO8859_6_e, "ISO8859-6" }, + { iconv_ISO8859_7_e, "ISO8859-7" }, + { iconv_ISO8859_8_e, "ISO8859-8" }, + { iconv_ISO8859_9_e, "ISO8859-9" }, + { iconv_ISO8859_9E_e, "ISO8859-9E" }, + { iconv_ISO8859_10_e, "ISO8859-10" }, + { iconv_ISO8859_11_e, "ISO8859-11" }, + { iconv_ISO8859_13_e, "ISO8859-13" }, + { iconv_ISO8859_14_e, "ISO8859-14" }, + { iconv_ISO8859_15_e, "ISO8859-15" }, + { iconv_ISO8859_16_e, "ISO8859-16" }, + { iconv_ISO11548_1_e, "ISO11548-1" }, + { iconv_ISO88591_e, "ISO88591" }, + { iconv_ISO88592_e, "ISO88592" }, + { iconv_ISO88593_e, "ISO88593" }, + { iconv_ISO88594_e, "ISO88594" }, + { iconv_ISO88595_e, "ISO88595" }, + { iconv_ISO88596_e, "ISO88596" }, + { iconv_ISO88597_e, "ISO88597" }, + { iconv_ISO88598_e, "ISO88598" }, + { iconv_ISO88599_e, "ISO88599" }, + { iconv_ISO88599E_e, "ISO88599E" }, + { iconv_ISO885910_e, "ISO885910" }, + { iconv_ISO885911_e, "ISO885911" }, + { iconv_ISO885913_e, "ISO885913" }, + { iconv_ISO885914_e, "ISO885914" }, + { iconv_ISO885915_e, "ISO885915" }, + { iconv_ISO885916_e, "ISO885916" }, + { iconv_ISO_646_IRV_1991_e, "ISO_646.IRV:1991" }, + { iconv_ISO_2033_1983_e, "ISO_2033-1983" }, + { iconv_ISO_2033_e, "ISO_2033" }, + { iconv_ISO_5427_EXT_e, "ISO_5427-EXT" }, + { iconv_ISO_5427_e, "ISO_5427" }, + { iconv_ISO_5427_1981_e, "ISO_5427:1981" }, + { iconv_ISO_5427EXT_e, "ISO_5427EXT" }, + { iconv_ISO_5428_e, "ISO_5428" }, + { iconv_ISO_5428_1980_e, "ISO_5428:1980" }, + { iconv_ISO_6937_2_e, "ISO_6937-2" }, + { iconv_ISO_6937_2_1983_e, "ISO_6937-2:1983" }, + { iconv_ISO_6937_e, "ISO_6937" }, + { iconv_ISO_6937_1992_e, "ISO_6937:1992" }, + { iconv_ISO_8859_1_e, "ISO_8859-1" }, + { iconv_ISO_8859_1_1987_e, "ISO_8859-1:1987" }, + { iconv_ISO_8859_2_e, "ISO_8859-2" }, + { iconv_ISO_8859_2_1987_e, "ISO_8859-2:1987" }, + { iconv_ISO_8859_3_e, "ISO_8859-3" }, + { iconv_ISO_8859_3_1988_e, "ISO_8859-3:1988" }, + { iconv_ISO_8859_4_e, "ISO_8859-4" }, + { iconv_ISO_8859_4_1988_e, "ISO_8859-4:1988" }, + { iconv_ISO_8859_5_e, "ISO_8859-5" }, + { iconv_ISO_8859_5_1988_e, "ISO_8859-5:1988" }, + { iconv_ISO_8859_6_e, "ISO_8859-6" }, + { iconv_ISO_8859_6_1987_e, "ISO_8859-6:1987" }, + { iconv_ISO_8859_7_e, "ISO_8859-7" }, + { iconv_ISO_8859_7_1987_e, "ISO_8859-7:1987" }, + { iconv_ISO_8859_7_2003_e, "ISO_8859-7:2003" }, + { iconv_ISO_8859_8_e, "ISO_8859-8" }, + { iconv_ISO_8859_8_1988_e, "ISO_8859-8:1988" }, + { iconv_ISO_8859_9_e, "ISO_8859-9" }, + { iconv_ISO_8859_9_1989_e, "ISO_8859-9:1989" }, + { iconv_ISO_8859_9E_e, "ISO_8859-9E" }, + { iconv_ISO_8859_10_e, "ISO_8859-10" }, + { iconv_ISO_8859_10_1992_e, "ISO_8859-10:1992" }, + { iconv_ISO_8859_14_e, "ISO_8859-14" }, + { iconv_ISO_8859_14_1998_e, "ISO_8859-14:1998" }, + { iconv_ISO_8859_15_e, "ISO_8859-15" }, + { iconv_ISO_8859_15_1998_e, "ISO_8859-15:1998" }, + { iconv_ISO_8859_16_e, "ISO_8859-16" }, + { iconv_ISO_8859_16_2001_e, "ISO_8859-16:2001" }, + { iconv_ISO_9036_e, "ISO_9036" }, + { iconv_ISO_10367_BOX_e, "ISO_10367-BOX" }, + { iconv_ISO_10367BOX_e, "ISO_10367BOX" }, + { iconv_ISO_11548_1_e, "ISO_11548-1" }, + { iconv_ISO_69372_e, "ISO_69372" }, + { iconv_IT_e, "IT" }, + { iconv_JIS_C6220_1969_RO_e, "JIS_C6220-1969-RO" }, + { iconv_JIS_C6229_1984_B_e, "JIS_C6229-1984-B" }, + { iconv_JIS_C62201969RO_e, "JIS_C62201969RO" }, + { iconv_JIS_C62291984B_e, "JIS_C62291984B" }, + { iconv_JOHAB_e, "JOHAB" }, + { iconv_JP_OCR_B_e, "JP-OCR-B" }, + { iconv_JP_e, "JP" }, + { iconv_JS_e, "JS" }, + { iconv_JUS_I_B1_002_e, "JUS_I.B1.002" }, + { iconv_KOI_7_e, "KOI-7" }, + { iconv_KOI_8_e, "KOI-8" }, + { iconv_KOI8_R_e, "KOI8-R" }, + { iconv_KOI8_RU_e, "KOI8-RU" }, + { iconv_KOI8_T_e, "KOI8-T" }, + { iconv_KOI8_U_e, "KOI8-U" }, + { iconv_KOI8_e, "KOI8" }, + { iconv_KOI8R_e, "KOI8R" }, + { iconv_KOI8U_e, "KOI8U" }, + { iconv_KSC5636_e, "KSC5636" }, + { iconv_L1_e, "L1" }, + { iconv_L2_e, "L2" }, + { iconv_L3_e, "L3" }, + { iconv_L4_e, "L4" }, + { iconv_L5_e, "L5" }, + { iconv_L6_e, "L6" }, + { iconv_L7_e, "L7" }, + { iconv_L8_e, "L8" }, + { iconv_L10_e, "L10" }, + { iconv_LATIN_9_e, "LATIN-9" }, + { iconv_LATIN_GREEK_1_e, "LATIN-GREEK-1" }, + { iconv_LATIN_GREEK_e, "LATIN-GREEK" }, + { iconv_LATIN1_e, "LATIN1" }, + { iconv_LATIN2_e, "LATIN2" }, + { iconv_LATIN3_e, "LATIN3" }, + { iconv_LATIN4_e, "LATIN4" }, + { iconv_LATIN5_e, "LATIN5" }, + { iconv_LATIN6_e, "LATIN6" }, + { iconv_LATIN7_e, "LATIN7" }, + { iconv_LATIN8_e, "LATIN8" }, + { iconv_LATIN9_e, "LATIN9" }, + { iconv_LATIN10_e, "LATIN10" }, + { iconv_LATINGREEK_e, "LATINGREEK" }, + { iconv_LATINGREEK1_e, "LATINGREEK1" }, + { iconv_MAC_CENTRALEUROPE_e, "MAC-CENTRALEUROPE" }, + { iconv_MAC_CYRILLIC_e, "MAC-CYRILLIC" }, + { iconv_MAC_IS_e, "MAC-IS" }, + { iconv_MAC_SAMI_e, "MAC-SAMI" }, + { iconv_MAC_UK_e, "MAC-UK" }, + { iconv_MAC_e, "MAC" }, + { iconv_MACCYRILLIC_e, "MACCYRILLIC" }, + { iconv_MACINTOSH_e, "MACINTOSH" }, + { iconv_MACIS_e, "MACIS" }, + { iconv_MACUK_e, "MACUK" }, + { iconv_MACUKRAINIAN_e, "MACUKRAINIAN" }, + { iconv_MIK_e, "MIK" }, + { iconv_MS_ANSI_e, "MS-ANSI" }, + { iconv_MS_ARAB_e, "MS-ARAB" }, + { iconv_MS_CYRL_e, "MS-CYRL" }, + { iconv_MS_EE_e, "MS-EE" }, + { iconv_MS_GREEK_e, "MS-GREEK" }, + { iconv_MS_HEBR_e, "MS-HEBR" }, + { iconv_MS_MAC_CYRILLIC_e, "MS-MAC-CYRILLIC" }, + { iconv_MS_TURK_e, "MS-TURK" }, + { iconv_MS932_e, "MS932" }, + { iconv_MS936_e, "MS936" }, + { iconv_MSCP949_e, "MSCP949" }, + { iconv_MSCP1361_e, "MSCP1361" }, + { iconv_MSMACCYRILLIC_e, "MSMACCYRILLIC" }, + { iconv_MSZ_7795_3_e, "MSZ_7795.3" }, + { iconv_MS_KANJI_e, "MS_KANJI" }, + { iconv_NAPLPS_e, "NAPLPS" }, + { iconv_NATS_DANO_e, "NATS-DANO" }, + { iconv_NATS_SEFI_e, "NATS-SEFI" }, + { iconv_NATSDANO_e, "NATSDANO" }, + { iconv_NATSSEFI_e, "NATSSEFI" }, + { iconv_NC_NC0010_e, "NC_NC0010" }, + { iconv_NC_NC00_10_e, "NC_NC00-10" }, + { iconv_NC_NC00_10_81_e, "NC_NC00-10:81" }, + { iconv_NF_Z_62_010_e, "NF_Z_62-010" }, + { iconv_NF_Z_62_010__1973__e, "NF_Z_62-010_(1973)" }, + { iconv_NF_Z_62_010_1973_e, "NF_Z_62-010_1973" }, + { iconv_NF_Z_62010_e, "NF_Z_62010" }, + { iconv_NF_Z_62010_1973_e, "NF_Z_62010_1973" }, + { iconv_NO_e, "NO" }, + { iconv_NO2_e, "NO2" }, + { iconv_NS_4551_1_e, "NS_4551-1" }, + { iconv_NS_4551_2_e, "NS_4551-2" }, + { iconv_NS_45511_e, "NS_45511" }, + { iconv_NS_45512_e, "NS_45512" }, + { iconv_OS2LATIN1_e, "OS2LATIN1" }, + { iconv_OSF00010001_e, "OSF00010001" }, + { iconv_OSF00010002_e, "OSF00010002" }, + { iconv_OSF00010003_e, "OSF00010003" }, + { iconv_OSF00010004_e, "OSF00010004" }, + { iconv_OSF00010005_e, "OSF00010005" }, + { iconv_OSF00010006_e, "OSF00010006" }, + { iconv_OSF00010007_e, "OSF00010007" }, + { iconv_OSF00010008_e, "OSF00010008" }, + { iconv_OSF00010009_e, "OSF00010009" }, + { iconv_OSF0001000A_e, "OSF0001000A" }, + { iconv_OSF00010020_e, "OSF00010020" }, + { iconv_OSF00010100_e, "OSF00010100" }, + { iconv_OSF00010101_e, "OSF00010101" }, + { iconv_OSF00010102_e, "OSF00010102" }, + { iconv_OSF00010104_e, "OSF00010104" }, + { iconv_OSF00010105_e, "OSF00010105" }, + { iconv_OSF00010106_e, "OSF00010106" }, + { iconv_OSF00030010_e, "OSF00030010" }, + { iconv_OSF0004000A_e, "OSF0004000A" }, + { iconv_OSF0005000A_e, "OSF0005000A" }, + { iconv_OSF05010001_e, "OSF05010001" }, + { iconv_OSF100201A4_e, "OSF100201A4" }, + { iconv_OSF100201A8_e, "OSF100201A8" }, + { iconv_OSF100201B5_e, "OSF100201B5" }, + { iconv_OSF100201F4_e, "OSF100201F4" }, + { iconv_OSF100203B5_e, "OSF100203B5" }, + { iconv_OSF1002011C_e, "OSF1002011C" }, + { iconv_OSF1002011D_e, "OSF1002011D" }, + { iconv_OSF1002035D_e, "OSF1002035D" }, + { iconv_OSF1002035E_e, "OSF1002035E" }, + { iconv_OSF1002035F_e, "OSF1002035F" }, + { iconv_OSF1002036B_e, "OSF1002036B" }, + { iconv_OSF1002037B_e, "OSF1002037B" }, + { iconv_OSF10010001_e, "OSF10010001" }, + { iconv_OSF10010004_e, "OSF10010004" }, + { iconv_OSF10010006_e, "OSF10010006" }, + { iconv_OSF10020025_e, "OSF10020025" }, + { iconv_OSF10020111_e, "OSF10020111" }, + { iconv_OSF10020115_e, "OSF10020115" }, + { iconv_OSF10020116_e, "OSF10020116" }, + { iconv_OSF10020118_e, "OSF10020118" }, + { iconv_OSF10020122_e, "OSF10020122" }, + { iconv_OSF10020129_e, "OSF10020129" }, + { iconv_OSF10020352_e, "OSF10020352" }, + { iconv_OSF10020354_e, "OSF10020354" }, + { iconv_OSF10020357_e, "OSF10020357" }, + { iconv_OSF10020359_e, "OSF10020359" }, + { iconv_OSF10020360_e, "OSF10020360" }, + { iconv_OSF10020364_e, "OSF10020364" }, + { iconv_OSF10020365_e, "OSF10020365" }, + { iconv_OSF10020366_e, "OSF10020366" }, + { iconv_OSF10020367_e, "OSF10020367" }, + { iconv_OSF10020370_e, "OSF10020370" }, + { iconv_OSF10020387_e, "OSF10020387" }, + { iconv_OSF10020388_e, "OSF10020388" }, + { iconv_OSF10020396_e, "OSF10020396" }, + { iconv_OSF10020402_e, "OSF10020402" }, + { iconv_OSF10020417_e, "OSF10020417" }, + { iconv_PT_e, "PT" }, + { iconv_PT2_e, "PT2" }, + { iconv_PT154_e, "PT154" }, + { iconv_R8_e, "R8" }, + { iconv_R9_e, "R9" }, + { iconv_RK1048_e, "RK1048" }, + { iconv_ROMAN8_e, "ROMAN8" }, + { iconv_ROMAN9_e, "ROMAN9" }, + { iconv_RUSCII_e, "RUSCII" }, + { iconv_SE_e, "SE" }, + { iconv_SE2_e, "SE2" }, + { iconv_SEN_850200_B_e, "SEN_850200_B" }, + { iconv_SEN_850200_C_e, "SEN_850200_C" }, + { iconv_SHIFT_JIS_e, "SHIFT-JIS" }, + { iconv_SHIFTJISX0213_e, "SHIFTJISX0213" }, + { iconv_SHIFT_JIS_e, "SHIFT_JIS" }, + { iconv_SHIFT_JISX0213_e, "SHIFT_JISX0213" }, + { iconv_SJIS_OPEN_e, "SJIS-OPEN" }, + { iconv_SJIS_WIN_e, "SJIS-WIN" }, + { iconv_SJIS_e, "SJIS" }, + { iconv_SS636127_e, "SS636127" }, + { iconv_STRK1048_2002_e, "STRK1048-2002" }, + { iconv_ST_SEV_358_88_e, "ST_SEV_358-88" }, + { iconv_T_61_8BIT_e, "T.61-8BIT" }, + { iconv_T_61_e, "T.61" }, + { iconv_T_618BIT_e, "T.618BIT" }, + { iconv_TCVN_5712_e, "TCVN-5712" }, + { iconv_TCVN_e, "TCVN" }, + { iconv_TCVN5712_1_e, "TCVN5712-1" }, + { iconv_TCVN5712_1_1993_e, "TCVN5712-1:1993" }, + { iconv_THAI8_e, "THAI8" }, + { iconv_TIS_620_e, "TIS-620" }, + { iconv_TIS620_0_e, "TIS620-0" }, + { iconv_TIS620_2529_1_e, "TIS620.2529-1" }, + { iconv_TIS620_2533_0_e, "TIS620.2533-0" }, + { iconv_TIS620_e, "TIS620" }, + { iconv_TS_5881_e, "TS-5881" }, + { iconv_TSCII_e, "TSCII" }, + { iconv_TURKISH8_e, "TURKISH8" }, + { iconv_UCS_2_e, "UCS-2" }, + { iconv_UCS_2BE_e, "UCS-2BE" }, + { iconv_UCS_2LE_e, "UCS-2LE" }, + { iconv_UCS_4_e, "UCS-4" }, + { iconv_UCS_4BE_e, "UCS-4BE" }, + { iconv_UCS_4LE_e, "UCS-4LE" }, + { iconv_UCS2_e, "UCS2" }, + { iconv_UCS4_e, "UCS4" }, + { iconv_UHC_e, "UHC" }, + { iconv_UJIS_e, "UJIS" }, + { iconv_UK_e, "UK" }, + { iconv_UNICODE_e, "UNICODE" }, + { iconv_UNICODEBIG_e, "UNICODEBIG" }, + { iconv_UNICODELITTLE_e, "UNICODELITTLE" }, + { iconv_US_ASCII_e, "US-ASCII" }, + { iconv_US_e, "US" }, + { iconv_UTF_7_e, "UTF-7" }, + { iconv_UTF_8_e, "UTF-8" }, + { iconv_UTF_16_e, "UTF-16" }, + { iconv_UTF_16BE_e, "UTF-16BE" }, + { iconv_UTF_16LE_e, "UTF-16LE" }, + { iconv_UTF_32_e, "UTF-32" }, + { iconv_UTF_32BE_e, "UTF-32BE" }, + { iconv_UTF_32LE_e, "UTF-32LE" }, + { iconv_UTF7_e, "UTF7" }, + { iconv_UTF8_e, "UTF8" }, + { iconv_UTF16_e, "UTF16" }, + { iconv_UTF16BE_e, "UTF16BE" }, + { iconv_UTF16LE_e, "UTF16LE" }, + { iconv_UTF32_e, "UTF32" }, + { iconv_UTF32BE_e, "UTF32BE" }, + { iconv_UTF32LE_e, "UTF32LE" }, + { iconv_VISCII_e, "VISCII" }, + { iconv_WCHAR_T_e, "WCHAR_T" }, + { iconv_WIN_SAMI_2_e, "WIN-SAMI-2" }, + { iconv_WINBALTRIM_e, "WINBALTRIM" }, + { iconv_WINDOWS_31J_e, "WINDOWS-31J" }, + { iconv_WINDOWS_874_e, "WINDOWS-874" }, + { iconv_WINDOWS_936_e, "WINDOWS-936" }, + { iconv_WINDOWS_1250_e, "WINDOWS-1250" }, + { iconv_WINDOWS_1251_e, "WINDOWS-1251" }, + { iconv_WINDOWS_1252_e, "WINDOWS-1252" }, + { iconv_WINDOWS_1253_e, "WINDOWS-1253" }, + { iconv_WINDOWS_1254_e, "WINDOWS-1254" }, + { iconv_WINDOWS_1255_e, "WINDOWS-1255" }, + { iconv_WINDOWS_1256_e, "WINDOWS-1256" }, + { iconv_WINDOWS_1257_e, "WINDOWS-1257" }, + { iconv_WINDOWS_1258_e, "WINDOWS-1258" }, + { iconv_WINSAMI2_e, "WINSAMI2" }, + { iconv_WS2_e, "WS2" }, + { iconv_YU_e, "YU" }, +}; + +const char * +__gg__encoding_iconv_name( cbl_encoding_t encoding ) { + static encodings_t *eoencodings = encodings + COUNT_OF(encodings); + + auto p = std::find_if( encodings, eoencodings, + [encoding]( const encodings_t& elem ) { + return encoding == elem.type; + } ); + return p < eoencodings? p->name : nullptr; +} + +cbl_encoding_t +__gg__encoding_iconv_type( const char *name ) { + static encodings_t *eoencodings = encodings + COUNT_OF(encodings); + + auto p = std::find_if( encodings, eoencodings, + [name]( const encodings_t& elem ) { + return strcmp(name, elem.name) == 0; + } ); + return p < eoencodings? p->type : no_encoding_e; +} -extern "C" -void -__gg__ascii_to_ebcdic(char *str, size_t length) - { - for(size_t i=0; i(malloc(retsize)); -extern "C" -void -__gg__ebcdic_to_ascii(char * const str, size_t length) + // Let's consider the possibility of each input character needed four output + // characters: + size_t needed = 4*length; + if( retsize < needed ) { - for(size_t i=0; i(realloc(retval, retsize)); } -extern "C" -char *__gg__ascii_to_console( char **dest, - size_t *dest_size, - char const * const str, - const size_t length) + if( from == to ) { - if( console_codeset == cs_utf8_e ) - { - __gg__realloc_if_necessary(dest, dest_size, length); - convert_cp1252_to_utf8(dest, dest_size, str, length); - } - else - { - __gg__realloc_if_necessary(dest, dest_size, length+1); - memcpy(*dest, str, length); - (*dest)[length] = '\0'; - } - return *dest; + memcpy(retval, str, length); + *outlength = length; } - -extern "C" -char *__gg__ebcdic_to_console(char **dest, - size_t *dest_size, - char const * const str, - const size_t length) + else { - static size_t ebcdic_size = MINIMUM_ALLOCATION_SIZE; - static char *ebcdic = static_cast(malloc(ebcdic_size)); - if(!ebcdic)abort(); - __gg__realloc_if_necessary(&ebcdic, &ebcdic_size, length); + // Converts the given string from from to to using iconv. - memcpy(ebcdic, str, length); - __gg__ebcdic_to_ascii(ebcdic, length); + // The return value points to a static memory area in this function, the + // caller has to respect that. - if( console_codeset == cs_utf8_e ) - { - convert_cp1252_to_utf8(dest, dest_size, ebcdic, length); - } - else - { - __gg__realloc_if_necessary(dest, dest_size, length+1); - strcpy(*dest, ebcdic); - } - return *dest; - } + // We attempt to minimize overhead by using a map to call + // iconv_open but once for each from/to pairing. -extern "C" -void __gg__console_to_ascii(char * const str, size_t length) - { - // In-place conversion of ASCII data that might be UTF-8 to CP1252 - if( console_codeset == cs_cp1252_e ) - { - // It's already what we want it to be - return; - } - char *dest = str; + iconv_t cd; - size_t position = 0; - while( position < length ) - { - size_t code_point; - // Pull the next code_point from the UTF-8 stream - long unicode_point - = extract_next_code_point( reinterpret_cast(str), - length, - position ); - if( unicode_point == -1 ) - { - // The UTF-8 stream was poorly formed. - code_point = ASCII_REPLACEMENT; - } - else - { - // Check for that unicode code point in the subset of characters we - // know about: - auto it = utf8_to_cp1252_values.find(unicode_point); - if( it == utf8_to_cp1252_values.end() ) - { - // That unicode character isn't in our list - code_point = ASCII_REPLACEMENT; - } - else - { - code_point = it->second; - } - } - *dest++ = (char)code_point; - } - *dest++ = '\0'; - } + static std::unordered_map pairings; + uint32_t pairing = static_cast(from) << 16; + pairing += static_cast(to); + std::unordered_map::const_iterator it = + pairings.find(pairing); + if( it == pairings.end() ) + { + // This pairing is new to us. + assert(to > custom_encoding_e); + assert(from > custom_encoding_e); + cd = iconv_open(__gg__encoding_iconv_name(to), + __gg__encoding_iconv_name(from)); + pairings[pairing] = cd; + } + else + { + // We've seen this pairing before. + cd = it->second; + } -extern "C" -void -__gg__console_to_ebcdic(char * const str, size_t length) - { - char *dest = str; + char *inbuf = const_cast(str); + char *outbuf = retval; + size_t incount = length; + size_t outcount = retsize; + *outlength = iconv( cd, + &inbuf, &incount, + &outbuf, &outcount); + *outlength = retsize - outcount; - size_t position = 0; - while( position < length ) + if( *outlength == length ) + { + /* In a kind of shortsighted way, we are going to assume + single-byte-coding, and we are going to cope here with the + COBOL-world reality of HIGH-VALUE being, by default, the value 0xFF. + This is required by IBM in the EBCDIC and ASCII worlds. The + implications for other locales are being left for another time. + + So, for now, we are regarding 0xFF as invariant. Thus, at this + point, we have to scan the input and make sure the output has 0xFF + where the input does. */ + for(size_t i=0; i(str), - length, - position ); - if( unicode_point == -1 ) - { - // The UTF-8 stream was poorly formed. - code_point = ASCII_REPLACEMENT; - } - else - { - // Check for that unicode code point in the subset of characters we - // know about: - auto it = utf8_to_cp1252_values.find(unicode_point); - if( it == utf8_to_cp1252_values.end() ) - { - // That unicode character isn't in our list - code_point = ASCII_REPLACEMENT; - } - else - { - code_point = it->second; - } - } - *dest++ = __gg__cp1252_to_cp1140_values[code_point&0xFF] ; + if( static_cast(str[i]) == 0xFF ) + { + retval[i] = static_cast(0xFF); + } } - *dest++ = '\0'; - } - -extern "C" -size_t -_to_ctype(char * const location, size_t length) - { - // Converts from our internal codeset to the system LC_TYPE codeset - const char *fromcode; - const char *tocode; - if( __gg__ebcdic_codeset_in_use ) - { - fromcode = "CP1140"; - } - else - { - fromcode = "CP1252"; - } - const char *ctype = setlocale(LC_CTYPE, ""); - - if( strcasestr(ctype, "UTF") ) - { - tocode = "UTF-8"; - } - else - { - tocode = "CP1252"; + } } + // For the convenience of those who call this routine, we are sticking a + // terminating NUL on the end of the generated string + retval[*outlength] = '\0'; - iconv_t cd = iconv_open(tocode, fromcode); - assert( cd != (iconv_t)-1 ); - - static char *dest = NULL; - static size_t dest_size = 0; - - // create a buffer long enough that iconv() won't fail: - __gg__realloc_if_necessary(&dest, &dest_size, 4*length+1); - - // Set up for the iconv() call: - char *inbuf = location; - size_t inbytesleft = length; - char *outbuf = dest; - size_t outbytesleft = 2*length+1; - - memset(dest, ' ', 2*length+1); - iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - memcpy(location, dest, length); - return 0; + return retval; } -extern "C" -size_t -_from_ctype(char * const location, size_t length) +static +std::unordered_mapmap_of_encodings; + +charmap_t * +__gg__get_charmap(cbl_encoding_t encoding) { - // Converts from our internal codeset to the system LC_TYPE codeset - const char *fromcode; - const char *tocode; - if( __gg__ebcdic_codeset_in_use ) - { - tocode = "CP1140"; - } - else + // In various places in the runtime, there will be need of charmap_t for + // various encodings. By using this routine, the overhead of creating and + // using them is kept low. + + // Sometimes the encoding is custom_encoding_e, like when initializing a + // FldPointer. But we still need to have *something*, because of the need + // to handle certain figurative constants. An example is + // 01 FOO pointer value NULL + // where the encoding is irrelevant. So, in that case we force it to be + // something. + + if( encoding == custom_encoding_e) { - tocode = "CP1252"; + encoding = DEFAULT_CHARMAP_SOURCE; } - const char *ctype = setlocale(LC_CTYPE, ""); - if( strcasestr(ctype, "UTF") ) + charmap_t *retval; + std::unordered_map::const_iterator it + = map_of_encodings.find(encoding); + if( it != map_of_encodings.end() ) { - fromcode = "UTF-8"; + retval = it->second; } else { - fromcode = "CP1252"; + retval = new charmap_t(encoding); + map_of_encodings[encoding] = retval; } - - iconv_t cd = iconv_open(tocode, fromcode); - assert( cd != (iconv_t)-1 ); - - static char *dest = NULL; - static size_t dest_size = 0; - - // create a buffer long enough that iconv() won't fail: - __gg__realloc_if_necessary(&dest, &dest_size, length+1); - - // Set up for the iconv() call: - char *inbuf = location; - size_t inbytesleft = length; - char *outbuf = dest; - size_t outbytesleft = length+1; - - memset(dest, internal_space, length+1); - ///size_t iret = - iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - memcpy(location, dest, length); - return 0; + return retval; } + diff --git a/libgcobol/charmaps.h b/libgcobol/charmaps.h index 6b4e9f5c4b4..15be0ea1bc0 100644 --- a/libgcobol/charmaps.h +++ b/libgcobol/charmaps.h @@ -103,11 +103,16 @@ Stay alert! */ +extern int __gg__decimal_point ; +extern int __gg__decimal_separator ; +extern int __gg__quote_character ; +extern int __gg__low_value_character ; +extern int __gg__high_value_character ; +extern char **__gg__currency_signs ; +extern int __gg__default_currency_sign; +extern char *__gg__ct_currency_signs[256]; // Compile-time currency signs extern bool __gg__ebcdic_codeset_in_use; -#define internal_is_ebcdic (__gg__ebcdic_codeset_in_use) - -extern unsigned short const *__gg__internal_codeset_map; #define NULLCH ('\0') #define DEGENERATE_HIGH_VALUE 0xFF @@ -197,84 +202,6 @@ extern unsigned short const *__gg__internal_codeset_map; #define ascii_newline ((uint8_t)('\n')) #define ascii_return ((uint8_t)('\r')) -#define internal_space ((uint8_t)__gg__internal_codeset_map[ascii_space]) -#define internal_zero ((uint8_t)__gg__internal_codeset_map[ascii_zero]) -#define internal_period ((uint8_t)__gg__internal_codeset_map[ascii_period]) -#define internal_comma ((uint8_t)__gg__internal_codeset_map[ascii_comma]) -#define internal_dquote ((uint8_t)__gg__internal_codeset_map[ascii_dquote]) -#define internal_asterisk ((uint8_t)__gg__internal_codeset_map[ascii_asterisk]) -#define internal_plus ((uint8_t)__gg__internal_codeset_map[ascii_plus]) -#define internal_minus ((uint8_t)__gg__internal_codeset_map[ascii_minus]) -#define internal_cr ((uint8_t)__gg__internal_codeset_map[ascii_cr]) -#define internal_ff ((uint8_t)__gg__internal_codeset_map[ascii_ff]) -#define internal_newline ((uint8_t)__gg__internal_codeset_map[ascii_newline]) -#define internal_return ((uint8_t)__gg__internal_codeset_map[ascii_return]) -#define internal_0 ((uint8_t)__gg__internal_codeset_map[ascii_0]) -#define internal_1 ((uint8_t)__gg__internal_codeset_map[ascii_1]) -#define internal_2 ((uint8_t)__gg__internal_codeset_map[ascii_2]) -#define internal_3 ((uint8_t)__gg__internal_codeset_map[ascii_3]) -#define internal_4 ((uint8_t)__gg__internal_codeset_map[ascii_4]) -#define internal_5 ((uint8_t)__gg__internal_codeset_map[ascii_5]) -#define internal_6 ((uint8_t)__gg__internal_codeset_map[ascii_6]) -#define internal_7 ((uint8_t)__gg__internal_codeset_map[ascii_7]) -#define internal_8 ((uint8_t)__gg__internal_codeset_map[ascii_8]) -#define internal_9 ((uint8_t)__gg__internal_codeset_map[ascii_9]) -#define internal_colon ((uint8_t)__gg__internal_codeset_map[ascii_colon]) -#define internal_query ((uint8_t)__gg__internal_codeset_map[ascii_query]) -#define internal_A ((uint8_t)__gg__internal_codeset_map[ascii_A]) -#define internal_B ((uint8_t)__gg__internal_codeset_map[ascii_B]) -#define internal_C ((uint8_t)__gg__internal_codeset_map[ascii_C]) -#define internal_D ((uint8_t)__gg__internal_codeset_map[ascii_D]) -#define internal_E ((uint8_t)__gg__internal_codeset_map[ascii_E]) -#define internal_F ((uint8_t)__gg__internal_codeset_map[ascii_F]) -#define internal_G ((uint8_t)__gg__internal_codeset_map[ascii_G]) -#define internal_H ((uint8_t)__gg__internal_codeset_map[ascii_H]) -#define internal_I ((uint8_t)__gg__internal_codeset_map[ascii_I]) -#define internal_J ((uint8_t)__gg__internal_codeset_map[ascii_J]) -#define internal_K ((uint8_t)__gg__internal_codeset_map[ascii_K]) -#define internal_L ((uint8_t)__gg__internal_codeset_map[ascii_L]) -#define internal_M ((uint8_t)__gg__internal_codeset_map[ascii_M]) -#define internal_N ((uint8_t)__gg__internal_codeset_map[ascii_N]) -#define internal_O ((uint8_t)__gg__internal_codeset_map[ascii_O]) -#define internal_P ((uint8_t)__gg__internal_codeset_map[ascii_P]) -#define internal_Q ((uint8_t)__gg__internal_codeset_map[ascii_Q]) -#define internal_R ((uint8_t)__gg__internal_codeset_map[ascii_R]) -#define internal_S ((uint8_t)__gg__internal_codeset_map[ascii_S]) -#define internal_T ((uint8_t)__gg__internal_codeset_map[ascii_T]) -#define internal_U ((uint8_t)__gg__internal_codeset_map[ascii_U]) -#define internal_V ((uint8_t)__gg__internal_codeset_map[ascii_V]) -#define internal_W ((uint8_t)__gg__internal_codeset_map[ascii_W]) -#define internal_X ((uint8_t)__gg__internal_codeset_map[ascii_X]) -#define internal_Y ((uint8_t)__gg__internal_codeset_map[ascii_Y]) -#define internal_Z ((uint8_t)__gg__internal_codeset_map[ascii_Z]) -#define internal_a ((uint8_t)__gg__internal_codeset_map[ascii_a]) -#define internal_b ((uint8_t)__gg__internal_codeset_map[ascii_b]) -#define internal_c ((uint8_t)__gg__internal_codeset_map[ascii_c]) -#define internal_d ((uint8_t)__gg__internal_codeset_map[ascii_d]) -#define internal_e ((uint8_t)__gg__internal_codeset_map[ascii_e]) -#define internal_f ((uint8_t)__gg__internal_codeset_map[ascii_f]) -#define internal_g ((uint8_t)__gg__internal_codeset_map[ascii_g]) -#define internal_h ((uint8_t)__gg__internal_codeset_map[ascii_h]) -#define internal_i ((uint8_t)__gg__internal_codeset_map[ascii_i]) -#define internal_j ((uint8_t)__gg__internal_codeset_map[ascii_j]) -#define internal_k ((uint8_t)__gg__internal_codeset_map[ascii_k]) -#define internal_l ((uint8_t)__gg__internal_codeset_map[ascii_l]) -#define internal_m ((uint8_t)__gg__internal_codeset_map[ascii_m]) -#define internal_n ((uint8_t)__gg__internal_codeset_map[ascii_n]) -#define internal_o ((uint8_t)__gg__internal_codeset_map[ascii_o]) -#define internal_p ((uint8_t)__gg__internal_codeset_map[ascii_p]) -#define internal_q ((uint8_t)__gg__internal_codeset_map[ascii_q]) -#define internal_r ((uint8_t)__gg__internal_codeset_map[ascii_r]) -#define internal_s ((uint8_t)__gg__internal_codeset_map[ascii_s]) -#define internal_t ((uint8_t)__gg__internal_codeset_map[ascii_t]) -#define internal_u ((uint8_t)__gg__internal_codeset_map[ascii_u]) -#define internal_v ((uint8_t)__gg__internal_codeset_map[ascii_v]) -#define internal_w ((uint8_t)__gg__internal_codeset_map[ascii_w]) -#define internal_x ((uint8_t)__gg__internal_codeset_map[ascii_x]) -#define internal_y ((uint8_t)__gg__internal_codeset_map[ascii_y]) -#define internal_z ((uint8_t)__gg__internal_codeset_map[ascii_z]) - - enum text_device_t { td_default_e, @@ -290,7 +217,6 @@ enum text_codeset_t cs_cp1140_e }; - extern unsigned char __gg__data_space[1] ; extern unsigned char __gg__data_low_values[1] ; extern unsigned char __gg__data_zeros[1] ; @@ -315,56 +241,197 @@ extern const unsigned short __gg__ebcdic_to_cp1252_collation[256]; // These routines convert a single ASCII character to either ASCII or EBCDIC -extern "C" -char __gg__ascii_to_ascii_chr(char ch); -extern "C" -char __gg__ascii_to_ebcdic_chr(char ch); -extern "C" -char (*__gg__ascii_to_internal_chr)(char); -#define ascii_to_internal(a) ((*__gg__ascii_to_internal_chr)(a)) - -extern "C" -void __gg__ascii_to_ascii(char *str, size_t length); -extern "C" -void __gg__ascii_to_ebcdic(char *str, size_t length); -extern "C" -void (*__gg__ascii_to_internal_str)(char *str, size_t length); -#define ascii_to_internal_str(a, b) ((*__gg__ascii_to_internal_str)((a), (b))) - -extern "C" -char *__gg__raw_to_ascii(char **dest, size_t *dest_size, const char *str, size_t length); -extern "C" -char *__gg__raw_to_ebcdic(char **dest, size_t *dest_size, const char *in, size_t length); -extern "C" -char *(*__gg__raw_to_internal)(char **dest, size_t *dest_length, const char *in, size_t length); -#define raw_to_internal(a, b, c, d) ((*__gg__raw_to_internal)((a), (b), (c), (d))) - -extern "C" -char *__gg__ascii_to_console(char **dest, size_t *dest_size, char const * const str, const size_t length); -extern "C" -char *__gg__ebcdic_to_console(char **dest, size_t *dest_size, char const * const str, const size_t length); -extern "C" -char *(*__gg__internal_to_console_cm)(char **dest, size_t *dest_size, const char *in, size_t length); -#define internal_to_console(a, b, c, d) ((*__gg__internal_to_console_cm)((a), (b), (c), (d))) - -extern "C" -void __gg__console_to_ascii(char * const str, size_t length); -extern "C" -void __gg__console_to_ebcdic(char * const str, size_t length); -extern "C" -void (*__gg__console_to_internal_cm)(char * const str, size_t length); -#define console_to_internal(a, b) ((*__gg__console_to_internal_cm)((a), (b))) - -extern "C" -void __gg__ebcdic_to_ascii(char *str, const size_t length); -extern "C" -void (*__gg__internal_to_ascii)(char *str, size_t length); -#define internal_to_ascii(a, b) ((*__gg__internal_to_ascii)((a), (b))) - extern "C" void __gg__set_internal_codeset(int use_ebcdic); extern "C" void __gg__text_conversion_override(text_device_t device, text_codeset_t codeset); +const char * __gg__encoding_iconv_name( cbl_encoding_t encoding ); +cbl_encoding_t __gg__encoding_iconv_type( const char *name ); + +char * __gg__iconverter(cbl_encoding_t from, + cbl_encoding_t to, + const char *str, + size_t length, + size_t *outlength); + +#define DEFAULT_CHARMAP_SOURCE (iconv_CP1252_e) + +class charmap_t + { + private: + // This is the encoding of this character map + cbl_encoding_t m_encoding; + + enum + { + sign_type_ascii, + sign_type_ebcdic, + } m_numeric_sign_type; + + // This map retains the ASCII-to-encoded value in m_encoding, so that iconv + // need be called but once for each ASCII value. + std::unordered_mapm_map_of_encodings; + + void determine_sign_type() + { + if( mapped_character(ascii_0) & 0x80 ) + { + m_numeric_sign_type = sign_type_ebcdic; + } + else + { + m_numeric_sign_type = sign_type_ascii; + } + } + + public: + explicit charmap_t(cbl_encoding_t e) : m_encoding(e) + { + determine_sign_type(); + } + explicit charmap_t(uint16_t e) : m_encoding(static_cast(e)) + { + determine_sign_type(); + } + + int mapped_character(int ch) + { + // The assumption is that anybody calling this routine is providing + // a single-byte character in the DEFAULT_CHARMAP_SOURCE encoding. We + // return the equivalent character in the m_encoding + int retval; + std::unordered_map::const_iterator it = + m_map_of_encodings.find(ch); + if( it != m_map_of_encodings.end() ) + { + retval = it->second; + } + else + { + retval = 0; + size_t outlength = 0; + const char *mapped = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + m_encoding, + PTRCAST(char, &ch), + 1, + &outlength); + memcpy(&retval, mapped, outlength); + m_map_of_encodings[ch] = retval; + } + return retval; + } + + int decimal_point() + { + return mapped_character(__gg__decimal_point); + } + int decimal_separator() + { + return mapped_character(__gg__decimal_separator); + } + int quote_character() + { + return mapped_character(__gg__quote_character); + } + int low_value_character() + { + return __gg__low_value_character; + } + int high_value_character() + { + return __gg__high_value_character; + } + + int figconst_character(cbl_figconst_t figconst) + { + int const_char = 0; // Head off a compiler warning + switch(figconst) + { + case normal_value_e : + const_char = -1; + break; + case low_value_e : + const_char = low_value_character(); + break; + case zero_value_e : + const_char = mapped_character(ascii_0); + break; + case space_value_e : + const_char = mapped_character(ascii_space); + break; + case quote_value_e : + const_char = quote_character(); + break; + case high_value_e : + const_char = high_value_character(); + break; + case null_value_e: + const_char = '\0'; + break; + default: + abort(); + break; + } + return const_char; + } + + bool + is_digit_negative(int digit) + { + bool retval; + switch(m_numeric_sign_type) + { + case sign_type_ascii: + retval = !!(digit & NUMERIC_DISPLAY_SIGN_BIT_ASCII); + break; + + case sign_type_ebcdic: + retval = !!((~digit) & NUMERIC_DISPLAY_SIGN_BIT_EBCDIC); + break; + } + return retval; + } + + int + set_digit_negative(int digit, bool is_negative) + { + switch(m_numeric_sign_type) + { + case sign_type_ascii: + if( is_negative ) + { + digit |= NUMERIC_DISPLAY_SIGN_BIT_ASCII; + } + else + { + digit &= ~NUMERIC_DISPLAY_SIGN_BIT_ASCII; + } + break; + + case sign_type_ebcdic: + if( is_negative ) + { + digit &= ~NUMERIC_DISPLAY_SIGN_BIT_EBCDIC; + } + else + { + digit |= NUMERIC_DISPLAY_SIGN_BIT_EBCDIC; + } + break; + } + return digit; + } + + bool + is_like_ebcdic() const + { + return m_numeric_sign_type == sign_type_ebcdic; + } + + }; + +charmap_t *__gg__get_charmap(cbl_encoding_t encoding); + #endif \ No newline at end of file diff --git a/libgcobol/common-defs.h b/libgcobol/common-defs.h index 4180a18da08..3e6b5ff9c7a 100644 --- a/libgcobol/common-defs.h +++ b/libgcobol/common-defs.h @@ -35,6 +35,8 @@ #include #include +#include "encodings.h" + #define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) // This constant establishes the maximum number of digits in a fixed point @@ -79,26 +81,8 @@ value is flagged negative by turning on the 0x10 bit, turning the 0xC0 to 0xD0. */ -#define EBCDIC_MINUS (0x60) -#define EBCDIC_PLUS (0x4E) -#define EBCDIC_ZERO (0xF0) -#define EBCDIC_NINE (0xF9) - -#define PACKED_NYBBLE_PLUS 0x0C -#define PACKED_NYBBLE_MINUS 0x0D -#define PACKED_NYBBLE_UNSIGNED 0x0F - #define NUMERIC_DISPLAY_SIGN_BIT_ASCII 0x40 -#define NUMERIC_DISPLAY_SIGN_BIT_EBCDIC 0x10 - -#define NUMERIC_DISPLAY_SIGN_BIT (__gg__ebcdic_codeset_in_use ? \ - NUMERIC_DISPLAY_SIGN_BIT_EBCDIC : \ - NUMERIC_DISPLAY_SIGN_BIT_ASCII) - -#define SEPARATE_PLUS (__gg__ebcdic_codeset_in_use ? EBCDIC_PLUS : '+') -#define SEPARATE_MINUS (__gg__ebcdic_codeset_in_use ? EBCDIC_MINUS : '-') -#define ZONED_ZERO (__gg__ebcdic_codeset_in_use ? EBCDIC_ZERO : '0') -#define ZONE_SIGNED_EBCDIC (0xC0) +#define NUMERIC_DISPLAY_SIGN_BIT_EBCDIC 0x20 #define LEVEL01 (1) #define LEVEL49 (49) @@ -106,7 +90,6 @@ // In the __gg__move_literala() call, we piggyback this bit onto the // cbl_round_t parameter, just to cut down on the number of parameters passed - #define REFER_ALL_BIT 0x80 // Other bits for handling MOVE ALL and so on. @@ -169,7 +152,6 @@ enum cbl_field_type_t { FldSwitch, FldDisplay, FldPointer, - FldBlob, }; @@ -231,7 +213,7 @@ enum cbl_field_attr_t : uint64_t { leading_e = 0x0004000000, // leading sign (signable_e alone means trailing) separate_e = 0x0008000000, // separate sign envar_e = 0x0010000000, // names an environment variable - dnu_1_e = 0x0020000000, // unused: this attribute bit is available + encoded_e = 0x0020000000, // data.initial matches codeset.encoding bool_encoded_e = 0x0040000000, // data.initial is a boolean string hex_encoded_e = 0x0080000000, // data.initial is a hex-encoded string depends_on_e = 0x0100000000, // A group hierachy contains a DEPENDING_ON @@ -264,7 +246,6 @@ enum cbl_figconst_t #define FIGCONST_MASK (figconst_1_e|figconst_2_e|figconst_4_e) #define DATASECT_MASK (linkage_e | local_e) - enum cbl_file_org_t { file_disorganized_e, file_sequential_e, @@ -370,13 +351,6 @@ enum cbl_arith_format_t { no_giving_e, giving_e, corresponding_e }; -enum cbl_encoding_t { - ASCII_e, // STANDARD-1 (in caps to avoid conflict with ascii_e in libgcobol.cc) - iso646_e, // STANDARD-2 - EBCDIC_e, // NATIVE or EBCDIC - custom_encoding_e, -}; - enum cbl_truncation_mode { trunc_std_e, trunc_opt_e, diff --git a/libgcobol/constants.cc b/libgcobol/constants.cc index b5567ed7dc1..1715db47c80 100644 --- a/libgcobol/constants.cc +++ b/libgcobol/constants.cc @@ -107,7 +107,8 @@ struct cblc_field_t __ggsr___2_##a = { \ .level = 0 , \ .digits = 0 , \ .rdigits = 0 , \ - .dummy = 0 , \ + .encoding = iconv_CP1252_e \ + .alphabet = 0 \ }; unsigned char __gg__data_space[1] = {' '}; @@ -122,12 +123,13 @@ struct cblc_field_t __ggsr__space = { .parent = NULL, .occurs_lower = 0 , .occurs_upper = 0 , - .attr = 0x284 , + .attr = quoted_e | constant_e | space_value_e , .type = FldAlphanumeric , .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__spaces = { @@ -141,12 +143,13 @@ struct cblc_field_t __ggsr__spaces = { .parent = NULL, .occurs_lower = 0 , .occurs_upper = 0 , - .attr = 0x284 , + .attr = quoted_e | constant_e | space_value_e , .type = FldAlphanumeric , .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_low_values[1] = {'\0'}; @@ -166,7 +169,8 @@ struct cblc_field_t __ggsr__low_values = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_zeros[1] = {'0'}; @@ -186,7 +190,8 @@ struct cblc_field_t __ggsr__zeros = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_high_values[1] = {0xFF}; @@ -206,7 +211,8 @@ struct cblc_field_t __ggsr__high_values = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_quotes[1] = {0xFF}; @@ -226,7 +232,8 @@ struct cblc_field_t __ggsr__quotes = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_nulls[8] = {0,0,0,0,0,0,0,0}; @@ -246,7 +253,8 @@ struct cblc_field_t __ggsr__nulls = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data__file_status[2] = {0,0}; @@ -266,7 +274,8 @@ struct cblc_field_t __ggsr___file_status = { .level = 0 , .digits = 2 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; @@ -287,7 +296,8 @@ struct cblc_field_t __ggsr___14_linage_counter6 = { .level = 0 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; @@ -308,7 +318,8 @@ struct cblc_field_t __ggsr__upsi_0 = { .level = 0 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; short __gg__data_return_code = 0; @@ -328,7 +339,8 @@ struct cblc_field_t __ggsr__return_code = { .level = 0 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg___data_dev_stdin[] = "/dev/stdin"; @@ -348,7 +360,8 @@ struct cblc_field_t __ggsr___dev_stdin = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg___data_dev_stdout[] = "/dev/stdout"; @@ -368,7 +381,8 @@ struct cblc_field_t __ggsr___dev_stdout = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg___data_dev_stderr[] = "/dev/stderr"; @@ -388,7 +402,8 @@ struct cblc_field_t __ggsr___dev_stderr = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg___data_dev_null[] = "/dev/null"; @@ -408,7 +423,8 @@ struct cblc_field_t __ggsr___dev_null = { .level = 0 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_tally[] = {0,0}; @@ -428,7 +444,8 @@ struct cblc_field_t __ggsr__tally = { .level = 0 , .digits = 5 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; unsigned char __gg__data_argi[] = {0,0}; @@ -448,9 +465,26 @@ struct cblc_field_t __ggsr__argi = { .level = 0 , .digits = 5 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; +/** + +Special registers used by the XML parser +Special register Implicit definition and usage Content + +XML-EVENT PICTURE X(30) USAGE DISPLAY VALUE SPACE *> name of XML event +XML-CODE PICTURE S9(9) USAGE BINARY VALUE ZERO *> status of XML event +XML-TEXT Variable-length alphanumeric item +XML-NTEXT Variable-length national item + + + +**/ + + + /* The following defines storage for the global DEBUG-ITEM: 01 DEBUG-ITEM. @@ -491,7 +525,8 @@ struct cblc_field_t __ggsr__debug_item = { .level = 01 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_line = { @@ -510,7 +545,8 @@ struct cblc_field_t __ggsr__debug_line = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_filler_1 = { @@ -529,7 +565,8 @@ struct cblc_field_t __ggsr__debug_filler_1 = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_name = { @@ -548,7 +585,8 @@ struct cblc_field_t __ggsr__debug_name = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_filler_2 = { @@ -567,7 +605,8 @@ struct cblc_field_t __ggsr__debug_filler_2 = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_sub_1 = { @@ -586,7 +625,8 @@ struct cblc_field_t __ggsr__debug_sub_1 = { .level = 05 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_filler_3 = { @@ -605,7 +645,8 @@ struct cblc_field_t __ggsr__debug_filler_3 = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_sub_2 = { @@ -624,7 +665,8 @@ struct cblc_field_t __ggsr__debug_sub_2 = { .level = 05 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_filler_4 = { @@ -643,7 +685,8 @@ struct cblc_field_t __ggsr__debug_filler_4 = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_sub_3 = { @@ -662,7 +705,8 @@ struct cblc_field_t __ggsr__debug_sub_3 = { .level = 05 , .digits = 4 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_filler_5 = { @@ -681,7 +725,8 @@ struct cblc_field_t __ggsr__debug_filler_5 = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; struct cblc_field_t __ggsr__debug_contents = { @@ -700,7 +745,8 @@ struct cblc_field_t __ggsr__debug_contents = { .level = 05 , .digits = 0 , .rdigits = 0 , - .dummy = 0 , + .encoding = iconv_CP1252_e , + .alphabet = 0 , }; #pragma GCC diagnostic pop diff --git a/libgcobol/encodings.h b/libgcobol/encodings.h new file mode 100644 index 00000000000..dfa4b671e3d --- /dev/null +++ b/libgcobol/encodings.h @@ -0,0 +1,1209 @@ + /* + * Copyright (c) 2021_2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ENCODINGS_H_ +#define _ENCODINGS_H_ + +enum cbl_encoding_t { + no_encoding_e, + custom_encoding_e, + iconv_1026_e, + iconv_1046_e, + iconv_1047_e, + iconv_10646_1_1993_e, + iconv_437_e, + iconv_500V1_e, + iconv_500_e, + iconv_850_e, + iconv_851_e, + iconv_852_e, + iconv_855_e, + iconv_856_e, + iconv_857_e, + iconv_858_e, + iconv_860_e, + iconv_861_e, + iconv_862_e, + iconv_863_e, + iconv_864_e, + iconv_865_e, + iconv_866NAV_e, + iconv_866_e, + iconv_869_e, + iconv_874_e, + iconv_8859_1_e, + iconv_8859_2_e, + iconv_8859_3_e, + iconv_8859_4_e, + iconv_8859_5_e, + iconv_8859_6_e, + iconv_8859_7_e, + iconv_8859_8_e, + iconv_8859_9_e, + iconv_904_e, + iconv_ANSI_X3_110_1983_e, + iconv_ANSI_X3_110_e, + iconv_ANSI_X3_4_1968_e, + iconv_ANSI_X3_4_1986_e, + iconv_ANSI_X3_4_e, + iconv_ARABIC7_e, + iconv_ARABIC_e, + iconv_ARMSCII8_e, + iconv_ARMSCII_8_e, + iconv_ASCII_e, // STANDARD_1 (beware of ascii_e in libgcobol.cc) + iconv_ASMO_449_e, + iconv_ASMO_708_e, + iconv_BALTIC_e, + iconv_BIG5HKSCS_e, + iconv_BIG5_HKSCS_e, + iconv_BIG5_e, + iconv_BIGFIVE_e, + iconv_BIG_5_e, + iconv_BIG_FIVE_e, + iconv_BRF_e, + iconv_BS_4730_e, + iconv_CA_e, + iconv_CN_BIG5_e, + iconv_CN_GB_e, + iconv_CN_e, + iconv_CP037_e, + iconv_CP038_e, + iconv_CP10007_e, + iconv_CP1004_e, + iconv_CP1008_e, + iconv_CP1025_e, + iconv_CP1026_e, + iconv_CP1046_e, + iconv_CP1047_e, + iconv_CP1070_e, + iconv_CP1079_e, + iconv_CP1081_e, + iconv_CP1084_e, + iconv_CP1089_e, + iconv_CP1097_e, + iconv_CP1112_e, + iconv_CP1122_e, + iconv_CP1123_e, + iconv_CP1124_e, + iconv_CP1125_e, + iconv_CP1129_e, + iconv_CP1130_e, + iconv_CP1132_e, + iconv_CP1133_e, + iconv_CP1137_e, + iconv_CP1140_e, + iconv_CP1141_e, + iconv_CP1142_e, + iconv_CP1143_e, + iconv_CP1144_e, + iconv_CP1145_e, + iconv_CP1146_e, + iconv_CP1147_e, + iconv_CP1148_e, + iconv_CP1149_e, + iconv_CP1153_e, + iconv_CP1154_e, + iconv_CP1155_e, + iconv_CP1156_e, + iconv_CP1157_e, + iconv_CP1158_e, + iconv_CP1160_e, + iconv_CP1161_e, + iconv_CP1162_e, + iconv_CP1163_e, + iconv_CP1164_e, + iconv_CP1166_e, + iconv_CP1167_e, + iconv_CP1250_e, + iconv_CP1251_e, + iconv_CP1252_e, + iconv_CP1253_e, + iconv_CP1254_e, + iconv_CP1255_e, + iconv_CP1256_e, + iconv_CP1257_e, + iconv_CP1258_e, + iconv_CP12712_e, + iconv_CP1282_e, + iconv_CP1361_e, + iconv_CP1364_e, + iconv_CP1371_e, + iconv_CP1388_e, + iconv_CP1390_e, + iconv_CP1399_e, + iconv_CP16804_e, + iconv_CP273_e, + iconv_CP274_e, + iconv_CP275_e, + iconv_CP278_e, + iconv_CP280_e, + iconv_CP281_e, + iconv_CP282_e, + iconv_CP284_e, + iconv_CP285_e, + iconv_CP290_e, + iconv_CP297_e, + iconv_CP367_e, + iconv_CP420_e, + iconv_CP423_e, + iconv_CP424_e, + iconv_CP437_e, + iconv_CP4517_e, + iconv_CP4899_e, + iconv_CP4909_e, + iconv_CP4971_e, + iconv_CP500_e, + iconv_CP5347_e, + iconv_CP737_e, + iconv_CP770_e, + iconv_CP771_e, + iconv_CP772_e, + iconv_CP773_e, + iconv_CP774_e, + iconv_CP775_e, + iconv_CP803_e, + iconv_CP813_e, + iconv_CP819_e, + iconv_CP850_e, + iconv_CP851_e, + iconv_CP852_e, + iconv_CP855_e, + iconv_CP856_e, + iconv_CP857_e, + iconv_CP858_e, + iconv_CP860_e, + iconv_CP861_e, + iconv_CP862_e, + iconv_CP863_e, + iconv_CP864_e, + iconv_CP865_e, + iconv_CP866NAV_e, + iconv_CP866_e, + iconv_CP868_e, + iconv_CP869_e, + iconv_CP870_e, + iconv_CP871_e, + iconv_CP874_e, + iconv_CP875_e, + iconv_CP880_e, + iconv_CP891_e, + iconv_CP901_e, + iconv_CP902_e, + iconv_CP9030_e, + iconv_CP903_e, + iconv_CP904_e, + iconv_CP905_e, + iconv_CP9066_e, + iconv_CP912_e, + iconv_CP915_e, + iconv_CP916_e, + iconv_CP918_e, + iconv_CP920_e, + iconv_CP921_e, + iconv_CP922_e, + iconv_CP930_e, + iconv_CP932_e, + iconv_CP933_e, + iconv_CP935_e, + iconv_CP936_e, + iconv_CP937_e, + iconv_CP939_e, + iconv_CP9448_e, + iconv_CP949_e, + iconv_CP950_e, + iconv_CPIBM861_e, + iconv_CP_AR_e, + iconv_CP_GR_e, + iconv_CP_HU_e, + iconv_CSA7_1_e, + iconv_CSA7_2_e, + iconv_CSASCII_e, + iconv_CSA_T500_1983_e, + iconv_CSA_T500_e, + iconv_CSA_Z243_419851_e, + iconv_CSA_Z243_419852_e, + iconv_CSA_Z243_4_1985_1_e, + iconv_CSA_Z243_4_1985_2_e, + iconv_CSDECMCS_e, + iconv_CSEBCDICATDEA_e, + iconv_CSEBCDICATDE_e, + iconv_CSEBCDICCAFR_e, + iconv_CSEBCDICDKNOA_e, + iconv_CSEBCDICDKNO_e, + iconv_CSEBCDICESA_e, + iconv_CSEBCDICESS_e, + iconv_CSEBCDICES_e, + iconv_CSEBCDICFISEA_e, + iconv_CSEBCDICFISE_e, + iconv_CSEBCDICFR_e, + iconv_CSEBCDICIT_e, + iconv_CSEBCDICPT_e, + iconv_CSEBCDICUK_e, + iconv_CSEBCDICUS_e, + iconv_CSEUCKR_e, + iconv_CSEUCPKDFMTJAPANESE_e, + iconv_CSGB2312_e, + iconv_CSHPROMAN8_e, + iconv_CSIBM037_e, + iconv_CSIBM038_e, + iconv_CSIBM1008_e, + iconv_CSIBM1025_e, + iconv_CSIBM1026_e, + iconv_CSIBM1097_e, + iconv_CSIBM1112_e, + iconv_CSIBM1122_e, + iconv_CSIBM1123_e, + iconv_CSIBM1124_e, + iconv_CSIBM1129_e, + iconv_CSIBM1130_e, + iconv_CSIBM1132_e, + iconv_CSIBM1133_e, + iconv_CSIBM1137_e, + iconv_CSIBM1140_e, + iconv_CSIBM1141_e, + iconv_CSIBM1142_e, + iconv_CSIBM1143_e, + iconv_CSIBM1144_e, + iconv_CSIBM1145_e, + iconv_CSIBM1146_e, + iconv_CSIBM1147_e, + iconv_CSIBM1148_e, + iconv_CSIBM1149_e, + iconv_CSIBM1153_e, + iconv_CSIBM1154_e, + iconv_CSIBM1155_e, + iconv_CSIBM1156_e, + iconv_CSIBM1157_e, + iconv_CSIBM1158_e, + iconv_CSIBM1160_e, + iconv_CSIBM1161_e, + iconv_CSIBM11621162_e, + iconv_CSIBM1163_e, + iconv_CSIBM1164_e, + iconv_CSIBM1166_e, + iconv_CSIBM1167_e, + iconv_CSIBM12712_e, + iconv_CSIBM1364_e, + iconv_CSIBM1371_e, + iconv_CSIBM1388_e, + iconv_CSIBM1390_e, + iconv_CSIBM1399_e, + iconv_CSIBM16804_e, + iconv_CSIBM273_e, + iconv_CSIBM274_e, + iconv_CSIBM275_e, + iconv_CSIBM277_e, + iconv_CSIBM278_e, + iconv_CSIBM280_e, + iconv_CSIBM281_e, + iconv_CSIBM284_e, + iconv_CSIBM285_e, + iconv_CSIBM290_e, + iconv_CSIBM297_e, + iconv_CSIBM420_e, + iconv_CSIBM423_e, + iconv_CSIBM424_e, + iconv_CSIBM4517_e, + iconv_CSIBM4899_e, + iconv_CSIBM4909_e, + iconv_CSIBM4971_e, + iconv_CSIBM500_e, + iconv_CSIBM5347_e, + iconv_CSIBM803_e, + iconv_CSIBM851_e, + iconv_CSIBM855_e, + iconv_CSIBM856_e, + iconv_CSIBM857_e, + iconv_CSIBM860_e, + iconv_CSIBM863_e, + iconv_CSIBM864_e, + iconv_CSIBM865_e, + iconv_CSIBM866_e, + iconv_CSIBM868_e, + iconv_CSIBM869_e, + iconv_CSIBM870_e, + iconv_CSIBM871_e, + iconv_CSIBM880_e, + iconv_CSIBM891_e, + iconv_CSIBM901_e, + iconv_CSIBM902_e, + iconv_CSIBM9030_e, + iconv_CSIBM903_e, + iconv_CSIBM904_e, + iconv_CSIBM905_e, + iconv_CSIBM9066_e, + iconv_CSIBM918_e, + iconv_CSIBM921_e, + iconv_CSIBM922_e, + iconv_CSIBM930_e, + iconv_CSIBM932_e, + iconv_CSIBM933_e, + iconv_CSIBM935_e, + iconv_CSIBM937_e, + iconv_CSIBM939_e, + iconv_CSIBM943_e, + iconv_CSIBM9448_e, + iconv_CSISO10367BOX_e, + iconv_CSISO103T618BIT_e, + iconv_CSISO10SWEDISH_e, + iconv_CSISO111ECMACYRILLIC_e, + iconv_CSISO11SWEDISHFORNAMES_e, + iconv_CSISO121CANADIAN1_e, + iconv_CSISO122CANADIAN2_e, + iconv_CSISO139CSN369103_e, + iconv_CSISO141JUSIB1002_e, + iconv_CSISO143IECP271_e, + iconv_CSISO14JISC6220RO_e, + iconv_CSISO150GREEKCCITT_e, + iconv_CSISO150_e, + iconv_CSISO151CUBA_e, + iconv_CSISO153GOST1976874_e, + iconv_CSISO15ITALIAN_e, + iconv_CSISO16PORTUGESE_e, + iconv_CSISO17SPANISH_e, + iconv_CSISO18GREEK7OLD_e, + iconv_CSISO19LATINGREEK_e, + iconv_CSISO2022CN_e, + iconv_CSISO2022JP2_e, + iconv_CSISO2022JP_e, + iconv_CSISO2022KR_e, + iconv_CSISO2033_e, + iconv_CSISO21GERMAN_e, + iconv_CSISO25FRENCH_e, + iconv_CSISO27LATINGREEK1_e, + iconv_CSISO49INIS_e, + iconv_CSISO4UNITEDKINGDOM_e, + iconv_CSISO50INIS8_e, + iconv_CSISO51INISCYRILLIC_e, + iconv_CSISO5427CYRILLIC1981_e, + iconv_CSISO5427CYRILLIC_e, + iconv_CSISO5428GREEK_e, + iconv_CSISO58GB1988_e, + iconv_CSISO60DANISHNORWEGIAN_e, + iconv_CSISO60NORWEGIAN1_e, + iconv_CSISO61NORWEGIAN2_e, + iconv_CSISO646DANISH_e, + iconv_CSISO69FRENCH_e, + iconv_CSISO84PORTUGUESE2_e, + iconv_CSISO85SPANISH2_e, + iconv_CSISO86HUNGARIAN_e, + iconv_CSISO88GREEK7_e, + iconv_CSISO89ASMO449_e, + iconv_CSISO90_e, + iconv_CSISO92JISC62991984B_e, + iconv_CSISO99NAPLPS_e, + iconv_CSISOLATIN1_e, + iconv_CSISOLATIN2_e, + iconv_CSISOLATIN3_e, + iconv_CSISOLATIN4_e, + iconv_CSISOLATIN5_e, + iconv_CSISOLATIN6_e, + iconv_CSISOLATINARABIC_e, + iconv_CSISOLATINCYRILLIC_e, + iconv_CSISOLATINGREEK_e, + iconv_CSISOLATINHEBREW_e, + iconv_CSKOI8R_e, + iconv_CSKSC5636_e, + iconv_CSMACINTOSH_e, + iconv_CSNATSDANO_e, + iconv_CSNATSSEFI_e, + iconv_CSN_369103_e, + iconv_CSPC775BALTIC_e, + iconv_CSPC850MULTILINGUAL_e, + iconv_CSPC858MULTILINGUAL_e, + iconv_CSPC862LATINHEBREW_e, + iconv_CSPC8CODEPAGE437_e, + iconv_CSPCP852_e, + iconv_CSSHIFTJIS_e, + iconv_CSUCS4_e, + iconv_CSUNICODE_e, + iconv_CSWINDOWS31J_e, + iconv_CUBA_e, + iconv_CWI_2_e, + iconv_CWI_e, + iconv_CYRILLIC_e, + iconv_DECMCS_e, + iconv_DEC_MCS_e, + iconv_DEC_e, + iconv_DE_e, + iconv_DIN_66003_e, + iconv_DK_e, + iconv_DS2089_e, + iconv_DS_2089_e, + iconv_E13B_e, + iconv_EBCDICATDEA_e, + iconv_EBCDICATDE_e, + iconv_EBCDICCAFR_e, + iconv_EBCDICDKNOA_e, + iconv_EBCDICDKNO_e, + iconv_EBCDICESA_e, + iconv_EBCDICESS_e, + iconv_EBCDICES_e, + iconv_EBCDICFISEA_e, + iconv_EBCDICFISE_e, + iconv_EBCDICFR_e, + iconv_EBCDICISFRISS_e, + iconv_EBCDICIT_e, + iconv_EBCDICPT_e, + iconv_EBCDICUK_e, + iconv_EBCDICUS_e, // NATIVE or EBCDIC + iconv_EBCDIC_AT_DE_A_e, + iconv_EBCDIC_AT_DE_e, + iconv_EBCDIC_BE_e, + iconv_EBCDIC_BR_e, + iconv_EBCDIC_CA_FR_e, + iconv_EBCDIC_CP_AR1_e, + iconv_EBCDIC_CP_AR2_e, + iconv_EBCDIC_CP_BE_e, + iconv_EBCDIC_CP_CA_e, + iconv_EBCDIC_CP_CH_e, + iconv_EBCDIC_CP_DK_e, + iconv_EBCDIC_CP_ES_e, + iconv_EBCDIC_CP_FI_e, + iconv_EBCDIC_CP_FR_e, + iconv_EBCDIC_CP_GB_e, + iconv_EBCDIC_CP_GR_e, + iconv_EBCDIC_CP_HE_e, + iconv_EBCDIC_CP_IS_e, + iconv_EBCDIC_CP_IT_e, + iconv_EBCDIC_CP_NL_e, + iconv_EBCDIC_CP_NO_e, + iconv_EBCDIC_CP_ROECE_e, + iconv_EBCDIC_CP_SE_e, + iconv_EBCDIC_CP_TR_e, + iconv_EBCDIC_CP_US_e, + iconv_EBCDIC_CP_WT_e, + iconv_EBCDIC_CP_YU_e, + iconv_EBCDIC_CYRILLIC_e, + iconv_EBCDIC_DK_NO_A_e, + iconv_EBCDIC_DK_NO_e, + iconv_EBCDIC_ES_A_e, + iconv_EBCDIC_ES_S_e, + iconv_EBCDIC_ES_e, + iconv_EBCDIC_FI_SE_A_e, + iconv_EBCDIC_FI_SE_e, + iconv_EBCDIC_FR_e, + iconv_EBCDIC_GREEK_e, + iconv_EBCDIC_INT1_e, + iconv_EBCDIC_INT_e, + iconv_EBCDIC_IS_FRISS_e, + iconv_EBCDIC_IT_e, + iconv_EBCDIC_JP_E_e, + iconv_EBCDIC_JP_KANA_e, + iconv_EBCDIC_PT_e, + iconv_EBCDIC_UK_e, + iconv_EBCDIC_US_e, + iconv_ECMACYRILLIC_e, + iconv_ECMA_114_e, + iconv_ECMA_118_e, + iconv_ECMA_128_e, + iconv_ECMA_CYRILLIC_e, + iconv_ELOT_928_e, + iconv_ES2_e, + iconv_ES_e, + iconv_EUCCN_e, + iconv_EUCJP_MS_e, + iconv_EUCJP_OPEN_e, + iconv_EUCJP_WIN_e, + iconv_EUCJP_e, + iconv_EUCKR_e, + iconv_EUCTW_e, + iconv_EUC_CN_e, + iconv_EUC_JISX0213_e, + iconv_EUC_JP_MS_e, + iconv_EUC_JP_e, + iconv_EUC_KR_e, + iconv_EUC_TW_e, + iconv_FI_e, + iconv_FR_e, + iconv_GB13000_e, + iconv_GB18030_e, + iconv_GB2312_e, + iconv_GBK_e, + iconv_GB_198880_e, + iconv_GB_1988_80_e, + iconv_GB_e, + iconv_GEORGIAN_ACADEMY_e, + iconv_GEORGIAN_PS_e, + iconv_GOST_1976874_e, + iconv_GOST_19768_74_e, + iconv_GOST_19768_e, + iconv_GREEK7OLD_e, + iconv_GREEK7_OLD_e, + iconv_GREEK7_e, + iconv_GREEK8_e, + iconv_GREEKCCITT_e, + iconv_GREEK_CCITT_e, + iconv_GREEK_e, + iconv_HEBREW_e, + iconv_HPGREEK8_e, + iconv_HPROMAN8_e, + iconv_HPROMAN9_e, + iconv_HPTHAI8_e, + iconv_HPTURKISH8_e, + iconv_HP_GREEK8_e, + iconv_HP_ROMAN8_e, + iconv_HP_ROMAN9_e, + iconv_HP_THAI8_e, + iconv_HP_TURKISH8_e, + iconv_HU_e, + iconv_IBM037_e, + iconv_IBM038_e, + iconv_IBM1004_e, + iconv_IBM1008_e, + iconv_IBM1025_e, + iconv_IBM1026_e, + iconv_IBM1046_e, + iconv_IBM1047_e, + iconv_IBM1089_e, + iconv_IBM1097_e, + iconv_IBM1112_e, + iconv_IBM1122_e, + iconv_IBM1123_e, + iconv_IBM1124_e, + iconv_IBM1129_e, + iconv_IBM1130_e, + iconv_IBM1132_e, + iconv_IBM1133_e, + iconv_IBM1137_e, + iconv_IBM1140_e, + iconv_IBM1141_e, + iconv_IBM1142_e, + iconv_IBM1143_e, + iconv_IBM1144_e, + iconv_IBM1145_e, + iconv_IBM1146_e, + iconv_IBM1147_e, + iconv_IBM1148_e, + iconv_IBM1149_e, + iconv_IBM1153_e, + iconv_IBM1154_e, + iconv_IBM1155_e, + iconv_IBM1156_e, + iconv_IBM1157_e, + iconv_IBM1158_e, + iconv_IBM1160_e, + iconv_IBM1161_e, + iconv_IBM1162_e, + iconv_IBM1163_e, + iconv_IBM1164_e, + iconv_IBM1166_e, + iconv_IBM1167_e, + iconv_IBM12712_e, + iconv_IBM1364_e, + iconv_IBM1371_e, + iconv_IBM1388_e, + iconv_IBM1390_e, + iconv_IBM1399_e, + iconv_IBM16804_e, + iconv_IBM256_e, + iconv_IBM273_e, + iconv_IBM274_e, + iconv_IBM275_e, + iconv_IBM277_e, + iconv_IBM278_e, + iconv_IBM280_e, + iconv_IBM281_e, + iconv_IBM284_e, + iconv_IBM285_e, + iconv_IBM290_e, + iconv_IBM297_e, + iconv_IBM367_e, + iconv_IBM420_e, + iconv_IBM423_e, + iconv_IBM424_e, + iconv_IBM437_e, + iconv_IBM4517_e, + iconv_IBM4899_e, + iconv_IBM4909_e, + iconv_IBM4971_e, + iconv_IBM500_e, + iconv_IBM5347_e, + iconv_IBM775_e, + iconv_IBM803_e, + iconv_IBM813_e, + iconv_IBM819_e, + iconv_IBM848_e, + iconv_IBM850_e, + iconv_IBM851_e, + iconv_IBM852_e, + iconv_IBM855_e, + iconv_IBM856_e, + iconv_IBM857_e, + iconv_IBM858_e, + iconv_IBM860_e, + iconv_IBM861_e, + iconv_IBM862_e, + iconv_IBM863_e, + iconv_IBM864_e, + iconv_IBM865_e, + iconv_IBM866NAV_e, + iconv_IBM866_e, + iconv_IBM868_e, + iconv_IBM869_e, + iconv_IBM870_e, + iconv_IBM871_e, + iconv_IBM874_e, + iconv_IBM875_e, + iconv_IBM880_e, + iconv_IBM891_e, + iconv_IBM901_e, + iconv_IBM902_e, + iconv_IBM9030_e, + iconv_IBM903_e, + iconv_IBM904_e, + iconv_IBM905_e, + iconv_IBM9066_e, + iconv_IBM912_e, + iconv_IBM915_e, + iconv_IBM916_e, + iconv_IBM918_e, + iconv_IBM920_e, + iconv_IBM921_e, + iconv_IBM922_e, + iconv_IBM930_e, + iconv_IBM932_e, + iconv_IBM933_e, + iconv_IBM935_e, + iconv_IBM937_e, + iconv_IBM939_e, + iconv_IBM943_e, + iconv_IBM9448_e, + iconv_IBM_1008_e, + iconv_IBM_1025_e, + iconv_IBM_1046_e, + iconv_IBM_1047_e, + iconv_IBM_1097_e, + iconv_IBM_1112_e, + iconv_IBM_1122_e, + iconv_IBM_1123_e, + iconv_IBM_1124_e, + iconv_IBM_1129_e, + iconv_IBM_1130_e, + iconv_IBM_1132_e, + iconv_IBM_1133_e, + iconv_IBM_1137_e, + iconv_IBM_1140_e, + iconv_IBM_1141_e, + iconv_IBM_1142_e, + iconv_IBM_1143_e, + iconv_IBM_1144_e, + iconv_IBM_1145_e, + iconv_IBM_1146_e, + iconv_IBM_1147_e, + iconv_IBM_1148_e, + iconv_IBM_1149_e, + iconv_IBM_1153_e, + iconv_IBM_1154_e, + iconv_IBM_1155_e, + iconv_IBM_1156_e, + iconv_IBM_1157_e, + iconv_IBM_1158_e, + iconv_IBM_1160_e, + iconv_IBM_1161_e, + iconv_IBM_1162_e, + iconv_IBM_1163_e, + iconv_IBM_1164_e, + iconv_IBM_1166_e, + iconv_IBM_1167_e, + iconv_IBM_12712_e, + iconv_IBM_1364_e, + iconv_IBM_1371_e, + iconv_IBM_1388_e, + iconv_IBM_1390_e, + iconv_IBM_1399_e, + iconv_IBM_16804_e, + iconv_IBM_4517_e, + iconv_IBM_4899_e, + iconv_IBM_4909_e, + iconv_IBM_4971_e, + iconv_IBM_5347_e, + iconv_IBM_803_e, + iconv_IBM_856_e, + iconv_IBM_901_e, + iconv_IBM_902_e, + iconv_IBM_9030_e, + iconv_IBM_9066_e, + iconv_IBM_921_e, + iconv_IBM_922_e, + iconv_IBM_930_e, + iconv_IBM_932_e, + iconv_IBM_933_e, + iconv_IBM_935_e, + iconv_IBM_937_e, + iconv_IBM_939_e, + iconv_IBM_943_e, + iconv_IBM_9448_e, + iconv_IEC_P271_e, + iconv_IEC_P27_1_e, + iconv_INIS8_e, + iconv_INISCYRILLIC_e, + iconv_INIS_8_e, + iconv_INIS_CYRILLIC_e, + iconv_INIS_e, + iconv_ISIRI3342_e, + iconv_ISIRI_3342_e, + iconv_ISO11548_1_e, + iconv_ISO2022CNEXT_e, + iconv_ISO2022CN_e, + iconv_ISO2022JP2_e, + iconv_ISO2022JP_e, + iconv_ISO2022KR_e, + iconv_ISO646_CA2_e, + iconv_ISO646_CA_e, + iconv_ISO646_CN_e, + iconv_ISO646_CU_e, + iconv_ISO646_DE_e, + iconv_ISO646_DK_e, + iconv_ISO646_ES2_e, + iconv_ISO646_ES_e, + iconv_ISO646_FI_e, + iconv_ISO646_FR1_e, + iconv_ISO646_FR_e, + iconv_ISO646_GB_e, + iconv_ISO646_HU_e, + iconv_ISO646_IT_e, + iconv_ISO646_JP_OCR_B_e, + iconv_ISO646_JP_e, + iconv_ISO646_KR_e, + iconv_ISO646_NO2_e, + iconv_ISO646_NO_e, + iconv_ISO646_PT2_e, + iconv_ISO646_PT_e, + iconv_ISO646_SE2_e, + iconv_ISO646_SE_e, + iconv_ISO646_US_e, + iconv_ISO646_YU_e, + iconv_ISO6937_e, + iconv_ISO885910_e, + iconv_ISO885911_e, + iconv_ISO885913_e, + iconv_ISO885914_e, + iconv_ISO885915_e, + iconv_ISO885916_e, + iconv_ISO88591_e, + iconv_ISO88592_e, + iconv_ISO88593_e, + iconv_ISO88594_e, + iconv_ISO88595_e, + iconv_ISO88596_e, + iconv_ISO88597_e, + iconv_ISO88598_e, + iconv_ISO88599E_e, + iconv_ISO88599_e, + iconv_ISO8859_10_e, + iconv_ISO8859_11_e, + iconv_ISO8859_13_e, + iconv_ISO8859_14_e, + iconv_ISO8859_15_e, + iconv_ISO8859_16_e, + iconv_ISO8859_1_e, + iconv_ISO8859_2_e, + iconv_ISO8859_3_e, + iconv_ISO8859_4_e, + iconv_ISO8859_5_e, + iconv_ISO8859_6_e, + iconv_ISO8859_7_e, + iconv_ISO8859_8_e, + iconv_ISO8859_9E_e, + iconv_ISO8859_9_e, + iconv_ISO_10367BOX_e, + iconv_ISO_10367_BOX_e, + iconv_ISO_10646_e, // STANDARD_2 + iconv_ISO_11548_1_e, + iconv_ISO_2022_CN_EXT_e, + iconv_ISO_2022_CN_e, + iconv_ISO_2022_JP_2_e, + iconv_ISO_2022_JP_3_e, + iconv_ISO_2022_JP_e, + iconv_ISO_2022_KR_e, + iconv_ISO_2033_1983_e, + iconv_ISO_2033_e, + iconv_ISO_5427EXT_e, + iconv_ISO_5427_1981_e, + iconv_ISO_5427_EXT_e, + iconv_ISO_5427_e, + iconv_ISO_5428_1980_e, + iconv_ISO_5428_e, + iconv_ISO_646_IRV_1991_e, + iconv_ISO_69372_e, + iconv_ISO_6937_1992_e, + iconv_ISO_6937_2_1983_e, + iconv_ISO_6937_2_e, + iconv_ISO_6937_e, + iconv_ISO_8859_10_1992_e, + iconv_ISO_8859_10_e, + iconv_ISO_8859_11_e, + iconv_ISO_8859_13_e, + iconv_ISO_8859_14_1998_e, + iconv_ISO_8859_14_e, + iconv_ISO_8859_15_1998_e, + iconv_ISO_8859_15_e, + iconv_ISO_8859_16_2001_e, + iconv_ISO_8859_16_e, + iconv_ISO_8859_1_1987_e, + iconv_ISO_8859_1_e, + iconv_ISO_8859_2_1987_e, + iconv_ISO_8859_2_e, + iconv_ISO_8859_3_1988_e, + iconv_ISO_8859_3_e, + iconv_ISO_8859_4_1988_e, + iconv_ISO_8859_4_e, + iconv_ISO_8859_5_1988_e, + iconv_ISO_8859_5_e, + iconv_ISO_8859_6_1987_e, + iconv_ISO_8859_6_e, + iconv_ISO_8859_7_1987_e, + iconv_ISO_8859_7_2003_e, + iconv_ISO_8859_7_e, + iconv_ISO_8859_8_1988_e, + iconv_ISO_8859_8_e, + iconv_ISO_8859_9E_e, + iconv_ISO_8859_9_1989_e, + iconv_ISO_8859_9_e, + iconv_ISO_9036_e, + iconv_ISO_CELTIC_e, + iconv_ISO_IR_100_e, + iconv_ISO_IR_101_e, + iconv_ISO_IR_103_e, + iconv_ISO_IR_109_e, + iconv_ISO_IR_10_e, + iconv_ISO_IR_110_e, + iconv_ISO_IR_111_e, + iconv_ISO_IR_11_e, + iconv_ISO_IR_121_e, + iconv_ISO_IR_122_e, + iconv_ISO_IR_126_e, + iconv_ISO_IR_127_e, + iconv_ISO_IR_138_e, + iconv_ISO_IR_139_e, + iconv_ISO_IR_141_e, + iconv_ISO_IR_143_e, + iconv_ISO_IR_144_e, + iconv_ISO_IR_148_e, + iconv_ISO_IR_14_e, + iconv_ISO_IR_150_e, + iconv_ISO_IR_151_e, + iconv_ISO_IR_153_e, + iconv_ISO_IR_155_e, + iconv_ISO_IR_156_e, + iconv_ISO_IR_157_e, + iconv_ISO_IR_15_e, + iconv_ISO_IR_166_e, + iconv_ISO_IR_16_e, + iconv_ISO_IR_179_e, + iconv_ISO_IR_17_e, + iconv_ISO_IR_18_e, + iconv_ISO_IR_193_e, + iconv_ISO_IR_197_e, + iconv_ISO_IR_199_e, + iconv_ISO_IR_19_e, + iconv_ISO_IR_203_e, + iconv_ISO_IR_209_e, + iconv_ISO_IR_21_e, + iconv_ISO_IR_226_e, + iconv_ISO_IR_25_e, + iconv_ISO_IR_27_e, + iconv_ISO_IR_37_e, + iconv_ISO_IR_49_e, + iconv_ISO_IR_4_e, + iconv_ISO_IR_50_e, + iconv_ISO_IR_51_e, + iconv_ISO_IR_54_e, + iconv_ISO_IR_55_e, + iconv_ISO_IR_57_e, + iconv_ISO_IR_60_e, + iconv_ISO_IR_61_e, + iconv_ISO_IR_69_e, + iconv_ISO_IR_6_e, + iconv_ISO_IR_84_e, + iconv_ISO_IR_85_e, + iconv_ISO_IR_86_e, + iconv_ISO_IR_88_e, + iconv_ISO_IR_89_e, + iconv_ISO_IR_8_1_e, + iconv_ISO_IR_90_e, + iconv_ISO_IR_92_e, + iconv_ISO_IR_98_e, + iconv_ISO_IR_99_e, + iconv_ISO_IR_9_1_e, + iconv_ISO_e, + iconv_IT_e, + iconv_JIS_C62201969RO_e, + iconv_JIS_C6220_1969_RO_e, + iconv_JIS_C62291984B_e, + iconv_JIS_C6229_1984_B_e, + iconv_JOHAB_e, + iconv_JP_OCR_B_e, + iconv_JP_e, + iconv_JS_e, + iconv_JUS_I_B1_002_e, + iconv_KOI8R_e, + iconv_KOI8U_e, + iconv_KOI8_RU_e, + iconv_KOI8_R_e, + iconv_KOI8_T_e, + iconv_KOI8_U_e, + iconv_KOI8_e, + iconv_KOI_7_e, + iconv_KOI_8_e, + iconv_KSC5636_e, + iconv_L10_e, + iconv_L1_e, + iconv_L2_e, + iconv_L3_e, + iconv_L4_e, + iconv_L5_e, + iconv_L6_e, + iconv_L7_e, + iconv_L8_e, + iconv_LATIN10_e, + iconv_LATIN1_e, + iconv_LATIN2_e, + iconv_LATIN3_e, + iconv_LATIN4_e, + iconv_LATIN5_e, + iconv_LATIN6_e, + iconv_LATIN7_e, + iconv_LATIN8_e, + iconv_LATIN9_e, + iconv_LATINGREEK1_e, + iconv_LATINGREEK_e, + iconv_LATIN_9_e, + iconv_LATIN_GREEK_1_e, + iconv_LATIN_GREEK_e, + iconv_MACCYRILLIC_e, + iconv_MACINTOSH_e, + iconv_MACIS_e, + iconv_MACUKRAINIAN_e, + iconv_MACUK_e, + iconv_MAC_CENTRALEUROPE_e, + iconv_MAC_CYRILLIC_e, + iconv_MAC_IS_e, + iconv_MAC_SAMI_e, + iconv_MAC_UK_e, + iconv_MAC_e, + iconv_MIK_e, + iconv_MS932_e, + iconv_MS936_e, + iconv_MSCP1361_e, + iconv_MSCP949_e, + iconv_MSMACCYRILLIC_e, + iconv_MSZ_7795_3_e, + iconv_MS_ANSI_e, + iconv_MS_ARAB_e, + iconv_MS_CYRL_e, + iconv_MS_EE_e, + iconv_MS_GREEK_e, + iconv_MS_HEBR_e, + iconv_MS_KANJI_e, + iconv_MS_MAC_CYRILLIC_e, + iconv_MS_TURK_e, + iconv_NAPLPS_e, + iconv_NATSDANO_e, + iconv_NATSSEFI_e, + iconv_NATS_DANO_e, + iconv_NATS_SEFI_e, + iconv_NC_NC0010_e, + iconv_NC_NC00_10_81_e, + iconv_NC_NC00_10_e, + iconv_NF_Z_62010_1973_e, + iconv_NF_Z_62010_e, + iconv_NF_Z_62_010__1973__e, + iconv_NF_Z_62_010_1973_e, + iconv_NF_Z_62_010_e, + iconv_NO2_e, + iconv_NO_e, + iconv_NS_45511_e, + iconv_NS_45512_e, + iconv_NS_4551_1_e, + iconv_NS_4551_2_e, + iconv_OS2LATIN1_e, + iconv_OSF00010001_e, + iconv_OSF00010002_e, + iconv_OSF00010003_e, + iconv_OSF00010004_e, + iconv_OSF00010005_e, + iconv_OSF00010006_e, + iconv_OSF00010007_e, + iconv_OSF00010008_e, + iconv_OSF00010009_e, + iconv_OSF0001000A_e, + iconv_OSF00010020_e, + iconv_OSF00010100_e, + iconv_OSF00010101_e, + iconv_OSF00010102_e, + iconv_OSF00010104_e, + iconv_OSF00010105_e, + iconv_OSF00010106_e, + iconv_OSF00030010_e, + iconv_OSF0004000A_e, + iconv_OSF0005000A_e, + iconv_OSF05010001_e, + iconv_OSF10010001_e, + iconv_OSF10010004_e, + iconv_OSF10010006_e, + iconv_OSF10020025_e, + iconv_OSF10020111_e, + iconv_OSF10020115_e, + iconv_OSF10020116_e, + iconv_OSF10020118_e, + iconv_OSF1002011C_e, + iconv_OSF1002011D_e, + iconv_OSF10020122_e, + iconv_OSF10020129_e, + iconv_OSF100201A4_e, + iconv_OSF100201A8_e, + iconv_OSF100201B5_e, + iconv_OSF100201F4_e, + iconv_OSF10020352_e, + iconv_OSF10020354_e, + iconv_OSF10020357_e, + iconv_OSF10020359_e, + iconv_OSF1002035D_e, + iconv_OSF1002035E_e, + iconv_OSF1002035F_e, + iconv_OSF10020360_e, + iconv_OSF10020364_e, + iconv_OSF10020365_e, + iconv_OSF10020366_e, + iconv_OSF10020367_e, + iconv_OSF1002036B_e, + iconv_OSF10020370_e, + iconv_OSF1002037B_e, + iconv_OSF10020387_e, + iconv_OSF10020388_e, + iconv_OSF10020396_e, + iconv_OSF100203B5_e, + iconv_OSF10020402_e, + iconv_OSF10020417_e, + iconv_PT154_e, + iconv_PT2_e, + iconv_PT_e, + iconv_R8_e, + iconv_R9_e, + iconv_RK1048_e, + iconv_ROMAN8_e, + iconv_ROMAN9_e, + iconv_RUSCII_e, + iconv_SE2_e, + iconv_SEN_850200_B_e, + iconv_SEN_850200_C_e, + iconv_SE_e, + iconv_SHIFTJISX0213_e, + iconv_SHIFT_JISX0213_e, + iconv_SHIFT_JIS_e, + iconv_SJIS_OPEN_e, + iconv_SJIS_WIN_e, + iconv_SJIS_e, + iconv_SS636127_e, + iconv_STRK1048_2002_e, + iconv_ST_SEV_358_88_e, + iconv_TCVN5712_1_1993_e, + iconv_TCVN5712_1_e, + iconv_TCVN_5712_e, + iconv_TCVN_e, + iconv_THAI8_e, + iconv_TIS620_0_e, + iconv_TIS620_2529_1_e, + iconv_TIS620_2533_0_e, + iconv_TIS620_e, + iconv_TIS_620_e, + iconv_TSCII_e, + iconv_TS_5881_e, + iconv_TURKISH8_e, + iconv_T_618BIT_e, + iconv_T_61_8BIT_e, + iconv_T_61_e, + iconv_UCS2_e, + iconv_UCS4_e, + iconv_UCS_2BE_e, + iconv_UCS_2LE_e, + iconv_UCS_2_e, + iconv_UCS_4BE_e, + iconv_UCS_4LE_e, + iconv_UCS_4_e, + iconv_UHC_e, + iconv_UJIS_e, + iconv_UK_e, + iconv_UNICODEBIG_e, + iconv_UNICODELITTLE_e, + iconv_UNICODE_e, + iconv_US_ASCII_e, + iconv_US_e, + iconv_UTF16BE_e, + iconv_UTF16LE_e, + iconv_UTF16_e, + iconv_UTF32BE_e, + iconv_UTF32LE_e, + iconv_UTF32_e, + iconv_UTF7_e, + iconv_UTF8_e, + iconv_UTF_16BE_e, + iconv_UTF_16LE_e, + iconv_UTF_16_e, + iconv_UTF_32BE_e, + iconv_UTF_32LE_e, + iconv_UTF_32_e, + iconv_UTF_7_e, + iconv_UTF_8_e, // UTF_8 specifically + iconv_VISCII_e, + iconv_WCHAR_T_e, + iconv_WINBALTRIM_e, + iconv_WINDOWS_1250_e, + iconv_WINDOWS_1251_e, + iconv_WINDOWS_1252_e, + iconv_WINDOWS_1253_e, + iconv_WINDOWS_1254_e, + iconv_WINDOWS_1255_e, + iconv_WINDOWS_1256_e, + iconv_WINDOWS_1257_e, + iconv_WINDOWS_1258_e, + iconv_WINDOWS_31J_e, + iconv_WINDOWS_874_e, + iconv_WINDOWS_936_e, + iconv_WINSAMI2_e, + iconv_WIN_SAMI_2_e, + iconv_WS2_e, + iconv_YU_e, +}; + +#define ASCII_e iconv_ASCII_e +#define CP1252_e iconv_CP1252_e +#define EBCDIC_e iconv_CP1140_e +#define UTF8_e iconv_UTF_8_e +#define iso646_e iconv_ISO_10646_e + +struct encodings_t { + cbl_encoding_t type; + const char name[32]; +}; + +#endif diff --git a/libgcobol/gcobolio.h b/libgcobol/gcobolio.h index 2ca8883afc2..7a1c9ac0702 100644 --- a/libgcobol/gcobolio.h +++ b/libgcobol/gcobolio.h @@ -61,7 +61,8 @@ typedef struct cblc_field_t signed char level; // This variable's level in the naming heirarchy signed char digits; // Digits specified in PIC string; e.g. 5 for 99v999 signed char rdigits; // Digits to the right of the decimal point. 3 for 99v999 - int dummy; // GCC seems to want an even number of 32-bit values + cbl_encoding_t encoding; // + int alphabet; // Same as cbl_field_t::codeset::language } cblc_field_t; /* @@ -126,6 +127,8 @@ typedef struct cblc_file_t int recent_char; // This is the most recent char sent to the file int recent_key; cblc_file_prior_op_t prior_op; // run-time type is INT + cbl_encoding_t encoding; // We assume size int + int alphabet; // Actually cbl_encoding_t int dummy; } cblc_file_t; diff --git a/libgcobol/gfileio.cc b/libgcobol/gfileio.cc index 51a73cd5315..6d3bb5db84a 100644 --- a/libgcobol/gfileio.cc +++ b/libgcobol/gfileio.cc @@ -197,10 +197,13 @@ get_filename( const cblc_file_t *file, static size_t fname_size = MINIMUM_ALLOCATION_SIZE; static char *fname = static_cast(malloc(MINIMUM_ALLOCATION_SIZE)); massert(fname); - fname = internal_to_console(&fname, - &fname_size, - file->filename, - strlen(file->filename)); + if( strlen(file->filename)+1 > fname_size) + { + fname_size = strlen(file->filename)+1 ; + fname = static_cast(realloc(fname, fname_size)); + } + + strcpy(fname, file->filename); if( !is_quoted ) { @@ -320,10 +323,21 @@ __gg__file_init( int access, int optional, size_t record_area_min, - size_t record_area_max) + size_t record_area_max, + cbl_encoding_t encoding, + int alphabet + ) { if( !(file->flags & file_flag_initialized_e) ) { + if( encoding != iconv_CP1140_e && __gg__ebcdic_codeset_in_use ) + { + // This code is to be eliminated when 'encoding' is valid. + encoding = iconv_CP1140_e; + } + + charmap_t *charmap = __gg__get_charmap(encoding); + file->name = strdup(name); file->symbol_table_index = symbol_table_index; file->filename = NULL ; @@ -343,7 +357,7 @@ __gg__file_init( file->access = (cbl_file_access_t)access ; file->errnum = 0 ; file->io_status = FsSuccess ; - file->delimiter = internal_newline ; + file->delimiter = charmap->mapped_character(ascii_newline) ; file->flags = file_flag_none_e; file->flags |= (optional ? file_flag_optional_e : file_flag_none_e) + file_flag_initialized_e; @@ -351,6 +365,8 @@ __gg__file_init( file->record_area_max = record_area_max; file->prior_read_location = 0; file->prior_op = file_op_none; + file->encoding = encoding; + file->alphabet = alphabet; if( file->access == file_inaccessible_e ) { @@ -727,7 +743,10 @@ relative_file_delete(cblc_file_t *file, bool is_random) goto done; } - if( presult == 0 || record_marker != internal_newline ) + charmap_t *charmap = __gg__get_charmap(file->encoding); + + if( presult == 0 + || record_marker != charmap->mapped_character(ascii_newline) ) { // There isn't a record there for us to delete, which is an error file->io_status = FsNotFound; // "23" @@ -1428,7 +1447,8 @@ relative_file_start(cblc_file_t *file, // end of file goto done; } - if( record_marker == internal_newline ) + charmap_t *charmap = __gg__get_charmap(file->encoding); + if( record_marker == charmap->mapped_character(ascii_newline) ) { // The record is a valid one fpos = rfp.record_position; @@ -1881,7 +1901,8 @@ relative_file_rewrite( cblc_file_t *file, size_t length, bool is_random ) goto done; } - if( presult == 0 || record_marker != internal_newline ) + charmap_t *charmap = __gg__get_charmap(file->encoding); + if( presult == 0 || record_marker != charmap->mapped_character(ascii_newline) ) { // The record is not specified: file->io_status = FsNotFound; // "23" @@ -2336,7 +2357,8 @@ relative_file_write_varying(cblc_file_t *file, while( payload_length < file->record_area_max ) { - fputc(internal_space, file->file_pointer); + charmap_t *charmap = __gg__get_charmap(file->encoding); + fputc(charmap->mapped_character(ascii_space), file->file_pointer); if( handle_ferror(file, __func__, "fputc() error") ) { goto done; @@ -2377,7 +2399,12 @@ relative_file_write(cblc_file_t *file, file->io_status = FsErrno; long necessary_file_size; - const unsigned char achPostamble[] = {internal_cr, internal_newline}; + charmap_t *charmap = __gg__get_charmap(file->encoding); + const unsigned char achPostamble[] = + { + (unsigned char)charmap->mapped_character(ascii_cr), + (unsigned char)charmap->mapped_character(ascii_newline) + }; relative_file_parameters rfp; @@ -2425,7 +2452,7 @@ relative_file_write(cblc_file_t *file, goto done; } - if( presult == 1 && record_marker == internal_newline ) + if( presult == 1 && record_marker == charmap->mapped_character(ascii_newline) ) { // The slot has something in it already: file->io_status = FsDupWrite; // "22" @@ -2467,7 +2494,7 @@ relative_file_write(cblc_file_t *file, size_t padding = file->record_area_max - length; while(padding--) { - fputc(internal_space, file->file_pointer); + fputc(charmap->mapped_character(ascii_space), file->file_pointer); } } @@ -2502,6 +2529,8 @@ sequential_file_write(cblc_file_t *file, int lines) { // This code handles SEQUENTIAL and LINE SEQUENTIAl + charmap_t *charmap = __gg__get_charmap(file->encoding); + char ch = '\0'; size_t characters_to_write; @@ -2510,7 +2539,7 @@ sequential_file_write(cblc_file_t *file, if( lines < -1 ) { // We are using -666 for a form feed - ch = internal_ff; // Form feed + ch = charmap->mapped_character(ascii_ff); // Form feed lcount = 1; } else if( lines == -1 ) @@ -2521,12 +2550,12 @@ sequential_file_write(cblc_file_t *file, else if( lines == 0 ) { lcount = 1; - ch = internal_return; + ch = charmap->mapped_character(ascii_return); } else /* if( lines > 0 ) */ { lcount = lines; - ch = internal_newline; + ch = charmap->mapped_character(ascii_newline); } // By default, we write out the number of characters in the record area @@ -2545,19 +2574,19 @@ sequential_file_write(cblc_file_t *file, { // If file-sequential, then trailing spaces are removed: while( characters_to_write > 0 - && location[characters_to_write-1] == internal_space ) + && location[characters_to_write-1] == charmap->mapped_character(ascii_space) ) { characters_to_write -= 1; } } - if( after && file->org == file_line_sequential_e && ch == internal_newline ) + if( after && file->org == file_line_sequential_e && ch == charmap->mapped_character(ascii_newline) ) { // In general, we terminate every line with a newline. Because this // line is supposed to start with a newline, we decrement the line // counter by one if we had already sent one. - if( lcount && ( file->recent_char == internal_newline - || file->recent_char == internal_ff) ) + if( lcount && ( file->recent_char == charmap->mapped_character(ascii_newline) + || file->recent_char == charmap->mapped_character(ascii_ff)) ) { lcount -= 1; } @@ -2575,7 +2604,7 @@ sequential_file_write(cblc_file_t *file, file->recent_char = ch; } // That might have been a formfeed; switch back to newline: - ch = internal_newline; + ch = charmap->mapped_character(ascii_newline); } switch(file->org) @@ -2660,12 +2689,12 @@ sequential_file_write(cblc_file_t *file, { goto done; } - file->recent_char = internal_newline; + file->recent_char = charmap->mapped_character(ascii_newline); } if( !after ) { - // We did the output BEFORE, so now it's time to send some internal_newlines + // We did the output BEFORE, so now it's time to send some newlines while(lcount--) { fputc(ch, file->file_pointer); @@ -3004,7 +3033,9 @@ line_sequential_file_read( cblc_file_t *file) while(remaining < file->record_area_max ) { // Space fill shorty records - file->default_record->data[remaining++] = internal_space; + charmap_t *charmap = __gg__get_charmap(file->encoding); + file->default_record->data[remaining++] = + charmap->mapped_character(ascii_space); } if( hit_eof && !characters_read) @@ -3028,7 +3059,7 @@ line_sequential_file_read( cblc_file_t *file) else // We filled the whole record area. Look ahead one character { #ifdef POSSIBLY_IBM - // In this code, unread characters before the internal_newline + // In this code, unread characters before the newline // are read next time. See page 133 of the IBM Language Reference // Manual: "If the first unread character is the record delimiter, it // is discarded. Otherwise, the first unread character becomes the first @@ -3046,7 +3077,7 @@ line_sequential_file_read( cblc_file_t *file) goto done; } #else - // In this code, extra characters before the internal_newline + // In this code, extra characters before the newline // are read next time are discarded. GnuCOBOL works this way, and // the Michael Coughlin "Beginning COBOL" examples require this mode. // The ISO/IEC 2014 standard is silent on the question of LINE @@ -3165,7 +3196,10 @@ sequential_file_read( cblc_file_t *file) } if( characters_read < bytes_in_record ) { - memset(file->default_record->data, internal_space, bytes_to_read); + charmap_t *charmap = __gg__get_charmap(file->encoding); + memset( file->default_record->data, + charmap->mapped_character(ascii_space), + bytes_to_read); file->io_status = FsEofSeq; // "10" fpos = -1; goto done; @@ -3472,7 +3506,8 @@ relative_file_read( cblc_file_t *file, { goto done; } - if(record_marker == internal_newline) + charmap_t *charmap = __gg__get_charmap(file->encoding); + if(record_marker == charmap->mapped_character(ascii_newline) ) { // We have a good record to read: @@ -3953,16 +3988,6 @@ file_indexed_open(cblc_file_t *file) case '+': if( file->flags & file_flag_existed_e ) { - // We need to open the file for reading, and build the - // maps for each index: - static size_t fname_size = MINIMUM_ALLOCATION_SIZE; - static char *fname = static_cast(malloc(fname_size)); - massert(fname); - - internal_to_console(&fname, - &fname_size, - file->filename, strlen(file->filename)); - // We are going to scan through the entire file, building index // entries for each record. @@ -4102,7 +4127,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char) bool all_spaces = true; for(size_t i=0; ifilename); i++) { - if( file->filename[i] != internal_space ) + if( file->filename[i] != ascii_space ) { all_spaces = false; } @@ -4116,16 +4141,9 @@ __gg__file_reopen(cblc_file_t *file, int mode_char) goto done; } - static size_t fname_size = MINIMUM_ALLOCATION_SIZE; - static char *fname = static_cast(malloc(fname_size)); - massert(fname) - internal_to_console(&fname, - &fname_size, - file->filename, - strlen(file->filename)); warnx( "%s(): There is no environment variable named \"%s\"\n", __func__, - fname); + file->filename); file->io_status = FsNoFile; // "35" goto done; } @@ -4323,7 +4341,8 @@ __io__file_open(cblc_file_t *file, int mode_char, int is_quoted) { - // Filename is a pointer to a malloc() buffer. + // 'filename' is a pointer to a malloc() buffer. + // The 'filename' has to be in the system encoding, typically ASCII // The complication: A filename can be literal text, it can be from a COBOL // alphanumeric variable, or it can be the name of an environment variable @@ -4359,9 +4378,9 @@ __io__file_close( cblc_file_t *file, int how ) // if( file->org == file_line_sequential_e // && ( file->mode_char == 'w' || file->mode_char == 'a' ) - // && file->recent_char != internal_newline ) + // && file->recent_char != inter nal_newline ) // { - // int ch = internal_newline; + // int ch = inter nal_newline; // fputc(ch, file->file_pointer); // if( handle_ferror(file, __func__, "fputc() error [6]") ) // { @@ -4401,9 +4420,7 @@ __io__file_close( cblc_file_t *file, int how ) file_indexed_close(file); } - // The filename can be from a COBOL alphanumeric variable, which means it can - // between a file_close and a subsequent file_open. So, we get rid of it - // here + // The filename was malloced. So, we get rid of it here. free(file->filename); file->filename = NULL; @@ -4588,8 +4605,9 @@ __gg__file_open(cblc_file_t *file, int mode_char, int is_quoted) { - gcobol_io_t *functions = gcobol_io_funcs(); - functions->Open(file, filename, mode_char, is_quoted); + // The 'filename' has to be in the system encoding, typically ASCII + gcobol_io_t *functions = gcobol_io_funcs(); + functions->Open(file, filename, mode_char, is_quoted); } extern "C" diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc index 81ae638630f..c85b263d3a7 100644 --- a/libgcobol/intrinsic.cc +++ b/libgcobol/intrinsic.cc @@ -86,11 +86,11 @@ static int is_leap_year(int); typedef char * PCHAR; static void -trim_trailing_spaces(PCHAR left, PCHAR &right) +trim_trailing_spaces(PCHAR left, PCHAR &right, int mapped_space) { while( right > left ) { - if( *(right-1) != internal_space ) + if( *(right-1) != mapped_space ) { break; } @@ -99,12 +99,13 @@ trim_trailing_spaces(PCHAR left, PCHAR &right) } static bool -is_zulu_format(PCHAR left, PCHAR &right) +is_zulu_format(PCHAR left, PCHAR &right, charmap_t *charmap) { + int char_Z = charmap->mapped_character(ascii_Z); bool retval = false; if( right > left ) { - retval = std::toupper((unsigned char)*(right-1)) == internal_Z; + retval = std::toupper((unsigned char)*(right-1)) == char_Z; } return retval; } @@ -231,7 +232,8 @@ string_to_dest(cblc_field_t *dest, const char *psz) size_t dest_length = dest->capacity; size_t source_length = strlen(psz); size_t length = std::min(dest_length, source_length); - memset(dest->data, internal_space, dest_length); + charmap_t *charmap = __gg__get_charmap(dest->encoding); + memset(dest->data, charmap->mapped_character(ascii_space), dest_length); memcpy(dest->data, psz, length); } @@ -524,7 +526,8 @@ variance( size_t ncount, static void -get_all_time( char *stime, +get_all_time( const cblc_field_t *dest, // needed for the target encoding + char *stime, const struct cobol_tm &ctm) { // This routine represents a universal source for all output formatted date @@ -572,8 +575,9 @@ get_all_time( char *stime, ctm.day_of_week+1, ctm.day_of_year, ctm.ZZZZ); - // We might be operating in EBCDIC: - ascii_to_internal_str(stime, strlen(stime)); + __gg__convert_encoding(PTRCAST(char, stime), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); } static @@ -798,6 +802,7 @@ ftime_replace(char *dest, char const * const dest_end, char const * source, char const * const source_end, + charmap_t * charmap_source, char const * const ftime) { // This routine is highly dependent on the source format being correct. @@ -806,24 +811,34 @@ ftime_replace(char *dest, bool saw_decimal_point = false; bool saw_plus_sign = false; char decimal_point = __gg__get_decimal_point(); - static const int OFFSET_TO_YYYY = 0; - static const int OFFSET_TO_MM = 4; - static const int OFFSET_TO_DD = 6; - static const int OFFSET_TO_HOUR = 9; - static const int OFFSET_TO_MINUTE = 11; - static const int OFFSET_TO_SECOND = 13; - static const int OFFSET_TO_FRACTION = 16; - static const int OFFSET_TO_OFFSET = 25; - static const int OFFSET_TO_OFFSET_HOUR = 26; - static const int OFFSET_TO_OFFSET_MINUTE = 28; - static const int OFFSET_TO_WEEK = 30; - static const int OFFSET_TO_DOW = 33; - static const int OFFSET_TO_DOY = 34; - static const int OFFSET_TO_ZZZZ = 37; + static const int OFFSET_TO_YYYY = 0; + static const int OFFSET_TO_MM = 4; + static const int OFFSET_TO_DD = 6; + static const int OFFSET_TO_HOUR = 9; + static const int OFFSET_TO_MINUTE = 11; + static const int OFFSET_TO_SECOND = 13; + static const int OFFSET_TO_FRACTION = 16; + static const int OFFSET_TO_OFFSET = 25; + static const int OFFSET_TO_OFFSET_HOUR = 26; + static const int OFFSET_TO_OFFSET_MINUTE = 28; + static const int OFFSET_TO_WEEK = 30; + static const int OFFSET_TO_DOW = 33; + static const int OFFSET_TO_DOY = 34; + static const int OFFSET_TO_ZZZZ = 37; + + int source_Y = charmap_source->mapped_character(ascii_Y ); + int source_W = charmap_source->mapped_character(ascii_W ); + int source_s = charmap_source->mapped_character(ascii_s ); + int source_m = charmap_source->mapped_character(ascii_m ); + int source_h = charmap_source->mapped_character(ascii_h ); + int source_plus = charmap_source->mapped_character(ascii_plus); + int source_D = charmap_source->mapped_character(ascii_D ); + int source_M = charmap_source->mapped_character(ascii_M ); + while( source < source_end && dest < dest_end ) { char fchar = *source; - if( fchar == internal_Y ) + if( fchar == source_Y ) { // This can only be a YYYY // But, we have a choice. If there is a 'W' in the format, then we @@ -832,7 +847,7 @@ ftime_replace(char *dest, const char *p = source; while(p < source_end) { - if( *p++ == internal_W ) + if( *p++ == source_W ) { src = ftime + OFFSET_TO_ZZZZ; } @@ -840,21 +855,21 @@ ftime_replace(char *dest, ncount = 4; } - else if( fchar == internal_M ) + else if( fchar == source_M ) { // This can only be a MM ncount = 2; src = ftime + OFFSET_TO_MM; } - else if( fchar == internal_D ) + else if( fchar == source_D ) { // It can be a D, DD or DDD - if( source[2] == internal_D ) + if( source[2] == source_D ) { ncount = 3; src = ftime + OFFSET_TO_DOY; } - else if( source[1] == internal_D ) + else if( source[1] == source_D ) { ncount = 2; src = ftime + OFFSET_TO_DD; @@ -865,13 +880,13 @@ ftime_replace(char *dest, src = ftime + OFFSET_TO_DOW; } } - else if( fchar == internal_plus ) + else if( fchar == source_plus ) { saw_plus_sign = true; ncount = 1; src = ftime + OFFSET_TO_OFFSET; } - else if( fchar == internal_h ) + else if( fchar == source_h ) { ncount = 2; if(saw_plus_sign) @@ -883,7 +898,7 @@ ftime_replace(char *dest, src = ftime + OFFSET_TO_HOUR; } } - else if( fchar == internal_m ) + else if( fchar == source_m ) { ncount = 2; if(saw_plus_sign) @@ -901,7 +916,7 @@ ftime_replace(char *dest, ncount = 1; src = source; } - else if( fchar == internal_s ) + else if( fchar == source_s ) { if(saw_decimal_point) { @@ -915,7 +930,7 @@ ftime_replace(char *dest, src = ftime + OFFSET_TO_SECOND; } } - else if( fchar == internal_W ) + else if( fchar == source_W ) { ncount = 3; src = ftime + OFFSET_TO_WEEK; @@ -1141,7 +1156,8 @@ __gg__char( cblc_field_t *dest, source_size)); ordinal /= __gg__power_of_ten(rdigits); int ch = ordinal-1; - memset(dest->data, internal_space, dest->capacity); + charmap_t *charmap = __gg__get_charmap(dest->encoding); + memset(dest->data, charmap->mapped_character(ascii_space), dest->capacity); dest->data[0] = ch; } @@ -1223,7 +1239,9 @@ __gg__current_date(cblc_field_t *dest) char retval[DATE_STRING_BUFFER_SIZE]; timespec_to_string(retval, tp); - ascii_to_internal_str(retval, strlen(retval)); + __gg__convert_encoding(PTRCAST(char, retval), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); string_to_dest(dest, retval); } @@ -1235,7 +1253,7 @@ __gg__seconds_past_midnight(cblc_field_t *dest) struct cbl_timespec tp = {}; struct tm tm; __int128 retval=0; - + __gg__clock_gettime(&tp); // time_t tv_sec; long tv_nsec localtime_r(&tp.tv_sec, &tm); @@ -1462,10 +1480,18 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string { // FUNCTION FORMATTED-CURRENT-DATE + cbl_encoding_t from = input->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_from = __gg__get_charmap(from); + charmap_t *charmap_to = __gg__get_charmap(to); + + int dest_space = charmap_to->mapped_character(ascii_space); + int format_Z = charmap_from->mapped_character(ascii_Z); + // Establish the destination, and set it to spaces char *d = PTRCAST(char, dest->data); const char *dend = d + dest->capacity; - memset(d, internal_space, dest->capacity); + memset(d, dest_space, dest->capacity); // Establish the formatting string: const char *format = PTRCAST(char, (input->data+input_offset)); @@ -1477,7 +1503,7 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string while( p < format_end ) { int ch = *p++; - if( ch == internal_Z ) + if( ch == format_Z ) { is_zulu = true; break; @@ -1510,8 +1536,8 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string ctm.tz_offset = -timezone/60; char achftime[64]; - get_all_time(achftime, ctm); - ftime_replace(d, dend, format, format_end, achftime); + get_all_time(dest, achftime, ctm); + ftime_replace(d, dend, format, format_end, charmap_from, achftime); } extern "C" @@ -1526,10 +1552,17 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string { // FUNCTION FORMATTED-DATE + cbl_encoding_t from = arg1->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_from = __gg__get_charmap(from); + charmap_t *charmap_to = __gg__get_charmap(to); + + int dest_space = charmap_to->mapped_character(ascii_space); + // Establish the destination, and set it to spaces char *d = PTRCAST(char, dest->data); const char *dend = d + dest->capacity; - memset(d, internal_space, dest->capacity); + memset(d, dest_space, dest->capacity); // Establish the formatting string: char *format = PTRCAST(char, (arg1->data+arg1_offset)); @@ -1540,14 +1573,14 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string populate_ctm_from_date(ctm, arg2, arg2_offset, arg2_size); char achftime[64]; - get_all_time(achftime, ctm); + get_all_time(dest, achftime, ctm); if( __gg__exception_code ) { - memset(d, internal_space, dend-d); + memset(d, dest_space, dend-d); } else { - ftime_replace(d, dend, format, format_end, achftime); + ftime_replace(d, dend, format, format_end, charmap_from, achftime); __gg__adjust_dest_size(dest, format_end-format); } } @@ -1571,16 +1604,21 @@ __gg__formatted_datetime( cblc_field_t *dest, // Destination string { // FUNCTION FORMATTED-DATETIME + cbl_encoding_t from = par1->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_from = __gg__get_charmap(from); + charmap_t *charmap_to = __gg__get_charmap(to); + // Establish the destination, and set it to spaces char *d = PTRCAST(char, (dest->data)); const char *dend = d + dest->capacity; - memset(d, internal_space, dest->capacity); + memset(d, charmap_from->mapped_character(ascii_space), dest->capacity); // Establish the formatting string: char *format = PTRCAST(char, (par1->data+par1_o)); char *format_end = format + par1_s; - trim_trailing_spaces(format, format_end); - bool is_zulu = is_zulu_format(format, format_end); + trim_trailing_spaces(format, format_end, charmap_from->mapped_character(ascii_space)); + bool is_zulu = is_zulu_format(format, format_end, charmap_from); struct cobol_tm ctm = {}; @@ -1595,14 +1633,14 @@ __gg__formatted_datetime( cblc_field_t *dest, // Destination string } char achftime[64]; - get_all_time(achftime, ctm); + get_all_time(dest, achftime, ctm); if( __gg__exception_code ) { - memset(d, internal_space, dend-d); + memset(d, charmap_to->mapped_character(ascii_space), dend-d); } else { - ftime_replace(d, dend, format, format_end, achftime); + ftime_replace(d, dend, format, format_end, charmap_from, achftime); __gg__adjust_dest_size(dest, format_end-format); } } @@ -1623,16 +1661,25 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string { // FUNCTION FORMATTED-TIME + cbl_encoding_t from = par1->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_from = __gg__get_charmap(from); + charmap_t *charmap_to = __gg__get_charmap(to); + + int dest_space = charmap_to->mapped_character(ascii_space); + // Establish the destination, and set it to spaces char *d = PTRCAST(char, dest->data); const char *dend = d + dest->capacity; - memset(d, internal_space, dest->capacity); + memset(d, dest_space, dest->capacity); // Establish the formatting string: char *format = PTRCAST(char, (par1->data+par1_o)); char *format_end = format + par1_s; - trim_trailing_spaces(format, format_end); - bool is_zulu = is_zulu_format(format, format_end); + trim_trailing_spaces( format, + format_end, + charmap_from->mapped_character(ascii_space)); + bool is_zulu = is_zulu_format(format, format_end, charmap_from); struct cobol_tm ctm = {}; populate_ctm_from_time( ctm, @@ -1649,14 +1696,14 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string } char achftime[64]; - get_all_time(achftime, ctm); + get_all_time(dest, achftime, ctm); if( __gg__exception_code ) { - memset(d, internal_space, dend-d); + memset(d, dest_space, dend-d); } else { - ftime_replace(d, dend, format, format_end, achftime); + ftime_replace(d, dend, format, format_end, charmap_from, achftime); __gg__adjust_dest_size(dest, format_end-format); } } @@ -1988,14 +2035,27 @@ __gg__lower_case( cblc_field_t *dest, size_t input_offset, size_t input_size) { + cbl_encoding_t from = input->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_dest = __gg__get_charmap(to); + size_t dest_length = dest->capacity; size_t source_length = input_size; - memset(dest->data, internal_space, dest_length); - memcpy(dest->data, input->data+input_offset, std::min(dest_length, source_length)); - internal_to_ascii( PTRCAST(char, dest->data), dest_length); + size_t length = std::min(dest_length, source_length); + memset( dest->data, + charmap_dest->mapped_character(ascii_space), + dest_length); + memcpy(dest->data, input->data+input_offset, length); + __gg__convert_encoding_length(PTRCAST(char, dest->data), + length, + from, + DEFAULT_CHARMAP_SOURCE); std::transform(dest->data, dest->data + dest_length, dest->data, - [](unsigned char c) { return std::tolower(c); }); - ascii_to_internal_str( PTRCAST(char, dest->data), dest_length); + [](unsigned char c) { return std::tolower(c); }); + __gg__convert_encoding_length(PTRCAST(char, dest->data), + length, + DEFAULT_CHARMAP_SOURCE, + to); } extern "C" @@ -2302,8 +2362,16 @@ numval( cblc_field_t *dest, __int128 retval = 0; int retval_rdigits = 0; + charmap_t *charmap = __gg__get_charmap(input->encoding); + unsigned char decimal_point + = charmap->mapped_character(__gg__get_decimal_point()); + int mapped_0 = charmap->mapped_character(ascii_0); + int mapped_9 = charmap->mapped_character(ascii_9); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_plus = charmap->mapped_character(ascii_plus); + int mapped_minus = charmap->mapped_character(ascii_minus); + bool saw_digit= false; - char decimal_point = ascii_to_internal(__gg__get_decimal_point()); bool in_fraction = false; bool leading_sign = false; bool is_negative = false; @@ -2330,24 +2398,24 @@ numval( cblc_field_t *dest, case SPACE1: // We tolerate spaces, and expect to end with a sign, digit, // or decimal point: - if( ch == internal_space ) + if( ch == mapped_space ) { continue; } - if( ch == internal_plus ) + if( ch == mapped_plus ) { leading_sign = true; state = SPACE2; break; } - if( ch == internal_minus ) + if( ch == mapped_minus ) { leading_sign = true; is_negative = true; state = SPACE2; break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { saw_digit = true; retval = ch & 0xF; @@ -2366,11 +2434,11 @@ numval( cblc_field_t *dest, case SPACE2: // We tolerate spaces, and expect to end with a digit or decimal point: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { saw_digit = true; retval = ch & 0xF; @@ -2392,7 +2460,7 @@ numval( cblc_field_t *dest, // end with a space, a sign, "DB" or "CR", or the the end of the string // It's a bit complicated - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { saw_digit = true; retval *= 10; @@ -2413,27 +2481,27 @@ numval( cblc_field_t *dest, in_fraction = true; break; } - if( ch == internal_space ) + if( ch == mapped_space ) { state = SPACE3; break; } - if( ch == internal_plus && leading_sign) + if( ch == mapped_plus && leading_sign) { // We are allowed leading or trailing signs, but not both goto done; } - if( ch == internal_minus && leading_sign) + if( ch == mapped_minus && leading_sign) { // We are allowed leading or trailing signs, but not both goto done; } - if( ch == internal_plus ) + if( ch == mapped_plus ) { state = SPACE4; break; } - if( ch == internal_minus ) + if( ch == mapped_minus ) { is_negative = true; state = SPACE4; @@ -2477,26 +2545,26 @@ numval( cblc_field_t *dest, case SPACE3: // We tolerate spaces, or we end with a sign: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch == internal_plus && leading_sign) + if( ch == mapped_plus && leading_sign) { // We are allowed leading or trailing signs, but not both goto done; } - if( ch == internal_minus && leading_sign) + if( ch == mapped_minus && leading_sign) { // We are allowed leading or trailing signs, but not both goto done; } - if( ch == internal_plus ) + if( ch == mapped_plus ) { state = SPACE4; break; } - if( ch == internal_minus ) + if( ch == mapped_minus ) { is_negative = true; state = SPACE4; @@ -2537,7 +2605,7 @@ numval( cblc_field_t *dest, goto done; break; case SPACE4: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } @@ -2597,8 +2665,24 @@ numval_c( cblc_field_t *dest, int sign = 0; int rdigits = 0; int rdigit_bump = 0; - unsigned char decimal_point = ascii_to_internal(__gg__get_decimal_point()); - unsigned char decimal_separator = ascii_to_internal(__gg__get_decimal_separator()); + charmap_t *charmap = __gg__get_charmap(src->encoding); + unsigned char decimal_point + = charmap->mapped_character(__gg__get_decimal_point()); + unsigned char decimal_separator + = charmap->mapped_character(__gg__get_decimal_separator()); + int mapped_0 = charmap->mapped_character(ascii_0); + int mapped_9 = charmap->mapped_character(ascii_9); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_plus = charmap->mapped_character(ascii_plus); + int mapped_minus = charmap->mapped_character(ascii_minus); + int mapped_C = charmap->mapped_character(ascii_C); + int mapped_R = charmap->mapped_character(ascii_R); + int mapped_D = charmap->mapped_character(ascii_D); + int mapped_B = charmap->mapped_character(ascii_B); + int mapped_c = charmap->mapped_character(ascii_c); + int mapped_r = charmap->mapped_character(ascii_r); + int mapped_d = charmap->mapped_character(ascii_d); + int mapped_b = charmap->mapped_character(ascii_b); char *currency_start; char *currency_end; @@ -2614,12 +2698,12 @@ numval_c( cblc_field_t *dest, } char *pcurrency = currency_start; // Trim off spaces from the currency: - while( *pcurrency == internal_space && pcurrency < currency_end ) + while( *pcurrency == mapped_space && pcurrency < currency_end ) { pcurrency += 1; } - while( *(currency_end-1) == internal_space && currency_end > currency_start ) + while( *(currency_end-1) == mapped_space && currency_end > currency_start ) { currency_end -= 1; } @@ -2646,11 +2730,12 @@ numval_c( cblc_field_t *dest, { case first_space : // Eat up spaces, if any, and then dispatch on the first non-space: - if( ch != internal_space ) + if( ch != mapped_space ) { // ch can now be a plus, a minus, a digit, or the first character // of the currency string - if( ch == internal_plus || ch == internal_minus ) + if( ch == mapped_plus + || ch == mapped_minus ) { state = first_sign; // Decrement to pointer in order to pick up the character again @@ -2661,7 +2746,7 @@ numval_c( cblc_field_t *dest, state = currency; p -= 1; } - else if( (ch >= internal_0 && ch <= internal_9) + else if( (ch >= mapped_0 && ch <= mapped_9) || ch == decimal_point ) { state = digits; @@ -2681,7 +2766,7 @@ numval_c( cblc_field_t *dest, case first_sign : // We know the character is a plus or a minus: - if( ch == internal_plus ) + if( ch == mapped_plus ) { sign = 1; state = second_space; @@ -2696,14 +2781,14 @@ numval_c( cblc_field_t *dest, case second_space : // Eat up spaces, if any. This segment has to end with a currency or // a digit: - if( ch != internal_space ) + if( ch != mapped_space ) { if( ch == *pcurrency ) { state = currency; p -= 1; } - else if( (ch >= internal_0 && ch <= internal_9) + else if( (ch >= mapped_0 && ch <= mapped_9) || ch == decimal_point ) { state = digits; @@ -2744,9 +2829,9 @@ numval_c( cblc_field_t *dest, case before_digits : // Eat up spaces, if any. This segment has to end with a digit - if( ch != internal_space ) + if( ch != mapped_space ) { - if( (ch >= internal_0 && ch <= internal_9) + if( (ch >= mapped_0 && ch <= mapped_9) || ch == decimal_point ) { state = digits; @@ -2766,7 +2851,7 @@ numval_c( cblc_field_t *dest, case digits : // The only thing allowed here are digits, decimal points, and // decimal separators - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { // We have a digit. rdigits += rdigit_bump; @@ -2799,14 +2884,14 @@ numval_c( cblc_field_t *dest, case after_digits : // after digits, the only valid things are spaces, plus, minus, D, or C - if( ch != internal_space ) + if( ch != charmap->mapped_character(ascii_space) ) { - if( ch == internal_plus - || ch == internal_minus - || ch == internal_D - || ch == internal_d - || ch == internal_C - || ch == internal_c ) + if( ch == mapped_plus + || ch == mapped_minus + || ch == mapped_D + || ch == mapped_d + || ch == mapped_C + || ch == mapped_c ) { state = second_sign; p -= 1; @@ -2821,24 +2906,24 @@ numval_c( cblc_field_t *dest, errcode = p - pstart; p = pend; } - if( ch == internal_plus ) + if( ch == mapped_plus ) { sign = 1; } - else if( ch == internal_minus ) + else if( ch == mapped_minus ) { sign = -1; } - else if( (ch == internal_D || ch == internal_d) + else if( (ch == mapped_D || ch == mapped_d) && p < pend - && (*p == internal_B || *p == internal_b) ) + && (*p == mapped_B || *p == mapped_b) ) { sign = -1; p += 1; } - else if( (ch == internal_C || ch == internal_c) + else if( (ch == mapped_C || ch == mapped_c) && p < pend - && (*p == internal_R || *p == internal_r) ) + && (*p == mapped_R || *p == mapped_r) ) { sign = -1; p += 1; @@ -2848,7 +2933,7 @@ numval_c( cblc_field_t *dest, case final_space : // There should be only spaces until the end - if( ch == internal_space ) + if( ch == mapped_space ) { continue; } @@ -2963,15 +3048,12 @@ __gg__ord(cblc_field_t *dest, size_t input_offset, size_t /*input_size*/) { - // We get our input in internal_character form. + // FUNCTION ORD const char *arg = PTRCAST(char, (input->data + input_offset)); // The ORD function takes a single-character string and returns the // ordinal position of that character. - // In ASCII mode, an A is 0x41, so we return 0x42 - // In EBCDIC mode, an A is 0xC1, so we return 0xC2 - size_t retval = (arg[0]&0xFF) + 1; __gg__int128_to_field(dest, retval, @@ -3317,6 +3399,11 @@ __gg__trim( cblc_field_t *dest, size_t arg2_offset, size_t arg2_size) { + cbl_encoding_t from = arg1->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap = __gg__get_charmap(to); + int mapped_space = charmap->mapped_character(ascii_space); + int rdigits; __int128 type = __gg__binary_value_from_qualified_field(&rdigits, arg2, @@ -3330,30 +3417,39 @@ __gg__trim( cblc_field_t *dest, !(dest->attr & intermediate_e) ) { fprintf(stderr, - "We expect the target of a FUNCTION TIME to " + "We expect the target of a FUNCTION TRIM to " "be an intermediate alphanumeric\n"); abort(); } dest->capacity = dest->offset; + // Make a copy of the input: + char *copy = static_cast(malloc(arg1_size)); + massert(copy); + memcpy(copy, arg1->data+arg1_offset, arg1_size); + + // Convert it to the destination encoding + __gg__convert_encoding_length(copy, arg1_size, from, to); + + // No matter what, we want to find the leftmost non-space and the // rightmost non-space: - char *left = PTRCAST(char, (arg1->data+arg1_offset)); + char *left = copy; char *right = left + arg1_size-1; // Find left and right: the first and last non-spaces while( left <= right ) { - if( *left != internal_space && *right != internal_space ) + if( *left != mapped_space && *right != mapped_space ) { break; } - if( *left == internal_space ) + if( *left == mapped_space ) { left += 1; } - if( *right == internal_space ) + if( *right == mapped_space ) { right -= 1; } @@ -3362,13 +3458,13 @@ __gg__trim( cblc_field_t *dest, { // We want to leave any trailing spaces, so we return 'right' to its // original value: - right = PTRCAST(char, (arg1->data+arg1_offset)) + arg1_size-1; + right = copy + arg1_size-1; } else if( type == TRAILING ) { // We want to leave any leading spaces, so we return 'left' to its // original value: - left = PTRCAST(char, (arg1->data+arg1_offset)); + left = copy; } if( left > right ) @@ -3382,12 +3478,6 @@ __gg__trim( cblc_field_t *dest, size_t ncount = right+1 - left; __gg__adjust_dest_size(dest, ncount); - // Because it's a temporary, we are weakly confident that we can change - // the capacity to match what we want. At this writing, we aren't 100% - // sure of the implications of the run-time capacity not matching what the - // compiler believes the capacity to be at compile-time. But we obviously - // think it'll be okay. - char *dest_left = PTRCAST(char, dest->data); char *dest_right = dest_left + dest->capacity - 1; const char *dest_end = dest_left + dest->capacity; @@ -3398,7 +3488,7 @@ __gg__trim( cblc_field_t *dest, } while(dest_left < dest_end) { - *dest_left++ = internal_space; + *dest_left++ = mapped_space; } } @@ -3430,7 +3520,7 @@ __gg__random( cblc_field_t *dest, // This is the very first time through buf = (random_data *)malloc(sizeof(struct random_data)); buf->state = NULL; - state = (char *)malloc(state_len); + state = static_cast(malloc(state_len)); struct cbl_timespec ts; __gg__clock_gettime(&ts); @@ -3471,7 +3561,7 @@ __gg__random_next(cblc_field_t *dest) // This is the very first time through buf = (random_data *)malloc(sizeof(struct random_data)); buf->state = NULL; - state = (char *)malloc(state_len); + state = static_cast(malloc(state_len)); struct cbl_timespec ts; __gg__clock_gettime(&ts); initstate_r( ts.tv_nsec, state, state_len, buf); @@ -3494,18 +3584,37 @@ __gg__reverse(cblc_field_t *dest, size_t input_offset, size_t input_size) { + cbl_encoding_t from = input->encoding; + cbl_encoding_t to = dest->encoding; + size_t dest_length = dest->capacity; size_t source_length = input_size; size_t length = std::min(dest_length, source_length); - memset(dest->data, internal_space, dest_length); + + // Make a copy of the input + char *copy = static_cast(malloc(length)); + massert(copy); + memcpy(copy, input->data+input_offset, length); + + // Convert the input to the destination encoding + __gg__convert_encoding_length(copy, + length, + from, + to); + + // Set the destination to all spaces + charmap_t *charmap = __gg__get_charmap(to); + memset(dest->data, charmap->mapped_character(ascii_space), dest_length); for(size_t i=0; idata[i] = (input->data+input_offset)[source_length-1-i]; + dest->data[i] = copy[source_length-1-i]; } if( (dest->attr & intermediate_e) ) { dest->capacity = std::min(dest_length, source_length); } + + free(copy); } extern "C" @@ -3745,14 +3854,27 @@ __gg__upper_case( cblc_field_t *dest, size_t input_offset, size_t input_size) { + cbl_encoding_t from = input->encoding; + cbl_encoding_t to = dest->encoding; + charmap_t *charmap_dest = __gg__get_charmap(to); + size_t dest_length = dest->capacity; size_t source_length = input_size; - memset(dest->data, internal_space, dest_length); - memcpy(dest->data, input->data+input_offset, std::min(dest_length, source_length)); - internal_to_ascii( PTRCAST(char, dest->data), dest_length); + size_t length = std::min(dest_length, source_length); + memset( dest->data, + charmap_dest->mapped_character(ascii_space), + dest_length); + memcpy(dest->data, input->data+input_offset, length); + __gg__convert_encoding_length(PTRCAST(char, dest->data), + length, + from, + DEFAULT_CHARMAP_SOURCE); std::transform(dest->data, dest->data + dest_length, dest->data, - [](unsigned char c) { return std::toupper(c); }); - ascii_to_internal_str( PTRCAST(char, dest->data), dest_length); + [](unsigned char c) { return std::toupper(c); }); + __gg__convert_encoding_length(PTRCAST(char, dest->data), + length, + DEFAULT_CHARMAP_SOURCE, + to); } extern "C" @@ -3781,7 +3903,9 @@ __gg__when_compiled(cblc_field_t *dest, size_t tv_sec, long tv_nsec) tp.tv_nsec = tv_nsec; char retval[DATE_STRING_BUFFER_SIZE]; timespec_to_string(retval, tp); - ascii_to_internal_str(retval, strlen(retval)); + __gg__convert_encoding(PTRCAST(char, retval), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); string_to_dest(dest, retval); } @@ -3815,12 +3939,20 @@ __gg__year_to_yyyy( cblc_field_t *dest, static int -gets_int(int ndigits, const char *p, const char *pend, int *digits) +gets_int( int ndigits, + const char *p, + const char *pend, + charmap_t *charmap, + int *digits) { // This routine returns the value of the integer at p. If there is something // wrong with the integer, it returns a negative number, the value being the // position (starting at 1) where the problem is. int retval = 0; + + int checked_0 = charmap->mapped_character(ascii_0); + int checked_9 = charmap->mapped_character(ascii_9); + memset(digits, 0xFF, ndigits * sizeof(int)); for(int i=1; i<=ndigits; i++) { @@ -3831,7 +3963,7 @@ gets_int(int ndigits, const char *p, const char *pend, int *digits) break; } int ch = *p++; - if( ch < internal_0 || ch > internal_9 ) + if( ch < checked_0 || ch > checked_9 ) { // This isn't a digit zero through nine retval = -i; @@ -3846,7 +3978,10 @@ gets_int(int ndigits, const char *p, const char *pend, int *digits) static int -gets_year(const char *p, const char *pend, struct cobol_tm &ctm) +gets_year(const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // Populates ctm.YYYY, ctm.days_in_year, and ctm.weeks_in_year, which are // all determined by the YYYY value. @@ -3856,7 +3991,7 @@ gets_year(const char *p, const char *pend, struct cobol_tm &ctm) int retval = 0; int digits[4]; - int YYYY = gets_int(4, p, pend, digits); + int YYYY = gets_int(4, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] == 0 ) { @@ -3910,7 +4045,10 @@ gets_year(const char *p, const char *pend, struct cobol_tm &ctm) static int -gets_month(const char *p, const char *pend, struct cobol_tm &ctm) +gets_month( const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // Populates ctm.MM @@ -3919,7 +4057,7 @@ gets_month(const char *p, const char *pend, struct cobol_tm &ctm) int digits[2]; int retval = 0; - int MM = gets_int(2, p, pend, digits); + int MM = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 1) { @@ -3957,7 +4095,10 @@ gets_month(const char *p, const char *pend, struct cobol_tm &ctm) static int -gets_day(const char *p, const char *pend, struct cobol_tm &ctm) +gets_day( const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // Populates ctm.DD, ctm.day_of_week, ctm.week_of_year, ctm.day_of_week @@ -3965,7 +4106,7 @@ gets_day(const char *p, const char *pend, struct cobol_tm &ctm) int digits[2]; int retval = 0; - int DD = gets_int(2, p, pend, digits); + int DD = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 3) { @@ -4026,12 +4167,15 @@ gets_day(const char *p, const char *pend, struct cobol_tm &ctm) static int -gets_day_of_week(const char *p, const char *pend, struct cobol_tm &ctm) +gets_day_of_week( const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // This is just a simple D, for day-of-week. The COBOL spec is that // it be 1 to 7, 1 being Monday int digits[1]; - int day_of_week = gets_int(1, p, pend, digits); + int day_of_week = gets_int(1, p, pend, charmap, digits); if( day_of_week<0 || day_of_week >7) { // The single character at source is no good: @@ -4075,11 +4219,14 @@ gets_day_of_week(const char *p, const char *pend, struct cobol_tm &ctm) static int -gets_day_of_year(const char *p, const char *pend, struct cobol_tm &ctm) +gets_day_of_year( const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // This is a three-digit day-of-year, 001 through 365,366 int digits[3]; - int DDD = gets_int(3, p, pend, digits); + int DDD = gets_int(3, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 3) { return 1; @@ -4132,11 +4279,14 @@ gets_day_of_year(const char *p, const char *pend, struct cobol_tm &ctm) static int -gets_week(const char *p, const char *pend, struct cobol_tm &ctm) +gets_week(const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // This is a two-digit value, 01 through 52,53 int digits[2]; - int ww = gets_int(2, p, pend, digits); + int ww = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 5 ) { return 1; @@ -4174,12 +4324,13 @@ static int gets_hours( const char *p, const char *pend, + charmap_t *charmap, struct cobol_tm &ctm, bool in_offset) { // This is a two-digit value, 01 through 23 int digits[2]; - int hh = gets_int(2, p, pend, digits); + int hh = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 2 ) { @@ -4222,12 +4373,13 @@ static int gets_minutes( const char *p, const char *pend, + charmap_t *charmap, struct cobol_tm &ctm, bool in_offset) { // This is a two-digit value, 01 through 59 int digits[2]; - int mm = gets_int(2, p, pend, digits); + int mm = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 5 ) { return 1; @@ -4261,11 +4413,14 @@ gets_minutes( const char *p, static int -gets_seconds(const char *p, const char *pend, struct cobol_tm &ctm) +gets_seconds( const char *p, + const char *pend, + charmap_t *charmap, + struct cobol_tm &ctm) { // This is a two-digit value, 01 through 59 int digits[2]; - int ss = gets_int(2, p, pend, digits); + int ss = gets_int(2, p, pend, charmap, digits); if( digits[0] == -1 || digits[0] > 5 ) { return 1; @@ -4295,26 +4450,32 @@ gets_nanoseconds( const char *f, const char *f_end, const char *p, const char *pend, - struct cobol_tm &ctm) + struct cobol_tm &ctm, + charmap_t *charmap_format, + charmap_t *charmap_source) { // Because nanoseconds digits to the right of the decimal point can vary from // one digit to our implementation-specific limit of nine characters, this // routine is slightly different. If there is an error, that causes a // positive return value. A negative return value contains the number of - // digits we processed + // digits we processed, + + int format_s = charmap_format->mapped_character(ascii_s); + int source_0 = charmap_source->mapped_character(ascii_0); + int source_9 = charmap_source->mapped_character(ascii_9); int errpos = 0; int ncount = 0; int nanoseconds = 0; const char *pinit = p; - while( f < f_end && *f == internal_s && p < pend ) + while( f < f_end && *f == format_s && p < pend ) { f += 1; int ch = *p++; errpos += 1; - if( ch < internal_0 || ch > internal_9 ) + if( ch < source_0 || ch > source_9 ) { // Let our caller know we see a bad character return errpos; @@ -4354,9 +4515,29 @@ fill_cobol_tm(cobol_tm &ctm, char *source = PTRCAST(char, (par2->data+par2_offset)); char *source_end = source + par2_size; + charmap_t *charmap_format = __gg__get_charmap(par1->encoding); + charmap_t *charmap_checked = __gg__get_charmap(par2->encoding); + int checked_space = charmap_checked->mapped_character(ascii_space); + int format_space = charmap_format->mapped_character(ascii_space); + int format_T = charmap_format->mapped_character(ascii_T ); + int format_colon = charmap_format->mapped_character(ascii_colon ); + int format_plus = charmap_format->mapped_character(ascii_plus ); + int format_minus = charmap_format->mapped_character(ascii_minus ); + int format_W = charmap_format->mapped_character(ascii_W ); + int format_Z = charmap_format->mapped_character(ascii_Z ); + int format_z = charmap_format->mapped_character(ascii_z ); + int format_s = charmap_format->mapped_character(ascii_s ); + int format_m = charmap_format->mapped_character(ascii_m ); + int format_h = charmap_format->mapped_character(ascii_h ); + int format_w = charmap_format->mapped_character(ascii_w ); + int format_Y = charmap_format->mapped_character(ascii_Y ); + int format_M = charmap_format->mapped_character(ascii_M ); + int format_D = charmap_format->mapped_character(ascii_D ); + int format_zero = charmap_format->mapped_character(ascii_zero ); + // Let's eliminate trailing spaces... - trim_trailing_spaces(format, format_end); - trim_trailing_spaces(source, source_end); + trim_trailing_spaces(format, format_end, format_space); + trim_trailing_spaces(source, source_end, checked_space); bool in_offset = false; bool in_nanoseconds = false; @@ -4375,10 +4556,10 @@ fill_cobol_tm(cobol_tm &ctm, { char ch = *format; - if( ch == internal_T - || ch == internal_colon - || ch == internal_minus - || ch == internal_W) + if( ch == format_T + || ch == format_colon + || ch == format_minus + || ch == format_W) { // These are just formatting characters. They need to be duplicated, // but are otherwise ignored. @@ -4390,34 +4571,34 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_plus ) + if( ch == format_plus ) { // This flags a following hhmm offset. It needs to match a '+' or '-' - if( *source != internal_plus - && *source != internal_minus - && *source != internal_zero) + if( *source != format_plus + && *source != format_minus + && *source != format_zero) { break; } - if( *source == internal_zero ) + if( *source == format_zero ) { // The next four characters have to be zeroes - if( source[1] != internal_zero ) + if( source[1] != format_zero ) { retval += 1; break; } - if( source[2] != internal_zero ) + if( source[2] != format_zero ) { retval += 2; break; } - if( source[3] != internal_zero ) + if( source[3] != format_zero ) { retval += 3; break; } - if( source[4] != internal_zero ) + if( source[4] != format_zero ) { retval += 4; break; @@ -4441,9 +4622,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_Y ) + if( ch == format_Y ) { - errpos = gets_year(source, source_end, ctm); + errpos = gets_year(source, source_end, charmap_checked, ctm); if( errpos > 0 ) { retval += errpos - 1; @@ -4453,9 +4634,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_M ) + if( ch == format_M ) { - errpos = gets_month(source, source_end, ctm); + errpos = gets_month(source, source_end, charmap_checked, ctm); if( errpos > 0 ) { retval += errpos - 1; @@ -4465,13 +4646,13 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_D ) + if( ch == format_D ) { // We have three possibilities: DDD, DD, and D - if( format[1] != internal_D ) + if( format[1] != format_D ) { // A singleton 'D' is a day-of-week - errpos = gets_day_of_week(source, source_end, ctm); + errpos = gets_day_of_week(source, source_end, charmap_checked, ctm); if( errpos > 0) { retval += errpos - 1; @@ -4479,10 +4660,10 @@ fill_cobol_tm(cobol_tm &ctm, } bump = 1; } - else if( format[2] != internal_D ) + else if( format[2] != format_D ) { // This is DD, for day-of-month - errpos = gets_day(source, source_end, ctm); + errpos = gets_day(source, source_end, charmap_checked, ctm); if( errpos > 0) { retval += errpos - 1; @@ -4494,7 +4675,7 @@ fill_cobol_tm(cobol_tm &ctm, { // Arriving here means that it is DDD, for day-of-year // This is DD, for day-of-month - errpos = gets_day_of_year(source, source_end, ctm); + errpos = gets_day_of_year(source, source_end, charmap_checked, ctm); if( errpos > 0) { retval += errpos - 1; @@ -4505,9 +4686,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_w ) + if( ch == format_w ) { - errpos = gets_week(source, source_end, ctm); + errpos = gets_week(source, source_end, charmap_checked, ctm); if( errpos > 0 ) { retval += errpos - 1; @@ -4517,9 +4698,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_h ) + if( ch == format_h ) { - errpos = gets_hours(source, source_end, ctm, in_offset); + errpos = gets_hours(source, source_end, charmap_checked, ctm, in_offset); if( errpos > 0 ) { retval += errpos - 1; @@ -4529,9 +4710,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_m ) + if( ch == format_m ) { - errpos = gets_minutes(source, source_end, ctm, in_offset); + errpos = gets_minutes(source, source_end, charmap_checked, ctm, in_offset); if( errpos > 0 ) { retval += errpos - 1; @@ -4541,9 +4722,9 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_s && !in_nanoseconds ) + if( ch == format_s && !in_nanoseconds ) { - errpos = gets_seconds(source, source_end, ctm); + errpos = gets_seconds(source, source_end, charmap_checked, ctm); if( errpos > 0 ) { retval += errpos - 1; @@ -4553,10 +4734,16 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_s && in_nanoseconds ) + if( ch == format_s && in_nanoseconds ) { // Peel off digits to the right of the decimal point one at a time - errpos = gets_nanoseconds(format, format_end, source, source_end, ctm); + errpos = gets_nanoseconds(format, + format_end, + source, + source_end, + ctm, + charmap_format, + charmap_checked); if( errpos > 0 ) { retval += errpos - 1; @@ -4566,7 +4753,7 @@ fill_cobol_tm(cobol_tm &ctm, goto proceed; } - if( ch == internal_Z || ch == internal_z ) + if( ch == format_Z || ch == format_z ) { // This has to be the end of the road if( std::toupper((unsigned char)source[0]) != 'Z' ) @@ -4691,14 +4878,15 @@ __gg__hex_of(cblc_field_t *dest, size_t field_offset, size_t field_size) { + charmap_t *charmap = __gg__get_charmap(dest->encoding); static const char hex[17] = "0123456789ABCDEF"; size_t bytes = field_size; __gg__adjust_dest_size(dest, 2*bytes); for(size_t i=0; idata+field_offset)[i]; - dest->data[2*i] = ascii_to_internal(hex[byte>>4]); - dest->data[2*i+1] = ascii_to_internal(hex[byte&0xF]); + dest->data[2*i ] = charmap->mapped_character(hex[byte>>4]); + dest->data[2*i+1] = charmap->mapped_character(hex[byte&0xF]); } } @@ -4809,10 +4997,21 @@ __gg__lowest_algebraic( cblc_field_t *dest, } static int -floating_format_tester(char const * const f, char const * const f_end) +floating_format_tester( char const * const f, + char const * const f_end, + cbl_encoding_t encoding) { + charmap_t *charmap = __gg__get_charmap(encoding); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_plus = charmap->mapped_character(ascii_plus); + int mapped_minus = charmap->mapped_character(ascii_minus); + int mapped_0 = charmap->mapped_character(ascii_0); + int mapped_9 = charmap->mapped_character(ascii_9); + int mapped_E = charmap->mapped_character(ascii_E); + int mapped_e = charmap->mapped_character(ascii_e); + int decimal_point = charmap->mapped_character(__gg__get_decimal_point()); + int retval = -1; - char decimal_point = __gg__get_decimal_point(); enum { @@ -4833,18 +5032,18 @@ floating_format_tester(char const * const f, char const * const f_end) switch(state) { case SPACE1: - if( ch == internal_space ) + if( ch == mapped_space ) { // Just keep looking break; } - if( ch == internal_minus - || ch == internal_plus) + if( ch == mapped_minus + || ch == mapped_plus) { state = SPACE2; break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { state = DIGITS1; break; @@ -4859,11 +5058,11 @@ floating_format_tester(char const * const f, char const * const f_end) break; case SPACE2: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { state = DIGITS1; break; @@ -4877,7 +5076,7 @@ floating_format_tester(char const * const f, char const * const f_end) break; case DIGITS1: - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { break; } @@ -4886,7 +5085,7 @@ floating_format_tester(char const * const f, char const * const f_end) state = DIGITS2; break; } - if( ch == internal_space ) + if( ch == mapped_space ) { state = SPACE3; break; @@ -4895,16 +5094,16 @@ floating_format_tester(char const * const f, char const * const f_end) break; case DIGITS2: - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { break; } - if( ch == internal_space ) + if( ch == mapped_space ) { state = SPACE3; break; } - if( ch == internal_E || ch == internal_e ) + if( ch == mapped_E || ch == mapped_e ) { state = SPACE4; break; @@ -4913,16 +5112,16 @@ floating_format_tester(char const * const f, char const * const f_end) break; case SPACE3: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { retval = index; break; } - if( ch == internal_E || ch == internal_e ) + if( ch == mapped_E || ch == mapped_e ) { state = SPACE4; break; @@ -4931,16 +5130,16 @@ floating_format_tester(char const * const f, char const * const f_end) break; case SPACE4: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch == internal_minus || ch == internal_plus ) + if( ch == mapped_minus || ch == mapped_plus ) { state = SPACE5; break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { state = DIGITS3; break; @@ -4949,11 +5148,11 @@ floating_format_tester(char const * const f, char const * const f_end) break; case SPACE5: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { state = DIGITS3; break; @@ -4962,11 +5161,11 @@ floating_format_tester(char const * const f, char const * const f_end) break; case DIGITS3: - if( ch >= internal_0 && ch <= internal_9 ) + if( ch >= mapped_0 && ch <= mapped_9 ) { break; } - if( ch == internal_space ) + if( ch == mapped_space ) { state = SPACE6; break; @@ -4975,7 +5174,7 @@ floating_format_tester(char const * const f, char const * const f_end) break; case SPACE6: - if( ch == internal_space ) + if( ch == mapped_space ) { break; } @@ -5004,8 +5203,12 @@ __gg__numval_f( cblc_field_t *dest, GCOB_FP128 value = 0; const char *data = PTRCAST(char, (source->data + source_offset)); const char *data_end = data + source_size; + charmap_t *charmap = __gg__get_charmap(source->encoding); + int mapped_space = charmap->mapped_character(ascii_space); - int error = floating_format_tester(data, data_end); + int error = floating_format_tester( data, + data_end, + source->encoding); if( error || source_size >= 256 ) { @@ -5019,7 +5222,7 @@ __gg__numval_f( cblc_field_t *dest, while( data < data_end ) { char ch = *data++; - if( ch != internal_space ) + if( ch != mapped_space ) { *p++ = ch; } @@ -5043,7 +5246,9 @@ __gg__test_numval_f(cblc_field_t *dest, const char *data = PTRCAST(char, (source->data + source_offset)); const char *data_end = data + source_size; - int error = floating_format_tester(data, data_end); + int error = floating_format_tester( data, + data_end, + source->encoding); __gg__int128_to_field(dest, error, @@ -5122,7 +5327,7 @@ strcasestr( const char *haystack, return retval; } -static +static const char * strlaststr( const char *haystack, const char *haystack_e, @@ -5142,7 +5347,7 @@ strlaststr( const char *haystack, return retval; } -static +static const char * strcaselaststr( const char *haystack, const char *haystack_e, @@ -5164,7 +5369,7 @@ strcaselaststr( const char *haystack, extern "C" -void +void __gg__substitute( cblc_field_t *dest, const cblc_field_t *arg1_f, size_t arg1_o, @@ -5376,8 +5581,10 @@ __gg__locale_compare( cblc_field_t *dest, } __gg__adjust_dest_size(dest, 1); - ascii_to_internal_str(achretval, 1); dest->data[0] = *achretval; + __gg__convert_encoding(PTRCAST(char, dest->data), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); } extern "C" @@ -5412,7 +5619,9 @@ __gg__locale_date(cblc_field_t *dest, } __gg__adjust_dest_size(dest, strlen(ach)); - ascii_to_internal_str(ach, strlen(ach)); + __gg__convert_encoding(PTRCAST(char, dest->data), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); memcpy(dest->data, ach, strlen(ach)); } @@ -5448,7 +5657,9 @@ __gg__locale_time(cblc_field_t *dest, } __gg__adjust_dest_size(dest, strlen(ach)); - ascii_to_internal_str(ach, strlen(ach)); + __gg__convert_encoding(PTRCAST(char, dest->data), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); memcpy(dest->data, ach, strlen(ach)); } @@ -5486,6 +5697,8 @@ __gg__locale_time_from_seconds( cblc_field_t *dest, } __gg__adjust_dest_size(dest, strlen(ach)); - ascii_to_internal_str(ach, strlen(ach)); + __gg__convert_encoding(PTRCAST(char, dest->data), + DEFAULT_CHARMAP_SOURCE, + dest->encoding); memcpy(dest->data, ach, strlen(ach)); } diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index 1b54cfdc389..c355051e43a 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -53,6 +53,7 @@ #if __has_include() # include // for program_invocation_short_name #endif +#include #include "config.h" #include "libgcobol-fp.h" @@ -138,6 +139,7 @@ int __gg__default_compute_error = 0 ; int __gg__rdigits = 0 ; int __gg__nop = 0 ; int __gg__main_called = 0 ; +cbl_encoding_t __gg__console_encoding = no_encoding_e ; // During SORT operations, we don't want the end-of-file condition, which // happens as a matter of course, from setting the EOF exception condition. @@ -483,7 +485,7 @@ struct program_state memset(rt_currency_signs, 0, sizeof(rt_currency_signs)); // The default collating sequence: - if( internal_is_ebcdic ) + if( __gg__ebcdic_codeset_in_use ) { rt_collation = __gg__cp1140_to_cp1252_values; } @@ -491,6 +493,7 @@ struct program_state { rt_collation = __gg__one_to_one_values; } +// rt_collation = __gg__one_to_one_values; rt_program_name = NULL; } @@ -966,63 +969,6 @@ binary_to_little_endian( unsigned char *dest, memcpy(dest, &value, bytes); } -static void -turn_sign_bit_on(unsigned char *location) - { - if( internal_is_ebcdic ) - { - *location = (*location & 0xF) + 0xD0; - } - else - { - *location = (*location & 0xF) + 0x70; - } - } - -static void -turn_sign_bit_off(unsigned char *location) - { - if( internal_is_ebcdic ) - { - *location = (*location & 0xF) + 0xF0; - } - else - { - *location = (*location & 0xF) + 0x30; - } - } - -static bool -is_sign_bit_on(char ch) - { - bool retval; - if( (unsigned char)ch == 0xFF || ch == 0x00 ) - { - // Don't let HIGH-VALUE or LOW_VALUE confuse sign detection - retval = false; - } - else - { - retval = (ch & NUMERIC_DISPLAY_SIGN_BIT) != 0; - } - return retval; - } - -extern "C" -void -__gg__string_to_alpha_edited_ascii( char *dest, - const char *source, - int slength, - const char *picture) - { - char *dupe = static_cast(malloc(slength)); - massert(dupe); - memcpy(dupe, source, slength); - ascii_to_internal_str(dupe, slength); - __gg__string_to_alpha_edited(dest, dupe, slength, picture); - free(dupe); - } - static __int128 int128_to_int128_rounded( cbl_round_t rounded, __int128 value, @@ -1534,7 +1480,7 @@ int128_to_field(cblc_field_t *var, // Value is now scaled to the target's target_rdigits bool size_error = false; - int is_negative = value < 0 ; + bool is_negative = value < 0 ; if( !(var->attr & signable_e) && is_negative ) { @@ -1563,41 +1509,46 @@ int128_to_field(cblc_field_t *var, // Note that sending a signed value to an alphanumeric strips off // any plus or minus signs. memset(location, 0, length); - size_error = __gg__binary_to_string_internal( + size_error = __gg__binary_to_string_encoded( PTRCAST(char, location), length > MAX_FIXED_POINT_DIGITS ? MAX_FIXED_POINT_DIGITS : length, - value); + value, + var->encoding); break; case FldNumericDisplay: if( var->attr & signable_e ) { - /* There is a regrettable plethora of possibilities, here. */ - + charmap_t *charmap = __gg__get_charmap(var->encoding); // Things get exciting when a numeric-display value is signable if( var->attr & separate_e ) { // Whether positive or negative, a sign there will be: - char sign_ch = is_negative ? internal_minus : internal_plus ; + char sign_ch = is_negative ? + charmap->mapped_character(ascii_minus) + : charmap->mapped_character(ascii_plus) ; if( var->attr & leading_e ) { // The sign character goes into the first location size_error = - __gg__binary_to_string_internal(PTRCAST(char, location+1), + __gg__binary_to_string_encoded(PTRCAST(char, location+1), length-1, - value); + value, + var->encoding); location[0] = sign_ch; } else { // The sign character goes into the last location size_error = - __gg__binary_to_string_internal(PTRCAST(char, location), - length-1, value); + __gg__binary_to_string_encoded(PTRCAST(char, location), + length-1, + value, + var->encoding); location[length-1] = sign_ch; } } @@ -1611,9 +1562,10 @@ int128_to_field(cblc_field_t *var, // First, convert the binary value to the correct-length string size_error = - __gg__binary_to_string_internal(PTRCAST(char, location), + __gg__binary_to_string_encoded(PTRCAST(char, location), length, - value); + value, + var->encoding); // Check for a size error on a negative value. It conceivably // was truncated down to zero, in which case we need to @@ -1625,7 +1577,7 @@ int128_to_field(cblc_field_t *var, is_negative = false; for(size_t i=0; imapped_character(ascii_0) ) { is_negative = true; break; @@ -1636,34 +1588,28 @@ int128_to_field(cblc_field_t *var, unsigned char *sign_location = var->attr & leading_e ? location : location + length - 1; - if( internal_is_ebcdic ) - { - // Change the sign location from 0xF0 to 0xC0. - *sign_location &= (ZONE_SIGNED_EBCDIC + 0xF); - } - - if( is_negative ) - { - *sign_location |= NUMERIC_DISPLAY_SIGN_BIT; - } + *sign_location = charmap->set_digit_negative(*sign_location, + is_negative); } } else { // It's a simple positive number - size_error = __gg__binary_to_string_internal( + size_error = __gg__binary_to_string_encoded( PTRCAST(char, location), length, - value); + value, + var->encoding); } break; case FldNumericEdited: { + charmap_t *charmap = __gg__get_charmap(var->encoding); if( value == 0 && (var->attr & blank_zero_e) ) { - memset(location, internal_space, length); + memset(location, charmap->mapped_character(ascii_space), length); } else { @@ -1671,7 +1617,9 @@ int128_to_field(cblc_field_t *var, // At this point, value is scaled to the target's rdigits - size_error = __gg__binary_to_string_ascii(ach, var->digits, value); + size_error = __gg__binary_to_string_ascii(ach, + var->digits, + value); ach[var->digits] = NULLCH; // Convert that string according to the PICTURE clause @@ -1681,7 +1629,14 @@ int128_to_field(cblc_field_t *var, target_rdigits, is_negative, var->picture); - ascii_to_internal_str( PTRCAST(char, location), var->capacity); + size_t outlength; + const char *converted = __gg__iconverter( + DEFAULT_CHARMAP_SOURCE, + var->encoding, + PTRCAST(char, location), + var->capacity, + &outlength); + memcpy(location, converted, outlength); } break; @@ -1701,8 +1656,8 @@ int128_to_field(cblc_field_t *var, // Weirdly, this might be a figurative constant, hopefully usually // ZERO. Everything but HIGH-VALUE will end up zero. HIGH-VALUE // will become one, but it is, apparently harmless. The HIGH-VALUE - // must get processed separately elsewhere. As the author, it would - // be nice if I knew -- but I don't. + // must get processed separately elsewhere. As the author, it + // would be nice if I knew -- but I don't. binary_to_little_endian(location, length, value); @@ -1717,10 +1672,11 @@ int128_to_field(cblc_field_t *var, // Convert that string according to the PICTURE clause __gg__string_to_alpha_edited( - PTRCAST(char, location), - ach, - strlen(ach), - var->picture); + PTRCAST(char, location), + var->encoding, + ach, + strlen(ach), + var->picture); break; } @@ -1809,10 +1765,13 @@ int128_to_field(cblc_field_t *var, } static __int128 -edited_to_binary( char *ps_, +edited_to_binary( const cblc_field_t *field, + char *ps_, int length, int *rdigits) { + charmap_t *charmap = __gg__get_charmap(field->encoding); + const unsigned char *ps = const_cast(PTRCAST(unsigned char, ps_)); // This routine is used for converting NumericEdited strings to // binary. @@ -1843,15 +1802,17 @@ edited_to_binary( char *ps_, // is negative: if( length >= 2) { - if(((ps[length-2]&0xFF) == internal_D || (ps[length-2]&0xFF) == internal_d ) - &&((ps[length-1]&0xFF) == internal_B || (ps[length-1]&0xFF) == internal_b)) + if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_D) + || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_d)) + && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_B) + || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_b)) ) { hyphen = 1; } - else if( ((ps[length-2]&0xFF) == internal_C - || (ps[length-2]&0xFF) == internal_c) - && ((ps[length-1]&0xFF) == internal_R - || (ps[length-1]&0xFF) == internal_r) ) + else if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_C) + || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_c)) + && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_R) + || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_r)) ) { hyphen = 1; } @@ -1860,18 +1821,19 @@ edited_to_binary( char *ps_, while( index < length ) { unsigned char ch = ps[index++] & 0xFF; - if( ch == ascii_to_internal(__gg__decimal_point) ) + if( ch == charmap->mapped_character(__gg__decimal_point) ) { delta_r = 1; continue; } - if( ch == internal_minus ) + if( ch == charmap->mapped_character(ascii_minus) ) { hyphen = 1; continue; } - if( internal_0 <= ch && ch <= internal_9 ) + if( charmap->mapped_character(ascii_0) <= ch + && ch <= charmap->mapped_character(ascii_9) ) { result *= 10; // In both EBCDIC and ASCII, this works: @@ -1997,19 +1959,16 @@ get_binary_value_local( int *rdigits, case FldLiteralA : fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__); abort(); - // // Read the data area as a dirty string: - // retval = __gg__dirty_to_binary_internal( (const char *)resolved_location, - // resolved_length, - // rdigits ); break; case FldGroup : case FldAlphanumeric : // Read the data area as a dirty string: - retval = __gg__dirty_to_binary_internal( PTRCAST(const char, - resolved_location), - resolved_length, - rdigits ); + retval = __gg__dirty_to_binary( + PTRCAST(const char, resolved_location), + resolved_var->encoding, + resolved_length, + rdigits ); break; case FldNumericDisplay: @@ -2086,15 +2045,17 @@ get_binary_value_local( int *rdigits, } retval = __gg__numeric_display_to_binary(sign_byte_location, digits, - ndigits); + ndigits, + resolved_var->encoding); } break; } case FldNumericEdited : - retval = edited_to_binary( PTRCAST(char, resolved_location), - resolved_length, - rdigits); + retval = edited_to_binary(resolved_var, + PTRCAST(char, resolved_location), + resolved_length, + rdigits); break; case FldNumericBinary : @@ -2193,7 +2154,7 @@ cobol_time() extern "C" char * -__gg__get_date_yymmdd() +__gg__get_date_yymmdd(const cblc_field_t *field) { char ach[32]; @@ -2205,14 +2166,18 @@ __gg__get_date_yymmdd() local->tm_year % 100, local->tm_mon+1 % 100, local->tm_mday % 100 ); - - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } extern "C" char * -__gg__get_date_yyyymmdd() +__gg__get_date_yyyymmdd(const cblc_field_t *field) { char ach[32]; @@ -2225,13 +2190,18 @@ __gg__get_date_yyyymmdd() local->tm_mon+1, local->tm_mday); - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } extern "C" char * -__gg__get_date_yyddd() +__gg__get_date_yyddd(const cblc_field_t *field) { char ach[32]; @@ -2243,13 +2213,18 @@ __gg__get_date_yyddd() local->tm_year % 100, local->tm_yday+1); - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } extern "C" char * -__gg__get_yyyyddd() +__gg__get_yyyyddd(const cblc_field_t *field) { char ach[32]; @@ -2261,13 +2236,18 @@ __gg__get_yyyyddd() local->tm_year + 1900, local->tm_yday+1); - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } extern "C" char * -__gg__get_date_dow() +__gg__get_date_dow(const cblc_field_t *field) { char ach[32]; @@ -2278,8 +2258,13 @@ __gg__get_date_dow() "%1.1d", local->tm_wday == 0 ? 7 : local->tm_wday); - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } static int @@ -2379,7 +2364,7 @@ __gg__clock_gettime(struct cbl_timespec *tp) extern "C" char * -__gg__get_date_hhmmssff() +__gg__get_date_hhmmssff(const cblc_field_t *field) { char ach[32]; @@ -2404,8 +2389,13 @@ __gg__get_date_hhmmssff() tm.tm_sec, hundredths); - ascii_to_internal_str(ach, strlen(ach)); - return strdup(ach); + size_t charsout; + const char *converted = __gg__iconverter(__gg__console_encoding, + field->encoding, + ach, + strlen(ach), + &charsout); + return strdup(converted); } extern "C" @@ -2413,7 +2403,8 @@ int __gg__setop_compare( const char *candidate, int capacity, - char *domain) + char *domain, + cbl_encoding_t domain_encoding) { // This routine is called to compare the characters of 'candidate' // against the list of character pairs in 'domain' @@ -2423,6 +2414,13 @@ __gg__setop_compare( int h; char *d; + /* At the present writing, the domain was created in "source code" space, + meaning that 'A' comes through as 0x41 no matter what the + domain->encoding. Until Jim gets around to providing me with target + values from the parser, we're doing the conversion here. */ + + charmap_t *charmap = __gg__get_charmap(domain_encoding); + for(int i=0; imapped_character(l); + h = charmap->mapped_character(h); if( ch >= l && ch <= h ) { // This character is acceptable @@ -2554,9 +2555,10 @@ __gg__dirty_to_binary_source(const char *dirty, extern "C" __int128 -__gg__dirty_to_binary_internal( const char *dirty, - int length, - int *rdigits) +__gg__dirty_to_binary(const char *dirty, + cbl_encoding_t encoding, + int length, + int *rdigits) { // This routine is used for converting uncontrolled strings to a // a 128-bit signed binary number. @@ -2574,7 +2576,17 @@ __gg__dirty_to_binary_internal( const char *dirty, // The binary number, if signed, is returned as a negative number. - // We are limiting the number of digits in the number to MAX_FIXED_POINT_DIGITS + // We are limiting the number of digits in the number to + // MAX_FIXED_POINT_DIGITS + + charmap_t *charmap = __gg__get_charmap(encoding); + int mapped_minus = charmap->mapped_character(ascii_minus); + int mapped_plus = charmap->mapped_character(ascii_plus); + int mapped_decimal_point = charmap->mapped_character(__gg__decimal_point); + int mapped_0 = charmap->mapped_character(ascii_0); + int mapped_9 = charmap->mapped_character(ascii_9); + int mapped_E = charmap->mapped_character(ascii_E); + int mapped_e = charmap->mapped_character(ascii_e); __int128 retval = 0; @@ -2593,21 +2605,22 @@ __gg__dirty_to_binary_internal( const char *dirty, if(length-- > 0) { ch = *dirty++; - if( ch == internal_minus ) + if( ch == mapped_minus ) { hyphen = 1; } - else if( ch == internal_plus ) + else if( ch == mapped_plus ) { // A plus sign is okay } - else if( ch == ascii_to_internal(__gg__decimal_point) ) + else if( ch == mapped_decimal_point ) { delta_r = 1; } - else if( ch >= internal_0 && ch <= internal_9 ) + else if( ch >= mapped_0 + && ch <= mapped_9 ) { - retval = ch - internal_0 ; + retval = ch - mapped_0 ; if( retval ) { digit_count += 1; @@ -2625,14 +2638,15 @@ __gg__dirty_to_binary_internal( const char *dirty, while( length-- > 0 ) { ch = *dirty++; - if( ch == ascii_to_internal(__gg__decimal_point) && delta_r == 0 ) + if( ch == mapped_decimal_point && delta_r == 0 ) { // This is the first decimal point we've seen, so we // can start counting rdigits: delta_r = 1; continue; } - if( ch < internal_0 || ch > internal_9 ) + if( ch < mapped_0 + || ch > mapped_9 ) { // When we hit something that isn't a digit, then we are done break; @@ -2640,7 +2654,7 @@ __gg__dirty_to_binary_internal( const char *dirty, if( digit_count < MAX_FIXED_POINT_DIGITS ) { retval *= 10; - retval += ch - internal_0 ; + retval += ch - mapped_0 ; *rdigits += delta_r; if( retval ) { @@ -2650,19 +2664,20 @@ __gg__dirty_to_binary_internal( const char *dirty, } // Let's check for an exponent: - if( ch == internal_E || ch == internal_e ) + if( ch == mapped_E + || ch == mapped_e ) { int exponent = 0; int exponent_sign = 1; if( length > 0 ) { ch = *dirty; - if( ch == internal_plus) + if( ch == mapped_plus) { length -= 1; dirty += 1; } - else if (ch == internal_minus) + else if( ch == mapped_minus ) { exponent_sign = -1; length -= 1; @@ -2672,13 +2687,14 @@ __gg__dirty_to_binary_internal( const char *dirty, while(length-- > 0) { ch = *dirty++; - if( ch < internal_0 || ch > internal_9 ) + if( ch < mapped_0 + || ch > mapped_9 ) { // When we hit something that isn't a digit, then we are done break; } exponent *= 10; - exponent += ch - internal_0 ; + exponent += ch - mapped_0 ; } exponent *= exponent_sign; // We need to adjust the retval and the rdigits based on the exponent. @@ -2717,7 +2733,8 @@ __gg__dirty_to_binary_internal( const char *dirty, extern "C" GCOB_FP128 __gg__dirty_to_float( const char *dirty, - int length) + int length, + const cblc_field_t *field) { // This routine is used for converting uncontrolled strings to a // a _Float128 @@ -2744,24 +2761,27 @@ __gg__dirty_to_float( const char *dirty, // We now loop over the remaining input characters: char ch = '\0'; + charmap_t *charmap = __gg__get_charmap(field->encoding); + if(length-- > 0) { ch = *dirty++; - if( ch == internal_minus ) + if( ch == charmap->mapped_character(ascii_minus) ) { hyphen = 1; } - else if( ch == internal_plus ) + else if( ch == charmap->mapped_character(ascii_plus) ) { // A plus sign is okay } - else if( ch == ascii_to_internal(__gg__decimal_point) ) + else if( ch == charmap->mapped_character(__gg__decimal_point) ) { delta_r = 1; } - else if( ch >= internal_0 && ch <= internal_9 ) + else if( ch >= charmap->mapped_character(ascii_0) + && ch <= charmap->mapped_character(ascii_9) ) { - retval = ch - internal_0 ; + retval = ch - charmap->mapped_character(ascii_0) ; } else { @@ -2776,37 +2796,39 @@ __gg__dirty_to_float( const char *dirty, while( length-- > 0 ) { ch = *dirty++; - if( ch == ascii_to_internal(__gg__decimal_point) && delta_r == 0 ) + if( ch == charmap->mapped_character(__gg__decimal_point) && delta_r == 0 ) { // This is the first decimal point we've seen, so we // can start counting rdigits: delta_r = 1; continue; } - if( ch < internal_0 || ch > internal_9 ) + if( ch < charmap->mapped_character(ascii_0) + || ch > charmap->mapped_character(ascii_9) ) { // When we hit something that isn't a digit, then we are done break; } retval *= 10; - retval += ch - internal_0 ; + retval += ch - charmap->mapped_character(ascii_0) ; rdigits += delta_r; } // Let's check for an exponent: int exponent = 0; - if( ch == internal_E || ch == internal_e ) + if( ch == charmap->mapped_character(ascii_E) + || ch == charmap->mapped_character(ascii_e) ) { int exponent_sign = 1; if( length > 0 ) { ch = *dirty; - if( ch == internal_plus) + if( ch == charmap->mapped_character(ascii_plus) ) { length -= 1; dirty += 1; } - else if (ch == internal_minus) + else if (ch == charmap->mapped_character(ascii_minus) ) { exponent_sign = -1; length -= 1; @@ -2816,13 +2838,14 @@ __gg__dirty_to_float( const char *dirty, while(length-- > 0) { ch = *dirty++; - if( ch < internal_0 || ch > internal_9 ) + if( ch < charmap->mapped_character(ascii_0) + || ch > charmap->mapped_character(ascii_9) ) { // When we hit something that isn't a digit, then we are done break; } exponent *= 10; - exponent += ch - internal_0 ; + exponent += ch - charmap->mapped_character(ascii_0) ; } exponent *= exponent_sign; } @@ -2884,17 +2907,6 @@ __gg__get_integer_binary_value(cblc_field_t *var) return retval; } -static -void psz_to_internal(char *psz) - { - char *p = psz; - while( *p ) - { - *p = ascii_to_internal(*p); - p += 1; - } - } - static int get_scaled_rdigits(const cblc_field_t *field) { @@ -2924,7 +2936,7 @@ get_scaled_rdigits(const cblc_field_t *field) return retval; } -static char * +static cbl_encoding_t format_for_display_internal(char **dest, size_t *dest_size, cblc_field_t *var, @@ -2934,11 +2946,17 @@ format_for_display_internal(char **dest, { // dest and dest_size represent a malloced buffer of dest_size. + // The returned value is NUL-terminated. + // This routine will put the formatted result into dest if it fits, and // realloc dest if it doesn't. The new_size goes into the dest_size // reference. Used properly, the caller's buffer just keeps getting bigger // as necessary, cutting down on the number of reallocations needed. + // The routine returns the cbl_encoding_t of the result. + + cbl_encoding_t retval = var->encoding; + int source_rdigits = var->rdigits; if( var->attr & scaled_e ) @@ -2959,8 +2977,7 @@ format_for_display_internal(char **dest, (int)(2*sizeof(void *)), (int)(2*sizeof(void *)), (unsigned long)actual_location); - ascii_to_internal_str(*dest, strlen(*dest)); - + retval = __gg__console_encoding; goto done; } @@ -2980,73 +2997,22 @@ format_for_display_internal(char **dest, { fprintf(stderr, "attempting to display a NULL pointer in %s\n", var->name); abort(); - //memset(*dest, internal_query, actual_length); - //memcpy(*dest, actual_location, actual_length); } (*dest)[actual_length] = NULLCH; break; case FldNumericDisplay: { - // Because a NumericDisplay can have any damned thing as a character, - // we are going force things that aren't digits to display as '0' - - // 0xFF is an exception, so that a HIGH-VALUE in a numeric display shows - // up in a unique way. - static const uint8_t ascii_chars[256] = - { - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x00 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x10 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x20 - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '0', '0', '0', '0', '0', // 0x30 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x40 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x50 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x60 - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '0', '0', '0', '0', '0', // 0x70 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x80 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x90 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xa0 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xb0 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xc0 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xd0 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xe0 - '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 0xFF, // 0xf0 - }; - static const uint8_t ebcdic_chars[256] = - { - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x00 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x10 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x20 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x30 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x40 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x50 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x60 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x70 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x80 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x90 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xa0 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xb0 - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xc0 - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xd0 - 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xe0 - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xFF, // 0xf0 - } ; - - // We are going to make use of fact that a NumericDisplay's data is - // almost already in the format we need. We have to add a decimal point, - // if necessary, in the right place, and we need to tack on leading or - // trailing zeroes for PPP999 and 999PPP scaled-e variables. - if( var_is_refmod(var) ) { + // Because we are dealing with a refmod, we just output those + // characters. __gg__realloc_if_necessary(dest, dest_size, actual_length+1); memcpy((*dest), actual_location, actual_length); (*dest)[actual_length] = NULLCH; break; } - const unsigned char *running_location = actual_location; - // We need the counts of digits to the left and right of the decimal point int rdigits = get_scaled_rdigits(var); int ldigits = var->digits - rdigits; @@ -3076,75 +3042,144 @@ format_for_display_internal(char **dest, if( actual_location ) { + // This buffer is larger than can validly be needed + unsigned char converted[128]; + size_t outlength; + retval = DEFAULT_CHARMAP_SOURCE; + const char *mapped = __gg__iconverter( + var->encoding, + retval, + PTRCAST(char, actual_location), + actual_length, + &outlength); + memcpy(converted, mapped, outlength); + charmap_t *charmap = __gg__get_charmap(retval); + + // converted[] is now an ASCII version of the value in memory. We are + // going to "validate" the characters, which might be garbage. + + int signtype = (var->attr & signable_e ? 4 : 0) + + (var->attr & separate_e ? 2 : 0) + + (var->attr & leading_e ? 1 : 0); + unsigned char *signloc; + unsigned char *digits; + const unsigned char *digits_e; + bool is_negative; int index = 0; // This is the running index into our output destination - if( var->attr & signable_e ) + + ptrdiff_t signoffset; + switch(signtype) { - if( var->attr & separate_e ) - { - // We are dealing with a sign character maintained separately in - // the data. - if( var->attr & leading_e ) - { - // The first character is the sign character - (*dest)[index++] = *running_location++; - } - } - else + case 0: + case 1: + case 2: + case 3: + // not signable + signloc = converted; + digits = converted; + digits_e = converted + actual_length; + is_negative = false; + break; + case 4: + // internal trailing + signloc = converted + actual_length-1; + digits = converted; + digits_e = converted + actual_length; + /* In ascii, negative is indicated by turning bit 0x40 on. + In ebcdic, by turning bit 0x20 off. In both cases, the result + is outside of the range '0' through '9'. Working this way is + slightly dangerous, because we might miss some particular + set of garbage that might have been READ or REDEFINED into the + variable's memory. I am not overly concerned. + */ + is_negative = *signloc > ascii_9 || *signloc < ascii_0; + signoffset = signloc-converted; + *signloc = charmap->mapped_character(ascii_0) + + (actual_location[signoffset] & 0x0F); + break; + case 5: + // internal leading + signloc = converted; + digits = converted; + digits_e = converted + actual_length; + is_negative = *signloc > ascii_9 || *signloc < ascii_0; + signoffset = signloc-converted; + *signloc = charmap->mapped_character(ascii_0) + + (actual_location[signoffset] & 0x0F); + break; + case 6: + // separate trailing + signloc = converted + actual_length-1; + digits = converted; + digits_e = converted + actual_length-1; + is_negative = *signloc == ascii_minus; + break; + case 7: + // separate leading + signloc = converted; + digits = converted+1; + digits_e = converted + actual_length; + is_negative = *signloc == ascii_minus; + break; + } + // We have the sign sorted out; make sure that the digits are valid: + unsigned char *running_location = digits; + while(running_location < digits_e) + { + if( *running_location < ascii_0 || *running_location > ascii_9 ) { - // The sign character is not separate. It's in either the first - // or last byte of the data: - size_t sign_location = var->attr & leading_e ? 0 : actual_length-1 ; - if( is_sign_bit_on( actual_location[sign_location]) ) - { - (*dest)[index++] = internal_minus; - } - else - { - (*dest)[index++] = internal_plus; - } + // An invalid digit becomes '0', and the value is flagged positive + *running_location = ascii_0; + is_negative = false; } + running_location += 1; } + // converted[] is now full of valid digits, and is_negative has been + // established. + + switch(signtype) + { + case 0: + case 1: + case 2: + case 3: + // not signable + break; + case 4: + case 5: + // internal trailing + // internal leading + (*dest)[index++] = is_negative ? ascii_minus : ascii_plus; + break; + case 6: + // separate trailing + break; + case 7: + // separate leading + (*dest)[index++] = is_negative ? ascii_minus : ascii_plus; + break; + } + running_location = digits; + // copy over the characters to the left of the decimal point: for(int i=0; idecimal_point(); if( ldigits < 0 ) { // This is a scaled_e value, and we need that many zeroes: for( int i=0; i<-ldigits; i++ ) { - (*dest)[index++] = internal_zero; + (*dest)[index++] = ascii_0; } } @@ -3152,14 +3187,6 @@ format_for_display_internal(char **dest, for(int i=0; irdigits); i++) { - (*dest)[index++] = internal_zero; + (*dest)[index++] = ascii_0; } } @@ -3176,7 +3203,8 @@ format_for_display_internal(char **dest, && var->attr & separate_e && !(var->attr & leading_e) ) { - (*dest)[index++] = actual_location[actual_length-1]; + // We need a trailing plus or minus + (*dest)[index++] = is_negative ? ascii_minus : ascii_plus; } (*dest)[index++] = NULLCH; @@ -3185,8 +3213,6 @@ format_for_display_internal(char **dest, { fprintf(stderr, "attempting to display a NULL pointer in %s\n", var->name); abort(); - // memset(*dest, internal_query, nsize-1); - // (*dest)[nsize] = NULLCH; } } break; @@ -3236,7 +3262,7 @@ format_for_display_internal(char **dest, digits = 19; break; case 16: - digits = 38; + digits = MAX_FIXED_POINT_DIGITS; break; default: warnx("%s(): %s has capacity %ld\n", @@ -3249,7 +3275,10 @@ format_for_display_internal(char **dest, } char ach[128]; - __gg__binary_to_string_internal(ach, digits, value); + retval = DEFAULT_CHARMAP_SOURCE; + charmap_t *charmap = __gg__get_charmap(retval); + + __gg__binary_to_string_ascii(ach, digits, value); // And copy the code from up above: int nsize = digits+1; @@ -3270,11 +3299,11 @@ format_for_display_internal(char **dest, { if( value < 0 ) { - (*dest)[index++] = internal_minus; + (*dest)[index++] = ascii_minus; } else { - (*dest)[index++] = internal_plus; + (*dest)[index++] = ascii_plus; } } // copy over the characters to the left of the decimal point: @@ -3282,7 +3311,7 @@ format_for_display_internal(char **dest, index += digits - source_rdigits; if( source_rdigits ) { - (*dest)[index++] = ascii_to_internal(__gg__decimal_point); + (*dest)[index++] = charmap->decimal_point(); memcpy((*dest)+index, ach+(digits-source_rdigits), source_rdigits); index += source_rdigits; } @@ -3303,6 +3332,7 @@ format_for_display_internal(char **dest, sprintf(ach, "%lu", (unsigned long)value); __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1); strcpy(*dest, ach); + retval = __gg__console_encoding; } break; @@ -3321,7 +3351,7 @@ format_for_display_internal(char **dest, memset(*dest, 0, retsize); strcpy(*dest, ""); } - + retval = __gg__console_encoding; break; } @@ -3339,7 +3369,7 @@ format_for_display_internal(char **dest, (int)(2*sizeof(void *)), (int)(2*sizeof(void *)), (unsigned long)value); - ascii_to_internal_str(*dest, strlen(*dest)); + retval = __gg__console_encoding; break; } @@ -3383,7 +3413,6 @@ format_for_display_internal(char **dest, __gg__remove_trailing_zeroes(ach); __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1); } - psz_to_internal(ach); strcpy(*dest, ach); break; } @@ -3424,7 +3453,6 @@ format_for_display_internal(char **dest, __gg__remove_trailing_zeroes(ach); __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1); } - psz_to_internal(ach); strcpy(*dest, ach); break; } @@ -3471,11 +3499,11 @@ format_for_display_internal(char **dest, __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1); } - psz_to_internal(ach); strcpy(*dest, ach); break; } } + retval = __gg__console_encoding; break; } @@ -3487,8 +3515,10 @@ format_for_display_internal(char **dest, break; } - if( var->attr & scaled_e && var->type != FldNumericDisplay ) + if( (var->attr & scaled_e) && var->type != FldNumericDisplay ) { + charmap_t *charmap = __gg__get_charmap(retval); + static size_t buffer_size = MINIMUM_ALLOCATION_SIZE; static char *buffer = static_cast(malloc(buffer_size)); massert(buffer); @@ -3501,16 +3531,16 @@ format_for_display_internal(char **dest, __gg__realloc_if_necessary(&buffer, &buffer_size, new_length); - memset(buffer, internal_0, new_length); + memset(buffer, ascii_0, new_length); char *p = buffer; char *s = *dest; - if( ((*dest)[0]&0xFF) < internal_0 - || ((*dest)[0]&0xFF) > internal_9 ) + if( ((*dest)[0]&0xFF) < ascii_0 + || ((*dest)[0]&0xFF) > ascii_9 ) { *p++ = (*dest)[0]; s += 1; } - *p++ = ascii_to_internal(__gg__decimal_point); + *p++ = charmap->decimal_point(); p += var->rdigits; // Skip over the zeroes strcpy(p, s); @@ -3524,7 +3554,7 @@ format_for_display_internal(char **dest, size_t new_length = strlen(*dest) + -var->rdigits + 1; __gg__realloc_if_necessary(&buffer, &buffer_size, new_length); - memset(buffer, internal_0, new_length); + memset(buffer, charmap->mapped_character(ascii_0), new_length); buffer[new_length-1] = NULLCH; memcpy(buffer, *dest, strlen(*dest)); @@ -3537,16 +3567,16 @@ format_for_display_internal(char **dest, { // Because this is a intermediate Bin5, let's strip off leading zeroes. // - // Because we don't know what we are dealing with, we created a 38-digit + // Because we don't know what we are dealing with, we created a 37-digit // number with a variable number of rdigits. So, we usually have a boatload // of leading zeroes. I find that display offensive, so let's fix it: unsigned char *p1 = (unsigned char *)(*dest); - if( *p1 == internal_plus || *p1 == internal_minus ) + if( *p1 == ascii_plus || *p1 == ascii_minus ) { p1 += 1; } unsigned char *p2 = p1; - while( p2[0] == internal_zero && p2[1] != '\0' ) + while( p2[0] == ascii_0 && p2[1] != '\0' ) { p2 += 1; } @@ -3554,44 +3584,23 @@ format_for_display_internal(char **dest, } done: - return *dest; - } - -static char * -format_for_display_local( char **dest, - size_t *dest_size, - cblc_field_t *var, - size_t var_offset, - size_t var_size, - int var_flags) - { - if(var) + if( retval == custom_encoding_e ) { - // At this point, format the entire length. It's up to our caller to - // trim it further, because this routine is used by both receivers and - // senders - format_for_display_internal(dest, - dest_size, - var, - var->data + var_offset, - var_size, - var_flags & REFER_T_ADDRESS_OF); - } - else - { - **dest = '\0'; + fprintf(stderr, "Bum encoding in format_for_display_internal\n"); + abort(); } - return *dest; + return retval; } static int compare_88( const char *list, const char *list_e, bool fig_const, - cblc_field_t * /*conditional*/, + const cblc_field_t *conditional, unsigned char *conditional_location, int conditional_length) { + charmap_t *charmap = __gg__get_charmap(conditional->encoding); int list_len = (int)(list_e-list); int test_len; char *test; @@ -3606,31 +3615,24 @@ compare_88( const char *list, // nonetheless can magically be expanded into figurative // constants: - int ch = internal_space; + int ch = charmap->mapped_character(ascii_space); // Check for the strings starting with 0xFF whose second character // indicates a figurative constant: if( list[0] == ascii_Z ) { - ch = internal_zero; + ch = charmap->mapped_character(ascii_0); } else if( list[0] == ascii_H ) { - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - ch = __gg__high_value_character; - } - else - { - ch = ascii_to_internal(__gg__high_value_character); - } + ch = charmap->high_value_character(); } else if( list[0] == ascii_Q ) { - ch = ascii_to_internal(__gg__quote_character); + ch = charmap->quote_character(); } else if( list[0] == ascii_L ) { - ch = ascii_to_internal(__gg__low_value_character); + ch = charmap->low_value_character(); } memset( test, ch, conditional_length ); } @@ -3640,7 +3642,7 @@ compare_88( const char *list, test = static_cast(malloc(conditional_length)); massert(test); test_len = conditional_length; - memset(test, internal_space, conditional_length); + memset(test, charmap->mapped_character(ascii_space), conditional_length); memcpy(test, list, list_len); } else @@ -3744,10 +3746,10 @@ get_float128( const cblc_field_t *field, static int -compare_field_class(cblc_field_t *conditional, - unsigned char *conditional_location, - int conditional_length, - cblc_field_t *list) +compare_field_class(const cblc_field_t *conditional, + unsigned char *conditional_location, + int conditional_length, + cblc_field_t *list) { int retval = 1; // Zero means equal __int128 value; @@ -3803,8 +3805,9 @@ compare_field_class(cblc_field_t *conditional, } else { - left_value = __gg__dirty_to_binary_internal( + left_value = __gg__dirty_to_binary( left, + conditional->encoding, left_len, &left_rdigits); } @@ -3817,8 +3820,9 @@ compare_field_class(cblc_field_t *conditional, } else { - right_value = __gg__dirty_to_binary_internal( + right_value = __gg__dirty_to_binary( right, + conditional->encoding, right_len, &right_rdigits); } @@ -3947,7 +3951,8 @@ compare_field_class(cblc_field_t *conditional, else { left_value = __gg__dirty_to_float(left, - left_len); + left_len, + conditional); } GCOB_FP128 right_value; @@ -3958,7 +3963,8 @@ compare_field_class(cblc_field_t *conditional, else { right_value = __gg__dirty_to_float( right, - right_len); + right_len, + conditional); } if( left_value <= fp128 && fp128 <= right_value ) @@ -4046,7 +4052,8 @@ compare_strings(const char *left_string, bool left_all, const char *right_string, size_t right_length, - bool right_all) + bool right_all, + cbl_encoding_t encoding) { int retval = 0; size_t i = 0; @@ -4065,8 +4072,9 @@ compare_strings(const char *left_string, while( !retval && imapped_character(ascii_space)); i += 1; } } @@ -4095,9 +4104,10 @@ compare_strings(const char *left_string, if( !left_all ) { + charmap_t *charmap = __gg__get_charmap(encoding); while( !retval && imapped_character(ascii_space)) - collated((unsigned char)right_string[i]); i += 1; } @@ -4154,75 +4164,24 @@ __gg__compare_2(cblc_field_t *left_side, //bool left_refmod = !!(left_flags & REFER_T_REFMOD ); bool right_refmod = !!(right_flags & REFER_T_REFMOD ); + // There are a bunch of cases where we might be dealing with encoding: + cbl_encoding_t encoding_left = left_side->encoding; + cbl_encoding_t encoding_right = right_side->encoding; + charmap_t *charmap_left = __gg__get_charmap(encoding_left); + charmap_t *charmap_right = __gg__get_charmap(encoding_right); + // Figure out if we have any figurative constants cbl_figconst_t left_figconst = (cbl_figconst_t)(left_attr & FIGCONST_MASK); cbl_figconst_t right_figconst = (cbl_figconst_t)(right_attr & FIGCONST_MASK); unsigned int fig_left = 0; unsigned int fig_right = 0; + fig_left = charmap_left->figconst_character(left_figconst); + fig_right = charmap_right->figconst_character(right_figconst); - switch(left_figconst) - { - case normal_value_e : - fig_left = 0; - break; - case low_value_e : - fig_left = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e : - fig_left = internal_zero; - break; - case space_value_e : - fig_left = internal_space; - break; - case quote_value_e : - fig_left = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - fig_left = __gg__high_value_character; - } - else - { - fig_left = ascii_to_internal(__gg__high_value_character); - } - break; - case null_value_e: - break; - } - switch(right_figconst) - { - case normal_value_e : - fig_right = 0; - break; - case low_value_e : - fig_right = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e : - fig_right = internal_zero; - break; - case space_value_e : - fig_right = internal_space; - break; - case quote_value_e : - fig_right = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - fig_right = __gg__high_value_character; - } - else - { - fig_right = ascii_to_internal(__gg__high_value_character); - } - break; - case null_value_e: - break; - } + // We have four high-level conditions to consider depending on whether + // left and/or right are figurative constants: - // We have four high-level conditions to consider: int retval = 0; bool compare = false; @@ -4262,8 +4221,12 @@ __gg__compare_2(cblc_field_t *left_side, retval = 0; for(size_t i=0; ifigconst_character(right_figconst); retval = collated((unsigned int)left_location[i]) - - collated(fig_right); + - collated(fig_of_left); if( retval ) { break; @@ -4291,8 +4254,6 @@ __gg__compare_2(cblc_field_t *left_side, int rdigits; __int128 value; - if( left_side) - value = get_binary_value_local( &rdigits, left_side, left_location, @@ -4320,8 +4281,10 @@ __gg__compare_2(cblc_field_t *left_side, retval = 0; for(size_t i=0; ifigconst_character(right_figconst); retval = collated((unsigned int)left_location[i]) - - collated(fig_right); + - collated(fig_of_left); if( retval ) { break; @@ -4345,12 +4308,21 @@ __gg__compare_2(cblc_field_t *left_side, if( local_is_alpha(left_side->type, left_address_of) && local_is_alpha(right_side->type, right_address_of) ) { + if( encoding_left != encoding_right ) + { + fprintf(stderr, "We don't yet know how to compare strings of different encodings\n"); + fprintf(stderr, "Let Dubner and Lowden of cobolworx know about this\n"); + abort(); + } + retval = compare_strings( reinterpret_cast(left_location), left_length, left_all, reinterpret_cast(right_location), right_length, - right_all ); + right_all, + encoding_left + ); compare = true; goto fixup_retval; @@ -4522,7 +4494,8 @@ __gg__compare_2(cblc_field_t *left_side, left_all, reinterpret_cast(right_location), right_length, - right_all); + right_all, + left_side->encoding); compare = true; goto fixup_retval; } @@ -4546,13 +4519,26 @@ __gg__compare_2(cblc_field_t *left_side, static char *right_string = static_cast(malloc(right_string_size)); - right_string = format_for_display_internal( - &right_string, - &right_string_size, - right_side, - right_location, - right_length, - 0); + cbl_encoding_t encoding_formatted = + format_for_display_internal( &right_string, + &right_string_size, + right_side, + right_location, + right_length, + 0); + + if( encoding_formatted != encoding_left ) + { + // The encodings are not the same. We need to convert the right_string + // to the same encoding as the left side: + size_t outsize; + const char *converted = __gg__iconverter(encoding_formatted, + encoding_left, + right_string, + strlen(right_string), + &outsize); + memcpy(right_string, converted, outsize); + } // There is a tricky aspect to comparing an alphanumeric to // a string. In short, we have to strip off any leading plus sign @@ -4562,14 +4548,16 @@ __gg__compare_2(cblc_field_t *left_side, // considered a "pseudo-move", and the rule for moving a negative // number to an alphanumeric is that negative signs get stripped off - if( *left_location == internal_plus || *left_location == internal_minus ) + if( *left_location == charmap_left->mapped_character(ascii_plus) + || *left_location == charmap_left->mapped_character(ascii_minus) ) { left_location += 1; left_length -= 1; } const char *right_fixed; - if( *right_string == internal_plus || *right_string == internal_minus ) + if( *right_string == charmap_right->mapped_character(ascii_plus) + || *right_string == charmap_right->mapped_character(ascii_minus) ) { right_fixed = right_string + 1; } @@ -4583,7 +4571,8 @@ __gg__compare_2(cblc_field_t *left_side, left_all, right_fixed, strlen(right_fixed), - right_all); + right_all, + encoding_left); compare = true; goto fixup_retval; } @@ -4935,7 +4924,7 @@ init_var_both(cblc_field_t *var, if( var->level == LEVEL88 ) { - // We need to convert the options to the internal native codeset + // We need to convert the options to the var->encoding size_t buffer_size = 4; char *buffer = static_cast(malloc(buffer_size)); @@ -4955,23 +4944,40 @@ init_var_both(cblc_field_t *var, static char *first = static_cast(malloc(first_size)); static size_t last_size = MINIMUM_ALLOCATION_SIZE; static char *last = static_cast(malloc(last_size)); + if( strlen(walker)+1 > first_size ) + { + first_size = strlen(walker)+1; + first = static_cast(realloc(first, first_size)); + } if( (*walker & 0xFF) == 0xFF ) { + // I don't recall what 0xFF means, and I neglected to comment it + // the first time through. Probably means numerical values follow. strcpy(first, walker); } else { - raw_to_internal(&first, &first_size, walker, strlen(walker)); + strcpy(first, walker); + __gg__convert_encoding( first, + DEFAULT_CHARMAP_SOURCE, + var->encoding); } walker += strlen(first) + 1; + if( strlen(walker)+1 > last_size ) + { + last_size = strlen(walker)+1; + last = static_cast(realloc(last, last_size)); + } if( (*walker & 0xFF) == 0xFF ) { strcpy(last, walker); } else { - raw_to_internal(&last, &last_size, walker, strlen(walker)); + __gg__convert_encoding( last, + DEFAULT_CHARMAP_SOURCE, + var->encoding); } walker += strlen(last) + 1; while(index + strlen(first) + strlen(last) + 3 > buffer_size) @@ -5145,8 +5151,9 @@ init_var_both(cblc_field_t *var, { if( !defaultbyte_in_play ) { + charmap_t *charmap = __gg__get_charmap(var->encoding); memset( outer_location, - internal_space, + charmap->mapped_character(ascii_space), capacity ); } else @@ -5161,8 +5168,8 @@ init_var_both(cblc_field_t *var, case FldNumericDisplay: { - // Any initialization values were converted to single-byte-coding in the - // right codeset during parser_symbol_add() + // Any initialization values were converted to single-byte-coding in + // the right codeset during parser_symbol_add() if( var->initial ) { memcpy(outer_location, var->initial, var->capacity); @@ -5171,18 +5178,20 @@ init_var_both(cblc_field_t *var, { if( !defaultbyte_in_play ) { + charmap_t *charmap = __gg__get_charmap(var->encoding); memset( outer_location, - internal_zero, + charmap->mapped_character(ascii_zero), capacity ); if( (var->attr & signable_e) && (var->attr & separate_e) ) { if( var->attr & leading_e ) { - outer_location[0] = internal_plus; + outer_location[0] = charmap->mapped_character(ascii_plus); } else { - outer_location[var->capacity-1] = internal_plus; + outer_location[var->capacity-1] = + charmap->mapped_character(ascii_plus); } } } @@ -5316,7 +5325,10 @@ alpha_to_alpha_move_from_location(cblc_field_t *field, bool move_all) { // This is a helper function, called when it is known that both source - // and dest are alphanumeric + // and dest are alphanumeric. + + // It is also required that at this point the source_location data be in the + // same encoding as field->encoding. dest_length = dest_length ? dest_length : field->capacity; char *to = reinterpret_cast(field->data + dest_offset); @@ -5369,10 +5381,11 @@ alpha_to_alpha_move_from_location(cblc_field_t *field, // the leading characters. // We do the move first, in case this is an overlapping move // involving characters that will be space-filled + charmap_t *charmap = __gg__get_charmap(field->encoding); memmove(to + (dest_length-count), from, count); - memset(to, internal_space, dest_length-count); + memset(to, charmap->mapped_character(ascii_space), dest_length-count); } } else @@ -5400,11 +5413,12 @@ alpha_to_alpha_move_from_location(cblc_field_t *field, // the trailing characters. // We do the move first, in case this is an overlapping move // involving characters that will be space-filled + charmap_t *charmap = __gg__get_charmap(field->encoding); memmove(to, from, count); memset( to + count, - internal_space, + charmap->mapped_character(ascii_space), dest_length-count); } } @@ -5415,17 +5429,54 @@ static void alpha_to_alpha_move(cblc_field_t *dest, size_t dest_offset, size_t dest_size, - const cblc_field_t *source, + const cblc_field_t *source, size_t source_offset, size_t source_size, bool source_move_all) { - alpha_to_alpha_move_from_location( dest, - dest_offset, - dest_size, - reinterpret_cast(source->data + source_offset), - source_size, - source_move_all); + const char *source_location + = reinterpret_cast(source->data + source_offset); + size_t outlength; + if(dest->encoding == source->encoding) + { + outlength = source_size; + } + else + { + // Before calling the mover, we need to convert the source to the + // destination encoding: + static size_t bufsize = 0; + static char *buffer = nullptr; + + // Supposing we might someday want to convert to UCS-4, then we need four + // output bytes for each input SBC character. This is a dumb way of + // thinking about it. By rights we should be calculating the worst case + // dest size as (source_size / min_bytes_per_source_position) times the + // max_bytes_per_dest_position. + + // But that's work for another day. This is harmless, if perhaps a bit + // wasteful of memory. + + size_t needed = 4 * source_size; + if( needed > bufsize ) + { + bufsize = needed; + buffer = static_cast(realloc(buffer, bufsize)); + massert(buffer); + } + + source_location = __gg__iconverter( source->encoding, + dest->encoding, + source_location, + source_size, + &outlength); + } + alpha_to_alpha_move_from_location(dest, + dest_offset, + dest_size, + source_location, + outlength, + source_move_all); } extern "C" @@ -5487,6 +5538,8 @@ __gg__move( cblc_field_t *fdest, || fdest->type == FldFloat ) ) { + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); + // Regardless of what you see below, as time went on it became clear that // high-value and low-value required special processing in order to cope // with code. Or, at least, to cope with legacy tests. @@ -5506,19 +5559,19 @@ __gg__move( cblc_field_t *fdest, int special_char = 0; // quiets cppcheck if( source_figconst == low_value_e ) { - special_char = ascii_to_internal(__gg__low_value_character); + special_char = charmap_dest->low_value_character(); } else if( source_figconst == high_value_e ) { - special_char = ascii_to_internal(__gg__high_value_character); + special_char = charmap_dest->high_value_character(); } else if( source_figconst == quote_value_e ) { - special_char = ascii_to_internal(__gg__quote_character); + special_char = charmap_dest->quote_character(); } else if( source_figconst == space_value_e ) { - special_char = ascii_to_internal(ascii_space); + special_char = charmap_dest->mapped_character(ascii_space); } memset( fdest->data + dest_offset, special_char, @@ -5542,9 +5595,11 @@ __gg__move( cblc_field_t *fdest, case FldPacked: case FldNumericBin5: case FldGroup: + { // This is a little bold, but non-alphabetics will never // have the rjust_e or MOVE_ALL bits on, so it's safe // enough. + alpha_to_alpha_move(fdest, dest_offset, dest_size, @@ -5553,6 +5608,7 @@ __gg__move( cblc_field_t *fdest, source_size, !!(source_flags & REFER_T_MOVE_ALL)); break; + } default: moved = false; @@ -5608,6 +5664,8 @@ __gg__move( cblc_field_t *fdest, // alphanumeric. We ignore any sign bit, and just // move the characters: + charmap_t *charmap = __gg__get_charmap(fdest->encoding); + size_t source_digits = fsource->digits + ( fsource->rdigits < 0 @@ -5623,7 +5681,10 @@ __gg__move( cblc_field_t *fdest, // Convert it to the full complement of digits available // from the source...but no more - __gg__binary_to_string_internal(ach, source_digits, value); + __gg__binary_to_string_encoded(ach, + source_digits, + value, + fdest->encoding); if( !(fdest->attr & rjust_e) ) { @@ -5636,7 +5697,7 @@ __gg__move( cblc_field_t *fdest, // have to space-fill the excess bytes in the // destination: memset( fdest->data + dest_offset + min_length, - internal_space, + charmap->mapped_character(ascii_space), dest_size - min_length ); } } @@ -5662,7 +5723,7 @@ __gg__move( cblc_field_t *fdest, ach, source_digits ); memset( fdest->data + dest_offset, - internal_space, + charmap->mapped_character(ascii_space), dest_size - source_digits); } } @@ -5685,6 +5746,7 @@ __gg__move( cblc_field_t *fdest, } else { + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); char ach[128]; // Turn the integer source into a value: @@ -5696,9 +5758,10 @@ __gg__move( cblc_field_t *fdest, source_size = fsource->digits; // Turn the integer value into a string: - __gg__binary_to_string_internal(ach, - source_size, - value); + __gg__binary_to_string_encoded(ach, + source_size, + value, + fdest->encoding); char *pach = ach; @@ -5712,13 +5775,14 @@ __gg__move( cblc_field_t *fdest, if( fsource->attr & intermediate_e ) { + charmap_t *charmap_src = __gg__get_charmap(fsource->encoding); while(source_size > 1) // This ensures we leave one '0' { if( *(pach+1) == '\0' ) { break; } - if( ((*pach)&0xFF) != internal_zero ) + if( ((*pach)&0xFF) != charmap_src->mapped_character(ascii_0)) { break; } @@ -5737,13 +5801,14 @@ __gg__move( cblc_field_t *fdest, // min_length is smaller than dest_length, so we have to // space-fill the excess bytes in the destination: memset( fdest->data+dest_offset + min_length, - internal_space, + charmap_dest->mapped_character(ascii_space), dest_size - min_length ); } } else { - // Destination is right-justified, so things are slightly more complex + // Destination is right-justified, so things are slightly more + // complex if( source_size >= dest_size ) { // We need to truncate the source data on the left: @@ -5758,7 +5823,9 @@ __gg__move( cblc_field_t *fdest, memmove(fdest->data+dest_offset + (dest_size - source_size), pach, source_size ); - memset(fdest->data+dest_offset, internal_space, (dest_size - source_size)); + memset(fdest->data+dest_offset, + charmap_dest->mapped_character(ascii_space), + (dest_size - source_size)); } } } @@ -5766,6 +5833,7 @@ __gg__move( cblc_field_t *fdest, case FldIndex: { + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); char ach[128]; // Turn the integer source into a value: @@ -5787,7 +5855,7 @@ __gg__move( cblc_field_t *fdest, // min_length is smaller than dest_length, so we have to // space-fill the excess bytes in the destination: memset( fdest->data+dest_offset + min_length, - internal_space, + charmap_dest->mapped_character(ascii_space), dest_size - min_length ); } } @@ -5808,7 +5876,9 @@ __gg__move( cblc_field_t *fdest, memmove(fdest->data+dest_offset + (dest_size - source_size), pach, source_size ); - memset(fdest->data+dest_offset, internal_space, (dest_size - source_size)); + memset(fdest->data+dest_offset, + charmap_dest->mapped_character(ascii_space), + (dest_size - source_size)); } } } @@ -5823,6 +5893,7 @@ __gg__move( cblc_field_t *fdest, case FldNumericBinary: { + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); switch( source_type ) { case FldGroup: @@ -5833,7 +5904,7 @@ __gg__move( cblc_field_t *fdest, // min_length is smaller than dest_length, so we have to // space-fill the excess bytes in the destination: memset( fdest->data+dest_offset + min_length, - internal_space, + charmap_dest->mapped_character(ascii_space), dest_size - min_length ); } fdest->attr &= ~FIGCONST_MASK; @@ -5968,8 +6039,10 @@ __gg__move( cblc_field_t *fdest, case FldNumericBin5: case FldPacked: case FldIndex: + { // Bin5 and Index are treated with no truncation, as if they were // trunc_bin_e. The other types aren't subject to truncation. + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); switch( source_type ) { case FldGroup: @@ -5980,7 +6053,7 @@ __gg__move( cblc_field_t *fdest, // min_length is smaller than dest_length, so we have to // space-fill the excess bytes in the destination: memset( fdest->data+dest_offset + min_length, - internal_space, + charmap_dest->mapped_character(ascii_space), dest_size - min_length ); } break; @@ -6049,9 +6122,11 @@ __gg__move( cblc_field_t *fdest, break; } break; + } case FldAlphaEdited: { + charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding); switch( source_type ) { case FldGroup: @@ -6062,14 +6137,15 @@ __gg__move( cblc_field_t *fdest, // min_length is smaller than dest_length, so we have to // space-fill the excess bytes in the destination: memset( fdest->data+dest_offset + min_length, - internal_space, + charmap_dest->mapped_character(ascii_space), dest_size - min_length ); } break; case FldNumericDisplay: { - int source_digits = fsource->digits + (fsource->rdigits<0 ? -fsource->rdigits : 0) ; + int source_digits = fsource->digits + + (fsource->rdigits<0 ? -fsource->rdigits : 0) ; // Pick up the absolute value of the source value = __gg__binary_value_from_qualified_field(&rdigits, @@ -6080,19 +6156,17 @@ __gg__move( cblc_field_t *fdest, // Convert it to the full complement of digits available // from the source...but no more - __gg__binary_to_string_ascii(ach, source_digits, value); - - // Binary to string returns ASCII characters: - for(int i=0; iencoding); // And move them into place: - __gg__string_to_alpha_edited( reinterpret_cast(fdest->data+dest_offset), - ach, - source_digits, - fdest->picture); + __gg__string_to_alpha_edited( + reinterpret_cast(fdest->data+dest_offset), + fdest->encoding, + ach, + source_digits, + fdest->picture); break; } @@ -6106,41 +6180,33 @@ __gg__move( cblc_field_t *fdest, &display_string_size, display_string_length); - if( source_figconst == low_value_e ) - { - memset(display_string, ascii_to_internal(__gg__low_value_character), dest_size); - } - else if( source_figconst == zero_value_e ) - { - memset(display_string, internal_zero, dest_size); - } - else if( source_figconst == space_value_e ) - { - memset(display_string, internal_space, dest_size); - } - else if( source_figconst == quote_value_e ) + int fc_char = __gg__fc_char(fsource); + if( fc_char > -1 ) { - memset(display_string, ascii_to_internal(__gg__quote_character), dest_size); - } - else if( source_figconst == high_value_e ) - { - memset(display_string, ascii_to_internal(__gg__high_value_character), dest_size); + memset(display_string, fc_char, dest_size); + __gg__convert_encoding_length(display_string, + dest_size, + fsource->encoding, + fdest->encoding ); } else { - display_string = format_for_display_internal( - &display_string, - &display_string_size, - fsource, - reinterpret_cast(fsource->data+source_offset), - source_size, - source_flags && REFER_T_ADDRESS_OF); + format_for_display_internal( + &display_string, + &display_string_size, + fsource, + reinterpret_cast + (fsource->data+source_offset), + source_size, + source_flags && REFER_T_ADDRESS_OF); display_string_length = strlen(display_string); } - __gg__string_to_alpha_edited( reinterpret_cast(fdest->data+dest_offset), - display_string, - display_string_length, - fdest->picture); + __gg__string_to_alpha_edited( reinterpret_cast + (fdest->data+dest_offset), + fdest->encoding, + display_string, + display_string_length, + fdest->picture); break; } } @@ -6153,11 +6219,18 @@ __gg__move( cblc_field_t *fdest, { case FldAlphanumeric: { + // Converting alphanumeric to float means first converting to + // ascii: + size_t charsout; + const char *converted = __gg__iconverter(fsource->encoding, + DEFAULT_CHARMAP_SOURCE, + PTRCAST(char, fsource->data+source_offset), + source_size, + &charsout); char ach[256]; size_t len = std::min(source_size, sizeof(ach)-1); - memcpy(ach, fsource->data+source_offset, len); + memcpy(ach, converted, len); ach[len] = '\0'; - __gg__internal_to_console_in_place(ach, len); switch( fdest->capacity ) { case 4: @@ -6172,7 +6245,6 @@ __gg__move( cblc_field_t *fdest, } case 16: { - //*(_Float128 *)(fdest->data+dest_offset) = strtofp128(ach, NULL); GCOB_FP128 t = strtofp128(ach, NULL); memcpy(fdest->data+dest_offset, &t, 16); break; @@ -6208,10 +6280,13 @@ int __gg__move_literala(cblc_field_t *field, size_t field_offset, size_t field_size, - cbl_round_t rounded_, - const char *str, - size_t strlen ) + cbl_round_t rounded_, + const char *str, + size_t strlen ) { + // It is required that the source 'str' be encoded the same as the + // field-encoding. + cbl_round_t rounded = static_cast(rounded_ & ~REFER_ALL_BIT); bool move_all = !!(rounded_ & REFER_ALL_BIT); @@ -6233,15 +6308,21 @@ __gg__move_literala(cblc_field_t *field, case FldGroup: case FldAlphanumeric: { - alpha_to_alpha_move_from_location(field, field_offset, field_size, str, strlen, move_all); + alpha_to_alpha_move_from_location(field, + field_offset, + field_size, + str, + strlen, + move_all); break; } case FldNumericBinary: { - value = __gg__dirty_to_binary_internal( str, - strlen, - &rdigits ); + value = __gg__dirty_to_binary(str, + field->encoding, + strlen, + &rdigits ); if( truncation_mode == trunc_std_e ) { // We need to adjust the value to have the rdigits of the @@ -6289,9 +6370,10 @@ __gg__move_literala(cblc_field_t *field, // Bin5 and Index are treated with no truncation, as if they were // trunc_bin_e. The other types aren't subject to truncation. // We are moving a number to a number: - value = __gg__dirty_to_binary_internal( str, - strlen, - &rdigits ); + value = __gg__dirty_to_binary(str, + field->encoding, + strlen, + &rdigits ); __gg__int128_to_qualified_field( field, field_offset, @@ -6311,13 +6393,18 @@ __gg__move_literala(cblc_field_t *field, &display_string_size, field_size); - memset(display_string, internal_space, display_string_size); + charmap_t *charmap = __gg__get_charmap(field->encoding); + memset( display_string, + charmap->mapped_character(ascii_space), + display_string_size); size_t len = std::min(display_string_size, strlen); memcpy(display_string, str, len); - __gg__string_to_alpha_edited( reinterpret_cast(field->data+field_offset), - display_string, - field_size, - field->picture); + __gg__string_to_alpha_edited( + reinterpret_cast(field->data+field_offset), + field->encoding, + display_string, + field_size, + field->picture); break; } @@ -6766,31 +6853,31 @@ typedef struct id_2_result } id_2_result; static normalized_operand -normalize_id( const cblc_field_t *refer, - size_t refer_o, - size_t refer_s - ) +normalize_id( const cblc_field_t *field, + size_t field_o, + size_t field_s, + cbl_encoding_t encoding ) { normalized_operand retval; - if( refer ) + if( field ) { - const unsigned char *data = refer->data + refer_o; + const unsigned char *data = field->data + field_o; cbl_figconst_t figconst - = (cbl_figconst_t)(refer->attr & FIGCONST_MASK); + = (cbl_figconst_t)(field->attr & FIGCONST_MASK); retval.offset = 0; - retval.length = refer_s; + retval.length = field_s; - if( refer->type == FldNumericDisplay ) + if( field->type == FldNumericDisplay ) { // The value is NumericDisplay. - if( refer->attr & separate_e ) + if( field->attr & separate_e ) { // Because the sign is a separate plus or minus, the length // gets reduced by one: - retval.length = refer_s - 1; - if( refer->attr & leading_e ) + retval.length = field_s - 1; + if( field->attr & leading_e ) { // Because the sign character is LEADING, we increase the // offset by one @@ -6799,11 +6886,10 @@ normalize_id( const cblc_field_t *refer, } for( size_t i=retval.offset; iencoding); // Because we are dealing with a NumericDisplay that might have // the minus bit turned on, we need to mask it off - unsigned char ch = data[i]; - turn_sign_bit_off(&ch); - retval.the_characters += ch; + retval.the_characters += charmap->set_digit_negative(data[i], false); } } else @@ -6818,37 +6904,8 @@ normalize_id( const cblc_field_t *refer, } else { - char ch=0; - switch( figconst ) - { - case low_value_e : - ch = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e : - ch = internal_zero; - break; - case space_value_e : - ch = internal_space; - break; - case quote_value_e : - ch = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - ch = __gg__high_value_character; - } - else - { - ch = ascii_to_internal(__gg__high_value_character); - } - break; - case normal_value_e: - // We can't get here - break; - case null_value_e: - break; - } + charmap_t *charmap = __gg__get_charmap(encoding); + char ch = charmap->figconst_character(figconst); for( size_t i=retval.offset; iencoding); std::vector comparands; @@ -7080,10 +7137,10 @@ inspect_backward_format_1(const size_t integers[]) cblc_index += 1; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); next_comparand.alpha = normalized_id_1.the_characters.c_str(); @@ -7128,7 +7185,7 @@ inspect_backward_format_1(const size_t integers[]) cblc_index += 1; next_comparand.identifier_3 - = normalize_id(id3, id3_o, id3_s); + = normalize_id(id3, id3_o, id3_s, id1->encoding); next_comparand.alpha = normalized_id_1.the_characters.c_str(); @@ -7136,10 +7193,10 @@ inspect_backward_format_1(const size_t integers[]) = next_comparand.alpha + normalized_id_1.length; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); the_alpha_and_omega_backward( normalized_id_4_before, normalized_id_4_after, @@ -7371,7 +7428,7 @@ __gg__inspect_format_1(int backward, size_t integers[]) cblc_index += 1; // normalize it, according to the language specification. normalized_operand normalized_id_1 - = normalize_id(id1, id1_o, id1_s); + = normalize_id(id1, id1_o, id1_s, id1->encoding); std::vector comparands; @@ -7417,10 +7474,10 @@ __gg__inspect_format_1(int backward, size_t integers[]) cblc_index += 1; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); next_comparand.alpha = normalized_id_1.the_characters.c_str(); @@ -7465,7 +7522,10 @@ __gg__inspect_format_1(int backward, size_t integers[]) cblc_index += 1; next_comparand.identifier_3 - = normalize_id(id3, id3_o, id3_s); + = normalize_id(id3, + id3_o, + id3_s, + id1->encoding); next_comparand.alpha = normalized_id_1.the_characters.c_str(); @@ -7473,10 +7533,10 @@ __gg__inspect_format_1(int backward, size_t integers[]) = next_comparand.alpha + normalized_id_1.length; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); the_alpha_and_omega(normalized_id_4_before, normalized_id_4_after, @@ -7703,7 +7763,7 @@ inspect_backward_format_2(const size_t integers[]) // normalize it, according to the language specification. normalized_operand normalized_id_1 - = normalize_id(id1, id1_o, id1_s); + = normalize_id(id1, id1_o, id1_s, id1->encoding); std::vector comparands; @@ -7737,11 +7797,11 @@ inspect_backward_format_2(const size_t integers[]) cblc_index += 1; next_comparand.identifier_5 - = normalize_id(id5, id5_o, id5_s); + = normalize_id(id5, id5_o, id5_s, id1->encoding); normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); // Because this is a CHARACTER operation, the lengths of // identifier-3 and identifier-5 should be one. Let's avoid the @@ -7792,8 +7852,8 @@ inspect_backward_format_2(const size_t integers[]) size_t id4_after_s = __gg__treeplet_1s[cblc_index]; cblc_index += 1; - next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s); - next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s); + next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s, id1->encoding); + next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s, id1->encoding); // Identifiers 3 and 5 have to be the same length. But // but either, or both, can be figurative constants. If @@ -7818,9 +7878,9 @@ inspect_backward_format_2(const size_t integers[]) = next_comparand.alpha + normalized_id_1.length; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); the_alpha_and_omega_backward( normalized_id_4_before, normalized_id_4_after, @@ -8007,14 +8067,17 @@ inspect_backward_format_2(const size_t integers[]) { for(size_t i=0; iencoding); id1_data[index_dest] = normalized_id_1.the_characters[i]; - if( is_sign_bit_on (normalized_id_1.the_characters[i]) ) + if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) ) { - turn_sign_bit_on(&id1_data[index_dest]); + id1_data[index_dest] + = charmap->set_digit_negative(id1_data[index_dest], true); } else { - turn_sign_bit_off(&id1_data[index_dest]); + id1_data[index_dest] + = charmap->set_digit_negative(id1_data[index_dest], false); } index_dest += 1; } @@ -8050,7 +8113,7 @@ __gg__inspect_format_2(int backward, size_t integers[]) // normalize it, according to the language specification. normalized_operand normalized_id_1 - = normalize_id(id1, id1_o, id1_s); + = normalize_id(id1, id1_o, id1_s, id1->encoding); std::vector comparands; @@ -8085,11 +8148,11 @@ __gg__inspect_format_2(int backward, size_t integers[]) cblc_index += 1; next_comparand.identifier_5 - = normalize_id(id5, id5_o, id5_s); + = normalize_id(id5, id5_o, id5_s, id1->encoding); normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); // Because this is a CHARACTER operation, the lengths of // identifier-3 and identifier-5 should be one. Let's avoid the @@ -8140,8 +8203,14 @@ __gg__inspect_format_2(int backward, size_t integers[]) size_t id4_after_s = __gg__treeplet_1s[cblc_index]; cblc_index += 1; - next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s); - next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s); + next_comparand.identifier_3 = normalize_id(id3, + id3_o, + id3_s, + id1->encoding); + next_comparand.identifier_5 = normalize_id(id5, + id5_o, + id5_s, + id1->encoding); // Identifiers 3 and 5 have to be the same length. But // but either, or both, can be figurative constants. If @@ -8166,9 +8235,9 @@ __gg__inspect_format_2(int backward, size_t integers[]) = next_comparand.alpha + normalized_id_1.length; normalized_operand normalized_id_4_before - = normalize_id(id4_before, id4_before_o, id4_before_s); + = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding); normalized_operand normalized_id_4_after - = normalize_id(id4_after, id4_after_o, id4_after_s); + = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding); the_alpha_and_omega(normalized_id_4_before, normalized_id_4_after, @@ -8363,14 +8432,17 @@ __gg__inspect_format_2(int backward, size_t integers[]) { for(size_t i=0; iencoding); id1_data[index_dest] = normalized_id_1.the_characters[i]; - if( is_sign_bit_on (normalized_id_1.the_characters[i]) ) + if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) ) { - turn_sign_bit_on(&id1_data[index_dest]); + id1_data[index_dest] + = charmap->set_digit_negative(id1_data[index_dest], true); } else { - turn_sign_bit_off(&id1_data[index_dest]); + id1_data[index_dest] + = charmap->set_digit_negative(id1_data[index_dest], false); } index_dest += 1; } @@ -8385,90 +8457,169 @@ __gg__inspect_format_2(int backward, size_t integers[]) return; } +static char * +normalize_for_inspect_format_4( size_t *dest_size, + const cblc_field_t *var, + size_t var_offset, + size_t var_size, + cbl_encoding_t encoding) + { + // Returns a malloced pointer; the caller needs to free it. + char *retval; + retval = static_cast(malloc(var_size+1)); + if(var) + { + cbl_figconst_t figconst = + static_cast(var->attr & FIGCONST_MASK); + if( figconst ) + { + // Build up an var_size array of figconst characters + charmap_t *charmap = __gg__get_charmap(encoding); + char figchar = '\0'; + switch( figconst ) + { + case low_value_e : + figchar = charmap->low_value_character(); + break; + case zero_value_e : + figchar = charmap->mapped_character(ascii_0); + break; + case space_value_e : + figchar = charmap->mapped_character(ascii_space); + break; + case quote_value_e : + figchar = charmap->quote_character(); + break; + case high_value_e : + figchar = charmap->high_value_character(); + break; + case null_value_e: + break; + default: + figchar = '\0'; + abort(); + break; + } + memset(retval, figchar, var_size); + retval[var_size] = '\0'; + } + else + { + // It's not a figurative constant, so convert var to the target encoding. + memcpy(retval, + __gg__iconverter(var->encoding, + encoding, + PTRCAST(char, var->data) + var_offset, + var_size, + dest_size), + var_size); + retval[var_size] = '\0'; + } + } + else + { + retval = nullptr; + *dest_size = 0; + } + return retval; + } + extern "C" void __gg__inspect_format_4( int backward, - cblc_field_t *input, + cblc_field_t *input, // identifier-1 size_t input_offset, size_t input_size, - cblc_field_t *original, + const cblc_field_t *original, // id-6 / literal-4 size_t original_offset, size_t original_size, - cblc_field_t *replacement, + const cblc_field_t *replacement, // id-7 / literal-5 size_t replacement_offset, size_t replacement_size, - cblc_field_t *after, + const cblc_field_t *after, // id-4 / literal-2 size_t after_offset, size_t after_size, - cblc_field_t *before, + const cblc_field_t *before, // id-4 / literal-2 size_t before_offset, size_t before_size ) { - static size_t psz_input_size = MINIMUM_ALLOCATION_SIZE; - static size_t psz_original_size = MINIMUM_ALLOCATION_SIZE; - static size_t psz_replacement_size = MINIMUM_ALLOCATION_SIZE; - static size_t psz_after_size = MINIMUM_ALLOCATION_SIZE; - static size_t psz_before_size = MINIMUM_ALLOCATION_SIZE; - static size_t psz_figstring_size = MINIMUM_ALLOCATION_SIZE; - - static char *psz_input = static_cast(malloc(psz_input_size )); - static char *psz_original = static_cast(malloc(psz_original_size )); - static char *psz_replacement = static_cast(malloc(psz_replacement_size)); - static char *psz_after = static_cast(malloc(psz_after_size )); - static char *psz_before = static_cast(malloc(psz_before_size )); - static char *psz_figstring = static_cast(malloc(psz_figstring_size )); - - bool all = replacement_size == (size_t)(-1LL); - if( all ) + // We need to cope with multiple encodings; the ISO specification says only + // that identifier-1 and -3 through -n are display or national. + + // We will leave the input encoded as whatever it is, and we will convert the + // others to match. + + // We also need to cope with anything except identifier-1 being a figurative + // constant. + + cbl_figconst_t figconst_original = + static_cast(original->attr & FIGCONST_MASK); + cbl_figconst_t figconst_replacement = + static_cast(replacement->attr & FIGCONST_MASK); + int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0); + switch( figswitch ) { - replacement_size = psz_original_size; + case 0: + // Neither are figconst; we leave the sizes alone + break; + case 1: + // Only replacement is figconst, so we make its size the same as the + // original. This will cause CONVERTING "ABC" TO ZERO to be the same as + // CONVERTING "ABC" TO "000" + replacement_size = original_size; + break; + case 2: + // Only original is figconst. Set the size to one. (This is necessary + // because the size of NULL is eight, since NULL does double-duty as both + // a character (this is a MicroFocus specification) and a pointer. + original_size = 1; + break; + case 3: + // Both are figconst + replacement_size = original_size = 1; + break; } - psz_input = format_for_display_local(&psz_input , &psz_input_size , input , input_offset , input_size , 0); - psz_original = format_for_display_local(&psz_original , &psz_original_size , original , original_offset , original_size , 0); - psz_replacement = format_for_display_local(&psz_replacement, &psz_replacement_size, replacement, replacement_offset, replacement_size, 0); - psz_after = format_for_display_local(&psz_after , &psz_after_size , after , after_offset , after_size , 0); - psz_before = format_for_display_local(&psz_before , &psz_before_size , before , before_offset , before_size , 0); + // Because before and after can be figurative constant NULL, we have to make + // sure that in such cases the size is 1: + if(before && before_size && before->attr & FIGCONST_MASK) + { + before_size = 1; + } + if(after && after_size && after->attr & FIGCONST_MASK) + { + after_size = 1; + } + + size_t psz_input_size ; + size_t psz_original_size ; + size_t psz_replacement_size; + size_t psz_after_size ; + size_t psz_before_size ; + bool all = (replacement_size == (size_t)(-1LL)); if( all ) { - memset(psz_replacement, *(replacement->data+replacement_offset), replacement_size); + // A replacement_size of -1 means that the statement is something like + // INSPECT XYZ CONVERTING "abcxyz" to ALL "?" That means replacement is + // a single character. We need to convert it to the target encoding. + replacement_size = 1; } - cbl_figconst_t figconst = - (cbl_figconst_t)(replacement->attr & FIGCONST_MASK); - if( figconst ) + char *psz_input = normalize_for_inspect_format_4(&psz_input_size , input , input_offset , input_size , input->encoding); + char *psz_original = normalize_for_inspect_format_4(&psz_original_size , original , original_offset , original_size , input->encoding); + char *psz_replacement = normalize_for_inspect_format_4(&psz_replacement_size, replacement, replacement_offset, replacement_size, input->encoding); + char *psz_after = normalize_for_inspect_format_4(&psz_after_size , after , after_offset , after_size , input->encoding); + char *psz_before = normalize_for_inspect_format_4(&psz_before_size , before , before_offset , before_size , input->encoding); + + if( all ) { - size_t figchars = strlen(psz_input)+1; - __gg__realloc_if_necessary(&psz_figstring, &psz_figstring_size, figchars); - char figchar = '\0'; - switch( figconst ) - { - case normal_value_e: - abort(); - break; - case low_value_e : - figchar = __gg__low_value_character; - break; - case zero_value_e : - figchar = internal_0; - break; - case space_value_e : - figchar = internal_space; - break; - case quote_value_e : - figchar = ascii_to_internal(__gg__quote_character); - break; - case high_value_e : - figchar = __gg__high_value_character; - break; - case null_value_e: - break; - } - memset(psz_figstring, figchar, figchars-1); - psz_figstring[figchars] = '\0'; - psz_replacement = psz_figstring; + // We now expand the single-byte replacement to be the same length as + // original. + psz_replacement_size = psz_original_size; + psz_replacement = static_cast(realloc(psz_replacement, psz_replacement_size)); + memset(psz_replacement, psz_replacement[0], psz_replacement_size); } // Use a simple map to make this O(N), rather than an O(N-squared), @@ -8493,28 +8644,24 @@ __gg__inspect_format_4( int backward, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; unsigned char map[256]; - unsigned char replaced[256]; + // Initialize the map to a one-to-one correspondence. memcpy(map, map_init, 256); - memset(replaced, 0, 256); - for(size_t i=0; idata+input_offset, psz_input, input_size); + + free(psz_input ); + free(psz_original ); + free(psz_replacement ); + free(psz_after ); + free(psz_before ); } static void @@ -8589,6 +8741,7 @@ move_string(cblc_field_t *field, size_t offset, size_t length, const char *from, + cbl_encoding_t src_encoding, size_t strlen_from = (size_t)(-1) ) { bool moved = true; @@ -8607,8 +8760,16 @@ move_string(cblc_field_t *field, char *to = reinterpret_cast(field->data + offset); size_t dest_length = length ? length : field->capacity; size_t source_length = strlen_from; - size_t count = std::min(dest_length, source_length); + // We need to convert the source string to the destination encoding: + size_t charsout; + const char *converted = __gg__iconverter(src_encoding, + field->encoding, + from, + source_length, + &charsout); + + size_t count = std::min(dest_length, source_length); if( source_length >= dest_length ) { // We have more source characters than places to put them @@ -8617,7 +8778,7 @@ move_string(cblc_field_t *field, // Destination is right-justified, so we // discard the leading source characters: memmove(to, - from + (source_length - count), + converted + (source_length - count), count); } else @@ -8625,7 +8786,7 @@ move_string(cblc_field_t *field, // Destination is right-justified, so we // discard the trailing source characters: memmove(to, - from, + converted, count); } } @@ -8638,9 +8799,12 @@ move_string(cblc_field_t *field, // ordinary string too short to fill it. So, we space-fill // the leading characters. memmove(to + (dest_length-count), - from, + converted, count); - memset(to, internal_space, dest_length-count); + // Get the charmap after the move, because it can mess with the + // static 'to' buffer. + charmap_t *charmap = __gg__get_charmap(field->encoding); + memset(to, charmap->mapped_character(ascii_space), dest_length-count); } else { @@ -8648,10 +8812,11 @@ move_string(cblc_field_t *field, // We do the move first, in case this is an overlapping move // involving characters that will be space-filled memmove(to, - from, + converted, count); + charmap_t *charmap = __gg__get_charmap(field->encoding); memset( to + count, - internal_space, + charmap->mapped_character(ascii_space), dest_length-count); } } @@ -8668,9 +8833,10 @@ move_string(cblc_field_t *field, // We are starting with a string, and setting it to a numerical // target. int rdigits; - __int128 value = __gg__dirty_to_binary_internal( from, - strlen_from, - &rdigits); + __int128 value = __gg__dirty_to_binary(from, + src_encoding, + strlen_from, + &rdigits); __gg__int128_to_qualified_field(field, offset, length, @@ -8696,15 +8862,17 @@ move_string(cblc_field_t *field, } static char * -brute_force_trim(char *str) +brute_force_trim(char *str, cbl_encoding_t encoding) { + charmap_t *charmap = __gg__get_charmap(encoding); + char *retval = str; - while( *retval == internal_space ) + while( *retval == charmap->mapped_character(ascii_space) ) { retval += 1; } char *p = retval + strlen(retval)-1; - while( p > retval && *p == internal_space ) + while( p > retval && *p == charmap->mapped_character(ascii_space) ) { *p-- = NULLCH; } @@ -8734,24 +8902,20 @@ __gg__string(const size_t integers[]) size_t index_cblc = 0 ; - char figlow[2] = {ascii_to_internal(__gg__low_value_character), 0x00}; - char fighigh[2] = {ascii_to_internal(__gg__high_value_character), 0x00}; - char figzero[2] = {(char)internal_zero, 0x00}; - char figquote[2] = {ascii_to_internal(__gg__quote_character), 0x00}; - char figspace[2] = {(char)internal_space, 0x00}; + // Pick up the target + const cblc_field_t *tgt = ref[index_cblc]; - if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) - { - fighigh[0] = __gg__high_value_character; - } - else - { - fighigh[0] = ascii_to_internal(__gg__high_value_character); - } + // Pick up the target encoding, which we assume controls all the parameters + cbl_encoding_t tgt_encoding = tgt->encoding; + charmap_t *charmap = __gg__get_charmap(tgt_encoding); + char figlow[2] = {(char)__gg__low_value_character, 0x00}; + char fighigh[2] = {(char)__gg__high_value_character, 0x00}; + char figzero[2] = {(char)charmap->mapped_character(ascii_zero), 0x00}; + char figquote[2] = {(char)charmap->mapped_character(__gg__quote_character), 0x00}; + char figspace[2] = {(char)charmap->mapped_character(ascii_space), 0x00}; - // Pick up the target - const cblc_field_t *tgt = ref[index_cblc]; + // Pick up the rest of the parameters size_t tgt_o = ref_o[index_cblc]; size_t tgt_s = ref_s[index_cblc]; index_cblc += 1; @@ -8927,35 +9091,56 @@ display_both(cblc_field_t *field, static size_t display_string_size = MINIMUM_ALLOCATION_SIZE; static char *display_string = static_cast(malloc(display_string_size)); - format_for_display_internal(&display_string, - &display_string_size, - field, - qual_data, - qual_size, - !!(flags & REFER_T_ADDRESS_OF) ); + cbl_encoding_t encoding = format_for_display_internal( + &display_string, + &display_string_size, + field, + qual_data, + qual_size, + !!(flags & REFER_T_ADDRESS_OF) ); + + cbl_encoding_t encout = __gg__console_encoding; + + // It can be the case in COBOL programs that a variable set to HIGH-VALUE is + // displayed. In CP1252, the result for 0xFF is a y-with diaresis. + + // In EBCDIC CP1140, however, the 0xFF character is non-printing. It's my + // opinion that's protentially confusing, especially when debugging. - // Let's honor the locale of the system, as best we can: - static size_t converted_size = MINIMUM_ALLOCATION_SIZE; - static char *converted = static_cast(malloc(converted_size)); + // So, I am going to go out on a limb. When the character set is known to be + // EBCDIC-ish, I am going to scan the output string and convert 0xFF to 0xDF. - internal_to_console(&converted, &converted_size, display_string, strlen(display_string)); + // In this way both ASCII and EBCDIC displays of HIGH-VALUE will be the same. - ssize_t ss = write( file_descriptor, - converted, - strlen(converted)); - if(ss == -1) + // There are valid arguments against doing this. But when I was doing some + // debugging, I found the EBCDIC behavior of displaying nothing for + // HIGH-VALUE to be more astonishing than printing a y-with-diaresis. There + // is, of course, the potential for confusing a real y-with-diaresis with a + // a HIGH-VALUE character. But it is my opinion that those will be resolved + // by examining the context. + + const charmap_t *charmap = __gg__get_charmap(encoding); + if( charmap->is_like_ebcdic() ) { - fprintf(stderr, "__gg__display() %s %p\n", field->name, qual_data); - fprintf(stderr, "__gg__display() %ld\n", static_cast(converted_size)); - fprintf(stderr, "__gg__display() "); - for(size_t i=0; i(malloc(converted_size)); - - size_t max_possible = 2 * length; - if( max_possible > converted_size ) - { - converted_size = max_possible; - converted = static_cast(realloc(converted, converted_size)); - } - - __gg__ascii_to_console(&converted, &converted_size, str, length); - +__gg__display_string( int file_descriptor, + cbl_encoding_t encoding, + const char *str, + size_t length, + int advance ) + { + cbl_encoding_t encout = __gg__console_encoding; + + size_t outlength; + const char *converted = __gg__iconverter( encoding, + encout, + str, + length, + &outlength); write( file_descriptor, converted, - strlen(converted)); + outlength); if( advance ) { write( file_descriptor, @@ -9081,11 +9262,11 @@ not_mangled_core(const char *s, const char *eos) const char *s2 = eos; bool has_dash = false; - while( s < eos && *s == internal_space ) + while( s < eos && *s == ascii_space ) { s += 1; } - while( s < eos && *(eos-1) == internal_space ) + while( s < eos && *(eos-1) == ascii_space ) { eos -= 1; } @@ -9204,11 +9385,11 @@ __gg__accept( enum special_name_t special_e, case FldGroup : case FldAlphanumeric : case FldAlphaEdited : - console_to_internal(buffer, i); move_string(field, offset, length, buffer, + __gg__console_encoding, strlen(buffer)); break; @@ -9258,10 +9439,10 @@ we_are_done: *p = NULLCH; int rdigits; - __int128 value = __gg__dirty_to_binary_source( buffer, - (int)i, - &rdigits); - + __int128 value = __gg__dirty_to_binary(buffer, + __gg__console_encoding, + (int)i, + &rdigits); __gg__int128_to_qualified_field(field, offset, length, @@ -9731,14 +9912,51 @@ __gg__bitwise_op( cblc_field_t *tgt, } } +/* + * Because this variable is static, the contructor runs before main and is + * guaranted to run. + */ +static class rt_encoding_t + { + const char *ctype, *lc_ctype; + public: + rt_encoding_t() : ctype( setlocale(LC_CTYPE, "") ) + { + lc_ctype = nl_langinfo(CODESET); + // Let's learn what the computer is using for the console: + // We need to establish the codeset used by the system console: + __gg__console_encoding = use_locale(); + + if( getenv("CODESET") ) + { + fprintf(stderr, "%s:%d: ctype=%s,lc_ctype=%s\n", __func__, __LINE__, + ctype? ctype : "error" , lc_ctype); + } + } + cbl_encoding_t use_locale() const + { + auto encoding = strstr(ctype, "UTF-8") ? + iconv_UTF_8_e : __gg__encoding_iconv_type(lc_ctype); + if( getenv("CODESET") ) + { + fprintf(stderr, "%s:%d: console encoding is '%s'\n", __func__, __LINE__, + __gg__encoding_iconv_name(encoding) ); + } + return encoding; + } + } rt_encoding; + extern "C" void -__gg__set_initial_switch_value( ) +__gg__onetime_initialization( ) { - // We need to establish the initial value of the UPSI-1 switch register - // We are using IBM's conventions: + // This routine gets called once per executable before anything else runs + + // We need to establish the initial value of the UPSI-1 switch register We + // are using IBM's conventions: // https://www.ibm.com/docs/en/zvse/6.2?topic=SSB27H_6.2.0/fa2sf_communicate_appl_progs_via_job_control.html - // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that SW-0, SW-5, and SW-6 are on. + // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that SW-0, + // SW-5, and SW-6 are on. __int128 value = 0; __int128 bit = 1; @@ -9772,6 +9990,8 @@ is_numeric_edited_numeric(cblc_field_t *, size_t, size_t ) static int is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) { + charmap_t *charmap = __gg__get_charmap(field->encoding); + int retval = 1; bool signable = !!(field->attr & signable_e); bool leading = !!(field->attr & leading_e); @@ -9784,8 +10004,8 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) { // First character must be +/- if( digits < digits_e - || ( *digits != internal_plus - && *digits != internal_minus) ) + || ( *digits != charmap->mapped_character(ascii_plus) + && *digits != charmap->mapped_character(ascii_minus)) ) { retval = 0; } @@ -9797,8 +10017,8 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) // Last character must be +/- digits_e -= 1; if( digits < digits_e - || ( *digits_e != internal_plus - && *digits_e != internal_minus) ) + || ( *digits_e != charmap->mapped_character(ascii_plus) + && *digits_e != charmap->mapped_character(ascii_minus)) ) { retval = 0; } @@ -9810,8 +10030,9 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) if( digits < digits_e ) { unsigned char first_char = (unsigned char)*digits; - turn_sign_bit_off(&first_char); - if(first_charinternal_9) + first_char = charmap->set_digit_negative(first_char, false); + if( first_char < charmap->mapped_character(ascii_0) + || first_char > charmap->mapped_character(ascii_9)) { retval = 0; } @@ -9826,8 +10047,9 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) { digits_e -= 1; unsigned char final_char = (unsigned char)*digits_e; - turn_sign_bit_off(&final_char); - if(final_charinternal_9) + final_char = charmap->set_digit_negative(final_char, false); + if( final_charmapped_character(ascii_0) + || final_char>charmap->mapped_character(ascii_9) ) { retval = 0; } @@ -9837,8 +10059,8 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size) // all remaining characters are supposed to be zero through nine while( digits < digits_e ) { - if( (unsigned char)(*digits)internal_9 ) + if( (unsigned char)(*digits)mapped_character(ascii_0) + || (unsigned char)(*digits)>charmap->mapped_character(ascii_9) ) { retval = 0; break; @@ -9928,13 +10150,16 @@ is_alpha_a_number(const cblc_field_t *field, size_t offset, size_t size) { + charmap_t *charmap = __gg__get_charmap(field->encoding); + int mapped_0 = charmap->mapped_character(ascii_0); + int mapped_9 = charmap->mapped_character(ascii_9); int retval = 1; const unsigned char *bytes = (field->data + offset); for( size_t i=0; i internal_9) ) + if( (ch < mapped_0) + || (ch > mapped_9) ) { retval = 0; break; @@ -10005,10 +10230,25 @@ __gg__classify( classify_t type, } case ClassAlphabeticType: + { + charmap_t *charmap = __gg__get_charmap(field->encoding); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_A = charmap->mapped_character(ascii_A); + int mapped_I = charmap->mapped_character(ascii_I); + int mapped_J = charmap->mapped_character(ascii_J); + int mapped_R = charmap->mapped_character(ascii_R); + int mapped_S = charmap->mapped_character(ascii_S); + int mapped_Z = charmap->mapped_character(ascii_Z); + int mapped_a = charmap->mapped_character(ascii_a); + int mapped_i = charmap->mapped_character(ascii_i); + int mapped_j = charmap->mapped_character(ascii_j); + int mapped_r = charmap->mapped_character(ascii_r); + int mapped_s = charmap->mapped_character(ascii_s); + int mapped_z = charmap->mapped_character(ascii_z); while(alpha < omega) { ch = (*alpha++)&0xFF; - if( ch == internal_space ) + if( ch == mapped_space ) { continue; } @@ -10021,12 +10261,12 @@ __gg__classify( classify_t type, // many consecutive punches in a column because it would weaken the card // to the point where its structural integrity might be threatened, the // coding for the letter of the alphabet are not contiguous. - if(!( ( ch >= internal_A && ch <= internal_I) - || (ch >= internal_J && ch <= internal_R) - || (ch >= internal_S && ch <= internal_Z) - || (ch >= internal_a && ch <= internal_i) - || (ch >= internal_j && ch <= internal_r) - || (ch >= internal_s && ch <= internal_z) ) ) + if(!( ( ch >= mapped_A && ch <= mapped_I) + || (ch >= mapped_J && ch <= mapped_R) + || (ch >= mapped_S && ch <= mapped_Z) + || (ch >= mapped_a && ch <= mapped_i) + || (ch >= mapped_j && ch <= mapped_r) + || (ch >= mapped_s && ch <= mapped_z) ) ) { // The character is not alphabetic retval = 0; @@ -10034,41 +10274,63 @@ __gg__classify( classify_t type, } } break; + } case ClassLowerType: + { + charmap_t *charmap = __gg__get_charmap(field->encoding); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_a = charmap->mapped_character(ascii_a); + int mapped_i = charmap->mapped_character(ascii_i); + int mapped_j = charmap->mapped_character(ascii_j); + int mapped_r = charmap->mapped_character(ascii_r); + int mapped_s = charmap->mapped_character(ascii_s); + int mapped_z = charmap->mapped_character(ascii_z); while(alpha < omega) { ch = *alpha++; - if( ch == internal_space ) + if( ch == mapped_space ) { continue; } - if(!( ( ch >= internal_a && ch <= internal_i) - || (ch >= internal_j && ch <= internal_r) - || (ch >= internal_s && ch <= internal_z) ) ) + if(!( ( ch >= mapped_a && ch <= mapped_i) + || (ch >= mapped_j && ch <= mapped_r) + || (ch >= mapped_s && ch <= mapped_z) ) ) { retval = 0; break; } } break; + } + case ClassUpperType: + { + charmap_t *charmap = __gg__get_charmap(field->encoding); + int mapped_space = charmap->mapped_character(ascii_space); + int mapped_A = charmap->mapped_character(ascii_A); + int mapped_I = charmap->mapped_character(ascii_I); + int mapped_J = charmap->mapped_character(ascii_J); + int mapped_R = charmap->mapped_character(ascii_R); + int mapped_S = charmap->mapped_character(ascii_S); + int mapped_Z = charmap->mapped_character(ascii_Z); while(alpha < omega) { ch = *alpha++; - if( ch == internal_space ) + if( ch == mapped_space ) { continue; } - if(!( ( ch >= internal_A && ch <= internal_I) - || (ch >= internal_J && ch <= internal_R) - || (ch >= internal_S && ch <= internal_Z) ) ) + if(!( ( ch >= mapped_A && ch <= mapped_I) + || (ch >= mapped_J && ch <= mapped_R) + || (ch >= mapped_S && ch <= mapped_Z) ) ) { retval = 0; break; } } break; + } case ClassInvalidType: case ClassDbcsType: @@ -10084,38 +10346,77 @@ __gg__classify( classify_t type, return retval; } +extern "C" +void +__gg__convert_encoding( char *psz, + cbl_encoding_t from, + cbl_encoding_t to ) + { + // This does an in-place conversion of psz + if( from > custom_encoding_e ) + { + size_t charsout; + const char *converted = __gg__iconverter(from, + to, + psz, + strlen(psz), + &charsout); + strcpy(psz, converted); + } + } + +extern "C" +void +__gg__convert_encoding_length(char *pch, + size_t length, + cbl_encoding_t from, + cbl_encoding_t to ) + { + // This does an in-place conversion of length characters at pch + if( from > custom_encoding_e ) + { + size_t charsout; + const char *converted = __gg__iconverter(from, + to, + pch, + length, + &charsout); + memcpy(pch, converted, length); + } + } + static int -accept_envar( cblc_field_t *tgt, - size_t tgt_offset, - size_t tgt_length, - const char *psz_name) +accept_envar( cblc_field_t *tgt, + size_t tgt_offset, + size_t tgt_length, + const char *psz_name, + cbl_encoding_t encoding) { int retval = 1; // 1 means we couldn't find it if( psz_name ) { tgt_length = tgt_length ? tgt_length : tgt->capacity; - // Pick up the environment variable name, which is in the internal codeset + // Pick up the environment variable name char *env = strdup(psz_name); massert(env); - // Get rid of leading and trailing internal_space characters: - char *trimmed_env = brute_force_trim(env); + // Get rid of leading and trailing space characters: + char *trimmed_env = brute_force_trim( env, + encoding ); // Convert the name to the console codeset: - __gg__internal_to_console_in_place(trimmed_env, strlen(trimmed_env)); + __gg__convert_encoding( trimmed_env, + encoding, + DEFAULT_CHARMAP_SOURCE); // Pick up the environment variable, and convert it to the internal codeset const char *p = getenv(trimmed_env); if(p) { - char *pp = strdup(p); - massert(pp); - console_to_internal(pp, strlen(pp)); retval = 0; // Okay - move_string(tgt, tgt_offset, tgt_length, pp); - free(pp); + move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_CHARMAP_SOURCE); } free(env); } @@ -10146,7 +10447,8 @@ __gg__accept_envar( cblc_field_t *tgt, int retval = accept_envar(tgt, tgt_offset, tgt_length, - p); + p, + tgt->encoding); free(p); return retval; } @@ -10183,19 +10485,28 @@ __gg__set_envar(cblc_field_t *name, massert(val); massert(env); - // The name and the value arrive in the internal codeset: - memcpy(env, name->data+name_offset , name_length); + const char *converted; + size_t charsout; + + converted = __gg__iconverter(name->encoding, + __gg__console_encoding, + PTRCAST(char, name->data+name_offset), + name_length, + &charsout ); + memcpy(env, converted, name_length); env[name_length] = '\0'; - memcpy(val, value->data+value_offset, value_length); - val[value_length] = '\0'; - // Get rid of leading and trailing internal_space characters - char *trimmed_env = brute_force_trim(env); - char *trimmed_val = brute_force_trim(val); + converted = __gg__iconverter(value->encoding, + __gg__console_encoding, + PTRCAST(char, value->data+value_offset), + value_length, + &charsout ); + memcpy(val, converted, value_length); + val[value_length] = '\0'; - // Conver them to the console codeset - __gg__internal_to_console_in_place(trimmed_env, strlen(trimmed_env)); - __gg__internal_to_console_in_place(trimmed_val, strlen(trimmed_val)); + // Get rid of leading and trailing space characters + char *trimmed_env = brute_force_trim(env, __gg__console_encoding); + char *trimmed_val = brute_force_trim(val, __gg__console_encoding); if( getenv(trimmed_env) ) { @@ -10210,6 +10521,7 @@ __gg__set_envar(cblc_field_t *name, } static int stashed_argc = 0; +// The stashed arguments are in __gg__console_encoding. static char **stashed_argv = NULL; extern "C" @@ -10276,8 +10588,7 @@ __gg__get_argc(cblc_field_t *dest, size_t offset, size_t length) command_line_plan_b(); char ach[128]; sprintf(ach, "%d", stashed_argc); - ascii_to_internal_str(ach, strlen(ach)); - move_string(dest, offset, length, ach); + move_string(dest, offset, length, ach, __gg__console_encoding); } extern "C" @@ -10312,10 +10623,11 @@ __gg__get_argv( cblc_field_t *dest, } else { - char *retval = strdup(stashed_argv[N]); - console_to_internal(retval, strlen(retval)); - move_string(dest, dest_offset, dest_length, retval); - free(retval); + move_string(dest, + dest_offset, + dest_length, + stashed_argv[N], + DEFAULT_CHARMAP_SOURCE); retcode = 0; // Okay } return retcode; @@ -10352,8 +10664,7 @@ __gg__get_command_line( cblc_field_t *field, if( *retval ) { flength = flength ? flength : field->capacity; - console_to_internal(retval, strlen(retval)); - move_string(field, offset, flength, retval); + move_string(field, offset, flength, retval, __gg__console_encoding); retcode = 0; // Okay } else @@ -10429,10 +10740,10 @@ __gg__alphabet_use( cbl_encoding_t encoding, // state needs to be saved -- for example, if we are doing a SORT with an // ALPHABET override -- that's up to the caller - // When there is no DATA DIVISION, program_states can be empty when - // we arrive here. if( program_states.empty() ) { + // When there is no DATA DIVISION, program_states can be empty when + // we arrive here. So, we need to remedy that: initialize_program_state(); } @@ -10446,7 +10757,7 @@ __gg__alphabet_use( cbl_encoding_t encoding, program_states.back().rt_low_value_character = DEGENERATE_LOW_VALUE; program_states.back().rt_high_value_character = DEGENERATE_HIGH_VALUE; - if( !internal_is_ebcdic ) + if( !__gg__ebcdic_codeset_in_use ) { program_states.back().rt_collation = __gg__one_to_one_values; } @@ -10463,7 +10774,7 @@ __gg__alphabet_use( cbl_encoding_t encoding, program_states.back().rt_low_value_character = DEGENERATE_LOW_VALUE; program_states.back().rt_high_value_character = DEGENERATE_HIGH_VALUE; - if( internal_is_ebcdic ) + if( __gg__ebcdic_codeset_in_use ) { program_states.back().rt_collation = __gg__one_to_one_values; } @@ -10490,54 +10801,38 @@ __gg__alphabet_use( cbl_encoding_t encoding, program_states.back().rt_collation = it->second.collation; break; } + + default: + break; } return; } -extern "C" -void -__gg__ascii_to_internal_field(cblc_field_t *var) - { - ascii_to_internal_str(reinterpret_cast(var->data), var->capacity); - } - -extern "C" -void -__gg__ascii_to_internal(char *location, size_t length) - { - ascii_to_internal_str(location, length); - } - -extern "C" -void -__gg__console_to_internal(char *location, size_t length) - { - console_to_internal(location, length); - } - extern "C" void __gg__parser_set_conditional(cblc_field_t *var, int figconst_) { + charmap_t *charmap = __gg__get_charmap(var->encoding); + cbl_figconst_t figconst = (cbl_figconst_t)figconst_; - unsigned char special = internal_space; + unsigned char special = charmap->mapped_character(ascii_space); switch(figconst) { case space_value_e: - special = *__gg__data_space; + special = charmap->mapped_character(ascii_space); break; case low_value_e: - special = *__gg__data_low_values; + special = charmap->low_value_character(); break; case high_value_e: - special = *__gg__data_high_values; + special = charmap->high_value_character(); break; case zero_value_e: - special = *__gg__data_zeros; + special = charmap->mapped_character(ascii_0); break; case quote_value_e: - special = *__gg__data_quotes; + special = charmap->quote_character(); break; default: break; @@ -10545,17 +10840,6 @@ __gg__parser_set_conditional(cblc_field_t *var, int figconst_) memset( var->data, special, var->capacity); } -extern "C" -void -__gg__internal_to_console_in_place(char *loc, size_t length) - { - static size_t dest_size = MINIMUM_ALLOCATION_SIZE; - static char *dest = static_cast(malloc(dest_size)); - - internal_to_console(&dest, &dest_size, loc, length); - memcpy(loc, dest, length); - } - extern "C" int __gg__routine_to_call(const char *name, @@ -10734,12 +11018,14 @@ __gg__literaln_alpha_compare(const char *left_side, { length = right->capacity; } + retval = compare_strings( left_side, strlen(left_side), false, reinterpret_cast((right->data + offset)), length, - !!(flags & REFER_T_MOVE_ALL) ); + !!(flags & REFER_T_MOVE_ALL), + right->encoding); return retval; } @@ -10781,7 +11067,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring size_t id1_o, size_t id1_s, size_t ndelimiteds, // The number of DELIMITED entries - const char *all_flags, // The number of ALL flags, one per ndelimiteds + const char *all_flags, // The number of ALL flags, one per ndelimiteds size_t nreceivers, // The number of DELIMITER receivers cblc_field_t *id7, // The index of characters, both for starting updated at end size_t id7_o, @@ -10885,7 +11171,12 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring } // Move the data into place: - move_string(id4[i], id4_o[i], id4_s[i], left, id_4_size); + move_string(id4[i], + id4_o[i], + id4_s[i], + left, + id1->encoding, + id_4_size); // Update the state variables: left += id_4_size; @@ -10906,15 +11197,18 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring int ifound = -1; cbl_figconst_t figconst; char achfigconst[1]; + cbl_encoding_t fig_encoding; for( size_t i=0; iencoding; + charmap_t *charmap = __gg__get_charmap(fig_encoding); char *pfound; figconst = (cbl_figconst_t)(id2[i]->attr & FIGCONST_MASK); switch(figconst) { case low_value_e : - achfigconst[0] = ascii_to_internal(__gg__low_value_character); + achfigconst[0] = charmap->figconst_character(figconst); pfound = string_in( left, right, achfigconst, @@ -10922,7 +11216,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring break; case zero_value_e : - achfigconst[0] = internal_zero; + achfigconst[0] = charmap->figconst_character(figconst); pfound = string_in( left, right, achfigconst, @@ -10930,7 +11224,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring break; case space_value_e : - achfigconst[0] = internal_space; + achfigconst[0] = charmap->figconst_character(figconst); pfound = string_in( left, right, achfigconst, @@ -10938,7 +11232,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring break; case quote_value_e : - achfigconst[0] = ascii_to_internal(__gg__quote_character); + achfigconst[0] = charmap->figconst_character(figconst); pfound = string_in( left, right, achfigconst, @@ -10946,7 +11240,7 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring break; case high_value_e : - achfigconst[0] = ascii_to_internal(__gg__high_value_character); + achfigconst[0] = charmap->figconst_character(figconst); pfound = string_in( left, right, achfigconst, @@ -11013,7 +11307,12 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring size_t examined = leftmost_delimiter - left; // Move the data into place: - move_string(id4[nreceiver], id4_o[nreceiver], id4_s[nreceiver], left, examined); + move_string(id4[nreceiver], + id4_o[nreceiver], + id4_s[nreceiver], + left, + id1->encoding, + examined); // Update the left pointer left = leftmost_delimiter; @@ -11029,20 +11328,30 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring { if( figconst ) { - move_string(id5[nreceiver], id5_o[nreceiver], id5_s[nreceiver], + move_string(id5[nreceiver], + id5_o[nreceiver], + id5_s[nreceiver], achfigconst, + fig_encoding, 1); } else { - move_string(id5[nreceiver], id5_o[nreceiver], id5_s[nreceiver], + move_string(id5[nreceiver], + id5_o[nreceiver], + id5_s[nreceiver], reinterpret_cast(id2[ifound]->data+id2_o[ifound]), + id2[ifound]->encoding, id2_s[ifound]); } } else { - move_string(id5[nreceiver], id5_o[nreceiver], id5_s[nreceiver], ""); + move_string(id5[nreceiver], + id5_o[nreceiver], + id5_s[nreceiver], + "", + DEFAULT_CHARMAP_SOURCE); } } @@ -11168,7 +11477,6 @@ struct exception_descr_t { }; struct cbl_exception_t { -// size_t program, size_t file; ec_type_t type; cbl_file_mode_t mode; @@ -12166,11 +12474,13 @@ void __gg__codeset_figurative_constants() { // This routine gets called after the codeset has been changed - *__gg__data_space = internal_space; - *__gg__data_low_values = ascii_to_internal(__gg__low_value_character); - *__gg__data_zeros = internal_0; - *__gg__data_high_values = ascii_to_internal(__gg__high_value_character); - *__gg__data_quotes = ascii_to_internal(__gg__quote_character);; + + // __gg__data_space and __gg__data_zeros don't change because they are + // permanently encoded as iconv_CP1252_e. These other three can be changed + // as either compiler options or ALPHABET clauses. + *__gg__data_low_values = __gg__low_value_character; + *__gg__data_high_values = __gg__high_value_character; + *__gg__data_quotes = __gg__quote_character; } extern "C" @@ -12305,7 +12615,7 @@ extern "C" void * __gg__function_handle_from_cobpath( char *unmangled_name, char *mangled_name) { - void *retval = NULL; + void *retval; // We search for a function. We check first for the unmangled name, and then // the mangled name. We do this first for the executable, then for .so @@ -12314,12 +12624,9 @@ __gg__function_handle_from_cobpath( char *unmangled_name, char *mangled_name) static void *handle_executable = NULL; if( !handle_executable ) { - handle_executable = dlopen(NULL, RTLD_LAZY); - } - //if( !retval ) - { - retval = dlsym(handle_executable, unmangled_name); + handle_executable = dlopen(NULL, RTLD_NOW); } + retval = dlsym(handle_executable, unmangled_name); if( !retval ) { retval = dlsym(handle_executable, mangled_name); @@ -12353,14 +12660,16 @@ __gg__just_mangle_name( const cblc_field_t *field, size_t length; length = field->capacity; - memcpy(ach_name, field->data, length); - ach_name[length] = '\0'; - if( internal_is_ebcdic) - { - // The name is in EBCDIC - __gg__ebcdic_to_ascii(ach_name, length); - } + // We need ach_name to be in ASCII: + size_t charsout; + const char *converted = __gg__iconverter(field->encoding, + DEFAULT_CHARMAP_SOURCE, + PTRCAST(char, field->data), + length, + &charsout); + memcpy(ach_name, converted, charsout); + ach_name[charsout] = '\0'; bool is_pointer = false; @@ -12444,13 +12753,13 @@ __gg__function_handle_from_name(int program_id, length = field->capacity; } - memcpy(ach_name, field->data + offset, length); - - if( internal_is_ebcdic) - { - // The name is in EBCDIC - __gg__ebcdic_to_ascii(ach_name, length); - } + size_t charsout; + const char *converted = __gg__iconverter(field->encoding, + DEFAULT_CHARMAP_SOURCE, + PTRCAST(char, field->data + offset), + length, + &charsout); + memcpy(ach_name, converted, length); // At this point we have a null-terminated ascii function name. @@ -12758,30 +13067,11 @@ get_the_byte(cblc_field_t *field) int retval = -1; if( field ) { - cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); - switch(figconst) + // Get the encoded character associated with the figconst + retval = __gg__fc_char(field); + if(retval == -1) { - case null_value_e: - retval = 0; - break; - case low_value_e: - retval = ascii_to_internal(__gg__low_value_character); - break; - case zero_value_e: - retval = internal_zero; - break; - case space_value_e: - retval = internal_space; - break; - case quote_value_e: - retval = ascii_to_internal(__gg__quote_character); - break; - case high_value_e: - retval = ascii_to_internal(__gg__high_value_character) & 0xFF; - break; - case normal_value_e: - retval = (int)__gg__get_integer_binary_value(field); - break; + retval = (int)__gg__get_integer_binary_value(field); } } return retval; @@ -13192,8 +13482,17 @@ __gg__set_env_name( const cblc_field_t *var, free(sv_envname); sv_envname = static_cast(malloc(length+1)); massert(sv_envname); - memcpy(sv_envname, var->data+offset, length); + + // We need to convert the name to the console encoding: + size_t charsout; + const char *converted = __gg__iconverter(var->encoding, + __gg__console_encoding, + PTRCAST(char, var->data+offset), + length, + &charsout); + memcpy(sv_envname, converted, length); sv_envname[length] = '\0'; + brute_force_trim(sv_envname, __gg__console_encoding); } @@ -13205,11 +13504,15 @@ __gg__get_env_name( cblc_field_t *dest, { // Implements ACCEPT FROM ENVIRONMENT-NAME // It returns the value previously established by __gg__set_env_name. - if( sv_envname ) + if( !sv_envname ) { sv_envname = strdup(""); } - move_string(dest, dest_offset, dest_length, sv_envname); + move_string(dest, + dest_offset, + dest_length, + sv_envname, + __gg__console_encoding); } extern "C" @@ -13221,7 +13524,8 @@ __gg__get_env_value(cblc_field_t *dest, return accept_envar(dest, dest_offset, dest_length, - sv_envname); + sv_envname, + __gg__console_encoding); } extern "C" @@ -13231,42 +13535,33 @@ __gg__set_env_value(const cblc_field_t *value, size_t length ) { // implements DISPLAY UPON ENVIRONMENT-VALUE - size_t name_length = strlen(sv_envname); size_t value_length = length; - static size_t env_length = 16; - static char *env = static_cast(malloc(env_length+1)); - static size_t val_length = 16; - static char *val = static_cast(malloc(val_length+1)); - if( env_length < name_length+1 ) - { - env_length = name_length+1; - env = static_cast(realloc(env, env_length)); - } - if( val_length < value_length+1 ) + static size_t val_length = 0; + static char *val = nullptr; + if( val_length < length+1 ) { - val_length = value_length+1; + val_length = length+1; val = static_cast(realloc(val, val_length)); } - massert(env); massert(val); - // The name and the value arrive in the internal codeset: - memcpy(env, sv_envname, name_length); - env[name_length] = '\0'; memcpy(val, value->data+offset, value_length); val[value_length] = '\0'; - // Get rid of leading and trailing internal_space characters - char *trimmed_env = brute_force_trim(env); - char *trimmed_val = brute_force_trim(val); + __gg__convert_encoding( val, + value->encoding, + __gg__console_encoding); - // Conver them to the console codeset - __gg__internal_to_console_in_place(trimmed_env, strlen(trimmed_env)); - __gg__internal_to_console_in_place(trimmed_val, strlen(trimmed_val)); + + // Get rid of leading and trailing space characters + char *trimmed_val = brute_force_trim(val, __gg__console_encoding); // And now, anticlimactically, set the variable: - setenv(trimmed_env, trimmed_val, 1); + if( sv_envname ) + { + setenv(sv_envname, trimmed_val, 1); + } } extern "C" @@ -13286,7 +13581,6 @@ __gg__fprintf_stderr(const char *format_string, ...) va_end(ap); } - static int sv_argument_number = 0; extern "C" @@ -13325,10 +13619,11 @@ __gg__accept_arg_value( cblc_field_t *dest, } else { - char *retval = strdup(stashed_argv[sv_argument_number]); - console_to_internal(retval, strlen(retval)); - move_string(dest, dest_offset, dest_length, retval); - free(retval); + move_string(dest, + dest_offset, + dest_length, + stashed_argv[sv_argument_number], + DEFAULT_CHARMAP_SOURCE); retcode = 0; // Okay // The Fujitsu spec says bump this value by one. @@ -13364,3 +13659,15 @@ __gg__get_file_descriptor(const char *device) } return retval; } + +int +__gg__fc_char(const cblc_field_t *field) + { + // This returns the figconst character for a field, if the field->attr + // indicates that the field is a figconst. Otherwise, it comes back -1 + int retval = -1; + charmap_t *charmap = __gg__get_charmap(field->encoding); + cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + retval = charmap->figconst_character(figconst); + return retval; + } diff --git a/libgcobol/libgcobol.h b/libgcobol/libgcobol.h index 2f5c8b0e63c..2871f713a68 100644 --- a/libgcobol/libgcobol.h +++ b/libgcobol/libgcobol.h @@ -52,9 +52,10 @@ extern "C" __int128 __gg__power_of_ten(int n); extern "C" __int128 __gg__dirty_to_binary_source( const char *dirty, int length, int *rdigits); -extern "C" __int128 __gg__dirty_to_binary_internal( const char *dirty, - int length, - int *rdigits); +extern "C" __int128 __gg__dirty_to_binary(const char *dirty, + cbl_encoding_t encoding, + int length, + int *rdigits); extern "C" __int128 __gg__binary_value_from_field( int *rdigits, cblc_field_t *var); @@ -116,7 +117,6 @@ extern "C" void __gg__realloc_if_necessary( char **dest, size_t *dest_size, size_t new_size); extern "C" void __gg__set_exception_file(const cblc_file_t *file); -extern "C" void __gg__internal_to_console_in_place(char *loc, size_t length); extern "C" __int128 __gg__binary_value_from_qualified_field(int *rdigits, const cblc_field_t *var, size_t offset, @@ -129,4 +129,17 @@ extern "C" __int128 __gg__integer_from_qualified_field(cblc_field_t *var, size_t var_size); void __gg__abort(const char *msg); +int __gg__fc_char(const cblc_field_t *field); + +extern "C" +void __gg__convert_encoding(char *psz, + cbl_encoding_t from, + cbl_encoding_t to ); + +extern "C" +void __gg__convert_encoding_length(char *pch, + size_t length, + cbl_encoding_t from, + cbl_encoding_t to ); + #endif diff --git a/libgcobol/stringbin.cc b/libgcobol/stringbin.cc index 63976cf4964..acbc510ace2 100644 --- a/libgcobol/stringbin.cc +++ b/libgcobol/stringbin.cc @@ -153,7 +153,7 @@ string_from_combined(const COMBINED &combined) { case 1: // We know that val8 is a single digit - combined_string[combined.start] = combined.val8 + zero_char;; + combined_string[combined.start] = combined.val8 + zero_char; break; case 2: @@ -298,9 +298,13 @@ __gg__binary_to_string_ascii(char *result, int digits, __int128 value) } bool -__gg__binary_to_string_internal(char *result, int digits, __int128 value) +__gg__binary_to_string_encoded( char *result, + int digits, + __int128 value, + cbl_encoding_t encoding) { - zero_char = internal_zero; + charmap_t *charmap = __gg__get_charmap(encoding); + zero_char = charmap->mapped_character(ascii_0); // Note that this routine does not terminate the generated string with a // NUL. This routine is sometimes used to generate a NumericDisplay string @@ -328,7 +332,6 @@ __gg__binary_to_string_internal(char *result, int digits, __int128 value) return retval; } - static void packed_from_combined(const COMBINED &combined) @@ -480,7 +483,8 @@ extern "C" __int128 __gg__numeric_display_to_binary(unsigned char *signp, const unsigned char *psz, - int n ) + int n, + cbl_encoding_t encoding) { /* This is specific to numeric display values. @@ -504,6 +508,11 @@ __gg__numeric_display_to_binary(unsigned char *signp, and so we build up a 128-bit result in three 64-bit pieces, and assemble them at the end. */ + charmap_t *charmap = __gg__get_charmap(encoding); + unsigned char zero = charmap->mapped_character(ascii_0); + unsigned char minus = charmap->mapped_character(ascii_minus); + + bool is_ebcdic = (zero == 0xF0); static const uint8_t lookup[] = { @@ -575,10 +584,10 @@ __gg__numeric_display_to_binary(unsigned char *signp, unsigned char sign_byte = *signp; const unsigned char *mapper; - if( internal_is_ebcdic ) + if( is_ebcdic ) { mapper = from_ebcdic; - if( sign_byte == EBCDIC_MINUS ) + if( sign_byte == minus ) { is_negative = true; } @@ -595,7 +604,7 @@ __gg__numeric_display_to_binary(unsigned char *signp, else { mapper = from_ascii; - if( sign_byte == '-' ) + if( sign_byte == minus ) { is_negative = true; } @@ -692,7 +701,6 @@ __gg__numeric_display_to_binary(unsigned char *signp, // Replace the original sign byte: *signp = sign_byte; // cppcheck-suppress redundantAssignment - return retval; } @@ -788,6 +796,7 @@ __gg__packed_to_binary(const unsigned char *psz, // back up one byte to fetch the sign nybble. uint8_t sign_nybble = *(psz-1) & 0x0F; + enum{ PACKED_NYBBLE_MINUS= 0x0D}; if( sign_nybble > 9 ) { diff --git a/libgcobol/stringbin.h b/libgcobol/stringbin.h index de003e79054..48c4874292a 100644 --- a/libgcobol/stringbin.h +++ b/libgcobol/stringbin.h @@ -35,9 +35,10 @@ bool __gg__binary_to_string_ascii(char *result, int digits, __int128 value); extern "C" -bool __gg__binary_to_string_internal( char *result, - int digits, - __int128 value); +bool __gg__binary_to_string_encoded(char *result, + int digits, + __int128 value, + cbl_encoding_t encoding); extern "C" void __gg__binary_to_packed( unsigned char *result, @@ -47,7 +48,8 @@ void __gg__binary_to_packed( unsigned char *result, extern "C" __int128 __gg__numeric_display_to_binary( unsigned char *sign_byte, const unsigned char *digits, - int ndigits ); + int ndigits, + cbl_encoding_t encoding); extern "C" __int128 diff --git a/libgcobol/valconv.cc b/libgcobol/valconv.cc index aaa89f57ea9..7d28c9013b1 100644 --- a/libgcobol/valconv.cc +++ b/libgcobol/valconv.cc @@ -39,22 +39,11 @@ #include "ec.h" #include "common-defs.h" +#include "valconv.h" #include "charmaps.h" -#include "valconv.h" #include "exceptl.h" -int __gg__decimal_point = '.' ; -int __gg__decimal_separator = ',' ; -int __gg__quote_character = '"' ; -int __gg__low_value_character = 0x00 ; -int __gg__high_value_character = 0xFF ; -char **__gg__currency_signs ; - -int __gg__default_currency_sign; - -char *__gg__ct_currency_signs[256]; // Compile-time currency signs - std::unordered_map __gg__alphabet_states; extern "C" @@ -113,7 +102,6 @@ __gg__alphabet_create( cbl_encoding_t encoding, return; } - static int expand_picture(char *dest, const char *picture) { @@ -227,6 +215,10 @@ __gg__string_to_numeric_edited( char * const dest, int is_negative, const char *picture) { + // This routine operates in ASCII space. Life is hard enough without trying + // to do this in EBCDIC, too. So, 'source' and 'picture' are assumed to be + // CP1252 + // We need to expand the picture string. We assume that the caller left // enough room in dest to take the expanded picture string. @@ -449,7 +441,6 @@ __gg__string_to_numeric_edited( char * const dest, } } - if( index_s >= decimal_point_index ) { // We are to the right of the decimal point, and so we @@ -1214,24 +1205,29 @@ got_float: } } } - bool retval = false; + bool retval = false; return retval; } extern "C" void __gg__string_to_alpha_edited( char *dest, + cbl_encoding_t dest_encoding, const char *source, int slength, const char *picture) { + // 'source' is in 'dest' encoding + // Put the PICTURE into the data area. If the caller didn't leave enough // room, well, poo on them. Said another way; if they specify disaster, // disaster is what they will get. // This routine expands picture into dest using ascii characters, but - // replaces them with internal characters + // replaces them with encoded characters + + charmap_t *charmap_dest = __gg__get_charmap(dest_encoding); int destlength = expand_picture(dest, picture); @@ -1246,15 +1242,15 @@ __gg__string_to_alpha_edited( char *dest, { case ascii_b: // Replaced with space case ascii_B: - dest[dindex] = internal_space; + dest[dindex] = charmap_dest->mapped_character(ascii_space); break; case ascii_zero: // These are left alone: - dest[dindex] = ascii_to_internal(ascii_zero); + dest[dindex] = charmap_dest->mapped_character(ascii_0); break; case ascii_slash: - dest[dindex] = ascii_to_internal(ascii_slash); + dest[dindex] = charmap_dest->mapped_character(ascii_slash); break; default: @@ -1267,14 +1263,14 @@ __gg__string_to_alpha_edited( char *dest, } else { - sch = internal_space; + sch = charmap_dest->mapped_character(ascii_space);; } dest[dindex] = sch; } dindex += 1; } } - + extern "C" void __gg__currency_sign_init() @@ -1323,7 +1319,7 @@ __gg__remove_trailing_zeroes(char *p) if( strchr(left, '.') ) { - while(*right == '0' || *right == internal_space) + while( *right == '0' ) { right -= 1; } diff --git a/libgcobol/valconv.h b/libgcobol/valconv.h index 1efb2b9bf66..7b2c415830d 100644 --- a/libgcobol/valconv.h +++ b/libgcobol/valconv.h @@ -31,16 +31,6 @@ #ifndef __VALCONV_H #define __VALCONV_H -extern int __gg__decimal_point ; -extern int __gg__decimal_separator ; -extern int __gg__quote_character ; -extern int __gg__low_value_character ; -extern int __gg__high_value_character ; -extern char **__gg__currency_signs ; -extern int __gg__default_currency_sign; -extern char *__gg__ct_currency_signs[256]; // Compile-time currency signs - - // All "ordinals" are zero-based ordinals. The COBOL spec's ordinal values // for ordinary ASCII/EBCDIC ranger from 1 to 256, so we call them zero through // 255. We use unsigned ints so that when an custom alphabet is described, we @@ -69,6 +59,7 @@ extern "C" int is_negative, const char *picture); void __gg__string_to_alpha_edited(char *dest, + cbl_encoding_t dest_encoding, const char *source, int slength, const char *picture);